src/cpu/x86/vm/stubGenerator_x86_32.cpp

changeset:   6876:710a3c8b516e
author:      aoqi
date:        Tue, 08 Aug 2017 15:57:29 +0800
parents:     6312:04d32e7fad07, 0:f90c822e73f8
child:       7994:04ff2f6cd0eb
permissions: -rw-r--r--
summary:     merge

/*
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp

#define __ _masm->
#define a__ ((Assembler*)_masm)->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
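// BIND is used through the MacroAssembler pointer, e.g. "__ BIND(L_loop);", so in
// non-product builds the label name also shows up as a block comment in the
// generated stub, which makes the disassembly easier to navigate.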

const int MXCSR_MASK  = 0xFFC0;         // Mask out any pending exceptions
const int FPU_CNTRL_WRD_MASK = 0xFFFF;
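// 0xFFC0 keeps the MXCSR control fields (exception masks, DAZ, rounding control) and
// drops bits 5:0, the sticky exception flags, so comparisons against the standard
// MXCSR value ignore any flags accumulated by native code; 0xFFFF keeps the whole
// 16-bit x87 control word.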

// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions

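// Reached (indirectly) when an unsafe access, e.g. through sun.misc.Unsafe, takes a
// fault: the platform signal handler (os_*_x86.cpp) records the faulting pc with
// JavaThread::set_saved_exception_pc() and redirects execution to the stub built by
// generate_handler_for_unsafe_access() below, which calls this function and resumes
// at the returned npc; the pending unsafe-access error is then raised as an
// asynchronous exception.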
static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc  = thread->saved_exception_pc();
  // pc is the address of the instruction we must emulate;
  // doing a no-op is fine: return garbage from the load.
  // Therefore, compute npc, the address of the next instruction.
  address npc = Assembler::locate_next_instruction(pc);

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  void inc_counter_np_(int& counter) {
    __ incrementl(ExternalAddress((address)&counter));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif //PRODUCT

  void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
    switch (t) {
    case T_BYTE:   inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);  return;
    case T_SHORT:  inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
    case T_INT:    inc_counter_np(SharedRuntime::_jint_array_copy_ctr);   return;
    case T_LONG:   inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);  return;
    case T_OBJECT: inc_counter_np(SharedRuntime::_oop_array_copy_ctr);    return;
    }
    ShouldNotReachHere();
#endif //PRODUCT
  }

  //------------------------------------------------------------------------------------------------------------------------
  // Call stubs are used to call Java from C
  //
  //    [ return_from_Java     ] <--- rsp
  //    [ argument word n      ]
  //      ...
  // -N [ argument word 1      ]
  // -7 [ Possible padding for stack alignment ]
  // -6 [ Possible padding for stack alignment ]
  // -5 [ Possible padding for stack alignment ]
  // -4 [ mxcsr save           ] <--- rsp_after_call
  // -3 [ saved rbx,           ]
  // -2 [ saved rsi            ]
  // -1 [ saved rdi            ]
  //  0 [ saved rbp,           ] <--- rbp,
  //  1 [ return address       ]
  //  2 [ ptr. to call wrapper ]
  //  3 [ result               ]
  //  4 [ result_type          ]
  //  5 [ method               ]
  //  6 [ entry_point          ]
  //  7 [ parameters           ]
  //  8 [ parameter_size       ]
  //  9 [ thread               ]


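  // The incoming words at offsets 2..9 above match, in order, the parameters of the
  // CallStub function pointer type declared in stubRoutines.hpp (roughly):
  //
  //   typedef void (*CallStub)(address   link,              // ptr. to call wrapper
  //                            intptr_t* result,
  //                            BasicType result_type,
  //                            Method*   method,
  //                            address   entry_point,
  //                            intptr_t* parameters,
  //                            int       size_of_parameters,
  //                            TRAPS);                      // current thread
  //
  // JavaCalls::call_helper() invokes the generated stub through that pointer.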
  address generate_call_stub(address& return_address) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // stub code parameters / addresses
    assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
    bool  sse_save = false;
    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
    const int     locals_count_in_bytes  (4*wordSize);
    const Address mxcsr_save    (rbp, -4 * wordSize);
    const Address saved_rbx     (rbp, -3 * wordSize);
    const Address saved_rsi     (rbp, -2 * wordSize);
    const Address saved_rdi     (rbp, -1 * wordSize);
    const Address result        (rbp,  3 * wordSize);
    const Address result_type   (rbp,  4 * wordSize);
    const Address method        (rbp,  5 * wordSize);
    const Address entry_point   (rbp,  6 * wordSize);
    const Address parameters    (rbp,  7 * wordSize);
    const Address parameter_size(rbp,  8 * wordSize);
    const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
    sse_save = UseSSE > 0;

    // stub code
    __ enter();
    __ movptr(rcx, parameter_size);                   // parameter counter
    __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
    __ addptr(rcx, locals_count_in_bytes);            // reserve space for register saves
    __ subptr(rsp, rcx);
    __ andptr(rsp, -(StackAlignmentInBytes));         // Align stack

    // save rdi, rsi, & rbx, according to C calling conventions
    __ movptr(saved_rdi, rdi);
    __ movptr(saved_rsi, rsi);
    __ movptr(saved_rbx, rbx);
    // save and initialize %mxcsr
    if (sse_save) {
      Label skip_ldmx;
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, skip_ldmx);
      __ ldmxcsr(mxcsr_std);
      __ bind(skip_ldmx);
    }

    // make sure the control word is correct.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
    // make sure we have no pending exceptions
    { Label L;
      __ movptr(rcx, thread);
      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;
    __ movl(rcx, parameter_size);  // parameter counter
    __ testl(rcx, rcx);
    __ jcc(Assembler::zero, parameters_done);

    // parameter passing loop

    Label loop;
    // Copy Java parameters in reverse order (receiver last)
    // Note that the argument order is inverted in the process
    // source is rdx[rcx: N-1..0]
    // dest   is rsp[rbx: 0..N-1]

    __ movptr(rdx, parameters);    // parameter pointer
    __ xorptr(rbx, rbx);

    __ BIND(loop);

    // get parameter
    __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
    __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                      Interpreter::expr_offset_in_bytes(0)), rax);  // store parameter
    __ increment(rbx);
    __ decrement(rcx);
    __ jcc(Assembler::notZero, loop);

    // call Java function
    __ BIND(parameters_done);
    __ movptr(rbx, method);        // get Method*
    __ movptr(rax, entry_point);   // get entry_point
    __ mov(rsi, rsp);              // set sender sp
    BLOCK_COMMENT("call Java function");
    __ call(rax);

    BLOCK_COMMENT("call_stub_return_address:");
    return_address = __ pc();

#ifdef COMPILER2
    {
      Label L_skip;
      if (UseSSE >= 2) {
        __ verify_FPU(0, "call_stub_return");
      } else {
        for (int i = 1; i < 8; i++) {
          __ ffree(i);
        }

        // UseSSE <= 1 so double result should be left on TOS
        __ movl(rsi, result_type);
        __ cmpl(rsi, T_DOUBLE);
        __ jcc(Assembler::equal, L_skip);
        if (UseSSE == 0) {
          // UseSSE == 0 so float result should be left on TOS
          __ cmpl(rsi, T_FLOAT);
          __ jcc(Assembler::equal, L_skip);
        }
        __ ffree(0);
      }
      __ BIND(L_skip);
    }
#endif // COMPILER2

    // store result depending on type
    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ movptr(rdi, result);
    Label is_long, is_float, is_double, exit;
    __ movl(rsi, result_type);
    __ cmpl(rsi, T_LONG);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(rsi, T_FLOAT);
    __ jcc(Assembler::equal, is_float);
    __ cmpl(rsi, T_DOUBLE);
    __ jcc(Assembler::equal, is_double);

    // handle T_INT case
    __ movl(Address(rdi, 0), rax);
    __ BIND(exit);

    // check that FPU stack is empty
    __ verify_FPU(0, "generate_call_stub");

    // pop parameters
    __ lea(rsp, rsp_after_call);

    // restore %mxcsr
    if (sse_save) {
      __ ldmxcsr(mxcsr_save);
    }

    // restore rdi, rsi and rbx,
    __ movptr(rbx, saved_rbx);
    __ movptr(rsi, saved_rsi);
    __ movptr(rdi, saved_rdi);
    __ addptr(rsp, 4*wordSize);

    // return
    __ pop(rbp);
    __ ret(0);

    // handle return types different from T_INT
    __ BIND(is_long);
    __ movl(Address(rdi, 0 * wordSize), rax);
    __ movl(Address(rdi, 1 * wordSize), rdx);
    __ jmp(exit);

    __ BIND(is_float);
    // interpreter uses xmm0 for return values
    if (UseSSE >= 1) {
      __ movflt(Address(rdi, 0), xmm0);
    } else {
      __ fstp_s(Address(rdi, 0));
    }
    __ jmp(exit);

    __ BIND(is_double);
    // interpreter uses xmm0 for return values
    if (UseSSE >= 2) {
      __ movdbl(Address(rdi, 0), xmm0);
    } else {
      __ fstp_d(Address(rdi, 0));
    }
    __ jmp(exit);

    return start;
  }


  //------------------------------------------------------------------------------------------------------------------------
  // Return point for a Java call if there's an exception thrown in Java code.
  // The exception is caught and transformed into a pending exception stored in
  // JavaThread that can be tested from within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case of an exception
  //       crossing an activation frame boundary, that is not the case if the callee
  //       is compiled code => need to set up the rsp.
  //
  // rax,: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()!
    const Address thread        (rbp,  9 * wordSize); // same as in generate_call_stub()!
    address start = __ pc();

    // get thread directly
    __ movptr(rcx, thread);
#ifdef ASSERT
    // verify that threads correspond
    { Label L;
      __ get_thread(rbx);
      __ cmpptr(rbx, rcx);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif
    // set pending exception
    __ verify_oop(rax);
    __ movptr(Address(rcx, Thread::pending_exception_offset()), rax);
    __ lea(Address(rcx, Thread::exception_file_offset()),
           ExternalAddress((address)__FILE__));
    __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__);
    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

    return start;
  }


  //------------------------------------------------------------------------------------------------------------------------
  // Continuation point for runtime calls returning with a pending exception.
  // The pending exception check happened in the runtime or native call stub.
  // The pending exception in Thread is converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // rax: exception
  // rdx: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be on stack !!

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();
    const Register thread = rcx;

    // other registers used in this stub
    const Register exception_oop = rax;
    const Register handler_addr  = rbx;
    const Register exception_pc  = rdx;

    // Upon entry, the sp points to the return address returning into Java
    // (interpreted or compiled) code; i.e., the return address becomes the
    // throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack but
    // the exception handler will reset the stack pointer -> ignore them.
    // A potential result in registers can be ignored as well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    { Label L;
      __ get_thread(thread);
      __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into rbx,
    __ get_thread(thread);
    __ movptr(exception_pc, Address(rsp, 0));
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
    __ mov(handler_addr, rax);

    // setup rax & rdx, remove return address & clear pending exception
    __ get_thread(thread);
    __ pop(exception_pc);
    __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ testptr(exception_oop, exception_oop);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // Verify that there is really a valid exception in RAX.
    __ verify_oop(exception_oop);

    // continue at exception handler (return address removed)
    // rax: exception
    // rbx: exception handler
    // rdx: throwing pc
    __ jmp(handler_addr);

    return start;
  }


  //----------------------------------------------------------------------------------------------------
  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest)
  //
  // xchg exists as far back as 8086, lock needed for MP only
  // Stack layout immediately after call:
  //
  // 0 [ret addr ] <--- rsp
  // 1 [  ex     ]
  // 2 [  dest   ]
  //
  // Result:   *dest <- ex, return (old *dest)
  //
  // Note: win32 does not currently use this code

  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    __ push(rdx);
    Address exchange(rsp, 2 * wordSize);
    Address dest_addr(rsp, 3 * wordSize);
    __ movl(rax, exchange);
    __ movptr(rdx, dest_addr);
    __ xchgl(rax, Address(rdx, 0));
    __ pop(rdx);
    __ ret(0);

    return start;
  }
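
  // Usage sketch (an assumption, not spelled out in this file): the address returned
  // above is installed as StubRoutines::_atomic_xchg_entry during stub generation
  // and, on ports that do not inline the operation, is invoked as a plain C-style
  // function, e.g.
  //   typedef jint xchg_func_t(jint exchange_value, volatile jint* dest);
  //   jint old = ((xchg_func_t*)StubRoutines::atomic_xchg_entry())(ex, dest);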

  //----------------------------------------------------------------------------------------------------
  // Support for void verify_mxcsr()
  //
  // This routine is used with -Xcheck:jni to verify that native
  // JNI code does not return to Java code without restoring the
  // MXCSR register to our expected state.


  address generate_verify_mxcsr() {
    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
    address start = __ pc();

    const Address mxcsr_save(rsp, 0);

    if (CheckJNICalls && UseSSE > 0 ) {
      Label ok_ret;
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ push(rax);
      __ subptr(rsp, wordSize);      // allocate a temp location
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("MXCSR changed by native JNI code.");

      __ ldmxcsr(mxcsr_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }


  //---------------------------------------------------------------------------
  // Support for void verify_fpu_cntrl_wrd()
  //
  // This routine is used with -Xcheck:jni to verify that native
  // JNI code does not return to Java code without restoring the
  // FP control word to our expected state.

  address generate_verify_fpu_cntrl_wrd() {
    StubCodeMark mark(this, "StubRoutines", "verify_spcw");
    address start = __ pc();

    const Address fpu_cntrl_wrd_save(rsp, 0);

    if (CheckJNICalls) {
      Label ok_ret;
      __ push(rax);
      __ subptr(rsp, wordSize);      // allocate a temp location
      __ fnstcw(fpu_cntrl_wrd_save);
      __ movl(rax, fpu_cntrl_wrd_save);
      __ andl(rax, FPU_CNTRL_WRD_MASK);
      ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
      __ cmp32(rax, fpu_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("Floating point control word changed by native JNI code.");

      __ fldcw(fpu_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }

  //---------------------------------------------------------------------------
  // Wrapper for slow-case handling of double-to-integer conversion
  // d2i or f2i fast case failed either because the value is NaN or because
  // of under/overflow.
  // Input:  FPU TOS: float value
  // Output: rax, (rdx): integer (long) result

  address generate_d2i_wrapper(BasicType t, address fcn) {
    StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
    address start = __ pc();

    // Capture info about frame layout
    enum layout { FPUState_off         = 0,
                  rbp_off              = FPUStateSizeInWords,
                  rdi_off,
                  rsi_off,
                  rcx_off,
                  rbx_off,
                  saved_argument_off,
                  saved_argument_off2, // 2nd half of double
                  framesize
    };

    assert(FPUStateSizeInWords == 27, "update stack layout");

    // Save outgoing argument to stack across push_FPU_state()
    __ subptr(rsp, wordSize * 2);
    __ fstp_d(Address(rsp, 0));

    // Save CPU & FPU state
    __ push(rbx);
    __ push(rcx);
    __ push(rsi);
    __ push(rdi);
    __ push(rbp);
    __ push_FPU_state();

    // push_FPU_state() resets the FP top of stack
    // Load original double into FP top of stack
    __ fld_d(Address(rsp, saved_argument_off * wordSize));
    // Store double into stack as outgoing argument
    __ subptr(rsp, wordSize*2);
    __ fst_d(Address(rsp, 0));

    // Prepare FPU for doing math in C-land
    __ empty_FPU_stack();
    // Call the C code to massage the double.  Result in EAX
    if (t == T_INT)
      { BLOCK_COMMENT("SharedRuntime::d2i"); }
    else if (t == T_LONG)
      { BLOCK_COMMENT("SharedRuntime::d2l"); }
    __ call_VM_leaf( fcn, 2 );

    // Restore CPU & FPU state
    __ pop_FPU_state();
    __ pop(rbp);
    __ pop(rdi);
    __ pop(rsi);
    __ pop(rcx);
    __ pop(rbx);
    __ addptr(rsp, wordSize * 2);

    __ ret(0);

    return start;
  }
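
  // For reference, the C fallbacks passed in as 'fcn' (SharedRuntime::d2i / d2l and
  // the float variants) implement the JLS narrowing rules the fast path cannot,
  // roughly:
  //   NaN              -> 0
  //   value too large  -> max_jint  (resp. max_jlong)
  //   value too small  -> min_jint  (resp. min_jlong)
  //   otherwise        -> truncate toward zero
  // so this wrapper only has to marshal the x87 TOS value into the C calling
  // convention and restore the saved state afterwards.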


  //---------------------------------------------------------------------------
  // The following routine generates a subroutine to throw an asynchronous
  // UnknownError when an unsafe access gets a fault that could not be
  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    __ push(0);                       // hole for return address-to-be
    __ pusha();                       // push registers
    Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
    __ movptr(next_pc, rax);          // stuff next address
    __ popa();
    __ ret(0);                        // jump to next address

    return start;
  }


  //----------------------------------------------------------------------------------------------------
  // Non-destructive plausibility checks for oops

  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    // Incoming arguments on stack after saving rax,:
    //
    // [tos    ]: saved rdx
    // [tos + 1]: saved EFLAGS
    // [tos + 2]: return address
    // [tos + 3]: char* error message
    // [tos + 4]: oop   object to verify
    // [tos + 5]: saved rax, - saved by caller and bashed

    Label exit, error;
    __ pushf();
    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
    __ push(rdx);                                // save rdx
    // make sure object is 'reasonable'
    __ movptr(rax, Address(rsp, 4 * wordSize));  // get object
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok

    // Check if the oop is in the right area of memory
    const int oop_mask = Universe::verify_oop_mask();
    const int oop_bits = Universe::verify_oop_bits();
    __ mov(rdx, rax);
    __ andptr(rdx, oop_mask);
    __ cmpptr(rdx, oop_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass is 'reasonable', i.e. it must not be NULL
    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken

    // return if everything seems ok
    __ bind(exit);
    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax, back
    __ pop(rdx);                                 // restore rdx
    __ popf();                                   // restore EFLAGS
    __ ret(3 * wordSize);                        // pop arguments

    // handle errors
    __ bind(error);
    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax, back
    __ pop(rdx);                                 // get saved rdx back
    __ popf();                                   // get saved EFLAGS off stack -- will be ignored
    __ pusha();                                  // push registers (eip = return address & msg are already pushed)
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
    __ popa();
    __ ret(3 * wordSize);                        // pop arguments
    return start;
  }

  //
  // Generate pre-barrier for array stores
  //
  // Input:
  //   start   -  starting address
  //   count   -  element count
  void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
    assert_different_registers(start, count);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target is uninitialized
        if (!uninitialized_target) {
          __ pusha();                      // push registers
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
                          start, count);
          __ popa();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();

    }
  }


  //
  // Generate a post-barrier for an array store
  //
  //   start   -  starting address
  //   count   -  element count
  //
  // The two input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register start, Register count) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    assert_different_registers(start, count);
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          __ pusha();                      // push registers
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post),
                          start, count);
          __ popa();
        }
        break;

      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;
          const Register end = count;  // elements count; end == start+count-1
          assert_different_registers(start, end);

          __ lea(end, Address(start, count, Address::times_ptr, -wordSize));
          __ shrptr(start, CardTableModRefBS::card_shift);
          __ shrptr(end,   CardTableModRefBS::card_shift);
          __ subptr(end, start); // end --> count
          __ BIND(L_loop);
          intptr_t disp = (intptr_t) ct->byte_map_base;
          Address cardtable(start, count, Address::times_1, disp);
          __ movb(cardtable, 0);
          __ decrement(count);
          __ jcc(Assembler::greaterEqual, L_loop);
        }
        break;
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();

    }
  }
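
  // Card-table sketch for the CardTableModRef case above: with 512-byte cards
  // (CardTableModRefBS::card_shift == 9), the stores in [start, start + count*wordSize)
  // dirty the card bytes
  //   byte_map_base[addr >> card_shift]  for addr in [start, start + count*wordSize - 1]
  // and "dirty" is encoded as 0, which is why the loop simply writes a zero byte per card.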

  // Copy 64-byte chunks
  //
  // Inputs:
  //   from        - source array address
  //   to_from     - destination array address - from
  //   qword_count - 8-byte element count
  //
  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
    assert( UseSSE >= 2, "supported cpu only" );
    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
    // Copy 64-byte chunks
    __ jmpb(L_copy_64_bytes);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_64_bytes_loop);

    if (UseUnalignedLoadStores) {
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(from,  0));
        __ vmovdqu(Address(from, to_from, Address::times_1,  0), xmm0);
        __ vmovdqu(xmm1, Address(from, 32));
        __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
      } else {
        __ movdqu(xmm0, Address(from, 0));
        __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
        __ movdqu(xmm1, Address(from, 16));
        __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
        __ movdqu(xmm2, Address(from, 32));
        __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
        __ movdqu(xmm3, Address(from, 48));
        __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
      }
    } else {
      __ movq(xmm0, Address(from, 0));
      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
      __ movq(xmm1, Address(from, 8));
      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
      __ movq(xmm2, Address(from, 16));
      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
      __ movq(xmm3, Address(from, 24));
      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
      __ movq(xmm4, Address(from, 32));
      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
      __ movq(xmm5, Address(from, 40));
      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
      __ movq(xmm6, Address(from, 48));
      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
      __ movq(xmm7, Address(from, 56));
      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
    }

    __ addl(from, 64);
    __ BIND(L_copy_64_bytes);
    __ subl(qword_count, 8);
    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

    if (UseUnalignedLoadStores && (UseAVX >= 2)) {
      // clean upper bits of YMM registers
      __ vzeroupper();
    }
    __ addl(qword_count, 8);
    __ jccb(Assembler::zero, L_exit);
    //
    // length is too short, just copy qwords
    //
    __ BIND(L_copy_8_bytes);
    __ movq(xmm0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1), xmm0);
    __ addl(from, 8);
    __ decrement(qword_count);
    __ jcc(Assembler::greater, L_copy_8_bytes);
    __ BIND(L_exit);
  }
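
  // Note on the addressing trick used above (and in mmx_copy_forward below): since
  // to_from holds (to - from), the destination of each chunk is formed as
  //   Address(from, to_from, Address::times_1, disp)  ==  to + disp
  // so the loop only has to advance a single pointer, 'from', for both streams.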

  // Copy 64-byte chunks
  //
  // Inputs:
  //   from        - source array address
  //   to_from     - destination array address - from
  //   qword_count - 8-byte element count
  //
  void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
    assert( VM_Version::supports_mmx(), "supported cpu only" );
    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
    // Copy 64-byte chunks
    __ jmpb(L_copy_64_bytes);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_64_bytes_loop);
    __ movq(mmx0, Address(from, 0));
    __ movq(mmx1, Address(from, 8));
    __ movq(mmx2, Address(from, 16));
    __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
    __ movq(mmx3, Address(from, 24));
    __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
    __ movq(mmx4, Address(from, 32));
    __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
    __ movq(mmx5, Address(from, 40));
    __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
    __ movq(mmx6, Address(from, 48));
    __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
    __ movq(mmx7, Address(from, 56));
    __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
    __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
    __ movq(Address(from, to_from, Address::times_1, 56), mmx7);
    __ addptr(from, 64);
    __ BIND(L_copy_64_bytes);
    __ subl(qword_count, 8);
    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
    __ addl(qword_count, 8);
    __ jccb(Assembler::zero, L_exit);
    //
    // length is too short, just copy qwords
    //
    __ BIND(L_copy_8_bytes);
    __ movq(mmx0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1), mmx0);
    __ addptr(from, 8);
    __ decrement(qword_count);
    __ jcc(Assembler::greater, L_copy_8_bytes);
    __ BIND(L_exit);
    __ emms();
  }

  address generate_disjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address* entry, const char *name,
                                 bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;

    int shift = Address::times_ptr - sf;

    const Register from     = rsi;  // source array address
    const Register to       = rdi;  // destination array address
    const Register count    = rcx;  // elements count
    const Register to_from  = to;   // (to - from)
    const Register saved_to = rdx;  // saved destination array address

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(from , Address(rsp, 12+ 4));
    __ movptr(to   , Address(rsp, 12+ 8));
    __ movl(count, Address(rsp, 12+ 12));
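    // Calling convention note: these copy stubs are reached as plain C-style calls
    // taking (from, to, element count); the 12-byte bias in the loads above skips
    // the saved rbp, rsi and rdi that now sit between rsp and the return address.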

    if (entry != NULL) {
      *entry = __ pc(); // Entry point from conjoint arraycopy stub.
      BLOCK_COMMENT("Entry:");
    }

    if (t == T_OBJECT) {
      __ testl(count, count);
      __ jcc(Assembler::zero, L_0_count);
      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
      __ mov(saved_to, to);          // save 'to'
    }

    __ subptr(to, from);             // to --> to_from
    __ cmpl(count, 2<<shift);        // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
      // align source address at 4 bytes address boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ testl(from, 1);
        __ jccb(Assembler::zero, L_skip_align1);
        __ movb(rax, Address(from, 0));
        __ movb(Address(from, to_from, Address::times_1, 0), rax);
        __ increment(from);
        __ decrement(count);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ testl(from, 2);
      __ jccb(Assembler::zero, L_skip_align2);
      __ movw(rax, Address(from, 0));
      __ movw(Address(from, to_from, Address::times_1, 0), rax);
      __ addptr(from, 2);
      __ subl(count, 1<<(shift-1));
      __ BIND(L_skip_align2);
    }
    if (!VM_Version::supports_mmx()) {
      __ mov(rax, count);            // save 'count'
      __ shrl(count, shift);         // bytes count
      __ addptr(to_from, from);      // restore 'to'
      __ rep_mov();
      __ subptr(to_from, from);      // restore 'to_from'
      __ mov(count, rax);            // restore 'count'
      __ jmpb(L_copy_2_bytes);       // all dwords were copied
    } else {
      if (!UseUnalignedLoadStores) {
        // align to 8 bytes, we know we are 4 byte aligned to start
        __ testptr(from, 4);
        __ jccb(Assembler::zero, L_copy_64_bytes);
        __ movl(rax, Address(from, 0));
        __ movl(Address(from, to_from, Address::times_1, 0), rax);
        __ addptr(from, 4);
        __ subl(count, 1<<shift);
      }
      __ BIND(L_copy_64_bytes);
      __ mov(rax, count);
      __ shrl(rax, shift+1);         // 8 bytes chunk count
      //
      // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
      //
      if (UseXMMForArrayCopy) {
        xmm_copy_forward(from, to_from, rax);
      } else {
        mmx_copy_forward(from, to_from, rax);
      }
    }
    // copy tailing dword
    __ BIND(L_copy_4_bytes);
    __ testl(count, 1<<shift);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rax, Address(from, 0));
    __ movl(Address(from, to_from, Address::times_1, 0), rax);
    if (t == T_BYTE || t == T_SHORT) {
      __ addptr(from, 4);
      __ BIND(L_copy_2_bytes);
      // copy tailing word
      __ testl(count, 1<<(shift-1));
      __ jccb(Assembler::zero, L_copy_byte);
      __ movw(rax, Address(from, 0));
      __ movw(Address(from, to_from, Address::times_1, 0), rax);
      if (t == T_BYTE) {
        __ addptr(from, 2);
        __ BIND(L_copy_byte);
        // copy tailing byte
        __ testl(count, 1);
        __ jccb(Assembler::zero, L_exit);
        __ movb(rax, Address(from, 0));
        __ movb(Address(from, to_from, Address::times_1, 0), rax);
        __ BIND(L_exit);
      } else {
        __ BIND(L_copy_byte);
      }
    } else {
      __ BIND(L_copy_2_bytes);
    }

    if (t == T_OBJECT) {
      __ movl(count, Address(rsp, 12+12)); // reread 'count'
      __ mov(to, saved_to);                // restore 'to'
      gen_write_ref_array_post_barrier(to, count);
      __ BIND(L_0_count);
    }
    inc_copy_counter_np(t);
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }


  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to    = rdi;  // source array address
    const Register value = rdx;  // value
    const Register count = rsi;  // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(to   , Address(rsp, 12+ 4));
    __ movl(value, Address(rsp, 12+ 8));
    __ movl(count, Address(rsp, 12+ 12));

    __ generate_fill(t, aligned, to, value, count, rax, xmm0);

    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

  address generate_conjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address nooverlap_target,
                                 address* entry, const char *name,
                                 bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;

    int shift = Address::times_ptr - sf;

    const Register src   = rax;  // source array address
    const Register dst   = rdx;  // destination array address
    const Register from  = rsi;  // source array address
    const Register to    = rdi;  // destination array address
    const Register count = rcx;  // elements count
    const Register end   = rax;  // array end address

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(src  , Address(rsp, 12+ 4));   // from
    __ movptr(dst  , Address(rsp, 12+ 8));   // to
    __ movl2ptr(count, Address(rsp, 12+12)); // count

    if (entry != NULL) {
      *entry = __ pc(); // Entry point from generic arraycopy stub.
      BLOCK_COMMENT("Entry:");
    }

    // nooverlap_target expects arguments in rsi and rdi.
    __ mov(from, src);
    __ mov(to  , dst);

    // arrays overlap test: dispatch to disjoint stub if necessary.
    RuntimeAddress nooverlap(nooverlap_target);
    __ cmpptr(dst, src);
    __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
    __ jump_cc(Assembler::belowEqual, nooverlap);
    __ cmpptr(dst, end);
    __ jump_cc(Assembler::aboveEqual, nooverlap);
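    // Forward (disjoint) copy is safe when dst <= src or dst >= src + count*elem_size;
    // both jumps above go to the disjoint stub in that case, otherwise fall through
    // and copy from high addresses down to low.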

    if (t == T_OBJECT) {
      __ testl(count, count);
      __ jcc(Assembler::zero, L_0_count);
      gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
    }

    // copy from high to low
    __ cmpl(count, 2<<shift);        // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
    if (t == T_BYTE || t == T_SHORT) {
      // Align the end of destination array at 4 bytes address boundary
      __ lea(end, Address(dst, count, sf, 0));
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ testl(end, 1);
        __ jccb(Assembler::zero, L_skip_align1);
        __ decrement(count);
        __ movb(rdx, Address(from, count, sf, 0));
        __ movb(Address(to, count, sf, 0), rdx);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ testl(end, 2);
      __ jccb(Assembler::zero, L_skip_align2);
      __ subptr(count, 1<<(shift-1));
      __ movw(rdx, Address(from, count, sf, 0));
      __ movw(Address(to, count, sf, 0), rdx);
      __ BIND(L_skip_align2);
      __ cmpl(count, 2<<shift);      // Short arrays (< 8 bytes) copy by element
      __ jcc(Assembler::below, L_copy_4_bytes);
    }

    if (!VM_Version::supports_mmx()) {
      __ std();
      __ mov(rax, count);            // Save 'count'
      __ mov(rdx, to);               // Save 'to'
      __ lea(rsi, Address(from, count, sf, -4));
      __ lea(rdi, Address(to  , count, sf, -4));
      __ shrptr(count, shift);       // bytes count
      __ rep_mov();
      __ cld();
      __ mov(count, rax);            // restore 'count'
      __ andl(count, (1<<shift)-1);  // mask the number of rest elements
      __ movptr(from, Address(rsp, 12+4)); // reread 'from'
      __ mov(to, rdx);               // restore 'to'
      __ jmpb(L_copy_2_bytes);       // all dwords were copied
    } else {
      // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
      __ testptr(end, 4);
      __ jccb(Assembler::zero, L_copy_8_bytes);
      __ subl(count, 1<<shift);
      __ movl(rdx, Address(from, count, sf, 0));
      __ movl(Address(to, count, sf, 0), rdx);
      __ jmpb(L_copy_8_bytes);

      __ align(OptoLoopAlignment);
      // Move 8 bytes
      __ BIND(L_copy_8_bytes_loop);
      if (UseXMMForArrayCopy) {
        __ movq(xmm0, Address(from, count, sf, 0));
        __ movq(Address(to, count, sf, 0), xmm0);
      } else {
        __ movq(mmx0, Address(from, count, sf, 0));
        __ movq(Address(to, count, sf, 0), mmx0);
      }
      __ BIND(L_copy_8_bytes);
      __ subl(count, 2<<shift);
      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
      __ addl(count, 2<<shift);
      if (!UseXMMForArrayCopy) {
        __ emms();
      }
    }
    __ BIND(L_copy_4_bytes);
    // copy prefix qword
    __ testl(count, 1<<shift);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rdx, Address(from, count, sf, -4));
    __ movl(Address(to, count, sf, -4), rdx);

    if (t == T_BYTE || t == T_SHORT) {
      __ subl(count, (1<<shift));
      __ BIND(L_copy_2_bytes);
      // copy prefix dword
      __ testl(count, 1<<(shift-1));
      __ jccb(Assembler::zero, L_copy_byte);
      __ movw(rdx, Address(from, count, sf, -2));
      __ movw(Address(to, count, sf, -2), rdx);
      if (t == T_BYTE) {
        __ subl(count, 1<<(shift-1));
        __ BIND(L_copy_byte);
        // copy prefix byte
        __ testl(count, 1);
        __ jccb(Assembler::zero, L_exit);
        __ movb(rdx, Address(from, 0));
        __ movb(Address(to, 0), rdx);
        __ BIND(L_exit);
      } else {
        __ BIND(L_copy_byte);
      }
    } else {
      __ BIND(L_copy_2_bytes);
    }
    if (t == T_OBJECT) {
      __ movl2ptr(count, Address(rsp, 12+12)); // reread count
      gen_write_ref_array_post_barrier(to, count);
      __ BIND(L_0_count);
    }
    inc_copy_counter_np(t);
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }


  address generate_disjoint_long_copy(address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_8_bytes, L_copy_8_bytes_loop;
    const Register from    = rax;  // source array address
    const Register to      = rdx;  // destination array address
    const Register count   = rcx;  // elements count
    const Register to_from = rdx;  // (to - from)

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ movptr(from , Address(rsp, 8+0));     // from
    __ movptr(to   , Address(rsp, 8+4));     // to
    __ movl2ptr(count, Address(rsp, 8+8));   // count

    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");

    __ subptr(to, from); // to --> to_from
    if (VM_Version::supports_mmx()) {
      if (UseXMMForArrayCopy) {
        xmm_copy_forward(from, to_from, count);
      } else {
        mmx_copy_forward(from, to_from, count);
      }
    } else {
      __ jmpb(L_copy_8_bytes);
      __ align(OptoLoopAlignment);
      __ BIND(L_copy_8_bytes_loop);
      __ fild_d(Address(from, 0));
      __ fistp_d(Address(from, to_from, Address::times_1));
      __ addptr(from, 8);
      __ BIND(L_copy_8_bytes);
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
    }
    inc_copy_counter_np(T_LONG);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
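
  // On CPUs without MMX/SSE the loop above moves each jlong through the x87 stack
  // with fild_d/fistp_d: every 64-bit integer is exactly representable in the 80-bit
  // x87 format, so the pair round-trips the bits and copies each element with a
  // single 64-bit load and a single 64-bit store instead of two 32-bit moves.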
aoqi@0 1276
aoqi@0 1277 address generate_conjoint_long_copy(address nooverlap_target,
aoqi@0 1278 address* entry, const char *name) {
aoqi@0 1279 __ align(CodeEntryAlignment);
aoqi@0 1280 StubCodeMark mark(this, "StubRoutines", name);
aoqi@0 1281 address start = __ pc();
aoqi@0 1282
aoqi@0 1283 Label L_copy_8_bytes, L_copy_8_bytes_loop;
aoqi@0 1284 const Register from = rax; // source array address
aoqi@0 1285 const Register to = rdx; // destination array address
aoqi@0 1286 const Register count = rcx; // elements count
aoqi@0 1287 const Register end_from = rax; // source array end address
aoqi@0 1288
aoqi@0 1289 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 1290 __ movptr(from , Address(rsp, 8+0)); // from
aoqi@0 1291 __ movptr(to , Address(rsp, 8+4)); // to
aoqi@0 1292 __ movl2ptr(count, Address(rsp, 8+8)); // count
aoqi@0 1293
aoqi@0 1294 *entry = __ pc(); // Entry point from generic arraycopy stub.
aoqi@0 1295 BLOCK_COMMENT("Entry:");
aoqi@0 1296
aoqi@0 1297 // arrays overlap test
aoqi@0 1298 __ cmpptr(to, from);
aoqi@0 1299 RuntimeAddress nooverlap(nooverlap_target);
aoqi@0 1300 __ jump_cc(Assembler::belowEqual, nooverlap);
aoqi@0 1301 __ lea(end_from, Address(from, count, Address::times_8, 0));
aoqi@0 1302 __ cmpptr(to, end_from);
aoqi@0 1303 __ movptr(from, Address(rsp, 8)); // from
aoqi@0 1304 __ jump_cc(Assembler::aboveEqual, nooverlap);
aoqi@0 1305
aoqi@0 1306 __ jmpb(L_copy_8_bytes);
aoqi@0 1307
aoqi@0 1308 __ align(OptoLoopAlignment);
aoqi@0 1309 __ BIND(L_copy_8_bytes_loop);
aoqi@0 1310 if (VM_Version::supports_mmx()) {
aoqi@0 1311 if (UseXMMForArrayCopy) {
aoqi@0 1312 __ movq(xmm0, Address(from, count, Address::times_8));
aoqi@0 1313 __ movq(Address(to, count, Address::times_8), xmm0);
aoqi@0 1314 } else {
aoqi@0 1315 __ movq(mmx0, Address(from, count, Address::times_8));
aoqi@0 1316 __ movq(Address(to, count, Address::times_8), mmx0);
aoqi@0 1317 }
aoqi@0 1318 } else {
aoqi@0 1319 __ fild_d(Address(from, count, Address::times_8));
aoqi@0 1320 __ fistp_d(Address(to, count, Address::times_8));
aoqi@0 1321 }
aoqi@0 1322 __ BIND(L_copy_8_bytes);
aoqi@0 1323 __ decrement(count);
aoqi@0 1324 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
aoqi@0 1325
aoqi@0 1326 if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
aoqi@0 1327 __ emms();
aoqi@0 1328 }
aoqi@0 1329 inc_copy_counter_np(T_LONG);
aoqi@0 1330 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 1331 __ xorptr(rax, rax); // return 0
aoqi@0 1332 __ ret(0);
aoqi@0 1333 return start;
aoqi@0 1334 }
aoqi@0 1335
aoqi@0 1336
aoqi@0 1337 // Helper for generating a dynamic type check.
aoqi@0 1338 // The sub_klass must be one of {rbx, rdx, rsi}.
aoqi@0 1339 // The temp is killed.
aoqi@0 1340 void generate_type_check(Register sub_klass,
aoqi@0 1341 Address& super_check_offset_addr,
aoqi@0 1342 Address& super_klass_addr,
aoqi@0 1343 Register temp,
aoqi@0 1344 Label* L_success, Label* L_failure) {
aoqi@0 1345 BLOCK_COMMENT("type_check:");
aoqi@0 1346
aoqi@0 1347 Label L_fallthrough;
aoqi@0 1348 #define LOCAL_JCC(assembler_con, label_ptr) \
aoqi@0 1349 if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \
aoqi@0 1350 else __ jcc(assembler_con, L_fallthrough) /*omit semi*/
aoqi@0 1351
aoqi@0 1352 // The following is a strange variation of the fast path which requires
aoqi@0 1353 // one less register, because needed values are on the argument stack.
aoqi@0 1354 // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
aoqi@0 1355 // L_success, L_failure, NULL);
aoqi@0 1356 assert_different_registers(sub_klass, temp);
aoqi@0 1357
aoqi@0 1358 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
aoqi@0 1359
aoqi@0 1360 // if the pointers are equal, we are done (e.g., String[] elements)
aoqi@0 1361 __ cmpptr(sub_klass, super_klass_addr);
aoqi@0 1362 LOCAL_JCC(Assembler::equal, L_success);
aoqi@0 1363
aoqi@0 1364 // check the supertype display:
aoqi@0 1365 __ movl2ptr(temp, super_check_offset_addr);
aoqi@0 1366 Address super_check_addr(sub_klass, temp, Address::times_1, 0);
aoqi@0 1367 __ movptr(temp, super_check_addr); // load displayed supertype
aoqi@0 1368 __ cmpptr(temp, super_klass_addr); // test the super type
aoqi@0 1369 LOCAL_JCC(Assembler::equal, L_success);
aoqi@0 1370
aoqi@0 1371 // if it was a primary super, we can just fail immediately
aoqi@0 1372 __ cmpl(super_check_offset_addr, sc_offset);
aoqi@0 1373 LOCAL_JCC(Assembler::notEqual, L_failure);
aoqi@0 1374
aoqi@0 1375 // The repne_scan instruction uses fixed registers, which will get spilled.
aoqi@0 1376 // We happen to know this works best when super_klass is in rax.
aoqi@0 1377 Register super_klass = temp;
aoqi@0 1378 __ movptr(super_klass, super_klass_addr);
aoqi@0 1379 __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
aoqi@0 1380 L_success, L_failure);
aoqi@0 1381
aoqi@0 1382 __ bind(L_fallthrough);
aoqi@0 1383
aoqi@0 1384 if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
aoqi@0 1385 if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
aoqi@0 1386
aoqi@0 1387 #undef LOCAL_JCC
aoqi@0 1388 }
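// Roughly, the check emitted above is equivalent to this sketch (illustrative
// pseudocode, not generated code):
//
//   if (sub_klass == super_klass)                              goto success;
//   if (*(sub_klass + super_check_offset) == super_klass)      goto success;  // display hit
//   if (super_check_offset != secondary_super_cache_offset())  goto failure;  // primary super, no match
//   scan sub_klass->secondary_supers() for super_klass;        // slow path (repne scan)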
aoqi@0 1389
aoqi@0 1390 //
aoqi@0 1391 // Generate checkcasting array copy stub
aoqi@0 1392 //
aoqi@0 1393 // Input:
aoqi@0 1394 // 4(rsp) - source array address
aoqi@0 1395 // 8(rsp) - destination array address
aoqi@0 1396 // 12(rsp) - element count, can be zero
aoqi@0 1397 // 16(rsp) - size_t ckoff (super_check_offset)
aoqi@0 1398 // 20(rsp) - oop ckval (super_klass)
aoqi@0 1399 //
aoqi@0 1400 // Output:
aoqi@0 1401 //    rax == 0     -  success
aoqi@0 1402 //    rax == -1^K  -  failure, where K is partial transfer count
aoqi@0 1403 //
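// Failure encoding, worked example (illustrative): if 4 elements are stored
// before an element fails the type check, K == 4 and rax == -1^4 == ~4 == -5.
// The caller recovers the partial transfer count as K == ~rax; a plain -1
// therefore means nothing was copied.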
aoqi@0 1404 address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
aoqi@0 1405 __ align(CodeEntryAlignment);
aoqi@0 1406 StubCodeMark mark(this, "StubRoutines", name);
aoqi@0 1407 address start = __ pc();
aoqi@0 1408
aoqi@0 1409 Label L_load_element, L_store_element, L_do_card_marks, L_done;
aoqi@0 1410
aoqi@0 1411 // register use:
aoqi@0 1412 // rax, rdx, rcx -- loop control (end_from, end_to, count)
aoqi@0 1413 // rdi, rsi -- element access (oop, klass)
aoqi@0 1414 // rbx, -- temp
aoqi@0 1415 const Register from = rax; // source array address
aoqi@0 1416 const Register to = rdx; // destination array address
aoqi@0 1417 const Register length = rcx; // elements count
aoqi@0 1418 const Register elem = rdi; // each oop copied
aoqi@0 1419 const Register elem_klass = rsi; // each elem._klass (sub_klass)
aoqi@0 1420 const Register temp = rbx; // lone remaining temp
aoqi@0 1421
aoqi@0 1422 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 1423
aoqi@0 1424 __ push(rsi);
aoqi@0 1425 __ push(rdi);
aoqi@0 1426 __ push(rbx);
aoqi@0 1427
aoqi@0 1428 Address from_arg(rsp, 16+ 4); // from
aoqi@0 1429 Address to_arg(rsp, 16+ 8); // to
aoqi@0 1430 Address length_arg(rsp, 16+12); // elements count
aoqi@0 1431 Address ckoff_arg(rsp, 16+16); // super_check_offset
aoqi@0 1432 Address ckval_arg(rsp, 16+20); // super_klass
aoqi@0 1433
aoqi@0 1434 // Load up:
aoqi@0 1435 __ movptr(from, from_arg);
aoqi@0 1436 __ movptr(to, to_arg);
aoqi@0 1437 __ movl2ptr(length, length_arg);
aoqi@0 1438
aoqi@0 1439 if (entry != NULL) {
aoqi@0 1440 *entry = __ pc(); // Entry point from generic arraycopy stub.
aoqi@0 1441 BLOCK_COMMENT("Entry:");
aoqi@0 1442 }
aoqi@0 1443
aoqi@0 1444 //---------------------------------------------------------------
aoqi@0 1445 // Assembler stub will be used for this call to arraycopy
aoqi@0 1446 // if the two arrays are subtypes of Object[] but the
aoqi@0 1447 // destination array type is not equal to or a supertype
aoqi@0 1448 // of the source type. Each element must be separately
aoqi@0 1449 // checked.
aoqi@0 1450
aoqi@0 1451 // Loop-invariant addresses. They are exclusive end pointers.
aoqi@0 1452 Address end_from_addr(from, length, Address::times_ptr, 0);
aoqi@0 1453 Address end_to_addr(to, length, Address::times_ptr, 0);
aoqi@0 1454
aoqi@0 1455 Register end_from = from; // re-use
aoqi@0 1456 Register end_to = to; // re-use
aoqi@0 1457 Register count = length; // re-use
aoqi@0 1458
aoqi@0 1459 // Loop-variant addresses. They assume post-incremented count < 0.
aoqi@0 1460 Address from_element_addr(end_from, count, Address::times_ptr, 0);
aoqi@0 1461 Address to_element_addr(end_to, count, Address::times_ptr, 0);
aoqi@0 1462 Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
aoqi@0 1463
aoqi@0 1464 // Copy from low to high addresses, indexed from the end of each array.
aoqi@0 1465 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
aoqi@0 1466 __ lea(end_from, end_from_addr);
aoqi@0 1467 __ lea(end_to, end_to_addr);
aoqi@0 1468 assert(length == count, ""); // else fix next line:
aoqi@0 1469 __ negptr(count); // negate and test the length
aoqi@0 1470 __ jccb(Assembler::notZero, L_load_element);
aoqi@0 1471
aoqi@0 1472 // Empty array: Nothing to do.
aoqi@0 1473 __ xorptr(rax, rax); // return 0 on (trivial) success
aoqi@0 1474 __ jmp(L_done);
aoqi@0 1475
aoqi@0 1476 // ======== begin loop ========
aoqi@0 1477 // (Loop is rotated; its entry is L_load_element.)
aoqi@0 1478 // Loop control:
aoqi@0 1479 // for (count = -count; count != 0; count++)
aoqi@0 1480 // Base pointers end_from, end_to are biased by count*element_size, up to the last element.
aoqi@0 1481 __ align(OptoLoopAlignment);
aoqi@0 1482
aoqi@0 1483 __ BIND(L_store_element);
aoqi@0 1484 __ movptr(to_element_addr, elem); // store the oop
aoqi@0 1485 __ increment(count); // increment the count toward zero
aoqi@0 1486 __ jccb(Assembler::zero, L_do_card_marks);
aoqi@0 1487
aoqi@0 1488 // ======== loop entry is here ========
aoqi@0 1489 __ BIND(L_load_element);
aoqi@0 1490 __ movptr(elem, from_element_addr); // load the oop
aoqi@0 1491 __ testptr(elem, elem);
aoqi@0 1492 __ jccb(Assembler::zero, L_store_element);
aoqi@0 1493
aoqi@0 1494 // (Could do a trick here: Remember last successful non-null
aoqi@0 1495 // element stored and make a quick oop equality check on it.)
aoqi@0 1496
aoqi@0 1497 __ movptr(elem_klass, elem_klass_addr); // query the object klass
aoqi@0 1498 generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
aoqi@0 1499 &L_store_element, NULL);
aoqi@0 1500 // (On fall-through, we have failed the element type check.)
aoqi@0 1501 // ======== end loop ========
aoqi@0 1502
aoqi@0 1503 // It was a real error; we must depend on the caller to finish the job.
aoqi@0 1504 // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
aoqi@0 1505 // Emit GC store barriers for the oops we have copied (length_arg + count),
aoqi@0 1506 // and report their number to the caller.
aoqi@0 1507 assert_different_registers(to, count, rax);
aoqi@0 1508 Label L_post_barrier;
aoqi@0 1509 __ addl(count, length_arg); // transfers = (length - remaining)
aoqi@0 1510 __ movl2ptr(rax, count); // save the value
aoqi@0 1511 __ notptr(rax); // report (-1^K) to caller (does not affect flags)
aoqi@0 1512 __ jccb(Assembler::notZero, L_post_barrier);
aoqi@0 1513 __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
aoqi@0 1514
aoqi@0 1515 // Come here on success only.
aoqi@0 1516 __ BIND(L_do_card_marks);
aoqi@0 1517 __ xorptr(rax, rax); // return 0 on success
aoqi@0 1518 __ movl2ptr(count, length_arg);
aoqi@0 1519
aoqi@0 1520 __ BIND(L_post_barrier);
aoqi@0 1521 __ movptr(to, to_arg); // reload
aoqi@0 1522 gen_write_ref_array_post_barrier(to, count);
aoqi@0 1523
aoqi@0 1524 // Common exit point (success or failure).
aoqi@0 1525 __ BIND(L_done);
aoqi@0 1526 __ pop(rbx);
aoqi@0 1527 __ pop(rdi);
aoqi@0 1528 __ pop(rsi);
aoqi@0 1529 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
aoqi@0 1530 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 1531 __ ret(0);
aoqi@0 1532
aoqi@0 1533 return start;
aoqi@0 1534 }
aoqi@0 1535
aoqi@0 1536 //
aoqi@0 1537 // Generate 'unsafe' array copy stub
aoqi@0 1538 // Though just as safe as the other stubs, it takes an unscaled
aoqi@0 1539 // size_t argument instead of an element count.
aoqi@0 1540 //
aoqi@0 1541 // Input:
aoqi@0 1542 // 4(rsp) - source array address
aoqi@0 1543 // 8(rsp) - destination array address
aoqi@0 1544 // 12(rsp) - byte count, can be zero
aoqi@0 1545 //
aoqi@0 1546 // Output:
aoqi@0 1547 //    rax == 0   -  success
aoqi@0 1548 //    rax == -1  -  need to call System.arraycopy
aoqi@0 1549 //
aoqi@0 1550 // Examines the alignment of the operands and dispatches
aoqi@0 1551 // to a long, int, short, or byte copy loop.
aoqi@0 1552 //
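// Illustrative example: for from == 0x1000, to == 0x2008 and a byte count of 24,
// (from|to|count) has its low three bits clear, so the request is forwarded to
// the jlong copy loop as 3 qwords; if 'from' were 0x1001 instead, the low bit
// would be set and the byte copy loop would be used.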
aoqi@0 1553 address generate_unsafe_copy(const char *name,
aoqi@0 1554 address byte_copy_entry,
aoqi@0 1555 address short_copy_entry,
aoqi@0 1556 address int_copy_entry,
aoqi@0 1557 address long_copy_entry) {
aoqi@0 1558
aoqi@0 1559 Label L_long_aligned, L_int_aligned, L_short_aligned;
aoqi@0 1560
aoqi@0 1561 __ align(CodeEntryAlignment);
aoqi@0 1562 StubCodeMark mark(this, "StubRoutines", name);
aoqi@0 1563 address start = __ pc();
aoqi@0 1564
aoqi@0 1565 const Register from = rax; // source array address
aoqi@0 1566 const Register to = rdx; // destination array address
aoqi@0 1567 const Register count = rcx; // elements count
aoqi@0 1568
aoqi@0 1569 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 1570 __ push(rsi);
aoqi@0 1571 __ push(rdi);
aoqi@0 1572 Address from_arg(rsp, 12+ 4); // from
aoqi@0 1573 Address to_arg(rsp, 12+ 8); // to
aoqi@0 1574 Address count_arg(rsp, 12+12); // byte count
aoqi@0 1575
aoqi@0 1576 // Load up:
aoqi@0 1577 __ movptr(from , from_arg);
aoqi@0 1578 __ movptr(to , to_arg);
aoqi@0 1579 __ movl2ptr(count, count_arg);
aoqi@0 1580
aoqi@0 1581 // bump this on entry, not on exit:
aoqi@0 1582 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
aoqi@0 1583
aoqi@0 1584 const Register bits = rsi;
aoqi@0 1585 __ mov(bits, from);
aoqi@0 1586 __ orptr(bits, to);
aoqi@0 1587 __ orptr(bits, count);
aoqi@0 1588
aoqi@0 1589 __ testl(bits, BytesPerLong-1);
aoqi@0 1590 __ jccb(Assembler::zero, L_long_aligned);
aoqi@0 1591
aoqi@0 1592 __ testl(bits, BytesPerInt-1);
aoqi@0 1593 __ jccb(Assembler::zero, L_int_aligned);
aoqi@0 1594
aoqi@0 1595 __ testl(bits, BytesPerShort-1);
aoqi@0 1596 __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
aoqi@0 1597
aoqi@0 1598 __ BIND(L_short_aligned);
aoqi@0 1599 __ shrptr(count, LogBytesPerShort); // size => short_count
aoqi@0 1600 __ movl(count_arg, count); // update 'count'
aoqi@0 1601 __ jump(RuntimeAddress(short_copy_entry));
aoqi@0 1602
aoqi@0 1603 __ BIND(L_int_aligned);
aoqi@0 1604 __ shrptr(count, LogBytesPerInt); // size => int_count
aoqi@0 1605 __ movl(count_arg, count); // update 'count'
aoqi@0 1606 __ jump(RuntimeAddress(int_copy_entry));
aoqi@0 1607
aoqi@0 1608 __ BIND(L_long_aligned);
aoqi@0 1609 __ shrptr(count, LogBytesPerLong); // size => qword_count
aoqi@0 1610 __ movl(count_arg, count); // update 'count'
aoqi@0 1611 __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
aoqi@0 1612 __ pop(rsi);
aoqi@0 1613 __ jump(RuntimeAddress(long_copy_entry));
aoqi@0 1614
aoqi@0 1615 return start;
aoqi@0 1616 }
aoqi@0 1617
aoqi@0 1618
aoqi@0 1619 // Perform range checks on the proposed arraycopy.
aoqi@0 1620 // Smashes src_pos and dst_pos. (Uses them up for temps.)
aoqi@0 1621 void arraycopy_range_checks(Register src,
aoqi@0 1622 Register src_pos,
aoqi@0 1623 Register dst,
aoqi@0 1624 Register dst_pos,
aoqi@0 1625 Address& length,
aoqi@0 1626 Label& L_failed) {
aoqi@0 1627 BLOCK_COMMENT("arraycopy_range_checks:");
aoqi@0 1628 const Register src_end = src_pos; // source array end position
aoqi@0 1629 const Register dst_end = dst_pos; // destination array end position
aoqi@0 1630 __ addl(src_end, length); // src_pos + length
aoqi@0 1631 __ addl(dst_end, length); // dst_pos + length
aoqi@0 1632
aoqi@0 1633 // if (src_pos + length > arrayOop(src)->length() ) FAIL;
aoqi@0 1634 __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes()));
aoqi@0 1635 __ jcc(Assembler::above, L_failed);
aoqi@0 1636
aoqi@0 1637 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
aoqi@0 1638 __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes()));
aoqi@0 1639 __ jcc(Assembler::above, L_failed);
aoqi@0 1640
aoqi@0 1641 BLOCK_COMMENT("arraycopy_range_checks done");
aoqi@0 1642 }
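// Illustrative example: with src->length() == 10, src_pos == 7 and length == 4,
// src_end == 11 > 10 and the 'above' branch rejects the copy. The comparisons
// are unsigned, so a sum exceeding INT_MAX is rejected as well rather than
// being treated as a small negative value.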
aoqi@0 1643
aoqi@0 1644
aoqi@0 1645 //
aoqi@0 1646 // Generate generic array copy stubs
aoqi@0 1647 //
aoqi@0 1648 // Input:
aoqi@0 1649 // 4(rsp) - src oop
aoqi@0 1650 // 8(rsp) - src_pos
aoqi@0 1651 // 12(rsp) - dst oop
aoqi@0 1652 // 16(rsp) - dst_pos
aoqi@0 1653 // 20(rsp) - element count
aoqi@0 1654 //
aoqi@0 1655 // Output:
aoqi@0 1656 //    rax == 0     -  success
aoqi@0 1657 //    rax == -1^K  -  failure, where K is partial transfer count
aoqi@0 1658 //
aoqi@0 1659 address generate_generic_copy(const char *name,
aoqi@0 1660 address entry_jbyte_arraycopy,
aoqi@0 1661 address entry_jshort_arraycopy,
aoqi@0 1662 address entry_jint_arraycopy,
aoqi@0 1663 address entry_oop_arraycopy,
aoqi@0 1664 address entry_jlong_arraycopy,
aoqi@0 1665 address entry_checkcast_arraycopy) {
aoqi@0 1666 Label L_failed, L_failed_0, L_objArray;
aoqi@0 1667
aoqi@0 1668 { int modulus = CodeEntryAlignment;
aoqi@0 1669 int target = modulus - 5; // 5 = sizeof jmp(L_failed)
aoqi@0 1670 int advance = target - (__ offset() % modulus);
aoqi@0 1671 if (advance < 0) advance += modulus;
aoqi@0 1672 if (advance > 0) __ nop(advance);
aoqi@0 1673 }
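// Illustrative example of the padding above: with CodeEntryAlignment == 32 and
// the current offset at 13 mod 32, target == 27 and advance == 14, so 14 bytes
// of nops are emitted; the 5-byte jmp bound at L_failed_0 below then ends
// exactly on the 32-byte boundary, which the alignment assert after it checks.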
aoqi@0 1674 StubCodeMark mark(this, "StubRoutines", name);
aoqi@0 1675
aoqi@0 1676 // Short-hop target to L_failed. Makes for denser prologue code.
aoqi@0 1677 __ BIND(L_failed_0);
aoqi@0 1678 __ jmp(L_failed);
aoqi@0 1679 assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
aoqi@0 1680
aoqi@0 1681 __ align(CodeEntryAlignment);
aoqi@0 1682 address start = __ pc();
aoqi@0 1683
aoqi@0 1684 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 1685 __ push(rsi);
aoqi@0 1686 __ push(rdi);
aoqi@0 1687
aoqi@0 1688 // bump this on entry, not on exit:
aoqi@0 1689 inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
aoqi@0 1690
aoqi@0 1691 // Input values
aoqi@0 1692 Address SRC (rsp, 12+ 4);
aoqi@0 1693 Address SRC_POS (rsp, 12+ 8);
aoqi@0 1694 Address DST (rsp, 12+12);
aoqi@0 1695 Address DST_POS (rsp, 12+16);
aoqi@0 1696 Address LENGTH (rsp, 12+20);
aoqi@0 1697
aoqi@0 1698 //-----------------------------------------------------------------------
aoqi@0 1699 // Assembler stub will be used for this call to arraycopy
aoqi@0 1700 // if the following conditions are met:
aoqi@0 1701 //
aoqi@0 1702 // (1) src and dst must not be null.
aoqi@0 1703 // (2) src_pos must not be negative.
aoqi@0 1704 // (3) dst_pos must not be negative.
aoqi@0 1705 // (4) length must not be negative.
aoqi@0 1706 // (5) src klass and dst klass should be the same and not NULL.
aoqi@0 1707 // (6) src and dst should be arrays.
aoqi@0 1708 // (7) src_pos + length must not exceed length of src.
aoqi@0 1709 // (8) dst_pos + length must not exceed length of dst.
aoqi@0 1710 //
aoqi@0 1711
aoqi@0 1712 const Register src = rax; // source array oop
aoqi@0 1713 const Register src_pos = rsi;
aoqi@0 1714 const Register dst = rdx; // destination array oop
aoqi@0 1715 const Register dst_pos = rdi;
aoqi@0 1716 const Register length = rcx; // transfer count
aoqi@0 1717
aoqi@0 1718 // if (src == NULL) return -1;
aoqi@0 1719 __ movptr(src, SRC); // src oop
aoqi@0 1720 __ testptr(src, src);
aoqi@0 1721 __ jccb(Assembler::zero, L_failed_0);
aoqi@0 1722
aoqi@0 1723 // if (src_pos < 0) return -1;
aoqi@0 1724 __ movl2ptr(src_pos, SRC_POS); // src_pos
aoqi@0 1725 __ testl(src_pos, src_pos);
aoqi@0 1726 __ jccb(Assembler::negative, L_failed_0);
aoqi@0 1727
aoqi@0 1728 // if (dst == NULL) return -1;
aoqi@0 1729 __ movptr(dst, DST); // dst oop
aoqi@0 1730 __ testptr(dst, dst);
aoqi@0 1731 __ jccb(Assembler::zero, L_failed_0);
aoqi@0 1732
aoqi@0 1733 // if (dst_pos < 0) return -1;
aoqi@0 1734 __ movl2ptr(dst_pos, DST_POS); // dst_pos
aoqi@0 1735 __ testl(dst_pos, dst_pos);
aoqi@0 1736 __ jccb(Assembler::negative, L_failed_0);
aoqi@0 1737
aoqi@0 1738 // if (length < 0) return -1;
aoqi@0 1739 __ movl2ptr(length, LENGTH); // length
aoqi@0 1740 __ testl(length, length);
aoqi@0 1741 __ jccb(Assembler::negative, L_failed_0);
aoqi@0 1742
aoqi@0 1743 // if (src->klass() == NULL) return -1;
aoqi@0 1744 Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
aoqi@0 1745 Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
aoqi@0 1746 const Register rcx_src_klass = rcx; // array klass
aoqi@0 1747 __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes()));
aoqi@0 1748
aoqi@0 1749 #ifdef ASSERT
aoqi@0 1750 // assert(src->klass() != NULL);
aoqi@0 1751 BLOCK_COMMENT("assert klasses not null");
aoqi@0 1752 { Label L1, L2;
aoqi@0 1753 __ testptr(rcx_src_klass, rcx_src_klass);
aoqi@0 1754 __ jccb(Assembler::notZero, L2); // it is broken if klass is NULL
aoqi@0 1755 __ bind(L1);
aoqi@0 1756 __ stop("broken null klass");
aoqi@0 1757 __ bind(L2);
aoqi@0 1758 __ cmpptr(dst_klass_addr, (int32_t)NULL_WORD);
aoqi@0 1759 __ jccb(Assembler::equal, L1); // this would be broken also
aoqi@0 1760 BLOCK_COMMENT("assert done");
aoqi@0 1761 }
aoqi@0 1762 #endif //ASSERT
aoqi@0 1763
aoqi@0 1764 // Load layout helper (32-bits)
aoqi@0 1765 //
aoqi@0 1766 //  |array_tag|     | header_size | element_type |     |log2_element_size|
aoqi@0 1767 // 32        30    24            16              8     2                 0
aoqi@0 1768 //
aoqi@0 1769 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
aoqi@0 1770 //
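// Illustrative example: for a jint[] the layout helper has array_tag == 0x3
// (typeArray), element_type == T_INT and log2_element_size == 2, with
// header_size holding the array base offset in bytes. The code below masks out
// header_size to bias the src/dst base pointers and log2_element_size to scale
// src_pos/dst_pos and select the copy loop.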
aoqi@0 1771
aoqi@0 1772 int lh_offset = in_bytes(Klass::layout_helper_offset());
aoqi@0 1773 Address src_klass_lh_addr(rcx_src_klass, lh_offset);
aoqi@0 1774
aoqi@0 1775 // Handle objArrays completely differently...
aoqi@0 1776 jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
aoqi@0 1777 __ cmpl(src_klass_lh_addr, objArray_lh);
aoqi@0 1778 __ jcc(Assembler::equal, L_objArray);
aoqi@0 1779
aoqi@0 1780 // if (src->klass() != dst->klass()) return -1;
aoqi@0 1781 __ cmpptr(rcx_src_klass, dst_klass_addr);
aoqi@0 1782 __ jccb(Assembler::notEqual, L_failed_0);
aoqi@0 1783
aoqi@0 1784 const Register rcx_lh = rcx; // layout helper
aoqi@0 1785 assert(rcx_lh == rcx_src_klass, "known alias");
aoqi@0 1786 __ movl(rcx_lh, src_klass_lh_addr);
aoqi@0 1787
aoqi@0 1788 // if (!src->is_Array()) return -1;
aoqi@0 1789 __ cmpl(rcx_lh, Klass::_lh_neutral_value);
aoqi@0 1790 __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp
aoqi@0 1791
aoqi@0 1792 // At this point, it is known to be a typeArray (array_tag 0x3).
aoqi@0 1793 #ifdef ASSERT
aoqi@0 1794 { Label L;
aoqi@0 1795 __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
aoqi@0 1796 __ jcc(Assembler::greaterEqual, L); // signed cmp
aoqi@0 1797 __ stop("must be a primitive array");
aoqi@0 1798 __ bind(L);
aoqi@0 1799 }
aoqi@0 1800 #endif
aoqi@0 1801
aoqi@0 1802 assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
aoqi@0 1803 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
aoqi@0 1804
aoqi@0 1805 // TypeArrayKlass
aoqi@0 1806 //
aoqi@0 1807 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
aoqi@0 1808 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
aoqi@0 1809 //
aoqi@0 1810 const Register rsi_offset = rsi; // array offset
aoqi@0 1811 const Register src_array = src; // src array offset
aoqi@0 1812 const Register dst_array = dst; // dst array offset
aoqi@0 1813 const Register rdi_elsize = rdi; // log2 element size
aoqi@0 1814
aoqi@0 1815 __ mov(rsi_offset, rcx_lh);
aoqi@0 1816 __ shrptr(rsi_offset, Klass::_lh_header_size_shift);
aoqi@0 1817 __ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset
aoqi@0 1818 __ addptr(src_array, rsi_offset); // src array offset
aoqi@0 1819 __ addptr(dst_array, rsi_offset); // dst array offset
aoqi@0 1820 __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize
aoqi@0 1821
aoqi@0 1822 // next registers should be set before the jump to corresponding stub
aoqi@0 1823 const Register from = src; // source array address
aoqi@0 1824 const Register to = dst; // destination array address
aoqi@0 1825 const Register count = rcx; // elements count
aoqi@0 1826 // some of them should be duplicated on stack
aoqi@0 1827 #define FROM Address(rsp, 12+ 4)
aoqi@0 1828 #define TO Address(rsp, 12+ 8) // Not used now
aoqi@0 1829 #define COUNT Address(rsp, 12+12) // Only for oop arraycopy
aoqi@0 1830
aoqi@0 1831 BLOCK_COMMENT("scale indexes to element size");
aoqi@0 1832 __ movl2ptr(rsi, SRC_POS); // src_pos
aoqi@0 1833 __ shlptr(rsi); // src_pos << rcx (log2 elsize)
aoqi@0 1834 assert(src_array == from, "");
aoqi@0 1835 __ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize
aoqi@0 1836 __ movl2ptr(rdi, DST_POS); // dst_pos
aoqi@0 1837 __ shlptr(rdi); // dst_pos << rcx (log2 elsize)
aoqi@0 1838 assert(dst_array == to, "");
aoqi@0 1839 __ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize
aoqi@0 1840 __ movptr(FROM, from); // src_addr
aoqi@0 1841 __ mov(rdi_elsize, rcx_lh); // log2 elsize
aoqi@0 1842 __ movl2ptr(count, LENGTH); // elements count
aoqi@0 1843
aoqi@0 1844 BLOCK_COMMENT("choose copy loop based on element size");
aoqi@0 1845 __ cmpl(rdi_elsize, 0);
aoqi@0 1846
aoqi@0 1847 __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
aoqi@0 1848 __ cmpl(rdi_elsize, LogBytesPerShort);
aoqi@0 1849 __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
aoqi@0 1850 __ cmpl(rdi_elsize, LogBytesPerInt);
aoqi@0 1851 __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
aoqi@0 1852 #ifdef ASSERT
aoqi@0 1853 __ cmpl(rdi_elsize, LogBytesPerLong);
aoqi@0 1854 __ jccb(Assembler::notEqual, L_failed);
aoqi@0 1855 #endif
aoqi@0 1856 __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
aoqi@0 1857 __ pop(rsi);
aoqi@0 1858 __ jump(RuntimeAddress(entry_jlong_arraycopy));
aoqi@0 1859
aoqi@0 1860 __ BIND(L_failed);
aoqi@0 1861 __ xorptr(rax, rax);
aoqi@0 1862 __ notptr(rax); // return -1
aoqi@0 1863 __ pop(rdi);
aoqi@0 1864 __ pop(rsi);
aoqi@0 1865 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 1866 __ ret(0);
aoqi@0 1867
aoqi@0 1868 // ObjArrayKlass
aoqi@0 1869 __ BIND(L_objArray);
aoqi@0 1870 // live at this point: rcx_src_klass, src[_pos], dst[_pos]
aoqi@0 1871
aoqi@0 1872 Label L_plain_copy, L_checkcast_copy;
aoqi@0 1873 // test array classes for subtyping
aoqi@0 1874 __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality
aoqi@0 1875 __ jccb(Assembler::notEqual, L_checkcast_copy);
aoqi@0 1876
aoqi@0 1877 // Identically typed arrays can be copied without element-wise checks.
aoqi@0 1878 assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
aoqi@0 1879 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
aoqi@0 1880
aoqi@0 1881 __ BIND(L_plain_copy);
aoqi@0 1882 __ movl2ptr(count, LENGTH); // elements count
aoqi@0 1883 __ movl2ptr(src_pos, SRC_POS); // reload src_pos
aoqi@0 1884 __ lea(from, Address(src, src_pos, Address::times_ptr,
aoqi@0 1885 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
aoqi@0 1886 __ movl2ptr(dst_pos, DST_POS); // reload dst_pos
aoqi@0 1887 __ lea(to, Address(dst, dst_pos, Address::times_ptr,
aoqi@0 1888 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
aoqi@0 1889 __ movptr(FROM, from); // src_addr
aoqi@0 1890 __ movptr(TO, to); // dst_addr
aoqi@0 1891 __ movl(COUNT, count); // count
aoqi@0 1892 __ jump(RuntimeAddress(entry_oop_arraycopy));
aoqi@0 1893
aoqi@0 1894 __ BIND(L_checkcast_copy);
aoqi@0 1895 // live at this point: rcx_src_klass, dst[_pos], src[_pos]
aoqi@0 1896 {
aoqi@0 1897 // Handy offsets:
aoqi@0 1898 int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
aoqi@0 1899 int sco_offset = in_bytes(Klass::super_check_offset_offset());
aoqi@0 1900
aoqi@0 1901 Register rsi_dst_klass = rsi;
aoqi@0 1902 Register rdi_temp = rdi;
aoqi@0 1903 assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
aoqi@0 1904 assert(rdi_temp == dst_pos, "expected alias w/ dst_pos");
aoqi@0 1905 Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);
aoqi@0 1906
aoqi@0 1907 // Before looking at dst.length, make sure dst is also an objArray.
aoqi@0 1908 __ movptr(rsi_dst_klass, dst_klass_addr);
aoqi@0 1909 __ cmpl(dst_klass_lh_addr, objArray_lh);
aoqi@0 1910 __ jccb(Assembler::notEqual, L_failed);
aoqi@0 1911
aoqi@0 1912 // It is safe to examine both src.length and dst.length.
aoqi@0 1913 __ movl2ptr(src_pos, SRC_POS); // reload rsi
aoqi@0 1914 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
aoqi@0 1915 // (Now src_pos and dst_pos are killed, but not src and dst.)
aoqi@0 1916
aoqi@0 1917 // We'll need this temp (don't forget to pop it after the type check).
aoqi@0 1918 __ push(rbx);
aoqi@0 1919 Register rbx_src_klass = rbx;
aoqi@0 1920
aoqi@0 1921 __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx
aoqi@0 1922 __ movptr(rsi_dst_klass, dst_klass_addr);
aoqi@0 1923 Address super_check_offset_addr(rsi_dst_klass, sco_offset);
aoqi@0 1924 Label L_fail_array_check;
aoqi@0 1925 generate_type_check(rbx_src_klass,
aoqi@0 1926 super_check_offset_addr, dst_klass_addr,
aoqi@0 1927 rdi_temp, NULL, &L_fail_array_check);
aoqi@0 1928 // (On fall-through, we have passed the array type check.)
aoqi@0 1929 __ pop(rbx);
aoqi@0 1930 __ jmp(L_plain_copy);
aoqi@0 1931
aoqi@0 1932 __ BIND(L_fail_array_check);
aoqi@0 1933 // Reshuffle arguments so we can call checkcast_arraycopy:
aoqi@0 1934
aoqi@0 1935 // match initial saves for checkcast_arraycopy
aoqi@0 1936 // push(rsi); // already done; see above
aoqi@0 1937 // push(rdi); // already done; see above
aoqi@0 1938 // push(rbx); // already done; see above
aoqi@0 1939
aoqi@0 1940 // Marshal outgoing arguments now, freeing registers.
aoqi@0 1941 Address from_arg(rsp, 16+ 4); // from
aoqi@0 1942 Address to_arg(rsp, 16+ 8); // to
aoqi@0 1943 Address length_arg(rsp, 16+12); // elements count
aoqi@0 1944 Address ckoff_arg(rsp, 16+16); // super_check_offset
aoqi@0 1945 Address ckval_arg(rsp, 16+20); // super_klass
aoqi@0 1946
aoqi@0 1947 Address SRC_POS_arg(rsp, 16+ 8);
aoqi@0 1948 Address DST_POS_arg(rsp, 16+16);
aoqi@0 1949 Address LENGTH_arg(rsp, 16+20);
aoqi@0 1950 // push rbx, changed the incoming offsets (why not just use rbp,??)
aoqi@0 1951 // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");
aoqi@0 1952
aoqi@0 1953 __ movptr(rbx, Address(rsi_dst_klass, ek_offset));
aoqi@0 1954 __ movl2ptr(length, LENGTH_arg); // reload elements count
aoqi@0 1955 __ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos
aoqi@0 1956 __ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos
aoqi@0 1957
aoqi@0 1958 __ movptr(ckval_arg, rbx); // destination element type
aoqi@0 1959 __ movl(rbx, Address(rbx, sco_offset));
aoqi@0 1960 __ movl(ckoff_arg, rbx); // corresponding class check offset
aoqi@0 1961
aoqi@0 1962 __ movl(length_arg, length); // outgoing length argument
aoqi@0 1963
aoqi@0 1964 __ lea(from, Address(src, src_pos, Address::times_ptr,
aoqi@0 1965 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
aoqi@0 1966 __ movptr(from_arg, from);
aoqi@0 1967
aoqi@0 1968 __ lea(to, Address(dst, dst_pos, Address::times_ptr,
aoqi@0 1969 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
aoqi@0 1970 __ movptr(to_arg, to);
aoqi@0 1971 __ jump(RuntimeAddress(entry_checkcast_arraycopy));
aoqi@0 1972 }
aoqi@0 1973
aoqi@0 1974 return start;
aoqi@0 1975 }
aoqi@0 1976
aoqi@0 1977 void generate_arraycopy_stubs() {
aoqi@0 1978 address entry;
aoqi@0 1979 address entry_jbyte_arraycopy;
aoqi@0 1980 address entry_jshort_arraycopy;
aoqi@0 1981 address entry_jint_arraycopy;
aoqi@0 1982 address entry_oop_arraycopy;
aoqi@0 1983 address entry_jlong_arraycopy;
aoqi@0 1984 address entry_checkcast_arraycopy;
aoqi@0 1985
aoqi@0 1986 StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
aoqi@0 1987 generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry,
aoqi@0 1988 "arrayof_jbyte_disjoint_arraycopy");
aoqi@0 1989 StubRoutines::_arrayof_jbyte_arraycopy =
aoqi@0 1990 generate_conjoint_copy(T_BYTE, true, Address::times_1, entry,
aoqi@0 1991 NULL, "arrayof_jbyte_arraycopy");
aoqi@0 1992 StubRoutines::_jbyte_disjoint_arraycopy =
aoqi@0 1993 generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry,
aoqi@0 1994 "jbyte_disjoint_arraycopy");
aoqi@0 1995 StubRoutines::_jbyte_arraycopy =
aoqi@0 1996 generate_conjoint_copy(T_BYTE, false, Address::times_1, entry,
aoqi@0 1997 &entry_jbyte_arraycopy, "jbyte_arraycopy");
aoqi@0 1998
aoqi@0 1999 StubRoutines::_arrayof_jshort_disjoint_arraycopy =
aoqi@0 2000 generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry,
aoqi@0 2001 "arrayof_jshort_disjoint_arraycopy");
aoqi@0 2002 StubRoutines::_arrayof_jshort_arraycopy =
aoqi@0 2003 generate_conjoint_copy(T_SHORT, true, Address::times_2, entry,
aoqi@0 2004 NULL, "arrayof_jshort_arraycopy");
aoqi@0 2005 StubRoutines::_jshort_disjoint_arraycopy =
aoqi@0 2006 generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry,
aoqi@0 2007 "jshort_disjoint_arraycopy");
aoqi@0 2008 StubRoutines::_jshort_arraycopy =
aoqi@0 2009 generate_conjoint_copy(T_SHORT, false, Address::times_2, entry,
aoqi@0 2010 &entry_jshort_arraycopy, "jshort_arraycopy");
aoqi@0 2011
aoqi@0 2012 // Next arrays are always aligned on 4 bytes at least.
aoqi@0 2013 StubRoutines::_jint_disjoint_arraycopy =
aoqi@0 2014 generate_disjoint_copy(T_INT, true, Address::times_4, &entry,
aoqi@0 2015 "jint_disjoint_arraycopy");
aoqi@0 2016 StubRoutines::_jint_arraycopy =
aoqi@0 2017 generate_conjoint_copy(T_INT, true, Address::times_4, entry,
aoqi@0 2018 &entry_jint_arraycopy, "jint_arraycopy");
aoqi@0 2019
aoqi@0 2020 StubRoutines::_oop_disjoint_arraycopy =
aoqi@0 2021 generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
aoqi@0 2022 "oop_disjoint_arraycopy");
aoqi@0 2023 StubRoutines::_oop_arraycopy =
aoqi@0 2024 generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry,
aoqi@0 2025 &entry_oop_arraycopy, "oop_arraycopy");
aoqi@0 2026
aoqi@0 2027 StubRoutines::_oop_disjoint_arraycopy_uninit =
aoqi@0 2028 generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
aoqi@0 2029 "oop_disjoint_arraycopy_uninit",
aoqi@0 2030 /*dest_uninitialized*/true);
aoqi@0 2031 StubRoutines::_oop_arraycopy_uninit =
aoqi@0 2032 generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry,
aoqi@0 2033 NULL, "oop_arraycopy_uninit",
aoqi@0 2034 /*dest_uninitialized*/true);
aoqi@0 2035
aoqi@0 2036 StubRoutines::_jlong_disjoint_arraycopy =
aoqi@0 2037 generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
aoqi@0 2038 StubRoutines::_jlong_arraycopy =
aoqi@0 2039 generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
aoqi@0 2040 "jlong_arraycopy");
aoqi@0 2041
aoqi@0 2042 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
aoqi@0 2043 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
aoqi@0 2044 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
aoqi@0 2045 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
aoqi@0 2046 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
aoqi@0 2047 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
aoqi@0 2048
aoqi@0 2049 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
aoqi@0 2050 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
aoqi@0 2051 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
aoqi@0 2052 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
aoqi@0 2053
aoqi@0 2054 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
aoqi@0 2055 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
aoqi@0 2056 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
aoqi@0 2057 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
aoqi@0 2058
aoqi@0 2059 StubRoutines::_checkcast_arraycopy =
aoqi@0 2060 generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
aoqi@0 2061 StubRoutines::_checkcast_arraycopy_uninit =
aoqi@0 2062 generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true);
aoqi@0 2063
aoqi@0 2064 StubRoutines::_unsafe_arraycopy =
aoqi@0 2065 generate_unsafe_copy("unsafe_arraycopy",
aoqi@0 2066 entry_jbyte_arraycopy,
aoqi@0 2067 entry_jshort_arraycopy,
aoqi@0 2068 entry_jint_arraycopy,
aoqi@0 2069 entry_jlong_arraycopy);
aoqi@0 2070
aoqi@0 2071 StubRoutines::_generic_arraycopy =
aoqi@0 2072 generate_generic_copy("generic_arraycopy",
aoqi@0 2073 entry_jbyte_arraycopy,
aoqi@0 2074 entry_jshort_arraycopy,
aoqi@0 2075 entry_jint_arraycopy,
aoqi@0 2076 entry_oop_arraycopy,
aoqi@0 2077 entry_jlong_arraycopy,
aoqi@0 2078 entry_checkcast_arraycopy);
aoqi@0 2079 }
aoqi@0 2080
aoqi@0 2081 void generate_math_stubs() {
aoqi@0 2082 {
aoqi@0 2083 StubCodeMark mark(this, "StubRoutines", "log");
aoqi@0 2084 StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
aoqi@0 2085
aoqi@0 2086 __ fld_d(Address(rsp, 4));
aoqi@0 2087 __ flog();
aoqi@0 2088 __ ret(0);
aoqi@0 2089 }
aoqi@0 2090 {
aoqi@0 2091 StubCodeMark mark(this, "StubRoutines", "log10");
aoqi@0 2092 StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
aoqi@0 2093
aoqi@0 2094 __ fld_d(Address(rsp, 4));
aoqi@0 2095 __ flog10();
aoqi@0 2096 __ ret(0);
aoqi@0 2097 }
aoqi@0 2098 {
aoqi@0 2099 StubCodeMark mark(this, "StubRoutines", "sin");
aoqi@0 2100 StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc();
aoqi@0 2101
aoqi@0 2102 __ fld_d(Address(rsp, 4));
aoqi@0 2103 __ trigfunc('s');
aoqi@0 2104 __ ret(0);
aoqi@0 2105 }
aoqi@0 2106 {
aoqi@0 2107 StubCodeMark mark(this, "StubRoutines", "cos");
aoqi@0 2108 StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc();
aoqi@0 2109
aoqi@0 2110 __ fld_d(Address(rsp, 4));
aoqi@0 2111 __ trigfunc('c');
aoqi@0 2112 __ ret(0);
aoqi@0 2113 }
aoqi@0 2114 {
aoqi@0 2115 StubCodeMark mark(this, "StubRoutines", "tan");
aoqi@0 2116 StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
aoqi@0 2117
aoqi@0 2118 __ fld_d(Address(rsp, 4));
aoqi@0 2119 __ trigfunc('t');
aoqi@0 2120 __ ret(0);
aoqi@0 2121 }
aoqi@0 2122 {
aoqi@0 2123 StubCodeMark mark(this, "StubRoutines", "exp");
aoqi@0 2124 StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
aoqi@0 2125
aoqi@0 2126 __ fld_d(Address(rsp, 4));
aoqi@0 2127 __ exp_with_fallback(0);
aoqi@0 2128 __ ret(0);
aoqi@0 2129 }
aoqi@0 2130 {
aoqi@0 2131 StubCodeMark mark(this, "StubRoutines", "pow");
aoqi@0 2132 StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
aoqi@0 2133
aoqi@0 2134 __ fld_d(Address(rsp, 12));
aoqi@0 2135 __ fld_d(Address(rsp, 4));
aoqi@0 2136 __ pow_with_fallback(0);
aoqi@0 2137 __ ret(0);
aoqi@0 2138 }
aoqi@0 2139 }
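// Note (assumed from the 32-bit C calling convention): each stub above reads its
// double argument directly from the caller's stack at 4(rsp) (12(rsp) for pow's
// second argument) and returns its result on the x87 stack in ST(0), so no
// frame setup or register saving is required.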
aoqi@0 2140
aoqi@0 2141 // AES intrinsic stubs
aoqi@0 2142 enum {AESBlockSize = 16};
aoqi@0 2143
aoqi@0 2144 address generate_key_shuffle_mask() {
aoqi@0 2145 __ align(16);
aoqi@0 2146 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
aoqi@0 2147 address start = __ pc();
aoqi@0 2148 __ emit_data(0x00010203, relocInfo::none, 0 );
aoqi@0 2149 __ emit_data(0x04050607, relocInfo::none, 0 );
aoqi@0 2150 __ emit_data(0x08090a0b, relocInfo::none, 0 );
aoqi@0 2151 __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
aoqi@0 2152 return start;
aoqi@0 2153 }
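// Note: the four little-endian dwords above form the byte sequence
// 03 02 01 00 | 07 06 05 04 | 0b 0a 09 08 | 0f 0e 0d 0c in memory, so a pshufb
// with this mask reverses the bytes within each 32-bit word -- the byte swap
// that load_key() below applies to each expanded key word.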
aoqi@0 2154
aoqi@0 2155 // Utility routine for loading a 128-bit key word in little endian format
aoqi@0 2156 // can optionally specify that the shuffle mask is already in an XMM register
aoqi@0 2157 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
aoqi@0 2158 __ movdqu(xmmdst, Address(key, offset));
aoqi@0 2159 if (xmm_shuf_mask != NULL) {
aoqi@0 2160 __ pshufb(xmmdst, xmm_shuf_mask);
aoqi@0 2161 } else {
aoqi@0 2162 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
aoqi@0 2163 }
aoqi@0 2164 }
aoqi@0 2165
aoqi@0 2166 // aesenc using specified key+offset
aoqi@0 2167 // can optionally specify that the shuffle mask is already in an XMM register
aoqi@0 2168 void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
aoqi@0 2169 load_key(xmmtmp, key, offset, xmm_shuf_mask);
aoqi@0 2170 __ aesenc(xmmdst, xmmtmp);
aoqi@0 2171 }
aoqi@0 2172
aoqi@0 2173 // aesdec using specified key+offset
aoqi@0 2174 // can optionally specify that the shuffle mask is already in an XMM register
aoqi@0 2175 void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
aoqi@0 2176 load_key(xmmtmp, key, offset, xmm_shuf_mask);
aoqi@0 2177 __ aesdec(xmmdst, xmmtmp);
aoqi@0 2178 }
aoqi@0 2179
aoqi@0 2180
aoqi@0 2181 // Arguments:
aoqi@0 2182 //
aoqi@0 2183 // Inputs:
aoqi@0 2184 // c_rarg0 - source byte array address
aoqi@0 2185 // c_rarg1 - destination byte array address
aoqi@0 2186 // c_rarg2 - K (key) in little endian int array
aoqi@0 2187 //
aoqi@0 2188 address generate_aescrypt_encryptBlock() {
aoqi@0 2189 assert(UseAES, "need AES instructions and misaligned SSE support");
aoqi@0 2190 __ align(CodeEntryAlignment);
aoqi@0 2191 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
aoqi@0 2192 Label L_doLast;
aoqi@0 2193 address start = __ pc();
aoqi@0 2194
aoqi@0 2195 const Register from = rdx; // source array address
aoqi@0 2196 const Register to = rdx; // destination array address
aoqi@0 2197 const Register key = rcx; // key array address
aoqi@0 2198 const Register keylen = rax;
aoqi@0 2199 const Address from_param(rbp, 8+0);
aoqi@0 2200 const Address to_param (rbp, 8+4);
aoqi@0 2201 const Address key_param (rbp, 8+8);
aoqi@0 2202
aoqi@0 2203 const XMMRegister xmm_result = xmm0;
aoqi@0 2204 const XMMRegister xmm_key_shuf_mask = xmm1;
aoqi@0 2205 const XMMRegister xmm_temp1 = xmm2;
aoqi@0 2206 const XMMRegister xmm_temp2 = xmm3;
aoqi@0 2207 const XMMRegister xmm_temp3 = xmm4;
aoqi@0 2208 const XMMRegister xmm_temp4 = xmm5;
aoqi@0 2209
aoqi@0 2210 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2211 __ movptr(from, from_param);
aoqi@0 2212 __ movptr(key, key_param);
aoqi@0 2213
aoqi@0 2214 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
aoqi@0 2215 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
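// (44, 52 and 60 ints correspond to the 11, 13 and 15 four-int round keys of
//  AES-128, AES-192 and AES-256 -- one more key than the 10/12/14 rounds.
//  The keylen compares below skip the extra rounds for the shorter key sizes.)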
aoqi@0 2216
aoqi@0 2217 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
aoqi@0 2218 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
aoqi@0 2219 __ movptr(to, to_param);
aoqi@0 2220
aoqi@0 2221 // For encryption, the java expanded key ordering is just what we need
aoqi@0 2222
aoqi@0 2223 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
aoqi@0 2224 __ pxor(xmm_result, xmm_temp1);
aoqi@0 2225
aoqi@0 2226 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
aoqi@0 2227 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
aoqi@0 2228 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
aoqi@0 2229 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
aoqi@0 2230
aoqi@0 2231 __ aesenc(xmm_result, xmm_temp1);
aoqi@0 2232 __ aesenc(xmm_result, xmm_temp2);
aoqi@0 2233 __ aesenc(xmm_result, xmm_temp3);
aoqi@0 2234 __ aesenc(xmm_result, xmm_temp4);
aoqi@0 2235
aoqi@0 2236 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
aoqi@0 2237 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
aoqi@0 2238 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
aoqi@0 2239 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
aoqi@0 2240
aoqi@0 2241 __ aesenc(xmm_result, xmm_temp1);
aoqi@0 2242 __ aesenc(xmm_result, xmm_temp2);
aoqi@0 2243 __ aesenc(xmm_result, xmm_temp3);
aoqi@0 2244 __ aesenc(xmm_result, xmm_temp4);
aoqi@0 2245
aoqi@0 2246 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
aoqi@0 2247 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
aoqi@0 2248
aoqi@0 2249 __ cmpl(keylen, 44);
aoqi@0 2250 __ jccb(Assembler::equal, L_doLast);
aoqi@0 2251
aoqi@0 2252 __ aesenc(xmm_result, xmm_temp1);
aoqi@0 2253 __ aesenc(xmm_result, xmm_temp2);
aoqi@0 2254
aoqi@0 2255 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
aoqi@0 2256 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
aoqi@0 2257
aoqi@0 2258 __ cmpl(keylen, 52);
aoqi@0 2259 __ jccb(Assembler::equal, L_doLast);
aoqi@0 2260
aoqi@0 2261 __ aesenc(xmm_result, xmm_temp1);
aoqi@0 2262 __ aesenc(xmm_result, xmm_temp2);
aoqi@0 2263
aoqi@0 2264 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
aoqi@0 2265 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
aoqi@0 2266
aoqi@0 2267 __ BIND(L_doLast);
aoqi@0 2268 __ aesenc(xmm_result, xmm_temp1);
aoqi@0 2269 __ aesenclast(xmm_result, xmm_temp2);
aoqi@0 2270 __ movdqu(Address(to, 0), xmm_result); // store the result
aoqi@0 2271 __ xorptr(rax, rax); // return 0
aoqi@0 2272 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2273 __ ret(0);
aoqi@0 2274
aoqi@0 2275 return start;
aoqi@0 2276 }
aoqi@0 2277
aoqi@0 2278
aoqi@0 2279 // Arguments:
aoqi@0 2280 //
aoqi@0 2281 // Inputs:
aoqi@0 2282 // c_rarg0 - source byte array address
aoqi@0 2283 // c_rarg1 - destination byte array address
aoqi@0 2284 // c_rarg2 - K (key) in little endian int array
aoqi@0 2285 //
aoqi@0 2286 address generate_aescrypt_decryptBlock() {
aoqi@0 2287 assert(UseAES, "need AES instructions and misaligned SSE support");
aoqi@0 2288 __ align(CodeEntryAlignment);
aoqi@0 2289 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
aoqi@0 2290 Label L_doLast;
aoqi@0 2291 address start = __ pc();
aoqi@0 2292
aoqi@0 2293 const Register from = rdx; // source array address
aoqi@0 2294 const Register to = rdx; // destination array address
aoqi@0 2295 const Register key = rcx; // key array address
aoqi@0 2296 const Register keylen = rax;
aoqi@0 2297 const Address from_param(rbp, 8+0);
aoqi@0 2298 const Address to_param (rbp, 8+4);
aoqi@0 2299 const Address key_param (rbp, 8+8);
aoqi@0 2300
aoqi@0 2301 const XMMRegister xmm_result = xmm0;
aoqi@0 2302 const XMMRegister xmm_key_shuf_mask = xmm1;
aoqi@0 2303 const XMMRegister xmm_temp1 = xmm2;
aoqi@0 2304 const XMMRegister xmm_temp2 = xmm3;
aoqi@0 2305 const XMMRegister xmm_temp3 = xmm4;
aoqi@0 2306 const XMMRegister xmm_temp4 = xmm5;
aoqi@0 2307
aoqi@0 2308 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2309 __ movptr(from, from_param);
aoqi@0 2310 __ movptr(key, key_param);
aoqi@0 2311
aoqi@0 2312 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
aoqi@0 2313 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
aoqi@0 2314
aoqi@0 2315 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
aoqi@0 2316 __ movdqu(xmm_result, Address(from, 0));
aoqi@0 2317 __ movptr(to, to_param);
aoqi@0 2318
aoqi@0 2319 // for decryption java expanded key ordering is rotated one position from what we want
aoqi@0 2320 // so we start from 0x10 here and hit 0x00 last
aoqi@0 2321 // we don't know if the key is aligned, hence not using load-execute form
aoqi@0 2322 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
aoqi@0 2323 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
aoqi@0 2324 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
aoqi@0 2325 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
aoqi@0 2326
aoqi@0 2327 __ pxor (xmm_result, xmm_temp1);
aoqi@0 2328 __ aesdec(xmm_result, xmm_temp2);
aoqi@0 2329 __ aesdec(xmm_result, xmm_temp3);
aoqi@0 2330 __ aesdec(xmm_result, xmm_temp4);
aoqi@0 2331
aoqi@0 2332 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
aoqi@0 2333 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
aoqi@0 2334 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
aoqi@0 2335 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
aoqi@0 2336
aoqi@0 2337 __ aesdec(xmm_result, xmm_temp1);
aoqi@0 2338 __ aesdec(xmm_result, xmm_temp2);
aoqi@0 2339 __ aesdec(xmm_result, xmm_temp3);
aoqi@0 2340 __ aesdec(xmm_result, xmm_temp4);
aoqi@0 2341
aoqi@0 2342 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
aoqi@0 2343 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
aoqi@0 2344 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
aoqi@0 2345
aoqi@0 2346 __ cmpl(keylen, 44);
aoqi@0 2347 __ jccb(Assembler::equal, L_doLast);
aoqi@0 2348
aoqi@0 2349 __ aesdec(xmm_result, xmm_temp1);
aoqi@0 2350 __ aesdec(xmm_result, xmm_temp2);
aoqi@0 2351
aoqi@0 2352 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
aoqi@0 2353 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
aoqi@0 2354
aoqi@0 2355 __ cmpl(keylen, 52);
aoqi@0 2356 __ jccb(Assembler::equal, L_doLast);
aoqi@0 2357
aoqi@0 2358 __ aesdec(xmm_result, xmm_temp1);
aoqi@0 2359 __ aesdec(xmm_result, xmm_temp2);
aoqi@0 2360
aoqi@0 2361 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
aoqi@0 2362 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
aoqi@0 2363
aoqi@0 2364 __ BIND(L_doLast);
aoqi@0 2365 __ aesdec(xmm_result, xmm_temp1);
aoqi@0 2366 __ aesdec(xmm_result, xmm_temp2);
aoqi@0 2367
aoqi@0 2368 // for decryption the aesdeclast operation is always on key+0x00
aoqi@0 2369 __ aesdeclast(xmm_result, xmm_temp3);
aoqi@0 2370 __ movdqu(Address(to, 0), xmm_result); // store the result
aoqi@0 2371 __ xorptr(rax, rax); // return 0
aoqi@0 2372 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2373 __ ret(0);
aoqi@0 2374
aoqi@0 2375 return start;
aoqi@0 2376 }
aoqi@0 2377
aoqi@0 2378 void handleSOERegisters(bool saving) {
aoqi@0 2379 const int saveFrameSizeInBytes = 4 * wordSize;
aoqi@0 2380 const Address saved_rbx (rbp, -3 * wordSize);
aoqi@0 2381 const Address saved_rsi (rbp, -2 * wordSize);
aoqi@0 2382 const Address saved_rdi (rbp, -1 * wordSize);
aoqi@0 2383
aoqi@0 2384 if (saving) {
aoqi@0 2385 __ subptr(rsp, saveFrameSizeInBytes);
aoqi@0 2386 __ movptr(saved_rsi, rsi);
aoqi@0 2387 __ movptr(saved_rdi, rdi);
aoqi@0 2388 __ movptr(saved_rbx, rbx);
aoqi@0 2389 } else {
aoqi@0 2390 // restoring
aoqi@0 2391 __ movptr(rsi, saved_rsi);
aoqi@0 2392 __ movptr(rdi, saved_rdi);
aoqi@0 2393 __ movptr(rbx, saved_rbx);
aoqi@0 2394 }
aoqi@0 2395 }
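// Note: the "SOE" (save-on-entry, i.e. callee-saved) registers of the 32-bit C
// ABI used by the CBC stubs are rsi, rdi and rbx; they are spilled into the
// frame here on entry and reloaded before returning. The reserved frame area
// itself is reclaimed by the callers' leave(), which restores rsp from rbp.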
aoqi@0 2396
aoqi@0 2397 // Arguments:
aoqi@0 2398 //
aoqi@0 2399 // Inputs:
aoqi@0 2400 // c_rarg0 - source byte array address
aoqi@0 2401 // c_rarg1 - destination byte array address
aoqi@0 2402 // c_rarg2 - K (key) in little endian int array
aoqi@0 2403 // c_rarg3 - r vector byte array address
aoqi@0 2404 // c_rarg4 - input length
aoqi@0 2405 //
aoqi@0 2406 // Output:
aoqi@0 2407 // rax - input length
aoqi@0 2408 //
aoqi@0 2409 address generate_cipherBlockChaining_encryptAESCrypt() {
aoqi@0 2410 assert(UseAES, "need AES instructions and misaligned SSE support");
aoqi@0 2411 __ align(CodeEntryAlignment);
aoqi@0 2412 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
aoqi@0 2413 address start = __ pc();
aoqi@0 2414
aoqi@0 2415 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
aoqi@0 2416 const Register from = rsi; // source array address
aoqi@0 2417 const Register to = rdx; // destination array address
aoqi@0 2418 const Register key = rcx; // key array address
aoqi@0 2419 const Register rvec = rdi; // r byte array initialized from initvector array address
aoqi@0 2420 // and left with the results of the last encryption block
aoqi@0 2421 const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
aoqi@0 2422 const Register pos = rax;
aoqi@0 2423
aoqi@0 2424 // xmm register assignments for the loops below
aoqi@0 2425 const XMMRegister xmm_result = xmm0;
aoqi@0 2426 const XMMRegister xmm_temp = xmm1;
aoqi@0 2427 // first 6 keys preloaded into xmm2-xmm7
aoqi@0 2428 const int XMM_REG_NUM_KEY_FIRST = 2;
aoqi@0 2429 const int XMM_REG_NUM_KEY_LAST = 7;
aoqi@0 2430 const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
aoqi@0 2431
aoqi@0 2432 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2433 handleSOERegisters(true /*saving*/);
aoqi@0 2434
aoqi@0 2435 // load registers from incoming parameters
aoqi@0 2436 const Address from_param(rbp, 8+0);
aoqi@0 2437 const Address to_param (rbp, 8+4);
aoqi@0 2438 const Address key_param (rbp, 8+8);
aoqi@0 2439 const Address rvec_param (rbp, 8+12);
aoqi@0 2440 const Address len_param (rbp, 8+16);
aoqi@0 2441 __ movptr(from , from_param);
aoqi@0 2442 __ movptr(to , to_param);
aoqi@0 2443 __ movptr(key , key_param);
aoqi@0 2444 __ movptr(rvec , rvec_param);
aoqi@0 2445 __ movptr(len_reg , len_param);
aoqi@0 2446
aoqi@0 2447 const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
aoqi@0 2448 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
aoqi@0 2449 // load up xmm regs 2 thru 7 with keys 0-5
aoqi@0 2450 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2451 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
aoqi@0 2452 offset += 0x10;
aoqi@0 2453 }
aoqi@0 2454
aoqi@0 2455 __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
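// (CBC encryption computes C[i] = AES_encrypt(P[i] ^ C[i-1]) with C[-1] = IV.
//  xmm_result always holds the previous ciphertext block -- initially the r
//  vector loaded above -- so each loop below xors the next plaintext block into
//  it before running the AES rounds.)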
aoqi@0 2456
aoqi@0 2457 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
aoqi@0 2458 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
aoqi@0 2459 __ cmpl(rax, 44);
aoqi@0 2460 __ jcc(Assembler::notEqual, L_key_192_256);
aoqi@0 2461
aoqi@0 2462 // 128 bit code follows here
aoqi@0 2463 __ movl(pos, 0);
aoqi@0 2464 __ align(OptoLoopAlignment);
aoqi@0 2465 __ BIND(L_loopTop_128);
aoqi@0 2466 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
aoqi@0 2467 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
aoqi@0 2468
aoqi@0 2469 __ pxor (xmm_result, xmm_key0); // do the aes rounds
aoqi@0 2470 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2471 __ aesenc(xmm_result, as_XMMRegister(rnum));
aoqi@0 2472 }
aoqi@0 2473 for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
aoqi@0 2474 aes_enc_key(xmm_result, xmm_temp, key, key_offset);
aoqi@0 2475 }
aoqi@0 2476 load_key(xmm_temp, key, 0xa0);
aoqi@0 2477 __ aesenclast(xmm_result, xmm_temp);
aoqi@0 2478
aoqi@0 2479 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
aoqi@0 2480 // no need to store r to memory until we exit
aoqi@0 2481 __ addptr(pos, AESBlockSize);
aoqi@0 2482 __ subptr(len_reg, AESBlockSize);
aoqi@0 2483 __ jcc(Assembler::notEqual, L_loopTop_128);
aoqi@0 2484
aoqi@0 2485 __ BIND(L_exit);
aoqi@0 2486 __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
aoqi@0 2487
aoqi@0 2488 handleSOERegisters(false /*restoring*/);
aoqi@0 2489 __ movptr(rax, len_param); // return length
aoqi@0 2490 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2491 __ ret(0);
aoqi@0 2492
aoqi@0 2493 __ BIND(L_key_192_256);
aoqi@0 2494 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
aoqi@0 2495 __ cmpl(rax, 52);
aoqi@0 2496 __ jcc(Assembler::notEqual, L_key_256);
aoqi@0 2497
aoqi@0 2498 // 192-bit code follows here (could be changed to use more xmm registers)
aoqi@0 2499 __ movl(pos, 0);
aoqi@0 2500 __ align(OptoLoopAlignment);
aoqi@0 2501 __ BIND(L_loopTop_192);
aoqi@0 2502 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
aoqi@0 2503 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
aoqi@0 2504
aoqi@0 2505 __ pxor (xmm_result, xmm_key0); // do the aes rounds
aoqi@0 2506 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2507 __ aesenc(xmm_result, as_XMMRegister(rnum));
aoqi@0 2508 }
aoqi@0 2509 for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
aoqi@0 2510 aes_enc_key(xmm_result, xmm_temp, key, key_offset);
aoqi@0 2511 }
aoqi@0 2512 load_key(xmm_temp, key, 0xc0);
aoqi@0 2513 __ aesenclast(xmm_result, xmm_temp);
aoqi@0 2514
aoqi@0 2515 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
aoqi@0 2516 // no need to store r to memory until we exit
aoqi@0 2517 __ addptr(pos, AESBlockSize);
aoqi@0 2518 __ subptr(len_reg, AESBlockSize);
aoqi@0 2519 __ jcc(Assembler::notEqual, L_loopTop_192);
aoqi@0 2520 __ jmp(L_exit);
aoqi@0 2521
aoqi@0 2522 __ BIND(L_key_256);
aoqi@0 2523 // 256-bit code follows here (could be changed to use more xmm registers)
aoqi@0 2524 __ movl(pos, 0);
aoqi@0 2525 __ align(OptoLoopAlignment);
aoqi@0 2526 __ BIND(L_loopTop_256);
aoqi@0 2527 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
aoqi@0 2528 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
aoqi@0 2529
aoqi@0 2530 __ pxor (xmm_result, xmm_key0); // do the aes rounds
aoqi@0 2531 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2532 __ aesenc(xmm_result, as_XMMRegister(rnum));
aoqi@0 2533 }
aoqi@0 2534 for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
aoqi@0 2535 aes_enc_key(xmm_result, xmm_temp, key, key_offset);
aoqi@0 2536 }
aoqi@0 2537 load_key(xmm_temp, key, 0xe0);
aoqi@0 2538 __ aesenclast(xmm_result, xmm_temp);
aoqi@0 2539
aoqi@0 2540 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
aoqi@0 2541 // no need to store r to memory until we exit
aoqi@0 2542 __ addptr(pos, AESBlockSize);
aoqi@0 2543 __ subptr(len_reg, AESBlockSize);
aoqi@0 2544 __ jcc(Assembler::notEqual, L_loopTop_256);
aoqi@0 2545 __ jmp(L_exit);
aoqi@0 2546
aoqi@0 2547 return start;
aoqi@0 2548 }
aoqi@0 2549
aoqi@0 2550
aoqi@0 2551 // CBC AES Decryption.
aoqi@0 2552 // In the 32-bit stub, because of the shortage of registers, we do not try to parallelize 4 blocks at a time.
aoqi@0 2553 //
aoqi@0 2554 // Arguments:
aoqi@0 2555 //
aoqi@0 2556 // Inputs:
aoqi@0 2557 // c_rarg0 - source byte array address
aoqi@0 2558 // c_rarg1 - destination byte array address
aoqi@0 2559 // c_rarg2 - K (key) in little endian int array
aoqi@0 2560 // c_rarg3 - r vector byte array address
aoqi@0 2561 // c_rarg4 - input length
aoqi@0 2562 //
aoqi@0 2563 // Output:
aoqi@0 2564 // rax - input length
aoqi@0 2565 //
aoqi@0 2566
aoqi@0 2567 address generate_cipherBlockChaining_decryptAESCrypt() {
aoqi@0 2568 assert(UseAES, "need AES instructions and misaligned SSE support");
aoqi@0 2569 __ align(CodeEntryAlignment);
aoqi@0 2570 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
aoqi@0 2571 address start = __ pc();
aoqi@0 2572
aoqi@0 2573 Label L_exit, L_key_192_256, L_key_256;
aoqi@0 2574 Label L_singleBlock_loopTop_128;
aoqi@0 2575 Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
aoqi@0 2576 const Register from = rsi; // source array address
aoqi@0 2577 const Register to = rdx; // destination array address
aoqi@0 2578 const Register key = rcx; // key array address
aoqi@0 2579 const Register rvec = rdi; // r byte array initialized from initvector array address
aoqi@0 2580 // and left with the results of the last encryption block
aoqi@0 2581 const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
aoqi@0 2582 const Register pos = rax;
aoqi@0 2583
aoqi@0 2584 // xmm register assignments for the loops below
aoqi@0 2585 const XMMRegister xmm_result = xmm0;
aoqi@0 2586 const XMMRegister xmm_temp = xmm1;
aoqi@0 2587 // first 6 keys preloaded into xmm2-xmm7
aoqi@0 2588 const int XMM_REG_NUM_KEY_FIRST = 2;
aoqi@0 2589 const int XMM_REG_NUM_KEY_LAST = 7;
aoqi@0 2590 const int FIRST_NON_REG_KEY_offset = 0x70;
aoqi@0 2591 const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
aoqi@0 2592
aoqi@0 2593 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2594 handleSOERegisters(true /*saving*/);
aoqi@0 2595
aoqi@0 2596 // load registers from incoming parameters
aoqi@0 2597 const Address from_param(rbp, 8+0);
aoqi@0 2598 const Address to_param (rbp, 8+4);
aoqi@0 2599 const Address key_param (rbp, 8+8);
aoqi@0 2600 const Address rvec_param (rbp, 8+12);
aoqi@0 2601 const Address len_param (rbp, 8+16);
aoqi@0 2602 __ movptr(from , from_param);
aoqi@0 2603 __ movptr(to , to_param);
aoqi@0 2604 __ movptr(key , key_param);
aoqi@0 2605 __ movptr(rvec , rvec_param);
aoqi@0 2606 __ movptr(len_reg , len_param);
aoqi@0 2607
aoqi@0 2608 // the java expanded key ordering is rotated one position from what we want
aoqi@0 2609 // so we start from 0x10 here and hit 0x00 last
aoqi@0 2610 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
aoqi@0 2611 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
aoqi@0 2612 // load up xmm regs 2 thru 7 with the first 6 keys used for decryption (offsets 0x10 thru 0x60)
aoqi@0 2613 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2614 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
aoqi@0 2615 offset += 0x10;
aoqi@0 2616 }
aoqi@0 2617
aoqi@0 2618 // inside here, use the rvec register to point to previous block cipher
aoqi@0 2619 // with which we xor at the end of each newly decrypted block
aoqi@0 2620 const Register prev_block_cipher_ptr = rvec;
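// (CBC decryption computes P[i] = AES_decrypt(C[i]) ^ C[i-1] with C[-1] = IV.
//  Each loop below decrypts the current block, xors it with the block
//  prev_block_cipher_ptr points at, then advances that pointer to the
//  ciphertext block just consumed.)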
aoqi@0 2621
aoqi@0 2622 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array: 44=128, 52=192, 60=256)
aoqi@0 2623 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
aoqi@0 2624 __ cmpl(rax, 44);
aoqi@0 2625 __ jcc(Assembler::notEqual, L_key_192_256);
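// (For reference: the expanded AES key holds 4*(rounds+1) 32-bit words,
//  i.e. 44, 52 or 60 ints for the 10, 12 or 14 rounds of AES-128/192/256.)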
aoqi@0 2626
aoqi@0 2627
aoqi@0 2628 // 128-bit code follows here (single-block loop; not parallelized in this 32-bit stub)
aoqi@0 2629 __ movl(pos, 0);
aoqi@0 2630 __ align(OptoLoopAlignment);
aoqi@0 2631 __ BIND(L_singleBlock_loopTop_128);
aoqi@0 2632 __ cmpptr(len_reg, 0); // any blocks left??
aoqi@0 2633 __ jcc(Assembler::equal, L_exit);
aoqi@0 2634 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
aoqi@0 2635 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
aoqi@0 2636 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2637 __ aesdec(xmm_result, as_XMMRegister(rnum));
aoqi@0 2638 }
aoqi@0 2639 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0
aoqi@0 2640 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
aoqi@0 2641 }
aoqi@0 2642 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
aoqi@0 2643 __ aesdeclast(xmm_result, xmm_temp);
aoqi@0 2644 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
aoqi@0 2645 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
aoqi@0 2646 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
aoqi@0 2647 // no need to store r to memory until we exit
aoqi@0 2648 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
aoqi@0 2649 __ addptr(pos, AESBlockSize);
aoqi@0 2650 __ subptr(len_reg, AESBlockSize);
aoqi@0 2651 __ jmp(L_singleBlock_loopTop_128);
aoqi@0 2652
aoqi@0 2653
aoqi@0 2654 __ BIND(L_exit);
aoqi@0 2655 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
aoqi@0 2656 __ movptr(rvec , rvec_param); // restore this since used in loop
aoqi@0 2657 __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
aoqi@0 2658 handleSOERegisters(false /*restoring*/);
aoqi@0 2659 __ movptr(rax, len_param); // return length
aoqi@0 2660 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2661 __ ret(0);
aoqi@0 2662
aoqi@0 2663
aoqi@0 2664 __ BIND(L_key_192_256);
aoqi@0 2665 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
aoqi@0 2666 __ cmpl(rax, 52);
aoqi@0 2667 __ jcc(Assembler::notEqual, L_key_256);
aoqi@0 2668
aoqi@0 2669 // 192-bit code follows here (could be optimized to use parallelism)
aoqi@0 2670 __ movl(pos, 0);
aoqi@0 2671 __ align(OptoLoopAlignment);
aoqi@0 2672 __ BIND(L_singleBlock_loopTop_192);
aoqi@0 2673 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
aoqi@0 2674 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
aoqi@0 2675 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2676 __ aesdec(xmm_result, as_XMMRegister(rnum));
aoqi@0 2677 }
aoqi@0 2678 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0
aoqi@0 2679 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
aoqi@0 2680 }
aoqi@0 2681 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
aoqi@0 2682 __ aesdeclast(xmm_result, xmm_temp);
aoqi@0 2683 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
aoqi@0 2684 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
aoqi@0 2685 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
aoqi@0 2686 // no need to store r to memory until we exit
aoqi@0 2687 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
aoqi@0 2688 __ addptr(pos, AESBlockSize);
aoqi@0 2689 __ subptr(len_reg, AESBlockSize);
aoqi@0 2690 __ jcc(Assembler::notEqual, L_singleBlock_loopTop_192);
aoqi@0 2691 __ jmp(L_exit);
aoqi@0 2692
aoqi@0 2693 __ BIND(L_key_256);
aoqi@0 2694 // 256-bit code follows here (could be optimized to use parallelism)
aoqi@0 2695 __ movl(pos, 0);
aoqi@0 2696 __ align(OptoLoopAlignment);
aoqi@0 2697 __ BIND(L_singleBlock_loopTop_256);
aoqi@0 2698 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
aoqi@0 2699 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
aoqi@0 2700 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
aoqi@0 2701 __ aesdec(xmm_result, as_XMMRegister(rnum));
aoqi@0 2702 }
aoqi@0 2703 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
aoqi@0 2704 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
aoqi@0 2705 }
aoqi@0 2706 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
aoqi@0 2707 __ aesdeclast(xmm_result, xmm_temp);
aoqi@0 2708 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
aoqi@0 2709 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
aoqi@0 2710 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
aoqi@0 2711 // no need to store r to memory until we exit
aoqi@0 2712 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
aoqi@0 2713 __ addptr(pos, AESBlockSize);
aoqi@0 2714 __ subptr(len_reg, AESBlockSize);
aoqi@0 2715 __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
aoqi@0 2716 __ jmp(L_exit);
aoqi@0 2717
aoqi@0 2718 return start;
aoqi@0 2719 }
aoqi@0 2720
aoqi@0 2721 /**
aoqi@0 2722 * Arguments:
aoqi@0 2723 *
aoqi@0 2724 * Inputs:
aoqi@0 2725 * rsp(4) - int crc
aoqi@0 2726 * rsp(8) - byte* buf
aoqi@0 2727 * rsp(12) - int length
aoqi@0 2728 *
aoqi@0 2729 * Output:
aoqi@0 2730 * rax - int crc result
aoqi@0 2731 */
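// A hedged reference model (C++, not the stub) of the CRC-32 that kernel_crc32
// accelerates below: the reflected polynomial 0xEDB88320 processed bit by bit.
// Whether the usual ~crc pre/post conditioning happens in the caller or inside
// kernel_crc32 is not spelled out here; this sketch only shows the core update.
//
//   uint32_t crc32_ref(uint32_t crc, const uint8_t* buf, int len) {
//     while (len-- > 0) {
//       crc ^= *buf++;
//       for (int i = 0; i < 8; i++)
//         crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
//     }
//     return crc;
//   }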
aoqi@0 2732 address generate_updateBytesCRC32() {
aoqi@0 2733 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
aoqi@0 2734
aoqi@0 2735 __ align(CodeEntryAlignment);
aoqi@0 2736 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
aoqi@0 2737
aoqi@0 2738 address start = __ pc();
aoqi@0 2739
aoqi@0 2740 const Register crc = rdx; // crc
aoqi@0 2741 const Register buf = rsi; // source java byte array address
aoqi@0 2742 const Register len = rcx; // length
aoqi@0 2743 const Register table = rdi; // crc_table address (reuse register)
aoqi@0 2744 const Register tmp = rbx;
aoqi@0 2745 assert_different_registers(crc, buf, len, table, tmp, rax);
aoqi@0 2746
aoqi@0 2747 BLOCK_COMMENT("Entry:");
aoqi@0 2748 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2749 __ push(rsi);
aoqi@0 2750 __ push(rdi);
aoqi@0 2751 __ push(rbx);
aoqi@0 2752
aoqi@0 2753 Address crc_arg(rbp, 8 + 0);
aoqi@0 2754 Address buf_arg(rbp, 8 + 4);
aoqi@0 2755 Address len_arg(rbp, 8 + 8);
aoqi@0 2756
aoqi@0 2757 // Load up:
aoqi@0 2758 __ movl(crc, crc_arg);
aoqi@0 2759 __ movptr(buf, buf_arg);
aoqi@0 2760 __ movl(len, len_arg);
aoqi@0 2761
aoqi@0 2762 __ kernel_crc32(crc, buf, len, table, tmp);
aoqi@0 2763
aoqi@0 2764 __ movl(rax, crc);
aoqi@0 2765 __ pop(rbx);
aoqi@0 2766 __ pop(rdi);
aoqi@0 2767 __ pop(rsi);
aoqi@0 2768 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2769 __ ret(0);
aoqi@0 2770
aoqi@0 2771 return start;
aoqi@0 2772 }
aoqi@0 2773
aoqi@0 2774 // Safefetch stubs.
aoqi@0 2775 void generate_safefetch(const char* name, int size, address* entry,
aoqi@0 2776 address* fault_pc, address* continuation_pc) {
aoqi@0 2777 // safefetch signatures:
aoqi@0 2778 // int SafeFetch32(int* adr, int errValue);
aoqi@0 2779 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
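// A hedged usage sketch (not generated code): SafeFetch32 probes memory that may
// be unmapped. If the load at *fault_pc faults, the signal handler is expected to
// resume at *continuation_pc, so the errValue preloaded into rax is returned.
//
//   int v = SafeFetch32(possibly_bad_ptr, -1);
//   if (v == -1) { /* either the real value is -1 or the load faulted */ }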
aoqi@0 2780
aoqi@0 2781 StubCodeMark mark(this, "StubRoutines", name);
aoqi@0 2782
aoqi@0 2783 // Entry point, pc or function descriptor.
aoqi@0 2784 *entry = __ pc();
aoqi@0 2785
aoqi@0 2786 __ movl(rax, Address(rsp, 0x8));
aoqi@0 2787 __ movl(rcx, Address(rsp, 0x4));
aoqi@0 2788 // Load *adr into eax, may fault.
aoqi@0 2789 *fault_pc = __ pc();
aoqi@0 2790 switch (size) {
aoqi@0 2791 case 4:
aoqi@0 2792 // int32_t
aoqi@0 2793 __ movl(rax, Address(rcx, 0));
aoqi@0 2794 break;
aoqi@0 2795 case 8:
aoqi@0 2796 // int64_t
aoqi@0 2797 Unimplemented();
aoqi@0 2798 break;
aoqi@0 2799 default:
aoqi@0 2800 ShouldNotReachHere();
aoqi@0 2801 }
aoqi@0 2802
aoqi@0 2803 // Return errValue or *adr.
aoqi@0 2804 *continuation_pc = __ pc();
aoqi@0 2805 __ ret(0);
aoqi@0 2806 }
aoqi@0 2807
aoqi@0 2808 public:
aoqi@0 2809 // Information about frame layout at time of blocking runtime call.
aoqi@0 2810 // Note that we only have to preserve callee-saved registers since
aoqi@0 2811 // the compilers are responsible for supplying a continuation point
aoqi@0 2812 // if they expect all registers to be preserved.
aoqi@0 2813 enum layout {
aoqi@0 2814 thread_off, // last_java_sp
aoqi@0 2815 arg1_off,
aoqi@0 2816 arg2_off,
aoqi@0 2817 rbp_off, // callee saved register
aoqi@0 2818 ret_pc,
aoqi@0 2819 framesize
aoqi@0 2820 };
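// With the layout above, the frame built in generate_throw_exception looks like
// this at the time of the runtime call (offsets from rsp, wordSize = 4):
//
//   rsp + 0   thread      (first C argument; rsp here is last_Java_sp)
//   rsp + 4   arg1
//   rsp + 8   arg2
//   rsp + 12  saved rbp   (pushed by enter())
//   rsp + 16  return pc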
aoqi@0 2821
aoqi@0 2822 private:
aoqi@0 2823
aoqi@0 2824 #undef __
aoqi@0 2825 #define __ masm->
aoqi@0 2826
aoqi@0 2827 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 2828 // Continuation point for throwing of implicit exceptions that are not handled in
aoqi@0 2829 // the current activation. Fabricates an exception oop and initiates normal
aoqi@0 2830 // exception dispatching in this frame.
aoqi@0 2831 //
aoqi@0 2832 // Previously the compiler (c2) allowed for callee save registers on Java calls.
aoqi@0 2833 // This is no longer true after adapter frames were removed but could possibly
aoqi@0 2834 // be brought back in the future if the interpreter code was reworked and it
aoqi@0 2835 // was deemed worthwhile. The comment below was left to describe what must
aoqi@0 2836 // happen here if callee saves were resurrected. As it stands now this stub
aoqi@0 2837 // could actually be a vanilla BufferBlob and have no oopMap at all.
aoqi@0 2838 // Since it doesn't make much difference we've chosen to leave it the
aoqi@0 2839 // way it was in the callee save days and keep the comment.
aoqi@0 2840
aoqi@0 2841 // If we need to preserve callee-saved values we need a callee-saved oop map and
aoqi@0 2842 // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs.
aoqi@0 2843 // If the compiler needs all registers to be preserved between the fault
aoqi@0 2844 // point and the exception handler then it must assume responsibility for that in
aoqi@0 2845 // AbstractCompiler::continuation_for_implicit_null_exception or
aoqi@0 2846 // continuation_for_implicit_division_by_zero_exception. All other implicit
aoqi@0 2847 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
aoqi@0 2848 // either at call sites or otherwise assume that stack unwinding will be initiated,
aoqi@0 2849 // so caller saved registers were assumed volatile in the compiler.
aoqi@0 2850 address generate_throw_exception(const char* name, address runtime_entry,
aoqi@0 2851 Register arg1 = noreg, Register arg2 = noreg) {
aoqi@0 2852
aoqi@0 2853 int insts_size = 256;
aoqi@0 2854 int locs_size = 32;
aoqi@0 2855
aoqi@0 2856 CodeBuffer code(name, insts_size, locs_size);
aoqi@0 2857 OopMapSet* oop_maps = new OopMapSet();
aoqi@0 2858 MacroAssembler* masm = new MacroAssembler(&code);
aoqi@0 2859
aoqi@0 2860 address start = __ pc();
aoqi@0 2861
aoqi@0 2862 // This is an inlined and slightly modified version of call_VM
aoqi@0 2863 // which has the ability to fetch the return PC out of
aoqi@0 2864 // thread-local storage and also sets up last_Java_sp slightly
aoqi@0 2865 // differently than the real call_VM
aoqi@0 2866 Register java_thread = rbx;
aoqi@0 2867 __ get_thread(java_thread);
aoqi@0 2868
aoqi@0 2869 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2870
aoqi@0 2871 // pc and rbp, already pushed
aoqi@0 2872 __ subptr(rsp, (framesize-2) * wordSize); // prolog
aoqi@0 2873
aoqi@0 2874 // Frame is now completed as far as size and linkage.
aoqi@0 2875
aoqi@0 2876 int frame_complete = __ pc() - start;
aoqi@0 2877
aoqi@0 2878 // push java thread (becomes first argument of C function)
aoqi@0 2879 __ movptr(Address(rsp, thread_off * wordSize), java_thread);
aoqi@0 2880 if (arg1 != noreg) {
aoqi@0 2881 __ movptr(Address(rsp, arg1_off * wordSize), arg1);
aoqi@0 2882 }
aoqi@0 2883 if (arg2 != noreg) {
aoqi@0 2884 assert(arg1 != noreg, "missing reg arg");
aoqi@0 2885 __ movptr(Address(rsp, arg2_off * wordSize), arg2);
aoqi@0 2886 }
aoqi@0 2887
aoqi@0 2888 // Set up last_Java_sp and last_Java_fp
aoqi@0 2889 __ set_last_Java_frame(java_thread, rsp, rbp, NULL);
aoqi@0 2890
aoqi@0 2891 // Call runtime
aoqi@0 2892 BLOCK_COMMENT("call runtime_entry");
aoqi@0 2893 __ call(RuntimeAddress(runtime_entry));
aoqi@0 2894 // Generate oop map
aoqi@0 2895 OopMap* map = new OopMap(framesize, 0);
aoqi@0 2896 oop_maps->add_gc_map(__ pc() - start, map);
aoqi@0 2897
aoqi@0 2898 // restore the thread (cannot use the pushed argument since arguments
aoqi@0 2899 // may be overwritten by C code generated by an optimizing compiler);
aoqi@0 2900 // however can use the register value directly if it is callee saved.
aoqi@0 2901 __ get_thread(java_thread);
aoqi@0 2902
aoqi@0 2903 __ reset_last_Java_frame(java_thread, true, false);
aoqi@0 2904
aoqi@0 2905 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2906
aoqi@0 2907 // check for pending exceptions
aoqi@0 2908 #ifdef ASSERT
aoqi@0 2909 Label L;
aoqi@0 2910 __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
aoqi@0 2911 __ jcc(Assembler::notEqual, L);
aoqi@0 2912 __ should_not_reach_here();
aoqi@0 2913 __ bind(L);
aoqi@0 2914 #endif /* ASSERT */
aoqi@0 2915 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
aoqi@0 2916
aoqi@0 2917
aoqi@0 2918 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
aoqi@0 2919 return stub->entry_point();
aoqi@0 2920 }
aoqi@0 2921
aoqi@0 2922
aoqi@0 2923 void create_control_words() {
aoqi@0 2924 // Round to nearest, 53-bit mode, exceptions masked
aoqi@0 2925 StubRoutines::_fpu_cntrl_wrd_std = 0x027F;
aoqi@0 2926 // Round to zero, 53-bit mode, exceptions masked
aoqi@0 2927 StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
aoqi@0 2928 // Round to nearest, 24-bit mode, exceptions masked
aoqi@0 2929 StubRoutines::_fpu_cntrl_wrd_24 = 0x007F;
aoqi@0 2930 // Round to nearest, 64-bit mode, exceptions masked
aoqi@0 2931 StubRoutines::_fpu_cntrl_wrd_64 = 0x037F;
aoqi@0 2932 // MXCSR default: round to nearest, all SSE exceptions masked
aoqi@0 2933 StubRoutines::_mxcsr_std = 0x1F80;
aoqi@0 2934 // Note: the following two constants are 80-bit values;
aoqi@0 2935 // their layout is critical for correct loading by the FPU.
aoqi@0 2936 // Bias for strict fp multiply/divide
aoqi@0 2937 StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000
aoqi@0 2938 StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
aoqi@0 2939 StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;
aoqi@0 2940 // Un-Bias for strict fp multiply/divide
aoqi@0 2941 StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000
aoqi@0 2942 StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
aoqi@0 2943 StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
aoqi@0 2944 }
aoqi@0 2945
aoqi@0 2946 //---------------------------------------------------------------------------
aoqi@0 2947 // Initialization
aoqi@0 2948
aoqi@0 2949 void generate_initial() {
aoqi@0 2950 // Generates all stubs and initializes the entry points
aoqi@0 2951
aoqi@0 2952 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 2953 // entry points that exist in all platforms
aoqi@0 2954 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
aoqi@0 2955 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
aoqi@0 2956 StubRoutines::_forward_exception_entry = generate_forward_exception();
aoqi@0 2957
aoqi@0 2958 StubRoutines::_call_stub_entry =
aoqi@0 2959 generate_call_stub(StubRoutines::_call_stub_return_address);
aoqi@0 2960 // is referenced by megamorphic call
aoqi@0 2961 StubRoutines::_catch_exception_entry = generate_catch_exception();
aoqi@0 2962
aoqi@0 2963 // These are currently used by Solaris/Intel
aoqi@0 2964 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
aoqi@0 2965
aoqi@0 2966 StubRoutines::_handler_for_unsafe_access_entry =
aoqi@0 2967 generate_handler_for_unsafe_access();
aoqi@0 2968
aoqi@0 2969 // platform dependent
aoqi@0 2970 create_control_words();
aoqi@0 2971
aoqi@0 2972 StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();
aoqi@0 2973 StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd();
aoqi@0 2974 StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT,
aoqi@0 2975 CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
aoqi@0 2976 StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG,
aoqi@0 2977 CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
aoqi@0 2978
aoqi@0 2979 // Build this early so it's available for the interpreter
aoqi@0 2980 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
aoqi@0 2981
aoqi@0 2982 if (UseCRC32Intrinsics) {
aoqi@0 2983 // set table address before generating the stubs that use it
aoqi@0 2984 StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
aoqi@0 2985 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
aoqi@0 2986 }
aoqi@0 2987 }
aoqi@0 2988
aoqi@0 2989
aoqi@0 2990 void generate_all() {
aoqi@0 2991 // Generates all stubs and initializes the entry points
aoqi@0 2992
aoqi@0 2993 // These entry points require SharedInfo::stack0 to be set up in non-core builds
aoqi@0 2994 // and need to be relocatable, so they each fabricate a RuntimeStub internally.
aoqi@0 2995 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
aoqi@0 2996 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
aoqi@0 2997 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
aoqi@0 2998
aoqi@0 2999 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 3000 // entry points that are platform specific
aoqi@0 3001
aoqi@0 3002 // support for verify_oop (must happen after universe_init)
aoqi@0 3003 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
aoqi@0 3004
aoqi@0 3005 // arraycopy stubs used by compilers
aoqi@0 3006 generate_arraycopy_stubs();
aoqi@0 3007
aoqi@0 3008 generate_math_stubs();
aoqi@0 3009
aoqi@0 3010 // don't bother generating these AES intrinsic stubs unless global flag is set
aoqi@0 3011 if (UseAESIntrinsics) {
aoqi@0 3012 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
aoqi@0 3013
aoqi@0 3014 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
aoqi@0 3015 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
aoqi@0 3016 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
aoqi@0 3017 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
aoqi@0 3018 }
aoqi@0 3019
aoqi@0 3020 // Safefetch stubs.
aoqi@0 3021 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
aoqi@0 3022 &StubRoutines::_safefetch32_fault_pc,
aoqi@0 3023 &StubRoutines::_safefetch32_continuation_pc);
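// On this 32-bit port intptr_t has the same width as int, so the N-sized
// SafeFetch entry points can simply alias the 32-bit stub.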
aoqi@0 3024 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
aoqi@0 3025 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
aoqi@0 3026 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
aoqi@0 3027 }
aoqi@0 3028
aoqi@0 3029
aoqi@0 3030 public:
aoqi@0 3031 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
aoqi@0 3032 if (all) {
aoqi@0 3033 generate_all();
aoqi@0 3034 } else {
aoqi@0 3035 generate_initial();
aoqi@0 3036 }
aoqi@0 3037 }
aoqi@0 3038 }; // end class declaration
aoqi@0 3039
aoqi@0 3040
aoqi@0 3041 void StubGenerator_generate(CodeBuffer* code, bool all) {
aoqi@0 3042 StubGenerator g(code, all);
aoqi@0 3043 }
