src/cpu/x86/vm/stubGenerator_x86_32.cpp

Mon, 28 May 2018 10:33:52 +0800

author
aoqi
date
Mon, 28 May 2018 10:33:52 +0800
changeset 9041
95a08233f46c
parent 8877
f04097176542
parent 7994
04ff2f6cd0eb
child 9806
758c07667682
permissions
-rw-r--r--

Merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation.
aoqi@0 8 *
aoqi@0 9 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 12 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 13 * accompanied this code).
aoqi@0 14 *
aoqi@0 15 * You should have received a copy of the GNU General Public License version
aoqi@0 16 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 18 *
aoqi@0 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 20 * or visit www.oracle.com if you need additional information or have any
aoqi@0 21 * questions.
aoqi@0 22 *
aoqi@0 23 */
aoqi@0 24
aoqi@0 25 #include "precompiled.hpp"
aoqi@0 26 #include "asm/macroAssembler.hpp"
aoqi@0 27 #include "asm/macroAssembler.inline.hpp"
aoqi@0 28 #include "interpreter/interpreter.hpp"
aoqi@0 29 #include "nativeInst_x86.hpp"
aoqi@0 30 #include "oops/instanceOop.hpp"
aoqi@0 31 #include "oops/method.hpp"
aoqi@0 32 #include "oops/objArrayKlass.hpp"
aoqi@0 33 #include "oops/oop.inline.hpp"
aoqi@0 34 #include "prims/methodHandles.hpp"
aoqi@0 35 #include "runtime/frame.inline.hpp"
aoqi@0 36 #include "runtime/handles.inline.hpp"
aoqi@0 37 #include "runtime/sharedRuntime.hpp"
aoqi@0 38 #include "runtime/stubCodeGenerator.hpp"
aoqi@0 39 #include "runtime/stubRoutines.hpp"
aoqi@0 40 #include "runtime/thread.inline.hpp"
aoqi@0 41 #include "utilities/top.hpp"
aoqi@0 42 #ifdef COMPILER2
aoqi@0 43 #include "opto/runtime.hpp"
aoqi@0 44 #endif
aoqi@0 45
aoqi@0 46 // Declaration and definition of StubGenerator (no .hpp file).
aoqi@0 47 // For a more detailed description of the stub routine structure
aoqi@0 48 // see the comment in stubRoutines.hpp
aoqi@0 49
// Shorthand used throughout this file: '__' emits through the generator's
// MacroAssembler; 'a__' forces the plain Assembler interface on the same
// object (bypassing MacroAssembler overloads).
#define __ _masm->
#define a__ ((Assembler*)_masm)->

// BLOCK_COMMENT annotates the generated code stream in non-product builds
// (visible in disassembly); it compiles to nothing in PRODUCT.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

// BIND both binds the label and leaves a named marker in the code stream.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
const int FPU_CNTRL_WRD_MASK = 0xFFFF;
aoqi@0 63
aoqi@0 64 // -------------------------------------------------------------------------------------------------------------------------
aoqi@0 65 // Stub Code definitions
aoqi@0 66
aoqi@0 67 static address handle_unsafe_access() {
aoqi@0 68 JavaThread* thread = JavaThread::current();
aoqi@0 69 address pc = thread->saved_exception_pc();
aoqi@0 70 // pc is the instruction which we must emulate
aoqi@0 71 // doing a no-op is fine: return garbage from the load
aoqi@0 72 // therefore, compute npc
aoqi@0 73 address npc = Assembler::locate_next_instruction(pc);
aoqi@0 74
aoqi@0 75 // request an async exception
aoqi@0 76 thread->set_pending_unsafe_access_error();
aoqi@0 77
aoqi@0 78 // return address of next instruction to execute
aoqi@0 79 return npc;
aoqi@0 80 }
aoqi@0 81
aoqi@0 82 class StubGenerator: public StubCodeGenerator {
aoqi@0 83 private:
aoqi@0 84
#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  // Bump a VM statistics counter directly in memory (non-product only).
  // Note: this clobbers no registers but does emit an incl against an
  // absolute address, so it is only for counters with static storage.
  void inc_counter_np_(int& counter) {
    __ incrementl(ExternalAddress((address)&counter));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif //PRODUCT
aoqi@0 95
  // Emit code that bumps the per-element-type arraycopy statistics counter
  // for BasicType 't'.  Compiles to nothing in PRODUCT builds; any type
  // other than the five listed is a generator bug (ShouldNotReachHere).
  void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
    switch (t) {
    case T_BYTE:    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return;
    case T_SHORT:   inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
    case T_INT:     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return;
    case T_LONG:    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return;
    case T_OBJECT:  inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return;
    }
    ShouldNotReachHere();
#endif //PRODUCT
  }
aoqi@0 108
aoqi@0 109 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 110 // Call stubs are used to call Java from C
aoqi@0 111 //
aoqi@0 112 // [ return_from_Java ] <--- rsp
aoqi@0 113 // [ argument word n ]
aoqi@0 114 // ...
aoqi@0 115 // -N [ argument word 1 ]
aoqi@0 116 // -7 [ Possible padding for stack alignment ]
aoqi@0 117 // -6 [ Possible padding for stack alignment ]
aoqi@0 118 // -5 [ Possible padding for stack alignment ]
aoqi@0 119 // -4 [ mxcsr save ] <--- rsp_after_call
aoqi@0 120 // -3 [ saved rbx, ]
aoqi@0 121 // -2 [ saved rsi ]
aoqi@0 122 // -1 [ saved rdi ]
aoqi@0 123 // 0 [ saved rbp, ] <--- rbp,
aoqi@0 124 // 1 [ return address ]
aoqi@0 125 // 2 [ ptr. to call wrapper ]
aoqi@0 126 // 3 [ result ]
aoqi@0 127 // 4 [ result_type ]
aoqi@0 128 // 5 [ method ]
aoqi@0 129 // 6 [ entry_point ]
aoqi@0 130 // 7 [ parameters ]
aoqi@0 131 // 8 [ parameter_size ]
aoqi@0 132 // 9 [ thread ]
aoqi@0 133
aoqi@0 134
  // Generate the call stub used to enter Java code from C++ (see the frame
  // layout diagram above).  All incoming C arguments (call wrapper, result
  // slot, result_type, method, entry_point, parameters, parameter_size,
  // thread) are addressed off rbp after the standard prolog.
  // 'return_address' receives the pc immediately after the call into Java;
  // generate_catch_exception() jumps back to that exact address, so the two
  // stubs' frame layouts must stay in sync.
  address generate_call_stub(address& return_address) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // stub code parameters / addresses
    assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
    bool sse_save = false;
    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
    const int locals_count_in_bytes (4*wordSize);
    const Address mxcsr_save    (rbp, -4 * wordSize);
    const Address saved_rbx     (rbp, -3 * wordSize);
    const Address saved_rsi     (rbp, -2 * wordSize);
    const Address saved_rdi     (rbp, -1 * wordSize);
    const Address result        (rbp,  3 * wordSize);
    const Address result_type   (rbp,  4 * wordSize);
    const Address method        (rbp,  5 * wordSize);
    const Address entry_point   (rbp,  6 * wordSize);
    const Address parameters    (rbp,  7 * wordSize);
    const Address parameter_size(rbp,  8 * wordSize);
    const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
    sse_save = UseSSE > 0;

    // stub code
    __ enter();
    __ movptr(rcx, parameter_size);                       // parameter counter
    __ shlptr(rcx, Interpreter::logStackElementSize);     // convert parameter count to bytes
    __ addptr(rcx, locals_count_in_bytes);                // reserve space for register saves
    __ subptr(rsp, rcx);
    __ andptr(rsp, -(StackAlignmentInBytes));             // Align stack

    // save rdi, rsi, & rbx, according to C calling conventions
    __ movptr(saved_rdi, rdi);
    __ movptr(saved_rsi, rsi);
    __ movptr(saved_rbx, rbx);
    // save and initialize %mxcsr: only reload the standard value if the
    // current (masked) control bits differ, to avoid a needless ldmxcsr.
    if (sse_save) {
      Label skip_ldmx;
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, skip_ldmx);
      __ ldmxcsr(mxcsr_std);
      __ bind(skip_ldmx);
    }

    // make sure the control word is correct.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
    // make sure we have no pending exceptions
    { Label L;
      __ movptr(rcx, thread);
      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;
    __ movl(rcx, parameter_size);  // parameter counter
    __ testl(rcx, rcx);
    __ jcc(Assembler::zero, parameters_done);

    // parameter passing loop

    Label loop;
    // Copy Java parameters in reverse order (receiver last)
    // Note that the argument order is inverted in the process
    // source is rdx[rcx: N-1..0]
    // dest   is rsp[rbx: 0..N-1]

    __ movptr(rdx, parameters);          // parameter pointer
    __ xorptr(rbx, rbx);

    __ BIND(loop);

    // get parameter
    __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
    __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                    Interpreter::expr_offset_in_bytes(0)), rax);          // store parameter
    __ increment(rbx);
    __ decrement(rcx);
    __ jcc(Assembler::notZero, loop);

    // call Java function
    __ BIND(parameters_done);
    __ movptr(rbx, method);           // get Method*
    __ movptr(rax, entry_point);      // get entry_point
    __ mov(rsi, rsp);                 // set sender sp
    BLOCK_COMMENT("call Java function");
    __ call(rax);

    BLOCK_COMMENT("call_stub_return_address:");
    return_address = __ pc();

#ifdef COMPILER2
    // C2 may return with junk left on the x87 stack; clean everything
    // except a float/double result that is legitimately left on TOS when
    // SSE is not used for FP returns.
    {
      Label L_skip;
      if (UseSSE >= 2) {
        __ verify_FPU(0, "call_stub_return");
      } else {
        for (int i = 1; i < 8; i++) {
          __ ffree(i);
        }

        // UseSSE <= 1 so double result should be left on TOS
        __ movl(rsi, result_type);
        __ cmpl(rsi, T_DOUBLE);
        __ jcc(Assembler::equal, L_skip);
        if (UseSSE == 0) {
          // UseSSE == 0 so float result should be left on TOS
          __ cmpl(rsi, T_FLOAT);
          __ jcc(Assembler::equal, L_skip);
        }
        __ ffree(0);
      }
      __ BIND(L_skip);
    }
#endif // COMPILER2

    // store result depending on type
    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ movptr(rdi, result);
    Label is_long, is_float, is_double, exit;
    __ movl(rsi, result_type);
    __ cmpl(rsi, T_LONG);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(rsi, T_FLOAT);
    __ jcc(Assembler::equal, is_float);
    __ cmpl(rsi, T_DOUBLE);
    __ jcc(Assembler::equal, is_double);

    // handle T_INT case
    __ movl(Address(rdi, 0), rax);
    __ BIND(exit);

    // check that FPU stack is empty
    __ verify_FPU(0, "generate_call_stub");

    // pop parameters
    __ lea(rsp, rsp_after_call);

    // restore %mxcsr
    if (sse_save) {
      __ ldmxcsr(mxcsr_save);
    }

    // restore rdi, rsi and rbx,
    __ movptr(rbx, saved_rbx);
    __ movptr(rsi, saved_rsi);
    __ movptr(rdi, saved_rdi);
    __ addptr(rsp, 4*wordSize);

    // return
    __ pop(rbp);
    __ ret(0);

    // handle return types different from T_INT
    __ BIND(is_long);
    __ movl(Address(rdi, 0 * wordSize), rax);
    __ movl(Address(rdi, 1 * wordSize), rdx);
    __ jmp(exit);

    __ BIND(is_float);
    // interpreter uses xmm0 for return values
    if (UseSSE >= 1) {
      __ movflt(Address(rdi, 0), xmm0);
    } else {
      __ fstp_s(Address(rdi, 0));
    }
    __ jmp(exit);

    __ BIND(is_double);
    // interpreter uses xmm0 for return values
    if (UseSSE >= 2) {
      __ movdbl(Address(rdi, 0), xmm0);
    } else {
      __ fstp_d(Address(rdi, 0));
    }
    __ jmp(exit);

    return start;
  }
aoqi@0 323
aoqi@0 324
aoqi@0 325 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 326 // Return point for a Java call if there's an exception thrown in Java code.
aoqi@0 327 // The exception is caught and transformed into a pending exception stored in
aoqi@0 328 // JavaThread that can be tested from within the VM.
aoqi@0 329 //
aoqi@0 330 // Note: Usually the parameters are removed by the callee. In case of an exception
aoqi@0 331 // crossing an activation frame boundary, that is not the case if the callee
aoqi@0 332 // is compiled code => need to setup the rsp.
aoqi@0 333 //
aoqi@0 334 // rax,: exception oop
aoqi@0 335
  // Return point for a Java call when an exception was thrown in Java code
  // (see the comment block above).  Records the exception oop (incoming in
  // rax) as the pending exception of the current JavaThread, together with
  // the VM file/line for diagnostics, then resumes the call stub at its
  // recorded return address so the VM can test the pending exception.
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    // Frame offsets must match the entry frame built by generate_call_stub().
    // rsp_after_call documents the layout even though only 'thread' is read
    // here; the call stub's return path performs the actual rsp restore.
    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()!
    const Address thread        (rbp,  9 * wordSize); // same as in generate_call_stub()!
    address start = __ pc();

    // get thread directly
    __ movptr(rcx, thread);
#ifdef ASSERT
    // verify that threads correspond
    { Label L;
      __ get_thread(rbx);
      __ cmpptr(rbx, rcx);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif
    // set pending exception
    __ verify_oop(rax);
    __ movptr(Address(rcx, Thread::pending_exception_offset()), rax          );
    __ lea(Address(rcx, Thread::exception_file_offset   ()),
           ExternalAddress((address)__FILE__));
    __ movl(Address(rcx, Thread::exception_line_offset   ()), __LINE__ );
    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

    return start;
  }
aoqi@0 366
aoqi@0 367
aoqi@0 368 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 369 // Continuation point for runtime calls returning with a pending exception.
aoqi@0 370 // The pending exception check happened in the runtime or native call stub.
aoqi@0 371 // The pending exception in Thread is converted into a Java-level exception.
aoqi@0 372 //
aoqi@0 373 // Contract with Java-level exception handlers:
aoqi@0 374 // rax: exception
aoqi@0 375 // rdx: throwing pc
aoqi@0 376 //
aoqi@0 377 // NOTE: At entry of this stub, exception-pc must be on stack !!
aoqi@0 378
  // Continuation point for runtime calls returning with a pending exception
  // (see the comment block above).  Converts the JavaThread's pending
  // exception into a Java-level dispatch: looks up the exception handler
  // for the return address on top of the stack, clears the pending
  // exception, and jumps to the handler with rax = exception oop and
  // rdx = throwing pc.
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();
    const Register thread = rcx;

    // other registers used in this stub
    const Register exception_oop = rax;
    const Register handler_addr  = rbx;
    const Register exception_pc  = rdx;

    // Upon entry, the sp points to the return address returning into Java
    // (interpreted or compiled) code; i.e., the return address becomes the
    // throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack but
    // the exception handler will reset the stack pointer -> ignore them.
    // A potential result in registers can be ignored as well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    { Label L;
      __ get_thread(thread);
      __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into rbx,
    __ get_thread(thread);
    __ movptr(exception_pc, Address(rsp, 0));
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
    __ mov(handler_addr, rax);

    // setup rax & rdx, remove return address & clear pending exception
    // (re-fetch the thread: the leaf call above may have clobbered rcx)
    __ get_thread(thread);
    __ pop(exception_pc);
    __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ testptr(exception_oop, exception_oop);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // Verify that there is really a valid exception in RAX.
    __ verify_oop(exception_oop);

    // continue at exception handler (return address removed)
    // rax: exception
    // rbx: exception handler
    // rdx: throwing pc
    __ jmp(handler_addr);

    return start;
  }
aoqi@0 442
aoqi@0 443
aoqi@0 444 //----------------------------------------------------------------------------------------------------
aoqi@0 445 // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest)
aoqi@0 446 //
aoqi@0 447 // xchg exists as far back as 8086, lock needed for MP only
aoqi@0 448 // Stack layout immediately after call:
aoqi@0 449 //
aoqi@0 450 // 0 [ret addr ] <--- rsp
aoqi@0 451 // 1 [ ex ]
aoqi@0 452 // 2 [ dest ]
aoqi@0 453 //
aoqi@0 454 // Result: *dest <- ex, return (old *dest)
aoqi@0 455 //
aoqi@0 456 // Note: win32 does not currently use this code
aoqi@0 457
  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest);
  // see the stack-layout comment above.  Atomically stores 'ex' into *dest
  // and returns the old value of *dest in eax.
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    __ push(rdx);
    // After the push the incoming args sit one word deeper:
    // [rsp+0] saved rdx, [rsp+4] ret addr, [rsp+8] ex, [rsp+12] dest.
    Address exchange(rsp, 2 * wordSize);
    Address dest_addr(rsp, 3 * wordSize);
    __ movl(rax, exchange);
    __ movptr(rdx, dest_addr);
    // xchg with a memory operand is implicitly locked on x86, so no
    // explicit 'lock' prefix is required even on MP.
    __ xchgl(rax, Address(rdx, 0));
    __ pop(rdx);
    __ ret(0);

    return start;
  }
aoqi@0 473
aoqi@0 474 //----------------------------------------------------------------------------------------------------
aoqi@0 475 // Support for void verify_mxcsr()
aoqi@0 476 //
aoqi@0 477 // This routine is used with -Xcheck:jni to verify that native
aoqi@0 478 // JNI code does not return to Java code without restoring the
aoqi@0 479 // MXCSR register to our expected state.
aoqi@0 480
aoqi@0 481
  // -Xcheck:jni support (see comment above): verify on return from native
  // JNI code that MXCSR still holds our standard value; warn and restore
  // it if not.  No-op stub when SSE is unavailable or checking is off.
  address generate_verify_mxcsr() {
    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
    address start = __ pc();

    // Addresses the one-word temp slot allocated by the subptr below.
    const Address mxcsr_save(rsp, 0);

    if (CheckJNICalls && UseSSE > 0 ) {
      Label ok_ret;
      ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
      __ push(rax);
      __ subptr(rsp, wordSize);      // allocate a temp location
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);      // compare control/mask bits only
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("MXCSR changed by native JNI code.");

      __ ldmxcsr(mxcsr_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }
aoqi@0 512
aoqi@0 513
aoqi@0 514 //---------------------------------------------------------------------------
aoqi@0 515 // Support for void verify_fpu_cntrl_wrd()
aoqi@0 516 //
aoqi@0 517 // This routine is used with -Xcheck:jni to verify that native
aoqi@0 518 // JNI code does not return to Java code without restoring the
aoqi@0 519 // FP control word to our expected state.
aoqi@0 520
  // -Xcheck:jni support (see comment above): verify on return from native
  // JNI code that the x87 FP control word still holds our standard value;
  // warn and restore it if not.  No-op stub when checking is off.
  address generate_verify_fpu_cntrl_wrd() {
    StubCodeMark mark(this, "StubRoutines", "verify_spcw");
    address start = __ pc();

    // Addresses the one-word temp slot allocated by the subptr below.
    const Address fpu_cntrl_wrd_save(rsp, 0);

    if (CheckJNICalls) {
      Label ok_ret;
      __ push(rax);
      __ subptr(rsp, wordSize);      // allocate a temp location
      __ fnstcw(fpu_cntrl_wrd_save);
      __ movl(rax, fpu_cntrl_wrd_save);
      __ andl(rax, FPU_CNTRL_WRD_MASK);
      ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
      __ cmp32(rax, fpu_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("Floating point control word changed by native JNI code.");

      __ fldcw(fpu_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }
aoqi@0 551
aoqi@0 552 //---------------------------------------------------------------------------
aoqi@0 553 // Wrapper for slow-case handling of double-to-integer conversion
aoqi@0 554 // d2i or f2i fast case failed either because it is nan or because
aoqi@0 555 // of under/overflow.
aoqi@0 556 // Input: FPU TOS: float value
aoqi@0 557 // Output: rax, (rdx): integer (long) result
aoqi@0 558
  // Slow-case wrapper for d2i/d2l/f2i conversion (see comment above): the
  // fast inline path failed (NaN or under/overflow), so save all state,
  // pass the x87 TOS value to the C helper 'fcn', and return its result in
  // eax (eax:edx for T_LONG).  't' selects only the BLOCK_COMMENT label.
  address generate_d2i_wrapper(BasicType t, address fcn) {
    StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
    address start = __ pc();

    // Capture info about frame layout: word offsets relative to rsp after
    // push_FPU_state(), used below to re-find the saved argument.
    enum layout { FPUState_off         = 0,
                  rbp_off              = FPUStateSizeInWords,
                  rdi_off,
                  rsi_off,
                  rcx_off,
                  rbx_off,
                  saved_argument_off,
                  saved_argument_off2, // 2nd half of double
                  framesize
    };

    assert(FPUStateSizeInWords == 27, "update stack layout");

    // Save outgoing argument to stack across push_FPU_state()
    __ subptr(rsp, wordSize * 2);
    __ fstp_d(Address(rsp, 0));

    // Save CPU & FPU state
    __ push(rbx);
    __ push(rcx);
    __ push(rsi);
    __ push(rdi);
    __ push(rbp);
    __ push_FPU_state();

    // push_FPU_state() resets the FP top of stack
    // Load original double into FP top of stack
    __ fld_d(Address(rsp, saved_argument_off * wordSize));
    // Store double into stack as outgoing argument
    __ subptr(rsp, wordSize*2);
    __ fst_d(Address(rsp, 0));

    // Prepare FPU for doing math in C-land
    __ empty_FPU_stack();
    // Call the C code to massage the double.  Result in EAX
    if (t == T_INT)
      { BLOCK_COMMENT("SharedRuntime::d2i"); }
    else if (t == T_LONG)
      { BLOCK_COMMENT("SharedRuntime::d2l"); }
    // call_VM_leaf pops the 2-word argument pushed above.
    __ call_VM_leaf( fcn, 2 );

    // Restore CPU & FPU state
    __ pop_FPU_state();
    __ pop(rbp);
    __ pop(rdi);
    __ pop(rsi);
    __ pop(rcx);
    __ pop(rbx);
    __ addptr(rsp, wordSize * 2);   // discard the saved-argument slot

    __ ret(0);

    return start;
  }
aoqi@0 618
aoqi@0 619
aoqi@0 620 //---------------------------------------------------------------------------
aoqi@0 621 // The following routine generates a subroutine to throw an asynchronous
aoqi@0 622 // UnknownError when an unsafe access gets a fault that could not be
aoqi@0 623 // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
  // Generates the stub that dispatches a faulting unsafe access to the C
  // helper handle_unsafe_access() (see comment above).  Reserves a return-
  // address slot, saves all GP registers, lets the helper compute the
  // resume pc, patches it into the reserved slot, and 'ret's to it.
  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    __ push(0);                       // hole for return address-to-be
    __ pusha();                       // push registers
    // After pusha the reserved slot sits just above the 8 saved registers.
    Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
    __ movptr(next_pc, rax);          // stuff next address
    __ popa();
    __ ret(0);                        // jump to next address
    return start;
  }
aoqi@0 639
aoqi@0 640
aoqi@0 641 //----------------------------------------------------------------------------------------------------
aoqi@0 642 // Non-destructive plausibility checks for oops
aoqi@0 643
  // Non-destructive plausibility check for oops (used by verify_oop):
  // checks the address pattern against Universe's verify mask/bits and
  // that the klass pointer is non-NULL.  NULL oops pass.  On failure it
  // falls into MacroAssembler::debug32 with all registers saved.
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    // Incoming arguments on stack after saving rax,:
    //
    // [tos    ]: saved rdx
    // [tos + 1]: saved EFLAGS
    // [tos + 2]: return address
    // [tos + 3]: char* error message
    // [tos + 4]: oop object to verify
    // [tos + 5]: saved rax, - saved by caller and bashed

    Label exit, error;
    __ pushf();
    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
    __ push(rdx);                                // save rdx
    // make sure object is 'reasonable'
    __ movptr(rax, Address(rsp, 4 * wordSize));  // get object
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok

    // Check if the oop is in the right area of memory
    const int oop_mask = Universe::verify_oop_mask();
    const int oop_bits = Universe::verify_oop_bits();
    __ mov(rdx, rax);
    __ andptr(rdx, oop_mask);
    __ cmpptr(rdx, oop_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass is 'reasonable', which is not zero.
    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken

    // return if everything seems ok
    __ bind(exit);
    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax, back
    __ pop(rdx);                                 // restore rdx
    __ popf();                                   // restore EFLAGS
    __ ret(3 * wordSize);                        // pop arguments

    // handle errors
    __ bind(error);
    __ movptr(rax, Address(rsp, 5 * wordSize));  // get saved rax, back
    __ pop(rdx);                                 // get saved rdx back
    __ popf();                                   // get saved EFLAGS off stack -- will be ignored
    __ pusha();                                  // push registers (eip = return address & msg are already pushed)
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
    __ popa();
    __ ret(3 * wordSize);                        // pop arguments
    return start;
  }
aoqi@0 698
aoqi@0 699 //
aoqi@0 700 // Generate pre-barrier for array stores
aoqi@0 701 //
aoqi@0 702 // Input:
aoqi@0 703 // start - starting address
aoqi@0 704 // count - element count
  // Emit the GC pre-barrier for an oop-array store (see comment above).
  // Only the G1 SATB collectors need a pre-barrier here, and G1 skips it
  // when the target range is statically known to be uninitialized (no old
  // values to record).  Card-table/ModRef kinds need nothing pre-store.
  // Registers are preserved across the runtime call via pusha/popa.
  void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
    assert_different_registers(start, count);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target in uninitialized
        if (!uninitialized_target) {
           __ pusha();                      // push registers
           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
                           start, count);
           __ popa();
         }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default      :
        ShouldNotReachHere();

    }
  }
aoqi@0 728
aoqi@0 729
aoqi@0 730 //
aoqi@0 731 // Generate a post-barrier for an array store
aoqi@0 732 //
aoqi@0 733 // start - starting address
aoqi@0 734 // count - element count
aoqi@0 735 //
aoqi@0 736 // The two input registers are overwritten.
aoqi@0 737 //
  // Emit the GC post-barrier for an oop-array store (see comment above).
  // G1 delegates to the runtime (registers preserved via pusha/popa); the
  // card-table kinds dirty every card spanned by [start, start+count*ptr)
  // inline.  Both 'start' and 'count' are clobbered in the card-table case.
  void gen_write_ref_array_post_barrier(Register start, Register count) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    assert_different_registers(start, count);
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          __ pusha();                      // push registers
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post),
                          start, count);
          __ popa();
        }
        break;

      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;
          const Register end = count;  // elements count; end == start+count-1
          assert_different_registers(start, end);

          // end = address of last element; then convert both bounds to
          // card indices and turn 'end' into a card count delta, which the
          // loop counts down to zero, dirtying one card per iteration.
          __ lea(end,  Address(start, count, Address::times_ptr, -wordSize));
          __ shrptr(start, CardTableModRefBS::card_shift);
          __ shrptr(end,   CardTableModRefBS::card_shift);
          __ subptr(end, start); // end --> count
        __ BIND(L_loop);
          intptr_t disp = (intptr_t) ct->byte_map_base;
          Address cardtable(start, count, Address::times_1, disp);
          __ movb(cardtable, 0);
          __ decrement(count);
          __ jcc(Assembler::greaterEqual, L_loop);
        }
        break;
      case BarrierSet::ModRef:
        break;
      default      :
        ShouldNotReachHere();

    }
  }
aoqi@0 781
aoqi@0 782
  // Copy 64 bytes chunks
  //
  // Inputs:
  //   from        - source array address
  //   to_from     - destination array address - from, i.e. dest == from + to_from
  //   qword_count - 8-bytes element count; positive on entry (the loop tests
  //                 with subl/jcc(greaterEqual), so a negative count copies
  //                 nothing) -- clobbered
  //
  // Copies forward in 64-byte chunks while at least 8 qwords remain, then
  // copies the remaining 0..7 qwords one at a time.  Advances 'from' past the
  // copied data and clobbers xmm0..xmm7 (only xmm0..xmm1 on the AVX2 path).
  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
    assert( UseSSE >= 2, "supported cpu only" );
    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
    // Copy 64-byte chunks
    __ jmpb(L_copy_64_bytes);   // enter the loop at the count test below
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_64_bytes_loop);

    if (UseUnalignedLoadStores) {
      if (UseAVX >= 2) {
        // Two unaligned 32-byte YMM moves per 64-byte chunk.
        __ vmovdqu(xmm0, Address(from, 0));
        __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
        __ vmovdqu(xmm1, Address(from, 32));
        __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
      } else {
        // Four unaligned 16-byte XMM moves per 64-byte chunk.
        __ movdqu(xmm0, Address(from, 0));
        __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
        __ movdqu(xmm1, Address(from, 16));
        __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
        __ movdqu(xmm2, Address(from, 32));
        __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
        __ movdqu(xmm3, Address(from, 48));
        __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
      }
    } else {
      // 8-byte-at-a-time path: eight qword XMM moves per 64-byte chunk.
      __ movq(xmm0, Address(from, 0));
      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
      __ movq(xmm1, Address(from, 8));
      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
      __ movq(xmm2, Address(from, 16));
      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
      __ movq(xmm3, Address(from, 24));
      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
      __ movq(xmm4, Address(from, 32));
      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
      __ movq(xmm5, Address(from, 40));
      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
      __ movq(xmm6, Address(from, 48));
      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
      __ movq(xmm7, Address(from, 56));
      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
    }

    __ addl(from, 64);
    __ BIND(L_copy_64_bytes);
    __ subl(qword_count, 8);
    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

    if (UseUnalignedLoadStores && (UseAVX >= 2)) {
      // clean upper bits of YMM registers
      __ vpxor(xmm0, xmm0);
      __ vpxor(xmm1, xmm1);
    }
    __ addl(qword_count, 8);    // undo the last subl; 0..7 qwords remain
    __ jccb(Assembler::zero, L_exit);
    //
    // length is too short, just copy qwords
    //
    __ BIND(L_copy_8_bytes);
    __ movq(xmm0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1), xmm0);
    __ addl(from, 8);
    __ decrement(qword_count);
    __ jcc(Assembler::greater, L_copy_8_bytes);
    __ BIND(L_exit);
  }
aoqi@0 856
  // Copy 64 bytes chunks
  //
  // Inputs:
  //   from        - source array address
  //   to_from     - destination array address - from, i.e. dest == from + to_from
  //   qword_count - 8-bytes element count; positive on entry (the loop tests
  //                 with subl/jcc(greaterEqual)) -- clobbered
  //
  // Same structure as xmm_copy_forward but uses the MMX registers; loads are
  // interleaved ahead of the stores.  Ends with emms() because MMX use leaves
  // the x87 FPU state unusable for floating-point code otherwise.
  void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
    assert( VM_Version::supports_mmx(), "supported cpu only" );
    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
    // Copy 64-byte chunks
    __ jmpb(L_copy_64_bytes);   // enter the loop at the count test below
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_64_bytes_loop);
    // Loads are issued a few instructions ahead of the matching stores.
    __ movq(mmx0, Address(from, 0));
    __ movq(mmx1, Address(from, 8));
    __ movq(mmx2, Address(from, 16));
    __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
    __ movq(mmx3, Address(from, 24));
    __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
    __ movq(mmx4, Address(from, 32));
    __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
    __ movq(mmx5, Address(from, 40));
    __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
    __ movq(mmx6, Address(from, 48));
    __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
    __ movq(mmx7, Address(from, 56));
    __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
    __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
    __ movq(Address(from, to_from, Address::times_1, 56), mmx7);
    __ addptr(from, 64);
    __ BIND(L_copy_64_bytes);
    __ subl(qword_count, 8);
    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
    __ addl(qword_count, 8);    // undo the last subl; 0..7 qwords remain
    __ jccb(Assembler::zero, L_exit);
    //
    // length is too short, just copy qwords
    //
    __ BIND(L_copy_8_bytes);
    __ movq(mmx0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1), mmx0);
    __ addptr(from, 8);
    __ decrement(qword_count);
    __ jcc(Assembler::greater, L_copy_8_bytes);
    __ BIND(L_exit);
    __ emms();                  // leave MMX mode so FP code works again
  }
aoqi@0 905
  // Generate a forward (low-to-high) arraycopy stub for non-overlapping
  // arrays.
  //
  // Arguments on the caller's stack (C calling convention):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count, can be zero
  //
  //   t                  - element BasicType; T_OBJECT adds GC barriers and
  //                        selects which tail-copy branches are emitted
  //   aligned            - caller guarantees suitable alignment, so the
  //                        byte/short source-alignment prologue is skipped
  //   sf                 - address scale factor matching the element size
  //   entry              - if non-NULL, receives the no-setup entry point
  //                        that the conjoint stub jumps to
  //   dest_uninitialized - destination is freshly allocated, so the pre
  //                        barrier may be elided for T_OBJECT
  //
  // The stub always copies everything and returns 0 in rax.
  address generate_disjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address* entry, const char *name,
                                 bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;

    int shift = Address::times_ptr - sf;   // log2(elements per dword)

    const Register from = rsi;  // source array address
    const Register to = rdi;  // destination array address
    const Register count = rcx;  // elements count
    const Register to_from = to; // (to - from)
    const Register saved_to = rdx; // saved destination array address

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(from , Address(rsp, 12+ 4));   // 12 = return address + 2 saved regs
    __ movptr(to   , Address(rsp, 12+ 8));
    __ movl(count, Address(rsp, 12+ 12));

    if (entry != NULL) {
      *entry = __ pc(); // Entry point from conjoint arraycopy stub.
      BLOCK_COMMENT("Entry:");
    }

    if (t == T_OBJECT) {
      __ testl(count, count);
      __ jcc(Assembler::zero, L_0_count);   // zero length: skip barriers too
      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
      __ mov(saved_to, to); // save 'to'
    }

    __ subptr(to, from); // to --> to_from
    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
      // align source address at 4 bytes address boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ testl(from, 1);
        __ jccb(Assembler::zero, L_skip_align1);
        __ movb(rax, Address(from, 0));
        __ movb(Address(from, to_from, Address::times_1, 0), rax);
        __ increment(from);
        __ decrement(count);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ testl(from, 2);
      __ jccb(Assembler::zero, L_skip_align2);
      __ movw(rax, Address(from, 0));
      __ movw(Address(from, to_from, Address::times_1, 0), rax);
      __ addptr(from, 2);
      __ subl(count, 1<<(shift-1));   // one word == 1<<(shift-1) elements
      __ BIND(L_skip_align2);
    }
    if (!VM_Version::supports_mmx()) {
      // No MMX: bulk-copy the dwords with rep movs.
      __ mov(rax, count); // save 'count'
      __ shrl(count, shift); // elements --> dwords count
      __ addptr(to_from, from);// restore 'to'
      __ rep_mov();
      __ subptr(to_from, from);// restore 'to_from'
      __ mov(count, rax); // restore 'count'
      __ jmpb(L_copy_2_bytes); // all dwords were copied
    } else {
      if (!UseUnalignedLoadStores) {
        // align to 8 bytes, we know we are 4 byte aligned to start
        __ testptr(from, 4);
        __ jccb(Assembler::zero, L_copy_64_bytes);
        __ movl(rax, Address(from, 0));
        __ movl(Address(from, to_from, Address::times_1, 0), rax);
        __ addptr(from, 4);
        __ subl(count, 1<<shift);     // one dword == 1<<shift elements
      }
      __ BIND(L_copy_64_bytes);
      __ mov(rax, count);
      __ shrl(rax, shift+1); // 8 bytes chunk count
      //
      // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
      //
      if (UseXMMForArrayCopy) {
        xmm_copy_forward(from, to_from, rax);
      } else {
        mmx_copy_forward(from, to_from, rax);
      }
    }
    // copy tailing dword
    __ BIND(L_copy_4_bytes);
    __ testl(count, 1<<shift);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rax, Address(from, 0));
    __ movl(Address(from, to_from, Address::times_1, 0), rax);
    if (t == T_BYTE || t == T_SHORT) {
      __ addptr(from, 4);
      __ BIND(L_copy_2_bytes);
      // copy tailing word
      __ testl(count, 1<<(shift-1));
      __ jccb(Assembler::zero, L_copy_byte);
      __ movw(rax, Address(from, 0));
      __ movw(Address(from, to_from, Address::times_1, 0), rax);
      if (t == T_BYTE) {
        __ addptr(from, 2);
        __ BIND(L_copy_byte);
        // copy tailing byte
        __ testl(count, 1);
        __ jccb(Assembler::zero, L_exit);
        __ movb(rax, Address(from, 0));
        __ movb(Address(from, to_from, Address::times_1, 0), rax);
        __ BIND(L_exit);
      } else {
        __ BIND(L_copy_byte);
      }
    } else {
      __ BIND(L_copy_2_bytes);
    }

    if (t == T_OBJECT) {
      __ movl(count, Address(rsp, 12+12)); // reread 'count'
      __ mov(to, saved_to); // restore 'to'
      gen_write_ref_array_post_barrier(to, count);
      __ BIND(L_0_count);
    }
    inc_copy_counter_np(t);
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
aoqi@0 1042
aoqi@0 1043
  // Generate a stub that fills an array with a given value.
  //
  // Arguments on the caller's stack (C calling convention):
  //    4(rsp) - destination array address
  //    8(rsp) - value to fill with
  //   12(rsp) - element count
  //
  // The actual fill loop is emitted by MacroAssembler::generate_fill;
  // rax and xmm0 are handed to it as temporaries.
  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to = rdi;  // destination array address
    const Register value = rdx; // value
    const Register count = rsi; // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(to , Address(rsp, 12+ 4));   // 12 = return address + 2 saved regs
    __ movl(value, Address(rsp, 12+ 8));
    __ movl(count, Address(rsp, 12+ 12));

    __ generate_fill(t, aligned, to, value, count, rax, xmm0);

    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
aoqi@0 1070
  // Generate a backward (high-to-low) arraycopy stub for possibly
  // overlapping arrays.  When the ranges do not actually overlap
  // (dst <= src, or dst >= src + count*elem_size) control transfers to
  // nooverlap_target, which copies forward.
  //
  // Arguments on the caller's stack (C calling convention):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count, can be zero
  //
  // The stub always copies everything and returns 0 in rax.
  address generate_conjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address nooverlap_target,
                                 address* entry, const char *name,
                                 bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;

    int shift = Address::times_ptr - sf;   // log2(elements per dword)

    const Register src = rax; // source array address
    const Register dst = rdx; // destination array address
    const Register from = rsi; // source array address
    const Register to = rdi; // destination array address
    const Register count = rcx; // elements count
    const Register end = rax; // array end address

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(src , Address(rsp, 12+ 4)); // from   (12 = return address + 2 saved regs)
    __ movptr(dst , Address(rsp, 12+ 8)); // to
    __ movl2ptr(count, Address(rsp, 12+12)); // count

    if (entry != NULL) {
      *entry = __ pc(); // Entry point from generic arraycopy stub.
      BLOCK_COMMENT("Entry:");
    }

    // nooverlap_target expects arguments in rsi and rdi.
    __ mov(from, src);
    __ mov(to  , dst);

    // arrays overlap test: dispatch to disjoint stub if necessary.
    RuntimeAddress nooverlap(nooverlap_target);
    __ cmpptr(dst, src);
    __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
    __ jump_cc(Assembler::belowEqual, nooverlap);
    __ cmpptr(dst, end);
    __ jump_cc(Assembler::aboveEqual, nooverlap);

    if (t == T_OBJECT) {
      __ testl(count, count);
      __ jcc(Assembler::zero, L_0_count);   // zero length: skip barriers too
      gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
    }

    // copy from high to low
    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
    if (t == T_BYTE || t == T_SHORT) {
      // Align the end of destination array at 4 bytes address boundary
      __ lea(end, Address(dst, count, sf, 0));
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ testl(end, 1);
        __ jccb(Assembler::zero, L_skip_align1);
        __ decrement(count);
        __ movb(rdx, Address(from, count, sf, 0));
        __ movb(Address(to, count, sf, 0), rdx);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ testl(end, 2);
      __ jccb(Assembler::zero, L_skip_align2);
      __ subptr(count, 1<<(shift-1));   // one word == 1<<(shift-1) elements
      __ movw(rdx, Address(from, count, sf, 0));
      __ movw(Address(to, count, sf, 0), rdx);
      __ BIND(L_skip_align2);
      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
      __ jcc(Assembler::below, L_copy_4_bytes);
    }

    if (!VM_Version::supports_mmx()) {
      // No MMX: bulk-copy the dwords backwards with std + rep movs.
      __ std();
      __ mov(rax, count); // Save 'count'
      __ mov(rdx, to); // Save 'to'
      __ lea(rsi, Address(from, count, sf, -4));   // last dword of source
      __ lea(rdi, Address(to , count, sf, -4));    // last dword of destination
      __ shrptr(count, shift); // elements --> dwords count
      __ rep_mov();
      __ cld();                // restore direction flag for the rest of the VM
      __ mov(count, rax); // restore 'count'
      __ andl(count, (1<<shift)-1); // mask the number of rest elements
      __ movptr(from, Address(rsp, 12+4)); // reread 'from'
      __ mov(to, rdx); // restore 'to'
      __ jmpb(L_copy_2_bytes); // all dwords were copied
    } else {
      // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
      __ testptr(end, 4);
      __ jccb(Assembler::zero, L_copy_8_bytes);
      __ subl(count, 1<<shift);   // one dword == 1<<shift elements
      __ movl(rdx, Address(from, count, sf, 0));
      __ movl(Address(to, count, sf, 0), rdx);
      __ jmpb(L_copy_8_bytes);

      __ align(OptoLoopAlignment);
      // Move 8 bytes
      __ BIND(L_copy_8_bytes_loop);
      if (UseXMMForArrayCopy) {
        __ movq(xmm0, Address(from, count, sf, 0));
        __ movq(Address(to, count, sf, 0), xmm0);
      } else {
        __ movq(mmx0, Address(from, count, sf, 0));
        __ movq(Address(to, count, sf, 0), mmx0);
      }
      __ BIND(L_copy_8_bytes);
      __ subl(count, 2<<shift);   // one qword == 2<<shift elements
      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
      __ addl(count, 2<<shift);   // undo the last subl; rest elements remain
      if (!UseXMMForArrayCopy) {
        __ emms();               // leave MMX mode so FP code works again
      }
    }
    __ BIND(L_copy_4_bytes);
    // copy prefix dword
    __ testl(count, 1<<shift);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rdx, Address(from, count, sf, -4));
    __ movl(Address(to, count, sf, -4), rdx);

    if (t == T_BYTE || t == T_SHORT) {
      __ subl(count, (1<<shift));
      __ BIND(L_copy_2_bytes);
      // copy prefix word
      __ testl(count, 1<<(shift-1));
      __ jccb(Assembler::zero, L_copy_byte);
      __ movw(rdx, Address(from, count, sf, -2));
      __ movw(Address(to, count, sf, -2), rdx);
      if (t == T_BYTE) {
        __ subl(count, 1<<(shift-1));
        __ BIND(L_copy_byte);
        // copy prefix byte
        __ testl(count, 1);
        __ jccb(Assembler::zero, L_exit);
        __ movb(rdx, Address(from, 0));
        __ movb(Address(to, 0), rdx);
        __ BIND(L_exit);
      } else {
        __ BIND(L_copy_byte);
      }
    } else {
      __ BIND(L_copy_2_bytes);
    }
    if (t == T_OBJECT) {
      __ movl2ptr(count, Address(rsp, 12+12)); // reread count
      gen_write_ref_array_post_barrier(to, count);
      __ BIND(L_0_count);
    }
    inc_copy_counter_np(t);
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
aoqi@0 1232
aoqi@0 1233
  // Generate a forward copy stub for 64-bit (jlong/jdouble) elements in
  // non-overlapping arrays.
  //
  // Arguments on the caller's stack (C calling convention):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count (qwords)
  //
  // Uses XMM or MMX qword moves when the CPU has them; otherwise each
  // qword is moved through the x87 FPU with fild_d/fistp_d, which transfer
  // the 64-bit value as an integer.  Returns 0 in rax.
  address generate_disjoint_long_copy(address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_8_bytes, L_copy_8_bytes_loop;
    const Register from = rax;  // source array address
    const Register to = rdx;  // destination array address
    const Register count = rcx;  // elements count
    const Register to_from = rdx;  // (to - from)

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ movptr(from , Address(rsp, 8+0)); // from   (8 = return address + saved rbp)
    __ movptr(to   , Address(rsp, 8+4)); // to
    __ movl2ptr(count, Address(rsp, 8+8)); // count

    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");

    __ subptr(to, from); // to --> to_from
    if (VM_Version::supports_mmx()) {
      if (UseXMMForArrayCopy) {
        xmm_copy_forward(from, to_from, count);
      } else {
        mmx_copy_forward(from, to_from, count);
      }
    } else {
      // x87 fallback: one qword per iteration.
      __ jmpb(L_copy_8_bytes);  // enter loop at the bottom count test
      __ align(OptoLoopAlignment);
      __ BIND(L_copy_8_bytes_loop);
      __ fild_d(Address(from, 0));
      __ fistp_d(Address(from, to_from, Address::times_1));
      __ addptr(from, 8);
      __ BIND(L_copy_8_bytes);
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
    }
    inc_copy_counter_np(T_LONG);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
aoqi@0 1277
  // Generate a copy stub for 64-bit elements in possibly overlapping arrays.
  // When the ranges do not overlap, control transfers to nooverlap_target
  // (the disjoint long copy).  Otherwise the qwords are copied from the
  // highest index down to 0, which is safe for dst > src overlap.
  //
  // Arguments on the caller's stack (C calling convention):
  //    4(rsp) - source array address
  //    8(rsp) - destination array address
  //   12(rsp) - element count (qwords)
  //
  // Returns 0 in rax.
  address generate_conjoint_long_copy(address nooverlap_target,
                                      address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_8_bytes, L_copy_8_bytes_loop;
    const Register from = rax; // source array address
    const Register to = rdx; // destination array address
    const Register count = rcx; // elements count
    const Register end_from = rax; // source array end address

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ movptr(from , Address(rsp, 8+0)); // from   (8 = return address + saved rbp)
    __ movptr(to   , Address(rsp, 8+4)); // to
    __ movl2ptr(count, Address(rsp, 8+8)); // count

    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");

    // arrays overlap test
    __ cmpptr(to, from);
    RuntimeAddress nooverlap(nooverlap_target);
    __ jump_cc(Assembler::belowEqual, nooverlap);
    __ lea(end_from, Address(from, count, Address::times_8, 0));
    __ cmpptr(to, end_from);
    __ movptr(from, Address(rsp, 8)); // reread 'from' (end_from aliases rax)
    __ jump_cc(Assembler::aboveEqual, nooverlap);

    __ jmpb(L_copy_8_bytes);    // enter loop at the bottom count test

    __ align(OptoLoopAlignment);
    __ BIND(L_copy_8_bytes_loop);
    if (VM_Version::supports_mmx()) {
      if (UseXMMForArrayCopy) {
        __ movq(xmm0, Address(from, count, Address::times_8));
        __ movq(Address(to, count, Address::times_8), xmm0);
      } else {
        __ movq(mmx0, Address(from, count, Address::times_8));
        __ movq(Address(to, count, Address::times_8), mmx0);
      }
    } else {
      // x87 fallback: fild_d/fistp_d move the qword as a 64-bit integer.
      __ fild_d(Address(from, count, Address::times_8));
      __ fistp_d(Address(to, count, Address::times_8));
    }
    __ BIND(L_copy_8_bytes);
    __ decrement(count);        // count becomes the index of the qword to move
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);

    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
      __ emms();                // leave MMX mode so FP code works again
    }
    inc_copy_counter_np(T_LONG);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ xorptr(rax, rax); // return 0
    __ ret(0);
    return start;
  }
aoqi@0 1336
aoqi@0 1337
  // Helper for generating a dynamic type check.
  // Tests whether sub_klass is a subtype of the super_klass whose address
  // (and whose super_check_offset) live at the given stack addresses --
  // keeping those operands on the stack needs one register fewer than
  // check_klass_subtype_fast_path would.
  // The sub_klass must be one of {rbx, rdx, rsi}.
  // The temp is killed.
  // Branches to *L_success / *L_failure once the outcome is decided; a NULL
  // label means "fall through" for that outcome.
  void generate_type_check(Register sub_klass,
                           Address& super_check_offset_addr,
                           Address& super_klass_addr,
                           Register temp,
                           Label* L_success, Label* L_failure) {
    BLOCK_COMMENT("type_check:");

    Label L_fallthrough;
#define LOCAL_JCC(assembler_con, label_ptr) \
    if (label_ptr != NULL)  __ jcc(assembler_con, *(label_ptr)); \
    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/

    // The following is a strange variation of the fast path which requires
    // one less register, because needed values are on the argument stack.
    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
    //                                  L_success, L_failure, NULL);
    assert_different_registers(sub_klass, temp);

    int sc_offset = in_bytes(Klass::secondary_super_cache_offset());

    // if the pointers are equal, we are done (e.g., String[] elements)
    __ cmpptr(sub_klass, super_klass_addr);
    LOCAL_JCC(Assembler::equal, L_success);

    // check the supertype display:
    __ movl2ptr(temp, super_check_offset_addr);
    Address super_check_addr(sub_klass, temp, Address::times_1, 0);
    __ movptr(temp, super_check_addr); // load displayed supertype
    __ cmpptr(temp, super_klass_addr); // test the super type
    LOCAL_JCC(Assembler::equal, L_success);

    // if it was a primary super, we can just fail immediately
    // (the display miss above is definitive for primary supers)
    __ cmpl(super_check_offset_addr, sc_offset);
    LOCAL_JCC(Assembler::notEqual, L_failure);

    // The repne_scan instruction uses fixed registers, which will get spilled.
    // We happen to know this works best when super_klass is in rax.
    Register super_klass = temp;
    __ movptr(super_klass, super_klass_addr);
    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
                                     L_success, L_failure);

    __ bind(L_fallthrough);

    // Emit markers for NULL labels so the disassembly stays readable.
    if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
    if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }

#undef LOCAL_JCC
  }
aoqi@0 1390
  //
  // Generate checkcasting array copy stub
  //
  // Input:
  //   4(rsp) - source array address
  //   8(rsp) - destination array address
  //   12(rsp) - element count, can be zero
  //   16(rsp) - size_t ckoff (super_check_offset)
  //   20(rsp) - oop ckval (super_klass)
  //
  // Output:
  //   rax, == 0 - success
  //   rax, == -1^K - failure, where K is partial transfer count
  //
  // Each element is type-checked against ckval before being stored; on the
  // first failing element the stub stops and reports how many were copied.
  //
  address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_load_element, L_store_element, L_do_card_marks, L_done;

    // register use:
    //  rax, rdx, rcx -- loop control (end_from, end_to, count)
    //  rdi, rsi -- element access (oop, klass)
    //  rbx, -- temp
    const Register from = rax;    // source array address
    const Register to = rdx;    // destination array address
    const Register length = rcx;    // elements count
    const Register elem = rdi;    // each oop copied
    const Register elem_klass = rsi;    // each elem._klass (sub_klass)
    const Register temp = rbx;    // lone remaining temp

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    __ push(rsi);
    __ push(rdi);
    __ push(rbx);

    Address   from_arg(rsp, 16+ 4);     // from   (16 = return address + 3 saved regs)
    Address     to_arg(rsp, 16+ 8);     // to
    Address length_arg(rsp, 16+12);     // elements count
    Address  ckoff_arg(rsp, 16+16);     // super_check_offset
    Address  ckval_arg(rsp, 16+20);     // super_klass

    // Load up:
    __ movptr(from, from_arg);
    __ movptr(to, to_arg);
    __ movl2ptr(length, length_arg);

    if (entry != NULL) {
      *entry = __ pc(); // Entry point from generic arraycopy stub.
      BLOCK_COMMENT("Entry:");
    }

    //---------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the two arrays are subtypes of Object[] but the
    // destination array type is not equal to or a supertype
    // of the source type. Each element must be separately
    // checked.

    // Loop-invariant addresses. They are exclusive end pointers.
    Address end_from_addr(from, length, Address::times_ptr, 0);
    Address end_to_addr(to, length, Address::times_ptr, 0);

    Register end_from = from; // re-use
    Register end_to = to; // re-use
    Register count = length; // re-use

    // Loop-variant addresses. They assume post-incremented count < 0.
    Address from_element_addr(end_from, count, Address::times_ptr, 0);
    Address to_element_addr(end_to, count, Address::times_ptr, 0);
    Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());

    // Copy from low to high addresses, indexed from the end of each array.
    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
    __ lea(end_from, end_from_addr);
    __ lea(end_to, end_to_addr);
    assert(length == count, ""); // else fix next line:
    __ negptr(count); // negate and test the length
    __ jccb(Assembler::notZero, L_load_element);

    // Empty array: Nothing to do.
    __ xorptr(rax, rax); // return 0 on (trivial) success
    __ jmp(L_done);

    // ======== begin loop ========
    // (Loop is rotated; its entry is L_load_element.)
    // Loop control:
    //   for (count = -count; count != 0; count++)
    // Base pointers src, dst are biased by 8*count,to last element.
    __ align(OptoLoopAlignment);

    __ BIND(L_store_element);
    __ movptr(to_element_addr, elem); // store the oop
    __ increment(count); // increment the count toward zero
    __ jccb(Assembler::zero, L_do_card_marks);

    // ======== loop entry is here ========
    __ BIND(L_load_element);
    __ movptr(elem, from_element_addr); // load the oop
    __ testptr(elem, elem);
    __ jccb(Assembler::zero, L_store_element);   // NULL needs no type check

    // (Could do a trick here: Remember last successful non-null
    // element stored and make a quick oop equality check on it.)

    __ movptr(elem_klass, elem_klass_addr); // query the object klass
    generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
                        &L_store_element, NULL);
    // (On fall-through, we have failed the element type check.)
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
    // Emit GC store barriers for the oops we have copied (length_arg + count),
    // and report their number to the caller.
    assert_different_registers(to, count, rax);
    Label L_post_barrier;
    __ addl(count, length_arg); // transfers = (length - remaining)
    __ movl2ptr(rax, count); // save the value
    __ notptr(rax); // report (-1^K) to caller (does not affect flags)
    __ jccb(Assembler::notZero, L_post_barrier);
    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier

    // Come here on success only.
    __ BIND(L_do_card_marks);
    __ xorptr(rax, rax); // return 0 on success
    __ movl2ptr(count, length_arg);

    __ BIND(L_post_barrier);
    __ movptr(to, to_arg); // reload
    gen_write_ref_array_post_barrier(to, count);

    // Common exit point (success or failure).
    __ BIND(L_done);
    __ pop(rbx);
    __ pop(rdi);
    __ pop(rsi);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
aoqi@0 1536
  //
  // Generate 'unsafe' array copy stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Input:
  //   4(rsp)   - source array address
  //   8(rsp)   - destination array address
  //   12(rsp)  - byte count, can be zero
  //
  // Output:
  //   rax, ==  0  -  success
  //   rax, == -1  -  need to call System.arraycopy
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  //
  address generate_unsafe_copy(const char *name,
                               address byte_copy_entry,
                               address short_copy_entry,
                               address int_copy_entry,
                               address long_copy_entry) {

    Label L_long_aligned, L_int_aligned, L_short_aligned;

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register from       = rax;  // source array address
    const Register to         = rdx;  // destination array address
    const Register count      = rcx;  // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    // Incoming arguments, rsp-relative: 12 = saved rbp (from enter()) plus
    // the two pushes above; the extra +4 skips the return address.
    Address  from_arg(rsp, 12+ 4);      // from
    Address    to_arg(rsp, 12+ 8);      // to
    Address count_arg(rsp, 12+12);      // byte count

    // Load up:
    __ movptr(from ,  from_arg);
    __ movptr(to   ,    to_arg);
    __ movl2ptr(count, count_arg);

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);

    // OR the two addresses and the byte count together: the low bits of the
    // result are zero only if all three are aligned to that boundary, so one
    // test per granularity suffices.
    const Register bits = rsi;
    __ mov(bits, from);
    __ orptr(bits, to);
    __ orptr(bits, count);

    __ testl(bits, BytesPerLong-1);
    __ jccb(Assembler::zero, L_long_aligned);

    __ testl(bits, BytesPerInt-1);
    __ jccb(Assembler::zero, L_int_aligned);

    // Not even short-aligned: tail-call the byte copy stub.
    __ testl(bits, BytesPerShort-1);
    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));

    // For the byte/short/int paths rsi/rdi stay pushed; those target stubs
    // perform the pops themselves (contrast with the jlong path below).
    __ BIND(L_short_aligned);
    __ shrptr(count, LogBytesPerShort); // size => short_count
    __ movl(count_arg, count);          // update 'count'
    __ jump(RuntimeAddress(short_copy_entry));

    __ BIND(L_int_aligned);
    __ shrptr(count, LogBytesPerInt); // size => int_count
    __ movl(count_arg, count);          // update 'count'
    __ jump(RuntimeAddress(int_copy_entry));

    __ BIND(L_long_aligned);
    __ shrptr(count, LogBytesPerLong); // size => qword_count
    __ movl(count_arg, count);          // update 'count'
    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
    __ pop(rsi);
    __ jump(RuntimeAddress(long_copy_entry));

    return start;
  }
aoqi@0 1618
aoqi@0 1619
  // Perform range checks on the proposed arraycopy.
  // Smashes src_pos and dst_pos.  (Uses them up for temps.)
  // 'length' is a memory operand (the caller's length-argument stack slot).
  // Jumps to L_failed when [src_pos, src_pos+length) exceeds src's length
  // or [dst_pos, dst_pos+length) exceeds dst's length.
  void arraycopy_range_checks(Register src,
                              Register src_pos,
                              Register dst,
                              Register dst_pos,
                              Address& length,
                              Label& L_failed) {
    BLOCK_COMMENT("arraycopy_range_checks:");
    const Register src_end = src_pos;   // source array end position
    const Register dst_end = dst_pos;   // destination array end position
    __ addl(src_end, length); // src_pos + length
    __ addl(dst_end, length); // dst_pos + length

    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;
    __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes()));
    __ jcc(Assembler::above, L_failed);   // unsigned compare

    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
    __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes()));
    __ jcc(Assembler::above, L_failed);   // unsigned compare

    BLOCK_COMMENT("arraycopy_range_checks done");
  }
aoqi@0 1644
aoqi@0 1645
  //
  //  Generate generic array copy stubs
  //
  //  Validates the arguments the way System.arraycopy would and then
  //  dispatches to the appropriate specialized stub (byte/short/int/long/
  //  oop/checkcast).  Any argument-check failure returns -1 so the caller
  //  falls back to the runtime System.arraycopy path.
  //
  //  Input:
  //     4(rsp)    -  src oop
  //     8(rsp)    -  src_pos
  //    12(rsp)    -  dst oop
  //    16(rsp)    -  dst_pos
  //    20(rsp)    -  element count
  //
  //  Output:
  //    rax, ==  0  -  success
  //    rax, == -1^K - failure, where K is partial transfer count
  //
  address generate_generic_copy(const char *name,
                                address entry_jbyte_arraycopy,
                                address entry_jshort_arraycopy,
                                address entry_jint_arraycopy,
                                address entry_oop_arraycopy,
                                address entry_jlong_arraycopy,
                                address entry_checkcast_arraycopy) {
    Label L_failed, L_failed_0, L_objArray;

    // Pad so that the 5-byte jmp(L_failed) emitted next ends exactly at the
    // CodeEntryAlignment boundary, i.e. immediately before the stub entry.
    { int modulus = CodeEntryAlignment;
      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
      int advance = target - (__ offset() % modulus);
      if (advance < 0)  advance += modulus;
      if (advance > 0)  __ nop(advance);
    }
    StubCodeMark mark(this, "StubRoutines", name);

    // Short-hop target to L_failed.  Makes for denser prologue code.
    // (The argument checks below can then all use 2-byte jccb jumps.)
    __ BIND(L_failed_0);
    __ jmp(L_failed);
    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");

    __ align(CodeEntryAlignment);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);

    // Input values, rsp-relative: 12 = saved rbp/rsi/rdi; +4 skips the
    // return address.
    Address SRC     (rsp, 12+ 4);
    Address SRC_POS (rsp, 12+ 8);
    Address DST     (rsp, 12+12);
    Address DST_POS (rsp, 12+16);
    Address LENGTH  (rsp, 12+20);

    //-----------------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the following conditions are met:
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
    //

    const Register src     = rax;       // source array oop
    const Register src_pos = rsi;
    const Register dst     = rdx;       // destination array oop
    const Register dst_pos = rdi;
    const Register length  = rcx;       // transfer count

    //  if (src == NULL) return -1;
    __ movptr(src, SRC);      // src oop
    __ testptr(src, src);
    __ jccb(Assembler::zero, L_failed_0);

    //  if (src_pos < 0) return -1;
    __ movl2ptr(src_pos, SRC_POS);  // src_pos
    __ testl(src_pos, src_pos);
    __ jccb(Assembler::negative, L_failed_0);

    //  if (dst == NULL) return -1;
    __ movptr(dst, DST);      // dst oop
    __ testptr(dst, dst);
    __ jccb(Assembler::zero, L_failed_0);

    //  if (dst_pos < 0) return -1;
    __ movl2ptr(dst_pos, DST_POS);  // dst_pos
    __ testl(dst_pos, dst_pos);
    __ jccb(Assembler::negative, L_failed_0);

    //  if (length < 0) return -1;
    __ movl2ptr(length, LENGTH);   // length
    __ testl(length, length);
    __ jccb(Assembler::negative, L_failed_0);

    //  if (src->klass() == NULL) return -1;
    Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
    Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
    // NOTE: rcx_src_klass aliases 'length' (both rcx); the element count is
    // reloaded from the LENGTH stack slot whenever it is needed again.
    const Register rcx_src_klass = rcx;    // array klass
    __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes()));

#ifdef ASSERT
    //  assert(src->klass() != NULL);
    BLOCK_COMMENT("assert klasses not null");
    { Label L1, L2;
      __ testptr(rcx_src_klass, rcx_src_klass);
      __ jccb(Assembler::notZero, L2);   // it is broken if klass is NULL
      __ bind(L1);
      __ stop("broken null klass");
      __ bind(L2);
      __ cmpptr(dst_klass_addr, (int32_t)NULL_WORD);
      __ jccb(Assembler::equal, L1);      // this would be broken also
      BLOCK_COMMENT("assert done");
    }
#endif //ASSERT

    // Load layout helper (32-bits)
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //

    int lh_offset = in_bytes(Klass::layout_helper_offset());
    Address src_klass_lh_addr(rcx_src_klass, lh_offset);

    // Handle objArrays completely differently...
    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ cmpl(src_klass_lh_addr, objArray_lh);
    __ jcc(Assembler::equal, L_objArray);

    //  if (src->klass() != dst->klass()) return -1;
    __ cmpptr(rcx_src_klass, dst_klass_addr);
    __ jccb(Assembler::notEqual, L_failed_0);

    const Register rcx_lh = rcx;  // layout helper
    assert(rcx_lh == rcx_src_klass, "known alias");
    __ movl(rcx_lh, src_klass_lh_addr);

    //  if (!src->is_Array()) return -1;
    __ cmpl(rcx_lh, Klass::_lh_neutral_value);
    __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp

    // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
    { Label L;
      __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
      __ jcc(Assembler::greaterEqual, L); // signed cmp
      __ stop("must be a primitive array");
      __ bind(L);
    }
#endif

    assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);

    // TypeArrayKlass
    //
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
    //
    const Register rsi_offset = rsi; // array offset
    const Register src_array  = src; // src array offset
    const Register dst_array  = dst; // dst array offset
    const Register rdi_elsize = rdi; // log2 element size

    // Extract the header size from the layout helper and advance both oops
    // to their first data element; keep log2(element size) in rcx.
    __ mov(rsi_offset, rcx_lh);
    __ shrptr(rsi_offset, Klass::_lh_header_size_shift);
    __ andptr(rsi_offset, Klass::_lh_header_size_mask);   // array_offset
    __ addptr(src_array, rsi_offset);  // src array offset
    __ addptr(dst_array, rsi_offset);  // dst array offset
    __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize

    // next registers should be set before the jump to corresponding stub
    const Register from       = src; // source array address
    const Register to         = dst; // destination array address
    const Register count      = rcx; // elements count
    // some of them should be duplicated on stack
    // NOTE: these macros reuse the incoming SRC/SRC_POS/DST argument slots;
    // safe because those inputs have already been consumed by this point.
#define FROM   Address(rsp, 12+ 4)
#define TO     Address(rsp, 12+ 8)   // Not used now
#define COUNT  Address(rsp, 12+12)   // Only for oop arraycopy

    BLOCK_COMMENT("scale indexes to element size");
    __ movl2ptr(rsi, SRC_POS);  // src_pos
    __ shlptr(rsi);             // src_pos << rcx (log2 elsize)
    assert(src_array == from, "");
    __ addptr(from, rsi);       // from = src_array + SRC_POS << log2 elsize
    __ movl2ptr(rdi, DST_POS);  // dst_pos
    __ shlptr(rdi);             // dst_pos << rcx (log2 elsize)
    assert(dst_array == to, "");
    __ addptr(to,  rdi);        // to = dst_array + DST_POS << log2 elsize
    __ movptr(FROM, from);      // src_addr
    __ mov(rdi_elsize, rcx_lh); // log2 elsize
    __ movl2ptr(count, LENGTH); // elements count

    BLOCK_COMMENT("choose copy loop based on element size");
    __ cmpl(rdi_elsize, 0);

    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
    __ cmpl(rdi_elsize, LogBytesPerShort);
    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
    __ cmpl(rdi_elsize, LogBytesPerInt);
    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
#ifdef ASSERT
    __ cmpl(rdi_elsize, LogBytesPerLong);
    __ jccb(Assembler::notEqual, L_failed);
#endif
    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
    __ pop(rsi);
    __ jump(RuntimeAddress(entry_jlong_arraycopy));

    // Argument-check failure: return -1 so the caller takes the slow path.
    __ BIND(L_failed);
    __ xorptr(rax, rax);
    __ notptr(rax); // return -1
    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // ObjArrayKlass
    __ BIND(L_objArray);
    // live at this point:  rcx_src_klass, src[_pos], dst[_pos]

    Label L_plain_copy, L_checkcast_copy;
    //  test array classes for subtyping
    __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality
    __ jccb(Assembler::notEqual, L_checkcast_copy);

    // Identically typed arrays can be copied without element-wise checks.
    assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);

    __ BIND(L_plain_copy);
    __ movl2ptr(count, LENGTH); // elements count
    __ movl2ptr(src_pos, SRC_POS);  // reload src_pos
    __ lea(from, Address(src, src_pos, Address::times_ptr,
                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
    __ movl2ptr(dst_pos, DST_POS);  // reload dst_pos
    __ lea(to,   Address(dst, dst_pos, Address::times_ptr,
                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
    __ movptr(FROM,  from);   // src_addr
    __ movptr(TO,    to);     // dst_addr
    __ movl(COUNT, count);  // count
    __ jump(RuntimeAddress(entry_oop_arraycopy));

    __ BIND(L_checkcast_copy);
    // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
    {
      // Handy offsets:
      int  ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
      int sco_offset = in_bytes(Klass::super_check_offset_offset());

      Register rsi_dst_klass = rsi;
      Register rdi_temp      = rdi;
      assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
      assert(rdi_temp      == dst_pos, "expected alias w/ dst_pos");
      Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);

      // Before looking at dst.length, make sure dst is also an objArray.
      __ movptr(rsi_dst_klass, dst_klass_addr);
      __ cmpl(dst_klass_lh_addr, objArray_lh);
      __ jccb(Assembler::notEqual, L_failed);

      // It is safe to examine both src.length and dst.length.
      __ movl2ptr(src_pos, SRC_POS);        // reload rsi
      arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
      // (Now src_pos and dst_pos are killed, but not src and dst.)

      // We'll need this temp (don't forget to pop it after the type check).
      __ push(rbx);
      Register rbx_src_klass = rbx;

      // If src's element type is a subtype of dst's, a plain oop copy works.
      __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx
      __ movptr(rsi_dst_klass, dst_klass_addr);
      Address super_check_offset_addr(rsi_dst_klass, sco_offset);
      Label L_fail_array_check;
      generate_type_check(rbx_src_klass,
                          super_check_offset_addr, dst_klass_addr,
                          rdi_temp, NULL, &L_fail_array_check);
      // (On fall-through, we have passed the array type check.)
      __ pop(rbx);
      __ jmp(L_plain_copy);

      __ BIND(L_fail_array_check);
      // Reshuffle arguments so we can call checkcast_arraycopy:

      // match initial saves for checkcast_arraycopy
      // push(rsi);    // already done; see above
      // push(rdi);    // already done; see above
      // push(rbx);    // already done; see above

      // Marshal outgoing arguments now, freeing registers.
      // Offsets are now 16+x: the push(rbx) above added one more slot.
      Address   from_arg(rsp, 16+ 4);   // from
      Address     to_arg(rsp, 16+ 8);   // to
      Address length_arg(rsp, 16+12);   // elements count
      Address  ckoff_arg(rsp, 16+16);   // super_check_offset
      Address  ckval_arg(rsp, 16+20);   // super_klass

      // The incoming slots alias the outgoing ones (SRC_POS_arg == to_arg,
      // DST_POS_arg == ckoff_arg, LENGTH_arg == ckval_arg), so every input
      // is reloaded below BEFORE its slot is overwritten.
      Address SRC_POS_arg(rsp, 16+ 8);
      Address DST_POS_arg(rsp, 16+16);
      Address  LENGTH_arg(rsp, 16+20);
      // push rbx, changed the incoming offsets (why not just use rbp,??)
      // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");

      __ movptr(rbx, Address(rsi_dst_klass, ek_offset));
      __ movl2ptr(length, LENGTH_arg);    // reload elements count
      __ movl2ptr(src_pos, SRC_POS_arg);  // reload src_pos
      __ movl2ptr(dst_pos, DST_POS_arg);  // reload dst_pos

      __ movptr(ckval_arg, rbx);          // destination element type
      __ movl(rbx, Address(rbx, sco_offset));
      __ movl(ckoff_arg, rbx);            // corresponding class check offset

      __ movl(length_arg, length);      // outgoing length argument

      __ lea(from, Address(src, src_pos, Address::times_ptr,
                            arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ movptr(from_arg, from);

      __ lea(to, Address(dst, dst_pos, Address::times_ptr,
                          arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ movptr(to_arg, to);
      __ jump(RuntimeAddress(entry_checkcast_arraycopy));
    }

    return start;
  }
aoqi@0 1977
  // Generates every arraycopy stub and publishes the entry points through
  // StubRoutines.  Generation order matters: each disjoint stub reports its
  // entry via 'entry', which the matching conjoint stub generated right
  // after it consumes (to branch to the disjoint code when the ranges do
  // not overlap -- see generate_conjoint_copy).  The saved entry_* values
  // are later wired into the unsafe and generic dispatcher stubs.
  void generate_arraycopy_stubs() {
    address entry;                     // disjoint entry, consumed by the next conjoint stub
    address entry_jbyte_arraycopy;     // conjoint entries, reused by the dispatchers below
    address entry_jshort_arraycopy;
    address entry_jint_arraycopy;
    address entry_oop_arraycopy;
    address entry_jlong_arraycopy;
    address entry_checkcast_arraycopy;

    StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
        generate_disjoint_copy(T_BYTE,  true, Address::times_1, &entry,
                               "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy =
        generate_conjoint_copy(T_BYTE,  true, Address::times_1,  entry,
                               NULL, "arrayof_jbyte_arraycopy");
    StubRoutines::_jbyte_disjoint_arraycopy =
        generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry,
                               "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy =
        generate_conjoint_copy(T_BYTE, false, Address::times_1, entry,
                               &entry_jbyte_arraycopy, "jbyte_arraycopy");

    StubRoutines::_arrayof_jshort_disjoint_arraycopy =
        generate_disjoint_copy(T_SHORT,  true, Address::times_2, &entry,
                               "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy =
        generate_conjoint_copy(T_SHORT,  true, Address::times_2,  entry,
                               NULL, "arrayof_jshort_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy =
        generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry,
                               "jshort_disjoint_arraycopy");
    StubRoutines::_jshort_arraycopy =
        generate_conjoint_copy(T_SHORT, false, Address::times_2, entry,
                               &entry_jshort_arraycopy, "jshort_arraycopy");

    // Next arrays are always aligned on 4 bytes at least.
    StubRoutines::_jint_disjoint_arraycopy =
        generate_disjoint_copy(T_INT, true, Address::times_4, &entry,
                               "jint_disjoint_arraycopy");
    StubRoutines::_jint_arraycopy =
        generate_conjoint_copy(T_INT, true, Address::times_4, entry,
                               &entry_jint_arraycopy, "jint_arraycopy");

    StubRoutines::_oop_disjoint_arraycopy =
        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
                               "oop_disjoint_arraycopy");
    StubRoutines::_oop_arraycopy =
        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry,
                               &entry_oop_arraycopy, "oop_arraycopy");

    // Uninitialized-destination variants: the destination has not been
    // initialized, so the stubs are generated with dest_uninitialized=true.
    StubRoutines::_oop_disjoint_arraycopy_uninit =
        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
                               "oop_disjoint_arraycopy_uninit",
                               /*dest_uninitialized*/true);
    StubRoutines::_oop_arraycopy_uninit =
        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry,
                               NULL, "oop_arraycopy_uninit",
                               /*dest_uninitialized*/true);

    StubRoutines::_jlong_disjoint_arraycopy =
        generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
    StubRoutines::_jlong_arraycopy =
        generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
                                    "jlong_arraycopy");

    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");

    // int/oop/long element sizes are >= the 4-byte guaranteed alignment, so
    // the arrayof_* variants can simply alias the element-typed stubs.
    StubRoutines::_arrayof_jint_disjoint_arraycopy  = StubRoutines::_jint_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_disjoint_arraycopy   = StubRoutines::_oop_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
    StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;

    StubRoutines::_arrayof_jint_arraycopy  = StubRoutines::_jint_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;

    StubRoutines::_checkcast_arraycopy =
        generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
    StubRoutines::_checkcast_arraycopy_uninit =
        generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true);

    // Dispatcher stubs: these jump into the conjoint entries captured above.
    StubRoutines::_unsafe_arraycopy =
        generate_unsafe_copy("unsafe_arraycopy",
                               entry_jbyte_arraycopy,
                               entry_jshort_arraycopy,
                               entry_jint_arraycopy,
                               entry_jlong_arraycopy);

    StubRoutines::_generic_arraycopy =
        generate_generic_copy("generic_arraycopy",
                               entry_jbyte_arraycopy,
                               entry_jshort_arraycopy,
                               entry_jint_arraycopy,
                               entry_oop_arraycopy,
                               entry_jlong_arraycopy,
                               entry_checkcast_arraycopy);
  }
aoqi@0 2081
  // Generates tiny x87 stubs for the math intrinsics (log, log10, sin, cos,
  // tan, exp, pow) and publishes them through StubRoutines.  Each stub uses
  // the C calling convention: the double argument(s) are on the caller's
  // stack (first at 4(rsp), just above the return address) and the result
  // is left in st(0).
  void generate_math_stubs() {
    {
      StubCodeMark mark(this, "StubRoutines", "log");
      StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();

      __ fld_d(Address(rsp, 4));   // load the argument
      __ flog();
      __ ret(0);
    }
    {
      StubCodeMark mark(this, "StubRoutines", "log10");
      StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();

      __ fld_d(Address(rsp, 4));
      __ flog10();
      __ ret(0);
    }
    {
      StubCodeMark mark(this, "StubRoutines", "sin");
      StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc();

      __ fld_d(Address(rsp, 4));
      __ trigfunc('s');            // 's' selects sine
      __ ret(0);
    }
    {
      StubCodeMark mark(this, "StubRoutines", "cos");
      StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc();

      __ fld_d(Address(rsp, 4));
      __ trigfunc('c');            // 'c' selects cosine
      __ ret(0);
    }
    {
      StubCodeMark mark(this, "StubRoutines", "tan");
      StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();

      __ fld_d(Address(rsp, 4));
      __ trigfunc('t');            // 't' selects tangent
      __ ret(0);
    }
    {
      StubCodeMark mark(this, "StubRoutines", "exp");
      StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();

      __ fld_d(Address(rsp, 4));
      __ exp_with_fallback(0);
      __ ret(0);
    }
    {
      StubCodeMark mark(this, "StubRoutines", "pow");
      StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();

      // Load the second argument (12(rsp)) first so that the first argument
      // ends up on top of the FPU stack for pow_with_fallback.
      __ fld_d(Address(rsp, 12));
      __ fld_d(Address(rsp, 4));
      __ pow_with_fallback(0);
      __ ret(0);
    }
  }
aoqi@0 2141
  // AES intrinsic stubs
  enum {AESBlockSize = 16};   // AES operates on 128-bit (16-byte) blocks
aoqi@0 2144
  // Emits the 16-byte PSHUFB mask used when loading AES key words.
  // The 32-bit words are emitted little-endian, so the in-memory bytes are
  // 03 02 01 00 | 07 06 05 04 | 0b 0a 09 08 | 0f 0e 0d 0c -- i.e. the mask
  // reverses the byte order within each 32-bit lane (see load_key).
  address generate_key_shuffle_mask() {
    __ align(16);   // keep the constant 16-byte aligned for movdqu/pshufb
    StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
    address start = __ pc();
    __ emit_data(0x00010203, relocInfo::none, 0 );
    __ emit_data(0x04050607, relocInfo::none, 0 );
    __ emit_data(0x08090a0b, relocInfo::none, 0 );
    __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
    return start;
  }
aoqi@0 2155
aoqi@0 2156 // Utility routine for loading a 128-bit key word in little endian format
aoqi@0 2157 // can optionally specify that the shuffle mask is already in an xmmregister
aoqi@0 2158 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
aoqi@0 2159 __ movdqu(xmmdst, Address(key, offset));
aoqi@0 2160 if (xmm_shuf_mask != NULL) {
aoqi@0 2161 __ pshufb(xmmdst, xmm_shuf_mask);
aoqi@0 2162 } else {
aoqi@0 2163 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
aoqi@0 2164 }
aoqi@0 2165 }
aoqi@0 2166
  // aesenc using specified key+offset
  // can optionally specify that the shuffle mask is already in an xmmregister
  // Clobbers xmmtmp, which receives the byte-swapped round key.
  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesenc(xmmdst, xmmtmp);   // one AES encryption round on xmmdst
  }
aoqi@0 2173
  // aesdec using specified key+offset
  // can optionally specify that the shuffle mask is already in an xmmregister
  // Clobbers xmmtmp, which receives the byte-swapped round key.
  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesdec(xmmdst, xmmtmp);   // one AES decryption round on xmmdst
  }
aoqi@0 2180
aoqi@0 2181
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  // Encrypts one 16-byte block with the pre-expanded key.  The number of
  // rounds (10/12/14 for AES-128/192/256) is chosen at runtime from the
  // expanded key array's length ({11,13,15} round keys * 4 ints).
  // Returns 0 in rax.
  address generate_aescrypt_encryptBlock() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
    Label L_doLast;
    address start = __ pc();

    // NOTE: 'from' and 'to' deliberately share rdx; the input block is read
    // from 'from' before 'to' is loaded over it (see below).
    const Register from        = rdx;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register keylen      = rax;
    // Stack arguments, rbp-relative after enter(): 8 = saved rbp + return pc.
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_key_shuf_mask = xmm1;
    const XMMRegister xmm_temp1  = xmm2;
    const XMMRegister xmm_temp2  = xmm3;
    const XMMRegister xmm_temp3  = xmm4;
    const XMMRegister xmm_temp4  = xmm5;

    __ enter();   // required for proper stackwalking of RuntimeStub frame
    __ movptr(from, from_param);
    __ movptr(key, key_param);

    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
    // (int-array length field read directly off the key oop).
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
    __ movptr(to, to_param);                  // safe: 'from' no longer needed

    // For encryption, the java expanded key ordering is just what we need

    // Initial key whitening (round key 0).
    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
    __ pxor(xmm_result, xmm_temp1);

    // Rounds 1-8, common to all key sizes.
    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);
    __ aesenc(xmm_result, xmm_temp3);
    __ aesenc(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);
    __ aesenc(xmm_result, xmm_temp3);
    __ aesenc(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);

    // keylen == 44: AES-128, rounds 9+10 are the last two.
    __ cmpl(keylen, 44);
    __ jccb(Assembler::equal, L_doLast);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

    // keylen == 52: AES-192, rounds 11+12 are the last two.
    __ cmpl(keylen, 52);
    __ jccb(Assembler::equal, L_doLast);

    // Otherwise AES-256: rounds 13+14 are the last two.
    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    __ aesenc(xmm_result, xmm_temp1);
    __ aesenclast(xmm_result, xmm_temp2);     // final round (no MixColumns)
    __ movdqu(Address(to, 0), xmm_result);    // store the result
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
aoqi@0 2278
aoqi@0 2279
aoqi@0 2280 // Arguments:
aoqi@0 2281 //
aoqi@0 2282 // Inputs:
aoqi@0 2283 // c_rarg0 - source byte array address
aoqi@0 2284 // c_rarg1 - destination byte array address
aoqi@0 2285 // c_rarg2 - K (key) in little endian int array
aoqi@0 2286 //
  // Single-block (16-byte) AES decryption stub.
  //
  // cdecl stack arguments (relative to rbp after enter(); saved rbp at +0,
  // return pc at +4, first argument at +8):
  //   +8  - source byte array address
  //   +12 - destination byte array address
  //   +16 - K (expanded key) in little endian int array
  //
  // Returns 0 in rax.
  address generate_aescrypt_decryptBlock() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
    Label L_doLast;
    address start = __ pc();

    // NOTE: 'from' and 'to' deliberately share rdx -- 'from' is fully
    // consumed (the input block is read into xmm_result below) before
    // 'to' is loaded from its stack slot.
    const Register from        = rdx;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register keylen      = rax;
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result        = xmm0;
    const XMMRegister xmm_key_shuf_mask = xmm1;
    const XMMRegister xmm_temp1         = xmm2;
    const XMMRegister xmm_temp2         = xmm3;
    const XMMRegister xmm_temp3         = xmm4;
    const XMMRegister xmm_temp4         = xmm5;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ movptr(from, from_param);
    __ movptr(key, key_param);

    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
    // (rounds + 1 round keys for AES-128/192/256, in ints)
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
    __ movptr(to, to_param);                  // 'from' dead from here on

    // for decryption java expanded key ordering is rotated one position from what we want
    // so we start from 0x10 here and hit 0x00 last
    // we don't know if the key is aligned, hence not using load-execute form
    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

    __ pxor  (xmm_result, xmm_temp1);   // initial whitening
    __ aesdec(xmm_result, xmm_temp2);
    __ aesdec(xmm_result, xmm_temp3);
    __ aesdec(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);
    __ aesdec(xmm_result, xmm_temp3);
    __ aesdec(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);  // kept for aesdeclast below

    // 128-bit key (keylen == 44): rounds 0x90/0xa0 are the last two
    __ cmpl(keylen, 44);
    __ jccb(Assembler::equal, L_doLast);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

    // 192-bit key (keylen == 52): rounds 0xb0/0xc0 are the last two
    __ cmpl(keylen, 52);
    __ jccb(Assembler::equal, L_doLast);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    // 256-bit key: rounds 0xd0/0xe0 are the last two
    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    // for decryption the aesdeclast operation is always on key+0x00
    __ aesdeclast(xmm_result, xmm_temp3);
    __ movdqu(Address(to, 0), xmm_result);  // store the result
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
aoqi@0 2378
aoqi@0 2379 void handleSOERegisters(bool saving) {
aoqi@0 2380 const int saveFrameSizeInBytes = 4 * wordSize;
aoqi@0 2381 const Address saved_rbx (rbp, -3 * wordSize);
aoqi@0 2382 const Address saved_rsi (rbp, -2 * wordSize);
aoqi@0 2383 const Address saved_rdi (rbp, -1 * wordSize);
aoqi@0 2384
aoqi@0 2385 if (saving) {
aoqi@0 2386 __ subptr(rsp, saveFrameSizeInBytes);
aoqi@0 2387 __ movptr(saved_rsi, rsi);
aoqi@0 2388 __ movptr(saved_rdi, rdi);
aoqi@0 2389 __ movptr(saved_rbx, rbx);
aoqi@0 2390 } else {
aoqi@0 2391 // restoring
aoqi@0 2392 __ movptr(rsi, saved_rsi);
aoqi@0 2393 __ movptr(rdi, saved_rdi);
aoqi@0 2394 __ movptr(rbx, saved_rbx);
aoqi@0 2395 }
aoqi@0 2396 }
aoqi@0 2397
aoqi@0 2398 // Arguments:
aoqi@0 2399 //
aoqi@0 2400 // Inputs:
aoqi@0 2401 // c_rarg0 - source byte array address
aoqi@0 2402 // c_rarg1 - destination byte array address
aoqi@0 2403 // c_rarg2 - K (key) in little endian int array
aoqi@0 2404 // c_rarg3 - r vector byte array address
aoqi@0 2405 // c_rarg4 - input length
aoqi@0 2406 //
aoqi@0 2407 // Output:
aoqi@0 2408 // rax - input length
aoqi@0 2409 //
  address generate_cipherBlockChaining_encryptAESCrypt() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register rvec        = rdi;      // r byte array initialized from initvector array address
                                           // and left with the results of the last encryption block
    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
    const Register pos         = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;   // carries the running r vector across iterations
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    const int XMM_REG_NUM_KEY_FIRST = 2;
    const int XMM_REG_NUM_KEY_LAST  = 7;
    const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    handleSOERegisters(true /*saving*/);   // rsi/rdi/rbx are clobbered below

    // load registers from incoming parameters
    // (cdecl stack args; first arg at rbp+8 after enter())
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);
    const Address  rvec_param (rbp, 8+12);
    const Address  len_param  (rbp, 8+16);
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);
    __ movptr(rvec , rvec_param);
    __ movptr(len_reg , len_param);

    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 2 thru 7 with keys 0-5 (offsets 0x00-0x50)
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }

    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);

    // 128 bit code follows here
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_128);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    // remaining round keys don't fit in registers: encrypt from memory
    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xa0);                                  // last round key for 128-bit
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);                               // subptr sets ZF ...
    __ jcc(Assembler::notEqual, L_loopTop_128);                     // ... loop until len_reg reaches 0

    __ BIND(L_exit);
    __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object

    handleSOERegisters(false /*restoring*/);
    __ movptr(rax, len_param); // return length
    __ leave();                                  // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    __ BIND(L_key_192_256);
    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be changed to use more xmm registers)
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_192);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    // 192-bit keys have two extra rounds, hence up to offset 0xb0
    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xc0);                                  // last round key for 192-bit
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);                               // ZF drives the back-edge below
    __ jcc(Assembler::notEqual, L_loopTop_192);
    __ jmp(L_exit);

    __ BIND(L_key_256);
    // 256-bit code follows here (could be changed to use more xmm registers)
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_256);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    // 256-bit keys have four extra rounds, hence up to offset 0xd0
    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xe0);                                  // last round key for 256-bit
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);                               // ZF drives the back-edge below
    __ jcc(Assembler::notEqual, L_loopTop_256);
    __ jmp(L_exit);

    return start;
  }
aoqi@0 2550
aoqi@0 2551
aoqi@0 2552 // CBC AES Decryption.
aoqi@0 2553 // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
aoqi@0 2554 //
aoqi@0 2555 // Arguments:
aoqi@0 2556 //
aoqi@0 2557 // Inputs:
aoqi@0 2558 // c_rarg0 - source byte array address
aoqi@0 2559 // c_rarg1 - destination byte array address
aoqi@0 2560 // c_rarg2 - K (key) in little endian int array
aoqi@0 2561 // c_rarg3 - r vector byte array address
aoqi@0 2562 // c_rarg4 - input length
aoqi@0 2563 //
aoqi@0 2564 // Output:
aoqi@0 2565 // rax - input length
aoqi@0 2566 //
aoqi@0 2567
  address generate_cipherBlockChaining_decryptAESCrypt() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256;
    Label L_singleBlock_loopTop_128;
    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register rvec        = rdi;      // r byte array initialized from initvector array address
                                           // and left with the results of the last encryption block
    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
    const Register pos         = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    const int XMM_REG_NUM_KEY_FIRST = 2;
    const int XMM_REG_NUM_KEY_LAST  = 7;
    // first round-key offset NOT preloaded into a register (0x10-0x60 are)
    const int FIRST_NON_REG_KEY_offset = 0x70;
    const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    handleSOERegisters(true /*saving*/);   // rsi/rdi/rbx are clobbered below

    // load registers from incoming parameters
    // (cdecl stack args; first arg at rbp+8 after enter())
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);
    const Address  rvec_param (rbp, 8+12);
    const Address  len_param  (rbp, 8+16);
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);
    __ movptr(rvec , rvec_param);
    __ movptr(len_reg , len_param);

    // the java expanded key ordering is rotated one position from what we want
    // so we start from 0x10 here and hit 0x00 last
    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 2 thru 7 with the first 6 keys (offsets 0x10-0x60)
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }

    // inside here, use the rvec register to point to previous block cipher
    // with which we xor at the end of each newly decrypted block
    const Register  prev_block_cipher_ptr = rvec;

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);


    // 128-bit code follows here (single block per iteration; the 32-bit
    // stub does not parallelize blocks -- see the comment above this stub)
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_128);
    __ cmpptr(len_reg, 0);           // any blocks left??
    __ jcc(Assembler::equal, L_exit);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);                                   // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);      // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));  // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jmp(L_singleBlock_loopTop_128);


    __ BIND(L_exit);
    // last ciphertext block becomes the new r vector
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ movptr(rvec , rvec_param);                                     // restore this since used in loop
    __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
    handleSOERegisters(false /*restoring*/);
    __ movptr(rax, len_param); // return length
    __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
    __ ret(0);


    __ BIND(L_key_192_256);
    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be optimized to use parallelism)
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_192);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);                                   // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);      // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));  // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);                                  // subptr sets ZF for the jcc below
    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
    __ jmp(L_exit);

    __ BIND(L_key_256);
    // 256-bit code follows here (could be optimized to use parallelism)
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_256);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);                                   // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);      // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));  // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);                                  // subptr sets ZF for the jcc below
    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
    __ jmp(L_exit);

    return start;
  }
aoqi@0 2721
aoqi@0 2722 /**
aoqi@0 2723 * Arguments:
aoqi@0 2724 *
aoqi@0 2725 * Inputs:
aoqi@0 2726 * rsp(4) - int crc
aoqi@0 2727 * rsp(8) - byte* buf
aoqi@0 2728 * rsp(12) - int length
aoqi@0 2729 *
   * Output:
aoqi@0 2731 * rax - int crc result
aoqi@0 2732 */
aoqi@0 2733 address generate_updateBytesCRC32() {
aoqi@0 2734 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
aoqi@0 2735
aoqi@0 2736 __ align(CodeEntryAlignment);
aoqi@0 2737 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
aoqi@0 2738
aoqi@0 2739 address start = __ pc();
aoqi@0 2740
aoqi@0 2741 const Register crc = rdx; // crc
aoqi@0 2742 const Register buf = rsi; // source java byte array address
aoqi@0 2743 const Register len = rcx; // length
aoqi@0 2744 const Register table = rdi; // crc_table address (reuse register)
aoqi@0 2745 const Register tmp = rbx;
aoqi@0 2746 assert_different_registers(crc, buf, len, table, tmp, rax);
aoqi@0 2747
aoqi@0 2748 BLOCK_COMMENT("Entry:");
aoqi@0 2749 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2750 __ push(rsi);
aoqi@0 2751 __ push(rdi);
aoqi@0 2752 __ push(rbx);
aoqi@0 2753
aoqi@0 2754 Address crc_arg(rbp, 8 + 0);
aoqi@0 2755 Address buf_arg(rbp, 8 + 4);
aoqi@0 2756 Address len_arg(rbp, 8 + 8);
aoqi@0 2757
aoqi@0 2758 // Load up:
aoqi@0 2759 __ movl(crc, crc_arg);
aoqi@0 2760 __ movptr(buf, buf_arg);
aoqi@0 2761 __ movl(len, len_arg);
aoqi@0 2762
aoqi@0 2763 __ kernel_crc32(crc, buf, len, table, tmp);
aoqi@0 2764
aoqi@0 2765 __ movl(rax, crc);
aoqi@0 2766 __ pop(rbx);
aoqi@0 2767 __ pop(rdi);
aoqi@0 2768 __ pop(rsi);
aoqi@0 2769 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@0 2770 __ ret(0);
aoqi@0 2771
aoqi@0 2772 return start;
aoqi@0 2773 }
aoqi@0 2774
  // Safefetch stubs.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // A fault while executing the load recorded at *fault_pc is expected
    // to be handled by resuming execution at *continuation_pc, at which
    // point rax still holds errValue (loaded before the faulting access).

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    __ movl(rax, Address(rsp, 0x8));  // errValue -- must be in rax BEFORE the faulting load
    __ movl(rcx, Address(rsp, 0x4));  // adr
    // Load *adr into eax, may fault.
    *fault_pc = __ pc();
    switch (size) {
      case 4:
        // int32_t
        __ movl(rax, Address(rcx, 0));
        break;
      case 8:
        // int64_t -- not supported in this 32-bit stub generator
        Unimplemented();
        break;
      default:
        ShouldNotReachHere();
    }

    // Return errValue or *adr.
    *continuation_pc = __ pc();
    __ ret(0);
  }
aoqi@0 2808
aoqi@0 2809 public:
aoqi@0 2810 // Information about frame layout at time of blocking runtime call.
aoqi@0 2811 // Note that we only have to preserve callee-saved registers since
aoqi@0 2812 // the compilers are responsible for supplying a continuation point
aoqi@0 2813 // if they expect all registers to be preserved.
  enum layout {
    thread_off,    // last_java_sp; holds the JavaThread*, first C argument
    arg1_off,      // optional first extra C argument
    arg2_off,      // optional second extra C argument
    rbp_off,       // callee saved register
    ret_pc,        // return pc pushed by the caller
    framesize      // total frame size in words
  };
aoqi@0 2822
aoqi@0 2823 private:
aoqi@0 2824
aoqi@0 2825 #undef __
aoqi@0 2826 #define __ masm->
aoqi@0 2827
aoqi@0 2828 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 2829 // Continuation point for throwing of implicit exceptions that are not handled in
aoqi@0 2830 // the current activation. Fabricates an exception oop and initiates normal
aoqi@0 2831 // exception dispatching in this frame.
aoqi@0 2832 //
aoqi@0 2833 // Previously the compiler (c2) allowed for callee save registers on Java calls.
aoqi@0 2834 // This is no longer true after adapter frames were removed but could possibly
aoqi@0 2835 // be brought back in the future if the interpreter code was reworked and it
aoqi@0 2836 // was deemed worthwhile. The comment below was left to describe what must
aoqi@0 2837 // happen here if callee saves were resurrected. As it stands now this stub
  // could actually be a vanilla BufferBlob and have no oopMap at all.
aoqi@0 2839 // Since it doesn't make much difference we've chosen to leave it the
aoqi@0 2840 // way it was in the callee save days and keep the comment.
aoqi@0 2841
aoqi@0 2842 // If we need to preserve callee-saved values we need a callee-saved oop map and
aoqi@0 2843 // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs.
aoqi@0 2844 // If the compiler needs all registers to be preserved between the fault
aoqi@0 2845 // point and the exception handler then it must assume responsibility for that in
aoqi@0 2846 // AbstractCompiler::continuation_for_implicit_null_exception or
aoqi@0 2847 // continuation_for_implicit_division_by_zero_exception. All other implicit
aoqi@0 2848 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
aoqi@0 2849 // either at call sites or otherwise assume that stack unwinding will be initiated,
aoqi@0 2850 // so caller saved registers were assumed volatile in the compiler.
  // Generate a RuntimeStub that calls 'runtime_entry' (which is expected to
  // install a pending exception) and then jumps to the forward-exception
  // stub.  'arg1'/'arg2' are optional extra arguments passed to the runtime
  // call after the implicit JavaThread* first argument.  The stub never
  // returns to its caller.
  address generate_throw_exception(const char* name, address runtime_entry,
                                   Register arg1 = noreg, Register arg2 = noreg) {

    // fixed-size code buffer; the sequence below is short
    int insts_size = 256;
    int locs_size  = 32;

    CodeBuffer code(name, insts_size, locs_size);
    OopMapSet* oop_maps = new OopMapSet();
    MacroAssembler* masm = new MacroAssembler(&code);

    address start = __ pc();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of
    // thread-local storage and also sets up last_Java_sp slightly
    // differently than the real call_VM
    Register java_thread = rbx;
    __ get_thread(java_thread);

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    // pc and rbp, already pushed
    __ subptr(rsp, (framesize-2) * wordSize); // prolog

    // Frame is now completed as far as size and linkage.

    int frame_complete = __ pc() - start;

    // push java thread (becomes first argument of C function)
    __ movptr(Address(rsp, thread_off * wordSize), java_thread);
    if (arg1 != noreg) {
      __ movptr(Address(rsp, arg1_off * wordSize), arg1);
    }
    if (arg2 != noreg) {
      assert(arg1 != noreg, "missing reg arg");
      __ movptr(Address(rsp, arg2_off * wordSize), arg2);
    }

    // Set up last_Java_sp and last_Java_fp
    __ set_last_Java_frame(java_thread, rsp, rbp, NULL);

    // Call runtime
    BLOCK_COMMENT("call runtime_entry");
    __ call(RuntimeAddress(runtime_entry));
    // Generate oop map
    OopMap* map =  new OopMap(framesize, 0);
    oop_maps->add_gc_map(__ pc() - start, map);

    // restore the thread (cannot use the pushed argument since arguments
    // may be overwritten by C code generated by an optimizing compiler);
    // however can use the register value directly if it is callee saved.
    __ get_thread(java_thread);

    __ reset_last_Java_frame(java_thread, true);

    __ leave(); // required for proper stackwalking of RuntimeStub frame

    // check for pending exceptions
#ifdef ASSERT
    // in debug builds, verify the runtime call really did set one
    Label L;
    __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::notEqual, L);
    __ should_not_reach_here();
    __ bind(L);
#endif /* ASSERT */
    // dispatch the pending exception in the caller's context
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));


    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
    return stub->entry_point();
  }
aoqi@0 2922
aoqi@0 2923
aoqi@0 2924 void create_control_words() {
aoqi@0 2925 // Round to nearest, 53-bit mode, exceptions masked
aoqi@0 2926 StubRoutines::_fpu_cntrl_wrd_std = 0x027F;
aoqi@0 2927 // Round to zero, 53-bit mode, exception mased
aoqi@0 2928 StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
aoqi@0 2929 // Round to nearest, 24-bit mode, exceptions masked
aoqi@0 2930 StubRoutines::_fpu_cntrl_wrd_24 = 0x007F;
aoqi@0 2931 // Round to nearest, 64-bit mode, exceptions masked
aoqi@0 2932 StubRoutines::_fpu_cntrl_wrd_64 = 0x037F;
aoqi@0 2933 // Round to nearest, 64-bit mode, exceptions masked
aoqi@0 2934 StubRoutines::_mxcsr_std = 0x1F80;
aoqi@0 2935 // Note: the following two constants are 80-bit values
aoqi@0 2936 // layout is critical for correct loading by FPU.
aoqi@0 2937 // Bias for strict fp multiply/divide
aoqi@0 2938 StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000
aoqi@0 2939 StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
aoqi@0 2940 StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;
aoqi@0 2941 // Un-Bias for strict fp multiply/divide
aoqi@0 2942 StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000
aoqi@0 2943 StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
aoqi@0 2944 StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
aoqi@0 2945 }
aoqi@0 2946
aoqi@0 2947 //---------------------------------------------------------------------------
aoqi@0 2948 // Initialization
aoqi@0 2949
aoqi@0 2950 void generate_initial() {
aoqi@0 2951 // Generates all stubs and initializes the entry points
aoqi@0 2952
aoqi@0 2953 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 2954 // entry points that exist in all platforms
aoqi@0 2955 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
aoqi@0 2956 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
aoqi@0 2957 StubRoutines::_forward_exception_entry = generate_forward_exception();
aoqi@0 2958
aoqi@0 2959 StubRoutines::_call_stub_entry =
aoqi@0 2960 generate_call_stub(StubRoutines::_call_stub_return_address);
aoqi@0 2961 // is referenced by megamorphic call
aoqi@0 2962 StubRoutines::_catch_exception_entry = generate_catch_exception();
aoqi@0 2963
aoqi@0 2964 // These are currently used by Solaris/Intel
aoqi@0 2965 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
aoqi@0 2966
aoqi@0 2967 StubRoutines::_handler_for_unsafe_access_entry =
aoqi@0 2968 generate_handler_for_unsafe_access();
aoqi@0 2969
aoqi@0 2970 // platform dependent
aoqi@0 2971 create_control_words();
aoqi@0 2972
aoqi@0 2973 StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();
aoqi@0 2974 StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd();
aoqi@0 2975 StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT,
aoqi@0 2976 CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
aoqi@0 2977 StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG,
aoqi@0 2978 CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
aoqi@0 2979
aoqi@0 2980 // Build this early so it's available for the interpreter
aoqi@0 2981 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
aoqi@0 2982
aoqi@0 2983 if (UseCRC32Intrinsics) {
aoqi@0 2984 // set table address before stub generation which use it
aoqi@0 2985 StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
aoqi@0 2986 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
aoqi@0 2987 }
aoqi@0 2988 }
aoqi@0 2989
aoqi@0 2990
aoqi@0 2991 void generate_all() {
aoqi@0 2992 // Generates all stubs and initializes the entry points
aoqi@0 2993
aoqi@0 2994 // These entry points require SharedInfo::stack0 to be set up in non-core builds
aoqi@0 2995 // and need to be relocatable, so they each fabricate a RuntimeStub internally.
aoqi@0 2996 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
aoqi@0 2997 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
aoqi@0 2998 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
aoqi@0 2999
aoqi@0 3000 //------------------------------------------------------------------------------------------------------------------------
aoqi@0 3001 // entry points that are platform specific
aoqi@0 3002
aoqi@0 3003 // support for verify_oop (must happen after universe_init)
aoqi@0 3004 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
aoqi@0 3005
aoqi@0 3006 // arraycopy stubs used by compilers
aoqi@0 3007 generate_arraycopy_stubs();
aoqi@0 3008
aoqi@0 3009 generate_math_stubs();
aoqi@0 3010
aoqi@0 3011 // don't bother generating these AES intrinsic stubs unless global flag is set
aoqi@0 3012 if (UseAESIntrinsics) {
aoqi@0 3013 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
aoqi@0 3014
aoqi@0 3015 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
aoqi@0 3016 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
aoqi@0 3017 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
aoqi@0 3018 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
aoqi@0 3019 }
aoqi@0 3020
aoqi@0 3021 // Safefetch stubs.
aoqi@0 3022 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
aoqi@0 3023 &StubRoutines::_safefetch32_fault_pc,
aoqi@0 3024 &StubRoutines::_safefetch32_continuation_pc);
aoqi@0 3025 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
aoqi@0 3026 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
aoqi@0 3027 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
aoqi@0 3028 }
aoqi@0 3029
aoqi@0 3030
aoqi@0 3031 public:
aoqi@0 3032 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
aoqi@0 3033 if (all) {
aoqi@0 3034 generate_all();
aoqi@0 3035 } else {
aoqi@0 3036 generate_initial();
aoqi@0 3037 }
aoqi@0 3038 }
aoqi@0 3039 }; // end class declaration
aoqi@0 3040
aoqi@0 3041
aoqi@0 3042 void StubGenerator_generate(CodeBuffer* code, bool all) {
aoqi@0 3043 StubGenerator g(code, all);
aoqi@0 3044 }

mercurial