src/cpu/sparc/vm/stubGenerator_sparc.cpp

author:      johnc
date:        Thu, 07 Apr 2011 09:53:20 -0700
changeset:   2781:e1162778c1c8
parent:      2606:0ac769a57c64
child:       2978:d83ac25d0304
permissions: -rw-r--r--

7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking but is re-attached to the strongly reachable object graph during marking may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and by intercepting accesses through JNI, reflection, and Unsafe, so that when a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
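The barrier described above can be pictured with a small conceptual sketch (illustrative pseudo-code only, not the generated intrinsic; load_referent_field() and satb_enqueue() are hypothetical stand-ins for the referent load and for the G1 SATB pre-barrier that records a value in the current thread's SATB queue):

    // Hypothetical sketch of the Reference.get() read barrier.
    oop Reference_get(oop reference) {
      oop referent = load_referent_field(reference);   // java.lang.ref.Reference::referent
      if (referent != NULL) {
        satb_enqueue(referent);   // keep the referent live for concurrent marking
      }
      return referent;
    }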

duke@435 1 /*
iveresov@2595 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
duke@435 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@435 4 *
duke@435 5 * This code is free software; you can redistribute it and/or modify it
duke@435 6 * under the terms of the GNU General Public License version 2 only, as
duke@435 7 * published by the Free Software Foundation.
duke@435 8 *
duke@435 9 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@435 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@435 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@435 12 * version 2 for more details (a copy is included in the LICENSE file that
duke@435 13 * accompanied this code).
duke@435 14 *
duke@435 15 * You should have received a copy of the GNU General Public License version
duke@435 16 * 2 along with this work; if not, write to the Free Software Foundation,
duke@435 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@435 18 *
trims@1907 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
trims@1907 20 * or visit www.oracle.com if you need additional information or have any
trims@1907 21 * questions.
duke@435 22 *
duke@435 23 */
duke@435 24
stefank@2314 25 #include "precompiled.hpp"
stefank@2314 26 #include "asm/assembler.hpp"
stefank@2314 27 #include "assembler_sparc.inline.hpp"
stefank@2314 28 #include "interpreter/interpreter.hpp"
stefank@2314 29 #include "nativeInst_sparc.hpp"
stefank@2314 30 #include "oops/instanceOop.hpp"
stefank@2314 31 #include "oops/methodOop.hpp"
stefank@2314 32 #include "oops/objArrayKlass.hpp"
stefank@2314 33 #include "oops/oop.inline.hpp"
stefank@2314 34 #include "prims/methodHandles.hpp"
stefank@2314 35 #include "runtime/frame.inline.hpp"
stefank@2314 36 #include "runtime/handles.inline.hpp"
stefank@2314 37 #include "runtime/sharedRuntime.hpp"
stefank@2314 38 #include "runtime/stubCodeGenerator.hpp"
stefank@2314 39 #include "runtime/stubRoutines.hpp"
stefank@2314 40 #include "utilities/top.hpp"
stefank@2314 41 #ifdef TARGET_OS_FAMILY_linux
stefank@2314 42 # include "thread_linux.inline.hpp"
stefank@2314 43 #endif
stefank@2314 44 #ifdef TARGET_OS_FAMILY_solaris
stefank@2314 45 # include "thread_solaris.inline.hpp"
stefank@2314 46 #endif
stefank@2314 47 #ifdef COMPILER2
stefank@2314 48 #include "opto/runtime.hpp"
stefank@2314 49 #endif
duke@435 50
duke@435 51 // Declaration and definition of StubGenerator (no .hpp file).
duke@435 52 // For a more detailed description of the stub routine structure
duke@435 53 // see the comment in stubRoutines.hpp.
duke@435 54
duke@435 55 #define __ _masm->
duke@435 56
duke@435 57 #ifdef PRODUCT
duke@435 58 #define BLOCK_COMMENT(str) /* nothing */
duke@435 59 #else
duke@435 60 #define BLOCK_COMMENT(str) __ block_comment(str)
duke@435 61 #endif
duke@435 62
duke@435 63 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
duke@435 64
duke@435 65 // Note: The register L7 is used as L7_thread_cache, and may not be used
duke@435 66 // any other way within this module.
duke@435 67
duke@435 68
duke@435 69 static const Register& Lstub_temp = L2;
duke@435 70
duke@435 71 // -------------------------------------------------------------------------------------------------------------------------
duke@435 72 // Stub Code definitions
duke@435 73
duke@435 74 static address handle_unsafe_access() {
duke@435 75 JavaThread* thread = JavaThread::current();
duke@435 76 address pc = thread->saved_exception_pc();
duke@435 77 address npc = thread->saved_exception_npc();
duke@435 78 // pc is the instruction which we must emulate
duke@435 79 // doing a no-op is fine: return garbage from the load
duke@435 80
duke@435 81 // request an async exception
duke@435 82 thread->set_pending_unsafe_access_error();
duke@435 83
duke@435 84 // return address of next instruction to execute
duke@435 85 return npc;
duke@435 86 }
duke@435 87
duke@435 88 class StubGenerator: public StubCodeGenerator {
duke@435 89 private:
duke@435 90
duke@435 91 #ifdef PRODUCT
duke@435 92 #define inc_counter_np(a,b,c) (0)
duke@435 93 #else
duke@435 94 #define inc_counter_np(counter, t1, t2) \
duke@435 95 BLOCK_COMMENT("inc_counter " #counter); \
twisti@1162 96 __ inc_counter(&counter, t1, t2);
duke@435 97 #endif
duke@435 98
duke@435 99 //----------------------------------------------------------------------------------------------------
duke@435 100 // Call stubs are used to call Java from C
duke@435 101
duke@435 102 address generate_call_stub(address& return_pc) {
duke@435 103 StubCodeMark mark(this, "StubRoutines", "call_stub");
duke@435 104 address start = __ pc();
duke@435 105
duke@435 106 // Incoming arguments:
duke@435 107 //
duke@435 108 // o0 : call wrapper address
duke@435 109 // o1 : result (address)
duke@435 110 // o2 : result type
duke@435 111 // o3 : method
duke@435 112 // o4 : (interpreter) entry point
duke@435 113 // o5 : parameters (address)
duke@435 114 // [sp + 0x5c]: parameter size (in words)
duke@435 115 // [sp + 0x60]: thread
duke@435 116 //
duke@435 117 // +---------------+ <--- sp + 0
duke@435 118 // | |
duke@435 119 // . reg save area .
duke@435 120 // | |
duke@435 121 // +---------------+ <--- sp + 0x40
duke@435 122 // | |
duke@435 123 // . extra 7 slots .
duke@435 124 // | |
duke@435 125 // +---------------+ <--- sp + 0x5c
duke@435 126 // | param. size |
duke@435 127 // +---------------+ <--- sp + 0x60
duke@435 128 // | thread |
duke@435 129 // +---------------+
duke@435 130 // | |
duke@435 131
duke@435 132 // note: if the link argument position changes, adjust
duke@435 133 // the code in frame::entry_frame_call_wrapper()
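// For reference, the C++ caller reaches this stub through the CallStub
// function-pointer typedef declared in stubRoutines.hpp; roughly (see that
// header for the authoritative declaration):
//
//   typedef void (*CallStub)(address link, intptr_t* result,
//                            BasicType result_type, methodOopDesc* method,
//                            address entry_point, intptr_t* parameters,
//                            int size_of_parameters, TRAPS);
//
// which corresponds to the o0..o5 registers and stack slots listed above.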
duke@435 134
duke@435 135 const Argument link = Argument(0, false); // used only for GC
duke@435 136 const Argument result = Argument(1, false);
duke@435 137 const Argument result_type = Argument(2, false);
duke@435 138 const Argument method = Argument(3, false);
duke@435 139 const Argument entry_point = Argument(4, false);
duke@435 140 const Argument parameters = Argument(5, false);
duke@435 141 const Argument parameter_size = Argument(6, false);
duke@435 142 const Argument thread = Argument(7, false);
duke@435 143
duke@435 144 // setup thread register
duke@435 145 __ ld_ptr(thread.as_address(), G2_thread);
coleenp@548 146 __ reinit_heapbase();
duke@435 147
duke@435 148 #ifdef ASSERT
duke@435 149 // make sure we have no pending exceptions
duke@435 150 { const Register t = G3_scratch;
duke@435 151 Label L;
duke@435 152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
duke@435 153 __ br_null(t, false, Assembler::pt, L);
duke@435 154 __ delayed()->nop();
duke@435 155 __ stop("StubRoutines::call_stub: entered with pending exception");
duke@435 156 __ bind(L);
duke@435 157 }
duke@435 158 #endif
duke@435 159
duke@435 160 // create activation frame & allocate space for parameters
duke@435 161 { const Register t = G3_scratch;
duke@435 162 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
duke@435 163 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
duke@435 164 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
twisti@1861 165 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
duke@435 166 __ neg(t); // negate so it can be used with save
duke@435 167 __ save(SP, t, SP); // setup new frame
duke@435 168 }
duke@435 169
duke@435 170 // +---------------+ <--- sp + 0
duke@435 171 // | |
duke@435 172 // . reg save area .
duke@435 173 // | |
duke@435 174 // +---------------+ <--- sp + 0x40
duke@435 175 // | |
duke@435 176 // . extra 7 slots .
duke@435 177 // | |
duke@435 178 // +---------------+ <--- sp + 0x5c
duke@435 179 // | empty slot | (only if parameter size is even)
duke@435 180 // +---------------+
duke@435 181 // | |
duke@435 182 // . parameters .
duke@435 183 // | |
duke@435 184 // +---------------+ <--- fp + 0
duke@435 185 // | |
duke@435 186 // . reg save area .
duke@435 187 // | |
duke@435 188 // +---------------+ <--- fp + 0x40
duke@435 189 // | |
duke@435 190 // . extra 7 slots .
duke@435 191 // | |
duke@435 192 // +---------------+ <--- fp + 0x5c
duke@435 193 // | param. size |
duke@435 194 // +---------------+ <--- fp + 0x60
duke@435 195 // | thread |
duke@435 196 // +---------------+
duke@435 197 // | |
duke@435 198
duke@435 199 // pass parameters if any
duke@435 200 BLOCK_COMMENT("pass parameters if any");
duke@435 201 { const Register src = parameters.as_in().as_register();
duke@435 202 const Register dst = Lentry_args;
duke@435 203 const Register tmp = G3_scratch;
duke@435 204 const Register cnt = G4_scratch;
duke@435 205
duke@435 206 // test if any parameters & setup of Lentry_args
duke@435 207 Label exit;
duke@435 208 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
duke@435 209 __ add( FP, STACK_BIAS, dst );
duke@435 210 __ tst(cnt);
duke@435 211 __ br(Assembler::zero, false, Assembler::pn, exit);
duke@435 212 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
duke@435 213
duke@435 214 // copy parameters if any
duke@435 215 Label loop;
duke@435 216 __ BIND(loop);
duke@435 217 // Store parameter value
duke@435 218 __ ld_ptr(src, 0, tmp);
duke@435 219 __ add(src, BytesPerWord, src);
twisti@1861 220 __ st_ptr(tmp, dst, 0);
duke@435 221 __ deccc(cnt);
duke@435 222 __ br(Assembler::greater, false, Assembler::pt, loop);
twisti@1861 223 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
duke@435 224
duke@435 225 // done
duke@435 226 __ BIND(exit);
duke@435 227 }
duke@435 228
duke@435 229 // setup parameters, method & call Java function
duke@435 230 #ifdef ASSERT
duke@435 231 // layout_activation_impl checks its notion of saved SP against
duke@435 232 // this register, so if this changes update it as well.
duke@435 233 const Register saved_SP = Lscratch;
duke@435 234 __ mov(SP, saved_SP); // keep track of SP before call
duke@435 235 #endif
duke@435 236
duke@435 237 // setup parameters
duke@435 238 const Register t = G3_scratch;
duke@435 239 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
twisti@1861 240 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
duke@435 241 __ sub(FP, t, Gargs); // setup parameter pointer
duke@435 242 #ifdef _LP64
duke@435 243 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
duke@435 244 #endif
duke@435 245 __ mov(SP, O5_savedSP);
duke@435 246
duke@435 247
duke@435 248 // do the call
duke@435 249 //
duke@435 250 // the following registers must be set up:
duke@435 251 //
duke@435 252 // G2_thread
duke@435 253 // G5_method
duke@435 254 // Gargs
duke@435 255 BLOCK_COMMENT("call Java function");
duke@435 256 __ jmpl(entry_point.as_in().as_register(), G0, O7);
duke@435 257 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
duke@435 258
duke@435 259 BLOCK_COMMENT("call_stub_return_address:");
duke@435 260 return_pc = __ pc();
duke@435 261
duke@435 262 // The callee, if it wasn't interpreted, can return with SP changed, so
duke@435 263 // we can no longer assert that SP is unchanged.
duke@435 264
duke@435 265 // store result depending on type
duke@435 266 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
duke@435 267 // is treated as T_INT)
duke@435 268 { const Register addr = result .as_in().as_register();
duke@435 269 const Register type = result_type.as_in().as_register();
duke@435 270 Label is_long, is_float, is_double, is_object, exit;
duke@435 271 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
duke@435 272 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
duke@435 273 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
duke@435 274 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
duke@435 275 __ delayed()->nop();
duke@435 276
duke@435 277 // store int result
duke@435 278 __ st(O0, addr, G0);
duke@435 279
duke@435 280 __ BIND(exit);
duke@435 281 __ ret();
duke@435 282 __ delayed()->restore();
duke@435 283
duke@435 284 __ BIND(is_object);
duke@435 285 __ ba(false, exit);
duke@435 286 __ delayed()->st_ptr(O0, addr, G0);
duke@435 287
duke@435 288 __ BIND(is_float);
duke@435 289 __ ba(false, exit);
duke@435 290 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
duke@435 291
duke@435 292 __ BIND(is_double);
duke@435 293 __ ba(false, exit);
duke@435 294 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
duke@435 295
duke@435 296 __ BIND(is_long);
duke@435 297 #ifdef _LP64
duke@435 298 __ ba(false, exit);
duke@435 299 __ delayed()->st_long(O0, addr, G0); // store entire long
duke@435 300 #else
duke@435 301 #if defined(COMPILER2)
duke@435 302 // All return values are where we want them, except for Longs. C2 returns
duke@435 303 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
duke@435 304 // Since the interpreter will return longs in both G1 and O0/O1 in the 32-bit
duke@435 305 // build we simply always use G1.
duke@435 306 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
duke@435 307 // do this here. Unfortunately, if we did a rethrow we'd see a MachEpilog node
duke@435 308 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
duke@435 309
duke@435 310 __ ba(false, exit);
duke@435 311 __ delayed()->stx(G1, addr, G0); // store entire long
duke@435 312 #else
duke@435 313 __ st(O1, addr, BytesPerInt);
duke@435 314 __ ba(false, exit);
duke@435 315 __ delayed()->st(O0, addr, G0);
duke@435 316 #endif /* COMPILER2 */
duke@435 317 #endif /* _LP64 */
duke@435 318 }
duke@435 319 return start;
duke@435 320 }
duke@435 321
duke@435 322
duke@435 323 //----------------------------------------------------------------------------------------------------
duke@435 324 // Return point for a Java call if there's an exception thrown in Java code.
duke@435 325 // The exception is caught and transformed into a pending exception stored in
duke@435 326 // JavaThread that can be tested from within the VM.
duke@435 327 //
duke@435 328 // Oexception: exception oop
duke@435 329
duke@435 330 address generate_catch_exception() {
duke@435 331 StubCodeMark mark(this, "StubRoutines", "catch_exception");
duke@435 332
duke@435 333 address start = __ pc();
duke@435 334 // verify that thread corresponds
duke@435 335 __ verify_thread();
duke@435 336
duke@435 337 const Register& temp_reg = Gtemp;
twisti@1162 338 Address pending_exception_addr (G2_thread, Thread::pending_exception_offset());
twisti@1162 339 Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset ());
twisti@1162 340 Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset ());
duke@435 341
duke@435 342 // set pending exception
duke@435 343 __ verify_oop(Oexception);
duke@435 344 __ st_ptr(Oexception, pending_exception_addr);
duke@435 345 __ set((intptr_t)__FILE__, temp_reg);
duke@435 346 __ st_ptr(temp_reg, exception_file_offset_addr);
duke@435 347 __ set((intptr_t)__LINE__, temp_reg);
duke@435 348 __ st(temp_reg, exception_line_offset_addr);
duke@435 349
duke@435 350 // complete return to VM
duke@435 351 assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
duke@435 352
twisti@1162 353 AddressLiteral stub_ret(StubRoutines::_call_stub_return_address);
twisti@1162 354 __ jump_to(stub_ret, temp_reg);
duke@435 355 __ delayed()->nop();
duke@435 356
duke@435 357 return start;
duke@435 358 }
duke@435 359
duke@435 360
duke@435 361 //----------------------------------------------------------------------------------------------------
duke@435 362 // Continuation point for runtime calls returning with a pending exception
duke@435 363 // The pending exception check happened in the runtime or native call stub
duke@435 364 // The pending exception in Thread is converted into a Java-level exception
duke@435 365 //
duke@435 366 // Contract with Java-level exception handler: O0 = exception
duke@435 367 // O1 = throwing pc
duke@435 368
duke@435 369 address generate_forward_exception() {
duke@435 370 StubCodeMark mark(this, "StubRoutines", "forward_exception");
duke@435 371 address start = __ pc();
duke@435 372
duke@435 373 // Upon entry, O7 has the return address returning into Java
duke@435 374 // (interpreted or compiled) code; i.e. the return address
duke@435 375 // becomes the throwing pc.
duke@435 376
duke@435 377 const Register& handler_reg = Gtemp;
duke@435 378
twisti@1162 379 Address exception_addr(G2_thread, Thread::pending_exception_offset());
duke@435 380
duke@435 381 #ifdef ASSERT
duke@435 382 // make sure that this code is only executed if there is a pending exception
duke@435 383 { Label L;
duke@435 384 __ ld_ptr(exception_addr, Gtemp);
duke@435 385 __ br_notnull(Gtemp, false, Assembler::pt, L);
duke@435 386 __ delayed()->nop();
duke@435 387 __ stop("StubRoutines::forward exception: no pending exception (1)");
duke@435 388 __ bind(L);
duke@435 389 }
duke@435 390 #endif
duke@435 391
duke@435 392 // compute exception handler into handler_reg
duke@435 393 __ get_thread();
duke@435 394 __ ld_ptr(exception_addr, Oexception);
duke@435 395 __ verify_oop(Oexception);
duke@435 396 __ save_frame(0); // compensates for compiler weakness
duke@435 397 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
duke@435 398 BLOCK_COMMENT("call exception_handler_for_return_address");
twisti@1730 399 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
duke@435 400 __ mov(O0, handler_reg);
duke@435 401 __ restore(); // compensates for compiler weakness
duke@435 402
duke@435 403 __ ld_ptr(exception_addr, Oexception);
duke@435 404 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
duke@435 405
duke@435 406 #ifdef ASSERT
duke@435 407 // make sure exception is set
duke@435 408 { Label L;
duke@435 409 __ br_notnull(Oexception, false, Assembler::pt, L);
duke@435 410 __ delayed()->nop();
duke@435 411 __ stop("StubRoutines::forward exception: no pending exception (2)");
duke@435 412 __ bind(L);
duke@435 413 }
duke@435 414 #endif
duke@435 415 // jump to exception handler
duke@435 416 __ jmp(handler_reg, 0);
duke@435 417 // clear pending exception
duke@435 418 __ delayed()->st_ptr(G0, exception_addr);
duke@435 419
duke@435 420 return start;
duke@435 421 }
duke@435 422
duke@435 423
duke@435 424 //------------------------------------------------------------------------------------------------------------------------
duke@435 425 // Continuation point for throwing of implicit exceptions that are not handled in
duke@435 426 // the current activation. Fabricates an exception oop and initiates normal
duke@435 427 // exception dispatching in this frame. Only callee-saved registers are preserved
duke@435 428 // (through the normal register window / RegisterMap handling).
duke@435 429 // If the compiler needs all registers to be preserved between the fault
duke@435 430 // point and the exception handler then it must assume responsibility for that in
duke@435 431 // AbstractCompiler::continuation_for_implicit_null_exception or
duke@435 432 // continuation_for_implicit_division_by_zero_exception. All other implicit
duke@435 433 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
duke@435 434 // either at call sites or otherwise assume that stack unwinding will be initiated,
duke@435 435 // so caller-saved registers were assumed volatile in the compiler.
duke@435 436
duke@435 437 // Note that we generate only this stub into a RuntimeStub, because it needs to be
duke@435 438 // properly traversed and ignored during GC, so we change the meaning of the "__"
duke@435 439 // macro within this method.
duke@435 440 #undef __
duke@435 441 #define __ masm->
duke@435 442
duke@435 443 address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) {
duke@435 444 #ifdef ASSERT
duke@435 445 int insts_size = VerifyThread ? 1 * K : 600;
duke@435 446 #else
duke@435 447 int insts_size = VerifyThread ? 1 * K : 256;
duke@435 448 #endif /* ASSERT */
duke@435 449 int locs_size = 32;
duke@435 450
duke@435 451 CodeBuffer code(name, insts_size, locs_size);
duke@435 452 MacroAssembler* masm = new MacroAssembler(&code);
duke@435 453
duke@435 454 __ verify_thread();
duke@435 455
duke@435 456 // This is an inlined and slightly modified version of call_VM
duke@435 457 // which has the ability to fetch the return PC out of thread-local storage
duke@435 458 __ assert_not_delayed();
duke@435 459
duke@435 460 // Note that we always push a frame because on the SPARC
duke@435 461 // architecture, for all of our implicit exception kinds at call
duke@435 462 // sites, the implicit exception is taken before the callee frame
duke@435 463 // is pushed.
duke@435 464 __ save_frame(0);
duke@435 465
duke@435 466 int frame_complete = __ offset();
duke@435 467
duke@435 468 if (restore_saved_exception_pc) {
twisti@1162 469 __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7);
duke@435 470 __ sub(I7, frame::pc_return_offset, I7);
duke@435 471 }
duke@435 472
duke@435 473 // Note that we always have a runtime stub frame on the top of stack by this point
duke@435 474 Register last_java_sp = SP;
duke@435 475 // 64-bit last_java_sp is biased!
duke@435 476 __ set_last_Java_frame(last_java_sp, G0);
duke@435 477 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early
duke@435 478 __ save_thread(noreg);
duke@435 479 // do the call
duke@435 480 BLOCK_COMMENT("call runtime_entry");
duke@435 481 __ call(runtime_entry, relocInfo::runtime_call_type);
duke@435 482 if (!VerifyThread)
duke@435 483 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
duke@435 484 else
duke@435 485 __ delayed()->nop(); // (thread already passed)
duke@435 486 __ restore_thread(noreg);
duke@435 487 __ reset_last_Java_frame();
duke@435 488
duke@435 489 // check for pending exceptions. use Gtemp as scratch register.
duke@435 490 #ifdef ASSERT
duke@435 491 Label L;
duke@435 492
twisti@1162 493 Address exception_addr(G2_thread, Thread::pending_exception_offset());
duke@435 494 Register scratch_reg = Gtemp;
duke@435 495 __ ld_ptr(exception_addr, scratch_reg);
duke@435 496 __ br_notnull(scratch_reg, false, Assembler::pt, L);
duke@435 497 __ delayed()->nop();
duke@435 498 __ should_not_reach_here();
duke@435 499 __ bind(L);
duke@435 500 #endif // ASSERT
duke@435 501 BLOCK_COMMENT("call forward_exception_entry");
duke@435 502 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
duke@435 503 // we use O7 linkage so that forward_exception_entry has the issuing PC
duke@435 504 __ delayed()->restore();
duke@435 505
duke@435 506 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
duke@435 507 return stub->entry_point();
duke@435 508 }
duke@435 509
duke@435 510 #undef __
duke@435 511 #define __ _masm->
duke@435 512
duke@435 513
duke@435 514 // Generate a routine that sets all the registers so we
duke@435 515 // can tell if the stop routine prints them correctly.
duke@435 516 address generate_test_stop() {
duke@435 517 StubCodeMark mark(this, "StubRoutines", "test_stop");
duke@435 518 address start = __ pc();
duke@435 519
duke@435 520 int i;
duke@435 521
duke@435 522 __ save_frame(0);
duke@435 523
duke@435 524 static jfloat zero = 0.0, one = 1.0;
duke@435 525
duke@435 526 // put addr in L0, then load through L0 to F0
duke@435 527 __ set((intptr_t)&zero, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F0);
duke@435 528 __ set((intptr_t)&one, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1
duke@435 529
duke@435 530 // use add to put 2..18 in F2..F18
duke@435 531 for ( i = 2; i <= 18; ++i ) {
duke@435 532 __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i));
duke@435 533 }
duke@435 534
duke@435 535 // Now put double 2 in F16, double 18 in F18
duke@435 536 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2, F16 );
duke@435 537 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );
duke@435 538
duke@435 539 // use add to put 20..32 in F20..F32
duke@435 540 for (i = 20; i < 32; i += 2) {
duke@435 541 __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i));
duke@435 542 }
duke@435 543
duke@435 544 // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
duke@435 545 for ( i = 0; i < 8; ++i ) {
duke@435 546 if (i < 6) {
duke@435 547 __ set( i, as_iRegister(i));
duke@435 548 __ set(16 + i, as_oRegister(i));
duke@435 549 __ set(24 + i, as_gRegister(i));
duke@435 550 }
duke@435 551 __ set( 8 + i, as_lRegister(i));
duke@435 552 }
duke@435 553
duke@435 554 __ stop("testing stop");
duke@435 555
duke@435 556
duke@435 557 __ ret();
duke@435 558 __ delayed()->restore();
duke@435 559
duke@435 560 return start;
duke@435 561 }
duke@435 562
duke@435 563
duke@435 564 address generate_stop_subroutine() {
duke@435 565 StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
duke@435 566 address start = __ pc();
duke@435 567
duke@435 568 __ stop_subroutine();
duke@435 569
duke@435 570 return start;
duke@435 571 }
duke@435 572
duke@435 573 address generate_flush_callers_register_windows() {
duke@435 574 StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
duke@435 575 address start = __ pc();
duke@435 576
duke@435 577 __ flush_windows();
duke@435 578 __ retl(false);
duke@435 579 __ delayed()->add( FP, STACK_BIAS, O0 );
duke@435 580 // The returned value must be a stack pointer whose register save area
duke@435 581 // is flushed, and will stay flushed while the caller executes.
duke@435 582
duke@435 583 return start;
duke@435 584 }
duke@435 585
duke@435 586 // Helper functions for v8 atomic operations.
duke@435 587 //
duke@435 588 void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
duke@435 589 if (mark_oop_reg == noreg) {
duke@435 590 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
duke@435 591 __ set((intptr_t)lock_ptr, lock_ptr_reg);
duke@435 592 } else {
duke@435 593 assert(scratch_reg != noreg, "just checking");
duke@435 594 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
duke@435 595 __ set((intptr_t)lock_ptr, lock_ptr_reg);
duke@435 596 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
duke@435 597 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
duke@435 598 }
duke@435 599 }
duke@435 600
duke@435 601 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
duke@435 602
duke@435 603 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
duke@435 604 __ set(StubRoutines::Sparc::locked, lock_reg);
duke@435 605 // Initialize yield counter
duke@435 606 __ mov(G0,yield_reg);
duke@435 607
duke@435 608 __ BIND(retry);
duke@435 609 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
duke@435 610 __ br(Assembler::less, false, Assembler::pt, dontyield);
duke@435 611 __ delayed()->nop();
duke@435 612
duke@435 613 // This code can only be called from inside the VM; this
duke@435 614 // stub is only invoked from Atomic::add(). We do not
duke@435 615 // want to use call_VM, because _last_java_sp and such
duke@435 616 // must already be set.
duke@435 617 //
duke@435 618 // Save the regs and make space for a C call
duke@435 619 __ save(SP, -96, SP);
duke@435 620 __ save_all_globals_into_locals();
duke@435 621 BLOCK_COMMENT("call os::naked_sleep");
duke@435 622 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
duke@435 623 __ delayed()->nop();
duke@435 624 __ restore_globals_from_locals();
duke@435 625 __ restore();
duke@435 626 // reset the counter
duke@435 627 __ mov(G0,yield_reg);
duke@435 628
duke@435 629 __ BIND(dontyield);
duke@435 630
duke@435 631 // try to get lock
duke@435 632 __ swap(lock_ptr_reg, 0, lock_reg);
duke@435 633
duke@435 634 // did we get the lock?
duke@435 635 __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
duke@435 636 __ br(Assembler::notEqual, true, Assembler::pn, retry);
duke@435 637 __ delayed()->add(yield_reg,1,yield_reg);
duke@435 638
duke@435 639 // yes, got lock. do the operation here.
duke@435 640 }
duke@435 641
duke@435 642 void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
duke@435 643 __ st(lock_reg, lock_ptr_reg, 0); // unlock
duke@435 644 }
duke@435 645
duke@435 646 // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
duke@435 647 //
duke@435 648 // Arguments :
duke@435 649 //
duke@435 650 // exchange_value: O0
duke@435 651 // dest: O1
duke@435 652 //
duke@435 653 // Results:
duke@435 654 //
duke@435 655 // O0: the value previously stored in dest
duke@435 656 //
duke@435 657 address generate_atomic_xchg() {
duke@435 658 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
duke@435 659 address start = __ pc();
duke@435 660
duke@435 661 if (UseCASForSwap) {
duke@435 662 // Use CAS instead of swap, just in case the MP hardware
duke@435 663 // prefers to work with just one kind of synch. instruction.
duke@435 664 Label retry;
duke@435 665 __ BIND(retry);
duke@435 666 __ mov(O0, O3); // scratch copy of exchange value
duke@435 667 __ ld(O1, 0, O2); // observe the previous value
duke@435 668 // try to replace O2 with O3
duke@435 669 __ cas_under_lock(O1, O2, O3,
duke@435 670 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
duke@435 671 __ cmp(O2, O3);
duke@435 672 __ br(Assembler::notEqual, false, Assembler::pn, retry);
duke@435 673 __ delayed()->nop();
duke@435 674
duke@435 675 __ retl(false);
duke@435 676 __ delayed()->mov(O2, O0); // report previous value to caller
duke@435 677
duke@435 678 } else {
duke@435 679 if (VM_Version::v9_instructions_work()) {
duke@435 680 __ retl(false);
duke@435 681 __ delayed()->swap(O1, 0, O0);
duke@435 682 } else {
duke@435 683 const Register& lock_reg = O2;
duke@435 684 const Register& lock_ptr_reg = O3;
duke@435 685 const Register& yield_reg = O4;
duke@435 686
duke@435 687 Label retry;
duke@435 688 Label dontyield;
duke@435 689
duke@435 690 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@435 691 // got the lock, do the swap
duke@435 692 __ swap(O1, 0, O0);
duke@435 693
duke@435 694 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@435 695 __ retl(false);
duke@435 696 __ delayed()->nop();
duke@435 697 }
duke@435 698 }
duke@435 699
duke@435 700 return start;
duke@435 701 }
duke@435 702
duke@435 703
duke@435 704 // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
duke@435 705 //
duke@435 706 // Arguments :
duke@435 707 //
duke@435 708 // exchange_value: O0
duke@435 709 // dest: O1
duke@435 710 // compare_value: O2
duke@435 711 //
duke@435 712 // Results:
duke@435 713 //
duke@435 714 // O0: the value previously stored in dest
duke@435 715 //
duke@435 716 // Overwrites (v8): O3,O4,O5
duke@435 717 //
duke@435 718 address generate_atomic_cmpxchg() {
duke@435 719 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
duke@435 720 address start = __ pc();
duke@435 721
duke@435 722 // cmpxchg(dest, compare_value, exchange_value)
duke@435 723 __ cas_under_lock(O1, O2, O0,
duke@435 724 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
duke@435 725 __ retl(false);
duke@435 726 __ delayed()->nop();
duke@435 727
duke@435 728 return start;
duke@435 729 }
duke@435 730
duke@435 731 // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
duke@435 732 //
duke@435 733 // Arguments :
duke@435 734 //
duke@435 735 // exchange_value: O1:O0
duke@435 736 // dest: O2
duke@435 737 // compare_value: O4:O3
duke@435 738 //
duke@435 739 // Results:
duke@435 740 //
duke@435 741 // O1:O0: the value previously stored in dest
duke@435 742 //
duke@435 743 // This only works on V9, on V8 we don't generate any
duke@435 744 // code and just return NULL.
duke@435 745 //
duke@435 746 // Overwrites: G1,G2,G3
duke@435 747 //
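// In C terms the v9 code below behaves roughly like this illustrative sketch
// (the 32-bit register halves are re-packed into 64-bit values, and the
// compare-and-swap itself is a single atomic casx):
//
//   jlong exchange = ((jlong)O0 << 32) | (juint)O1;
//   jlong compare  = ((jlong)O3 << 32) | (juint)O4;
//   jlong old      = *dest;
//   if (old == compare) *dest = exchange;
//   return old;                     // unpacked again into O0:O1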
duke@435 748 address generate_atomic_cmpxchg_long() {
duke@435 749 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
duke@435 750 address start = __ pc();
duke@435 751
duke@435 752 if (!VM_Version::supports_cx8())
duke@435 753 return NULL;
duke@435 754 __ sllx(O0, 32, O0);
duke@435 755 __ srl(O1, 0, O1);
duke@435 756 __ or3(O0,O1,O0); // O0 holds the 64-bit exchange_value
duke@435 757 __ sllx(O3, 32, O3);
duke@435 758 __ srl(O4, 0, O4);
duke@435 759 __ or3(O3,O4,O3); // O3 holds the 64-bit compare_value
duke@435 760 __ casx(O2, O3, O0);
duke@435 761 __ srl(O0, 0, O1); // unpacked return value in O1:O0
duke@435 762 __ retl(false);
duke@435 763 __ delayed()->srlx(O0, 32, O0);
duke@435 764
duke@435 765 return start;
duke@435 766 }
duke@435 767
duke@435 768
duke@435 769 // Support for jint Atomic::add(jint add_value, volatile jint* dest).
duke@435 770 //
duke@435 771 // Arguments :
duke@435 772 //
duke@435 773 // add_value: O0 (e.g., +1 or -1)
duke@435 774 // dest: O1
duke@435 775 //
duke@435 776 // Results:
duke@435 777 //
duke@435 778 // O0: the new value stored in dest
duke@435 779 //
duke@435 780 // Overwrites (v9): O3
duke@435 781 // Overwrites (v8): O3,O4,O5
duke@435 782 //
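// The v9 path below is, in C terms, roughly this illustrative sketch:
//
//   jint old;
//   do {
//     old = *dest;                                                // lduw
//   } while (Atomic::cmpxchg(old + add_value, dest, old) != old); // cas, retry on failure
//   return old + add_value;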
duke@435 783 address generate_atomic_add() {
duke@435 784 StubCodeMark mark(this, "StubRoutines", "atomic_add");
duke@435 785 address start = __ pc();
duke@435 786 __ BIND(_atomic_add_stub);
duke@435 787
duke@435 788 if (VM_Version::v9_instructions_work()) {
duke@435 789 Label(retry);
duke@435 790 __ BIND(retry);
duke@435 791
duke@435 792 __ lduw(O1, 0, O2);
duke@435 793 __ add(O0, O2, O3);
duke@435 794 __ cas(O1, O2, O3);
duke@435 795 __ cmp( O2, O3);
duke@435 796 __ br(Assembler::notEqual, false, Assembler::pn, retry);
duke@435 797 __ delayed()->nop();
duke@435 798 __ retl(false);
duke@435 799 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
duke@435 800 } else {
duke@435 801 const Register& lock_reg = O2;
duke@435 802 const Register& lock_ptr_reg = O3;
duke@435 803 const Register& value_reg = O4;
duke@435 804 const Register& yield_reg = O5;
duke@435 805
duke@435 806 Label(retry);
duke@435 807 Label(dontyield);
duke@435 808
duke@435 809 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@435 810 // got lock, do the increment
duke@435 811 __ ld(O1, 0, value_reg);
duke@435 812 __ add(O0, value_reg, value_reg);
duke@435 813 __ st(value_reg, O1, 0);
duke@435 814
duke@435 815 // %%% only for RMO and PSO
duke@435 816 __ membar(Assembler::StoreStore);
duke@435 817
duke@435 818 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@435 819
duke@435 820 __ retl(false);
duke@435 821 __ delayed()->mov(value_reg, O0);
duke@435 822 }
duke@435 823
duke@435 824 return start;
duke@435 825 }
duke@435 826 Label _atomic_add_stub; // called from other stubs
duke@435 827
duke@435 828
duke@435 829 //------------------------------------------------------------------------------------------------------------------------
duke@435 830 // The following routine generates a subroutine to throw an asynchronous
duke@435 831 // UnknownError when an unsafe access gets a fault that could not be
duke@435 832 // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
duke@435 833 //
duke@435 834 // Arguments :
duke@435 835 //
duke@435 836 // trapping PC: O7
duke@435 837 //
duke@435 838 // Results:
duke@435 839 // posts an asynchronous exception, skips the trapping instruction
duke@435 840 //
duke@435 841
duke@435 842 address generate_handler_for_unsafe_access() {
duke@435 843 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
duke@435 844 address start = __ pc();
duke@435 845
duke@435 846 const int preserve_register_words = (64 * 2);
twisti@1162 847 Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS);
duke@435 848
duke@435 849 Register Lthread = L7_thread_cache;
duke@435 850 int i;
duke@435 851
duke@435 852 __ save_frame(0);
duke@435 853 __ mov(G1, L1);
duke@435 854 __ mov(G2, L2);
duke@435 855 __ mov(G3, L3);
duke@435 856 __ mov(G4, L4);
duke@435 857 __ mov(G5, L5);
duke@435 858 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
duke@435 859 __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
duke@435 860 }
duke@435 861
duke@435 862 address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
duke@435 863 BLOCK_COMMENT("call handle_unsafe_access");
duke@435 864 __ call(entry_point, relocInfo::runtime_call_type);
duke@435 865 __ delayed()->nop();
duke@435 866
duke@435 867 __ mov(L1, G1);
duke@435 868 __ mov(L2, G2);
duke@435 869 __ mov(L3, G3);
duke@435 870 __ mov(L4, G4);
duke@435 871 __ mov(L5, G5);
duke@435 872 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
duke@435 873 __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
duke@435 874 }
duke@435 875
duke@435 876 __ verify_thread();
duke@435 877
duke@435 878 __ jmp(O0, 0);
duke@435 879 __ delayed()->restore();
duke@435 880
duke@435 881 return start;
duke@435 882 }
duke@435 883
duke@435 884
duke@435 885 // Support for uint StubRoutines::Sparc::partial_subtype_check( Klass sub, Klass super );
duke@435 886 // Arguments :
duke@435 887 //
duke@435 888 // ret : O0, returned
duke@435 889 // icc/xcc: set as O0 (depending on wordSize)
duke@435 890 // sub : O1, argument, not changed
duke@435 891 // super: O2, argument, not changed
duke@435 892 // raddr: O7, blown by call
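// Conceptually, the slow path emitted below scans the secondary supers of
// 'sub' looking for 'super'; an illustrative sketch only (the generated code
// also updates the secondary-super cache on a hit):
//
//   for (int i = 0; i < sub->secondary_supers()->length(); i++) {
//     if (sub->secondary_supers()->obj_at(i) == super) return 0;  // hit, Z set
//   }
//   return 1;                                                     // miss, NZ set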
duke@435 893 address generate_partial_subtype_check() {
coleenp@548 894 __ align(CodeEntryAlignment);
duke@435 895 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
duke@435 896 address start = __ pc();
jrose@1079 897 Label miss;
duke@435 898
duke@435 899 #if defined(COMPILER2) && !defined(_LP64)
duke@435 900 // Do not use a 'save' because it blows the 64-bit O registers.
coleenp@548 901 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned)
duke@435 902 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
duke@435 903 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
duke@435 904 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
duke@435 905 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
duke@435 906 Register Rret = O0;
duke@435 907 Register Rsub = O1;
duke@435 908 Register Rsuper = O2;
duke@435 909 #else
duke@435 910 __ save_frame(0);
duke@435 911 Register Rret = I0;
duke@435 912 Register Rsub = I1;
duke@435 913 Register Rsuper = I2;
duke@435 914 #endif
duke@435 915
duke@435 916 Register L0_ary_len = L0;
duke@435 917 Register L1_ary_ptr = L1;
duke@435 918 Register L2_super = L2;
duke@435 919 Register L3_index = L3;
duke@435 920
jrose@1079 921 __ check_klass_subtype_slow_path(Rsub, Rsuper,
jrose@1079 922 L0, L1, L2, L3,
jrose@1079 923 NULL, &miss);
jrose@1079 924
jrose@1079 925 // Match falls through here.
jrose@1079 926 __ addcc(G0,0,Rret); // set Z flags, Z result
duke@435 927
duke@435 928 #if defined(COMPILER2) && !defined(_LP64)
duke@435 929 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
duke@435 930 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
duke@435 931 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
duke@435 932 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
duke@435 933 __ retl(); // Result in Rret is zero; flags set to Z
duke@435 934 __ delayed()->add(SP,4*wordSize,SP);
duke@435 935 #else
duke@435 936 __ ret(); // Result in Rret is zero; flags set to Z
duke@435 937 __ delayed()->restore();
duke@435 938 #endif
duke@435 939
duke@435 940 __ BIND(miss);
duke@435 941 __ addcc(G0,1,Rret); // set NZ flags, NZ result
duke@435 942
duke@435 943 #if defined(COMPILER2) && !defined(_LP64)
duke@435 944 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
duke@435 945 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
duke@435 946 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
duke@435 947 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
duke@435 948 __ retl(); // Result in Rret is != 0; flags set to NZ
duke@435 949 __ delayed()->add(SP,4*wordSize,SP);
duke@435 950 #else
duke@435 951 __ ret(); // Result in Rret is != 0; flags set to NZ
duke@435 952 __ delayed()->restore();
duke@435 953 #endif
duke@435 954
duke@435 955 return start;
duke@435 956 }
duke@435 957
duke@435 958
duke@435 959 // Called from MacroAssembler::verify_oop
duke@435 960 //
duke@435 961 address generate_verify_oop_subroutine() {
duke@435 962 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
duke@435 963
duke@435 964 address start = __ pc();
duke@435 965
duke@435 966 __ verify_oop_subroutine();
duke@435 967
duke@435 968 return start;
duke@435 969 }
duke@435 970
duke@435 971
duke@435 972 //
duke@435 973 // Verify that a register contains clean 32-bits positive value
duke@435 974 // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax).
duke@435 975 //
duke@435 976 // Input:
duke@435 977 // Rint - 32-bits value
duke@435 978 // Rtmp - scratch
duke@435 979 //
duke@435 980 void assert_clean_int(Register Rint, Register Rtmp) {
duke@435 981 #if defined(ASSERT) && defined(_LP64)
duke@435 982 __ signx(Rint, Rtmp);
duke@435 983 __ cmp(Rint, Rtmp);
duke@435 984 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
duke@435 985 #endif
duke@435 986 }
duke@435 987
duke@435 988 //
duke@435 989 // Generate overlap test for array copy stubs
duke@435 990 //
duke@435 991 // Input:
duke@435 992 // O0 - array1
duke@435 993 // O1 - array2
duke@435 994 // O2 - element count
duke@435 995 //
duke@435 996 // Kills temps: O3, O4
duke@435 997 //
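// The branches below take the no-overlap path when, in C terms (unsigned
// arithmetic, illustrative only):
//
//   to <= from || (size_t)(to - from) >= ((size_t)count << log2_elem_size)
//
// i.e. whenever a forward (disjoint-style) copy is safe.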
duke@435 998 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
duke@435 999 assert(no_overlap_target != NULL, "must be generated");
duke@435 1000 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
duke@435 1001 }
duke@435 1002 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
duke@435 1003 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
duke@435 1004 }
duke@435 1005 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
duke@435 1006 const Register from = O0;
duke@435 1007 const Register to = O1;
duke@435 1008 const Register count = O2;
duke@435 1009 const Register to_from = O3; // to - from
duke@435 1010 const Register byte_count = O4; // count << log2_elem_size
duke@435 1011
duke@435 1012 __ subcc(to, from, to_from);
duke@435 1013 __ sll_ptr(count, log2_elem_size, byte_count);
duke@435 1014 if (NOLp == NULL)
duke@435 1015 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
duke@435 1016 else
duke@435 1017 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
duke@435 1018 __ delayed()->cmp(to_from, byte_count);
duke@435 1019 if (NOLp == NULL)
tonyp@2010 1020 __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target);
duke@435 1021 else
tonyp@2010 1022 __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, (*NOLp));
duke@435 1023 __ delayed()->nop();
duke@435 1024 }
duke@435 1025
duke@435 1026 //
duke@435 1027 // Generate pre-write barrier for array.
duke@435 1028 //
duke@435 1029 // Input:
duke@435 1030 // addr - register containing starting address
duke@435 1031 // count - register containing element count
duke@435 1032 // dest_uninitialized - true if the destination is known to be uninitialized
duke@435 1033 //
duke@435 1034 // The input registers are overwritten.
duke@435 1035 //
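// For G1 this conceptually records each oop about to be overwritten at the
// destination in the SATB queue, roughly (done out of line by
// BarrierSet::static_write_ref_array_pre; satb_enqueue() is an illustrative name):
//
//   for (size_t i = 0; i < count; i++) {
//     oop pre_val = ((oop*)addr)[i];
//     if (pre_val != NULL) satb_enqueue(pre_val);
//   }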
iveresov@2606 1036 void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
duke@435 1037 BarrierSet* bs = Universe::heap()->barrier_set();
iveresov@2606 1038 switch (bs->kind()) {
iveresov@2606 1039 case BarrierSet::G1SATBCT:
iveresov@2606 1040 case BarrierSet::G1SATBCTLogging:
iveresov@2606 1041 // With G1, don't generate the call if we statically know that the target is uninitialized
iveresov@2606 1042 if (!dest_uninitialized) {
iveresov@2606 1043 __ save_frame(0);
iveresov@2606 1044 // Save the necessary global regs... will be used after.
iveresov@2606 1045 if (addr->is_global()) {
iveresov@2606 1046 __ mov(addr, L0);
iveresov@2606 1047 }
iveresov@2606 1048 if (count->is_global()) {
iveresov@2606 1049 __ mov(count, L1);
iveresov@2606 1050 }
iveresov@2606 1051 __ mov(addr->after_save(), O0);
iveresov@2606 1052 // Get the count into O1
iveresov@2606 1053 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
iveresov@2606 1054 __ delayed()->mov(count->after_save(), O1);
iveresov@2606 1055 if (addr->is_global()) {
iveresov@2606 1056 __ mov(L0, addr);
iveresov@2606 1057 }
iveresov@2606 1058 if (count->is_global()) {
iveresov@2606 1059 __ mov(L1, count);
iveresov@2606 1060 }
iveresov@2606 1061 __ restore();
iveresov@2606 1062 }
iveresov@2606 1063 break;
iveresov@2606 1064 case BarrierSet::CardTableModRef:
iveresov@2606 1065 case BarrierSet::CardTableExtension:
iveresov@2606 1066 case BarrierSet::ModRef:
iveresov@2606 1067 break;
iveresov@2606 1068 default:
iveresov@2606 1069 ShouldNotReachHere();
duke@435 1070 }
duke@435 1071 }
duke@435 1072 //
duke@435 1073 // Generate post-write barrier for array.
duke@435 1074 //
duke@435 1075 // Input:
duke@435 1076 // addr - register containing starting address
duke@435 1077 // count - register containing element count
duke@435 1078 // tmp - scratch register
duke@435 1079 //
duke@435 1080 // The input registers are overwritten.
duke@435 1081 //
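// For the card-table cases the loop below dirties every card spanned by the
// destination range, i.e. in C terms (illustrative only; 0 is the dirty value):
//
//   size_t first = (uintptr_t)addr >> card_shift;
//   size_t last  = ((uintptr_t)addr + (count - 1) * BytesPerHeapOop) >> card_shift;
//   for (size_t card = first; card <= last; card++) byte_map_base[card] = 0;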
duke@435 1082 void gen_write_ref_array_post_barrier(Register addr, Register count,
iveresov@2606 1083 Register tmp) {
duke@435 1084 BarrierSet* bs = Universe::heap()->barrier_set();
duke@435 1085
duke@435 1086 switch (bs->kind()) {
duke@435 1087 case BarrierSet::G1SATBCT:
duke@435 1088 case BarrierSet::G1SATBCTLogging:
duke@435 1089 {
duke@435 1090 // Get some new fresh output registers.
duke@435 1091 __ save_frame(0);
ysr@777 1092 __ mov(addr->after_save(), O0);
duke@435 1093 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
ysr@777 1094 __ delayed()->mov(count->after_save(), O1);
duke@435 1095 __ restore();
duke@435 1096 }
duke@435 1097 break;
duke@435 1098 case BarrierSet::CardTableModRef:
duke@435 1099 case BarrierSet::CardTableExtension:
duke@435 1100 {
duke@435 1101 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
duke@435 1102 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
duke@435 1103 assert_different_registers(addr, count, tmp);
duke@435 1104
duke@435 1105 Label L_loop;
duke@435 1106
coleenp@548 1107 __ sll_ptr(count, LogBytesPerHeapOop, count);
coleenp@548 1108 __ sub(count, BytesPerHeapOop, count);
duke@435 1109 __ add(count, addr, count);
duke@435 1110 // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
duke@435 1111 __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
duke@435 1112 __ srl_ptr(count, CardTableModRefBS::card_shift, count);
duke@435 1113 __ sub(count, addr, count);
twisti@1162 1114 AddressLiteral rs(ct->byte_map_base);
twisti@1162 1115 __ set(rs, tmp);
duke@435 1116 __ BIND(L_loop);
twisti@1162 1117 __ stb(G0, tmp, addr);
duke@435 1118 __ subcc(count, 1, count);
duke@435 1119 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
duke@435 1120 __ delayed()->add(addr, 1, addr);
twisti@1162 1121 }
duke@435 1122 break;
duke@435 1123 case BarrierSet::ModRef:
duke@435 1124 break;
twisti@1162 1125 default:
duke@435 1126 ShouldNotReachHere();
duke@435 1127 }
duke@435 1128 }
duke@435 1129
duke@435 1130
duke@435 1131 // Copy big chunks forward with shift
duke@435 1132 //
duke@435 1133 // Inputs:
duke@435 1134 // from - source array
duke@435 1135 // to - destination array, aligned to 8 bytes
duke@435 1136 // count - element count to copy, at least the equivalent of 16 bytes
duke@435 1137 // count_dec - decrement of the element count equivalent to 16 bytes
duke@435 1138 // L_copy_bytes - copy exit label
duke@435 1139 //
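// The shifted copy below works, in C terms, roughly like this sketch: read
// aligned 64-bit words from 'from' and assemble each output word from two
// neighbouring input words (left_shift + right_shift == 64; the real loop
// handles 16 bytes per iteration):
//
//   uint64_t prev = *aligned_from++;
//   while (more_to_copy) {
//     uint64_t next = *aligned_from++;
//     *to++ = (prev << left_shift) | (next >> right_shift);
//     prev  = next;
//   }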
duke@435 1140 void copy_16_bytes_forward_with_shift(Register from, Register to,
duke@435 1141 Register count, int count_dec, Label& L_copy_bytes) {
duke@435 1142 Label L_loop, L_aligned_copy, L_copy_last_bytes;
duke@435 1143
duke@435 1144 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
duke@435 1145 __ andcc(from, 7, G1); // misaligned bytes
duke@435 1146 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@435 1147 __ delayed()->nop();
duke@435 1148
duke@435 1149 const Register left_shift = G1; // left shift bit counter
duke@435 1150 const Register right_shift = G5; // right shift bit counter
duke@435 1151
duke@435 1152 __ sll(G1, LogBitsPerByte, left_shift);
duke@435 1153 __ mov(64, right_shift);
duke@435 1154 __ sub(right_shift, left_shift, right_shift);
duke@435 1155
duke@435 1156 //
duke@435 1157 // Load 2 aligned 8-bytes chunks and use one from previous iteration
duke@435 1158 // to form 2 aligned 8-bytes chunks to store.
duke@435 1159 //
duke@435 1160 __ deccc(count, count_dec); // Pre-decrement 'count'
duke@435 1161 __ andn(from, 7, from); // Align address
duke@435 1162 __ ldx(from, 0, O3);
duke@435 1163 __ inc(from, 8);
kvn@1800 1164 __ align(OptoLoopAlignment);
duke@435 1165 __ BIND(L_loop);
duke@435 1166 __ ldx(from, 0, O4);
duke@435 1167 __ deccc(count, count_dec); // Can we do next iteration after this one?
duke@435 1168 __ ldx(from, 8, G4);
duke@435 1169 __ inc(to, 16);
duke@435 1170 __ inc(from, 16);
duke@435 1171 __ sllx(O3, left_shift, O3);
duke@435 1172 __ srlx(O4, right_shift, G3);
duke@435 1173 __ bset(G3, O3);
duke@435 1174 __ stx(O3, to, -16);
duke@435 1175 __ sllx(O4, left_shift, O4);
duke@435 1176 __ srlx(G4, right_shift, G3);
duke@435 1177 __ bset(G3, O4);
duke@435 1178 __ stx(O4, to, -8);
duke@435 1179 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
duke@435 1180 __ delayed()->mov(G4, O3);
duke@435 1181
duke@435 1182 __ inccc(count, count_dec>>1 ); // + 8 bytes
duke@435 1183 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
duke@435 1184 __ delayed()->inc(count, count_dec>>1); // restore 'count'
duke@435 1185
duke@435 1186 // copy 8 bytes, part of them already loaded in O3
duke@435 1187 __ ldx(from, 0, O4);
duke@435 1188 __ inc(to, 8);
duke@435 1189 __ inc(from, 8);
duke@435 1190 __ sllx(O3, left_shift, O3);
duke@435 1191 __ srlx(O4, right_shift, G3);
duke@435 1192 __ bset(O3, G3);
duke@435 1193 __ stx(G3, to, -8);
duke@435 1194
duke@435 1195 __ BIND(L_copy_last_bytes);
duke@435 1196 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
duke@435 1197 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
duke@435 1198 __ delayed()->sub(from, right_shift, from); // restore address
duke@435 1199
duke@435 1200 __ BIND(L_aligned_copy);
duke@435 1201 }
duke@435 1202
duke@435 1203 // Copy big chunks backward with shift
duke@435 1204 //
duke@435 1205 // Inputs:
duke@435 1206 // end_from - source array end address
duke@435 1207 // end_to - destination array end address, aligned to 8 bytes
duke@435 1208 // count - element count to copy, at least the equivalent of 16 bytes
duke@435 1209 // count_dec - decrement of the element count equivalent to 16 bytes
duke@435 1210 // L_aligned_copy - aligned copy exit label
duke@435 1211 // L_copy_bytes - copy exit label
duke@435 1212 //
duke@435 1213 void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
duke@435 1214 Register count, int count_dec,
duke@435 1215 Label& L_aligned_copy, Label& L_copy_bytes) {
duke@435 1216 Label L_loop, L_copy_last_bytes;
duke@435 1217
duke@435 1218 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
duke@435 1219 __ andcc(end_from, 7, G1); // misaligned bytes
duke@435 1220 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@435 1221 __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'
duke@435 1222
duke@435 1223 const Register left_shift = G1; // left shift bit counter
duke@435 1224 const Register right_shift = G5; // right shift bit counter
duke@435 1225
duke@435 1226 __ sll(G1, LogBitsPerByte, left_shift);
duke@435 1227 __ mov(64, right_shift);
duke@435 1228 __ sub(right_shift, left_shift, right_shift);
duke@435 1229
duke@435 1230 //
duke@435 1231 // Load 2 aligned 8-bytes chunks and use one from previous iteration
duke@435 1232 // to form 2 aligned 8-bytes chunks to store.
duke@435 1233 //
duke@435 1234 __ andn(end_from, 7, end_from); // Align address
duke@435 1235 __ ldx(end_from, 0, O3);
kvn@1800 1236 __ align(OptoLoopAlignment);
duke@435 1237 __ BIND(L_loop);
duke@435 1238 __ ldx(end_from, -8, O4);
duke@435 1239 __ deccc(count, count_dec); // Can we do next iteration after this one?
duke@435 1240 __ ldx(end_from, -16, G4);
duke@435 1241 __ dec(end_to, 16);
duke@435 1242 __ dec(end_from, 16);
duke@435 1243 __ srlx(O3, right_shift, O3);
duke@435 1244 __ sllx(O4, left_shift, G3);
duke@435 1245 __ bset(G3, O3);
duke@435 1246 __ stx(O3, end_to, 8);
duke@435 1247 __ srlx(O4, right_shift, O4);
duke@435 1248 __ sllx(G4, left_shift, G3);
duke@435 1249 __ bset(G3, O4);
duke@435 1250 __ stx(O4, end_to, 0);
duke@435 1251 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
duke@435 1252 __ delayed()->mov(G4, O3);
duke@435 1253
duke@435 1254 __ inccc(count, count_dec>>1 ); // + 8 bytes
duke@435 1255 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
duke@435 1256 __ delayed()->inc(count, count_dec>>1); // restore 'count'
duke@435 1257
duke@435 1258 // copy 8 bytes, part of them already loaded in O3
duke@435 1259 __ ldx(end_from, -8, O4);
duke@435 1260 __ dec(end_to, 8);
duke@435 1261 __ dec(end_from, 8);
duke@435 1262 __ srlx(O3, right_shift, O3);
duke@435 1263 __ sllx(O4, left_shift, G3);
duke@435 1264 __ bset(O3, G3);
duke@435 1265 __ stx(G3, end_to, 0);
duke@435 1266
duke@435 1267 __ BIND(L_copy_last_bytes);
duke@435 1268 __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
duke@435 1269 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
duke@435 1270 __ delayed()->add(end_from, left_shift, end_from); // restore address
duke@435 1271 }
duke@435 1272
duke@435 1273 //
duke@435 1274 // Generate stub for disjoint byte copy. If "aligned" is true, the
duke@435 1275 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 1276 //
duke@435 1277 // Arguments for generated stub:
duke@435 1278 // from: O0
duke@435 1279 // to: O1
duke@435 1280 // count: O2 treated as signed
duke@435 1281 //
iveresov@2595 1282 address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
duke@435 1283 __ align(CodeEntryAlignment);
duke@435 1284 StubCodeMark mark(this, "StubRoutines", name);
duke@435 1285 address start = __ pc();
duke@435 1286
duke@435 1287 Label L_skip_alignment, L_align;
duke@435 1288 Label L_copy_byte, L_copy_byte_loop, L_exit;
duke@435 1289
duke@435 1290 const Register from = O0; // source array address
duke@435 1291 const Register to = O1; // destination array address
duke@435 1292 const Register count = O2; // elements count
duke@435 1293 const Register offset = O5; // offset from start of arrays
duke@435 1294 // O3, O4, G3, G4 are used as temp registers
duke@435 1295
duke@435 1296 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@435 1297
iveresov@2595 1298 if (entry != NULL) {
iveresov@2595 1299 *entry = __ pc();
iveresov@2595 1300 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 1301 BLOCK_COMMENT("Entry:");
iveresov@2595 1302 }
duke@435 1303
duke@435 1304 // for short arrays, just do single element copy
duke@435 1305 __ cmp(count, 23); // 16 + 7
duke@435 1306 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
duke@435 1307 __ delayed()->mov(G0, offset);
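    // A rough rationale for the constant above: 23 == 16 + 7. Aligning 'to'
    // to 8 bytes below can consume up to 7 bytes, and the 16-bytes-per-iteration
    // copy paths need at least 16 bytes remaining after that.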
duke@435 1308
duke@435 1309 if (aligned) {
duke@435 1310 // 'aligned' == true when it is known statically during compilation
duke@435 1311 // of this arraycopy call site that both 'from' and 'to' addresses
duke@435 1312 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
duke@435 1313 //
duke@435 1314 // Aligned arrays have 4-byte alignment in the 32-bit VM
duke@435 1315 // and 8-byte alignment in the 64-bit VM, so this is only needed in the 32-bit VM.
duke@435 1316 //
duke@435 1317 #ifndef _LP64
duke@435 1318 // copy a 4-bytes word if necessary to align 'to' to 8 bytes
duke@435 1319 __ andcc(to, 7, G0);
duke@435 1320 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
duke@435 1321 __ delayed()->ld(from, 0, O3);
duke@435 1322 __ inc(from, 4);
duke@435 1323 __ inc(to, 4);
duke@435 1324 __ dec(count, 4);
duke@435 1325 __ st(O3, to, -4);
duke@435 1326 __ BIND(L_skip_alignment);
duke@435 1327 #endif
duke@435 1328 } else {
duke@435 1329 // copy bytes to align 'to' on 8 byte boundary
duke@435 1330 __ andcc(to, 7, G1); // misaligned bytes
duke@435 1331 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@435 1332 __ delayed()->neg(G1);
duke@435 1333 __ inc(G1, 8); // bytes needed to reach the next 8-byte boundary
duke@435 1334 __ sub(count, G1, count);
duke@435 1335 __ BIND(L_align);
duke@435 1336 __ ldub(from, 0, O3);
duke@435 1337 __ deccc(G1);
duke@435 1338 __ inc(from);
duke@435 1339 __ stb(O3, to, 0);
duke@435 1340 __ br(Assembler::notZero, false, Assembler::pt, L_align);
duke@435 1341 __ delayed()->inc(to);
duke@435 1342 __ BIND(L_skip_alignment);
duke@435 1343 }
duke@435 1344 #ifdef _LP64
duke@435 1345 if (!aligned)
duke@435 1346 #endif
duke@435 1347 {
duke@435 1348 // Copy with shift 16 bytes per iteration if arrays do not have
duke@435 1349 // the same alignment mod 8, otherwise fall through to the next
duke@435 1350 // code for aligned copy.
duke@435 1351 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
duke@435 1352 // Also jump over aligned copy after the copy with shift completed.
duke@435 1353
duke@435 1354 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
duke@435 1355 }
duke@435 1356
duke@435 1357 // Both arrays are 8-byte aligned; copy 16 bytes at a time
duke@435 1358 __ and3(count, 7, G4); // Save count
duke@435 1359 __ srl(count, 3, count);
duke@435 1360 generate_disjoint_long_copy_core(aligned);
duke@435 1361 __ mov(G4, count); // Restore count
duke@435 1362
duke@435 1363 // copy trailing bytes
duke@435 1364 __ BIND(L_copy_byte);
duke@435 1365 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@435 1366 __ delayed()->nop();
kvn@1800 1367 __ align(OptoLoopAlignment);
duke@435 1368 __ BIND(L_copy_byte_loop);
duke@435 1369 __ ldub(from, offset, O3);
duke@435 1370 __ deccc(count);
duke@435 1371 __ stb(O3, to, offset);
duke@435 1372 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
duke@435 1373 __ delayed()->inc(offset);
duke@435 1374
duke@435 1375 __ BIND(L_exit);
duke@435 1376 // O3, O4 are used as temp registers
duke@435 1377 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
duke@435 1378 __ retl();
duke@435 1379 __ delayed()->mov(G0, O0); // return 0
duke@435 1380 return start;
duke@435 1381 }
duke@435 1382
duke@435 1383 //
duke@435 1384 // Generate stub for conjoint byte copy. If "aligned" is true, the
duke@435 1385 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 1386 //
duke@435 1387 // Arguments for generated stub:
duke@435 1388 // from: O0
duke@435 1389 // to: O1
duke@435 1390 // count: O2 treated as signed
duke@435 1391 //
iveresov@2595 1392 address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
iveresov@2595 1393 address *entry, const char *name) {
duke@435 1394 // Do reverse copy.
duke@435 1395
duke@435 1396 __ align(CodeEntryAlignment);
duke@435 1397 StubCodeMark mark(this, "StubRoutines", name);
duke@435 1398 address start = __ pc();
duke@435 1399
duke@435 1400 Label L_skip_alignment, L_align, L_aligned_copy;
duke@435 1401 Label L_copy_byte, L_copy_byte_loop, L_exit;
duke@435 1402
duke@435 1403 const Register from = O0; // source array address
duke@435 1404 const Register to = O1; // destination array address
duke@435 1405 const Register count = O2; // elements count
duke@435 1406 const Register end_from = from; // source array end address
duke@435 1407 const Register end_to = to; // destination array end address
duke@435 1408
duke@435 1409 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@435 1410
iveresov@2595 1411 if (entry != NULL) {
iveresov@2595 1412 *entry = __ pc();
iveresov@2595 1413 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 1414 BLOCK_COMMENT("Entry:");
iveresov@2595 1415 }
duke@435 1416
duke@435 1417 array_overlap_test(nooverlap_target, 0);
duke@435 1418
duke@435 1419 __ add(to, count, end_to); // offset after last copied element
duke@435 1420
duke@435 1421 // for short arrays, just do single element copy
duke@435 1422 __ cmp(count, 23); // 16 + 7
duke@435 1423 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
duke@435 1424 __ delayed()->add(from, count, end_from);
duke@435 1425
duke@435 1426 {
duke@435 1427 // Align the end of the arrays since it may not be aligned even
duke@435 1428 // when the arrays themselves are aligned.
duke@435 1429
duke@435 1430 // copy bytes to align 'end_to' on 8 byte boundary
duke@435 1431 __ andcc(end_to, 7, G1); // misaligned bytes
duke@435 1432 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@435 1433 __ delayed()->nop();
duke@435 1434 __ sub(count, G1, count);
duke@435 1435 __ BIND(L_align);
duke@435 1436 __ dec(end_from);
duke@435 1437 __ dec(end_to);
duke@435 1438 __ ldub(end_from, 0, O3);
duke@435 1439 __ deccc(G1);
duke@435 1440 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
duke@435 1441 __ delayed()->stb(O3, end_to, 0);
duke@435 1442 __ BIND(L_skip_alignment);
duke@435 1443 }
duke@435 1444 #ifdef _LP64
duke@435 1445 if (aligned) {
duke@435 1446 // Both arrays are aligned to 8-bytes in 64-bits VM.
duke@435 1447 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
duke@435 1448 // in unaligned case.
duke@435 1449 __ dec(count, 16);
duke@435 1450 } else
duke@435 1451 #endif
duke@435 1452 {
duke@435 1453 // Copy with shift 16 bytes per iteration if arrays do not have
duke@435 1454 // the same alignment mod 8, otherwise jump to the next
duke@435 1455 // code for aligned copy (subtracting 16 from 'count' before the jump).
duke@435 1456 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
duke@435 1457 // Also jump over aligned copy after the copy with shift completed.
duke@435 1458
duke@435 1459 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
duke@435 1460 L_aligned_copy, L_copy_byte);
duke@435 1461 }
duke@435 1462 // copy 16 bytes (16 elements) at a time
kvn@1800 1463 __ align(OptoLoopAlignment);
duke@435 1464 __ BIND(L_aligned_copy);
duke@435 1465 __ dec(end_from, 16);
duke@435 1466 __ ldx(end_from, 8, O3);
duke@435 1467 __ ldx(end_from, 0, O4);
duke@435 1468 __ dec(end_to, 16);
duke@435 1469 __ deccc(count, 16);
duke@435 1470 __ stx(O3, end_to, 8);
duke@435 1471 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
duke@435 1472 __ delayed()->stx(O4, end_to, 0);
duke@435 1473 __ inc(count, 16);
duke@435 1474
duke@435 1475 // copy 1 element (1 byte) at a time
duke@435 1476 __ BIND(L_copy_byte);
duke@435 1477 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@435 1478 __ delayed()->nop();
kvn@1800 1479 __ align(OptoLoopAlignment);
duke@435 1480 __ BIND(L_copy_byte_loop);
duke@435 1481 __ dec(end_from);
duke@435 1482 __ dec(end_to);
duke@435 1483 __ ldub(end_from, 0, O4);
duke@435 1484 __ deccc(count);
duke@435 1485 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
duke@435 1486 __ delayed()->stb(O4, end_to, 0);
duke@435 1487
duke@435 1488 __ BIND(L_exit);
duke@435 1489 // O3, O4 are used as temp registers
duke@435 1490 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
duke@435 1491 __ retl();
duke@435 1492 __ delayed()->mov(G0, O0); // return 0
duke@435 1493 return start;
duke@435 1494 }
duke@435 1495
duke@435 1496 //
duke@435 1497 // Generate stub for disjoint short copy. If "aligned" is true, the
duke@435 1498 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 1499 //
duke@435 1500 // Arguments for generated stub:
duke@435 1501 // from: O0
duke@435 1502 // to: O1
duke@435 1503 // count: O2 treated as signed
duke@435 1504 //
iveresov@2595 1505 address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
duke@435 1506 __ align(CodeEntryAlignment);
duke@435 1507 StubCodeMark mark(this, "StubRoutines", name);
duke@435 1508 address start = __ pc();
duke@435 1509
duke@435 1510 Label L_skip_alignment, L_skip_alignment2;
duke@435 1511 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
duke@435 1512
duke@435 1513 const Register from = O0; // source array address
duke@435 1514 const Register to = O1; // destination array address
duke@435 1515 const Register count = O2; // elements count
duke@435 1516 const Register offset = O5; // offset from start of arrays
duke@435 1517 // O3, O4, G3, G4 are used as temp registers
duke@435 1518
duke@435 1519 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@435 1520
iveresov@2595 1521 if (entry != NULL) {
iveresov@2595 1522 *entry = __ pc();
iveresov@2595 1523 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 1524 BLOCK_COMMENT("Entry:");
iveresov@2595 1525 }
duke@435 1526
duke@435 1527 // for short arrays, just do single element copy
duke@435 1528 __ cmp(count, 11); // 8 + 3 (22 bytes)
duke@435 1529 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
duke@435 1530 __ delayed()->mov(G0, offset);
duke@435 1531
duke@435 1532 if (aligned) {
duke@435 1533 // 'aligned' == true when it is known statically during compilation
duke@435 1534 // of this arraycopy call site that both 'from' and 'to' addresses
duke@435 1535 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
duke@435 1536 //
duke@435 1537 // Aligned arrays have 4-byte alignment in the 32-bit VM
duke@435 1538 // and 8-byte alignment in the 64-bit VM.
duke@435 1539 //
duke@435 1540 #ifndef _LP64
duke@435 1541 // copy a 2-elements word if necessary to align 'to' to 8 bytes
duke@435 1542 __ andcc(to, 7, G0);
duke@435 1543 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@435 1544 __ delayed()->ld(from, 0, O3);
duke@435 1545 __ inc(from, 4);
duke@435 1546 __ inc(to, 4);
duke@435 1547 __ dec(count, 2);
duke@435 1548 __ st(O3, to, -4);
duke@435 1549 __ BIND(L_skip_alignment);
duke@435 1550 #endif
duke@435 1551 } else {
duke@435 1552 // copy 1 element if necessary to align 'to' on a 4-byte boundary
duke@435 1553 __ andcc(to, 3, G0);
duke@435 1554 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@435 1555 __ delayed()->lduh(from, 0, O3);
duke@435 1556 __ inc(from, 2);
duke@435 1557 __ inc(to, 2);
duke@435 1558 __ dec(count);
duke@435 1559 __ sth(O3, to, -2);
duke@435 1560 __ BIND(L_skip_alignment);
duke@435 1561
duke@435 1562 // copy 2 elements to align 'to' on an 8 byte boundary
duke@435 1563 __ andcc(to, 7, G0);
duke@435 1564 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
duke@435 1565 __ delayed()->lduh(from, 0, O3);
duke@435 1566 __ dec(count, 2);
duke@435 1567 __ lduh(from, 2, O4);
duke@435 1568 __ inc(from, 4);
duke@435 1569 __ inc(to, 4);
duke@435 1570 __ sth(O3, to, -4);
duke@435 1571 __ sth(O4, to, -2);
duke@435 1572 __ BIND(L_skip_alignment2);
duke@435 1573 }
duke@435 1574 #ifdef _LP64
duke@435 1575 if (!aligned)
duke@435 1576 #endif
duke@435 1577 {
duke@435 1578 // Copy with shift 16 bytes per iteration if arrays do not have
duke@435 1579 // the same alignment mod 8, otherwise fall through to the next
duke@435 1580 // code for aligned copy.
duke@435 1581 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
duke@435 1582 // Also jump over aligned copy after the copy with shift completed.
duke@435 1583
duke@435 1584 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
duke@435 1585 }
duke@435 1586
duke@435 1587 // Both arrays are 8-byte aligned; copy 16 bytes at a time
duke@435 1588 __ and3(count, 3, G4); // Save
duke@435 1589 __ srl(count, 2, count);
duke@435 1590 generate_disjoint_long_copy_core(aligned);
duke@435 1591 __ mov(G4, count); // restore
duke@435 1592
duke@435 1593 // copy 1 element at a time
duke@435 1594 __ BIND(L_copy_2_bytes);
duke@435 1595 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@435 1596 __ delayed()->nop();
kvn@1800 1597 __ align(OptoLoopAlignment);
duke@435 1598 __ BIND(L_copy_2_bytes_loop);
duke@435 1599 __ lduh(from, offset, O3);
duke@435 1600 __ deccc(count);
duke@435 1601 __ sth(O3, to, offset);
duke@435 1602 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
duke@435 1603 __ delayed()->inc(offset, 2);
duke@435 1604
duke@435 1605 __ BIND(L_exit);
duke@435 1606 // O3, O4 are used as temp registers
duke@435 1607 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
duke@435 1608 __ retl();
duke@435 1609 __ delayed()->mov(G0, O0); // return 0
duke@435 1610 return start;
duke@435 1611 }
duke@435 1612
duke@435 1613 //
never@2118 1614 // Generate stub for array fill of the given element type. If "aligned"
never@2118 1615 // is true, the "to" address is assumed to be heapword aligned.
never@2118 1616 //
never@2118 1617 // Arguments for generated stub:
never@2118 1618 // to: O0
never@2118 1619 // value: O1
never@2118 1620 // count: O2 treated as signed
never@2118 1621 //
never@2118 1622 address generate_fill(BasicType t, bool aligned, const char* name) {
never@2118 1623 __ align(CodeEntryAlignment);
never@2118 1624 StubCodeMark mark(this, "StubRoutines", name);
never@2118 1625 address start = __ pc();
never@2118 1626
never@2118 1627 const Register to = O0; // destination array address
never@2118 1628 const Register value = O1; // fill value
never@2118 1629 const Register count = O2; // elements count
never@2118 1630 // O3 is used as a temp register
never@2118 1631
never@2118 1632 assert_clean_int(count, O3); // Make sure 'count' is clean int.
never@2118 1633
never@2118 1634 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
never@2149 1635 Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes;
never@2118 1636
never@2118 1637 int shift = -1;
never@2118 1638 switch (t) {
never@2118 1639 case T_BYTE:
never@2118 1640 shift = 2;
never@2118 1641 break;
never@2118 1642 case T_SHORT:
never@2118 1643 shift = 1;
never@2118 1644 break;
never@2118 1645 case T_INT:
never@2118 1646 shift = 0;
never@2118 1647 break;
never@2118 1648 default: ShouldNotReachHere();
never@2118 1649 }
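    // 'shift' is chosen so that (1 << shift) elements of type 't' span 4 bytes
    // (byte: 4 elements, short: 2, int: 1); the count adjustments below, such as
    // dec(count, 1 << shift) and subcc(count, 8 << shift, count), rely on this.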
never@2118 1650
never@2118 1651 BLOCK_COMMENT("Entry:");
never@2118 1652
never@2118 1653 if (t == T_BYTE) {
never@2118 1654 // Zero-extend the byte and replicate it into 16 bits
never@2118 1655 __ and3(value, 0xff, value);
never@2118 1656 __ sllx(value, 8, O3);
never@2118 1657 __ or3(value, O3, value);
never@2118 1658 }
never@2118 1659 if (t == T_SHORT) {
never@2118 1660 // Zero extend value
never@2149 1661 __ sllx(value, 48, value);
never@2149 1662 __ srlx(value, 48, value);
never@2118 1663 }
never@2118 1664 if (t == T_BYTE || t == T_SHORT) {
never@2118 1665 __ sllx(value, 16, O3);
never@2118 1666 __ or3(value, O3, value);
never@2118 1667 }
never@2118 1668
never@2118 1669 __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
never@2149 1670 __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
never@2149 1671 __ delayed()->andcc(count, 1, G0);
never@2118 1672
never@2118 1673 if (!aligned && (t == T_BYTE || t == T_SHORT)) {
never@2118 1674 // align the destination address on a 4-byte boundary
never@2118 1675 if (t == T_BYTE) {
never@2118 1676 // One byte misalignment happens only for byte arrays
never@2118 1677 __ andcc(to, 1, G0);
never@2118 1678 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
never@2118 1679 __ delayed()->nop();
never@2118 1680 __ stb(value, to, 0);
never@2118 1681 __ inc(to, 1);
never@2118 1682 __ dec(count, 1);
never@2118 1683 __ BIND(L_skip_align1);
never@2118 1684 }
never@2118 1685 // Two bytes misalignment happens only for byte and short (char) arrays
never@2118 1686 __ andcc(to, 2, G0);
never@2118 1687 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
never@2118 1688 __ delayed()->nop();
never@2118 1689 __ sth(value, to, 0);
never@2118 1690 __ inc(to, 2);
never@2118 1691 __ dec(count, 1 << (shift - 1));
never@2118 1692 __ BIND(L_skip_align2);
never@2118 1693 }
never@2118 1694 #ifdef _LP64
never@2118 1695 if (!aligned) {
never@2118 1696 #endif
never@2118 1697 // align to 8 bytes; we know we are at least 4-byte aligned at this point
never@2118 1698 __ andcc(to, 7, G0);
never@2118 1699 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
never@2118 1700 __ delayed()->nop();
never@2118 1701 __ stw(value, to, 0);
never@2118 1702 __ inc(to, 4);
never@2118 1703 __ dec(count, 1 << shift);
never@2118 1704 __ BIND(L_fill_32_bytes);
never@2118 1705 #ifdef _LP64
never@2118 1706 }
never@2118 1707 #endif
never@2118 1708
never@2118 1709 if (t == T_INT) {
never@2118 1710 // Zero extend value
never@2118 1711 __ srl(value, 0, value);
never@2118 1712 }
never@2118 1713 if (t == T_BYTE || t == T_SHORT || t == T_INT) {
never@2118 1714 __ sllx(value, 32, O3);
never@2118 1715 __ or3(value, O3, value);
never@2118 1716 }
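    // A worked example of the replication above for t == T_BYTE with
    // value == 0xAB: after the 0xff mask and the 8-bit shift/or the value is
    // 0xABAB, after the 16-bit shift/or it is 0xABABABAB, and after the 32-bit
    // shift/or it is 0xABABABABABABABAB, so each 8-byte store below writes
    // eight copies of the fill byte.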
never@2118 1717
never@2137 1718 Label L_check_fill_8_bytes;
never@2137 1719 // Fill 32-byte chunks
never@2137 1720 __ subcc(count, 8 << shift, count);
never@2137 1721 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
never@2137 1722 __ delayed()->nop();
never@2137 1723
never@2149 1724 Label L_fill_32_bytes_loop, L_fill_4_bytes;
never@2118 1725 __ align(16);
never@2118 1726 __ BIND(L_fill_32_bytes_loop);
never@2118 1727
never@2118 1728 __ stx(value, to, 0);
never@2118 1729 __ stx(value, to, 8);
never@2118 1730 __ stx(value, to, 16);
never@2118 1731 __ stx(value, to, 24);
never@2118 1732
never@2118 1733 __ subcc(count, 8 << shift, count);
never@2118 1734 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
never@2118 1735 __ delayed()->add(to, 32, to);
never@2118 1736
never@2118 1737 __ BIND(L_check_fill_8_bytes);
never@2118 1738 __ addcc(count, 8 << shift, count);
never@2118 1739 __ brx(Assembler::zero, false, Assembler::pn, L_exit);
never@2118 1740 __ delayed()->subcc(count, 1 << (shift + 1), count);
never@2118 1741 __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
never@2118 1742 __ delayed()->andcc(count, 1<<shift, G0);
never@2118 1743
never@2118 1744 //
never@2118 1745 // length is too short, just fill 8 bytes at a time
never@2118 1746 //
never@2118 1747 Label L_fill_8_bytes_loop;
never@2118 1748 __ BIND(L_fill_8_bytes_loop);
never@2118 1749 __ stx(value, to, 0);
never@2118 1750 __ subcc(count, 1 << (shift + 1), count);
never@2118 1751 __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
never@2118 1752 __ delayed()->add(to, 8, to);
never@2118 1753
never@2118 1754 // fill trailing 4 bytes
never@2118 1755 __ andcc(count, 1<<shift, G0); // in delay slot of branches
never@2149 1756 if (t == T_INT) {
never@2149 1757 __ BIND(L_fill_elements);
never@2149 1758 }
never@2118 1759 __ BIND(L_fill_4_bytes);
never@2118 1760 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
never@2118 1761 if (t == T_BYTE || t == T_SHORT) {
never@2118 1762 __ delayed()->andcc(count, 1<<(shift-1), G0);
never@2118 1763 } else {
never@2118 1764 __ delayed()->nop();
never@2118 1765 }
never@2118 1766 __ stw(value, to, 0);
never@2118 1767 if (t == T_BYTE || t == T_SHORT) {
never@2118 1768 __ inc(to, 4);
never@2118 1769 // fill trailing 2 bytes
never@2118 1770 __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
never@2118 1771 __ BIND(L_fill_2_bytes);
never@2118 1772 __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
never@2118 1773 __ delayed()->andcc(count, 1, count);
never@2118 1774 __ sth(value, to, 0);
never@2118 1775 if (t == T_BYTE) {
never@2118 1776 __ inc(to, 2);
never@2118 1777 // fill trailing byte
never@2118 1778 __ andcc(count, 1, count); // in delay slot of branches
never@2118 1779 __ BIND(L_fill_byte);
never@2118 1780 __ brx(Assembler::zero, false, Assembler::pt, L_exit);
never@2118 1781 __ delayed()->nop();
never@2118 1782 __ stb(value, to, 0);
never@2118 1783 } else {
never@2118 1784 __ BIND(L_fill_byte);
never@2118 1785 }
never@2118 1786 } else {
never@2118 1787 __ BIND(L_fill_2_bytes);
never@2118 1788 }
never@2118 1789 __ BIND(L_exit);
never@2118 1790 __ retl();
never@2149 1791 __ delayed()->nop();
never@2149 1792
never@2149 1793 // Handle fills of fewer than 8 bytes. Int is handled elsewhere.
never@2149 1794 if (t == T_BYTE) {
never@2149 1795 __ BIND(L_fill_elements);
never@2149 1796 Label L_fill_2, L_fill_4;
never@2149 1797 // in delay slot __ andcc(count, 1, G0);
never@2149 1798 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
never@2149 1799 __ delayed()->andcc(count, 2, G0);
never@2149 1800 __ stb(value, to, 0);
never@2149 1801 __ inc(to, 1);
never@2149 1802 __ BIND(L_fill_2);
never@2149 1803 __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
never@2149 1804 __ delayed()->andcc(count, 4, G0);
never@2149 1805 __ stb(value, to, 0);
never@2149 1806 __ stb(value, to, 1);
never@2149 1807 __ inc(to, 2);
never@2149 1808 __ BIND(L_fill_4);
never@2149 1809 __ brx(Assembler::zero, false, Assembler::pt, L_exit);
never@2149 1810 __ delayed()->nop();
never@2149 1811 __ stb(value, to, 0);
never@2149 1812 __ stb(value, to, 1);
never@2149 1813 __ stb(value, to, 2);
never@2149 1814 __ retl();
never@2149 1815 __ delayed()->stb(value, to, 3);
never@2149 1816 }
never@2149 1817
never@2149 1818 if (t == T_SHORT) {
never@2149 1819 Label L_fill_2;
never@2149 1820 __ BIND(L_fill_elements);
never@2149 1821 // in delay slot __ andcc(count, 1, G0);
never@2149 1822 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
never@2149 1823 __ delayed()->andcc(count, 2, G0);
never@2149 1824 __ sth(value, to, 0);
never@2149 1825 __ inc(to, 2);
never@2149 1826 __ BIND(L_fill_2);
never@2149 1827 __ brx(Assembler::zero, false, Assembler::pt, L_exit);
never@2149 1828 __ delayed()->nop();
never@2149 1829 __ sth(value, to, 0);
never@2149 1830 __ retl();
never@2149 1831 __ delayed()->sth(value, to, 2);
never@2149 1832 }
never@2118 1833 return start;
never@2118 1834 }
never@2118 1835
never@2118 1836 //
duke@435 1837 // Generate stub for conjoint short copy. If "aligned" is true, the
duke@435 1838 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 1839 //
duke@435 1840 // Arguments for generated stub:
duke@435 1841 // from: O0
duke@435 1842 // to: O1
duke@435 1843 // count: O2 treated as signed
duke@435 1844 //
iveresov@2595 1845 address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
iveresov@2595 1846 address *entry, const char *name) {
duke@435 1847 // Do reverse copy.
duke@435 1848
duke@435 1849 __ align(CodeEntryAlignment);
duke@435 1850 StubCodeMark mark(this, "StubRoutines", name);
duke@435 1851 address start = __ pc();
duke@435 1852
duke@435 1853 Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
duke@435 1854 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
duke@435 1855
duke@435 1856 const Register from = O0; // source array address
duke@435 1857 const Register to = O1; // destination array address
duke@435 1858 const Register count = O2; // elements count
duke@435 1859 const Register end_from = from; // source array end address
duke@435 1860 const Register end_to = to; // destination array end address
duke@435 1861
duke@435 1862 const Register byte_count = O3; // bytes count to copy
duke@435 1863
duke@435 1864 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@435 1865
iveresov@2595 1866 if (entry != NULL) {
iveresov@2595 1867 *entry = __ pc();
iveresov@2595 1868 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 1869 BLOCK_COMMENT("Entry:");
iveresov@2595 1870 }
duke@435 1871
duke@435 1872 array_overlap_test(nooverlap_target, 1);
duke@435 1873
duke@435 1874 __ sllx(count, LogBytesPerShort, byte_count);
duke@435 1875 __ add(to, byte_count, end_to); // offset after last copied element
duke@435 1876
duke@435 1877 // for short arrays, just do single element copy
duke@435 1878 __ cmp(count, 11); // 8 + 3 (22 bytes)
duke@435 1879 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
duke@435 1880 __ delayed()->add(from, byte_count, end_from);
duke@435 1881
duke@435 1882 {
duke@435 1883 // Align the end of the arrays since it may not be aligned even
duke@435 1884 // when the arrays themselves are aligned.
duke@435 1885
duke@435 1886 // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
duke@435 1887 __ andcc(end_to, 3, G0);
duke@435 1888 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@435 1889 __ delayed()->lduh(end_from, -2, O3);
duke@435 1890 __ dec(end_from, 2);
duke@435 1891 __ dec(end_to, 2);
duke@435 1892 __ dec(count);
duke@435 1893 __ sth(O3, end_to, 0);
duke@435 1894 __ BIND(L_skip_alignment);
duke@435 1895
duke@435 1896 // copy 2 elements to align 'end_to' on an 8 byte boundary
duke@435 1897 __ andcc(end_to, 7, G0);
duke@435 1898 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
duke@435 1899 __ delayed()->lduh(end_from, -2, O3);
duke@435 1900 __ dec(count, 2);
duke@435 1901 __ lduh(end_from, -4, O4);
duke@435 1902 __ dec(end_from, 4);
duke@435 1903 __ dec(end_to, 4);
duke@435 1904 __ sth(O3, end_to, 2);
duke@435 1905 __ sth(O4, end_to, 0);
duke@435 1906 __ BIND(L_skip_alignment2);
duke@435 1907 }
duke@435 1908 #ifdef _LP64
duke@435 1909 if (aligned) {
duke@435 1910 // Both arrays are aligned to 8-bytes in 64-bits VM.
duke@435 1911 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
duke@435 1912 // in unaligned case.
duke@435 1913 __ dec(count, 8);
duke@435 1914 } else
duke@435 1915 #endif
duke@435 1916 {
duke@435 1917 // Copy with shift 16 bytes per iteration if arrays do not have
duke@435 1918 // the same alignment mod 8, otherwise jump to the next
duke@435 1919 // code for aligned copy (subtracting 8 from 'count' before the jump).
duke@435 1920 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
duke@435 1921 // Also jump over aligned copy after the copy with shift completed.
duke@435 1922
duke@435 1923 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
duke@435 1924 L_aligned_copy, L_copy_2_bytes);
duke@435 1925 }
duke@435 1926 // copy 8 elements (16 bytes) at a time
kvn@1800 1927 __ align(OptoLoopAlignment);
duke@435 1928 __ BIND(L_aligned_copy);
duke@435 1929 __ dec(end_from, 16);
duke@435 1930 __ ldx(end_from, 8, O3);
duke@435 1931 __ ldx(end_from, 0, O4);
duke@435 1932 __ dec(end_to, 16);
duke@435 1933 __ deccc(count, 8);
duke@435 1934 __ stx(O3, end_to, 8);
duke@435 1935 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
duke@435 1936 __ delayed()->stx(O4, end_to, 0);
duke@435 1937 __ inc(count, 8);
duke@435 1938
duke@435 1939 // copy 1 element (2 bytes) at a time
duke@435 1940 __ BIND(L_copy_2_bytes);
duke@435 1941 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@435 1942 __ delayed()->nop();
duke@435 1943 __ BIND(L_copy_2_bytes_loop);
duke@435 1944 __ dec(end_from, 2);
duke@435 1945 __ dec(end_to, 2);
duke@435 1946 __ lduh(end_from, 0, O4);
duke@435 1947 __ deccc(count);
duke@435 1948 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
duke@435 1949 __ delayed()->sth(O4, end_to, 0);
duke@435 1950
duke@435 1951 __ BIND(L_exit);
duke@435 1952 // O3, O4 are used as temp registers
duke@435 1953 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
duke@435 1954 __ retl();
duke@435 1955 __ delayed()->mov(G0, O0); // return 0
duke@435 1956 return start;
duke@435 1957 }
duke@435 1958
duke@435 1959 //
duke@435 1960 // Generate core code for disjoint int copy (and oop copy on 32-bit).
duke@435 1961 // If "aligned" is true, the "from" and "to" addresses are assumed
duke@435 1962 // to be heapword aligned.
duke@435 1963 //
duke@435 1964 // Arguments:
duke@435 1965 // from: O0
duke@435 1966 // to: O1
duke@435 1967 // count: O2 treated as signed
duke@435 1968 //
duke@435 1969 void generate_disjoint_int_copy_core(bool aligned) {
duke@435 1970
duke@435 1971 Label L_skip_alignment, L_aligned_copy;
duke@435 1972 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
duke@435 1973
duke@435 1974 const Register from = O0; // source array address
duke@435 1975 const Register to = O1; // destination array address
duke@435 1976 const Register count = O2; // elements count
duke@435 1977 const Register offset = O5; // offset from start of arrays
duke@435 1978 // O3, O4, G3, G4 are used as temp registers
duke@435 1979
duke@435 1980 // 'aligned' == true when it is known statically during compilation
duke@435 1981 // of this arraycopy call site that both 'from' and 'to' addresses
duke@435 1982 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
duke@435 1983 //
duke@435 1984 // Aligned arrays have 4 bytes alignment in 32-bits VM
duke@435 1985 // and 8 bytes - in 64-bits VM.
duke@435 1986 //
duke@435 1987 #ifdef _LP64
duke@435 1988 if (!aligned)
duke@435 1989 #endif
duke@435 1990 {
duke@435 1991 // The next check could be put under 'ifndef' since the code in
duke@435 1992 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
duke@435 1993
duke@435 1994 // for short arrays, just do single element copy
duke@435 1995 __ cmp(count, 5); // 4 + 1 (20 bytes)
duke@435 1996 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
duke@435 1997 __ delayed()->mov(G0, offset);
duke@435 1998
duke@435 1999 // copy 1 element to align 'to' on an 8 byte boundary
duke@435 2000 __ andcc(to, 7, G0);
duke@435 2001 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@435 2002 __ delayed()->ld(from, 0, O3);
duke@435 2003 __ inc(from, 4);
duke@435 2004 __ inc(to, 4);
duke@435 2005 __ dec(count);
duke@435 2006 __ st(O3, to, -4);
duke@435 2007 __ BIND(L_skip_alignment);
duke@435 2008
duke@435 2009 // if arrays have same alignment mod 8, do 4 elements copy
duke@435 2010 __ andcc(from, 7, G0);
duke@435 2011 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@435 2012 __ delayed()->ld(from, 0, O3);
duke@435 2013
duke@435 2014 //
duke@435 2015 // Load 2 aligned 8-byte chunks and use one from the previous iteration
duke@435 2016 // to form 2 aligned 8-byte chunks to store.
duke@435 2017 //
duke@435 2018 // copy_16_bytes_forward_with_shift() is not used here since this
duke@435 2019 // code is more optimal.
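      // (At this point, assuming element-aligned jint arrays, 'to' has just been
      // 8-byte aligned, so 'from' is off by exactly 4 bytes; the shift counts
      // below are therefore the constant 32 bits rather than the variable counts
      // used by copy_16_bytes_forward_with_shift().)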
duke@435 2020
duke@435 2021 // copy with shift 4 elements (16 bytes) at a time
duke@435 2022 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4
duke@435 2023
kvn@1800 2024 __ align(OptoLoopAlignment);
duke@435 2025 __ BIND(L_copy_16_bytes);
duke@435 2026 __ ldx(from, 4, O4);
duke@435 2027 __ deccc(count, 4); // Can we do next iteration after this one?
duke@435 2028 __ ldx(from, 12, G4);
duke@435 2029 __ inc(to, 16);
duke@435 2030 __ inc(from, 16);
duke@435 2031 __ sllx(O3, 32, O3);
duke@435 2032 __ srlx(O4, 32, G3);
duke@435 2033 __ bset(G3, O3);
duke@435 2034 __ stx(O3, to, -16);
duke@435 2035 __ sllx(O4, 32, O4);
duke@435 2036 __ srlx(G4, 32, G3);
duke@435 2037 __ bset(G3, O4);
duke@435 2038 __ stx(O4, to, -8);
duke@435 2039 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
duke@435 2040 __ delayed()->mov(G4, O3);
duke@435 2041
duke@435 2042 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
duke@435 2043 __ delayed()->inc(count, 4); // restore 'count'
duke@435 2044
duke@435 2045 __ BIND(L_aligned_copy);
duke@435 2046 }
duke@435 2047 // copy 4 elements (16 bytes) at a time
duke@435 2048 __ and3(count, 1, G4); // Save
duke@435 2049 __ srl(count, 1, count);
duke@435 2050 generate_disjoint_long_copy_core(aligned);
duke@435 2051 __ mov(G4, count); // Restore
duke@435 2052
duke@435 2053 // copy 1 element at a time
duke@435 2054 __ BIND(L_copy_4_bytes);
duke@435 2055 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@435 2056 __ delayed()->nop();
duke@435 2057 __ BIND(L_copy_4_bytes_loop);
duke@435 2058 __ ld(from, offset, O3);
duke@435 2059 __ deccc(count);
duke@435 2060 __ st(O3, to, offset);
duke@435 2061 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
duke@435 2062 __ delayed()->inc(offset, 4);
duke@435 2063 __ BIND(L_exit);
duke@435 2064 }
duke@435 2065
duke@435 2066 //
duke@435 2067 // Generate stub for disjoint int copy. If "aligned" is true, the
duke@435 2068 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 2069 //
duke@435 2070 // Arguments for generated stub:
duke@435 2071 // from: O0
duke@435 2072 // to: O1
duke@435 2073 // count: O2 treated as signed
duke@435 2074 //
iveresov@2595 2075 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
duke@435 2076 __ align(CodeEntryAlignment);
duke@435 2077 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2078 address start = __ pc();
duke@435 2079
duke@435 2080 const Register count = O2;
duke@435 2081 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@435 2082
iveresov@2595 2083 if (entry != NULL) {
iveresov@2595 2084 *entry = __ pc();
iveresov@2595 2085 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 2086 BLOCK_COMMENT("Entry:");
iveresov@2595 2087 }
duke@435 2088
duke@435 2089 generate_disjoint_int_copy_core(aligned);
duke@435 2090
duke@435 2091 // O3, O4 are used as temp registers
duke@435 2092 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
duke@435 2093 __ retl();
duke@435 2094 __ delayed()->mov(G0, O0); // return 0
duke@435 2095 return start;
duke@435 2096 }
duke@435 2097
duke@435 2098 //
duke@435 2099 // Generate core code for conjoint int copy (and oop copy on 32-bit).
duke@435 2100 // If "aligned" is true, the "from" and "to" addresses are assumed
duke@435 2101 // to be heapword aligned.
duke@435 2102 //
duke@435 2103 // Arguments:
duke@435 2104 // from: O0
duke@435 2105 // to: O1
duke@435 2106 // count: O2 treated as signed
duke@435 2107 //
duke@435 2108 void generate_conjoint_int_copy_core(bool aligned) {
duke@435 2109 // Do reverse copy.
duke@435 2110
duke@435 2111 Label L_skip_alignment, L_aligned_copy;
duke@435 2112 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
duke@435 2113
duke@435 2114 const Register from = O0; // source array address
duke@435 2115 const Register to = O1; // destination array address
duke@435 2116 const Register count = O2; // elements count
duke@435 2117 const Register end_from = from; // source array end address
duke@435 2118 const Register end_to = to; // destination array end address
duke@435 2119 // O3, O4, O5, G3 are used as temp registers
duke@435 2120
duke@435 2121 const Register byte_count = O3; // bytes count to copy
duke@435 2122
duke@435 2123 __ sllx(count, LogBytesPerInt, byte_count);
duke@435 2124 __ add(to, byte_count, end_to); // offset after last copied element
duke@435 2125
duke@435 2126 __ cmp(count, 5); // for short arrays, just do single element copy
duke@435 2127 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
duke@435 2128 __ delayed()->add(from, byte_count, end_from);
duke@435 2129
duke@435 2130 // copy 1 element to align 'to' on an 8 byte boundary
duke@435 2131 __ andcc(end_to, 7, G0);
duke@435 2132 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@435 2133 __ delayed()->nop();
duke@435 2134 __ dec(count);
duke@435 2135 __ dec(end_from, 4);
duke@435 2136 __ dec(end_to, 4);
duke@435 2137 __ ld(end_from, 0, O4);
duke@435 2138 __ st(O4, end_to, 0);
duke@435 2139 __ BIND(L_skip_alignment);
duke@435 2140
duke@435 2141 // Check if 'end_from' and 'end_to' has the same alignment.
duke@435 2142 __ andcc(end_from, 7, G0);
duke@435 2143 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@435 2144 __ delayed()->dec(count, 4); // The cmp at the start guarantees cnt >= 4
duke@435 2145
duke@435 2146 // copy with shift 4 elements (16 bytes) at a time
duke@435 2147 //
duke@435 2148 // Load 2 aligned 8-byte chunks and use one from the previous iteration
duke@435 2149 // to form 2 aligned 8-byte chunks to store.
duke@435 2150 //
duke@435 2151 __ ldx(end_from, -4, O3);
kvn@1800 2152 __ align(OptoLoopAlignment);
duke@435 2153 __ BIND(L_copy_16_bytes);
duke@435 2154 __ ldx(end_from, -12, O4);
duke@435 2155 __ deccc(count, 4);
duke@435 2156 __ ldx(end_from, -20, O5);
duke@435 2157 __ dec(end_to, 16);
duke@435 2158 __ dec(end_from, 16);
duke@435 2159 __ srlx(O3, 32, O3);
duke@435 2160 __ sllx(O4, 32, G3);
duke@435 2161 __ bset(G3, O3);
duke@435 2162 __ stx(O3, end_to, 8);
duke@435 2163 __ srlx(O4, 32, O4);
duke@435 2164 __ sllx(O5, 32, G3);
duke@435 2165 __ bset(O4, G3);
duke@435 2166 __ stx(G3, end_to, 0);
duke@435 2167 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
duke@435 2168 __ delayed()->mov(O5, O3);
duke@435 2169
duke@435 2170 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
duke@435 2171 __ delayed()->inc(count, 4);
duke@435 2172
duke@435 2173 // copy 4 elements (16 bytes) at a time
kvn@1800 2174 __ align(OptoLoopAlignment);
duke@435 2175 __ BIND(L_aligned_copy);
duke@435 2176 __ dec(end_from, 16);
duke@435 2177 __ ldx(end_from, 8, O3);
duke@435 2178 __ ldx(end_from, 0, O4);
duke@435 2179 __ dec(end_to, 16);
duke@435 2180 __ deccc(count, 4);
duke@435 2181 __ stx(O3, end_to, 8);
duke@435 2182 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
duke@435 2183 __ delayed()->stx(O4, end_to, 0);
duke@435 2184 __ inc(count, 4);
duke@435 2185
duke@435 2186 // copy 1 element (4 bytes) at a time
duke@435 2187 __ BIND(L_copy_4_bytes);
duke@435 2188 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@435 2189 __ delayed()->nop();
duke@435 2190 __ BIND(L_copy_4_bytes_loop);
duke@435 2191 __ dec(end_from, 4);
duke@435 2192 __ dec(end_to, 4);
duke@435 2193 __ ld(end_from, 0, O4);
duke@435 2194 __ deccc(count);
duke@435 2195 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
duke@435 2196 __ delayed()->st(O4, end_to, 0);
duke@435 2197 __ BIND(L_exit);
duke@435 2198 }
duke@435 2199
duke@435 2200 //
duke@435 2201 // Generate stub for conjoint int copy. If "aligned" is true, the
duke@435 2202 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 2203 //
duke@435 2204 // Arguments for generated stub:
duke@435 2205 // from: O0
duke@435 2206 // to: O1
duke@435 2207 // count: O2 treated as signed
duke@435 2208 //
iveresov@2595 2209 address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
iveresov@2595 2210 address *entry, const char *name) {
duke@435 2211 __ align(CodeEntryAlignment);
duke@435 2212 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2213 address start = __ pc();
duke@435 2214
duke@435 2215 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
duke@435 2216
iveresov@2595 2217 if (entry != NULL) {
iveresov@2595 2218 *entry = __ pc();
iveresov@2595 2219 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 2220 BLOCK_COMMENT("Entry:");
iveresov@2595 2221 }
duke@435 2222
duke@435 2223 array_overlap_test(nooverlap_target, 2);
duke@435 2224
duke@435 2225 generate_conjoint_int_copy_core(aligned);
duke@435 2226
duke@435 2227 // O3, O4 are used as temp registers
duke@435 2228 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
duke@435 2229 __ retl();
duke@435 2230 __ delayed()->mov(G0, O0); // return 0
duke@435 2231 return start;
duke@435 2232 }
duke@435 2233
duke@435 2234 //
duke@435 2235 // Generate core code for disjoint long copy (and oop copy on 64-bit).
duke@435 2236 // "aligned" is ignored, because we must make the stronger
duke@435 2237 // assumption that both addresses are always 64-bit aligned.
duke@435 2238 //
duke@435 2239 // Arguments:
duke@435 2240 // from: O0
duke@435 2241 // to: O1
duke@435 2242 // count: O2 treated as signed
duke@435 2243 //
kvn@1799 2244 // count -= 2;
kvn@1799 2245 // if ( count >= 0 ) { // >= 2 elements
kvn@1799 2246 // if ( count > 6) { // >= 8 elements
kvn@1799 2247 // count -= 6; // original count - 8
kvn@1799 2248 // do {
kvn@1799 2249 // copy_8_elements;
kvn@1799 2250 // count -= 8;
kvn@1799 2251 // } while ( count >= 0 );
kvn@1799 2252 // count += 6;
kvn@1799 2253 // }
kvn@1799 2254 // if ( count >= 0 ) { // >= 2 elements
kvn@1799 2255 // do {
kvn@1799 2256 // copy_2_elements;
kvn@1799 2257 // } while ( (count=count-2) >= 0 );
kvn@1799 2258 // }
kvn@1799 2259 // }
kvn@1799 2260 // count += 2;
kvn@1799 2261 // if ( count != 0 ) { // 1 element left
kvn@1799 2262 // copy_1_element;
kvn@1799 2263 // }
kvn@1799 2264 //
duke@435 2265 void generate_disjoint_long_copy_core(bool aligned) {
duke@435 2266 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
duke@435 2267 const Register from = O0; // source array address
duke@435 2268 const Register to = O1; // destination array address
duke@435 2269 const Register count = O2; // elements count
duke@435 2270 const Register offset0 = O4; // element offset
duke@435 2271 const Register offset8 = O5; // next element offset
duke@435 2272
duke@435 2273 __ deccc(count, 2);
duke@435 2274 __ mov(G0, offset0); // offset from start of arrays (0)
duke@435 2275 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
duke@435 2276 __ delayed()->add(offset0, 8, offset8);
kvn@1799 2277
kvn@1799 2278 // Copy in 64-byte chunks
kvn@1799 2279 Label L_copy_64_bytes;
kvn@1799 2280 const Register from64 = O3; // source address
kvn@1799 2281 const Register to64 = G3; // destination address
kvn@1799 2282 __ subcc(count, 6, O3);
kvn@1799 2283 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
kvn@1799 2284 __ delayed()->mov(to, to64);
kvn@1799 2285 // Now we can use O4(offset0), O5(offset8) as temps
kvn@1799 2286 __ mov(O3, count);
kvn@1799 2287 __ mov(from, from64);
kvn@1799 2288
kvn@1800 2289 __ align(OptoLoopAlignment);
kvn@1799 2290 __ BIND(L_copy_64_bytes);
kvn@1799 2291 for( int off = 0; off < 64; off += 16 ) {
kvn@1799 2292 __ ldx(from64, off+0, O4);
kvn@1799 2293 __ ldx(from64, off+8, O5);
kvn@1799 2294 __ stx(O4, to64, off+0);
kvn@1799 2295 __ stx(O5, to64, off+8);
kvn@1799 2296 }
kvn@1799 2297 __ deccc(count, 8);
kvn@1799 2298 __ inc(from64, 64);
kvn@1799 2299 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
kvn@1799 2300 __ delayed()->inc(to64, 64);
kvn@1799 2301
kvn@1799 2302 // Restore O4(offset0), O5(offset8)
kvn@1799 2303 __ sub(from64, from, offset0);
kvn@1799 2304 __ inccc(count, 6);
kvn@1799 2305 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
kvn@1799 2306 __ delayed()->add(offset0, 8, offset8);
kvn@1799 2307
kvn@1799 2308 // Copy in 16-byte chunks
kvn@1800 2309 __ align(OptoLoopAlignment);
duke@435 2310 __ BIND(L_copy_16_bytes);
duke@435 2311 __ ldx(from, offset0, O3);
duke@435 2312 __ ldx(from, offset8, G3);
duke@435 2313 __ deccc(count, 2);
duke@435 2314 __ stx(O3, to, offset0);
duke@435 2315 __ inc(offset0, 16);
duke@435 2316 __ stx(G3, to, offset8);
duke@435 2317 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
duke@435 2318 __ delayed()->inc(offset8, 16);
duke@435 2319
kvn@1799 2320 // Copy last 8 bytes
duke@435 2321 __ BIND(L_copy_8_bytes);
duke@435 2322 __ inccc(count, 2);
duke@435 2323 __ brx(Assembler::zero, true, Assembler::pn, L_exit );
duke@435 2324 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
duke@435 2325 __ ldx(from, offset0, O3);
duke@435 2326 __ stx(O3, to, offset0);
duke@435 2327 __ BIND(L_exit);
duke@435 2328 }
duke@435 2329
duke@435 2330 //
duke@435 2331 // Generate stub for disjoint long copy.
duke@435 2332 // "aligned" is ignored, because we must make the stronger
duke@435 2333 // assumption that both addresses are always 64-bit aligned.
duke@435 2334 //
duke@435 2335 // Arguments for generated stub:
duke@435 2336 // from: O0
duke@435 2337 // to: O1
duke@435 2338 // count: O2 treated as signed
duke@435 2339 //
iveresov@2595 2340 address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) {
duke@435 2341 __ align(CodeEntryAlignment);
duke@435 2342 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2343 address start = __ pc();
duke@435 2344
duke@435 2345 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
duke@435 2346
iveresov@2595 2347 if (entry != NULL) {
iveresov@2595 2348 *entry = __ pc();
iveresov@2595 2349 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 2350 BLOCK_COMMENT("Entry:");
iveresov@2595 2351 }
duke@435 2352
duke@435 2353 generate_disjoint_long_copy_core(aligned);
duke@435 2354
duke@435 2355 // O3, O4 are used as temp registers
duke@435 2356 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
duke@435 2357 __ retl();
duke@435 2358 __ delayed()->mov(G0, O0); // return 0
duke@435 2359 return start;
duke@435 2360 }
duke@435 2361
duke@435 2362 //
duke@435 2363 // Generate core code for conjoint long copy (and oop copy on 64-bit).
duke@435 2364 // "aligned" is ignored, because we must make the stronger
duke@435 2365 // assumption that both addresses are always 64-bit aligned.
duke@435 2366 //
duke@435 2367 // Arguments:
duke@435 2368 // from: O0
duke@435 2369 // to: O1
duke@435 2370 // count: O2 treated as signed
duke@435 2371 //
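  // A rough sketch of the control flow below, in the style of the pseudocode
  // before generate_disjoint_long_copy_core() (illustration only; offsets are
  // byte offsets applied to 'from'/'to'):
  //
  //   count -= 1;
  //   offset8 = count * 8;               // byte offset of the last element
  //   if ( count > 0 ) {                 // at least 2 elements
  //     offset0 = offset8 - 8;
  //     do {                             // copy 2 elements per iteration,
  //       to[offset8] = from[offset8];   // walking backward
  //       to[offset0] = from[offset0];
  //       offset8 -= 16;
  //       offset0 -= 16;
  //     } while ( offset8 > 0 );
  //   }
  //   if ( offset8 >= 0 ) {              // the first element is still left
  //     to[0] = from[0];
  //   }
  //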
duke@435 2372 void generate_conjoint_long_copy_core(bool aligned) {
duke@435 2373 // Do reverse copy.
duke@435 2374 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
duke@435 2375 const Register from = O0; // source array address
duke@435 2376 const Register to = O1; // destination array address
duke@435 2377 const Register count = O2; // elements count
duke@435 2378 const Register offset8 = O4; // element offset
duke@435 2379 const Register offset0 = O5; // previous element offset
duke@435 2380
duke@435 2381 __ subcc(count, 1, count);
duke@435 2382 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
duke@435 2383 __ delayed()->sllx(count, LogBytesPerLong, offset8);
duke@435 2384 __ sub(offset8, 8, offset0);
kvn@1800 2385 __ align(OptoLoopAlignment);
duke@435 2386 __ BIND(L_copy_16_bytes);
duke@435 2387 __ ldx(from, offset8, O2);
duke@435 2388 __ ldx(from, offset0, O3);
duke@435 2389 __ stx(O2, to, offset8);
duke@435 2390 __ deccc(offset8, 16); // use offset8 as counter
duke@435 2391 __ stx(O3, to, offset0);
duke@435 2392 __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
duke@435 2393 __ delayed()->dec(offset0, 16);
duke@435 2394
duke@435 2395 __ BIND(L_copy_8_bytes);
duke@435 2396 __ brx(Assembler::negative, false, Assembler::pn, L_exit );
duke@435 2397 __ delayed()->nop();
duke@435 2398 __ ldx(from, 0, O3);
duke@435 2399 __ stx(O3, to, 0);
duke@435 2400 __ BIND(L_exit);
duke@435 2401 }
duke@435 2402
duke@435 2403 // Generate stub for conjoint long copy.
duke@435 2404 // "aligned" is ignored, because we must make the stronger
duke@435 2405 // assumption that both addresses are always 64-bit aligned.
duke@435 2406 //
duke@435 2407 // Arguments for generated stub:
duke@435 2408 // from: O0
duke@435 2409 // to: O1
duke@435 2410 // count: O2 treated as signed
duke@435 2411 //
iveresov@2595 2412 address generate_conjoint_long_copy(bool aligned, address nooverlap_target,
iveresov@2595 2413 address *entry, const char *name) {
duke@435 2414 __ align(CodeEntryAlignment);
duke@435 2415 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2416 address start = __ pc();
duke@435 2417
iveresov@2606 2418 assert(aligned, "Should always be aligned");
duke@435 2419
duke@435 2420 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
duke@435 2421
iveresov@2595 2422 if (entry != NULL) {
iveresov@2595 2423 *entry = __ pc();
iveresov@2595 2424 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
iveresov@2595 2425 BLOCK_COMMENT("Entry:");
iveresov@2595 2426 }
duke@435 2427
duke@435 2428 array_overlap_test(nooverlap_target, 3);
duke@435 2429
duke@435 2430 generate_conjoint_long_copy_core(aligned);
duke@435 2431
duke@435 2432 // O3, O4 are used as temp registers
duke@435 2433 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
duke@435 2434 __ retl();
duke@435 2435 __ delayed()->mov(G0, O0); // return 0
duke@435 2436 return start;
duke@435 2437 }
duke@435 2438
duke@435 2439 // Generate stub for disjoint oop copy. If "aligned" is true, the
duke@435 2440 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 2441 //
duke@435 2442 // Arguments for generated stub:
duke@435 2443 // from: O0
duke@435 2444 // to: O1
duke@435 2445 // count: O2 treated as signed
duke@435 2446 //
iveresov@2606 2447 address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
iveresov@2606 2448 bool dest_uninitialized = false) {
duke@435 2449
duke@435 2450 const Register from = O0; // source array address
duke@435 2451 const Register to = O1; // destination array address
duke@435 2452 const Register count = O2; // elements count
duke@435 2453
duke@435 2454 __ align(CodeEntryAlignment);
duke@435 2455 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2456 address start = __ pc();
duke@435 2457
duke@435 2458 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@435 2459
iveresov@2595 2460 if (entry != NULL) {
iveresov@2595 2461 *entry = __ pc();
iveresov@2595 2462 // caller can pass a 64-bit byte count here
iveresov@2595 2463 BLOCK_COMMENT("Entry:");
iveresov@2595 2464 }
duke@435 2465
duke@435 2466 // save arguments for barrier generation
duke@435 2467 __ mov(to, G1);
duke@435 2468 __ mov(count, G5);
iveresov@2606 2469 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
duke@435 2470 #ifdef _LP64
coleenp@548 2471 assert_clean_int(count, O3); // Make sure 'count' is clean int.
coleenp@548 2472 if (UseCompressedOops) {
coleenp@548 2473 generate_disjoint_int_copy_core(aligned);
coleenp@548 2474 } else {
coleenp@548 2475 generate_disjoint_long_copy_core(aligned);
coleenp@548 2476 }
duke@435 2477 #else
duke@435 2478 generate_disjoint_int_copy_core(aligned);
duke@435 2479 #endif
duke@435 2480 // O0 is used as temp register
duke@435 2481 gen_write_ref_array_post_barrier(G1, G5, O0);
duke@435 2482
duke@435 2483 // O3, O4 are used as temp registers
duke@435 2484 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
duke@435 2485 __ retl();
duke@435 2486 __ delayed()->mov(G0, O0); // return 0
duke@435 2487 return start;
duke@435 2488 }
duke@435 2489
duke@435 2490 // Generate stub for conjoint oop copy. If "aligned" is true, the
duke@435 2491 // "from" and "to" addresses are assumed to be heapword aligned.
duke@435 2492 //
duke@435 2493 // Arguments for generated stub:
duke@435 2494 // from: O0
duke@435 2495 // to: O1
duke@435 2496 // count: O2 treated as signed
duke@435 2497 //
iveresov@2595 2498 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
iveresov@2606 2499 address *entry, const char *name,
iveresov@2606 2500 bool dest_uninitialized = false) {
duke@435 2501
duke@435 2502 const Register from = O0; // source array address
duke@435 2503 const Register to = O1; // destination array address
duke@435 2504 const Register count = O2; // elements count
duke@435 2505
duke@435 2506 __ align(CodeEntryAlignment);
duke@435 2507 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2508 address start = __ pc();
duke@435 2509
duke@435 2510 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@435 2511
iveresov@2595 2512 if (entry != NULL) {
iveresov@2595 2513 *entry = __ pc();
iveresov@2595 2514 // caller can pass a 64-bit byte count here
iveresov@2595 2515 BLOCK_COMMENT("Entry:");
iveresov@2595 2516 }
iveresov@2595 2517
iveresov@2595 2518 array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
duke@435 2519
duke@435 2520 // save arguments for barrier generation
duke@435 2521 __ mov(to, G1);
duke@435 2522 __ mov(count, G5);
iveresov@2606 2523 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
duke@435 2524
duke@435 2525 #ifdef _LP64
coleenp@548 2526 if (UseCompressedOops) {
coleenp@548 2527 generate_conjoint_int_copy_core(aligned);
coleenp@548 2528 } else {
coleenp@548 2529 generate_conjoint_long_copy_core(aligned);
coleenp@548 2530 }
duke@435 2531 #else
duke@435 2532 generate_conjoint_int_copy_core(aligned);
duke@435 2533 #endif
duke@435 2534
duke@435 2535 // O0 is used as temp register
duke@435 2536 gen_write_ref_array_post_barrier(G1, G5, O0);
duke@435 2537
duke@435 2538 // O3, O4 are used as temp registers
duke@435 2539 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
duke@435 2540 __ retl();
duke@435 2541 __ delayed()->mov(G0, O0); // return 0
duke@435 2542 return start;
duke@435 2543 }
duke@435 2544
duke@435 2545
duke@435 2546 // Helper for generating a dynamic type check.
duke@435 2547 // Smashes only the given temp registers.
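  // A rough sketch of the check emitted below, assuming HotSpot's usual
  // fast/slow-path subtype test (illustration only):
  //   if ( *(sub_klass + super_check_offset) == super_klass ) goto L_success;
  //   if ( super_check_offset is not the secondary-supers cache offset )
  //     fall through to L_miss;          // definitely not a subtype
  //   otherwise take the slow path: scan sub_klass's secondary supers for
  //   super_klass, branching to L_success if it is found, else to L_miss.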
duke@435 2548 void generate_type_check(Register sub_klass,
duke@435 2549 Register super_check_offset,
duke@435 2550 Register super_klass,
duke@435 2551 Register temp,
jrose@1079 2552 Label& L_success) {
duke@435 2553 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
duke@435 2554
duke@435 2555 BLOCK_COMMENT("type_check:");
duke@435 2556
jrose@1079 2557 Label L_miss, L_pop_to_miss;
duke@435 2558
duke@435 2559 assert_clean_int(super_check_offset, temp);
duke@435 2560
jrose@1079 2561 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
jrose@1079 2562 &L_success, &L_miss, NULL,
jrose@1079 2563 super_check_offset);
jrose@1079 2564
jrose@1079 2565 BLOCK_COMMENT("type_check_slow_path:");
duke@435 2566 __ save_frame(0);
jrose@1079 2567 __ check_klass_subtype_slow_path(sub_klass->after_save(),
jrose@1079 2568 super_klass->after_save(),
jrose@1079 2569 L0, L1, L2, L4,
jrose@1079 2570 NULL, &L_pop_to_miss);
jrose@1079 2571 __ ba(false, L_success);
jrose@1079 2572 __ delayed()->restore();
jrose@1079 2573
jrose@1079 2574 __ bind(L_pop_to_miss);
duke@435 2575 __ restore();
duke@435 2576
duke@435 2577 // Fall through on failure!
duke@435 2578 __ BIND(L_miss);
duke@435 2579 }
duke@435 2580
duke@435 2581
duke@435 2582 // Generate stub for checked oop copy.
duke@435 2583 //
duke@435 2584 // Arguments for generated stub:
duke@435 2585 // from: O0
duke@435 2586 // to: O1
duke@435 2587 // count: O2 treated as signed
duke@435 2588 // ckoff: O3 (super_check_offset)
duke@435 2589 // ckval: O4 (super_klass)
duke@435 2590 // ret: O0 zero for success; (-1^K) where K is partial transfer count
duke@435 2591 //
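  // A worked example of the return convention: if the stub stores K == 3 oops
  // and then hits an element that fails the type check, it returns
  // -1 ^ 3 == ~3 == -4 (the not1 below); the caller recovers the partial
  // transfer count as K = ~return_value.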
iveresov@2606 2592 address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) {
duke@435 2593
duke@435 2594 const Register O0_from = O0; // source array address
duke@435 2595 const Register O1_to = O1; // destination array address
duke@435 2596 const Register O2_count = O2; // elements count
duke@435 2597 const Register O3_ckoff = O3; // super_check_offset
duke@435 2598 const Register O4_ckval = O4; // super_klass
duke@435 2599
duke@435 2600 const Register O5_offset = O5; // loop var, with stride wordSize
duke@435 2601 const Register G1_remain = G1; // loop var, with stride -1
duke@435 2602 const Register G3_oop = G3; // actual oop copied
duke@435 2603 const Register G4_klass = G4; // oop._klass
duke@435 2604 const Register G5_super = G5; // oop._klass._primary_supers[ckval]
duke@435 2605
duke@435 2606 __ align(CodeEntryAlignment);
duke@435 2607 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2608 address start = __ pc();
duke@435 2609
duke@435 2610 #ifdef ASSERT
jrose@1079 2611 // We sometimes save a frame (see generate_type_check below).
duke@435 2612 // If this will cause trouble, let's fail now instead of later.
duke@435 2613 __ save_frame(0);
duke@435 2614 __ restore();
duke@435 2615 #endif
duke@435 2616
never@2199 2617 assert_clean_int(O2_count, G1); // Make sure 'count' is clean int.
never@2199 2618
duke@435 2619 #ifdef ASSERT
duke@435 2620 // caller guarantees that the arrays really are different
duke@435 2621 // otherwise, we would have to make conjoint checks
duke@435 2622 { Label L;
duke@435 2623 __ mov(O3, G1); // spill: overlap test smashes O3
duke@435 2624 __ mov(O4, G4); // spill: overlap test smashes O4
coleenp@548 2625 array_overlap_test(L, LogBytesPerHeapOop);
duke@435 2626 __ stop("checkcast_copy within a single array");
duke@435 2627 __ bind(L);
duke@435 2628 __ mov(G1, O3);
duke@435 2629 __ mov(G4, O4);
duke@435 2630 }
duke@435 2631 #endif //ASSERT
duke@435 2632
iveresov@2595 2633 if (entry != NULL) {
iveresov@2595 2634 *entry = __ pc();
iveresov@2595 2635 // caller can pass a 64-bit byte count here (from generic stub)
iveresov@2595 2636 BLOCK_COMMENT("Entry:");
iveresov@2595 2637 }
iveresov@2606 2638 gen_write_ref_array_pre_barrier(O1_to, O2_count, dest_uninitialized);
duke@435 2639
duke@435 2640 Label load_element, store_element, do_card_marks, fail, done;
duke@435 2641 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it
duke@435 2642 __ brx(Assembler::notZero, false, Assembler::pt, load_element);
duke@435 2643 __ delayed()->mov(G0, O5_offset); // offset from start of arrays
duke@435 2644
duke@435 2645 // Empty array: Nothing to do.
duke@435 2646 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
duke@435 2647 __ retl();
duke@435 2648 __ delayed()->set(0, O0); // return 0 on (trivial) success
duke@435 2649
duke@435 2650 // ======== begin loop ========
duke@435 2651 // (Loop is rotated; its entry is load_element.)
duke@435 2652 // Loop variables:
duke@435 2653 // (O5 = 0; ; O5 += heapOopSize) --- offset from src, dest arrays
duke@435 2654 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
duke@435 2655 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
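    // In C-like pseudocode the rotated loop below is roughly the following
    // (sketch only; element_passes_check() stands for the generate_type_check
    // call emitted further down):
    //
    //   for (remain = count, offset = 0; remain != 0; remain--, offset += heapOopSize) {
    //     oop o = *(oop*)(from + offset);                   // load_element
    //     if (o != NULL && !element_passes_check(o))  goto fail;
    //     *(oop*)(to + offset) = o;                         // store_element
    //   }
    //   goto do_card_marks;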
kvn@1800 2656 __ align(OptoLoopAlignment);
duke@435 2657
jrose@1079 2658 __ BIND(store_element);
jrose@1079 2659 __ deccc(G1_remain); // decrement the count
coleenp@548 2660 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
coleenp@548 2661 __ inc(O5_offset, heapOopSize); // step to next offset
duke@435 2662 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
duke@435 2663 __ delayed()->set(0, O0); // return 0 on success
duke@435 2664
duke@435 2665 // ======== loop entry is here ========
jrose@1079 2666 __ BIND(load_element);
coleenp@548 2667 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
duke@435 2668 __ br_null(G3_oop, true, Assembler::pt, store_element);
jrose@1079 2669 __ delayed()->nop();
duke@435 2670
coleenp@548 2671 __ load_klass(G3_oop, G4_klass); // query the object klass
duke@435 2672
duke@435 2673 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
duke@435 2674 // branch to this on success:
jrose@1079 2675 store_element);
duke@435 2676 // ======== end loop ========
duke@435 2677
duke@435 2678 // It was a real error; we must depend on the caller to finish the job.
duke@435 2679 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
duke@435 2680 // Emit GC store barriers for the oops we have copied (O2 minus G1),
duke@435 2681 // and report their number to the caller.
jrose@1079 2682 __ BIND(fail);
duke@435 2683 __ subcc(O2_count, G1_remain, O2_count);
duke@435 2684 __ brx(Assembler::zero, false, Assembler::pt, done);
duke@435 2685 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
duke@435 2686
jrose@1079 2687 __ BIND(do_card_marks);
duke@435 2688 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
duke@435 2689
jrose@1079 2690 __ BIND(done);
duke@435 2691 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
duke@435 2692 __ retl();
duke@435 2693 __ delayed()->nop(); // return value in O0
duke@435 2694
duke@435 2695 return start;
duke@435 2696 }
duke@435 2697
duke@435 2698
duke@435 2699 // Generate 'unsafe' array copy stub
duke@435 2700 // Though just as safe as the other stubs, it takes an unscaled
duke@435 2701 // size_t argument instead of an element count.
duke@435 2702 //
duke@435 2703 // Arguments for generated stub:
duke@435 2704 // from: O0
duke@435 2705 // to: O1
duke@435 2706 // count: O2 byte count, treated as ssize_t, can be zero
duke@435 2707 //
duke@435 2708 // Examines the alignment of the operands and dispatches
duke@435 2709 // to a long, int, short, or byte copy loop.
duke@435 2710 //
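  // A C-level sketch of the dispatch below (names are illustrative; the
  // branches actually land on the *_copy_entry stub addresses passed in):
  //
  //   void unsafe_copy(void* from, void* to, size_t count) {
  //     uintptr_t bits = (uintptr_t)from | (uintptr_t)to | count;
  //     if      ((bits & (BytesPerLong  - 1)) == 0) long_copy (from, to, count >> LogBytesPerLong);
  //     else if ((bits & (BytesPerInt   - 1)) == 0) int_copy  (from, to, count >> LogBytesPerInt);
  //     else if ((bits & (BytesPerShort - 1)) == 0) short_copy(from, to, count >> LogBytesPerShort);
  //     else                                        byte_copy (from, to, count);
  //   }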
iveresov@2595 2711 address generate_unsafe_copy(const char* name,
iveresov@2595 2712 address byte_copy_entry,
iveresov@2595 2713 address short_copy_entry,
iveresov@2595 2714 address int_copy_entry,
iveresov@2595 2715 address long_copy_entry) {
duke@435 2716
duke@435 2717 const Register O0_from = O0; // source array address
duke@435 2718 const Register O1_to = O1; // destination array address
duke@435 2719 const Register O2_count = O2; // elements count
duke@435 2720
duke@435 2721 const Register G1_bits = G1; // test copy of low bits
duke@435 2722
duke@435 2723 __ align(CodeEntryAlignment);
duke@435 2724 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2725 address start = __ pc();
duke@435 2726
duke@435 2727 // bump this on entry, not on exit:
duke@435 2728 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);
duke@435 2729
duke@435 2730 __ or3(O0_from, O1_to, G1_bits);
duke@435 2731 __ or3(O2_count, G1_bits, G1_bits);
duke@435 2732
duke@435 2733 __ btst(BytesPerLong-1, G1_bits);
duke@435 2734 __ br(Assembler::zero, true, Assembler::pt,
duke@435 2735 long_copy_entry, relocInfo::runtime_call_type);
duke@435 2736 // scale the count on the way out:
duke@435 2737 __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);
duke@435 2738
duke@435 2739 __ btst(BytesPerInt-1, G1_bits);
duke@435 2740 __ br(Assembler::zero, true, Assembler::pt,
duke@435 2741 int_copy_entry, relocInfo::runtime_call_type);
duke@435 2742 // scale the count on the way out:
duke@435 2743 __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);
duke@435 2744
duke@435 2745 __ btst(BytesPerShort-1, G1_bits);
duke@435 2746 __ br(Assembler::zero, true, Assembler::pt,
duke@435 2747 short_copy_entry, relocInfo::runtime_call_type);
duke@435 2748 // scale the count on the way out:
duke@435 2749 __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);
duke@435 2750
duke@435 2751 __ br(Assembler::always, false, Assembler::pt,
duke@435 2752 byte_copy_entry, relocInfo::runtime_call_type);
duke@435 2753 __ delayed()->nop();
duke@435 2754
duke@435 2755 return start;
duke@435 2756 }
duke@435 2757
duke@435 2758
duke@435 2759 // Perform range checks on the proposed arraycopy.
duke@435 2760 // Kills the two temps, but nothing else.
duke@435 2761 // Also, clean the sign bits of src_pos and dst_pos.
duke@435 2762 void arraycopy_range_checks(Register src, // source array oop (O0)
duke@435 2763 Register src_pos, // source position (O1)
duke@435 2764 Register dst, // destination array oop (O2)
duke@435 2765 Register dst_pos, // destination position (O3)
duke@435 2766 Register length, // length of copy (O4)
duke@435 2767 Register temp1, Register temp2,
duke@435 2768 Label& L_failed) {
duke@435 2769 BLOCK_COMMENT("arraycopy_range_checks:");
duke@435 2770
duke@435 2771 // if (src_pos + length > arrayOop(src)->length() ) FAIL;
duke@435 2772
duke@435 2773 const Register array_length = temp1; // scratch
duke@435 2774 const Register end_pos = temp2; // scratch
duke@435 2775
duke@435 2776 // Note: This next instruction may be in the delay slot of a branch:
duke@435 2777 __ add(length, src_pos, end_pos); // src_pos + length
duke@435 2778 __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
duke@435 2779 __ cmp(end_pos, array_length);
duke@435 2780 __ br(Assembler::greater, false, Assembler::pn, L_failed);
duke@435 2781
duke@435 2782 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
duke@435 2783 __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
duke@435 2784 __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
duke@435 2785 __ cmp(end_pos, array_length);
duke@435 2786 __ br(Assembler::greater, false, Assembler::pn, L_failed);
duke@435 2787
duke@435 2788 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
duke@435 2789 // Move with sign extension can be used since they are positive.
duke@435 2790 __ delayed()->signx(src_pos, src_pos);
duke@435 2791 __ signx(dst_pos, dst_pos);
duke@435 2792
duke@435 2793 BLOCK_COMMENT("arraycopy_range_checks done");
duke@435 2794 }
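  // The asm above amounts to the following C-level guards (sketch only; the
  // caller has already established that src_pos, dst_pos and length are
  // non-negative):
  //
  //   if (src_pos + length > src->length())  goto L_failed;
  //   if (dst_pos + length > dst->length())  goto L_failed;
  //   src_pos = (int)src_pos;   // sign-extend to clear any dirty high 32 bits
  //   dst_pos = (int)dst_pos;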
duke@435 2795
duke@435 2796
duke@435 2797 //
duke@435 2798 // Generate generic array copy stubs
duke@435 2799 //
duke@435 2800 // Input:
duke@435 2801 // O0 - src oop
duke@435 2802 // O1 - src_pos
duke@435 2803 // O2 - dst oop
duke@435 2804 // O3 - dst_pos
duke@435 2805 // O4 - element count
duke@435 2806 //
duke@435 2807 // Output:
duke@435 2808 // O0 == 0 - success
duke@435 2809 // O0 == -1 - need to call System.arraycopy
duke@435 2810 //
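  // Expected caller-side usage, as a hedged sketch (the fallback is the
  // regular System.arraycopy call path, not a specific VM entry point):
  //
  //   int rc = generic_arraycopy(src, src_pos, dst, dst_pos, length);
  //   if (rc == 0)  return;      // copy (possibly of zero elements) is done
  //   // rc == -1: give up and let System.arraycopy redo the checks and
  //   // throw the appropriate exception if one of them fails.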
iveresov@2595 2811 address generate_generic_copy(const char *name,
iveresov@2595 2812 address entry_jbyte_arraycopy,
iveresov@2595 2813 address entry_jshort_arraycopy,
iveresov@2595 2814 address entry_jint_arraycopy,
iveresov@2595 2815 address entry_oop_arraycopy,
iveresov@2595 2816 address entry_jlong_arraycopy,
iveresov@2595 2817 address entry_checkcast_arraycopy) {
duke@435 2818 Label L_failed, L_objArray;
duke@435 2819
duke@435 2820 // Input registers
duke@435 2821 const Register src = O0; // source array oop
duke@435 2822 const Register src_pos = O1; // source position
duke@435 2823 const Register dst = O2; // destination array oop
duke@435 2824 const Register dst_pos = O3; // destination position
duke@435 2825 const Register length = O4; // elements count
duke@435 2826
duke@435 2827 // registers used as temp
duke@435 2828 const Register G3_src_klass = G3; // source array klass
duke@435 2829 const Register G4_dst_klass = G4; // destination array klass
duke@435 2830 const Register G5_lh = G5; // layout helper
duke@435 2831 const Register O5_temp = O5;
duke@435 2832
duke@435 2833 __ align(CodeEntryAlignment);
duke@435 2834 StubCodeMark mark(this, "StubRoutines", name);
duke@435 2835 address start = __ pc();
duke@435 2836
duke@435 2837 // bump this on entry, not on exit:
duke@435 2838 inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);
duke@435 2839
duke@435 2840 // In principle, the int arguments could be dirty.
duke@435 2841 //assert_clean_int(src_pos, G1);
duke@435 2842 //assert_clean_int(dst_pos, G1);
duke@435 2843 //assert_clean_int(length, G1);
duke@435 2844
duke@435 2845 //-----------------------------------------------------------------------
duke@435 2846 // Assembler stubs will be used for this call to arraycopy
duke@435 2847 // if the following conditions are met:
duke@435 2848 //
duke@435 2849 // (1) src and dst must not be null.
duke@435 2850 // (2) src_pos must not be negative.
duke@435 2851 // (3) dst_pos must not be negative.
duke@435 2852 // (4) length must not be negative.
duke@435 2853 // (5) src klass and dst klass should be the same and not NULL.
duke@435 2854 // (6) src and dst should be arrays.
duke@435 2855 // (7) src_pos + length must not exceed length of src.
duke@435 2856 // (8) dst_pos + length must not exceed length of dst.
duke@435 2857 BLOCK_COMMENT("arraycopy initial argument checks");
duke@435 2858
duke@435 2859 // if (src == NULL) return -1;
duke@435 2860 __ br_null(src, false, Assembler::pn, L_failed);
duke@435 2861
duke@435 2862 // if (src_pos < 0) return -1;
duke@435 2863 __ delayed()->tst(src_pos);
duke@435 2864 __ br(Assembler::negative, false, Assembler::pn, L_failed);
duke@435 2865 __ delayed()->nop();
duke@435 2866
duke@435 2867 // if (dst == NULL) return -1;
duke@435 2868 __ br_null(dst, false, Assembler::pn, L_failed);
duke@435 2869
duke@435 2870 // if (dst_pos < 0) return -1;
duke@435 2871 __ delayed()->tst(dst_pos);
duke@435 2872 __ br(Assembler::negative, false, Assembler::pn, L_failed);
duke@435 2873
duke@435 2874 // if (length < 0) return -1;
duke@435 2875 __ delayed()->tst(length);
duke@435 2876 __ br(Assembler::negative, false, Assembler::pn, L_failed);
duke@435 2877
duke@435 2878 BLOCK_COMMENT("arraycopy argument klass checks");
duke@435 2879 // get src->klass()
coleenp@548 2880 if (UseCompressedOops) {
coleenp@548 2881 __ delayed()->nop(); // ??? not good
coleenp@548 2882 __ load_klass(src, G3_src_klass);
coleenp@548 2883 } else {
coleenp@548 2884 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
coleenp@548 2885 }
duke@435 2886
duke@435 2887 #ifdef ASSERT
duke@435 2888 // assert(src->klass() != NULL);
duke@435 2889 BLOCK_COMMENT("assert klasses not null");
duke@435 2890 { Label L_a, L_b;
duke@435 2891 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
coleenp@548 2892 __ delayed()->nop();
duke@435 2893 __ bind(L_a);
duke@435 2894 __ stop("broken null klass");
duke@435 2895 __ bind(L_b);
coleenp@548 2896 __ load_klass(dst, G4_dst_klass);
duke@435 2897 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
duke@435 2898 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
duke@435 2899 BLOCK_COMMENT("assert done");
duke@435 2900 }
duke@435 2901 #endif
duke@435 2902
duke@435 2903 // Load layout helper
duke@435 2904 //
duke@435 2905 //   |array_tag|     | header_size | element_type |     |log2_element_size|
duke@435 2906 //    32        30    24            16             8     2                 0
duke@435 2907 //
duke@435 2908 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
duke@435 2909 //
duke@435 2910
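    // Decoding a layout helper value, as a C-level sketch (the shift and mask
    // constants are the Klass::_lh_* values also used below; field widths
    // follow the diagram above):
    //
    //   int tag        = ((juint)lh) >> Klass::_lh_array_tag_shift;   // 0x3 typeArray, 0x2 objArray
    //   int hsize      = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
    //   int log2_esize =  lh & Klass::_lh_log2_element_size_mask;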
duke@435 2911 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
duke@435 2912 Klass::layout_helper_offset_in_bytes();
duke@435 2913
duke@435 2914 // Load the 32-bit signed value. Use the br() instruction with it to check icc.
duke@435 2915 __ lduw(G3_src_klass, lh_offset, G5_lh);
duke@435 2916
coleenp@548 2917 if (UseCompressedOops) {
coleenp@548 2918 __ load_klass(dst, G4_dst_klass);
coleenp@548 2919 }
duke@435 2920 // Handle objArrays completely differently...
duke@435 2921 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
duke@435 2922 __ set(objArray_lh, O5_temp);
duke@435 2923 __ cmp(G5_lh, O5_temp);
duke@435 2924 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
coleenp@548 2925 if (UseCompressedOops) {
coleenp@548 2926 __ delayed()->nop();
coleenp@548 2927 } else {
coleenp@548 2928 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
coleenp@548 2929 }
duke@435 2930
duke@435 2931 // if (src->klass() != dst->klass()) return -1;
duke@435 2932 __ cmp(G3_src_klass, G4_dst_klass);
duke@435 2933 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
duke@435 2934 __ delayed()->nop();
duke@435 2935
duke@435 2936 // if (!src->is_Array()) return -1;
duke@435 2937 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
duke@435 2938 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
duke@435 2939
duke@435 2940 // At this point, it is known to be a typeArray (array_tag 0x3).
duke@435 2941 #ifdef ASSERT
duke@435 2942 __ delayed()->nop();
duke@435 2943 { Label L;
duke@435 2944 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
duke@435 2945 __ set(lh_prim_tag_in_place, O5_temp);
duke@435 2946 __ cmp(G5_lh, O5_temp);
duke@435 2947 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
duke@435 2948 __ delayed()->nop();
duke@435 2949 __ stop("must be a primitive array");
duke@435 2950 __ bind(L);
duke@435 2951 }
duke@435 2952 #else
duke@435 2953 __ delayed(); // match next insn to prev branch
duke@435 2954 #endif
duke@435 2955
duke@435 2956 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
duke@435 2957 O5_temp, G4_dst_klass, L_failed);
duke@435 2958
duke@435 2959 // typeArrayKlass
duke@435 2960 //
duke@435 2961 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
duke@435 2962 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
duke@435 2963 //
duke@435 2964
duke@435 2965 const Register G4_offset = G4_dst_klass; // array offset
duke@435 2966 const Register G3_elsize = G3_src_klass; // log2 element size
duke@435 2967
duke@435 2968 __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
duke@435 2969 __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset
duke@435 2970 __ add(src, G4_offset, src); // src array offset
duke@435 2971 __ add(dst, G4_offset, dst); // dst array offset
duke@435 2972 __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size
duke@435 2973
duke@435 2974 // next registers should be set before the jump to corresponding stub
duke@435 2975 const Register from = O0; // source array address
duke@435 2976 const Register to = O1; // destination array address
duke@435 2977 const Register count = O2; // elements count
duke@435 2978
duke@435 2979 // 'from', 'to', 'count' registers should be set in this order
duke@435 2980 // since they are the same as 'src', 'src_pos', 'dst'.
duke@435 2981
duke@435 2982 BLOCK_COMMENT("scale indexes to element size");
duke@435 2983 __ sll_ptr(src_pos, G3_elsize, src_pos);
duke@435 2984 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
duke@435 2985 __ add(src, src_pos, from); // src_addr
duke@435 2986 __ add(dst, dst_pos, to); // dst_addr
duke@435 2987
duke@435 2988 BLOCK_COMMENT("choose copy loop based on element size");
duke@435 2989 __ cmp(G3_elsize, 0);
iveresov@2595 2990 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
duke@435 2991 __ delayed()->signx(length, count); // length
duke@435 2992
duke@435 2993 __ cmp(G3_elsize, LogBytesPerShort);
iveresov@2595 2994 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
duke@435 2995 __ delayed()->signx(length, count); // length
duke@435 2996
duke@435 2997 __ cmp(G3_elsize, LogBytesPerInt);
iveresov@2595 2998 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
duke@435 2999 __ delayed()->signx(length, count); // length
duke@435 3000 #ifdef ASSERT
duke@435 3001 { Label L;
duke@435 3002 __ cmp(G3_elsize, LogBytesPerLong);
duke@435 3003 __ br(Assembler::equal, false, Assembler::pt, L);
duke@435 3004 __ delayed()->nop();
duke@435 3005 __ stop("must be long copy, but elsize is wrong");
duke@435 3006 __ bind(L);
duke@435 3007 }
duke@435 3008 #endif
iveresov@2595 3009 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
duke@435 3010 __ delayed()->signx(length, count); // length
duke@435 3011
duke@435 3012 // objArrayKlass
duke@435 3013 __ BIND(L_objArray);
duke@435 3014 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
duke@435 3015
duke@435 3016 Label L_plain_copy, L_checkcast_copy;
duke@435 3017 // test array classes for subtyping
duke@435 3018 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
duke@435 3019 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
duke@435 3020 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
duke@435 3021
duke@435 3022 // Identically typed arrays can be copied without element-wise checks.
duke@435 3023 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
duke@435 3024 O5_temp, G5_lh, L_failed);
duke@435 3025
duke@435 3026 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
duke@435 3027 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
coleenp@548 3028 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
coleenp@548 3029 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
duke@435 3030 __ add(src, src_pos, from); // src_addr
duke@435 3031 __ add(dst, dst_pos, to); // dst_addr
duke@435 3032 __ BIND(L_plain_copy);
iveresov@2595 3033 __ br(Assembler::always, false, Assembler::pt, entry_oop_arraycopy);
duke@435 3034 __ delayed()->signx(length, count); // length
duke@435 3035
duke@435 3036 __ BIND(L_checkcast_copy);
duke@435 3037 // live at this point: G3_src_klass, G4_dst_klass
duke@435 3038 {
duke@435 3039 // Before looking at dst.length, make sure dst is also an objArray.
duke@435 3040 // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
duke@435 3041 __ cmp(G5_lh, O5_temp);
duke@435 3042 __ br(Assembler::notEqual, false, Assembler::pn, L_failed);
duke@435 3043
duke@435 3044 // It is safe to examine both src.length and dst.length.
duke@435 3045 __ delayed(); // match next insn to prev branch
duke@435 3046 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
duke@435 3047 O5_temp, G5_lh, L_failed);
duke@435 3048
duke@435 3049 // Marshal the base address arguments now, freeing registers.
duke@435 3050 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
duke@435 3051 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
coleenp@548 3052 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
coleenp@548 3053 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
duke@435 3054 __ add(src, src_pos, from); // src_addr
duke@435 3055 __ add(dst, dst_pos, to); // dst_addr
duke@435 3056 __ signx(length, count); // length (reloaded)
duke@435 3057
duke@435 3058 Register sco_temp = O3; // this register is free now
duke@435 3059 assert_different_registers(from, to, count, sco_temp,
duke@435 3060 G4_dst_klass, G3_src_klass);
duke@435 3061
duke@435 3062 // Generate the type check.
duke@435 3063 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
duke@435 3064 Klass::super_check_offset_offset_in_bytes());
duke@435 3065 __ lduw(G4_dst_klass, sco_offset, sco_temp);
duke@435 3066 generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
duke@435 3067 O5_temp, L_plain_copy);
duke@435 3068
duke@435 3069 // Fetch destination element klass from the objArrayKlass header.
duke@435 3070 int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
duke@435 3071 objArrayKlass::element_klass_offset_in_bytes());
duke@435 3072
duke@435 3073 // the checkcast_copy loop needs two extra arguments:
duke@435 3074 __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass
duke@435 3075 // lduw(O4, sco_offset, O3); // sco of elem klass
duke@435 3076
iveresov@2595 3077 __ br(Assembler::always, false, Assembler::pt, entry_checkcast_arraycopy);
duke@435 3078 __ delayed()->lduw(O4, sco_offset, O3);
duke@435 3079 }
duke@435 3080
duke@435 3081 __ BIND(L_failed);
duke@435 3082 __ retl();
duke@435 3083 __ delayed()->sub(G0, 1, O0); // return -1
duke@435 3084 return start;
duke@435 3085 }
duke@435 3086
duke@435 3087 void generate_arraycopy_stubs() {
iveresov@2595 3088 address entry;
iveresov@2595 3089 address entry_jbyte_arraycopy;
iveresov@2595 3090 address entry_jshort_arraycopy;
iveresov@2595 3091 address entry_jint_arraycopy;
iveresov@2595 3092 address entry_oop_arraycopy;
iveresov@2595 3093 address entry_jlong_arraycopy;
iveresov@2595 3094 address entry_checkcast_arraycopy;
iveresov@2595 3095
iveresov@2606 3096 //*** jbyte
iveresov@2606 3097 // Always need aligned and unaligned versions
iveresov@2606 3098 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
iveresov@2606 3099 "jbyte_disjoint_arraycopy");
iveresov@2606 3100 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry,
iveresov@2606 3101 &entry_jbyte_arraycopy,
iveresov@2606 3102 "jbyte_arraycopy");
iveresov@2606 3103 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
iveresov@2606 3104 "arrayof_jbyte_disjoint_arraycopy");
iveresov@2606 3105 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
iveresov@2606 3106 "arrayof_jbyte_arraycopy");
iveresov@2606 3107
iveresov@2606 3108 //*** jshort
iveresov@2606 3109 // Always need aligned and unaligned versions
iveresov@2606 3110 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
iveresov@2606 3111 "jshort_disjoint_arraycopy");
iveresov@2606 3112 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
iveresov@2606 3113 &entry_jshort_arraycopy,
iveresov@2606 3114 "jshort_arraycopy");
iveresov@2595 3115 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
iveresov@2595 3116 "arrayof_jshort_disjoint_arraycopy");
iveresov@2595 3117 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
iveresov@2595 3118 "arrayof_jshort_arraycopy");
iveresov@2595 3119
iveresov@2606 3120 //*** jint
iveresov@2606 3121 // Aligned versions
iveresov@2606 3122 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
iveresov@2606 3123 "arrayof_jint_disjoint_arraycopy");
iveresov@2606 3124 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
iveresov@2606 3125 "arrayof_jint_arraycopy");
duke@435 3126 #ifdef _LP64
iveresov@2606 3127 // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
iveresov@2606 3128 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
iveresov@2606 3129 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
iveresov@2606 3130 "jint_disjoint_arraycopy");
iveresov@2606 3131 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
iveresov@2606 3132 &entry_jint_arraycopy,
iveresov@2606 3133 "jint_arraycopy");
iveresov@2606 3134 #else
iveresov@2606 3135 // In 32-bit, jints are always HeapWordSize aligned, so always use the aligned version
iveresov@2606 3136 // (in fact, in 32-bit we always have a pre-loop part even in the aligned version,
iveresov@2606 3137 // because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
iveresov@2606 3138 StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
iveresov@2606 3139 StubRoutines::_jint_arraycopy = StubRoutines::_arrayof_jint_arraycopy;
duke@435 3140 #endif
iveresov@2595 3141
iveresov@2606 3142
iveresov@2606 3143 //*** jlong
iveresov@2606 3144 // It is always aligned
iveresov@2606 3145 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
iveresov@2606 3146 "arrayof_jlong_disjoint_arraycopy");
iveresov@2606 3147 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
iveresov@2606 3148 "arrayof_jlong_arraycopy");
iveresov@2606 3149 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
iveresov@2606 3150 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
iveresov@2606 3151
iveresov@2606 3152
iveresov@2606 3153 //*** oops
iveresov@2606 3154 // Aligned versions
iveresov@2606 3155 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
iveresov@2606 3156 "arrayof_oop_disjoint_arraycopy");
iveresov@2606 3157 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
iveresov@2606 3158 "arrayof_oop_arraycopy");
iveresov@2606 3159 // Aligned versions without pre-barriers
iveresov@2606 3160 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
iveresov@2606 3161 "arrayof_oop_disjoint_arraycopy_uninit",
iveresov@2606 3162 /*dest_uninitialized*/true);
iveresov@2606 3163 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL,
iveresov@2606 3164 "arrayof_oop_arraycopy_uninit",
iveresov@2606 3165 /*dest_uninitialized*/true);
iveresov@2606 3166 #ifdef _LP64
iveresov@2606 3167 if (UseCompressedOops) {
iveresov@2606 3168 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
iveresov@2606 3169 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
iveresov@2606 3170 "oop_disjoint_arraycopy");
iveresov@2606 3171 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
iveresov@2606 3172 "oop_arraycopy");
iveresov@2606 3173 // Unaligned versions without pre-barriers
iveresov@2606 3174 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
iveresov@2606 3175 "oop_disjoint_arraycopy_uninit",
iveresov@2606 3176 /*dest_uninitialized*/true);
iveresov@2606 3177 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL,
iveresov@2606 3178 "oop_arraycopy_uninit",
iveresov@2606 3179 /*dest_uninitialized*/true);
iveresov@2606 3180 } else
iveresov@2606 3181 #endif
iveresov@2606 3182 {
iveresov@2606 3183 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
iveresov@2606 3184 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
iveresov@2606 3185 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
iveresov@2606 3186 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
iveresov@2606 3187 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
iveresov@2606 3188 }
iveresov@2606 3189
iveresov@2606 3190 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
iveresov@2606 3191 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
iveresov@2606 3192 /*dest_uninitialized*/true);
iveresov@2606 3193
iveresov@2595 3194 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
iveresov@2595 3195 entry_jbyte_arraycopy,
iveresov@2595 3196 entry_jshort_arraycopy,
iveresov@2595 3197 entry_jint_arraycopy,
iveresov@2595 3198 entry_jlong_arraycopy);
iveresov@2595 3199 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
iveresov@2595 3200 entry_jbyte_arraycopy,
iveresov@2595 3201 entry_jshort_arraycopy,
iveresov@2595 3202 entry_jint_arraycopy,
iveresov@2595 3203 entry_oop_arraycopy,
iveresov@2595 3204 entry_jlong_arraycopy,
iveresov@2595 3205 entry_checkcast_arraycopy);
never@2118 3206
never@2118 3207 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
never@2118 3208 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
never@2118 3209 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
never@2118 3210 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
never@2118 3211 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
never@2118 3212 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
duke@435 3213 }
duke@435 3214
duke@435 3215 void generate_initial() {
duke@435 3216 // Generates all stubs and initializes the entry points
duke@435 3217
duke@435 3218 //------------------------------------------------------------------------------------------------------------------------
duke@435 3219 // entry points that exist in all platforms
duke@435 3220 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
duke@435 3221 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
duke@435 3222 StubRoutines::_forward_exception_entry = generate_forward_exception();
duke@435 3223
duke@435 3224 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
duke@435 3225 StubRoutines::_catch_exception_entry = generate_catch_exception();
duke@435 3226
duke@435 3227 //------------------------------------------------------------------------------------------------------------------------
duke@435 3228 // entry points that are platform specific
duke@435 3229 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
duke@435 3230
duke@435 3231 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
duke@435 3232 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
duke@435 3233
duke@435 3234 #if !defined(COMPILER2) && !defined(_LP64)
duke@435 3235 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
duke@435 3236 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
duke@435 3237 StubRoutines::_atomic_add_entry = generate_atomic_add();
duke@435 3238 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry;
duke@435 3239 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry;
duke@435 3240 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
duke@435 3241 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
duke@435 3242 #endif // COMPILER2 !=> _LP64
duke@435 3243 }
duke@435 3244
duke@435 3245
duke@435 3246 void generate_all() {
duke@435 3247 // Generates all stubs and initializes the entry points
duke@435 3248
kvn@1077 3249 // Generate partial_subtype_check first here since its code depends on
kvn@1077 3250 // UseZeroBaseCompressedOops which is defined after heap initialization.
kvn@1077 3251 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
duke@435 3252 // These entry points require SharedInfo::stack0 to be set up in non-core builds
duke@435 3253 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
dcubed@451 3254 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
duke@435 3255 StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
duke@435 3256 StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
duke@435 3257 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
duke@435 3258 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
duke@435 3259
duke@435 3260 StubRoutines::_handler_for_unsafe_access_entry =
duke@435 3261 generate_handler_for_unsafe_access();
duke@435 3262
duke@435 3263 // support for verify_oop (must happen after universe_init)
duke@435 3264 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();
duke@435 3265
duke@435 3266 // arraycopy stubs used by compilers
duke@435 3267 generate_arraycopy_stubs();
never@1609 3268
never@1609 3269 // Don't initialize the platform math functions since sparc
never@1609 3270 // doesn't have intrinsics for these operations.
duke@435 3271 }
duke@435 3272
duke@435 3273
duke@435 3274 public:
duke@435 3275 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
duke@435 3276 // replace the standard masm with a special one:
duke@435 3277 _masm = new MacroAssembler(code);
duke@435 3278
duke@435 3279 _stub_count = !all ? 0x100 : 0x200;
duke@435 3280 if (all) {
duke@435 3281 generate_all();
duke@435 3282 } else {
duke@435 3283 generate_initial();
duke@435 3284 }
duke@435 3285
duke@435 3286 // make sure this stub is available for all local calls
duke@435 3287 if (_atomic_add_stub.is_unbound()) {
duke@435 3288 // generate a second time, if necessary
duke@435 3289 (void) generate_atomic_add();
duke@435 3290 }
duke@435 3291 }
duke@435 3292
duke@435 3293
duke@435 3294 private:
duke@435 3295 int _stub_count;
duke@435 3296 void stub_prolog(StubCodeDesc* cdesc) {
duke@435 3297 # ifdef ASSERT
duke@435 3298 // put extra information in the stub code, to make it more readable
duke@435 3299 #ifdef _LP64
duke@435 3300 // Write the high part of the address
duke@435 3301 // [RGV] Check if there is a dependency on the size of this prolog
duke@435 3302 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
duke@435 3303 #endif
duke@435 3304 __ emit_data((intptr_t)cdesc, relocInfo::none);
duke@435 3305 __ emit_data(++_stub_count, relocInfo::none);
duke@435 3306 # endif
duke@435 3307 align(true);
duke@435 3308 }
duke@435 3309
duke@435 3310 void align(bool at_header = false) {
duke@435 3311 // %%%%% move this constant somewhere else
duke@435 3312 // UltraSPARC cache line size is 8 instructions:
duke@435 3313 const unsigned int icache_line_size = 32;
duke@435 3314 const unsigned int icache_half_line_size = 16;
duke@435 3315
duke@435 3316 if (at_header) {
duke@435 3317 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
duke@435 3318 __ emit_data(0, relocInfo::none);
duke@435 3319 }
duke@435 3320 } else {
duke@435 3321 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
duke@435 3322 __ nop();
duke@435 3323 }
duke@435 3324 }
duke@435 3325 }
duke@435 3326
duke@435 3327 }; // end class declaration
duke@435 3328
duke@435 3329 void StubGenerator_generate(CodeBuffer* code, bool all) {
duke@435 3330 StubGenerator g(code, all);
duke@435 3331 }
