src/cpu/mips/vm/stubGenerator_mips_64.cpp

author:      aoqi
date:        Mon, 13 Nov 2017 15:49:42 +0800
changeset:   8009:0477693968a6
parent:      8005:b5abf640a085
child:       9144:cecfc245b19a
permissions: -rw-r--r--

#5963 wrong frame offset (SP) in StackOverflowError handler
Summary: push/pop before/after bang_stack_with_offset is removed. compiler/6865265/StackOverflowBug.java passed.
This patch also includes code cleanup and code style fix.

aoqi@1 1 /*
aoqi@1 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
aoqi@1 3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
aoqi@1 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@1 5 *
aoqi@1 6 * This code is free software; you can redistribute it and/or modify it
aoqi@1 7 * under the terms of the GNU General Public License version 2 only, as
aoqi@1 8 * published by the Free Software Foundation.
aoqi@1 9 *
aoqi@1 10 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@1 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@1 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@1 13 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@1 14 * accompanied this code).
aoqi@1 15 *
aoqi@1 16 * You should have received a copy of the GNU General Public License version
aoqi@1 17 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@1 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@1 19 *
aoqi@1 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@1 21 * or visit www.oracle.com if you need additional information or have any
aoqi@1 22 * questions.
aoqi@1 23 *
aoqi@1 24 */
aoqi@1 25
aoqi@1 26 #include "precompiled.hpp"
aoqi@1 27 #include "asm/macroAssembler.hpp"
aoqi@1 28 #include "asm/macroAssembler.inline.hpp"
aoqi@1 29 #include "interpreter/interpreter.hpp"
aoqi@1 30 #include "nativeInst_mips.hpp"
aoqi@1 31 #include "oops/instanceOop.hpp"
aoqi@1 32 #include "oops/method.hpp"
aoqi@1 33 #include "oops/objArrayKlass.hpp"
aoqi@1 34 #include "oops/oop.inline.hpp"
aoqi@1 35 #include "prims/methodHandles.hpp"
aoqi@1 36 #include "runtime/frame.inline.hpp"
aoqi@1 37 #include "runtime/handles.inline.hpp"
aoqi@1 38 #include "runtime/sharedRuntime.hpp"
aoqi@1 39 #include "runtime/stubCodeGenerator.hpp"
aoqi@1 40 #include "runtime/stubRoutines.hpp"
aoqi@1 41 #include "runtime/thread.inline.hpp"
aoqi@1 42 #include "utilities/top.hpp"
aoqi@1 43 #ifdef COMPILER2
aoqi@1 44 #include "opto/runtime.hpp"
aoqi@1 45 #endif
aoqi@1 46
aoqi@1 47 // Declaration and definition of StubGenerator (no .hpp file).
aoqi@1 48 // For a more detailed description of the stub routine structure
aoqi@1 49 // see the comment in stubRoutines.hpp
aoqi@1 50
aoqi@1 51 #define __ _masm->
aoqi@6880 52 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
aoqi@1 53 //#define a__ ((Assembler*)_masm)->
aoqi@1 54
aoqi@1 55 //#ifdef PRODUCT
aoqi@1 56 //#define BLOCK_COMMENT(str) /* nothing */
aoqi@1 57 //#else
aoqi@1 58 //#define BLOCK_COMMENT(str) __ block_comment(str)
aoqi@1 59 //#endif
aoqi@1 60
aoqi@1 61 //#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
aoqi@1 62 const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
aoqi@1 63
aoqi@1 64 // Stub Code definitions
aoqi@1 65
aoqi@1 66 static address handle_unsafe_access() {
aoqi@1 67 JavaThread* thread = JavaThread::current();
aoqi@1 68 address pc = thread->saved_exception_pc();
aoqi@1 69 // pc is the instruction which we must emulate
aoqi@1 70 // doing a no-op is fine: return garbage from the load
aoqi@1 71 // therefore, compute npc
aoqi@6880 72 address npc = (address)((unsigned long)pc + sizeof(unsigned long));
aoqi@1 73
aoqi@1 74 // request an async exception
aoqi@1 75 thread->set_pending_unsafe_access_error();
aoqi@1 76
aoqi@1 77 // return address of next instruction to execute
aoqi@1 78 return npc;
aoqi@1 79 }
aoqi@1 80
aoqi@1 81 class StubGenerator: public StubCodeGenerator {
aoqi@1 82 private:
aoqi@1 83
aoqi@1 84 // ABI mips n64
aoqi@1 85 // This figure is not the MIPS ABI; it shows the call-Java-from-C ABI.
aoqi@1 86 // Call stubs are used to call Java from C
aoqi@1 87 //
aoqi@1 88 // [ return_from_Java ]
aoqi@1 89 // [ argument word n-1 ] <--- sp
aoqi@1 90 // ...
aoqi@1 91 // [ argument word 0 ]
aoqi@1 92 // ...
aoqi@6880 93 //-10 [ S6 ]
aoqi@6880 94 // -9 [ S5 ]
aoqi@6880 95 // -8 [ S4 ]
aoqi@1 96 // -7 [ S3 ]
aoqi@6880 97 // -6 [ S0 ]
aoqi@6880 98 // -5 [ TSR(S2) ]
aoqi@1 99 // -4 [ LVP(S7) ]
aoqi@1 100 // -3 [ BCP(S1) ]
aoqi@1 101 // -2 [ saved fp ] <--- fp_after_call
aoqi@6880 102 // -1 [ return address ]
aoqi@1 103 // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
aoqi@1 104 // 1 [ result ] <--- a1
aoqi@1 105 // 2 [ result_type ] <--- a2
aoqi@1 106 // 3 [ method ] <--- a3
aoqi@1 107 // 4 [ entry_point ] <--- a4
aoqi@1 108 // 5 [ parameters ] <--- a5
aoqi@1 109 // 6 [ parameter_size ] <--- a6
aoqi@1 110 // 7 [ thread ] <--- a7
aoqi@1 111
aoqi@1 112 //
aoqi@1 113 // _LP64: n64 does not save parameters on the stack (sp).
aoqi@1 114 //
aoqi@1 115 // [ return_from_Java ]
aoqi@1 116 // [ argument word n-1 ] <--- sp
aoqi@1 117 // ...
aoqi@1 118 // [ argument word 0 ]
aoqi@1 119 // ...
aoqi@1 120 //-14 [ thread ]
aoqi@1 121 //-13 [ result_type ] <--- a2
aoqi@1 122 //-12 [ result ] <--- a1
aoqi@1 123 //-11 [ ptr. to call wrapper ] <--- a0
aoqi@6880 124 //-10 [ S6 ]
aoqi@6880 125 // -9 [ S5 ]
aoqi@6880 126 // -8 [ S4 ]
aoqi@1 127 // -7 [ S3 ]
aoqi@6880 128 // -6 [ S0 ]
aoqi@6880 129 // -5 [ TSR(S2) ]
aoqi@1 130 // -4 [ LVP(S7) ]
aoqi@1 131 // -3 [ BCP(S1) ]
aoqi@1 132 // -2 [ saved fp ] <--- fp_after_call
aoqi@6880 133 // -1 [ return address ]
aoqi@6880 134 // 0 [ ] <--- old sp
aoqi@1 135 /*
aoqi@1 136 * 2014/01/16 Fu: Find the right place in the call_stub for GP.
aoqi@6880 137 * GP will point to the starting point of Interpreter::dispatch_table(itos).
aoqi@6880 138 * It should be saved/restored before/after Java calls.
aoqi@1 139 *
aoqi@1 140 */
aoqi@6880 141 enum call_stub_layout {
aoqi@6880 142 RA_off = -1,
aoqi@6880 143 FP_off = -2,
aoqi@6880 144 BCP_off = -3,
aoqi@6880 145 LVP_off = -4,
aoqi@6880 146 TSR_off = -5,
aoqi@6880 147 S1_off = -6,
aoqi@6880 148 S3_off = -7,
aoqi@6880 149 S4_off = -8,
aoqi@6880 150 S5_off = -9,
aoqi@6880 151 S6_off = -10,
aoqi@6880 152 result_off = -11,
aoqi@6880 153 result_type_off = -12,
aoqi@6880 154 thread_off = -13,
aoqi@6880 155 total_off = thread_off - 3,
aoqi@6880 156 GP_off = -16,
aoqi@6880 157 };
aoqi@1 158
aoqi@1 159 address generate_call_stub(address& return_address) {
aoqi@1 160
aoqi@1 161 StubCodeMark mark(this, "StubRoutines", "call_stub");
aoqi@1 162 address start = __ pc();
aoqi@1 163
aoqi@1 164 // same as in generate_catch_exception()!
aoqi@1 165
aoqi@1 166 // stub code
aoqi@1 167 // save ra and fp
aoqi@1 168 __ sd(RA, SP, RA_off * wordSize);
aoqi@1 169 __ sd(FP, SP, FP_off * wordSize);
aoqi@1 170 __ sd(BCP, SP, BCP_off * wordSize);
aoqi@1 171 __ sd(LVP, SP, LVP_off * wordSize);
aoqi@1 172 __ sd(GP, SP, GP_off * wordSize);
aoqi@1 173 __ sd(TSR, SP, TSR_off * wordSize);
aoqi@1 174 __ sd(S1, SP, S1_off * wordSize);
aoqi@1 175 __ sd(S3, SP, S3_off * wordSize);
aoqi@1 176 __ sd(S4, SP, S4_off * wordSize);
aoqi@1 177 __ sd(S5, SP, S5_off * wordSize);
aoqi@1 178 __ sd(S6, SP, S6_off * wordSize);
aoqi@1 179
aoqi@1 180
fujie@368 181 __ set64(GP, (long)Interpreter::dispatch_table(itos));
aoqi@6880 182
aoqi@1 183 // I think 14 is the max gap between the arguments and the callee-saved registers
aoqi@1 184 __ daddi(FP, SP, (-2) * wordSize);
aoqi@1 185 __ daddi(SP, SP, total_off * wordSize);
aoqi@1 186 __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
aoqi@1 187 __ sd(A1, FP, result_off * wordSize);
aoqi@1 188 __ sd(A2, FP, result_type_off * wordSize);
aoqi@1 189 __ sd(A7, FP, thread_off * wordSize);
aoqi@1 190
aoqi@1 191 #ifdef OPT_THREAD
aoqi@1 192 __ move(TREG, A7);
aoqi@1 193 #endif
aoqi@1 194 // added for compressed oops
aoqi@1 195 __ reinit_heapbase();
aoqi@1 196
aoqi@1 197 #ifdef ASSERT
aoqi@1 198 // make sure we have no pending exceptions
aoqi@6880 199 {
aoqi@1 200 Label L;
aoqi@6880 201 __ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 202 __ beq(AT, R0, L);
aoqi@6880 203 __ delayed()->nop();
aoqi@6880 204 /* FIXME: not sure how to implement stop() on MIPS; revisit in the future */
aoqi@6880 205 __ stop("StubRoutines::call_stub: entered with pending exception");
aoqi@6880 206 __ bind(L);
aoqi@1 207 }
aoqi@1 208 #endif
aoqi@1 209
aoqi@1 210 // pass parameters if any
aoqi@1 211 // A5: parameter
aoqi@1 212 // A6: parameter_size
aoqi@1 213 // T0: parameter_size_tmp(--)
aoqi@1 214 // T2: offset(++)
aoqi@1 215 // T3: tmp
aoqi@1 216 Label parameters_done;
aoqi@1 217 // check whether parameter_size equals 0
aoqi@1 218 __ beq(A6, R0, parameters_done);
aoqi@1 219 __ delayed()->nop();
aoqi@1 220 __ dsll(AT, A6, Interpreter::logStackElementSize);
aoqi@6880 221 __ dsub(SP, SP, AT);
aoqi@6880 222 __ move(AT, -StackAlignmentInBytes);
aoqi@6880 223 __ andr(SP, SP , AT);
aoqi@1 224 // Copy Java parameters in reverse order (receiver last)
aoqi@1 225 // Note that the argument order is inverted in the process
aoqi@1 226 // source is A5[T0: N-1..0]
aoqi@1 227 // dest is SP[T2: 0..N-1]
aoqi@1 228 Label loop;
aoqi@1 229 __ move(T0, A6);
aoqi@1 230 __ move(T2, R0);
aoqi@1 231 __ bind(loop);
aoqi@6880 232
aoqi@1 233 // get parameter
aoqi@6880 234 __ dsll(T3, T0, LogBytesPerWord);
aoqi@6880 235 __ dadd(T3, T3, A5);
aoqi@1 236 __ ld(AT, T3, -wordSize);
aoqi@6880 237 __ dsll(T3, T2, LogBytesPerWord);
aoqi@6880 238 __ dadd(T3, T3, SP);
aoqi@1 239 __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
aoqi@6880 240 __ daddi(T2, T2, 1);
aoqi@6880 241 __ daddi(T0, T0, -1);
aoqi@1 242 __ bne(T0, R0, loop);
aoqi@1 243 __ delayed()->nop();
aoqi@1 244 // advance to next parameter
aoqi@6880 245
aoqi@1 246 // call Java function
aoqi@1 247 __ bind(parameters_done);
aoqi@6880 248
aoqi@1 249 // receiver in V0, methodOop in Rmethod
aoqi@6880 250
aoqi@1 251 __ move(Rmethod, A3);
aoqi@1 252 __ move(Rsender, SP); //set sender sp
aoqi@1 253 __ jalr(A4);
aoqi@1 254 __ delayed()->nop();
aoqi@1 255 return_address = __ pc();
aoqi@6880 256
aoqi@1 257 Label common_return;
aoqi@1 258 __ bind(common_return);
aoqi@6880 259
aoqi@1 260 // store result depending on type
aoqi@1 261 // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
aoqi@6880 262 __ ld(T0, FP, result_off * wordSize); // result --> T0
aoqi@1 263 Label is_long, is_float, is_double, exit;
aoqi@6880 264 __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2
aoqi@1 265 __ daddi(T3, T2, (-1) * T_LONG);
aoqi@1 266 __ beq(T3, R0, is_long);
aoqi@1 267 __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
aoqi@1 268 __ beq(T3, R0, is_float);
aoqi@1 269 __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
aoqi@1 270 __ beq(T3, R0, is_double);
aoqi@1 271 __ delayed()->nop();
aoqi@6880 272
aoqi@1 273 // handle T_INT case
aoqi@1 274 __ sd(V0, T0, 0 * wordSize);
aoqi@1 275 __ bind(exit);
aoqi@6880 276
aoqi@6880 277 // restore
aoqi@1 278 __ daddi(SP, FP, 2 * wordSize );
aoqi@1 279 __ ld(RA, SP, RA_off * wordSize);
aoqi@1 280 __ ld(FP, SP, FP_off * wordSize);
aoqi@1 281 __ ld(BCP, SP, BCP_off * wordSize);
aoqi@1 282 __ ld(LVP, SP, LVP_off * wordSize);
aoqi@1 283 __ ld(GP, SP, GP_off * wordSize);
aoqi@1 284 __ ld(TSR, SP, TSR_off * wordSize);
aoqi@1 285
aoqi@1 286 __ ld(S1, SP, S1_off * wordSize);
aoqi@1 287 __ ld(S3, SP, S3_off * wordSize);
aoqi@1 288 __ ld(S4, SP, S4_off * wordSize);
aoqi@1 289 __ ld(S5, SP, S5_off * wordSize);
aoqi@1 290 __ ld(S6, SP, S6_off * wordSize);
aoqi@1 291
aoqi@1 292 // return
aoqi@1 293 __ jr(RA);
aoqi@1 294 __ delayed()->nop();
aoqi@6880 295
aoqi@1 296 // handle return types different from T_INT
aoqi@1 297 __ bind(is_long);
aoqi@1 298 __ sd(V0, T0, 0 * wordSize);
aoqi@1 299 //__ sd(V1, T0, 1 * wordSize);
aoqi@35 300 //__ sd(R0, T0, 1 * wordSize);
aoqi@1 301 __ b(exit);
aoqi@1 302 __ delayed()->nop();
aoqi@6880 303
aoqi@1 304 __ bind(is_float);
aoqi@1 305 __ swc1(F0, T0, 0 * wordSize);
aoqi@1 306 __ b(exit);
aoqi@1 307 __ delayed()->nop();
aoqi@6880 308
aoqi@1 309 __ bind(is_double);
aoqi@1 310 __ sdc1(F0, T0, 0 * wordSize);
aoqi@1 311 __ b(exit);
aoqi@1 312 __ delayed()->nop();
aoqi@1 313 // FIXME: the 1.6 MIPS version adds an FPU operation here
aoqi@1 314 StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
aoqi@1 315 __ b(common_return);
aoqi@6880 316 __ delayed()->nop();
aoqi@1 317 return start;
aoqi@1 318 }
aoqi@1 319
aoqi@1 320 // Return point for a Java call if there's an exception thrown in
aoqi@1 321 // Java code. The exception is caught and transformed into a
aoqi@1 322 // pending exception stored in JavaThread that can be tested from
aoqi@1 323 // within the VM.
aoqi@1 324 //
aoqi@1 325 // Note: Usually the parameters are removed by the callee. In case
aoqi@1 326 // of an exception crossing an activation frame boundary, that is
aoqi@1 327 // not the case if the callee is compiled code => need to setup the
aoqi@1 328 // rsp.
aoqi@1 329 //
aoqi@1 330 // rax: exception oop
aoqi@1 331
aoqi@1 332 address generate_catch_exception() {
aoqi@1 333 StubCodeMark mark(this, "StubRoutines", "catch_exception");
aoqi@1 334 address start = __ pc();
aoqi@1 335
aoqi@1 336 Register thread = TREG;
aoqi@1 337
aoqi@1 338 // get thread directly
aoqi@1 339 #ifndef OPT_THREAD
aoqi@1 340 __ ld(thread, FP, thread_off * wordSize);
aoqi@1 341 #endif
aoqi@1 342
aoqi@1 343 #ifdef ASSERT
aoqi@1 344 // verify that threads correspond
aoqi@1 345 { Label L;
aoqi@1 346 __ get_thread(T8);
aoqi@1 347 __ beq(T8, thread, L);
aoqi@1 348 __ delayed()->nop();
aoqi@1 349 __ stop("StubRoutines::catch_exception: threads must correspond");
aoqi@1 350 __ bind(L);
aoqi@1 351 }
aoqi@1 352 #endif
aoqi@1 353 // set pending exception
aoqi@1 354 __ verify_oop(V0);
aoqi@1 355 __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 356 __ li(AT, (long)__FILE__);
aoqi@1 357 __ sd(AT, thread, in_bytes(Thread::exception_file_offset ()));
aoqi@1 358 __ li(AT, (long)__LINE__);
aoqi@1 359 __ sd(AT, thread, in_bytes(Thread::exception_line_offset ()));
aoqi@1 360
aoqi@1 361 // complete return to VM
aoqi@1 362 assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
aoqi@1 363 __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
aoqi@1 364 __ delayed()->nop();
aoqi@1 365
aoqi@1 366 return start;
aoqi@1 367 }
aoqi@1 368
aoqi@1 369 // Continuation point for runtime calls returning with a pending
aoqi@1 370 // exception. The pending exception check happened in the runtime
aoqi@1 371 // or native call stub. The pending exception in Thread is
aoqi@1 372 // converted into a Java-level exception.
aoqi@1 373 //
aoqi@1 374 // Contract with Java-level exception handlers:
aoqi@1 375 // rax: exception
aoqi@1 376 // rdx: throwing pc
aoqi@1 377 //
aoqi@1 378 // NOTE: At entry of this stub, exception-pc must be on stack !!
aoqi@1 379
aoqi@1 380 address generate_forward_exception() {
aoqi@1 381 StubCodeMark mark(this, "StubRoutines", "forward exception");
aoqi@1 382 //Register thread = TREG;
aoqi@1 383 Register thread = TREG;
aoqi@1 384 address start = __ pc();
aoqi@1 385
aoqi@6880 386 // Upon entry, the sp points to the return address returning into
aoqi@6880 387 // Java (interpreted or compiled) code; i.e., the return address
aoqi@1 388 // becomes the throwing pc.
aoqi@1 389 //
aoqi@6880 390 // Arguments pushed before the runtime call are still on the stack
aoqi@6880 391 // but the exception handler will reset the stack pointer ->
aoqi@6880 392 // ignore them. A potential result in registers can be ignored as
aoqi@6880 393 // well.
aoqi@1 394
aoqi@1 395 #ifndef OPT_THREAD
aoqi@1 396 __ get_thread(thread);
aoqi@1 397 #endif
aoqi@8009 398 #ifdef ASSERT
aoqi@8009 399 // make sure this code is only executed if there is a pending exception
aoqi@6880 400 {
aoqi@6880 401 Label L;
aoqi@1 402 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 403 __ bne(AT, R0, L);
aoqi@1 404 __ delayed()->nop();
aoqi@1 405 __ stop("StubRoutines::forward exception: no pending exception (1)");
aoqi@1 406 __ bind(L);
aoqi@1 407 }
aoqi@1 408 #endif
aoqi@1 409
aoqi@1 410 // compute exception handler into T9
aoqi@1 411 __ ld(A1, SP, 0);
aoqi@1 412 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
aoqi@1 413 __ move(T9, V0);
aoqi@1 414 __ pop(V1);
aoqi@1 415
aoqi@1 416 #ifndef OPT_THREAD
aoqi@1 417 __ get_thread(thread);
aoqi@1 418 #endif
aoqi@1 419 __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 420 __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 421
aoqi@1 422 #ifdef ASSERT
aoqi@1 423 // make sure exception is set
aoqi@6880 424 {
aoqi@6880 425 Label L;
aoqi@1 426 __ bne(V0, R0, L);
aoqi@1 427 __ delayed()->nop();
aoqi@1 428 __ stop("StubRoutines::forward exception: no pending exception (2)");
aoqi@1 429 __ bind(L);
aoqi@1 430 }
aoqi@1 431 #endif
aoqi@1 432
aoqi@1 433 // continue at exception handler (return address removed)
aoqi@1 434 // V0: exception
aoqi@1 435 // T9: exception handler
aoqi@1 436 // V1: throwing pc
aoqi@1 437 __ verify_oop(V0);
aoqi@1 438 __ jr(T9);
aoqi@1 439 __ delayed()->nop();
aoqi@1 440
aoqi@1 441 return start;
aoqi@1 442 }
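
  // Roughly, the stub above performs the following (a C++-ish sketch, not part
  // of the generated code; V0/V1/T9 are the registers used above):
  //
  //   address throwing_pc = *(address*)SP;   // return address pushed by the caller
  //   address handler =
  //       SharedRuntime::exception_handler_for_return_address(thread, throwing_pc);
  //   oop exception = thread->pending_exception();
  //   thread->clear_pending_exception();
  //   // pop the return address, then jump to handler with
  //   // exception in V0 and throwing_pc in V1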
aoqi@1 443
aoqi@1 444 // Support for intptr_t get_previous_fp()
aoqi@1 445 //
aoqi@1 446 // This routine is used to find the previous frame pointer for the
aoqi@1 447 // caller (current_frame_guess). This is used as part of debugging,
aoqi@1 448 // when ps() is seemingly lost trying to find frames.
aoqi@1 449 // This code assumes that the caller (current_frame_guess) has a frame.
aoqi@1 450 address generate_get_previous_fp() {
aoqi@1 451 StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
aoqi@1 452 const Address old_fp (FP, 0);
aoqi@1 453 const Address older_fp (V0, 0);
aoqi@1 454 address start = __ pc();
aoqi@6880 455 __ enter();
aoqi@1 456 __ lw(V0, old_fp); // callers fp
aoqi@1 457 __ lw(V0, older_fp); // the frame for ps()
aoqi@1 458 __ leave();
aoqi@1 459 __ jr(RA);
aoqi@1 460 __ delayed()->nop();
aoqi@1 461 return start;
aoqi@1 462 }
aoqi@6880 463
aoqi@1 464 // The following routine generates a subroutine to throw an
aoqi@1 465 // asynchronous UnknownError when an unsafe access gets a fault that
aoqi@1 466 // could not be reasonably prevented by the programmer. (Example:
aoqi@1 467 // SIGBUS/OBJERR.)
aoqi@1 468 address generate_handler_for_unsafe_access() {
aoqi@6880 469 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
aoqi@6880 470 address start = __ pc();
aoqi@6880 471 __ pushad(); // push registers
aoqi@6880 472 // Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
aoqi@6880 473 __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
aoqi@6880 474 __ delayed()->nop();
aoqi@6880 475 __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord);
aoqi@6880 476 __ popad();
aoqi@6880 477 __ jr(RA);
aoqi@6880 478 __ delayed()->nop();
aoqi@6880 479 return start;
aoqi@1 480 }
aoqi@1 481
aoqi@1 482 // Non-destructive plausibility checks for oops
aoqi@1 483 //
aoqi@1 484 // Arguments:
aoqi@1 485 // all args on stack!
aoqi@1 486 //
aoqi@1 487 // Stack after saving c_rarg3:
aoqi@1 488 // [tos + 0]: saved c_rarg3
aoqi@1 489 // [tos + 1]: saved c_rarg2
aoqi@1 490 // [tos + 2]: saved r12 (several TemplateTable methods use it)
aoqi@1 491 // [tos + 3]: saved flags
aoqi@1 492 // [tos + 4]: return address
aoqi@1 493 // * [tos + 5]: error message (char*)
aoqi@1 494 // * [tos + 6]: object to verify (oop)
aoqi@1 495 // * [tos + 7]: saved rax - saved by caller and bashed
aoqi@1 496 // * = popped on exit
aoqi@1 497 address generate_verify_oop() {
aoqi@6880 498 StubCodeMark mark(this, "StubRoutines", "verify_oop");
aoqi@6880 499 address start = __ pc();
aoqi@6880 500 __ reinit_heapbase();
aoqi@6880 501 __ verify_oop_subroutine();
aoqi@1 502 address end = __ pc();
aoqi@6880 503 return start;
aoqi@1 504 }
aoqi@1 505
aoqi@1 506 //
aoqi@1 507 // Generate overlap test for array copy stubs
aoqi@1 508 //
aoqi@1 509 // Input:
aoqi@1 510 // A0 - array1
aoqi@1 511 // A1 - array2
aoqi@1 512 // A2 - element count
aoqi@1 513 //
aoqi@1 514 // Note: this code can only use %eax, %ecx, and %edx
aoqi@1 515 //
aoqi@1 516
aoqi@6880 517 // use T9 as temp
aoqi@1 518 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
aoqi@1 519 int elem_size = 1 << log2_elem_size;
aoqi@1 520 Address::ScaleFactor sf = Address::times_1;
aoqi@1 521
aoqi@1 522 switch (log2_elem_size) {
aoqi@1 523 case 0: sf = Address::times_1; break;
aoqi@1 524 case 1: sf = Address::times_2; break;
aoqi@1 525 case 2: sf = Address::times_4; break;
aoqi@1 526 case 3: sf = Address::times_8; break;
aoqi@1 527 }
aoqi@1 528
aoqi@1 529 __ dsll(AT, A2, sf);
aoqi@6880 530 __ dadd(AT, AT, A0);
aoqi@6880 531 __ lea(T9, Address(AT, -elem_size));
aoqi@6880 532 __ dsub(AT, A1, A0);
aoqi@6880 533 __ blez(AT, no_overlap_target);
aoqi@6880 534 __ delayed()->nop();
aoqi@6880 535 __ dsub(AT, A1, T9);
aoqi@6880 536 __ bgtz(AT, no_overlap_target);
aoqi@6880 537 __ delayed()->nop();
aoqi@1 538
aoqi@6880 539 // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., then goto no_overlap_target
aoqi@8 540 Label L;
aoqi@8 541 __ bgez(A0, L);
aoqi@6880 542 __ delayed()->nop();
aoqi@8 543 __ bgtz(A1, no_overlap_target);
aoqi@6880 544 __ delayed()->nop();
aoqi@8 545 __ bind(L);
aoqi@8 546
aoqi@1 547 }
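
  // A rough C sketch of the test above (not part of the stub; register roles
  // taken from the code: A0 = src, A1 = dst, A2 = element count):
  //
  //   char* last_src = (char*)src + (count << log2_elem_size) - elem_size;
  //   if (dst <= src || dst > last_src) goto no_overlap_target;  // no backward copy needed
  //   // Signed-subtraction guard: if src is in the high half of the address
  //   // space and dst is in the low half, the differences above overflow,
  //   // so that case is treated as non-overlapping as well.
  //   if ((intptr_t)src < 0 && (intptr_t)dst > 0) goto no_overlap_target;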
aoqi@1 548
aoqi@1 549 //
aoqi@1 550 // Generate store check for array
aoqi@1 551 //
aoqi@1 552 // Input:
aoqi@1 553 // T0 - starting address(edi)
aoqi@1 554 // T1 - element count (ecx)
aoqi@1 555 //
aoqi@1 556 // The 2 input registers are overwritten
aoqi@1 557 //
aoqi@1 558
aoqi@1 559
fujie@8002 560 void array_store_check(Register tmp) {
fujie@8002 561 assert_different_registers(tmp, AT, T0, T1);
aoqi@6880 562 BarrierSet* bs = Universe::heap()->barrier_set();
aoqi@6880 563 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
aoqi@6880 564 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
aoqi@6880 565 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
aoqi@6880 566 Label l_0;
aoqi@1 567
fujie@8002 568 if (UseConcMarkSweepGC) __ sync();
fujie@8002 569
fujie@8002 570 __ set64(tmp, (long)ct->byte_map_base);
fujie@8002 571
aoqi@6880 572 __ dsll(AT, T1, TIMES_OOP);
aoqi@6880 573 __ dadd(AT, T0, AT);
aoqi@6880 574 __ daddiu(T1, AT, - BytesPerHeapOop);
aoqi@1 575
aoqi@6880 576 __ shr(T0, CardTableModRefBS::card_shift);
aoqi@6880 577 __ shr(T1, CardTableModRefBS::card_shift);
aoqi@1 578
aoqi@6880 579 __ dsub(T1, T1, T0); // end --> cards count
aoqi@6880 580 __ bind(l_0);
aoqi@6880 581
fujie@8002 582 __ dadd(AT, tmp, T0);
fujie@8002 583 if (UseLoongsonISA) {
fujie@8002 584 __ gssbx(R0, AT, T1, 0);
fujie@8002 585 } else {
fujie@8002 586 __ dadd(AT, AT, T1);
fujie@8002 587 __ sb(R0, AT, 0);
fujie@8002 588 }
fujie@8002 589
aoqi@6880 590 __ bgtz(T1, l_0);
aoqi@6880 591 __ delayed()->daddi(T1, T1, - 1);
aoqi@6880 592 }
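
  // What the loop above amounts to (a sketch, assuming the usual
  // CardTableModRefBS layout; T0 = start address, T1 = element count on entry):
  //
  //   jbyte* base  = ct->byte_map_base;
  //   size_t first = (uintptr_t)start >> CardTableModRefBS::card_shift;
  //   size_t last  = ((uintptr_t)start + count * BytesPerHeapOop - BytesPerHeapOop)
  //                  >> CardTableModRefBS::card_shift;
  //   for (size_t card = first; card <= last; card++)
  //     base[card] = 0;   // 0 is the dirty value; the stub walks the cards downwards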
aoqi@1 593
fujie@8004 594 // Generate code for an array write pre barrier
fujie@8004 595 //
fujie@8004 596 // addr - starting address
fujie@8004 597 // count - element count
fujie@8004 598 // tmp - scratch register
fujie@8004 599 //
fujie@8004 600 // Destroy no registers!
fujie@8004 601 //
fujie@8004 602 void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
fujie@8004 603 BarrierSet* bs = Universe::heap()->barrier_set();
fujie@8004 604 switch (bs->kind()) {
fujie@8004 605 case BarrierSet::G1SATBCT:
fujie@8004 606 case BarrierSet::G1SATBCTLogging:
fujie@8005 607 // With G1, don't generate the call if we statically know that the target is uninitialized
fujie@8005 608 if (!dest_uninitialized) {
fujie@8005 609 __ pushad(); // push registers
fujie@8005 610 if (count == A0) {
fujie@8005 611 if (addr == A1) {
fujie@8005 612 // exactly backwards!!
fujie@8005 613 //__ xchgptr(c_rarg1, c_rarg0);
fujie@8005 614 __ move(AT, A0);
aoqi@8009 615 __ move(A0, A1);
fujie@8005 616 __ move(A1, AT);
fujie@8005 617 } else {
fujie@8005 618 __ move(A1, count);
fujie@8005 619 __ move(A0, addr);
fujie@8005 620 }
fujie@8005 621 } else {
fujie@8005 622 __ move(A0, addr);
fujie@8005 623 __ move(A1, count);
fujie@8005 624 }
fujie@8005 625 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
fujie@8005 626 __ popad();
fujie@8005 627 }
fujie@8004 628 break;
fujie@8004 629 case BarrierSet::CardTableModRef:
fujie@8004 630 case BarrierSet::CardTableExtension:
fujie@8004 631 case BarrierSet::ModRef:
fujie@8004 632 break;
fujie@8004 633 default:
fujie@8004 634 ShouldNotReachHere();
fujie@8004 635
fujie@8004 636 }
fujie@8004 637 }
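
  // In effect (a sketch, not part of the stub): for G1 the generated code is
  // equivalent to the call below, made with all registers saved and restored
  // around it; for the card-table and ModRef barriers nothing is emitted.
  //
  //   if (!dest_uninitialized)
  //     BarrierSet::static_write_ref_array_pre((HeapWord*)addr, count);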
fujie@8004 638
fujie@8004 639 //
fujie@8004 640 // Generate code for an array write post barrier
fujie@8004 641 //
fujie@8004 642 // Input:
fujie@8004 643 // start - register containing starting address of destination array
fujie@8004 644 // count - elements count
fujie@8004 645 // scratch - scratch register
fujie@8004 646 //
fujie@8004 647 // The input registers are overwritten.
fujie@8004 648 //
fujie@8004 649 void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
fujie@8004 650 assert_different_registers(start, count, scratch, AT);
fujie@8004 651 BarrierSet* bs = Universe::heap()->barrier_set();
fujie@8004 652 switch (bs->kind()) {
fujie@8004 653 case BarrierSet::G1SATBCT:
fujie@8004 654 case BarrierSet::G1SATBCTLogging:
fujie@8005 655 {
fujie@8005 656 __ pushad(); // push registers (overkill)
fujie@8005 657 if (count == A0) {
fujie@8005 658 if (start == A1) {
fujie@8005 659 // exactly backwards!!
fujie@8005 660 //__ xchgptr(c_rarg1, c_rarg0);
fujie@8005 661 __ move(AT, A0);
aoqi@8009 662 __ move(A0, A1);
fujie@8005 663 __ move(A1, AT);
fujie@8005 664 } else {
fujie@8005 665 __ move(A1, count);
fujie@8005 666 __ move(A0, start);
fujie@8005 667 }
fujie@8005 668 } else {
fujie@8005 669 __ move(A0, start);
fujie@8005 670 __ move(A1, count);
fujie@8005 671 }
fujie@8005 672 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
fujie@8005 673 __ popad();
fujie@8005 674 }
fujie@8004 675 break;
fujie@8004 676 case BarrierSet::CardTableModRef:
fujie@8004 677 case BarrierSet::CardTableExtension:
fujie@8004 678 {
fujie@8004 679 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
fujie@8004 680 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
fujie@8004 681
fujie@8004 682 Label L_loop;
fujie@8004 683 const Register end = count;
fujie@8004 684
fujie@8004 685 if (UseConcMarkSweepGC) __ sync();
fujie@8004 686
fujie@8004 687 int64_t disp = (int64_t) ct->byte_map_base;
fujie@8004 688 __ set64(scratch, disp);
fujie@8004 689
fujie@8004 690 __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size
fujie@8004 691 __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive
fujie@8004 692 __ shr(start, CardTableModRefBS::card_shift);
fujie@8004 693 __ shr(end, CardTableModRefBS::card_shift);
fujie@8004 694 __ dsubu(end, end, start); // end --> cards count
fujie@8004 695
fujie@8004 696 __ daddu(start, start, scratch);
aoqi@8009 697
fujie@8004 698 __ bind(L_loop);
fujie@8004 699 if (UseLoongsonISA) {
fujie@8004 700 __ gssbx(R0, start, count, 0);
fujie@8004 701 } else {
fujie@8004 702 __ daddu(AT, start, count);
fujie@8004 703 __ sb(R0, AT, 0);
fujie@8004 704 }
fujie@8004 705 __ daddiu(count, count, -1);
fujie@8004 706 __ slt(AT, count, R0);
fujie@8004 707 __ beq(AT, R0, L_loop);
fujie@8004 708 __ nop();
fujie@8004 709 }
fujie@8004 710 break;
fujie@8004 711 default:
fujie@8004 712 ShouldNotReachHere();
fujie@8004 713 }
fujie@8004 714 }
fujie@8004 715
aoqi@1 716 // Arguments:
aoqi@1 717 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 718 // ignored
aoqi@1 719 // name - stub name string
aoqi@1 720 //
aoqi@1 721 // Inputs:
aoqi@1 722 // c_rarg0 - source array address
aoqi@1 723 // c_rarg1 - destination array address
aoqi@1 724 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 725 //
aoqi@1 726 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
aoqi@1 727 // we let the hardware handle it. The one to eight bytes within words,
aoqi@1 728 // dwords or qwords that span cache line boundaries will still be loaded
aoqi@1 729 // and stored atomically.
aoqi@1 730 //
aoqi@1 731 // Side Effects:
aoqi@1 732 // disjoint_byte_copy_entry is set to the no-overlap entry point
aoqi@1 733 // used by generate_conjoint_byte_copy().
aoqi@1 734 //
jiangshaofeng@117 735 address generate_disjoint_byte_copy(bool aligned, const char * name) {
jiangshaofeng@117 736 StubCodeMark mark(this, "StubRoutines", name);
jiangshaofeng@117 737 __ align(CodeEntryAlignment);
aoqi@1 738
jiangshaofeng@117 739
jiangshaofeng@117 740 Register tmp1 = T0;
jiangshaofeng@117 741 Register tmp2 = T1;
jiangshaofeng@117 742 Register tmp3 = T3;
jiangshaofeng@117 743
jiangshaofeng@117 744 address start = __ pc();
jiangshaofeng@117 745
jiangshaofeng@117 746 __ push(tmp1);
jiangshaofeng@117 747 __ push(tmp2);
jiangshaofeng@117 748 __ push(tmp3);
jiangshaofeng@117 749 __ move(tmp1, A0);
jiangshaofeng@117 750 __ move(tmp2, A1);
jiangshaofeng@117 751 __ move(tmp3, A2);
jiangshaofeng@117 752
jiangshaofeng@117 753
jiangshaofeng@117 754 Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11;
jiangshaofeng@117 755 Label l_debug;
jiangshaofeng@117 756
jiangshaofeng@117 757 __ daddi(AT, tmp3, -9); //why the number is 9 ?
jiangshaofeng@117 758 __ blez(AT, l_9);
jiangshaofeng@117 759 __ delayed()->nop();
jiangshaofeng@117 760
jiangshaofeng@117 761 if (!aligned) {
jiangshaofeng@117 762 __ xorr(AT, tmp1, tmp2);
jiangshaofeng@117 763 __ andi(AT, AT, 1);
jiangshaofeng@117 764 __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy
jiangshaofeng@117 765 __ delayed()->nop();
jiangshaofeng@117 766
jiangshaofeng@117 767 __ andi(AT, tmp1, 1);
jiangshaofeng@117 768 __ beq(AT, R0, l_10); // copy 1 element if necessary to align to 2 bytes
jiangshaofeng@117 769 __ delayed()->nop();
jiangshaofeng@117 770
jiangshaofeng@117 771 __ lb(AT, tmp1, 0);
jiangshaofeng@117 772 __ daddi(tmp1, tmp1, 1);
jiangshaofeng@117 773 __ sb(AT, tmp2, 0);
jiangshaofeng@117 774 __ daddi(tmp2, tmp2, 1);
jiangshaofeng@117 775 __ daddi(tmp3, tmp3, -1);
jiangshaofeng@117 776 __ bind(l_10);
jiangshaofeng@117 777
jiangshaofeng@117 778 __ xorr(AT, tmp1, tmp2);
jiangshaofeng@117 779 __ andi(AT, AT, 3);
jiangshaofeng@117 780 __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy
jiangshaofeng@117 781 __ delayed()->nop();
jiangshaofeng@117 782
jiangshaofeng@117 783 // At this point it is guaranteed that both, from and to have the same alignment mod 4.
jiangshaofeng@117 784
jiangshaofeng@117 785 // Copy 2 elements if necessary to align to 4 bytes.
jiangshaofeng@117 786 __ andi(AT, tmp1, 3);
jiangshaofeng@117 787 __ beq(AT, R0, l_2);
jiangshaofeng@117 788 __ delayed()->nop();
jiangshaofeng@117 789
jiangshaofeng@117 790 __ lhu(AT, tmp1, 0);
jiangshaofeng@117 791 __ daddi(tmp1, tmp1, 2);
jiangshaofeng@117 792 __ sh(AT, tmp2, 0);
jiangshaofeng@117 793 __ daddi(tmp2, tmp2, 2);
jiangshaofeng@117 794 __ daddi(tmp3, tmp3, -2);
jiangshaofeng@117 795 __ bind(l_2);
jiangshaofeng@117 796
jiangshaofeng@117 797 // At this point the positions of both, from and to, are at least 4 byte aligned.
jiangshaofeng@117 798
jiangshaofeng@117 799 // Copy 4 elements at a time.
jiangshaofeng@117 800 // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
jiangshaofeng@117 801 __ xorr(AT, tmp1, tmp2);
jiangshaofeng@117 802 __ andi(AT, AT, 7);
jiangshaofeng@117 803 __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned
jiangshaofeng@117 804 __ delayed()->nop();
jiangshaofeng@117 805
jiangshaofeng@117 806 // Copy 4 elements if necessary to align to 8 bytes.
jiangshaofeng@117 807 __ andi(AT, tmp1, 7);
jiangshaofeng@117 808 __ beq(AT, R0, l_7);
jiangshaofeng@117 809 __ delayed()->nop();
jiangshaofeng@117 810
jiangshaofeng@117 811 __ lw(AT, tmp1, 0);
jiangshaofeng@117 812 __ daddi(tmp3, tmp3, -4);
jiangshaofeng@117 813 __ sw(AT, tmp2, 0);
jiangshaofeng@117 814 { // FasterArrayCopy
jiangshaofeng@117 815 __ daddi(tmp1, tmp1, 4);
jiangshaofeng@117 816 __ daddi(tmp2, tmp2, 4);
jiangshaofeng@117 817 }
jiangshaofeng@117 818 }
jiangshaofeng@117 819
jiangshaofeng@117 820 __ bind(l_7);
jiangshaofeng@117 821
jiangshaofeng@117 822 // Copy 8 elements at a time; either the loads or the stores can
jiangshaofeng@117 823 // be unaligned if aligned == false.
jiangshaofeng@117 824
jiangshaofeng@117 825 { // FasterArrayCopy
jiangshaofeng@117 826 __ daddi(AT, tmp3, -7);
jiangshaofeng@117 827 __ blez(AT, l_6); // drop to the 4-byte loop if fewer than 8 elements remain
jiangshaofeng@117 828 __ delayed()->nop();
jiangshaofeng@117 829
jiangshaofeng@117 830 __ bind(l_8);
jiangshaofeng@117 831 // For Loongson, there is 128-bit memory access. TODO
jiangshaofeng@117 832 __ ld(AT, tmp1, 0);
jiangshaofeng@117 833 __ sd(AT, tmp2, 0);
jiangshaofeng@117 834 __ daddi(tmp1, tmp1, 8);
jiangshaofeng@117 835 __ daddi(tmp2, tmp2, 8);
jiangshaofeng@117 836 __ daddi(tmp3, tmp3, -8);
jiangshaofeng@117 837 __ daddi(AT, tmp3, -8);
jiangshaofeng@117 838 __ bgez(AT, l_8);
jiangshaofeng@117 839 __ delayed()->nop();
jiangshaofeng@117 840 }
jiangshaofeng@117 841 __ bind(l_6);
jiangshaofeng@117 842
jiangshaofeng@117 843 // copy 4 bytes at a time
jiangshaofeng@117 844 { // FasterArrayCopy
jiangshaofeng@117 845 __ daddi(AT, tmp3, -3);
jiangshaofeng@117 846 __ blez(AT, l_1);
jiangshaofeng@117 847 __ delayed()->nop();
jiangshaofeng@117 848
jiangshaofeng@117 849 __ bind(l_3);
jiangshaofeng@117 850 __ lw(AT, tmp1, 0);
jiangshaofeng@117 851 __ sw(AT, tmp2, 0);
jiangshaofeng@117 852 __ daddi(tmp1, tmp1, 4);
jiangshaofeng@117 853 __ daddi(tmp2, tmp2, 4);
jiangshaofeng@117 854 __ daddi(tmp3, tmp3, -4);
jiangshaofeng@117 855 __ daddi(AT, tmp3, -4);
jiangshaofeng@117 856 __ bgez(AT, l_3);
jiangshaofeng@117 857 __ delayed()->nop();
jiangshaofeng@117 858
jiangshaofeng@117 859 }
jiangshaofeng@117 860
jiangshaofeng@117 861 // copy 2 bytes at a time
jiangshaofeng@117 862 __ bind(l_1);
aoqi@6880 863 {
jiangshaofeng@117 864 __ daddi(AT, tmp3, -1);
jiangshaofeng@117 865 __ blez(AT, l_9);
jiangshaofeng@117 866 __ delayed()->nop();
jiangshaofeng@117 867
jiangshaofeng@117 868 __ bind(l_5);
jiangshaofeng@117 869 __ lhu(AT, tmp1, 0);
jiangshaofeng@117 870 __ daddi(tmp3, tmp3, -2);
jiangshaofeng@117 871 __ sh(AT, tmp2, 0);
jiangshaofeng@117 872 __ daddi(tmp1, tmp1, 2);
jiangshaofeng@117 873 __ daddi(tmp2, tmp2, 2);
jiangshaofeng@117 874 __ daddi(AT, tmp3, -2);
jiangshaofeng@117 875 __ bgez(AT, l_5);
jiangshaofeng@117 876 __ delayed()->nop();
jiangshaofeng@117 877 }
jiangshaofeng@117 878
jiangshaofeng@117 879 // copy 1 element (byte) at a time
jiangshaofeng@117 880 __ bind(l_9);
jiangshaofeng@117 881 __ beq(R0, tmp3, l_4);
jiangshaofeng@117 882 __ delayed()->nop();
jiangshaofeng@117 883
jiangshaofeng@117 884 {
jiangshaofeng@117 885 __ bind(l_11);
jiangshaofeng@117 886 __ lb(AT, tmp1, 0);
jiangshaofeng@117 887 __ daddi(tmp3, tmp3, -1);
jiangshaofeng@117 888 __ sb(AT, tmp2, 0);
jiangshaofeng@117 889 __ daddi(tmp1, tmp1, 1);
jiangshaofeng@117 890 __ daddi(tmp2, tmp2, 1);
jiangshaofeng@117 891 __ daddi(AT, tmp3, -1);
jiangshaofeng@117 892 __ bgez(AT, l_11);
jiangshaofeng@117 893 __ delayed()->nop();
jiangshaofeng@117 894 }
jiangshaofeng@117 895
jiangshaofeng@117 896 __ bind(l_4);
jiangshaofeng@117 897 __ pop(tmp3);
jiangshaofeng@117 898 __ pop(tmp2);
jiangshaofeng@117 899 __ pop(tmp1);
jiangshaofeng@117 900
jiangshaofeng@117 901 __ jr(RA);
jiangshaofeng@117 902 __ delayed()->nop();
jiangshaofeng@117 903
jiangshaofeng@117 904 return start;
aoqi@1 905 }
aoqi@1 906
aoqi@1 907 // Arguments:
aoqi@1 908 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 909 // ignored
aoqi@1 910 // name - stub name string
aoqi@1 911 //
aoqi@1 912 // Inputs:
aoqi@8 913 // A0 - source array address
aoqi@8 914 // A1 - destination array address
aoqi@8 915 // A2 - element count, treated as ssize_t, can be zero
aoqi@1 916 //
aoqi@1 917 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
aoqi@1 918 // we let the hardware handle it. The one to eight bytes within words,
aoqi@1 919 // dwords or qwords that span cache line boundaries will still be loaded
aoqi@1 920 // and stored atomically.
aoqi@1 921 //
aoqi@1 922 address generate_conjoint_byte_copy(bool aligned, const char *name) {
aoqi@8 923 __ align(CodeEntryAlignment);
aoqi@8 924 StubCodeMark mark(this, "StubRoutines", name);
aoqi@8 925 address start = __ pc();
aoqi@1 926
aoqi@8 927 Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
aoqi@8 928 Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;
aoqi@1 929
aoqi@8 930 address nooverlap_target = aligned ?
aoqi@6880 931 StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
aoqi@6880 932 StubRoutines::jbyte_disjoint_arraycopy();
aoqi@1 933
aoqi@8 934 array_overlap_test(nooverlap_target, 0);
Jin@7 935
aoqi@8 936 const Register from = A0; // source array address
aoqi@8 937 const Register to = A1; // destination array address
aoqi@8 938 const Register count = A2; // elements count
aoqi@8 939 const Register end_from = T3; // source array end address
aoqi@8 940 const Register end_to = T0; // destination array end address
aoqi@8 941 const Register end_count = T1; // remaining element count
Jin@7 942
aoqi@6880 943 __ push(end_from);
aoqi@6880 944 __ push(end_to);
aoqi@6880 945 __ push(end_count);
aoqi@6880 946 __ push(T8);
Jin@7 947
aoqi@8 948 // copy from high to low
aoqi@6880 949 __ move(end_count, count);
aoqi@6880 950 __ dadd(end_from, from, end_count);
aoqi@6880 951 __ dadd(end_to, to, end_count);
Jin@7 952
aoqi@8 953 // 2016/05/08 aoqi: If end_from and end_to have different alignment, an unaligned copy is performed.
aoqi@6880 954 __ andi(AT, end_from, 3);
aoqi@6880 955 __ andi(T8, end_to, 3);
aoqi@6880 956 __ bne(AT, T8, l_copy_byte);
aoqi@6880 957 __ delayed()->nop();
Jin@7 958
aoqi@8 959 // First deal with the unaligned data at the top.
aoqi@8 960 __ bind(l_unaligned);
aoqi@6880 961 __ beq(end_count, R0, l_exit);
aoqi@6880 962 __ delayed()->nop();
aoqi@8 963
aoqi@6880 964 __ andi(AT, end_from, 3);
aoqi@6880 965 __ bne(AT, R0, l_from_unaligned);
aoqi@6880 966 __ delayed()->nop();
aoqi@8 967
aoqi@6880 968 __ andi(AT, end_to, 3);
aoqi@6880 969 __ beq(AT, R0, l_4_bytes_aligned);
aoqi@6880 970 __ delayed()->nop();
aoqi@8 971
aoqi@8 972 __ bind(l_from_unaligned);
aoqi@6880 973 __ lb(AT, end_from, -1);
aoqi@6880 974 __ sb(AT, end_to, -1);
aoqi@6880 975 __ daddi(end_from, end_from, -1);
aoqi@6880 976 __ daddi(end_to, end_to, -1);
aoqi@6880 977 __ daddi(end_count, end_count, -1);
aoqi@6880 978 __ b(l_unaligned);
aoqi@6880 979 __ delayed()->nop();
aoqi@8 980
aoqi@8 981 // now end_to, end_from point to 4-byte aligned high-ends
aoqi@8 982 // end_count contains byte count that is not copied.
aoqi@8 983 // copy 4 bytes at a time
aoqi@8 984 __ bind(l_4_bytes_aligned);
aoqi@8 985
aoqi@6880 986 __ move(T8, end_count);
aoqi@6880 987 __ daddi(AT, end_count, -3);
aoqi@6880 988 __ blez(AT, l_copy_suffix);
aoqi@6880 989 __ delayed()->nop();
aoqi@8 990
aoqi@6880 991 //__ andi(T8, T8, 3);
aoqi@8 992 __ lea(end_from, Address(end_from, -4));
aoqi@8 993 __ lea(end_to, Address(end_to, -4));
aoqi@8 994
aoqi@6880 995 __ dsrl(end_count, end_count, 2);
aoqi@8 996 __ align(16);
aoqi@8 997 __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes
aoqi@6880 998 __ lw(AT, end_from, 0);
aoqi@6880 999 __ sw(AT, end_to, 0);
aoqi@6880 1000 __ addi(end_from, end_from, -4);
aoqi@6880 1001 __ addi(end_to, end_to, -4);
aoqi@6880 1002 __ addi(end_count, end_count, -1);
aoqi@6880 1003 __ bne(end_count, R0, l_copy_4_bytes_loop);
aoqi@6880 1004 __ delayed()->nop();
aoqi@8 1005
aoqi@6880 1006 __ b(l_copy_suffix);
aoqi@6880 1007 __ delayed()->nop();
aoqi@8 1008 // copy dwords aligned or not with repeat move
aoqi@8 1009 // l_copy_suffix
aoqi@8 1010 // copy suffix (0-3 bytes)
aoqi@6880 1011 __ bind(l_copy_suffix);
aoqi@6880 1012 __ andi(T8, T8, 3);
aoqi@6880 1013 __ beq(T8, R0, l_exit);
aoqi@6880 1014 __ delayed()->nop();
aoqi@6880 1015 __ addi(end_from, end_from, 3);
aoqi@6880 1016 __ addi(end_to, end_to, 3);
aoqi@8 1017 __ bind(l_copy_suffix_loop);
aoqi@6880 1018 __ lb(AT, end_from, 0);
aoqi@6880 1019 __ sb(AT, end_to, 0);
aoqi@6880 1020 __ addi(end_from, end_from, -1);
aoqi@6880 1021 __ addi(end_to, end_to, -1);
aoqi@6880 1022 __ addi(T8, T8, -1);
aoqi@6880 1023 __ bne(T8, R0, l_copy_suffix_loop);
aoqi@6880 1024 __ delayed()->nop();
aoqi@8 1025
aoqi@8 1026 __ bind(l_copy_byte);
aoqi@6880 1027 __ beq(end_count, R0, l_exit);
aoqi@6880 1028 __ delayed()->nop();
aoqi@6880 1029 __ lb(AT, end_from, -1);
aoqi@6880 1030 __ sb(AT, end_to, -1);
aoqi@6880 1031 __ daddi(end_from, end_from, -1);
aoqi@6880 1032 __ daddi(end_to, end_to, -1);
aoqi@6880 1033 __ daddi(end_count, end_count, -1);
aoqi@6880 1034 __ b(l_copy_byte);
aoqi@6880 1035 __ delayed()->nop();
aoqi@8 1036
aoqi@8 1037 __ bind(l_exit);
aoqi@6880 1038 __ pop(T8);
aoqi@6880 1039 __ pop(end_count);
aoqi@6880 1040 __ pop(end_to);
aoqi@6880 1041 __ pop(end_from);
aoqi@6880 1042 __ jr(RA);
aoqi@6880 1043 __ delayed()->nop();
aoqi@8 1044 return start;
aoqi@1 1045 }
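
  // The key idea above (a sketch, not the stub itself): the ranges may overlap
  // with 'to' above 'from', so the copy runs from the high addresses downward
  // and no byte is overwritten before it has been read:
  //
  //   while (count--) to[count] = from[count];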
aoqi@1 1046
aoqi@13 1047 // Generate stub for disjoint short copy. If "aligned" is true, the
aoqi@13 1048 // "from" and "to" addresses are assumed to be heapword aligned.
aoqi@1 1049 //
aoqi@13 1050 // Arguments for generated stub:
aoqi@13 1051 // from: A0
aoqi@13 1052 // to: A1
aoqi@13 1053 // elm.count: A2 treated as signed
aoqi@13 1054 // one element: 2 bytes
aoqi@1 1055 //
aoqi@13 1056 // Strategy for aligned==true:
aoqi@1 1057 //
aoqi@13 1058 // If length <= 9:
aoqi@13 1059 // 1. copy 1 element at a time (l_5)
aoqi@1 1060 //
aoqi@13 1061 // If length > 9:
aoqi@13 1062 // 1. copy 4 elements at a time until less than 4 elements are left (l_7)
aoqi@13 1063 // 2. copy 2 elements at a time until less than 2 elements are left (l_6)
aoqi@13 1064 // 3. copy last element if one was left in step 2. (l_1)
aoqi@13 1065 //
aoqi@13 1066 //
aoqi@13 1067 // Strategy for aligned==false:
aoqi@13 1068 //
aoqi@13 1069 // If length <= 9: same as aligned==true case
aoqi@13 1070 //
aoqi@13 1071 // If length > 9:
aoqi@13 1072 // 1. continue with step 7. if the alignment of from and to mod 4
aoqi@13 1073 // is different.
aoqi@13 1074 // 2. align from and to to 4 bytes by copying 1 element if necessary
aoqi@13 1075 // 3. at l_2 from and to are 4 byte aligned; continue with
aoqi@13 1076 // 6. if they cannot be aligned to 8 bytes because they have
aoqi@13 1077 // got different alignment mod 8.
aoqi@13 1078 // 4. at this point we know that both, from and to, have the same
aoqi@13 1079 // alignment mod 8, now copy one element if necessary to get
aoqi@13 1080 // 8 byte alignment of from and to.
aoqi@13 1081 // 5. copy 4 elements at a time until less than 4 elements are
aoqi@13 1082 // left; depending on step 3. all load/stores are aligned.
aoqi@13 1083 // 6. copy 2 elements at a time until less than 2 elements are
aoqi@13 1084 // left. (l_6)
aoqi@13 1085 // 7. copy 1 element at a time. (l_5)
aoqi@13 1086 // 8. copy last element if one was left in step 6. (l_1)
chenhaoxuan@126 1087
aoqi@13 1088 address generate_disjoint_short_copy(bool aligned, const char * name) {
aoqi@13 1089 StubCodeMark mark(this, "StubRoutines", name);
aoqi@13 1090 __ align(CodeEntryAlignment);
aoqi@1 1091
aoqi@13 1092 Register tmp1 = T0;
aoqi@13 1093 Register tmp2 = T1;
aoqi@13 1094 Register tmp3 = T3;
lifangyuan@125 1095 Register tmp4 = T8;
chenhaoxuan@126 1096 Register tmp5 = T9;
chenhaoxuan@126 1097 Register tmp6 = T2;
aoqi@1 1098
aoqi@13 1099 address start = __ pc();
aoqi@13 1100
aoqi@13 1101 __ push(tmp1);
aoqi@13 1102 __ push(tmp2);
aoqi@13 1103 __ push(tmp3);
aoqi@13 1104 __ move(tmp1, A0);
aoqi@13 1105 __ move(tmp2, A1);
aoqi@13 1106 __ move(tmp3, A2);
aoqi@13 1107
chenhaoxuan@126 1108 Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14;
aoqi@13 1109 Label l_debug;
aoqi@13 1110 // don't try anything fancy if arrays don't have many elements
chenhaoxuan@126 1111 __ daddi(AT, tmp3, -23);
chenhaoxuan@126 1112 __ blez(AT, l_14);
aoqi@13 1113 __ delayed()->nop();
chenhaoxuan@126 1114 // move push here
chenhaoxuan@126 1115 __ push(tmp4);
chenhaoxuan@126 1116 __ push(tmp5);
chenhaoxuan@126 1117 __ push(tmp6);
aoqi@13 1118
aoqi@13 1119 if (!aligned) {
aoqi@13 1120 __ xorr(AT, A0, A1);
aoqi@13 1121 __ andi(AT, AT, 1);
aoqi@13 1122 __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen?
aoqi@13 1123 __ delayed()->nop();
aoqi@13 1124
aoqi@13 1125 __ xorr(AT, A0, A1);
aoqi@13 1126 __ andi(AT, AT, 3);
aoqi@13 1127 __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy
aoqi@13 1128 __ delayed()->nop();
aoqi@13 1129
aoqi@13 1130 // At this point it is guaranteed that both, from and to have the same alignment mod 4.
aoqi@13 1131
aoqi@13 1132 // Copy 1 element if necessary to align to 4 bytes.
aoqi@13 1133 __ andi(AT, A0, 3);
aoqi@13 1134 __ beq(AT, R0, l_2);
aoqi@13 1135 __ delayed()->nop();
aoqi@13 1136
aoqi@13 1137 __ lhu(AT, tmp1, 0);
aoqi@13 1138 __ daddi(tmp1, tmp1, 2);
aoqi@13 1139 __ sh(AT, tmp2, 0);
aoqi@13 1140 __ daddi(tmp2, tmp2, 2);
aoqi@13 1141 __ daddi(tmp3, tmp3, -1);
aoqi@13 1142 __ bind(l_2);
aoqi@13 1143
aoqi@13 1144 // At this point the positions of both, from and to, are at least 4 byte aligned.
aoqi@13 1145
aoqi@13 1146 // Copy 4 elements at a time.
aoqi@13 1147 // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
aoqi@13 1148 __ xorr(AT, tmp1, tmp2);
aoqi@13 1149 __ andi(AT, AT, 7);
aoqi@13 1150 __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned
aoqi@13 1151 __ delayed()->nop();
aoqi@13 1152
aoqi@13 1153 // Copy a 2-element word if necessary to align to 8 bytes.
aoqi@13 1154 __ andi(AT, tmp1, 7);
aoqi@13 1155 __ beq(AT, R0, l_7);
aoqi@13 1156 __ delayed()->nop();
aoqi@13 1157
aoqi@13 1158 __ lw(AT, tmp1, 0);
aoqi@13 1159 __ daddi(tmp3, tmp3, -2);
aoqi@13 1160 __ sw(AT, tmp2, 0);
chenhaoxuan@126 1161 __ daddi(tmp1, tmp1, 4);
chenhaoxuan@126 1162 __ daddi(tmp2, tmp2, 4);
chenhaoxuan@126 1163 }// end of if (!aligned)
lifangyuan@125 1164
chenhaoxuan@126 1165 __ bind(l_7);
chenhaoxuan@126 1166 // At this point the positions of both, from and to, are at least 8 byte aligned.
chenhaoxuan@126 1167 // Copy 16 elements (32 bytes) at a time.
chenhaoxuan@126 1168 // Align to 16 bytes, but only if both from and to have the same alignment mod 16.
chenhaoxuan@126 1169 __ xorr(AT, tmp1, tmp2);
chenhaoxuan@126 1170 __ andi(AT, AT, 15);
chenhaoxuan@126 1171 __ bne(AT, R0, l_9);
chenhaoxuan@126 1172 __ delayed()->nop();
lifangyuan@125 1173
chenhaoxuan@126 1174 // Copy a 4-element word (8 bytes) if necessary to align to 16 bytes.
chenhaoxuan@126 1175 __ andi(AT, tmp1, 15);
chenhaoxuan@126 1176 __ beq(AT, R0, l_10);
chenhaoxuan@126 1177 __ delayed()->nop();
lifangyuan@125 1178
chenhaoxuan@126 1179 __ ld(AT, tmp1, 0);
chenhaoxuan@126 1180 __ daddi(tmp3, tmp3, -4);
chenhaoxuan@126 1181 __ sd(AT, tmp2, 0);
chenhaoxuan@126 1182 __ daddi(tmp1, tmp1, 8);
chenhaoxuan@126 1183 __ daddi(tmp2, tmp2, 8);
lifangyuan@125 1184
chenhaoxuan@126 1185 __ bind(l_10);
lifangyuan@125 1186
aoqi@6880 1187 // Copy 16 elements (32 bytes) at a time; either the loads or the stores can
lifangyuan@125 1188 // be unaligned if aligned == false
lifangyuan@125 1189
lifangyuan@125 1190 { // FasterArrayCopy
lifangyuan@125 1191 __ bind(l_11);
lifangyuan@125 1192 // For Loongson, the 128-bit memory access instructions are gslq/gssq
aoqi@127 1193 if (UseLoongsonISA) {
aoqi@127 1194 __ gslq(AT, tmp4, tmp1, 0);
aoqi@127 1195 __ gslq(tmp5, tmp6, tmp1, 16);
aoqi@127 1196 __ daddi(tmp1, tmp1, 32);
aoqi@127 1197 __ daddi(tmp2, tmp2, 32);
aoqi@127 1198 __ gssq(AT, tmp4, tmp2, -32);
aoqi@127 1199 __ gssq(tmp5, tmp6, tmp2, -16);
aoqi@127 1200 } else {
aoqi@127 1201 __ ld(AT, tmp1, 0);
aoqi@127 1202 __ ld(tmp4, tmp1, 8);
aoqi@127 1203 __ ld(tmp5, tmp1, 16);
aoqi@127 1204 __ ld(tmp6, tmp1, 24);
aoqi@127 1205 __ daddi(tmp1, tmp1, 32);
aoqi@127 1206 __ sd(AT, tmp2, 0);
aoqi@127 1207 __ sd(tmp4, tmp2, 8);
aoqi@127 1208 __ sd(tmp5, tmp2, 16);
aoqi@127 1209 __ sd(tmp6, tmp2, 24);
aoqi@127 1210 __ daddi(tmp2, tmp2, 32);
aoqi@127 1211 }
chenhaoxuan@126 1212 __ daddi(tmp3, tmp3, -16);
chenhaoxuan@126 1213 __ daddi(AT, tmp3, -16);
lifangyuan@125 1214 __ bgez(AT, l_11);
lifangyuan@125 1215 __ delayed()->nop();
aoqi@13 1216 }
lifangyuan@125 1217 __ bind(l_9);
chenhaoxuan@126 1218
aoqi@13 1219 // Copy 16 elements (32 bytes) at a time; either the loads or the stores can
aoqi@13 1220 // be unaligned if aligned == false.
aoqi@13 1221 { // FasterArrayCopy
chenhaoxuan@126 1222 __ daddi(AT, tmp3, -15);// loop unrolled 4 times, so the element count should not be less than 16
chenhaoxuan@126 1223 __ blez(AT, l_4); // leave the rest to the tail copy if fewer than 16 elements remain
aoqi@13 1224 __ delayed()->nop();
aoqi@13 1225
aoqi@13 1226 __ bind(l_8);
aoqi@13 1227 __ ld(AT, tmp1, 0);
chenhaoxuan@126 1228 __ ld(tmp4, tmp1, 8);
chenhaoxuan@126 1229 __ ld(tmp5, tmp1, 16);
chenhaoxuan@126 1230 __ ld(tmp6, tmp1, 24);
aoqi@13 1231 __ sd(AT, tmp2, 0);
chenhaoxuan@126 1232 __ sd(tmp4, tmp2, 8);
chenhaoxuan@126 1233 __ sd(tmp5, tmp2,16);
chenhaoxuan@126 1234 __ daddi(tmp1, tmp1, 32);
chenhaoxuan@126 1235 __ daddi(tmp2, tmp2, 32);
chenhaoxuan@126 1236 __ daddi(tmp3, tmp3, -16);
chenhaoxuan@126 1237 __ daddi(AT, tmp3, -16);
aoqi@13 1238 __ bgez(AT, l_8);
chenhaoxuan@126 1239 __ sd(tmp6, tmp2, -8);
aoqi@13 1240 }
aoqi@13 1241 __ bind(l_6);
aoqi@13 1242
aoqi@13 1243 // copy 8 elements (16 bytes) at a time
aoqi@13 1244 { // FasterArrayCopy
chenhaoxuan@126 1245 __ daddi(AT, tmp3, -7);
chenhaoxuan@126 1246 __ blez(AT, l_4);
aoqi@13 1247 __ delayed()->nop();
aoqi@13 1248
aoqi@13 1249 __ bind(l_3);
aoqi@13 1250 __ lw(AT, tmp1, 0);
chenhaoxuan@126 1251 __ lw(tmp4, tmp1, 4);
chenhaoxuan@126 1252 __ lw(tmp5, tmp1, 8);
chenhaoxuan@126 1253 __ lw(tmp6, tmp1, 12);
aoqi@13 1254 __ sw(AT, tmp2, 0);
chenhaoxuan@126 1255 __ sw(tmp4, tmp2, 4);
chenhaoxuan@126 1256 __ sw(tmp5, tmp2, 8);
chenhaoxuan@126 1257 __ daddi(tmp1, tmp1, 16);
chenhaoxuan@126 1258 __ daddi(tmp2, tmp2, 16);
chenhaoxuan@126 1259 __ daddi(tmp3, tmp3, -8);
chenhaoxuan@126 1260 __ daddi(AT, tmp3, -8);
aoqi@13 1261 __ bgez(AT, l_3);
chenhaoxuan@126 1262 __ sw(tmp6, tmp2, -4);
aoqi@13 1263 }
aoqi@13 1264
chenhaoxuan@126 1265 __ bind(l_1);
aoqi@13 1266 // copy 4 elements (2 bytes each) at a time
aoqi@13 1267 { // FasterArrayCopy
chenhaoxuan@126 1268 __ daddi(AT, tmp3, -3);
chenhaoxuan@126 1269 __ blez(AT, l_4);
chenhaoxuan@126 1270 __ delayed()->nop();
aoqi@13 1271
aoqi@13 1272 __ bind(l_5);
aoqi@13 1273 __ lhu(AT, tmp1, 0);
chenhaoxuan@126 1274 __ lhu(tmp4, tmp1, 2);
chenhaoxuan@126 1275 __ lhu(tmp5, tmp1, 4);
chenhaoxuan@126 1276 __ lhu(tmp6, tmp1, 6);
chenhaoxuan@126 1277 __ sh(AT, tmp2, 0);
chenhaoxuan@126 1278 __ sh(tmp4, tmp2, 2);
chenhaoxuan@126 1279 __ sh(tmp5, tmp2, 4);
chenhaoxuan@126 1280 __ daddi(tmp1, tmp1, 8);
chenhaoxuan@126 1281 __ daddi(tmp2, tmp2, 8);
chenhaoxuan@126 1282 __ daddi(tmp3, tmp3, -4);
chenhaoxuan@126 1283 __ daddi(AT, tmp3, -4);
chenhaoxuan@126 1284 __ bgez(AT, l_5);
chenhaoxuan@126 1285 __ sh(tmp6, tmp2, -2);
chenhaoxuan@126 1286 }
chenhaoxuan@126 1287 // single element
chenhaoxuan@126 1288 __ bind(l_4);
chenhaoxuan@126 1289
chenhaoxuan@126 1290 __ pop(tmp6);
chenhaoxuan@126 1291 __ pop(tmp5);
chenhaoxuan@126 1292 __ pop(tmp4);
chenhaoxuan@126 1293
chenhaoxuan@126 1294 __ bind(l_14);
chenhaoxuan@126 1295 { // FasterArrayCopy
chenhaoxuan@126 1296 __ beq(R0, tmp3, l_13);
chenhaoxuan@126 1297 __ delayed()->nop();
chenhaoxuan@126 1298
chenhaoxuan@126 1299 __ bind(l_12);
chenhaoxuan@126 1300 __ lhu(AT, tmp1, 0);
aoqi@13 1301 __ sh(AT, tmp2, 0);
aoqi@13 1302 __ daddi(tmp1, tmp1, 2);
aoqi@13 1303 __ daddi(tmp2, tmp2, 2);
chenhaoxuan@126 1304 __ daddi(tmp3, tmp3, -1);
aoqi@13 1305 __ daddi(AT, tmp3, -1);
chenhaoxuan@126 1306 __ bgez(AT, l_12);
aoqi@13 1307 __ delayed()->nop();
aoqi@13 1308 }
chenhaoxuan@126 1309
chenhaoxuan@126 1310 __ bind(l_13);
aoqi@13 1311 __ pop(tmp3);
aoqi@13 1312 __ pop(tmp2);
aoqi@13 1313 __ pop(tmp1);
aoqi@13 1314
aoqi@13 1315 __ jr(RA);
aoqi@13 1316 __ delayed()->nop();
aoqi@13 1317
aoqi@13 1318 __ bind(l_debug);
aoqi@13 1319 __ stop("generate_disjoint_short_copy should not reach here");
aoqi@13 1320 return start;
aoqi@1 1321 }
aoqi@1 1322
aoqi@1 1323 // Arguments:
aoqi@1 1324 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1325 // ignored
aoqi@1 1326 // name - stub name string
aoqi@1 1327 //
aoqi@1 1328 // Inputs:
aoqi@1 1329 // c_rarg0 - source array address
aoqi@1 1330 // c_rarg1 - destination array address
aoqi@1 1331 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1332 //
aoqi@1 1333 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
aoqi@1 1334 // let the hardware handle it. The two or four words within dwords
aoqi@1 1335 // or qwords that span cache line boundaries will still be loaded
aoqi@1 1336 // and stored atomically.
aoqi@1 1337 //
aoqi@1 1338 address generate_conjoint_short_copy(bool aligned, const char *name) {
aoqi@6880 1339 Label l_1, l_2, l_3, l_4, l_5;
aoqi@6880 1340 StubCodeMark mark(this, "StubRoutines", name);
aoqi@6880 1341 __ align(CodeEntryAlignment);
aoqi@6880 1342 address start = __ pc();
aoqi@6880 1343 address nooverlap_target = aligned ?
aoqi@6880 1344 StubRoutines::arrayof_jshort_disjoint_arraycopy() :
aoqi@6880 1345 StubRoutines::jshort_disjoint_arraycopy();
aoqi@1 1346
aoqi@6880 1347 array_overlap_test(nooverlap_target, 1);
aoqi@1 1348
aoqi@6880 1349 __ push(T3);
aoqi@6880 1350 __ push(T0);
aoqi@6880 1351 __ push(T1);
aoqi@6880 1352 __ push(T8);
aoqi@1 1353
aoqi@6880 1354 __ move(T1, A2);
aoqi@6880 1355 __ move(T3, A0);
aoqi@6880 1356 __ move(T0, A1);
aoqi@1 1357
aoqi@1 1358
aoqi@6880 1359 // copy dwords from high to low
aoqi@6880 1360 __ sll(AT, T1, Address::times_2);
aoqi@6880 1361 __ add(AT, T3, AT);
aoqi@6880 1362 __ lea(T3, Address( AT, -4));
aoqi@6880 1363 __ sll(AT,T1 , Address::times_2);
aoqi@6880 1364 __ add(AT, T0, AT);
aoqi@6880 1365 __ lea(T0, Address( AT, -4));
aoqi@6880 1366 __ move(T8, T1);
aoqi@6880 1367 __ bind(l_1);
aoqi@6880 1368 __ sra(T1,T1, 1);
aoqi@6880 1369 __ beq(T1, R0, l_4);
aoqi@6880 1370 __ delayed()->nop();
aoqi@6880 1371 __ align(16);
aoqi@6880 1372 __ bind(l_2);
aoqi@6880 1373 __ lw(AT, T3, 0);
aoqi@6880 1374 __ sw(AT, T0, 0);
aoqi@6880 1375 __ addi(T3, T3, -4);
aoqi@6880 1376 __ addi(T0, T0, -4);
aoqi@6880 1377 __ addi(T1, T1, -1);
aoqi@6880 1378 __ bne(T1, R0, l_2);
aoqi@6880 1379 __ delayed()->nop();
aoqi@6880 1380 __ b(l_4);
aoqi@6880 1381 __ delayed()->nop();
aoqi@6880 1382 // copy dwords with repeat move
aoqi@6880 1383 __ bind(l_3);
aoqi@6880 1384 __ bind(l_4);
aoqi@6880 1385 __ andi(T8, T8, 1); // suffix count
aoqi@6880 1386 __ beq(T8, R0, l_5 );
aoqi@6880 1387 __ delayed()->nop();
aoqi@6880 1388 // copy suffix
aoqi@6880 1389 __ lh(AT, T3, 2);
aoqi@6880 1390 __ sh(AT, T0, 2);
aoqi@6880 1391 __ bind(l_5);
aoqi@6880 1392 __ pop(T8);
aoqi@6880 1393 __ pop(T1);
aoqi@6880 1394 __ pop(T0);
aoqi@6880 1395 __ pop(T3);
aoqi@6880 1396 __ jr(RA);
aoqi@6880 1397 __ delayed()->nop();
aoqi@6880 1398 return start;
aoqi@1 1399 }
aoqi@1 1400
aoqi@1 1401 // Arguments:
aoqi@1 1402 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1403 // ignored
aoqi@1 1404 // is_oop - true => oop array, so generate store check code
aoqi@1 1405 // name - stub name string
aoqi@1 1406 //
aoqi@1 1407 // Inputs:
aoqi@1 1408 // c_rarg0 - source array address
aoqi@1 1409 // c_rarg1 - destination array address
aoqi@1 1410 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1411 //
aoqi@1 1412 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1413 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1414 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1415 //
aoqi@1 1416 // Side Effects:
aoqi@1 1417 // disjoint_int_copy_entry is set to the no-overlap entry point
aoqi@1 1418 // used by generate_conjoint_int_oop_copy().
aoqi@1 1419 //
fujie@8004 1420 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
aoqi@119 1421 Label l_3, l_4, l_5, l_6, l_7;
jiangshaofeng@118 1422 StubCodeMark mark(this, "StubRoutines", name);
fujie@109 1423
jiangshaofeng@118 1424 __ align(CodeEntryAlignment);
jiangshaofeng@118 1425 address start = __ pc();
aoqi@119 1426 __ push(T3);
aoqi@119 1427 __ push(T0);
aoqi@119 1428 __ push(T1);
aoqi@119 1429 __ push(T8);
fujie@8002 1430 __ push(T9);
aoqi@119 1431 __ move(T1, A2);
aoqi@119 1432 __ move(T3, A0);
jiangshaofeng@118 1433 __ move(T0, A1);
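// T3: source array address
// T0: destination array address
// T1: element count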
fujie@109 1434
fujie@8004 1435 if (is_oop) {
fujie@8004 1436 gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
fujie@8004 1437 }
fujie@8004 1438
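// If src and dst share the same alignment mod 8, optionally copy one leading
// int so the main loop below can move two ints (8 bytes) per iteration;
// otherwise fall back to the one-int-at-a-time loop at l_5.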
jiangshaofeng@118 1439 if (!aligned) {
jiangshaofeng@118 1440 __ xorr(AT, T3, T0);
jiangshaofeng@118 1441 __ andi(AT, AT, 7);
jiangshaofeng@118 1442 __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time
jiangshaofeng@118 1443 __ delayed()->nop();
fujie@109 1444
jiangshaofeng@118 1445 __ andi(AT, T3, 7);
jiangshaofeng@118 1446 __ beq(AT, R0, l_6); // copy 2 elements each time
jiangshaofeng@118 1447 __ delayed()->nop();
aoqi@1 1448
jiangshaofeng@118 1449 __ lw(AT, T3, 0);
jiangshaofeng@118 1450 __ daddi(T1, T1, -1);
jiangshaofeng@118 1451 __ sw(AT, T0, 0);
jiangshaofeng@118 1452 __ daddi(T3, T3, 4);
jiangshaofeng@118 1453 __ daddi(T0, T0, 4);
jiangshaofeng@118 1454 }
fujie@109 1455
jiangshaofeng@118 1456 {
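// main loop: copy two ints (8 bytes) per iteration while at least two elements remain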
aoqi@119 1457 __ bind(l_6);
jiangshaofeng@118 1458 __ daddi(AT, T1, -1);
jiangshaofeng@118 1459 __ blez(AT, l_5);
jiangshaofeng@118 1460 __ delayed()->nop();
fujie@109 1461
jiangshaofeng@118 1462 __ bind(l_7);
jiangshaofeng@118 1463 __ ld(AT, T3, 0);
jiangshaofeng@118 1464 __ sd(AT, T0, 0);
jiangshaofeng@118 1465 __ daddi(T3, T3, 8);
jiangshaofeng@118 1466 __ daddi(T0, T0, 8);
jiangshaofeng@118 1467 __ daddi(T1, T1, -2);
jiangshaofeng@118 1468 __ daddi(AT, T1, -2);
jiangshaofeng@118 1469 __ bgez(AT, l_7);
jiangshaofeng@118 1470 __ delayed()->nop();
jiangshaofeng@118 1471 }
jiangshaofeng@118 1472
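// copy any remaining elements one int at a time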
jiangshaofeng@118 1473 __ bind(l_5);
aoqi@119 1474 __ beq(T1, R0, l_4);
aoqi@119 1475 __ delayed()->nop();
aoqi@119 1476
jiangshaofeng@118 1477 __ align(16);
jiangshaofeng@118 1478 __ bind(l_3);
aoqi@119 1479 __ lw(AT, T3, 0);
aoqi@119 1480 __ sw(AT, T0, 0);
jiangshaofeng@118 1481 __ addi(T3, T3, 4);
jiangshaofeng@118 1482 __ addi(T0, T0, 4);
aoqi@119 1483 __ addi(T1, T1, -1);
aoqi@119 1484 __ bne(T1, R0, l_3);
aoqi@119 1485 __ delayed()->nop();
aoqi@119 1486
aoqi@119 1487 // exit
jiangshaofeng@118 1488 __ bind(l_4);
fujie@8004 1489 if (is_oop) {
fujie@8004 1490 gen_write_ref_array_post_barrier(A1, A2, T1);
fujie@8004 1491 }
fujie@8002 1492 __ pop(T9);
jiangshaofeng@118 1493 __ pop(T8);
jiangshaofeng@118 1494 __ pop(T1);
jiangshaofeng@118 1495 __ pop(T0);
jiangshaofeng@118 1496 __ pop(T3);
aoqi@119 1497 __ jr(RA);
aoqi@119 1498 __ delayed()->nop();
aoqi@119 1499
jiangshaofeng@118 1500 return start;
jiangshaofeng@118 1501 }
aoqi@1 1502
aoqi@1 1503 // Arguments:
aoqi@1 1504 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1505 // ignored
aoqi@1 1506 // is_oop - true => oop array, so generate store check code
aoqi@1 1507 // name - stub name string
aoqi@1 1508 //
aoqi@1 1509 // Inputs:
aoqi@1 1510 // c_rarg0 - source array address
aoqi@1 1511 // c_rarg1 - destination array address
aoqi@1 1512 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1513 //
aoqi@1 1514 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1515 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1516 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1517 //
fujie@8004 1518 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
aoqi@6880 1519 Label l_2, l_4;
aoqi@6880 1520 StubCodeMark mark(this, "StubRoutines", name);
aoqi@6880 1521 __ align(CodeEntryAlignment);
aoqi@6880 1522 address start = __ pc();
aoqi@6880 1523 address nooverlap_target;
aoqi@1 1524
aoqi@6880 1525 if (is_oop) {
aoqi@6880 1526 nooverlap_target = aligned ?
aoqi@6880 1527 StubRoutines::arrayof_oop_disjoint_arraycopy() :
aoqi@6880 1528 StubRoutines::oop_disjoint_arraycopy();
aoqi@6880 1529 } else {
aoqi@6880 1530 nooverlap_target = aligned ?
aoqi@6880 1531 StubRoutines::arrayof_jint_disjoint_arraycopy() :
aoqi@6880 1532 StubRoutines::jint_disjoint_arraycopy();
aoqi@6880 1533 }
aoqi@1 1534
aoqi@6880 1535 array_overlap_test(nooverlap_target, 2);
aoqi@1 1536
fujie@8004 1537 if (is_oop) {
fujie@8004 1538 gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
fujie@8004 1539 }
fujie@8004 1540
aoqi@6880 1541 __ push(T3);
aoqi@6880 1542 __ push(T0);
aoqi@6880 1543 __ push(T1);
aoqi@6880 1544 __ push(T8);
fujie@8002 1545 __ push(T9);
aoqi@1 1546
aoqi@6880 1547 __ move(T1, A2);
aoqi@6880 1548 __ move(T3, A0);
aoqi@6880 1549 __ move(T0, A1);
aoqi@1 1550
fujie@8002 1551 // T3: source array address
fujie@8002 1552 // T0: destination array address
fujie@8002 1553 // T1: element count
fujie@110 1554
aoqi@6880 1555 __ sll(AT, T1, Address::times_4);
aoqi@6880 1556 __ add(AT, T3, AT);
aoqi@6880 1557 __ lea(T3, Address(AT, -4));
aoqi@6880 1558 __ sll(AT, T1, Address::times_4);
aoqi@6880 1559 __ add(AT, T0, AT);
aoqi@6880 1560 __ lea(T0, Address(AT, -4));
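// T3 and T0 now point at the last int of each array; copying from high to low
// addresses handles the overlapping case correctly.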
aoqi@1 1561
aoqi@6880 1562 __ beq(T1, R0, l_4);
aoqi@6880 1563 __ delayed()->nop();
fujie@110 1564
aoqi@6880 1565 __ align(16);
aoqi@6880 1566 __ bind(l_2);
aoqi@6880 1567 __ lw(AT, T3, 0);
aoqi@6880 1568 __ sw(AT, T0, 0);
aoqi@6880 1569 __ addi(T3, T3, -4);
aoqi@6880 1570 __ addi(T0, T0, -4);
aoqi@6880 1571 __ addi(T1, T1, -1);
aoqi@6880 1572 __ bne(T1, R0, l_2);
aoqi@6880 1573 __ delayed()->nop();
fujie@110 1574
fujie@8004 1575 __ bind(l_4);
aoqi@6880 1576 if (is_oop) {
fujie@8004 1577 gen_write_ref_array_post_barrier(A1, A2, T1);
aoqi@6880 1578 }
fujie@8002 1579 __ pop(T9);
aoqi@6880 1580 __ pop(T8);
aoqi@6880 1581 __ pop(T1);
aoqi@6880 1582 __ pop(T0);
aoqi@6880 1583 __ pop(T3);
aoqi@6880 1584 __ jr(RA);
aoqi@6880 1585 __ delayed()->nop();
fujie@110 1586
aoqi@6880 1587 return start;
aoqi@1 1588 }
aoqi@1 1589
aoqi@1 1590 // Arguments:
aoqi@1 1591 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1592 // ignored
aoqi@1 1593 // is_oop - true => oop array, so generate store check code
aoqi@1 1594 // name - stub name string
aoqi@1 1595 //
aoqi@1 1596 // Inputs:
aoqi@1 1597 // c_rarg0 - source array address
aoqi@1 1598 // c_rarg1 - destination array address
aoqi@1 1599 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1600 //
aoqi@1 1601 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1602 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1603 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1604 //
aoqi@1 1605 // Side Effects:
aoqi@1 1606 // disjoint_long_copy_entry is set to the no-overlap entry point
aoqi@1 1607 // used by generate_conjoint_long_oop_copy().
aoqi@1 1608 //
fujie@8004 1609 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
aoqi@6880 1610 Label l_3, l_4;
aoqi@6880 1611 StubCodeMark mark(this, "StubRoutines", name);
aoqi@6880 1612 __ align(CodeEntryAlignment);
aoqi@6880 1613 address start = __ pc();
fujie@111 1614
fujie@8004 1615 if (is_oop) {
fujie@8004 1616 gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
fujie@8004 1617 }
fujie@8004 1618
aoqi@6880 1619 __ push(T3);
aoqi@6880 1620 __ push(T0);
aoqi@6880 1621 __ push(T1);
aoqi@6880 1622 __ push(T8);
fujie@8002 1623 __ push(T9);
fujie@111 1624
aoqi@6880 1625 __ move(T1, A2);
aoqi@6880 1626 __ move(T3, A0);
aoqi@6880 1627 __ move(T0, A1);
aoqi@1 1628
fujie@8002 1629 // T3: source array address
fujie@8002 1630 // T0: destination array address
fujie@8002 1631 // T1: element count
fujie@111 1632
aoqi@6880 1633 __ beq(T1, R0, l_4);
aoqi@6880 1634 __ delayed()->nop();
aoqi@1 1635
aoqi@6880 1636 __ align(16);
aoqi@6880 1637 __ bind(l_3);
aoqi@6880 1638 __ ld(AT, T3, 0);
aoqi@6880 1639 __ sd(AT, T0, 0);
aoqi@6880 1640 __ addi(T3, T3, 8);
aoqi@6880 1641 __ addi(T0, T0, 8);
aoqi@6880 1642 __ addi(T1, T1, -1);
aoqi@6880 1643 __ bne(T1, R0, l_3);
aoqi@6880 1644 __ delayed()->nop();
fujie@111 1645
fujie@8004 1646 // exit
fujie@8004 1647 __ bind(l_4);
aoqi@6880 1648 if (is_oop) {
fujie@8004 1649 gen_write_ref_array_post_barrier(A1, A2, T1);
aoqi@6880 1650 }
fujie@8002 1651 __ pop(T9);
aoqi@6880 1652 __ pop(T8);
aoqi@6880 1653 __ pop(T1);
aoqi@6880 1654 __ pop(T0);
aoqi@6880 1655 __ pop(T3);
aoqi@6880 1656 __ jr(RA);
aoqi@6880 1657 __ delayed()->nop();
aoqi@6880 1658 return start;
aoqi@6880 1659 }
aoqi@1 1660
aoqi@1 1661 // Arguments:
aoqi@1 1662 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1663 // ignored
aoqi@1 1664 // is_oop - true => oop array, so generate store check code
aoqi@1 1665 // name - stub name string
aoqi@1 1666 //
aoqi@1 1667 // Inputs:
aoqi@1 1668 // c_rarg0 - source array address
aoqi@1 1669 // c_rarg1 - destination array address
aoqi@1 1670 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1671 //
aoqi@1 1672 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1673 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1674 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1675 //
fujie@8004 1676 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
aoqi@6880 1677 Label l_2, l_4;
aoqi@6880 1678 StubCodeMark mark(this, "StubRoutines", name);
aoqi@6880 1679 __ align(CodeEntryAlignment);
aoqi@6880 1680 address start = __ pc();
aoqi@6880 1681 address nooverlap_target;
aoqi@1 1682
aoqi@6880 1683 if (is_oop) {
aoqi@6880 1684 nooverlap_target = aligned ?
aoqi@6880 1685 StubRoutines::arrayof_oop_disjoint_arraycopy() :
aoqi@6880 1686 StubRoutines::oop_disjoint_arraycopy();
aoqi@6880 1687 } else {
aoqi@6880 1688 nooverlap_target = aligned ?
aoqi@6880 1689 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
aoqi@6880 1690 StubRoutines::jlong_disjoint_arraycopy();
aoqi@6880 1691 }
aoqi@1 1692
aoqi@6880 1693 array_overlap_test(nooverlap_target, 3);
aoqi@1 1694
fujie@8004 1695 if (is_oop) {
fujie@8004 1696 gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
fujie@8004 1697 }
fujie@8004 1698
aoqi@6880 1699 __ push(T3);
aoqi@6880 1700 __ push(T0);
aoqi@6880 1701 __ push(T1);
aoqi@6880 1702 __ push(T8);
fujie@8002 1703 __ push(T9);
aoqi@1 1704
aoqi@6880 1705 __ move(T1, A2);
aoqi@6880 1706 __ move(T3, A0);
aoqi@6880 1707 __ move(T0, A1);
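// T3: source array address
// T0: destination array address
// T1: element count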
aoqi@1 1708
aoqi@6880 1709 __ sll(AT, T1, Address::times_8);
aoqi@6880 1710 __ add(AT, T3, AT);
aoqi@6880 1711 __ lea(T3, Address(AT, -8));
aoqi@6880 1712 __ sll(AT, T1, Address::times_8);
aoqi@6880 1713 __ add(AT, T0, AT);
aoqi@6880 1714 __ lea(T0, Address(AT, -8));
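// T3 and T0 now point at the last long of each array; copying from high to low
// addresses handles the overlapping case correctly.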
aoqi@1 1715
aoqi@6880 1716 __ beq(T1, R0, l_4);
aoqi@6880 1717 __ delayed()->nop();
fujie@113 1718
aoqi@6880 1719 __ align(16);
aoqi@6880 1720 __ bind(l_2);
aoqi@6880 1721 __ ld(AT, T3, 0);
aoqi@6880 1722 __ sd(AT, T0, 0);
aoqi@6880 1723 __ addi(T3, T3, -8);
aoqi@6880 1724 __ addi(T0, T0, -8);
aoqi@6880 1725 __ addi(T1, T1, -1);
aoqi@6880 1726 __ bne(T1, R0, l_2);
aoqi@6880 1727 __ delayed()->nop();
fujie@113 1728
fujie@8004 1729 // exit
fujie@8004 1730 __ bind(l_4);
aoqi@6880 1731 if (is_oop) {
fujie@8004 1732 gen_write_ref_array_post_barrier(A1, A2, T1);
aoqi@6880 1733 }
fujie@8002 1734 __ pop(T9);
aoqi@6880 1735 __ pop(T8);
aoqi@6880 1736 __ pop(T1);
aoqi@6880 1737 __ pop(T0);
aoqi@6880 1738 __ pop(T3);
aoqi@6880 1739 __ jr(RA);
aoqi@6880 1740 __ delayed()->nop();
aoqi@6880 1741 return start;
aoqi@1 1742 }
aoqi@1 1743
aoqi@6880 1744 //FIXME
aoqi@1 1745 address generate_disjoint_long_copy(bool aligned, const char *name) {
aoqi@6880 1746 Label l_1, l_2;
aoqi@6880 1747 StubCodeMark mark(this, "StubRoutines", name);
aoqi@6880 1748 __ align(CodeEntryAlignment);
aoqi@6880 1749 address start = __ pc();
aoqi@1 1750
aoqi@6880 1751 __ move(T1, A2);
aoqi@6880 1752 __ move(T3, A0);
aoqi@6880 1753 __ move(T0, A1);
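// T3: source array address
// T0: destination array address
// T1: element count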
aoqi@6880 1754 __ push(T3);
aoqi@6880 1755 __ push(T0);
aoqi@6880 1756 __ push(T1);
aoqi@6880 1757 __ b(l_2);
aoqi@6880 1758 __ delayed()->nop();
aoqi@6880 1759 __ align(16);
aoqi@6880 1760 __ bind(l_1);
aoqi@6880 1761 __ ld(AT, T3, 0);
aoqi@6880 1762 __ sd(AT, T0, 0);
aoqi@6880 1763 __ addi(T3, T3, 8);
aoqi@6880 1764 __ addi(T0, T0, 8);
aoqi@6880 1765 __ bind(l_2);
aoqi@6880 1766 __ addi(T1, T1, -1);
aoqi@6880 1767 __ bgez(T1, l_1);
aoqi@6880 1768 __ delayed()->nop();
aoqi@6880 1769 __ pop(T1);
aoqi@6880 1770 __ pop(T0);
aoqi@6880 1771 __ pop(T3);
aoqi@6880 1772 __ jr(RA);
aoqi@6880 1773 __ delayed()->nop();
aoqi@6880 1774 return start;
aoqi@1 1775 }
aoqi@1 1776
aoqi@1 1777
aoqi@1 1778 address generate_conjoint_long_copy(bool aligned, const char *name) {
aoqi@6880 1779 Label l_1, l_2;
aoqi@6880 1780 StubCodeMark mark(this, "StubRoutines", name);
aoqi@6880 1781 __ align(CodeEntryAlignment);
aoqi@6880 1782 address start = __ pc();
aoqi@6880 1783 address nooverlap_target = aligned ?
aoqi@6880 1784 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
aoqi@6880 1785 StubRoutines::jlong_disjoint_arraycopy();
aoqi@6880 1786 array_overlap_test(nooverlap_target, 3);
aoqi@1 1787
aoqi@6880 1788 __ push(T3);
aoqi@6880 1789 __ push(T0);
aoqi@6880 1790 __ push(T1);
aoqi@1 1791
aoqi@6880 1792 __ move(T1, A2);
aoqi@6880 1793 __ move(T3, A0);
aoqi@6880 1794 __ move(T0, A1);
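// T3: source array address
// T0: destination array address
// T1: element count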
aoqi@6880 1795 __ sll(AT, T1, Address::times_8);
aoqi@6880 1796 __ add(AT, T3, AT);
aoqi@6880 1797 __ lea(T3, Address(AT, -8));
aoqi@6880 1798 __ sll(AT, T1, Address::times_8);
aoqi@6880 1799 __ add(AT, T0, AT);
aoqi@6880 1800 __ lea(T0, Address(AT, -8));
aoqi@1 1801
aoqi@6880 1802 __ b(l_2);
aoqi@6880 1803 __ delayed()->nop();
aoqi@6880 1804 __ align(16);
aoqi@6880 1805 __ bind(l_1);
aoqi@6880 1806 __ ld(AT, T3, 0);
aoqi@6880 1807 __ sd(AT, T0, 0);
aoqi@6880 1808 __ addi(T3, T3, -8);
aoqi@6880 1809 __ addi(T0, T0, -8);
aoqi@6880 1810 __ bind(l_2);
aoqi@6880 1811 __ addi(T1, T1, -1);
aoqi@6880 1812 __ bgez(T1, l_1);
aoqi@6880 1813 __ delayed()->nop();
aoqi@6880 1814 __ pop(T1);
aoqi@6880 1815 __ pop(T0);
aoqi@6880 1816 __ pop(T3);
aoqi@6880 1817 __ jr(RA);
aoqi@6880 1818 __ delayed()->nop();
aoqi@6880 1819 return start;
aoqi@1 1820 }
aoqi@1 1821
aoqi@1 1822 void generate_arraycopy_stubs() {
aoqi@1 1823 if (UseCompressedOops) {
aoqi@178 1824 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true,
aoqi@178 1825 "oop_disjoint_arraycopy");
aoqi@178 1826 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true,
aoqi@178 1827 "oop_arraycopy");
aoqi@178 1828 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true,
fujie@8004 1829 "oop_disjoint_arraycopy_uninit", true);
aoqi@178 1830 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true,
fujie@8004 1831 "oop_arraycopy_uninit", true);
aoqi@1 1832 } else {
aoqi@178 1833 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true,
aoqi@178 1834 "oop_disjoint_arraycopy");
aoqi@178 1835 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true,
aoqi@178 1836 "oop_arraycopy");
aoqi@178 1837 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true,
fujie@8004 1838 "oop_disjoint_arraycopy_uninit", true);
aoqi@178 1839 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true,
fujie@8004 1840 "oop_arraycopy_uninit", true);
aoqi@1 1841 }
aoqi@1 1842
aoqi@178 1843 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
aoqi@178 1844 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
aoqi@178 1845 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
aoqi@178 1846 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
aoqi@1 1847
aoqi@1 1848 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
aoqi@1 1849 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
aoqi@1 1850 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
aoqi@1 1851 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
aoqi@1 1852
aoqi@178 1853 // We don't generate specialized code for HeapWord-aligned source
aoqi@178 1854 // arrays, so just use the code we've already generated
aoqi@178 1855 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
aoqi@178 1856 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy;
aoqi@178 1857
aoqi@178 1858 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
aoqi@178 1859 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy;
aoqi@178 1860
aoqi@178 1861 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
aoqi@178 1862 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
aoqi@178 1863
aoqi@178 1864 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
aoqi@178 1865 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
aoqi@1 1866
aoqi@1 1867 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
aoqi@1 1868 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
aoqi@178 1869
aoqi@178 1870 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
aoqi@178 1871 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
aoqi@1 1872 }
aoqi@1 1873
aoqi@6880 1874 // Wang: implementation of SafeFetch32 and SafeFetchN
aoqi@1 1875 void generate_safefetch(const char* name, int size, address* entry,
aoqi@1 1876 address* fault_pc, address* continuation_pc) {
aoqi@1 1877 // safefetch signatures:
aoqi@1 1878 // int SafeFetch32(int* adr, int errValue);
aoqi@1 1879 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
aoqi@1 1880 //
aoqi@1 1881 // arguments:
aoqi@1 1882 // A0 = adr
aoqi@1 1883 // A1 = errValue
aoqi@1 1884 //
aoqi@1 1885 // result:
aoqi@1 1886 // V0 = *adr or errValue
aoqi@1 1887
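// If the load at *fault_pc faults, the signal handler is expected to redirect
// execution to *continuation_pc; A1 then still holds errValue, which is
// returned in V0 below.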
aoqi@1 1888 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1889
aoqi@1 1890 // Entry point, pc or function descriptor.
aoqi@1 1891 *entry = __ pc();
aoqi@1 1892
aoqi@1 1893 // Load *adr into A1, may fault.
aoqi@1 1894 *fault_pc = __ pc();
aoqi@1 1895 switch (size) {
aoqi@1 1896 case 4:
aoqi@1 1897 // int32_t
aoqi@6880 1898 __ lw(A1, A0, 0);
aoqi@1 1899 break;
aoqi@1 1900 case 8:
aoqi@1 1901 // int64_t
aoqi@6880 1902 __ ld(A1, A0, 0);
aoqi@1 1903 break;
aoqi@1 1904 default:
aoqi@1 1905 ShouldNotReachHere();
aoqi@1 1906 }
aoqi@1 1907
aoqi@1 1908 // return errValue or *adr
aoqi@1 1909 *continuation_pc = __ pc();
aoqi@1 1910 __ addu(V0, A1, R0);
aoqi@1 1911 __ jr(RA);
aoqi@1 1912 __ delayed()->nop();
aoqi@1 1913 }
aoqi@1 1914
aoqi@1 1915
aoqi@1 1916 #undef __
aoqi@1 1917 #define __ masm->
aoqi@1 1918
aoqi@1 1919 // Continuation point for throwing of implicit exceptions that are
aoqi@1 1920 // not handled in the current activation. Fabricates an exception
aoqi@1 1921 // oop and initiates normal exception dispatching in this
aoqi@1 1922 // frame. Since we need to preserve callee-saved values (currently
aoqi@1 1923 // only for C2, but done for C1 as well) we need a callee-saved oop
aoqi@1 1924 // map and therefore have to make these stubs into RuntimeStubs
aoqi@1 1925 // rather than BufferBlobs. If the compiler needs all registers to
aoqi@1 1926 // be preserved between the fault point and the exception handler
aoqi@1 1927 // then it must assume responsibility for that in
aoqi@1 1928 // AbstractCompiler::continuation_for_implicit_null_exception or
aoqi@1 1929 // continuation_for_implicit_division_by_zero_exception. All other
aoqi@1 1930 // implicit exceptions (e.g., NullPointerException or
aoqi@1 1931 // AbstractMethodError on entry) are either at call sites or
aoqi@1 1932 // otherwise assume that stack unwinding will be initiated, so
aoqi@1 1933 // caller saved registers were assumed volatile in the compiler.
aoqi@1 1934 address generate_throw_exception(const char* name,
aoqi@1 1935 address runtime_entry,
aoqi@1 1936 bool restore_saved_exception_pc) {
aoqi@1 1937 // Information about frame layout at time of blocking runtime call.
aoqi@1 1938 // Note that we only have to preserve callee-saved registers since
aoqi@1 1939 // the compilers are responsible for supplying a continuation point
aoqi@6880 1940 // if they expect all registers to be preserved.
aoqi@6880 1941 enum layout {
aoqi@6880 1942 thread_off, // last_java_sp
aoqi@6880 1943 S7_off, // callee saved register sp + 1
aoqi@6880 1944 S6_off, // callee saved register sp + 2
aoqi@6880 1945 S5_off, // callee saved register sp + 3
aoqi@6880 1946 S4_off, // callee saved register sp + 4
aoqi@6880 1947 S3_off, // callee saved register sp + 5
aoqi@6880 1948 S2_off, // callee saved register sp + 6
aoqi@6880 1949 S1_off, // callee saved register sp + 7
aoqi@6880 1950 S0_off, // callee saved register sp + 8
aoqi@6880 1951 FP_off,
aoqi@6880 1952 ret_address,
aoqi@6880 1953 framesize
aoqi@6880 1954 };
aoqi@1 1955
aoqi@6880 1956 int insts_size = 2048;
aoqi@6880 1957 int locs_size = 32;
aoqi@1 1958
aoqi@6880 1959 // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false,
aoqi@6880 1960 // NULL, NULL, NULL, false, NULL, name, false);
aoqi@6880 1961 CodeBuffer code(name, insts_size, locs_size);
aoqi@6880 1962 OopMapSet* oop_maps = new OopMapSet();
aoqi@6880 1963 MacroAssembler* masm = new MacroAssembler(&code);
aoqi@1 1964
aoqi@6880 1965 address start = __ pc();
aoqi@1 1966
aoqi@6880 1967 // This is an inlined and slightly modified version of call_VM
aoqi@6880 1968 // which has the ability to fetch the return PC out of
aoqi@6880 1969 // thread-local storage and also sets up last_Java_sp slightly
aoqi@6880 1970 // differently than the real call_VM
aoqi@6880 1971 #ifndef OPT_THREAD
aoqi@6880 1972 Register java_thread = TREG;
aoqi@6880 1973 __ get_thread(java_thread);
aoqi@1 1974 #else
aoqi@6880 1975 Register java_thread = TREG;
aoqi@1 1976 #endif
aoqi@6880 1977 if (restore_saved_exception_pc) {
aoqi@6880 1978 __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset()));
aoqi@6880 1979 }
aoqi@1 1980
aoqi@6880 1981 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1982
aoqi@6880 1983 __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
aoqi@6880 1984 __ sd(S0, SP, S0_off * wordSize);
aoqi@6880 1985 __ sd(S1, SP, S1_off * wordSize);
aoqi@6880 1986 __ sd(S2, SP, S2_off * wordSize);
aoqi@6880 1987 __ sd(S3, SP, S3_off * wordSize);
aoqi@6880 1988 __ sd(S4, SP, S4_off * wordSize);
aoqi@6880 1989 __ sd(S5, SP, S5_off * wordSize);
aoqi@6880 1990 __ sd(S6, SP, S6_off * wordSize);
aoqi@6880 1991 __ sd(S7, SP, S7_off * wordSize);
aoqi@1 1992
aoqi@6880 1993 int frame_complete = __ pc() - start;
aoqi@6880 1994 // push java thread (becomes first argument of C function)
aoqi@6880 1995 __ sd(java_thread, SP, thread_off * wordSize);
aoqi@8009 1996 if (java_thread != A0)
aoqi@6880 1997 __ move(A0, java_thread);
aoqi@1 1998
aoqi@6880 1999 // Set up last_Java_sp and last_Java_fp
aoqi@6880 2000 __ set_last_Java_frame(java_thread, SP, FP, NULL);
aoqi@8009 2001 // Align stack
aoqi@8009 2002 __ set64(AT, -(StackAlignmentInBytes));
aoqi@8009 2003 __ andr(SP, SP, AT);
fujie@407 2004
aoqi@6880 2005 __ relocate(relocInfo::internal_pc_type);
aoqi@6880 2006 {
aoqi@6880 2007 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
aoqi@6880 2008 __ patchable_set48(AT, save_pc);
aoqi@6880 2009 }
aoqi@6880 2010 __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
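// The value stored above becomes last_Java_pc, so the VM can walk this
// RuntimeStub frame while the runtime call below is in progress.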
aoqi@1 2011
aoqi@6880 2012 // Call runtime
aoqi@6880 2013 __ call(runtime_entry);
aoqi@6880 2014 __ delayed()->nop();
aoqi@6880 2015 // Generate oop map
aoqi@6880 2016 OopMap* map = new OopMap(framesize, 0);
aoqi@6880 2017 oop_maps->add_gc_map(__ offset(), map);
aoqi@1 2018
aoqi@6880 2019 // restore the thread (cannot use the pushed argument since arguments
aoqi@6880 2020 // may be overwritten by C code generated by an optimizing compiler);
aoqi@6880 2021 // however can use the register value directly if it is callee saved.
aoqi@1 2022 #ifndef OPT_THREAD
aoqi@6880 2023 __ get_thread(java_thread);
aoqi@1 2024 #endif
aoqi@1 2025
aoqi@6880 2026 __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@6880 2027 __ reset_last_Java_frame(java_thread, true, true);
aoqi@1 2028
aoqi@6880 2029 // Restore callee save registers. This must be done after resetting the Java frame
aoqi@6880 2030 __ ld(S0, SP, S0_off * wordSize);
aoqi@6880 2031 __ ld(S1, SP, S1_off * wordSize);
aoqi@6880 2032 __ ld(S2, SP, S2_off * wordSize);
aoqi@6880 2033 __ ld(S3, SP, S3_off * wordSize);
aoqi@6880 2034 __ ld(S4, SP, S4_off * wordSize);
aoqi@6880 2035 __ ld(S5, SP, S5_off * wordSize);
aoqi@6880 2036 __ ld(S6, SP, S6_off * wordSize);
aoqi@6880 2037 __ ld(S7, SP, S7_off * wordSize);
aoqi@1 2038
aoqi@6880 2039 // discard arguments
aoqi@6880 2040 __ addi(SP, SP, (framesize-2) * wordSize); // epilog
aoqi@6880 2041 __ addi(SP, FP, wordSize);
aoqi@6880 2042 __ ld(FP, SP, -1*wordSize);
aoqi@6880 2043 // check for pending exceptions
aoqi@1 2044 #ifdef ASSERT
aoqi@6880 2045 Label L;
aoqi@6880 2046 __ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 2047 __ bne(AT, R0, L);
aoqi@6880 2048 __ delayed()->nop();
aoqi@6880 2049 __ should_not_reach_here();
aoqi@6880 2050 __ bind(L);
aoqi@1 2051 #endif //ASSERT
aoqi@6880 2052 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
aoqi@6880 2053 __ delayed()->nop();
aoqi@8009 2054 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name,
aoqi@8009 2055 &code,
aoqi@8009 2056 frame_complete,
aoqi@8009 2057 framesize,
aoqi@8009 2058 oop_maps, false);
aoqi@6880 2059 return stub->entry_point();
aoqi@1 2060 }
aoqi@1 2061
aoqi@6880 2062 // Initialization
aoqi@6880 2063 void generate_initial() {
fujie@407 2064 // Generates all stubs and initializes the entry points
aoqi@6880 2065
fujie@407 2066 //-------------------------------------------------------------
fujie@407 2068 // entry points that exist in all platforms
aoqi@6880 2069 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller
aoqi@6880 2070 // than the disadvantage of having a much more complicated generator structure.
fujie@407 2071 // See also comment in stubRoutines.hpp.
aoqi@6880 2072 StubRoutines::_forward_exception_entry = generate_forward_exception();
fujie@407 2073 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
aoqi@6880 2074 // is referenced by megamorphic call
aoqi@6880 2075 StubRoutines::_catch_exception_entry = generate_catch_exception();
aoqi@6880 2076
fujie@407 2077 StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
aoqi@6880 2078
aoqi@6880 2079 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception",
fujie@407 2080 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
aoqi@1 2081 // platform dependent
fujie@407 2082 StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
aoqi@6880 2083 }
aoqi@1 2084
aoqi@6880 2085 void generate_all() {
aoqi@1 2086 // Generates all stubs and initializes the entry points
aoqi@1 2087
aoqi@1 2088 // These entry points require SharedInfo::stack0 to be set up in
aoqi@1 2089 // non-core builds and need to be relocatable, so they each
aoqi@1 2090 // fabricate a RuntimeStub internally.
aoqi@6880 2091 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception",
fujie@407 2092 CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
fujie@410 2093
fujie@410 2094 StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception",
fujie@410 2095 CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
fujie@410 2096
aoqi@6880 2097 StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception",
fujie@407 2098 CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
aoqi@1 2099
fujie@407 2100 //------------------------------------------------------------------
aoqi@6880 2101 // entry points that are platform specific
aoqi@6880 2102
aoqi@1 2103 // support for verify_oop (must happen after universe_init)
aoqi@6880 2104 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
aoqi@1 2105 #ifndef CORE
aoqi@6880 2106 // arraycopy stubs used by compilers
fujie@407 2107 generate_arraycopy_stubs();
aoqi@1 2108 #endif
aoqi@1 2109
aoqi@1 2110 // Safefetch stubs.
aoqi@1 2111 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
aoqi@1 2112 &StubRoutines::_safefetch32_fault_pc,
aoqi@1 2113 &StubRoutines::_safefetch32_continuation_pc);
aoqi@1 2114 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
aoqi@1 2115 &StubRoutines::_safefetchN_fault_pc,
aoqi@1 2116 &StubRoutines::_safefetchN_continuation_pc);
aoqi@6880 2117 }
aoqi@1 2118
aoqi@1 2119 public:
aoqi@1 2120 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
aoqi@1 2121 if (all) {
aoqi@1 2122 generate_all();
aoqi@1 2123 } else {
aoqi@1 2124 generate_initial();
aoqi@1 2125 }
aoqi@1 2126 }
aoqi@1 2127 }; // end class declaration
aoqi@1 2128
aoqi@1 2129 void StubGenerator_generate(CodeBuffer* code, bool all) {
aoqi@1 2130 StubGenerator g(code, all);
aoqi@1 2131 }
