src/cpu/mips/vm/stubGenerator_mips_64.cpp

author:      aoqi
date:        Wed, 29 Mar 2017 09:41:51 +0800
changeset:   392 (4bfb40d1e17a)
parent:      373 (3a34fc828b4a)
child:       401 (721a83ed5111)
permissions: -rw-r--r--

#4662 TieredCompilation is turned off.
TieredCompilation is not supported yet.

aoqi@1 1 /*
aoqi@1 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
aoqi@1 3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
aoqi@1 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@1 5 *
aoqi@1 6 * This code is free software; you can redistribute it and/or modify it
aoqi@1 7 * under the terms of the GNU General Public License version 2 only, as
aoqi@1 8 * published by the Free Software Foundation.
aoqi@1 9 *
aoqi@1 10 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@1 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@1 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@1 13 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@1 14 * accompanied this code).
aoqi@1 15 *
aoqi@1 16 * You should have received a copy of the GNU General Public License version
aoqi@1 17 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@1 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@1 19 *
aoqi@1 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@1 21 * or visit www.oracle.com if you need additional information or have any
aoqi@1 22 * questions.
aoqi@1 23 *
aoqi@1 24 */
aoqi@1 25
aoqi@1 26 #include "precompiled.hpp"
aoqi@1 27 #include "asm/macroAssembler.hpp"
aoqi@1 28 #include "asm/macroAssembler.inline.hpp"
aoqi@1 29 #include "interpreter/interpreter.hpp"
aoqi@1 30 #include "nativeInst_mips.hpp"
aoqi@1 31 #include "oops/instanceOop.hpp"
aoqi@1 32 #include "oops/method.hpp"
aoqi@1 33 #include "oops/objArrayKlass.hpp"
aoqi@1 34 #include "oops/oop.inline.hpp"
aoqi@1 35 #include "prims/methodHandles.hpp"
aoqi@1 36 #include "runtime/frame.inline.hpp"
aoqi@1 37 #include "runtime/handles.inline.hpp"
aoqi@1 38 #include "runtime/sharedRuntime.hpp"
aoqi@1 39 #include "runtime/stubCodeGenerator.hpp"
aoqi@1 40 #include "runtime/stubRoutines.hpp"
aoqi@1 41 #include "runtime/thread.inline.hpp"
aoqi@1 42 #include "utilities/top.hpp"
aoqi@1 43 #ifdef COMPILER2
aoqi@1 44 #include "opto/runtime.hpp"
aoqi@1 45 #endif
aoqi@1 46
aoqi@1 47
aoqi@1 48 // Declaration and definition of StubGenerator (no .hpp file).
aoqi@1 49 // For a more detailed description of the stub routine structure
aoqi@1 50 // see the comment in stubRoutines.hpp
aoqi@1 51
aoqi@1 52 #define __ _masm->
aoqi@1 53 //#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
aoqi@1 54 //#define a__ ((Assembler*)_masm)->
aoqi@1 55
aoqi@1 56 //#ifdef PRODUCT
aoqi@1 57 //#define BLOCK_COMMENT(str) /* nothing */
aoqi@1 58 //#else
aoqi@1 59 //#define BLOCK_COMMENT(str) __ block_comment(str)
aoqi@1 60 //#endif
aoqi@1 61
aoqi@1 62 //#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
aoqi@1 63 const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
aoqi@1 64
aoqi@1 65 // Stub Code definitions
aoqi@1 66
aoqi@1 67 static address handle_unsafe_access() {
aoqi@1 68 JavaThread* thread = JavaThread::current();
aoqi@1 69 address pc = thread->saved_exception_pc();
aoqi@1 70 // pc is the instruction which we must emulate
aoqi@1 71 // doing a no-op is fine: return garbage from the load
aoqi@1 72 // therefore, compute npc
aoqi@1 73 //address npc = Assembler::locate_next_instruction(pc);
aoqi@1 74 address npc = (address)((unsigned long)pc + sizeof(unsigned long));
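  // Note: MIPS instructions are 4 bytes wide, while sizeof(unsigned long) is 8 under LP64,
  // so this advances past the faulting instruction and the instruction following it.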
aoqi@1 75
aoqi@1 76 // request an async exception
aoqi@1 77 thread->set_pending_unsafe_access_error();
aoqi@1 78
aoqi@1 79 // return address of next instruction to execute
aoqi@1 80 return npc;
aoqi@1 81 }
aoqi@1 82
aoqi@1 83 class StubGenerator: public StubCodeGenerator {
aoqi@1 84 private:
aoqi@1 85
aoqi@1 86 // ABI mips n64
aoqi@1 87 // This figure is not the MIPS ABI; it is the convention used when calling Java from C.
aoqi@1 88 // Call stubs are used to call Java from C
aoqi@1 89 //
aoqi@1 90 // [ return_from_Java ]
aoqi@1 91 // [ argument word n-1 ] <--- sp
aoqi@1 92 // ...
aoqi@1 93 // [ argument word 0 ]
aoqi@1 94 // ...
aoqi@1 95 //-10 [ S6 ]
aoqi@1 96 // -9 [ S5 ]
aoqi@1 97 // -8 [ S4 ]
aoqi@1 98 // -7 [ S3 ]
aoqi@1 99 // -6 [ S0 ]
aoqi@1 100 // -5 [ TSR(S2) ]
aoqi@1 101 // -4 [ LVP(S7) ]
aoqi@1 102 // -3 [ BCP(S1) ]
aoqi@1 103 // -2 [ saved fp ] <--- fp_after_call
aoqi@1 104 // -1 [ return address ]
aoqi@1 105 // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
aoqi@1 106 // 1 [ result ] <--- a1
aoqi@1 107 // 2 [ result_type ] <--- a2
aoqi@1 108 // 3 [ method ] <--- a3
aoqi@1 109 // 4 [ entry_point ] <--- a4
aoqi@1 110 // 5 [ parameters ] <--- a5
aoqi@1 111 // 6 [ parameter_size ] <--- a6
aoqi@1 112 // 7 [ thread ] <--- a7
aoqi@1 113
aoqi@1 114 //
aoqi@1 115 // _LP64: under n64 the parameters are not saved on the stack.
aoqi@1 116 //
aoqi@1 117 // [ return_from_Java ]
aoqi@1 118 // [ argument word n-1 ] <--- sp
aoqi@1 119 // ...
aoqi@1 120 // [ argument word 0 ]
aoqi@1 121 // ...
aoqi@1 122 //-14 [ thread ]
aoqi@1 123 //-13 [ result_type ] <--- a2
aoqi@1 124 //-12 [ result ] <--- a1
aoqi@1 125 //-11 [ ptr. to call wrapper ] <--- a0
aoqi@1 126 //-10 [ S6 ]
aoqi@1 127 // -9 [ S5 ]
aoqi@1 128 // -8 [ S4 ]
aoqi@1 129 // -7 [ S3 ]
aoqi@1 130 // -6 [ S0 ]
aoqi@1 131 // -5 [ TSR(S2) ]
aoqi@1 132 // -4 [ LVP(S7) ]
aoqi@1 133 // -3 [ BCP(S1) ]
aoqi@1 134 // -2 [ saved fp ] <--- fp_after_call
aoqi@1 135 // -1 [ return address ]
aoqi@1 136 // 0 [ ] <--- old sp
aoqi@1 137 /*
aoqi@1 138 * 2014/01/16 Fu: Find a suitable place in the call_stub for GP.
aoqi@1 139 * GP will point to the starting point of Interpreter::dispatch_table(itos).
aoqi@1 140 * It should be saved/restored before/after Java calls.
aoqi@1 141 *
aoqi@1 142 */
aoqi@1 143 enum call_stub_layout {
aoqi@1 144 RA_off = -1,
aoqi@1 145 FP_off = -2,
aoqi@1 146 BCP_off = -3,
aoqi@1 147 LVP_off = -4,
aoqi@1 148 TSR_off = -5,
aoqi@1 149 S1_off = -6,
aoqi@1 150 S3_off = -7,
aoqi@1 151 S4_off = -8,
aoqi@1 152 S5_off = -9,
aoqi@1 153 S6_off = -10,
aoqi@1 154 result_off = -11,
aoqi@1 155 result_type_off = -12,
aoqi@1 156 thread_off = -13,
aoqi@1 157 total_off = thread_off - 3,
aoqi@1 158 GP_off = -16,
aoqi@1 159 };
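  // Note: GP_off (-16) equals total_off (thread_off - 3), so GP occupies the lowest
  // word of the area reserved by generate_call_stub(); the callee-saved registers are
  // stored at these offsets relative to the incoming SP, before SP is moved down by
  // 16 words.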
aoqi@1 160
aoqi@1 161 address generate_call_stub(address& return_address) {
aoqi@1 162
aoqi@1 163 StubCodeMark mark(this, "StubRoutines", "call_stub");
aoqi@1 164 address start = __ pc();
aoqi@1 165
aoqi@1 166 // same as in generate_catch_exception()!
aoqi@1 167
aoqi@1 168 // stub code
aoqi@1 169 // save ra and fp
aoqi@1 170 __ sd(RA, SP, RA_off * wordSize);
aoqi@1 171 __ sd(FP, SP, FP_off * wordSize);
aoqi@1 172 __ sd(BCP, SP, BCP_off * wordSize);
aoqi@1 173 __ sd(LVP, SP, LVP_off * wordSize);
aoqi@1 174 __ sd(GP, SP, GP_off * wordSize);
aoqi@1 175 __ sd(TSR, SP, TSR_off * wordSize);
aoqi@1 176 __ sd(S1, SP, S1_off * wordSize);
aoqi@1 177 __ sd(S3, SP, S3_off * wordSize);
aoqi@1 178 __ sd(S4, SP, S4_off * wordSize);
aoqi@1 179 __ sd(S5, SP, S5_off * wordSize);
aoqi@1 180 __ sd(S6, SP, S6_off * wordSize);
aoqi@1 181
aoqi@1 182
fujie@368 183 __ set64(GP, (long)Interpreter::dispatch_table(itos));
aoqi@1 184
aoqi@1 185 // 14 words should be the maximum gap between the argument area and the callee-saved registers
aoqi@1 186 __ daddi(FP, SP, (-2) * wordSize);
aoqi@1 187 __ daddi(SP, SP, total_off * wordSize);
aoqi@1 188 //FIXME, aoqi. find a suitable place to save A1 & A2.
aoqi@1 189 /*
aoqi@1 190 __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
aoqi@1 191 __ sd(A1, FP, 3 * wordSize);
aoqi@1 192 __ sd(A2, FP, 4 * wordSize);
aoqi@1 193 __ sd(A3, FP, 5 * wordSize);
aoqi@1 194 __ sd(A4, FP, 6 * wordSize);
aoqi@1 195 __ sd(A5, FP, 7 * wordSize);
aoqi@1 196 __ sd(A6, FP, 8 * wordSize);
aoqi@1 197 __ sd(A7, FP, 9 * wordSize);
aoqi@1 198 */
aoqi@1 199 __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
aoqi@1 200 __ sd(A1, FP, result_off * wordSize);
aoqi@1 201 __ sd(A2, FP, result_type_off * wordSize);
aoqi@1 202 __ sd(A7, FP, thread_off * wordSize);
aoqi@1 203
aoqi@1 204 #ifdef OPT_THREAD
aoqi@1 205 //__ get_thread(TREG);
aoqi@1 206 __ move(TREG, A7);
aoqi@1 207
aoqi@1 208 //__ ld(TREG, FP, thread_off * wordSize);
aoqi@1 209 #endif
aoqi@1 210 // added for compressed oops
aoqi@1 211 __ reinit_heapbase();
aoqi@1 212
aoqi@1 213 #ifdef ASSERT
aoqi@1 214 // make sure we have no pending exceptions
aoqi@1 215 {
aoqi@1 216 Label L;
aoqi@1 217 __ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
aoqi@1 218 __ beq(AT, R0, L);
aoqi@1 219 __ delayed()->nop();
aoqi@1 220 /* FIXME: how to implement stop() properly on MIPS is still unclear; revisit in the future */
aoqi@1 221 __ stop("StubRoutines::call_stub: entered with pending exception");
aoqi@1 222 __ bind(L);
aoqi@1 223 }
aoqi@1 224 #endif
aoqi@1 225
aoqi@1 226 // pass parameters if any
aoqi@1 227 // A5: parameter
aoqi@1 228 // A6: parameter_size
aoqi@1 229 // T0: parameter_size_tmp(--)
aoqi@1 230 // T2: offset(++)
aoqi@1 231 // T3: tmp
aoqi@1 232 Label parameters_done;
aoqi@1 233 // check whether parameter_size is zero
aoqi@1 234 __ beq(A6, R0, parameters_done);
aoqi@1 235 __ delayed()->nop();
aoqi@1 236 __ dsll(AT, A6, Interpreter::logStackElementSize);
aoqi@1 237 __ dsub(SP, SP, AT);
aoqi@1 238 __ move(AT, -StackAlignmentInBytes);
aoqi@1 239 __ andr(SP, SP , AT);
aoqi@1 240 // Copy Java parameters in reverse order (receiver last)
aoqi@1 241 // Note that the argument order is inverted in the process
aoqi@1 242 // source is A5[T0: N-1..0]
aoqi@1 243 // dest is SP[T2: 0..N-1]
aoqi@1 244 Label loop;
aoqi@1 245 __ move(T0, A6);
aoqi@1 246 __ move(T2, R0);
aoqi@1 247 __ bind(loop);
aoqi@1 248
aoqi@1 249 // get parameter
aoqi@1 250 __ dsll(T3, T0, LogBytesPerWord);
aoqi@1 251 __ dadd(T3, T3, A5);
aoqi@1 252 __ ld(AT, T3, -wordSize);
aoqi@1 253 __ dsll(T3, T2, LogBytesPerWord);
aoqi@1 254 __ dadd(T3, T3, SP);
aoqi@1 255 __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
aoqi@1 256 __ daddi(T2, T2, 1);
aoqi@1 257 __ daddi(T0, T0, -1);
aoqi@1 258 __ bne(T0, R0, loop);
aoqi@1 259 __ delayed()->nop();
aoqi@1 260 // advance to next parameter
aoqi@1 261
aoqi@1 262 // call Java function
aoqi@1 263 __ bind(parameters_done);
aoqi@1 264
aoqi@1 265 // receiver in V0, methodOop in Rmethod
aoqi@1 266
aoqi@1 267 __ move(Rmethod, A3);
aoqi@1 268 __ move(Rsender, SP); //set sender sp
aoqi@1 269 __ jalr(A4);
aoqi@1 270 __ delayed()->nop();
aoqi@1 271 return_address = __ pc();
aoqi@1 272
aoqi@1 273 Label common_return;
aoqi@1 274 __ bind(common_return);
aoqi@1 275
aoqi@1 276 // store result depending on type
aoqi@1 277 // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
aoqi@1 278 __ ld(T0, FP, result_off * wordSize); // result --> T0
aoqi@1 279 Label is_long, is_float, is_double, exit;
aoqi@1 280 __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2
aoqi@1 281 __ daddi(T3, T2, (-1) * T_LONG);
aoqi@1 282 __ beq(T3, R0, is_long);
aoqi@1 283 __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
aoqi@1 284 __ beq(T3, R0, is_float);
aoqi@1 285 __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
aoqi@1 286 __ beq(T3, R0, is_double);
aoqi@1 287 __ delayed()->nop();
aoqi@1 288
aoqi@1 289 // handle T_INT case
aoqi@1 290 __ sd(V0, T0, 0 * wordSize);
aoqi@1 291 __ bind(exit);
aoqi@1 292
aoqi@1 293 // restore
aoqi@1 294 __ daddi(SP, FP, 2 * wordSize );
aoqi@1 295 __ ld(RA, SP, RA_off * wordSize);
aoqi@1 296 __ ld(FP, SP, FP_off * wordSize);
aoqi@1 297 __ ld(BCP, SP, BCP_off * wordSize);
aoqi@1 298 __ ld(LVP, SP, LVP_off * wordSize);
aoqi@1 299 __ ld(GP, SP, GP_off * wordSize);
aoqi@1 300 __ ld(TSR, SP, TSR_off * wordSize);
aoqi@1 301
aoqi@1 302 __ ld(S1, SP, S1_off * wordSize);
aoqi@1 303 __ ld(S3, SP, S3_off * wordSize);
aoqi@1 304 __ ld(S4, SP, S4_off * wordSize);
aoqi@1 305 __ ld(S5, SP, S5_off * wordSize);
aoqi@1 306 __ ld(S6, SP, S6_off * wordSize);
aoqi@1 307
aoqi@1 308 // return
aoqi@1 309 __ jr(RA);
aoqi@1 310 __ delayed()->nop();
aoqi@1 311
aoqi@1 312 // handle return types different from T_INT
aoqi@1 313 __ bind(is_long);
aoqi@1 314 __ sd(V0, T0, 0 * wordSize);
aoqi@1 315 //__ sd(V1, T0, 1 * wordSize);
aoqi@35 316 //__ sd(R0, T0, 1 * wordSize);
aoqi@1 317 __ b(exit);
aoqi@1 318 __ delayed()->nop();
aoqi@1 319
aoqi@1 320 __ bind(is_float);
aoqi@1 321 __ swc1(F0, T0, 0 * wordSize);
aoqi@1 322 __ b(exit);
aoqi@1 323 __ delayed()->nop();
aoqi@1 324
aoqi@1 325 __ bind(is_double);
aoqi@1 326 __ sdc1(F0, T0, 0 * wordSize);
aoqi@1 327 //__ sdc1(F1, T0, 1 * wordSize);
aoqi@35 328 //__ sd(R0, T0, 1 * wordSize);
aoqi@1 329 __ b(exit);
aoqi@1 330 __ delayed()->nop();
aoqi@1 331 // FIXME: the 1.6 MIPS version adds an FPU operation here
aoqi@1 332 StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
aoqi@1 333 __ b(common_return);
aoqi@1 334 __ delayed()->nop();
aoqi@1 335 return start;
aoqi@1 336 }
aoqi@1 337
aoqi@1 338 // Return point for a Java call if there's an exception thrown in
aoqi@1 339 // Java code. The exception is caught and transformed into a
aoqi@1 340 // pending exception stored in JavaThread that can be tested from
aoqi@1 341 // within the VM.
aoqi@1 342 //
aoqi@1 343 // Note: Usually the parameters are removed by the callee. In case
aoqi@1 344 // of an exception crossing an activation frame boundary, that is
aoqi@1 345 // not the case if the callee is compiled code => need to setup the
aoqi@1 346 // rsp.
aoqi@1 347 //
aoqi@1 348 // rax: exception oop
aoqi@1 349
aoqi@1 350 address generate_catch_exception() {
aoqi@1 351 StubCodeMark mark(this, "StubRoutines", "catch_exception");
aoqi@1 352 address start = __ pc();
aoqi@1 353
aoqi@1 354 Register thread = TREG;
aoqi@1 355
aoqi@1 356 // get thread directly
aoqi@1 357 #ifndef OPT_THREAD
aoqi@1 358 __ ld(thread, FP, thread_off * wordSize);
aoqi@1 359 #endif
aoqi@1 360
aoqi@1 361 #ifdef ASSERT
aoqi@1 362 // verify that threads correspond
aoqi@1 363 { Label L;
aoqi@1 364 __ get_thread(T8);
aoqi@1 365 __ beq(T8, thread, L);
aoqi@1 366 __ delayed()->nop();
aoqi@1 367 __ stop("StubRoutines::catch_exception: threads must correspond");
aoqi@1 368 __ bind(L);
aoqi@1 369 }
aoqi@1 370 #endif
aoqi@1 371 // set pending exception
aoqi@1 372 __ verify_oop(V0);
aoqi@1 373 __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 374 __ li(AT, (long)__FILE__);
aoqi@1 375 __ sd(AT, thread, in_bytes(Thread::exception_file_offset ()));
aoqi@1 376 __ li(AT, (long)__LINE__);
aoqi@1 377 __ sd(AT, thread, in_bytes(Thread::exception_line_offset ()));
aoqi@1 378
aoqi@1 379 // complete return to VM
aoqi@1 380 assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
aoqi@1 381 __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
aoqi@1 382 __ delayed()->nop();
aoqi@1 383
aoqi@1 384 return start;
aoqi@1 385 }
aoqi@1 386
aoqi@1 387 // Continuation point for runtime calls returning with a pending
aoqi@1 388 // exception. The pending exception check happened in the runtime
aoqi@1 389 // or native call stub. The pending exception in Thread is
aoqi@1 390 // converted into a Java-level exception.
aoqi@1 391 //
aoqi@1 392 // Contract with Java-level exception handlers:
aoqi@1 393 // rax: exception
aoqi@1 394 // rdx: throwing pc
aoqi@1 395 //
aoqi@1 396 // NOTE: At entry of this stub, exception-pc must be on stack !!
aoqi@1 397
aoqi@1 398 address generate_forward_exception() {
aoqi@1 399 StubCodeMark mark(this, "StubRoutines", "forward exception");
aoqi@1 400 //Register thread = TREG;
aoqi@1 401 Register thread = TREG;
aoqi@1 402 address start = __ pc();
aoqi@1 403
aoqi@1 404 // Upon entry, the sp points to the return address returning into Java
aoqi@1 405 // (interpreted or compiled) code; i.e., the return address becomes the
aoqi@1 406 // throwing pc.
aoqi@1 407 //
aoqi@1 408 // Arguments pushed before the runtime call are still on the stack but
aoqi@1 409 // the exception handler will reset the stack pointer -> ignore them.
aoqi@1 410 // A potential result in registers can be ignored as well.
aoqi@1 411
aoqi@1 412 #ifdef ASSERT
aoqi@1 413 // make sure this code is only executed if there is a pending exception
aoqi@1 414 #ifndef OPT_THREAD
aoqi@1 415 __ get_thread(thread);
aoqi@1 416 #endif
aoqi@1 417 { Label L;
aoqi@1 418 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 419 __ bne(AT, R0, L);
aoqi@1 420 __ delayed()->nop();
aoqi@1 421 __ stop("StubRoutines::forward exception: no pending exception (1)");
aoqi@1 422 __ bind(L);
aoqi@1 423 }
aoqi@1 424 #endif
aoqi@1 425
aoqi@1 426 // compute exception handler into T9
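  // the return address on top of the stack (loaded into A1 below) is passed to the
  // runtime as the throwing pc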
aoqi@1 427 __ ld(A1, SP, 0);
aoqi@1 428 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
aoqi@1 429 __ move(T9, V0);
aoqi@1 430 __ pop(V1);
aoqi@1 431
aoqi@1 432 #ifndef OPT_THREAD
aoqi@1 433 __ get_thread(thread);
aoqi@1 434 #endif
aoqi@1 435 __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 436 __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 437
aoqi@1 438 #ifdef ASSERT
aoqi@1 439 // make sure exception is set
aoqi@1 440 { Label L;
aoqi@1 441 __ bne(V0, R0, L);
aoqi@1 442 __ delayed()->nop();
aoqi@1 443 __ stop("StubRoutines::forward exception: no pending exception (2)");
aoqi@1 444 __ bind(L);
aoqi@1 445 }
aoqi@1 446 #endif
aoqi@1 447
aoqi@1 448 // continue at exception handler (return address removed)
aoqi@1 449 // V0: exception
aoqi@1 450 // T9: exception handler
aoqi@1 451 // V1: throwing pc
aoqi@1 452 __ verify_oop(V0);
aoqi@1 453 __ jr(T9);
aoqi@1 454 __ delayed()->nop();
aoqi@1 455
aoqi@1 456 return start;
aoqi@1 457 }
aoqi@1 458
aoqi@1 459 // Support for intptr_t get_previous_fp()
aoqi@1 460 //
aoqi@1 461 // This routine is used to find the previous frame pointer for the
aoqi@1 462 // caller (current_frame_guess). This is used as part of debugging when
aoqi@1 463 // ps() is seemingly lost trying to find frames.
aoqi@1 464 // This code assumes that the caller (current_frame_guess) has a frame.
aoqi@1 465 address generate_get_previous_fp() {
aoqi@1 466 StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
aoqi@1 467 const Address old_fp (FP, 0);
aoqi@1 468 const Address older_fp (V0, 0);
aoqi@1 469 address start = __ pc();
aoqi@1 470 __ enter();
aoqi@1 471 __ lw(V0, old_fp); // caller's fp
aoqi@1 472 __ lw(V0, older_fp); // the frame for ps()
aoqi@1 473 __ leave();
aoqi@1 474 __ jr(RA);
aoqi@1 475 __ delayed()->nop();
aoqi@1 476 return start;
aoqi@1 477 }
aoqi@1 478 // The following routine generates a subroutine to throw an
aoqi@1 479 // asynchronous UnknownError when an unsafe access gets a fault that
aoqi@1 480 // could not be reasonably prevented by the programmer. (Example:
aoqi@1 481 // SIGBUS/OBJERR.)
aoqi@1 482 address generate_handler_for_unsafe_access() {
aoqi@1 483 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
aoqi@1 484 address start = __ pc();
aoqi@1 485 __ pushad(); // push registers
aoqi@1 486 // Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
aoqi@1 487 __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
aoqi@1 488 __ delayed()->nop();
aoqi@1 489 __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord);
aoqi@1 490 __ popad();
aoqi@1 491 __ jr(RA);
aoqi@1 492 __ delayed()->nop();
aoqi@1 493 return start;
aoqi@1 494 }
aoqi@1 495
aoqi@1 496 // Non-destructive plausibility checks for oops
aoqi@1 497 //
aoqi@1 498 // Arguments:
aoqi@1 499 // all args on stack!
aoqi@1 500 //
aoqi@1 501 // Stack after saving c_rarg3:
aoqi@1 502 // [tos + 0]: saved c_rarg3
aoqi@1 503 // [tos + 1]: saved c_rarg2
aoqi@1 504 // [tos + 2]: saved r12 (several TemplateTable methods use it)
aoqi@1 505 // [tos + 3]: saved flags
aoqi@1 506 // [tos + 4]: return address
aoqi@1 507 // * [tos + 5]: error message (char*)
aoqi@1 508 // * [tos + 6]: object to verify (oop)
aoqi@1 509 // * [tos + 7]: saved rax - saved by caller and bashed
aoqi@1 510 // * = popped on exit
aoqi@1 511 address generate_verify_oop() {
aoqi@1 512 StubCodeMark mark(this, "StubRoutines", "verify_oop");
aoqi@1 513 address start = __ pc();
aoqi@1 514 __ reinit_heapbase();
aoqi@1 515 __ verify_oop_subroutine();
aoqi@1 516 address end = __ pc();
aoqi@1 517 return start;
aoqi@1 518 }
aoqi@1 519
aoqi@1 520 //
aoqi@1 521 // Generate overlap test for array copy stubs
aoqi@1 522 //
aoqi@1 523 // Input:
aoqi@1 524 // A0 - array1
aoqi@1 525 // A1 - array2
aoqi@1 526 // A2 - element count
aoqi@1 527 //
aoqi@1 528 // Note: this code may only clobber AT and T9
aoqi@1 529 //
aoqi@1 530
aoqi@1 531 // use T9 as temp
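  // Falls through (so the caller must copy backwards) only when
  // A0 < A1 <= A0 + (A2 - 1) * elem_size; otherwise it branches to
  // no_overlap_target so the disjoint (forward) copy can be used.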
aoqi@1 532 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
aoqi@1 533 int elem_size = 1 << log2_elem_size;
aoqi@1 534 Address::ScaleFactor sf = Address::times_1;
aoqi@1 535
aoqi@1 536 switch (log2_elem_size) {
aoqi@1 537 case 0: sf = Address::times_1; break;
aoqi@1 538 case 1: sf = Address::times_2; break;
aoqi@1 539 case 2: sf = Address::times_4; break;
aoqi@1 540 case 3: sf = Address::times_8; break;
aoqi@1 541 }
aoqi@1 542
aoqi@1 543 __ dsll(AT, A2, sf);
aoqi@1 544 __ dadd(AT, AT, A0);
aoqi@1 545 __ lea(T9, Address(AT, -elem_size));
aoqi@1 546 __ dsub(AT, A1, A0);
aoqi@1 547 __ blez(AT, no_overlap_target);
aoqi@1 548 __ delayed()->nop();
aoqi@1 549 __ dsub(AT, A1, T9);
aoqi@1 550 __ bgtz(AT, no_overlap_target);
aoqi@1 551 __ delayed()->nop();
aoqi@1 552
aoqi@8 553 // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., then go to no_overlap_target
aoqi@8 554 Label L;
aoqi@8 555 __ bgez(A0, L);
aoqi@8 556 __ delayed()->nop();
aoqi@8 557 __ bgtz(A1, no_overlap_target);
aoqi@8 558 __ delayed()->nop();
aoqi@8 559 __ bind(L);
aoqi@8 560
aoqi@1 561 }
aoqi@1 562
aoqi@1 563 //
aoqi@1 564 // Generate store check for array
aoqi@1 565 //
aoqi@1 566 // Input:
aoqi@1 567 // T0 - starting address(edi)
aoqi@1 568 // T1 - element count (ecx)
aoqi@1 569 //
aoqi@1 570 // The 2 input registers are overwritten
aoqi@1 571 //
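  // The code below marks dirty (stores zero into) every card-table byte covering the
  // destination range [T0, T0 + T1 * oop_size), walking the cards from the highest
  // index down to the lowest; a sync() is issued after each card store.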
aoqi@1 572
aoqi@1 573 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
aoqi@1 574
aoqi@1 575 void array_store_check() {
aoqi@1 576 BarrierSet* bs = Universe::heap()->barrier_set();
aoqi@1 577 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
aoqi@1 578 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
aoqi@1 579 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
aoqi@1 580 Label l_0;
aoqi@1 581
aoqi@1 582 __ dsll(AT, T1, TIMES_OOP);
aoqi@1 583 __ dadd(AT, T0, AT);
aoqi@1 584 __ daddiu(T1, AT, - BytesPerHeapOop);
aoqi@1 585
aoqi@1 586 __ shr(T0, CardTableModRefBS::card_shift);
aoqi@1 587 __ shr(T1, CardTableModRefBS::card_shift);
aoqi@1 588
aoqi@1 589 __ dsub(T1, T1, T0); // end --> cards count
aoqi@1 590 __ bind(l_0);
aoqi@1 591
fujie@368 592 __ set64(AT, (long)ct->byte_map_base);
aoqi@1 593 __ dadd(AT, AT, T0);
aoqi@1 594 __ dadd(AT, AT, T1);
fujie@190 595 __ sb(R0, AT, 0);
fujie@337 596 __ sync();
fujie@190 597 __ bgez(T1, l_0);
fujie@190 598 __ delayed()->daddi(T1, T1, - 1);
aoqi@1 599 }
aoqi@1 600
aoqi@1 601 // Arguments:
aoqi@1 602 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 603 // ignored
aoqi@1 604 // name - stub name string
aoqi@1 605 //
aoqi@1 606 // Inputs:
aoqi@1 607 // c_rarg0 - source array address
aoqi@1 608 // c_rarg1 - destination array address
aoqi@1 609 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 610 //
aoqi@1 611 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
aoqi@1 612 // we let the hardware handle it. The one to eight bytes within words,
aoqi@1 613 // dwords or qwords that span cache line boundaries will still be loaded
aoqi@1 614 // and stored atomically.
aoqi@1 615 //
aoqi@1 616 // Side Effects:
aoqi@1 617 // disjoint_byte_copy_entry is set to the no-overlap entry point
aoqi@1 618 // used by generate_conjoint_byte_copy().
aoqi@1 619 //
jiangshaofeng@117 620 address generate_disjoint_byte_copy(bool aligned, const char * name) {
jiangshaofeng@117 621 StubCodeMark mark(this, "StubRoutines", name);
jiangshaofeng@117 622 __ align(CodeEntryAlignment);
aoqi@1 623
jiangshaofeng@117 624
jiangshaofeng@117 625 Register tmp1 = T0;
jiangshaofeng@117 626 Register tmp2 = T1;
jiangshaofeng@117 627 Register tmp3 = T3;
jiangshaofeng@117 628
jiangshaofeng@117 629 address start = __ pc();
jiangshaofeng@117 630
jiangshaofeng@117 631 __ push(tmp1);
jiangshaofeng@117 632 __ push(tmp2);
jiangshaofeng@117 633 __ push(tmp3);
jiangshaofeng@117 634 __ move(tmp1, A0);
jiangshaofeng@117 635 __ move(tmp2, A1);
jiangshaofeng@117 636 __ move(tmp3, A2);
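  // tmp1 = from (A0), tmp2 = to (A1), tmp3 = element count (A2)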
jiangshaofeng@117 637
jiangshaofeng@117 638
jiangshaofeng@117 639 Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11;
jiangshaofeng@117 640 Label l_debug;
jiangshaofeng@117 641
jiangshaofeng@117 642 __ daddi(AT, tmp3, -9); // arrays with at most 9 elements are copied one byte at a time (l_9)
jiangshaofeng@117 643 __ blez(AT, l_9);
jiangshaofeng@117 644 __ delayed()->nop();
jiangshaofeng@117 645
jiangshaofeng@117 646 if (!aligned) {
jiangshaofeng@117 647 __ xorr(AT, tmp1, tmp2);
jiangshaofeng@117 648 __ andi(AT, AT, 1);
jiangshaofeng@117 649 __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy
jiangshaofeng@117 650 __ delayed()->nop();
jiangshaofeng@117 651
jiangshaofeng@117 652 __ andi(AT, tmp1, 1);
jiangshaofeng@117 653 __ beq(AT, R0, l_10); // copy 1 element if necessary to align to 2 bytes
jiangshaofeng@117 654 __ delayed()->nop();
jiangshaofeng@117 655
jiangshaofeng@117 656 __ lb(AT, tmp1, 0);
jiangshaofeng@117 657 __ daddi(tmp1, tmp1, 1);
jiangshaofeng@117 658 __ sb(AT, tmp2, 0);
jiangshaofeng@117 659 __ daddi(tmp2, tmp2, 1);
jiangshaofeng@117 660 __ daddi(tmp3, tmp3, -1);
jiangshaofeng@117 661 __ bind(l_10);
jiangshaofeng@117 662
jiangshaofeng@117 663 __ xorr(AT, tmp1, tmp2);
jiangshaofeng@117 664 __ andi(AT, AT, 3);
jiangshaofeng@117 665 __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy
jiangshaofeng@117 666 __ delayed()->nop();
jiangshaofeng@117 667
jiangshaofeng@117 668 // At this point it is guaranteed that both, from and to have the same alignment mod 4.
jiangshaofeng@117 669
jiangshaofeng@117 670 // Copy 2 elements if necessary to align to 4 bytes.
jiangshaofeng@117 671 __ andi(AT, tmp1, 3);
jiangshaofeng@117 672 __ beq(AT, R0, l_2);
jiangshaofeng@117 673 __ delayed()->nop();
jiangshaofeng@117 674
jiangshaofeng@117 675 __ lhu(AT, tmp1, 0);
jiangshaofeng@117 676 __ daddi(tmp1, tmp1, 2);
jiangshaofeng@117 677 __ sh(AT, tmp2, 0);
jiangshaofeng@117 678 __ daddi(tmp2, tmp2, 2);
jiangshaofeng@117 679 __ daddi(tmp3, tmp3, -2);
jiangshaofeng@117 680 __ bind(l_2);
jiangshaofeng@117 681
jiangshaofeng@117 682 // At this point the positions of both, from and to, are at least 4 byte aligned.
jiangshaofeng@117 683
jiangshaofeng@117 684 // Copy 4 elements at a time.
jiangshaofeng@117 685 // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
jiangshaofeng@117 686 __ xorr(AT, tmp1, tmp2);
jiangshaofeng@117 687 __ andi(AT, AT, 7);
jiangshaofeng@117 688 __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned
jiangshaofeng@117 689 __ delayed()->nop();
jiangshaofeng@117 690
jiangshaofeng@117 691 // Copy 4 elements if necessary to align to 8 bytes.
jiangshaofeng@117 692 __ andi(AT, tmp1, 7);
jiangshaofeng@117 693 __ beq(AT, R0, l_7);
jiangshaofeng@117 694 __ delayed()->nop();
jiangshaofeng@117 695
jiangshaofeng@117 696 __ lw(AT, tmp1, 0);
jiangshaofeng@117 697 __ daddi(tmp3, tmp3, -4);
jiangshaofeng@117 698 __ sw(AT, tmp2, 0);
jiangshaofeng@117 699 { // FasterArrayCopy
jiangshaofeng@117 700 __ daddi(tmp1, tmp1, 4);
jiangshaofeng@117 701 __ daddi(tmp2, tmp2, 4);
jiangshaofeng@117 702 }
jiangshaofeng@117 703 }
jiangshaofeng@117 704
jiangshaofeng@117 705 __ bind(l_7);
jiangshaofeng@117 706
jiangshaofeng@117 707 // Copy 4 elements at a time; either the loads or the stores can
jiangshaofeng@117 708 // be unaligned if aligned == false.
jiangshaofeng@117 709
jiangshaofeng@117 710 { // FasterArrayCopy
jiangshaofeng@117 711 __ daddi(AT, tmp3, -7);
jiangshaofeng@117 712 __ blez(AT, l_6); // copy 4 bytes at a time if fewer than 8 elements remain
jiangshaofeng@117 713 __ delayed()->nop();
jiangshaofeng@117 714
jiangshaofeng@117 715 __ bind(l_8);
jiangshaofeng@117 716 // For Loongson, there is 128-bit memory access. TODO
jiangshaofeng@117 717 __ ld(AT, tmp1, 0);
jiangshaofeng@117 718 __ sd(AT, tmp2, 0);
jiangshaofeng@117 719 __ daddi(tmp1, tmp1, 8);
jiangshaofeng@117 720 __ daddi(tmp2, tmp2, 8);
jiangshaofeng@117 721 __ daddi(tmp3, tmp3, -8);
jiangshaofeng@117 722 __ daddi(AT, tmp3, -8);
jiangshaofeng@117 723 __ bgez(AT, l_8);
jiangshaofeng@117 724 __ delayed()->nop();
jiangshaofeng@117 725 }
jiangshaofeng@117 726 __ bind(l_6);
jiangshaofeng@117 727
jiangshaofeng@117 728 // copy 4 bytes at a time
jiangshaofeng@117 729 { // FasterArrayCopy
jiangshaofeng@117 730 __ daddi(AT, tmp3, -3);
jiangshaofeng@117 731 __ blez(AT, l_1);
jiangshaofeng@117 732 __ delayed()->nop();
jiangshaofeng@117 733
jiangshaofeng@117 734 __ bind(l_3);
jiangshaofeng@117 735 __ lw(AT, tmp1, 0);
jiangshaofeng@117 736 __ sw(AT, tmp2, 0);
jiangshaofeng@117 737 __ daddi(tmp1, tmp1, 4);
jiangshaofeng@117 738 __ daddi(tmp2, tmp2, 4);
jiangshaofeng@117 739 __ daddi(tmp3, tmp3, -4);
jiangshaofeng@117 740 __ daddi(AT, tmp3, -4);
jiangshaofeng@117 741 __ bgez(AT, l_3);
jiangshaofeng@117 742 __ delayed()->nop();
jiangshaofeng@117 743
jiangshaofeng@117 744 }
jiangshaofeng@117 745
jiangshaofeng@117 746 // do 2 bytes copy
jiangshaofeng@117 747 __ bind(l_1);
jiangshaofeng@117 748 {
jiangshaofeng@117 749 __ daddi(AT, tmp3, -1);
jiangshaofeng@117 750 __ blez(AT, l_9);
jiangshaofeng@117 751 __ delayed()->nop();
jiangshaofeng@117 752
jiangshaofeng@117 753 __ bind(l_5);
jiangshaofeng@117 754 __ lhu(AT, tmp1, 0);
jiangshaofeng@117 755 __ daddi(tmp3, tmp3, -2);
jiangshaofeng@117 756 __ sh(AT, tmp2, 0);
jiangshaofeng@117 757 __ daddi(tmp1, tmp1, 2);
jiangshaofeng@117 758 __ daddi(tmp2, tmp2, 2);
jiangshaofeng@117 759 __ daddi(AT, tmp3, -2);
jiangshaofeng@117 760 __ bgez(AT, l_5);
jiangshaofeng@117 761 __ delayed()->nop();
jiangshaofeng@117 762 }
jiangshaofeng@117 763
jiangshaofeng@117 764 //do 1 element copy--byte
jiangshaofeng@117 765 __ bind(l_9);
jiangshaofeng@117 766 __ beq(R0, tmp3, l_4);
jiangshaofeng@117 767 __ delayed()->nop();
jiangshaofeng@117 768
jiangshaofeng@117 769 {
jiangshaofeng@117 770 __ bind(l_11);
jiangshaofeng@117 771 __ lb(AT, tmp1, 0);
jiangshaofeng@117 772 __ daddi(tmp3, tmp3, -1);
jiangshaofeng@117 773 __ sb(AT, tmp2, 0);
jiangshaofeng@117 774 __ daddi(tmp1, tmp1, 1);
jiangshaofeng@117 775 __ daddi(tmp2, tmp2, 1);
jiangshaofeng@117 776 __ daddi(AT, tmp3, -1);
jiangshaofeng@117 777 __ bgez(AT, l_11);
jiangshaofeng@117 778 __ delayed()->nop();
jiangshaofeng@117 779 }
jiangshaofeng@117 780
jiangshaofeng@117 781 __ bind(l_4);
jiangshaofeng@117 782 __ pop(tmp3);
jiangshaofeng@117 783 __ pop(tmp2);
jiangshaofeng@117 784 __ pop(tmp1);
jiangshaofeng@117 785
jiangshaofeng@117 786 __ jr(RA);
jiangshaofeng@117 787 __ delayed()->nop();
jiangshaofeng@117 788
jiangshaofeng@117 789 return start;
aoqi@1 790 }
aoqi@1 791
aoqi@1 792 // Arguments:
aoqi@1 793 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 794 // ignored
aoqi@1 795 // name - stub name string
aoqi@1 796 //
aoqi@1 797 // Inputs:
aoqi@8 798 // A0 - source array address
aoqi@8 799 // A1 - destination array address
aoqi@8 800 // A2 - element count, treated as ssize_t, can be zero
aoqi@1 801 //
aoqi@1 802 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
aoqi@1 803 // we let the hardware handle it. The one to eight bytes within words,
aoqi@1 804 // dwords or qwords that span cache line boundaries will still be loaded
aoqi@1 805 // and stored atomically.
aoqi@1 806 //
aoqi@1 807 address generate_conjoint_byte_copy(bool aligned, const char *name) {
aoqi@8 808 __ align(CodeEntryAlignment);
aoqi@8 809 StubCodeMark mark(this, "StubRoutines", name);
aoqi@8 810 address start = __ pc();
aoqi@1 811
aoqi@8 812 Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
aoqi@8 813 Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;
aoqi@1 814
aoqi@8 815 address nooverlap_target = aligned ?
aoqi@8 816 StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
aoqi@8 817 StubRoutines::jbyte_disjoint_arraycopy();
aoqi@1 818
aoqi@8 819 array_overlap_test(nooverlap_target, 0);
Jin@7 820
aoqi@8 821 const Register from = A0; // source array address
aoqi@8 822 const Register to = A1; // destination array address
aoqi@8 823 const Register count = A2; // elements count
aoqi@8 824 const Register end_from = T3; // source array end address
aoqi@8 825 const Register end_to = T0; // destination array end address
aoqi@8 826 const Register end_count = T1; // remaining element count
Jin@7 827
aoqi@8 828 __ push(end_from);
aoqi@8 829 __ push(end_to);
aoqi@8 830 __ push(end_count);
aoqi@8 831 __ push(T8);
Jin@7 832
aoqi@8 833 // copy from high to low
aoqi@8 834 __ move(end_count, count);
aoqi@8 835 __ dadd(end_from, from, end_count);
aoqi@8 836 __ dadd(end_to, to, end_count);
Jin@7 837
aoqi@8 838 // 2016/05/08 aoqi: If end_from and end_to have different alignment, a byte-by-byte copy is performed.
aoqi@8 839 __ andi(AT, end_from, 3);
aoqi@8 840 __ andi(T8, end_to, 3);
aoqi@8 841 __ bne(AT, T8, l_copy_byte);
aoqi@8 842 __ delayed()->nop();
Jin@7 843
aoqi@8 844 // First deal with the unaligned data at the top.
aoqi@8 845 __ bind(l_unaligned);
aoqi@8 846 __ beq(end_count, R0, l_exit);
aoqi@8 847 __ delayed()->nop();
aoqi@8 848
aoqi@8 849 __ andi(AT, end_from, 3);
aoqi@8 850 __ bne(AT, R0, l_from_unaligned);
aoqi@8 851 __ delayed()->nop();
aoqi@8 852
aoqi@8 853 __ andi(AT, end_to, 3);
aoqi@8 854 __ beq(AT, R0, l_4_bytes_aligned);
aoqi@8 855 __ delayed()->nop();
aoqi@8 856
aoqi@8 857 __ bind(l_from_unaligned);
aoqi@8 858 __ lb(AT, end_from, -1);
aoqi@8 859 __ sb(AT, end_to, -1);
aoqi@8 860 __ daddi(end_from, end_from, -1);
aoqi@8 861 __ daddi(end_to, end_to, -1);
aoqi@8 862 __ daddi(end_count, end_count, -1);
aoqi@8 863 __ b(l_unaligned);
aoqi@8 864 __ delayed()->nop();
aoqi@8 865
aoqi@8 866 // now end_to, end_from point to 4-byte aligned high-ends
aoqi@8 867 // end_count contains byte count that is not copied.
aoqi@8 868 // copy 4 bytes at a time
aoqi@8 869 __ bind(l_4_bytes_aligned);
aoqi@8 870
aoqi@8 871 __ move(T8, end_count);
aoqi@8 872 __ daddi(AT, end_count, -3);
aoqi@8 873 __ blez(AT, l_copy_suffix);
aoqi@8 874 __ delayed()->nop();
aoqi@8 875
aoqi@8 876 //__ andi(T8, T8, 3);
aoqi@8 877 __ lea(end_from, Address(end_from, -4));
aoqi@8 878 __ lea(end_to, Address(end_to, -4));
aoqi@8 879
aoqi@8 880 __ dsrl(end_count, end_count, 2);
aoqi@8 881 __ align(16);
aoqi@8 882 __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes
aoqi@8 883 __ lw(AT, end_from, 0);
aoqi@8 884 __ sw(AT, end_to, 0);
aoqi@8 885 __ addi(end_from, end_from, -4);
aoqi@8 886 __ addi(end_to, end_to, -4);
aoqi@8 887 __ addi(end_count, end_count, -1);
aoqi@8 888 __ bne(end_count, R0, l_copy_4_bytes_loop);
aoqi@8 889 __ delayed()->nop();
aoqi@8 890
aoqi@8 891 __ b(l_copy_suffix);
aoqi@8 892 __ delayed()->nop();
aoqi@8 893 // copy dwords aligned or not with repeat move
aoqi@8 894 // l_copy_suffix
aoqi@8 895 // copy suffix (0-3 bytes)
aoqi@8 896 __ bind(l_copy_suffix);
aoqi@8 897 __ andi(T8, T8, 3);
aoqi@8 898 __ beq(T8, R0, l_exit);
aoqi@8 899 __ delayed()->nop();
aoqi@8 900 __ addi(end_from, end_from, 3);
aoqi@8 901 __ addi(end_to, end_to, 3);
aoqi@8 902 __ bind(l_copy_suffix_loop);
aoqi@8 903 __ lb(AT, end_from, 0);
aoqi@8 904 __ sb(AT, end_to, 0);
aoqi@8 905 __ addi(end_from, end_from, -1);
aoqi@8 906 __ addi(end_to, end_to, -1);
aoqi@8 907 __ addi(T8, T8, -1);
aoqi@8 908 __ bne(T8, R0, l_copy_suffix_loop);
aoqi@8 909 __ delayed()->nop();
aoqi@8 910
aoqi@8 911 __ bind(l_copy_byte);
aoqi@8 912 __ beq(end_count, R0, l_exit);
aoqi@8 913 __ delayed()->nop();
aoqi@8 914 __ lb(AT, end_from, -1);
aoqi@8 915 __ sb(AT, end_to, -1);
aoqi@8 916 __ daddi(end_from, end_from, -1);
aoqi@8 917 __ daddi(end_to, end_to, -1);
aoqi@8 918 __ daddi(end_count, end_count, -1);
aoqi@8 919 __ b(l_copy_byte);
aoqi@8 920 __ delayed()->nop();
aoqi@8 921
aoqi@8 922 __ bind(l_exit);
aoqi@8 923 __ pop(T8);
aoqi@8 924 __ pop(end_count);
aoqi@8 925 __ pop(end_to);
aoqi@8 926 __ pop(end_from);
aoqi@8 927 __ jr(RA);
aoqi@8 928 __ delayed()->nop();
aoqi@8 929 return start;
aoqi@1 930 }
aoqi@1 931
aoqi@13 932 // Generate stub for disjoint short copy. If "aligned" is true, the
aoqi@13 933 // "from" and "to" addresses are assumed to be heapword aligned.
aoqi@1 934 //
aoqi@13 935 // Arguments for generated stub:
aoqi@13 936 // from: A0
aoqi@13 937 // to: A1
aoqi@13 938 // elm.count: A2 treated as signed
aoqi@13 939 // one element: 2 bytes
aoqi@1 940 //
aoqi@13 941 // Strategy for aligned==true:
aoqi@1 942 //
aoqi@13 943 // If length <= 9:
aoqi@13 944 // 1. copy 1 element at a time (l_5)
aoqi@1 945 //
aoqi@13 946 // If length > 9:
aoqi@13 947 // 1. copy 4 elements at a time until less than 4 elements are left (l_7)
aoqi@13 948 // 2. copy 2 elements at a time until less than 2 elements are left (l_6)
aoqi@13 949 // 3. copy last element if one was left in step 2. (l_1)
aoqi@13 950 //
aoqi@13 951 //
aoqi@13 952 // Strategy for aligned==false:
aoqi@13 953 //
aoqi@13 954 // If length <= 9: same as aligned==true case
aoqi@13 955 //
aoqi@13 956 // If length > 9:
aoqi@13 957 // 1. continue with step 7. if the alignment of from and to mod 4
aoqi@13 958 // is different.
aoqi@13 959 // 2. align from and to to 4 bytes by copying 1 element if necessary
aoqi@13 960 // 3. at l_2 from and to are 4 byte aligned; continue with
aoqi@13 961 // 6. if they cannot be aligned to 8 bytes because they have
aoqi@13 962 // got different alignment mod 8.
aoqi@13 963 // 4. at this point we know that both, from and to, have the same
aoqi@13 964 // alignment mod 8, now copy one element if necessary to get
aoqi@13 965 // 8 byte alignment of from and to.
aoqi@13 966 // 5. copy 4 elements at a time until less than 4 elements are
aoqi@13 967 // left; depending on step 3. all load/stores are aligned.
aoqi@13 968 // 6. copy 2 elements at a time until less than 2 elements are
aoqi@13 969 // left. (l_6)
aoqi@13 970 // 7. copy 1 element at a time. (l_5)
aoqi@13 971 // 8. copy last element if one was left in step 6. (l_1)
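 // Note: the code below actually uses a cut-off of 23 elements before taking the
 // unrolled paths; the widest loops copy 16 elements (32 bytes) per iteration,
 // using gslq/gssq in l_11 when UseLoongsonISA is set.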
chenhaoxuan@126 972
aoqi@13 973 address generate_disjoint_short_copy(bool aligned, const char * name) {
aoqi@13 974 StubCodeMark mark(this, "StubRoutines", name);
aoqi@13 975 __ align(CodeEntryAlignment);
aoqi@1 976
aoqi@13 977 Register tmp1 = T0;
aoqi@13 978 Register tmp2 = T1;
aoqi@13 979 Register tmp3 = T3;
lifangyuan@125 980 Register tmp4 = T8;
chenhaoxuan@126 981 Register tmp5 = T9;
chenhaoxuan@126 982 Register tmp6 = T2;
aoqi@1 983
aoqi@13 984 address start = __ pc();
aoqi@13 985
aoqi@13 986 __ push(tmp1);
aoqi@13 987 __ push(tmp2);
aoqi@13 988 __ push(tmp3);
aoqi@13 989 __ move(tmp1, A0);
aoqi@13 990 __ move(tmp2, A1);
aoqi@13 991 __ move(tmp3, A2);
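  // tmp1 = from (A0), tmp2 = to (A1), tmp3 = element count (A2)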
aoqi@13 992
chenhaoxuan@126 993 Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14;
aoqi@13 994 Label l_debug;
aoqi@13 995 // don't try anything fancy if arrays don't have many elements
chenhaoxuan@126 996 __ daddi(AT, tmp3, -23);
chenhaoxuan@126 997 __ blez(AT, l_14);
aoqi@13 998 __ delayed()->nop();
chenhaoxuan@126 999 // push the extra temporaries only on this (large-count) path
chenhaoxuan@126 1000 __ push(tmp4);
chenhaoxuan@126 1001 __ push(tmp5);
chenhaoxuan@126 1002 __ push(tmp6);
aoqi@13 1003
aoqi@13 1004 if (!aligned) {
aoqi@13 1005 __ xorr(AT, A0, A1);
aoqi@13 1006 __ andi(AT, AT, 1);
aoqi@13 1007 __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen?
aoqi@13 1008 __ delayed()->nop();
aoqi@13 1009
aoqi@13 1010 __ xorr(AT, A0, A1);
aoqi@13 1011 __ andi(AT, AT, 3);
aoqi@13 1012 __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy
aoqi@13 1013 __ delayed()->nop();
aoqi@13 1014
aoqi@13 1015 // At this point it is guaranteed that both, from and to have the same alignment mod 4.
aoqi@13 1016
aoqi@13 1017 // Copy 1 element if necessary to align to 4 bytes.
aoqi@13 1018 __ andi(AT, A0, 3);
aoqi@13 1019 __ beq(AT, R0, l_2);
aoqi@13 1020 __ delayed()->nop();
aoqi@13 1021
aoqi@13 1022 __ lhu(AT, tmp1, 0);
aoqi@13 1023 __ daddi(tmp1, tmp1, 2);
aoqi@13 1024 __ sh(AT, tmp2, 0);
aoqi@13 1025 __ daddi(tmp2, tmp2, 2);
aoqi@13 1026 __ daddi(tmp3, tmp3, -1);
aoqi@13 1027 __ bind(l_2);
aoqi@13 1028
aoqi@13 1029 // At this point the positions of both, from and to, are at least 4 byte aligned.
aoqi@13 1030
aoqi@13 1031 // Copy 4 elements at a time.
aoqi@13 1032 // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
aoqi@13 1033 __ xorr(AT, tmp1, tmp2);
aoqi@13 1034 __ andi(AT, AT, 7);
aoqi@13 1035 __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned
aoqi@13 1036 __ delayed()->nop();
aoqi@13 1037
aoqi@13 1038 // Copy a 2-element word if necessary to align to 8 bytes.
aoqi@13 1039 __ andi(AT, tmp1, 7);
aoqi@13 1040 __ beq(AT, R0, l_7);
aoqi@13 1041 __ delayed()->nop();
aoqi@13 1042
aoqi@13 1043 __ lw(AT, tmp1, 0);
aoqi@13 1044 __ daddi(tmp3, tmp3, -2);
aoqi@13 1045 __ sw(AT, tmp2, 0);
chenhaoxuan@126 1046 __ daddi(tmp1, tmp1, 4);
chenhaoxuan@126 1047 __ daddi(tmp2, tmp2, 4);
chenhaoxuan@126 1048 }// end of if (!aligned)
lifangyuan@125 1049
chenhaoxuan@126 1050 __ bind(l_7);
chenhaoxuan@126 1051 // At this point both, from and to, are at least 8-byte aligned.
chenhaoxuan@126 1052 // Copy 8 elements at a time.
chenhaoxuan@126 1053 // Align to 16 bytes, but only if both from and to have the same alignment mod 16.
chenhaoxuan@126 1054 __ xorr(AT, tmp1, tmp2);
chenhaoxuan@126 1055 __ andi(AT, AT, 15);
chenhaoxuan@126 1056 __ bne(AT, R0, l_9);
chenhaoxuan@126 1057 __ delayed()->nop();
lifangyuan@125 1058
chenhaoxuan@126 1059 // Copy a 4-element word if necessary to align to 16 bytes.
chenhaoxuan@126 1060 __ andi(AT, tmp1, 15);
chenhaoxuan@126 1061 __ beq(AT, R0, l_10);
chenhaoxuan@126 1062 __ delayed()->nop();
lifangyuan@125 1063
chenhaoxuan@126 1064 __ ld(AT, tmp1, 0);
chenhaoxuan@126 1065 __ daddi(tmp3, tmp3, -4);
chenhaoxuan@126 1066 __ sd(AT, tmp2, 0);
chenhaoxuan@126 1067 __ daddi(tmp1, tmp1, 8);
chenhaoxuan@126 1068 __ daddi(tmp2, tmp2, 8);
lifangyuan@125 1069
chenhaoxuan@126 1070 __ bind(l_10);
lifangyuan@125 1071
lifangyuan@125 1072 // Copy 16 elements (32 bytes) at a time; either the loads or the stores can
lifangyuan@125 1073 // be unaligned if aligned == false
lifangyuan@125 1074
lifangyuan@125 1075 { // FasterArrayCopy
lifangyuan@125 1076 __ bind(l_11);
lifangyuan@125 1077 // For Loongson, the 128-bit memory access instructions are gslq/gssq
aoqi@127 1078 if (UseLoongsonISA) {
aoqi@127 1079 __ gslq(AT, tmp4, tmp1, 0);
aoqi@127 1080 __ gslq(tmp5, tmp6, tmp1, 16);
aoqi@127 1081 __ daddi(tmp1, tmp1, 32);
aoqi@127 1082 __ daddi(tmp2, tmp2, 32);
aoqi@127 1083 __ gssq(AT, tmp4, tmp2, -32);
aoqi@127 1084 __ gssq(tmp5, tmp6, tmp2, -16);
aoqi@127 1085 } else {
aoqi@127 1086 __ ld(AT, tmp1, 0);
aoqi@127 1087 __ ld(tmp4, tmp1, 8);
aoqi@127 1088 __ ld(tmp5, tmp1, 16);
aoqi@127 1089 __ ld(tmp6, tmp1, 24);
aoqi@127 1090 __ daddi(tmp1, tmp1, 32);
aoqi@127 1091 __ sd(AT, tmp2, 0);
aoqi@127 1092 __ sd(tmp4, tmp2, 8);
aoqi@127 1093 __ sd(tmp5, tmp2, 16);
aoqi@127 1094 __ sd(tmp6, tmp2, 24);
aoqi@127 1095 __ daddi(tmp2, tmp2, 32);
aoqi@127 1096 }
chenhaoxuan@126 1097 __ daddi(tmp3, tmp3, -16);
chenhaoxuan@126 1098 __ daddi(AT, tmp3, -16);
lifangyuan@125 1099 __ bgez(AT, l_11);
lifangyuan@125 1100 __ delayed()->nop();
aoqi@13 1101 }
lifangyuan@125 1102 __ bind(l_9);
chenhaoxuan@126 1103
aoqi@13 1104 // Copy 4 elements at a time; either the loads or the stores can
aoqi@13 1105 // be unaligned if aligned == false.
aoqi@13 1106 { // FasterArrayCopy
chenhaoxuan@126 1107 __ daddi(AT, tmp3, -15); // the loop below copies 16 elements (four 8-byte words) per iteration
chenhaoxuan@126 1108 __ blez(AT, l_4); // if fewer than 16 elements remain, fall back to the tail copy
aoqi@13 1109 __ delayed()->nop();
aoqi@13 1110
aoqi@13 1111 __ bind(l_8);
aoqi@13 1112 __ ld(AT, tmp1, 0);
chenhaoxuan@126 1113 __ ld(tmp4, tmp1, 8);
chenhaoxuan@126 1114 __ ld(tmp5, tmp1, 16);
chenhaoxuan@126 1115 __ ld(tmp6, tmp1, 24);
aoqi@13 1116 __ sd(AT, tmp2, 0);
chenhaoxuan@126 1117 __ sd(tmp4, tmp2, 8);
chenhaoxuan@126 1118 __ sd(tmp5, tmp2,16);
chenhaoxuan@126 1119 __ daddi(tmp1, tmp1, 32);
chenhaoxuan@126 1120 __ daddi(tmp2, tmp2, 32);
chenhaoxuan@126 1121 __ daddi(tmp3, tmp3, -16);
chenhaoxuan@126 1122 __ daddi(AT, tmp3, -16);
aoqi@13 1123 __ bgez(AT, l_8);
chenhaoxuan@126 1124 __ sd(tmp6, tmp2, -8);
aoqi@13 1125 }
aoqi@13 1126 __ bind(l_6);
aoqi@13 1127
aoqi@13 1128 // copy 8 elements (four 4-byte words) at a time
aoqi@13 1129 { // FasterArrayCopy
chenhaoxuan@126 1130 __ daddi(AT, tmp3, -7);
chenhaoxuan@126 1131 __ blez(AT, l_4);
aoqi@13 1132 __ delayed()->nop();
aoqi@13 1133
aoqi@13 1134 __ bind(l_3);
aoqi@13 1135 __ lw(AT, tmp1, 0);
chenhaoxuan@126 1136 __ lw(tmp4, tmp1, 4);
chenhaoxuan@126 1137 __ lw(tmp5, tmp1, 8);
chenhaoxuan@126 1138 __ lw(tmp6, tmp1, 12);
aoqi@13 1139 __ sw(AT, tmp2, 0);
chenhaoxuan@126 1140 __ sw(tmp4, tmp2, 4);
chenhaoxuan@126 1141 __ sw(tmp5, tmp2, 8);
chenhaoxuan@126 1142 __ daddi(tmp1, tmp1, 16);
chenhaoxuan@126 1143 __ daddi(tmp2, tmp2, 16);
chenhaoxuan@126 1144 __ daddi(tmp3, tmp3, -8);
chenhaoxuan@126 1145 __ daddi(AT, tmp3, -8);
aoqi@13 1146 __ bgez(AT, l_3);
chenhaoxuan@126 1147 __ sw(tmp6, tmp2, -4);
aoqi@13 1148 }
aoqi@13 1149
chenhaoxuan@126 1150 __ bind(l_1);
aoqi@13 1151 // copy 4 elements (16-bit halfwords) at a time
aoqi@13 1152 { // FasterArrayCopy
chenhaoxuan@126 1153 __ daddi(AT, tmp3, -3);
chenhaoxuan@126 1154 __ blez(AT, l_4);
chenhaoxuan@126 1155 __ delayed()->nop();
aoqi@13 1156
aoqi@13 1157 __ bind(l_5);
aoqi@13 1158 __ lhu(AT, tmp1, 0);
chenhaoxuan@126 1159 __ lhu(tmp4, tmp1, 2);
chenhaoxuan@126 1160 __ lhu(tmp5, tmp1, 4);
chenhaoxuan@126 1161 __ lhu(tmp6, tmp1, 6);
chenhaoxuan@126 1162 __ sh(AT, tmp2, 0);
chenhaoxuan@126 1163 __ sh(tmp4, tmp2, 2);
chenhaoxuan@126 1164 __ sh(tmp5, tmp2, 4);
chenhaoxuan@126 1165 __ daddi(tmp1, tmp1, 8);
chenhaoxuan@126 1166 __ daddi(tmp2, tmp2, 8);
chenhaoxuan@126 1167 __ daddi(tmp3, tmp3, -4);
chenhaoxuan@126 1168 __ daddi(AT, tmp3, -4);
chenhaoxuan@126 1169 __ bgez(AT, l_5);
chenhaoxuan@126 1170 __ sh(tmp6, tmp2, -2);
chenhaoxuan@126 1171 }
chenhaoxuan@126 1172 // single element
chenhaoxuan@126 1173 __ bind(l_4);
chenhaoxuan@126 1174
chenhaoxuan@126 1175 __ pop(tmp6);
chenhaoxuan@126 1176 __ pop(tmp5);
chenhaoxuan@126 1177 __ pop(tmp4);
chenhaoxuan@126 1178
chenhaoxuan@126 1179 __ bind(l_14);
chenhaoxuan@126 1180 { // FasterArrayCopy
chenhaoxuan@126 1181 __ beq(R0, tmp3, l_13);
chenhaoxuan@126 1182 __ delayed()->nop();
chenhaoxuan@126 1183
chenhaoxuan@126 1184 __ bind(l_12);
chenhaoxuan@126 1185 __ lhu(AT, tmp1, 0);
aoqi@13 1186 __ sh(AT, tmp2, 0);
aoqi@13 1187 __ daddi(tmp1, tmp1, 2);
aoqi@13 1188 __ daddi(tmp2, tmp2, 2);
chenhaoxuan@126 1189 __ daddi(tmp3, tmp3, -1);
aoqi@13 1190 __ daddi(AT, tmp3, -1);
chenhaoxuan@126 1191 __ bgez(AT, l_12);
aoqi@13 1192 __ delayed()->nop();
aoqi@13 1193 }
chenhaoxuan@126 1194
chenhaoxuan@126 1195 __ bind(l_13);
aoqi@13 1196 __ pop(tmp3);
aoqi@13 1197 __ pop(tmp2);
aoqi@13 1198 __ pop(tmp1);
aoqi@13 1199
aoqi@13 1200 __ jr(RA);
aoqi@13 1201 __ delayed()->nop();
aoqi@13 1202
aoqi@13 1203 __ bind(l_debug);
aoqi@13 1204 __ stop("generate_disjoint_short_copy should not reach here");
aoqi@13 1205 return start;
aoqi@1 1206 }
aoqi@1 1207
aoqi@1 1208 // Arguments:
aoqi@1 1209 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1210 // ignored
aoqi@1 1211 // name - stub name string
aoqi@1 1212 //
aoqi@1 1213 // Inputs:
aoqi@1 1214 // c_rarg0 - source array address
aoqi@1 1215 // c_rarg1 - destination array address
aoqi@1 1216 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1217 //
aoqi@1 1218 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
aoqi@1 1219 // let the hardware handle it. The two or four words within dwords
aoqi@1 1220 // or qwords that span cache line boundaries will still be loaded
aoqi@1 1221 // and stored atomically.
aoqi@1 1222 //
aoqi@1 1223 address generate_conjoint_short_copy(bool aligned, const char *name) {
aoqi@1 1224 Label l_1, l_2, l_3, l_4, l_5;
aoqi@1 1225 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1226 __ align(CodeEntryAlignment);
aoqi@1 1227 address start = __ pc();
aoqi@1 1228 address nooverlap_target = aligned ?
aoqi@1 1229 StubRoutines::arrayof_jshort_disjoint_arraycopy() :
aoqi@1 1230 StubRoutines::jshort_disjoint_arraycopy();
aoqi@1 1231
aoqi@1 1232 array_overlap_test(nooverlap_target, 1);
aoqi@1 1233
aoqi@1 1234 __ push(T3);
aoqi@1 1235 __ push(T0);
aoqi@1 1236 __ push(T1);
aoqi@1 1237 __ push(T8);
aoqi@1 1238
aoqi@1 1239 /*
aoqi@1 1240 __ pushl(esi);
aoqi@1 1241 __ movl(ecx, Address(esp, 4+12)); // count
aoqi@1 1242 __ pushl(edi);
aoqi@1 1243 __ movl(esi, Address(esp, 8+ 4)); // from
aoqi@1 1244 __ movl(edi, Address(esp, 8+ 8)); // to
aoqi@1 1245 */
aoqi@1 1246 __ move(T1, A2);
aoqi@1 1247 __ move(T3, A0);
aoqi@1 1248 __ move(T0, A1);
aoqi@1 1249
aoqi@1 1250
aoqi@1 1251 // copy dwords from high to low
aoqi@1 1252 // __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4
aoqi@1 1253 __ sll(AT, T1, Address::times_2);
aoqi@1 1254 __ add(AT, T3, AT);
aoqi@1 1255 __ lea(T3, Address( AT, -4));
aoqi@1 1256 //__ std();
aoqi@1 1257 //__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4
aoqi@1 1258 __ sll(AT,T1 , Address::times_2);
aoqi@1 1259 __ add(AT, T0, AT);
aoqi@1 1260 __ lea(T0, Address( AT, -4));
aoqi@1 1261 // __ movl(eax, ecx);
aoqi@1 1262 __ move(T8, T1);
aoqi@1 1263 __ bind(l_1);
aoqi@1 1264 // __ sarl(ecx, 1); // dword count
aoqi@1 1265 __ sra(T1,T1, 1);
aoqi@1 1266 //__ jcc(Assembler::equal, l_4); // no dwords to move
aoqi@1 1267 __ beq(T1, R0, l_4);
aoqi@1 1268 __ delayed()->nop();
aoqi@1 1269 /* __ cmpl(ecx, 32);
aoqi@1 1270 __ jcc(Assembler::above, l_3); // > 32 dwords
aoqi@1 1271 // copy dwords with loop
aoqi@1 1272 __ subl(edi, esi);
aoqi@1 1273 */ __ align(16);
aoqi@1 1274 __ bind(l_2);
aoqi@1 1275 //__ movl(edx, Address(esi));
aoqi@1 1276 __ lw(AT, T3, 0);
aoqi@1 1277 //__ movl(Address(edi, esi, Address::times_1), edx);
aoqi@1 1278 __ sw(AT, T0, 0);
aoqi@1 1279 //__ subl(esi, 4);
aoqi@1 1280 __ addi(T3, T3, -4);
aoqi@1 1281 __ addi(T0, T0, -4);
aoqi@1 1282 //__ decl(ecx);
aoqi@1 1283 __ addi(T1, T1, -1);
aoqi@1 1284 // __ jcc(Assembler::notEqual, l_2);
aoqi@1 1285 __ bne(T1, R0, l_2);
aoqi@1 1286 __ delayed()->nop();
aoqi@1 1287 // __ addl(edi, esi);
aoqi@1 1288 // __ jmp(l_4);
aoqi@1 1289 __ b(l_4);
aoqi@1 1290 __ delayed()->nop();
aoqi@1 1291 // copy dwords with repeat move
aoqi@1 1292 __ bind(l_3);
aoqi@1 1293 // __ rep_movl();
aoqi@1 1294 __ bind(l_4);
aoqi@1 1295 // __ andl(eax, 1); // suffix count
aoqi@1 1296 __ andi(T8, T8, 1); // suffix count
aoqi@1 1297 //__ jcc(Assembler::equal, l_5); // no suffix
aoqi@1 1298 __ beq(T8, R0, l_5 );
aoqi@1 1299 __ delayed()->nop();
aoqi@1 1300 // copy suffix
aoqi@1 1301 // __ movw(edx, Address(esi, 2));
aoqi@1 1302 __ lh(AT, T3, 2);
aoqi@1 1303 // __ movw(Address(edi, 2), edx);
aoqi@1 1304 __ sh(AT, T0, 2);
aoqi@1 1305 __ bind(l_5);
aoqi@1 1306 // __ cld();
aoqi@1 1307 // __ popl(edi);
aoqi@1 1308 // __ popl(esi);
aoqi@1 1309 // __ ret(0);
aoqi@1 1310 __ pop(T8);
aoqi@1 1311 __ pop(T1);
aoqi@1 1312 __ pop(T0);
aoqi@1 1313 __ pop(T3);
aoqi@1 1314 __ jr(RA);
aoqi@1 1315 __ delayed()->nop();
aoqi@1 1316 return start;
aoqi@1 1317 }
aoqi@1 1318
aoqi@1 1319 // Arguments:
aoqi@1 1320 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1321 // ignored
aoqi@1 1322 // is_oop - true => oop array, so generate store check code
aoqi@1 1323 // name - stub name string
aoqi@1 1324 //
aoqi@1 1325 // Inputs:
aoqi@1 1326 // c_rarg0 - source array address
aoqi@1 1327 // c_rarg1 - destination array address
aoqi@1 1328 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1329 //
aoqi@1 1330 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1331 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1332 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1333 //
aoqi@1 1334 // Side Effects:
aoqi@1 1335 // disjoint_int_copy_entry is set to the no-overlap entry point
aoqi@1 1336 // used by generate_conjoint_int_oop_copy().
aoqi@1 1337 //
aoqi@1 1338 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
aoqi@119 1339 Label l_3, l_4, l_5, l_6, l_7;
jiangshaofeng@118 1340 StubCodeMark mark(this, "StubRoutines", name);
fujie@109 1341
jiangshaofeng@118 1342 __ align(CodeEntryAlignment);
jiangshaofeng@118 1343 address start = __ pc();
aoqi@119 1344 __ push(T3);
aoqi@119 1345 __ push(T0);
aoqi@119 1346 __ push(T1);
aoqi@119 1347 __ push(T8);
aoqi@119 1348 __ move(T1, A2);
aoqi@119 1349 __ move(T3, A0);
jiangshaofeng@118 1350 __ move(T0, A1);
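  // T3: source array address
  // T0: destination array address
  // T1: element count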
fujie@109 1351
jiangshaofeng@118 1352 if (is_oop) {
fujie@121 1353 if (Use3A2000) __ sync();
jiangshaofeng@118 1354 }
aoqi@1 1355
jiangshaofeng@118 1356 if(!aligned) {
jiangshaofeng@118 1357 __ xorr(AT, T3, T0);
jiangshaofeng@118 1358 __ andi(AT, AT, 7);
jiangshaofeng@118 1359 __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time
jiangshaofeng@118 1360 __ delayed()->nop();
fujie@109 1361
jiangshaofeng@118 1362 __ andi(AT, T3, 7);
jiangshaofeng@118 1363 __ beq(AT, R0, l_6); //copy 2 elements each time
jiangshaofeng@118 1364 __ delayed()->nop();
aoqi@1 1365
jiangshaofeng@118 1366 __ lw(AT, T3, 0);
jiangshaofeng@118 1367 __ daddi(T1, T1, -1);
jiangshaofeng@118 1368 __ sw(AT, T0, 0);
jiangshaofeng@118 1369 __ daddi(T3, T3, 4);
jiangshaofeng@118 1370 __ daddi(T0, T0, 4);
jiangshaofeng@118 1371 }
fujie@109 1372
jiangshaofeng@118 1373 {
aoqi@119 1374 __ bind(l_6);
jiangshaofeng@118 1375 __ daddi(AT, T1, -1);
jiangshaofeng@118 1376 __ blez(AT, l_5);
jiangshaofeng@118 1377 __ delayed()->nop();
fujie@109 1378
jiangshaofeng@118 1379 __ bind(l_7);
jiangshaofeng@118 1380 __ ld(AT, T3, 0);
jiangshaofeng@118 1381 __ sd(AT, T0, 0);
jiangshaofeng@118 1382 __ daddi(T3, T3, 8);
jiangshaofeng@118 1383 __ daddi(T0, T0, 8);
jiangshaofeng@118 1384 __ daddi(T1, T1, -2);
jiangshaofeng@118 1385 __ daddi(AT, T1, -2);
jiangshaofeng@118 1386 __ bgez(AT, l_7);
jiangshaofeng@118 1387 __ delayed()->nop();
jiangshaofeng@118 1388 }
jiangshaofeng@118 1389
jiangshaofeng@118 1390 __ bind(l_5);
aoqi@119 1391 __ beq(T1, R0, l_4);
aoqi@119 1392 __ delayed()->nop();
aoqi@119 1393
jiangshaofeng@118 1394 __ align(16);
jiangshaofeng@118 1395 __ bind(l_3);
aoqi@119 1396 __ lw(AT, T3, 0);
aoqi@119 1397 __ sw(AT, T0, 0);
jiangshaofeng@118 1398 __ addi(T3, T3, 4);
jiangshaofeng@118 1399 __ addi(T0, T0, 4);
aoqi@119 1400 __ addi(T1, T1, -1);
aoqi@119 1401 __ bne(T1, R0, l_3);
aoqi@119 1402 __ delayed()->nop();
aoqi@119 1403
jiangshaofeng@118 1404 if (is_oop) {
aoqi@119 1405 __ move(T0, A1);
aoqi@119 1406 __ move(T1, A2);
jiangshaofeng@118 1407 array_store_check();
jiangshaofeng@118 1408 }
aoqi@119 1409
aoqi@119 1410 // exit
jiangshaofeng@118 1411 __ bind(l_4);
jiangshaofeng@118 1412 __ pop(T8);
jiangshaofeng@118 1413 __ pop(T1);
jiangshaofeng@118 1414 __ pop(T0);
jiangshaofeng@118 1415 __ pop(T3);
aoqi@119 1416 __ jr(RA);
aoqi@119 1417 __ delayed()->nop();
aoqi@119 1418
jiangshaofeng@118 1419 return start;
jiangshaofeng@118 1420 }
aoqi@1 1421
aoqi@1 1422 // Arguments:
aoqi@1 1423 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1424 // ignored
aoqi@1 1425 // is_oop - true => oop array, so generate store check code
aoqi@1 1426 // name - stub name string
aoqi@1 1427 //
aoqi@1 1428 // Inputs:
aoqi@1 1429 // c_rarg0 - source array address
aoqi@1 1430 // c_rarg1 - destination array address
aoqi@1 1431 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1432 //
aoqi@1 1433 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1434 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1435 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1436 //
aoqi@1 1437 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
fujie@110 1438 Label l_2, l_4;
aoqi@1 1439 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1440 __ align(CodeEntryAlignment);
aoqi@1 1441 address start = __ pc();
aoqi@1 1442 address nooverlap_target;
aoqi@1 1443
aoqi@1 1444 if (is_oop) {
aoqi@1 1445 nooverlap_target = aligned ?
aoqi@1 1446 StubRoutines::arrayof_oop_disjoint_arraycopy() :
aoqi@1 1447 StubRoutines::oop_disjoint_arraycopy();
aoqi@1 1448     } else {
aoqi@1 1449 nooverlap_target = aligned ?
aoqi@1 1450 StubRoutines::arrayof_jint_disjoint_arraycopy() :
aoqi@1 1451 StubRoutines::jint_disjoint_arraycopy();
aoqi@1 1452 }
aoqi@1 1453
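    // The overlap test branches (tail-calls) to the disjoint stub when src and dst
    // do not overlap; the second argument appears to be log2 of the element size
    // (2 => 4-byte elements), used to scale the element count.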
aoqi@1 1454 array_overlap_test(nooverlap_target, 2);
aoqi@1 1455
aoqi@1 1456 __ push(T3);
aoqi@1 1457 __ push(T0);
aoqi@1 1458 __ push(T1);
aoqi@1 1459 __ push(T8);
aoqi@1 1460
aoqi@1 1461 __ move(T1, A2);
aoqi@1 1462 __ move(T3, A0);
aoqi@1 1463 __ move(T0, A1);
fujie@110 1464 // T3: source array address
fujie@110 1465 // T0: destination array address
fujie@110 1466 // T1: element count
aoqi@1 1467
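    // Use3A2000 guards an extra sync below, apparently a memory-ordering
    // workaround for the Loongson 3A2000 before copying oops.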
fujie@110 1468 if (is_oop) {
fujie@121 1469 if (Use3A2000) __ sync();
fujie@110 1470 }
fujie@110 1471
aoqi@1 1472 __ sll(AT, T1, Address::times_4);
aoqi@1 1473 __ add(AT, T3, AT);
aoqi@1 1474 __ lea(T3 , Address(AT, -4));
aoqi@1 1475 __ sll(AT, T1, Address::times_4);
aoqi@1 1476 __ add(AT, T0, AT);
aoqi@1 1477 __ lea(T0 , Address(AT, -4));
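    // T3/T0 now point at the last 4-byte element of src and dst; the loop below
    // copies backwards so that overlapping regions are handled correctly.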
aoqi@1 1478
aoqi@1 1479 __ beq(T1, R0, l_4);
aoqi@1 1480 __ delayed()->nop();
fujie@110 1481
aoqi@1 1482 __ align(16);
aoqi@1 1483 __ bind(l_2);
aoqi@1 1484 __ lw(AT, T3, 0);
aoqi@1 1485 __ sw(AT, T0, 0);
aoqi@1 1486 __ addi(T3, T3, -4);
aoqi@1 1487 __ addi(T0, T0, -4);
aoqi@1 1488 __ addi(T1, T1, -1);
aoqi@1 1489 __ bne(T1, R0, l_2);
aoqi@1 1490 __ delayed()->nop();
fujie@110 1491
aoqi@1 1492 if (is_oop) {
fujie@110 1493 __ move(T0, A1);
fujie@110 1494 __ move(T1, A2);
fujie@110 1495 array_store_check();
aoqi@1 1496 }
aoqi@1 1497 __ bind(l_4);
aoqi@1 1498 __ pop(T8);
aoqi@1 1499 __ pop(T1);
aoqi@1 1500 __ pop(T0);
aoqi@1 1501 __ pop(T3);
aoqi@1 1502 __ jr(RA);
aoqi@1 1503 __ delayed()->nop();
fujie@110 1504
aoqi@1 1505 return start;
aoqi@1 1506 }
aoqi@1 1507
aoqi@1 1508 // Arguments:
aoqi@1 1509 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1510 // ignored
aoqi@1 1511 // is_oop - true => oop array, so generate store check code
aoqi@1 1512 // name - stub name string
aoqi@1 1513 //
aoqi@1 1514 // Inputs:
aoqi@1 1515 // c_rarg0 - source array address
aoqi@1 1516 // c_rarg1 - destination array address
aoqi@1 1517 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1518 //
aoqi@1 1519 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1520 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1521   //   cache line boundaries will still be loaded and stored atomically.
aoqi@1 1522   //
aoqi@1 1523   // Side Effects:
aoqi@1 1524   //   disjoint_long_copy_entry is set to the no-overlap entry point
aoqi@1 1525   //   used by generate_conjoint_long_oop_copy().
aoqi@1 1526 //
aoqi@1 1527 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
fujie@111 1528 Label l_3, l_4;
aoqi@1 1529 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1530 __ align(CodeEntryAlignment);
aoqi@1 1531 address start = __ pc();
fujie@111 1532
aoqi@1 1533 __ push(T3);
aoqi@1 1534 __ push(T0);
aoqi@1 1535 __ push(T1);
aoqi@1 1536 __ push(T8);
fujie@111 1537
aoqi@1 1538 __ move(T1, A2);
aoqi@1 1539 __ move(T3, A0);
aoqi@1 1540 __ move(T0, A1);
fujie@111 1541 // T3: source array address
fujie@111 1542 // T0: destination array address
fujie@111 1543 // T1: element count
aoqi@1 1544
aoqi@1 1545 if (is_oop) {
fujie@121 1546 if (Use3A2000) __ sync();
aoqi@1 1547 }
fujie@111 1548
fujie@111 1549 __ beq(T1, R0, l_4);
aoqi@1 1550 __ delayed()->nop();
aoqi@1 1551
aoqi@1 1552 __ align(16);
aoqi@1 1553 __ bind(l_3);
aoqi@1 1554 __ ld(AT, T3, 0);
aoqi@1 1555 __ sd(AT, T0, 0);
aoqi@1 1556 __ addi(T3, T3, 8);
aoqi@1 1557 __ addi(T0, T0, 8);
aoqi@1 1558 __ addi(T1, T1, -1);
aoqi@1 1559 __ bne(T1, R0, l_3);
aoqi@1 1560 __ delayed()->nop();
fujie@111 1561
aoqi@1 1562 if (is_oop) {
aoqi@1 1563 __ move(T0, A1);
aoqi@1 1564 __ move(T1, A2);
aoqi@1 1565 array_store_check();
aoqi@1 1566 }
fujie@111 1567
fujie@111 1568 // exit
aoqi@1 1569 __ bind(l_4);
aoqi@1 1570 __ pop(T8);
aoqi@1 1571 __ pop(T1);
aoqi@1 1572 __ pop(T0);
aoqi@1 1573 __ pop(T3);
aoqi@1 1574 __ jr(RA);
aoqi@1 1575 __ delayed()->nop();
aoqi@1 1576 return start;
aoqi@1 1577 }
aoqi@1 1578
aoqi@1 1579 // Arguments:
aoqi@1 1580 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1581 // ignored
aoqi@1 1582 // is_oop - true => oop array, so generate store check code
aoqi@1 1583 // name - stub name string
aoqi@1 1584 //
aoqi@1 1585 // Inputs:
aoqi@1 1586 // c_rarg0 - source array address
aoqi@1 1587 // c_rarg1 - destination array address
aoqi@1 1588 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1589 //
aoqi@1 1590 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1591 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1592   //   cache line boundaries will still be loaded and stored atomically.
aoqi@1 1593 //
aoqi@1 1594 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
fujie@113 1595 Label l_2, l_4;
aoqi@1 1596 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1597 __ align(CodeEntryAlignment);
aoqi@1 1598 address start = __ pc();
aoqi@1 1599 address nooverlap_target;
aoqi@1 1600
aoqi@1 1601 if (is_oop) {
aoqi@1 1602 nooverlap_target = aligned ?
aoqi@1 1603 StubRoutines::arrayof_oop_disjoint_arraycopy() :
aoqi@1 1604 StubRoutines::oop_disjoint_arraycopy();
aoqi@1 1605     } else {
aoqi@1 1606 nooverlap_target = aligned ?
aoqi@1 1607 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
aoqi@1 1608 StubRoutines::jlong_disjoint_arraycopy();
aoqi@1 1609 }
aoqi@1 1610
aoqi@1 1611 array_overlap_test(nooverlap_target, 3);
aoqi@1 1612
aoqi@1 1613 __ push(T3);
aoqi@1 1614 __ push(T0);
aoqi@1 1615 __ push(T1);
aoqi@1 1616 __ push(T8);
aoqi@1 1617
aoqi@1 1618 __ move(T1, A2);
aoqi@1 1619 __ move(T3, A0);
aoqi@1 1620 __ move(T0, A1);
aoqi@1 1621
fujie@113 1622 if (is_oop) {
fujie@121 1623 if (Use3A2000) __ sync();
fujie@113 1624 }
fujie@113 1625
aoqi@1 1626 __ sll(AT, T1, Address::times_8);
aoqi@1 1627 __ add(AT, T3, AT);
aoqi@1 1628 __ lea(T3 , Address(AT, -8));
aoqi@1 1629 __ sll(AT, T1, Address::times_8);
aoqi@1 1630 __ add(AT, T0, AT);
aoqi@1 1631 __ lea(T0 , Address(AT, -8));
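    // T3/T0 now point at the last 8-byte element of src and dst; copy backwards,
    // as in the conjoint int copy above.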
aoqi@1 1632
aoqi@1 1633 __ beq(T1, R0, l_4);
aoqi@1 1634 __ delayed()->nop();
fujie@113 1635
aoqi@1 1636 __ align(16);
aoqi@1 1637 __ bind(l_2);
aoqi@1 1638 __ ld(AT, T3, 0);
aoqi@1 1639 __ sd(AT, T0, 0);
aoqi@1 1640 __ addi(T3, T3, -8);
aoqi@1 1641 __ addi(T0, T0, -8);
aoqi@1 1642 __ addi(T1, T1, -1);
aoqi@1 1643 __ bne(T1, R0, l_2);
aoqi@1 1644 __ delayed()->nop();
fujie@113 1645
aoqi@1 1646 if (is_oop) {
fujie@113 1647 __ move(T0, A1);
fujie@113 1648 __ move(T1, A2);
fujie@113 1649 array_store_check();
aoqi@1 1650 }
aoqi@1 1651 __ bind(l_4);
aoqi@1 1652 __ pop(T8);
aoqi@1 1653 __ pop(T1);
aoqi@1 1654 __ pop(T0);
aoqi@1 1655 __ pop(T3);
aoqi@1 1656 __ jr(RA);
aoqi@1 1657 __ delayed()->nop();
aoqi@1 1658 return start;
aoqi@1 1659 }
aoqi@1 1660
aoqi@1 1661 //FIXME
aoqi@1 1662 address generate_disjoint_long_copy(bool aligned, const char *name) {
aoqi@1 1663 Label l_1, l_2;
aoqi@1 1664 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1665 __ align(CodeEntryAlignment);
aoqi@1 1666 address start = __ pc();
aoqi@1 1667
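    // The '// __ movl(...)' style comments interleaved below (and in
    // generate_conjoint_long_copy) appear to be the original x86 implementation,
    // kept as a reference for this MIPS port.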
aoqi@1 1668 // __ movl(ecx, Address(esp, 4+8)); // count
aoqi@1 1669 // __ movl(eax, Address(esp, 4+0)); // from
aoqi@1 1670 // __ movl(edx, Address(esp, 4+4)); // to
aoqi@1 1671 __ move(T1, A2);
aoqi@1 1672 __ move(T3, A0);
aoqi@1 1673 __ move(T0, A1);
aoqi@1 1674 __ push(T3);
aoqi@1 1675 __ push(T0);
aoqi@1 1676 __ push(T1);
aoqi@1 1677 //__ subl(edx, eax);
aoqi@1 1678 //__ jmp(l_2);
aoqi@1 1679 __ b(l_2);
aoqi@1 1680 __ delayed()->nop();
aoqi@1 1681 __ align(16);
aoqi@1 1682 __ bind(l_1);
aoqi@1 1683 // if (VM_Version::supports_mmx()) {
aoqi@1 1684 // __ movq(mmx0, Address(eax));
aoqi@1 1685 // __ movq(Address(eax, edx, Address::times_1), mmx0);
aoqi@1 1686 // } else {
aoqi@1 1687 // __ fild_d(Address(eax));
aoqi@1 1688 __ ld(AT, T3, 0);
aoqi@1 1689 // __ fistp_d(Address(eax, edx, Address::times_1));
aoqi@1 1690     __ sd(AT, T0, 0);
aoqi@1 1691 // }
aoqi@1 1692 // __ addl(eax, 8);
aoqi@1 1693 __ addi(T3, T3, 8);
aoqi@1 1694 __ addi(T0, T0, 8);
aoqi@1 1695 __ bind(l_2);
aoqi@1 1696 // __ decl(ecx);
aoqi@1 1697 __ addi(T1, T1, -1);
aoqi@1 1698 // __ jcc(Assembler::greaterEqual, l_1);
aoqi@1 1699 __ bgez(T1, l_1);
aoqi@1 1700 __ delayed()->nop();
aoqi@1 1701 // if (VM_Version::supports_mmx()) {
aoqi@1 1702 // __ emms();
aoqi@1 1703 // }
aoqi@1 1704 // __ ret(0);
aoqi@1 1705 __ pop(T1);
aoqi@1 1706 __ pop(T0);
aoqi@1 1707 __ pop(T3);
aoqi@1 1708 __ jr(RA);
aoqi@1 1709 __ delayed()->nop();
aoqi@1 1710 return start;
aoqi@1 1711 }
aoqi@1 1712
aoqi@1 1713
aoqi@1 1714 address generate_conjoint_long_copy(bool aligned, const char *name) {
aoqi@1 1715 Label l_1, l_2;
aoqi@1 1716 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1717 __ align(CodeEntryAlignment);
aoqi@1 1718 address start = __ pc();
aoqi@1 1719 address nooverlap_target = aligned ?
aoqi@1 1720 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
aoqi@1 1721 StubRoutines::jlong_disjoint_arraycopy();
aoqi@1 1722 array_overlap_test(nooverlap_target, 3);
aoqi@1 1723
aoqi@1 1724 __ push(T3);
aoqi@1 1725 __ push(T0);
aoqi@1 1726 __ push(T1);
aoqi@1 1727
aoqi@1 1728 /* __ movl(ecx, Address(esp, 4+8)); // count
aoqi@1 1729 __ movl(eax, Address(esp, 4+0)); // from
aoqi@1 1730 __ movl(edx, Address(esp, 4+4)); // to
aoqi@1 1731 __ jmp(l_2);
aoqi@1 1732
aoqi@1 1733 */
aoqi@1 1734 __ move(T1, A2);
aoqi@1 1735 __ move(T3, A0);
aoqi@1 1736 __ move(T0, A1);
aoqi@1 1737 __ sll(AT, T1, Address::times_8);
aoqi@1 1738 __ add(AT, T3, AT);
aoqi@1 1739 __ lea(T3 , Address(AT, -8));
aoqi@1 1740 __ sll(AT, T1, Address::times_8);
aoqi@1 1741 __ add(AT, T0, AT);
aoqi@1 1742 __ lea(T0 , Address(AT, -8));
aoqi@1 1743
aoqi@1 1744
aoqi@1 1745
aoqi@1 1746 __ b(l_2);
aoqi@1 1747 __ delayed()->nop();
aoqi@1 1748 __ align(16);
aoqi@1 1749 __ bind(l_1);
aoqi@1 1750 /* if (VM_Version::supports_mmx()) {
aoqi@1 1751 __ movq(mmx0, Address(eax, ecx, Address::times_8));
aoqi@1 1752 __ movq(Address(edx, ecx,Address::times_8), mmx0);
aoqi@1 1753 } else {
aoqi@1 1754 __ fild_d(Address(eax, ecx, Address::times_8));
aoqi@1 1755 __ fistp_d(Address(edx, ecx,Address::times_8));
aoqi@1 1756 }
aoqi@1 1757 */
aoqi@1 1758 __ ld(AT, T3, 0);
aoqi@1 1759     __ sd(AT, T0, 0);
aoqi@1 1760     __ addi(T3, T3, -8);
aoqi@1 1761     __ addi(T0, T0, -8);
aoqi@1 1762 __ bind(l_2);
aoqi@1 1763 // __ decl(ecx);
aoqi@1 1764 __ addi(T1, T1, -1);
aoqi@1 1765 //__ jcc(Assembler::greaterEqual, l_1);
aoqi@1 1766 __ bgez(T1, l_1);
aoqi@1 1767 __ delayed()->nop();
aoqi@1 1768 // if (VM_Version::supports_mmx()) {
aoqi@1 1769 // __ emms();
aoqi@1 1770 // }
aoqi@1 1771 // __ ret(0);
aoqi@1 1772 __ pop(T1);
aoqi@1 1773 __ pop(T0);
aoqi@1 1774 __ pop(T3);
aoqi@1 1775 __ jr(RA);
aoqi@1 1776 __ delayed()->nop();
aoqi@1 1777 return start;
aoqi@1 1778 }
aoqi@1 1779
aoqi@1 1780 void generate_arraycopy_stubs() {
aoqi@1 1781 if (UseCompressedOops) {
aoqi@178 1782 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true,
aoqi@178 1783 "oop_disjoint_arraycopy");
aoqi@178 1784 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true,
aoqi@178 1785 "oop_arraycopy");
aoqi@178 1786 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true,
aoqi@178 1787 "oop_disjoint_arraycopy_uninit");
aoqi@178 1788 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true,
aoqi@178 1789 "oop_arraycopy_uninit");
aoqi@1 1790 } else {
aoqi@178 1791 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true,
aoqi@178 1792 "oop_disjoint_arraycopy");
aoqi@178 1793 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true,
aoqi@178 1794 "oop_arraycopy");
aoqi@178 1795 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true,
aoqi@178 1796 "oop_disjoint_arraycopy_uninit");
aoqi@178 1797 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true,
aoqi@178 1798 "oop_arraycopy_uninit");
aoqi@1 1799 }
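    // With compressed oops an oop element is 4 bytes, so the int copy stubs are
    // used; otherwise the 8-byte long copy stubs are used. Note that the *_uninit
    // entries reuse the same generated code as the initialized variants; this port
    // does not appear to special-case uninitialized destinations.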
aoqi@1 1800
aoqi@178 1801 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
aoqi@178 1802 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
aoqi@178 1803 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
aoqi@178 1804 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
aoqi@1 1805
aoqi@1 1806 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
aoqi@1 1807 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
aoqi@1 1808 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
aoqi@1 1809 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
aoqi@1 1810
aoqi@178 1811 // We don't generate specialized code for HeapWord-aligned source
aoqi@178 1812 // arrays, so just use the code we've already generated
aoqi@178 1813 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
aoqi@178 1814 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy;
aoqi@178 1815
aoqi@178 1816 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
aoqi@178 1817 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy;
aoqi@178 1818
aoqi@178 1819 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
aoqi@178 1820 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
aoqi@178 1821
aoqi@178 1822 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
aoqi@178 1823 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
aoqi@1 1824
aoqi@1 1825 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
aoqi@1 1826 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
aoqi@178 1827
aoqi@178 1828 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
aoqi@178 1829 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
aoqi@1 1830 }
aoqi@1 1831
aoqi@1 1832   // Wang: added this function to implement SafeFetch32 and SafeFetchN
aoqi@1 1833 void generate_safefetch(const char* name, int size, address* entry,
aoqi@1 1834 address* fault_pc, address* continuation_pc) {
aoqi@1 1835 // safefetch signatures:
aoqi@1 1836 // int SafeFetch32(int* adr, int errValue);
aoqi@1 1837 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
aoqi@1 1838 //
aoqi@1 1839 // arguments:
aoqi@1 1840 // A0 = adr
aoqi@1 1841 // A1 = errValue
aoqi@1 1842 //
aoqi@1 1843 // result:
aoqi@1 1844   //   V0 = *adr or errValue
aoqi@1 1845
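    // Usage sketch (hypothetical caller, e.g. error-reporting code probing memory):
    //   int v = SafeFetch32((int*) p, 0xdeadbeef);  // yields 0xdeadbeef if p faults
    // A load that faults at *fault_pc is redirected by the signal handler to
    // *continuation_pc, where A1 still holds errValue.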
aoqi@1 1846 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1847
aoqi@1 1848 // Entry point, pc or function descriptor.
aoqi@1 1849 *entry = __ pc();
aoqi@1 1850
aoqi@1 1851 // Load *adr into A1, may fault.
aoqi@1 1852 *fault_pc = __ pc();
aoqi@1 1853 switch (size) {
aoqi@1 1854 case 4:
aoqi@1 1855 // int32_t
aoqi@1 1856 __ lw(A1, A0, 0);
aoqi@1 1857 break;
aoqi@1 1858 case 8:
aoqi@1 1859 // int64_t
aoqi@1 1860 __ ld(A1, A0, 0);
aoqi@1 1861 break;
aoqi@1 1862 default:
aoqi@1 1863 ShouldNotReachHere();
aoqi@1 1864 }
aoqi@1 1865
aoqi@1 1866 // return errValue or *adr
aoqi@1 1867 *continuation_pc = __ pc();
aoqi@1 1868     __ addu(V0, A1, R0);
aoqi@1 1869 __ jr(RA);
aoqi@1 1870 __ delayed()->nop();
aoqi@1 1871 }
aoqi@1 1872
aoqi@1 1873
aoqi@1 1874 #undef __
aoqi@1 1875 #define __ masm->
aoqi@1 1876
aoqi@1 1877 // Continuation point for throwing of implicit exceptions that are
aoqi@1 1878 // not handled in the current activation. Fabricates an exception
aoqi@1 1879 // oop and initiates normal exception dispatching in this
aoqi@1 1880 // frame. Since we need to preserve callee-saved values (currently
aoqi@1 1881 // only for C2, but done for C1 as well) we need a callee-saved oop
aoqi@1 1882 // map and therefore have to make these stubs into RuntimeStubs
aoqi@1 1883 // rather than BufferBlobs. If the compiler needs all registers to
aoqi@1 1884 // be preserved between the fault point and the exception handler
aoqi@1 1885 // then it must assume responsibility for that in
aoqi@1 1886 // AbstractCompiler::continuation_for_implicit_null_exception or
aoqi@1 1887 // continuation_for_implicit_division_by_zero_exception. All other
aoqi@1 1888 // implicit exceptions (e.g., NullPointerException or
aoqi@1 1889 // AbstractMethodError on entry) are either at call sites or
aoqi@1 1890 // otherwise assume that stack unwinding will be initiated, so
aoqi@1 1891 // caller saved registers were assumed volatile in the compiler.
aoqi@1 1892 address generate_throw_exception(const char* name,
aoqi@1 1893 address runtime_entry,
aoqi@1 1894 bool restore_saved_exception_pc) {
aoqi@1 1895 // Information about frame layout at time of blocking runtime call.
aoqi@1 1896 // Note that we only have to preserve callee-saved registers since
aoqi@1 1897 // the compilers are responsible for supplying a continuation point
aoqi@1 1898 // if they expect all registers to be preserved.
aoqi@1 1899 //#define aoqi_test
aoqi@1 1900 #ifdef aoqi_test
aoqi@1 1901 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 1902 #endif
aoqi@1 1903 enum layout {
aoqi@1 1904 thread_off, // last_java_sp
aoqi@1 1905 S7_off, // callee saved register sp + 1
aoqi@1 1906 S6_off, // callee saved register sp + 2
aoqi@1 1907 S5_off, // callee saved register sp + 3
aoqi@1 1908 S4_off, // callee saved register sp + 4
aoqi@1 1909 S3_off, // callee saved register sp + 5
aoqi@1 1910 S2_off, // callee saved register sp + 6
aoqi@1 1911 S1_off, // callee saved register sp + 7
aoqi@1 1912 S0_off, // callee saved register sp + 8
aoqi@1 1913 FP_off,
aoqi@1 1914 ret_address,
aoqi@1 1915 framesize
aoqi@1 1916 };
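    // Word offsets from SP after the prolog below; FP_off and ret_address appear
    // to account for the FP and return address saved by enter(), which is why only
    // (framesize-2) slots are allocated explicitly.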
aoqi@1 1917
aoqi@1 1918 int insts_size = 2048;
aoqi@1 1919 int locs_size = 32;
aoqi@1 1920
aoqi@1 1921 // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false,
aoqi@1 1922 // NULL, NULL, NULL, false, NULL, name, false);
aoqi@1 1923 CodeBuffer code (name , insts_size, locs_size);
aoqi@1 1924 #ifdef aoqi_test
aoqi@1 1925 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 1926 #endif
aoqi@1 1927 OopMapSet* oop_maps = new OopMapSet();
aoqi@1 1928 #ifdef aoqi_test
aoqi@1 1929 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 1930 #endif
aoqi@1 1931 MacroAssembler* masm = new MacroAssembler(&code);
aoqi@1 1932 #ifdef aoqi_test
aoqi@1 1933 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 1934 #endif
aoqi@1 1935
aoqi@1 1936 address start = __ pc();
aoqi@1 1937 //__ stop("generate_throw_exception");
aoqi@1 1938 /*
aoqi@1 1939 __ move(AT, (int)&jerome1 );
aoqi@1 1940 __ sw(SP, AT, 0);
aoqi@1 1941 __ move(AT, (int)&jerome2 );
aoqi@1 1942 __ sw(FP, AT, 0);
aoqi@1 1943 __ move(AT, (int)&jerome3 );
aoqi@1 1944 __ sw(RA, AT, 0);
aoqi@1 1945 __ move(AT, (int)&jerome4 );
aoqi@1 1946 __ sw(R0, AT, 0);
aoqi@1 1947 __ move(AT, (int)&jerome5 );
aoqi@1 1948 __ sw(R0, AT, 0);
aoqi@1 1949 __ move(AT, (int)&jerome6 );
aoqi@1 1950 __ sw(R0, AT, 0);
aoqi@1 1951 __ move(AT, (int)&jerome7 );
aoqi@1 1952 __ sw(R0, AT, 0);
aoqi@1 1953 __ move(AT, (int)&jerome10 );
aoqi@1 1954 __ sw(R0, AT, 0);
aoqi@1 1955
aoqi@1 1956 __ pushad();
aoqi@1 1957
aoqi@1 1958 //__ enter();
aoqi@1 1959 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics),
aoqi@1 1960 relocInfo::runtime_call_type);
aoqi@1 1961 __ delayed()->nop();
aoqi@1 1962
aoqi@1 1963 //__ leave();
aoqi@1 1964 __ popad();
aoqi@1 1965
aoqi@1 1966 */
aoqi@1 1967
aoqi@1 1968 // This is an inlined and slightly modified version of call_VM
aoqi@1 1969 // which has the ability to fetch the return PC out of
aoqi@1 1970 // thread-local storage and also sets up last_Java_sp slightly
aoqi@1 1971 // differently than the real call_VM
aoqi@1 1972 #ifndef OPT_THREAD
aoqi@1 1973 Register java_thread = TREG;
aoqi@1 1974 __ get_thread(java_thread);
aoqi@1 1975 #else
aoqi@1 1976 Register java_thread = TREG;
aoqi@1 1977 #endif
aoqi@1 1978 #ifdef aoqi_test
aoqi@1 1979 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 1980 #endif
aoqi@1 1981 if (restore_saved_exception_pc) {
aoqi@1 1982 __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // eax
aoqi@1 1983 }
aoqi@1 1984
aoqi@1 1985 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1986
aoqi@1 1987 __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
aoqi@1 1988 __ sd(S0, SP, S0_off * wordSize);
aoqi@1 1989 __ sd(S1, SP, S1_off * wordSize);
aoqi@1 1990 __ sd(S2, SP, S2_off * wordSize);
aoqi@1 1991 __ sd(S3, SP, S3_off * wordSize);
aoqi@1 1992 __ sd(S4, SP, S4_off * wordSize);
aoqi@1 1993 __ sd(S5, SP, S5_off * wordSize);
aoqi@1 1994 __ sd(S6, SP, S6_off * wordSize);
aoqi@1 1995 __ sd(S7, SP, S7_off * wordSize);
aoqi@1 1996
aoqi@1 1997 int frame_complete = __ pc() - start;
aoqi@1 1998 // push java thread (becomes first argument of C function)
aoqi@1 1999 __ sd(java_thread, SP, thread_off * wordSize);
aoqi@1 2000 if (java_thread!=A0)
aoqi@1 2001 __ move(A0, java_thread);
aoqi@1 2002
aoqi@1 2003 // Set up last_Java_sp and last_Java_fp
aoqi@1 2004 __ set_last_Java_frame(java_thread, SP, FP, NULL);
aoqi@1 2005 __ relocate(relocInfo::internal_pc_type);
aoqi@1 2006 {
fujie@373 2007 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
fujie@368 2008 __ patchable_set48(AT, save_pc);
aoqi@1 2009 }
aoqi@1 2010 __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
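    // The constant stored above (pc + NativeMovConstReg::instruction_size + 28)
    // appears to precompute the return address of the runtime call below, so that
    // last_Java_pc points just past the call and its delay slot.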
aoqi@1 2011
aoqi@1 2012 // Call runtime
aoqi@1 2013 __ call(runtime_entry);
aoqi@1 2014 __ delayed()->nop();
aoqi@1 2015 // Generate oop map
aoqi@1 2016 OopMap* map = new OopMap(framesize, 0);
aoqi@1 2017 oop_maps->add_gc_map(__ offset(), map);
aoqi@1 2018
aoqi@1 2019 // restore the thread (cannot use the pushed argument since arguments
aoqi@1 2020 // may be overwritten by C code generated by an optimizing compiler);
aoqi@1 2021 // however can use the register value directly if it is callee saved.
aoqi@1 2022 #ifndef OPT_THREAD
aoqi@1 2023 __ get_thread(java_thread);
aoqi@1 2024 #endif
aoqi@1 2025
aoqi@1 2026 __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@1 2027 // __ reset_last_Java_frame(java_thread, true);
aoqi@1 2028 __ reset_last_Java_frame(java_thread, true, true);
aoqi@1 2029
aoqi@1 2030 // Restore callee save registers. This must be done after resetting the Java frame
aoqi@1 2031 __ ld(S0, SP, S0_off * wordSize);
aoqi@1 2032 __ ld(S1, SP, S1_off * wordSize);
aoqi@1 2033 __ ld(S2, SP, S2_off * wordSize);
aoqi@1 2034 __ ld(S3, SP, S3_off * wordSize);
aoqi@1 2035 __ ld(S4, SP, S4_off * wordSize);
aoqi@1 2036 __ ld(S5, SP, S5_off * wordSize);
aoqi@1 2037 __ ld(S6, SP, S6_off * wordSize);
aoqi@1 2038 __ ld(S7, SP, S7_off * wordSize);
aoqi@1 2039
aoqi@1 2040 // discard arguments
aoqi@1 2041 __ addi(SP, SP, (framesize-2) * wordSize); // epilog
aoqi@1 2042 // __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 2043 __ addi(SP, FP, wordSize);
aoqi@1 2044 __ ld(FP, SP, -1*wordSize);
aoqi@1 2045 // check for pending exceptions
aoqi@1 2046 #ifdef ASSERT
aoqi@1 2047 Label L;
aoqi@1 2048 __ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 2049 __ bne(AT, R0, L);
aoqi@1 2050 __ delayed()->nop();
aoqi@1 2051 __ should_not_reach_here();
aoqi@1 2052 __ bind(L);
aoqi@1 2053 #endif //ASSERT
aoqi@1 2054 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
aoqi@1 2055 __ delayed()->nop();
aoqi@1 2056 #ifdef aoqi_test
aoqi@1 2057 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2058 #endif
aoqi@1 2059 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete,
aoqi@1 2060 framesize, oop_maps, false);
aoqi@1 2061 #ifdef aoqi_test
aoqi@1 2062 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2063 #endif
aoqi@1 2064 return stub->entry_point();
aoqi@1 2065 }
aoqi@1 2066
aoqi@1 2067 // Initialization
aoqi@1 2068 void generate_initial() {
aoqi@1 2069 /*
aoqi@1 2070 // Generates all stubs and initializes the entry points
aoqi@1 2071
aoqi@1 2072 // This platform-specific stub is needed by generate_call_stub()
aoqi@1 2073 StubRoutines::mips::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80);
aoqi@1 2074
aoqi@1 2075 // entry points that exist in all platforms Note: This is code
aoqi@1 2076 // that could be shared among different platforms - however the
aoqi@1 2077 // benefit seems to be smaller than the disadvantage of having a
aoqi@1 2078 // much more complicated generator structure. See also comment in
aoqi@1 2079 // stubRoutines.hpp.
aoqi@1 2080
aoqi@1 2081 StubRoutines::_forward_exception_entry = generate_forward_exception();
aoqi@1 2082
aoqi@1 2083 StubRoutines::_call_stub_entry =
aoqi@1 2084 generate_call_stub(StubRoutines::_call_stub_return_address);
aoqi@1 2085
aoqi@1 2086 // is referenced by megamorphic call
aoqi@1 2087 StubRoutines::_catch_exception_entry = generate_catch_exception();
aoqi@1 2088
aoqi@1 2089 // atomic calls
aoqi@1 2090 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
aoqi@1 2091 StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr();
aoqi@1 2092 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
aoqi@1 2093 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
aoqi@1 2094 StubRoutines::_atomic_add_entry = generate_atomic_add();
aoqi@1 2095 StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr();
aoqi@1 2096 StubRoutines::_fence_entry = generate_orderaccess_fence();
aoqi@1 2097
aoqi@1 2098 StubRoutines::_handler_for_unsafe_access_entry =
aoqi@1 2099 generate_handler_for_unsafe_access();
aoqi@1 2100
aoqi@1 2101 // platform dependent
aoqi@1 2102 StubRoutines::mips::_get_previous_fp_entry = generate_get_previous_fp();
aoqi@1 2103
aoqi@1 2104 StubRoutines::mips::_verify_mxcsr_entry = generate_verify_mxcsr();
aoqi@1 2105 */
aoqi@1 2106 // Generates all stubs and initializes the entry points
aoqi@1 2107
aoqi@1 2108 //-------------------------------------------------------------
aoqi@1 2109 //-----------------------------------------------------------
aoqi@1 2110 // entry points that exist in all platforms
aoqi@1 2111 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller
aoqi@1 2112 // than the disadvantage of having a much more complicated generator structure.
aoqi@1 2113 // See also comment in stubRoutines.hpp.
aoqi@1 2114 StubRoutines::_forward_exception_entry = generate_forward_exception();
aoqi@1 2115 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
aoqi@1 2116 // is referenced by megamorphic call
aoqi@1 2117 StubRoutines::_catch_exception_entry = generate_catch_exception();
aoqi@1 2118
aoqi@1 2119 StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
aoqi@1 2120
aoqi@1 2121 // platform dependent
aoqi@1 2122 StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
aoqi@1 2123 }
aoqi@1 2124
aoqi@1 2125 void generate_all() {
aoqi@1 2126 #ifdef aoqi_test
aoqi@1 2127 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2128 #endif
aoqi@1 2129 // Generates all stubs and initializes the entry points
aoqi@1 2130
aoqi@1 2131 // These entry points require SharedInfo::stack0 to be set up in
aoqi@1 2132 // non-core builds and need to be relocatable, so they each
aoqi@1 2133 // fabricate a RuntimeStub internally.
aoqi@1 2134 /*
aoqi@1 2135 StubRoutines::_throw_AbstractMethodError_entry =
aoqi@1 2136 generate_throw_exception("AbstractMethodError throw_exception",
aoqi@1 2137 CAST_FROM_FN_PTR(address,
aoqi@1 2138 SharedRuntime::
aoqi@1 2139 throw_AbstractMethodError),
aoqi@1 2140 false);
aoqi@1 2141
aoqi@1 2142 StubRoutines::_throw_IncompatibleClassChangeError_entry =
aoqi@1 2143 generate_throw_exception("IncompatibleClassChangeError throw_exception",
aoqi@1 2144 CAST_FROM_FN_PTR(address,
aoqi@1 2145 SharedRuntime::
aoqi@1 2146 throw_IncompatibleClassChangeError),
aoqi@1 2147 false);
aoqi@1 2148
aoqi@1 2149 StubRoutines::_throw_ArithmeticException_entry =
aoqi@1 2150 generate_throw_exception("ArithmeticException throw_exception",
aoqi@1 2151 CAST_FROM_FN_PTR(address,
aoqi@1 2152 SharedRuntime::
aoqi@1 2153 throw_ArithmeticException),
aoqi@1 2154 true);
aoqi@1 2155
aoqi@1 2156 StubRoutines::_throw_NullPointerException_entry =
aoqi@1 2157 generate_throw_exception("NullPointerException throw_exception",
aoqi@1 2158 CAST_FROM_FN_PTR(address,
aoqi@1 2159 SharedRuntime::
aoqi@1 2160 throw_NullPointerException),
aoqi@1 2161 true);
aoqi@1 2162
aoqi@1 2163 StubRoutines::_throw_NullPointerException_at_call_entry =
aoqi@1 2164 generate_throw_exception("NullPointerException at call throw_exception",
aoqi@1 2165 CAST_FROM_FN_PTR(address,
aoqi@1 2166 SharedRuntime::
aoqi@1 2167 throw_NullPointerException_at_call),
aoqi@1 2168 false);
aoqi@1 2169
aoqi@1 2170 StubRoutines::_throw_StackOverflowError_entry =
aoqi@1 2171 generate_throw_exception("StackOverflowError throw_exception",
aoqi@1 2172 CAST_FROM_FN_PTR(address,
aoqi@1 2173 SharedRuntime::
aoqi@1 2174 throw_StackOverflowError),
aoqi@1 2175 false);
aoqi@1 2176
aoqi@1 2177 // entry points that are platform specific
aoqi@1 2178 StubRoutines::mips::_f2i_fixup = generate_f2i_fixup();
aoqi@1 2179 StubRoutines::mips::_f2l_fixup = generate_f2l_fixup();
aoqi@1 2180 StubRoutines::mips::_d2i_fixup = generate_d2i_fixup();
aoqi@1 2181 StubRoutines::mips::_d2l_fixup = generate_d2l_fixup();
aoqi@1 2182
aoqi@1 2183 StubRoutines::mips::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF);
aoqi@1 2184 StubRoutines::mips::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
aoqi@1 2185 StubRoutines::mips::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
aoqi@1 2186 StubRoutines::mips::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
aoqi@1 2187
aoqi@1 2188 // support for verify_oop (must happen after universe_init)
aoqi@1 2189 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
aoqi@1 2190
aoqi@1 2191 // arraycopy stubs used by compilers
aoqi@1 2192 generate_arraycopy_stubs();
aoqi@1 2193 */
aoqi@1 2194 #ifdef aoqi_test
aoqi@1 2195 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2196 #endif
aoqi@1 2197 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
aoqi@1 2198 #ifdef aoqi_test
aoqi@1 2199 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2200 #endif
aoqi@1 2201 // StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
aoqi@1 2202 #ifdef aoqi_test
aoqi@1 2203 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2204 #endif
aoqi@1 2205 // StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
aoqi@1 2206 #ifdef aoqi_test
aoqi@1 2207 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2208 #endif
aoqi@1 2209 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
aoqi@1 2210 #ifdef aoqi_test
aoqi@1 2211 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2212 #endif
aoqi@1 2213 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
aoqi@1 2214 #ifdef aoqi_test
aoqi@1 2215 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2216 #endif
aoqi@1 2217
aoqi@1 2218 //------------------------------------------------------
aoqi@1 2219 //------------------------------------------------------------------
aoqi@1 2220 // entry points that are platform specific
aoqi@1 2221
aoqi@1 2222 // support for verify_oop (must happen after universe_init)
aoqi@1 2223 #ifdef aoqi_test
aoqi@1 2224 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2225 #endif
aoqi@1 2226 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
aoqi@1 2227 #ifdef aoqi_test
aoqi@1 2228 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2229 #endif
aoqi@1 2230 #ifndef CORE
aoqi@1 2231 // arraycopy stubs used by compilers
aoqi@1 2232 generate_arraycopy_stubs();
aoqi@1 2233 #ifdef aoqi_test
aoqi@1 2234 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2235 #endif
aoqi@1 2236 #endif
aoqi@1 2237
aoqi@1 2238 // Safefetch stubs.
aoqi@1 2239 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
aoqi@1 2240 &StubRoutines::_safefetch32_fault_pc,
aoqi@1 2241 &StubRoutines::_safefetch32_continuation_pc);
aoqi@1 2242 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
aoqi@1 2243 &StubRoutines::_safefetchN_fault_pc,
aoqi@1 2244 &StubRoutines::_safefetchN_continuation_pc);
aoqi@1 2245 }
aoqi@1 2246
aoqi@1 2247 public:
aoqi@1 2248 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
aoqi@1 2249 if (all) {
aoqi@1 2250 generate_all();
aoqi@1 2251 } else {
aoqi@1 2252 generate_initial();
aoqi@1 2253 }
aoqi@1 2254 }
aoqi@1 2255 }; // end class declaration
aoqi@1 2256 /*
aoqi@1 2257 address StubGenerator::disjoint_byte_copy_entry = NULL;
aoqi@1 2258 address StubGenerator::disjoint_short_copy_entry = NULL;
aoqi@1 2259 address StubGenerator::disjoint_int_copy_entry = NULL;
aoqi@1 2260 address StubGenerator::disjoint_long_copy_entry = NULL;
aoqi@1 2261 address StubGenerator::disjoint_oop_copy_entry = NULL;
aoqi@1 2262
aoqi@1 2263 address StubGenerator::byte_copy_entry = NULL;
aoqi@1 2264 address StubGenerator::short_copy_entry = NULL;
aoqi@1 2265 address StubGenerator::int_copy_entry = NULL;
aoqi@1 2266 address StubGenerator::long_copy_entry = NULL;
aoqi@1 2267 address StubGenerator::oop_copy_entry = NULL;
aoqi@1 2268
aoqi@1 2269 address StubGenerator::checkcast_copy_entry = NULL;
aoqi@1 2270 */
aoqi@1 2271 void StubGenerator_generate(CodeBuffer* code, bool all) {
aoqi@1 2272 StubGenerator g(code, all);
aoqi@1 2273 }