1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp Fri Apr 29 00:06:10 2016 +0800 1.3 @@ -0,0 +1,2749 @@ 1.4 +/* 1.5 + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. 1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 + * 1.9 + * This code is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License version 2 only, as 1.11 + * published by the Free Software Foundation. 1.12 + * 1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.16 + * version 2 for more details (a copy is included in the LICENSE file that 1.17 + * accompanied this code). 1.18 + * 1.19 + * You should have received a copy of the GNU General Public License version 1.20 + * 2 along with this work; if not, write to the Free Software Foundation, 1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.22 + * 1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.24 + * or visit www.oracle.com if you need additional information or have any 1.25 + * questions. 
1.26 + * 1.27 + */ 1.28 + 1.29 +#include "precompiled.hpp" 1.30 +#include "asm/macroAssembler.hpp" 1.31 +#include "asm/macroAssembler.inline.hpp" 1.32 +#include "interpreter/interpreter.hpp" 1.33 +#include "nativeInst_mips.hpp" 1.34 +#include "oops/instanceOop.hpp" 1.35 +#include "oops/method.hpp" 1.36 +#include "oops/objArrayKlass.hpp" 1.37 +#include "oops/oop.inline.hpp" 1.38 +#include "prims/methodHandles.hpp" 1.39 +#include "runtime/frame.inline.hpp" 1.40 +#include "runtime/handles.inline.hpp" 1.41 +#include "runtime/sharedRuntime.hpp" 1.42 +#include "runtime/stubCodeGenerator.hpp" 1.43 +#include "runtime/stubRoutines.hpp" 1.44 +#include "runtime/thread.inline.hpp" 1.45 +#include "utilities/top.hpp" 1.46 +#ifdef COMPILER2 1.47 +#include "opto/runtime.hpp" 1.48 +#endif 1.49 + 1.50 + 1.51 +// Declaration and definition of StubGenerator (no .hpp file). 1.52 +// For a more detailed description of the stub routine structure 1.53 +// see the comment in stubRoutines.hpp 1.54 + 1.55 +#define __ _masm-> 1.56 +//#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) 1.57 +//#define a__ ((Assembler*)_masm)-> 1.58 + 1.59 +//#ifdef PRODUCT 1.60 +//#define BLOCK_COMMENT(str) /* nothing */ 1.61 +//#else 1.62 +//#define BLOCK_COMMENT(str) __ block_comment(str) 1.63 +//#endif 1.64 + 1.65 +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 1.66 +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions 1.67 + 1.68 +// Stub Code definitions 1.69 + 1.70 +static address handle_unsafe_access() { 1.71 + JavaThread* thread = JavaThread::current(); 1.72 + address pc = thread->saved_exception_pc(); 1.73 + // pc is the instruction which we must emulate 1.74 + // doing a no-op is fine: return garbage from the load 1.75 + // therefore, compute npc 1.76 + //address npc = Assembler::locate_next_instruction(pc); 1.77 + address npc = (address)((unsigned long)pc + sizeof(unsigned long)); 1.78 + 1.79 + // request an async exception 1.80 + thread->set_pending_unsafe_access_error(); 1.81 + 1.82 + // return address of next instruction to execute 1.83 + return npc; 1.84 +} 1.85 + 1.86 +class StubGenerator: public StubCodeGenerator { 1.87 + private: 1.88 + 1.89 + // ABI mips n64 1.90 + // This fig is not MIPS ABI. It is call Java from C ABI. 1.91 + // Call stubs are used to call Java from C 1.92 + // 1.93 + // [ return_from_Java ] 1.94 + // [ argument word n-1 ] <--- sp 1.95 + // ... 1.96 + // [ argument word 0 ] 1.97 + // ... 1.98 + //-10 [ S6 ] 1.99 + // -9 [ S5 ] 1.100 + // -8 [ S4 ] 1.101 + // -7 [ S3 ] 1.102 + // -6 [ S0 ] 1.103 + // -5 [ TSR(S2) ] 1.104 + // -4 [ LVP(S7) ] 1.105 + // -3 [ BCP(S1) ] 1.106 + // -2 [ saved fp ] <--- fp_after_call 1.107 + // -1 [ return address ] 1.108 + // 0 [ ptr. 
to call wrapper ] <--- a0 (old sp -->)fp 1.109 + // 1 [ result ] <--- a1 1.110 + // 2 [ result_type ] <--- a2 1.111 + // 3 [ method ] <--- a3 1.112 + // 4 [ entry_point ] <--- a4 1.113 + // 5 [ parameters ] <--- a5 1.114 + // 6 [ parameter_size ] <--- a6 1.115 + // 7 [ thread ] <--- a7 1.116 + 1.117 + // 1.118 + // _LP64: n64 does not save paras in sp. 1.119 + // 1.120 + // [ return_from_Java ] 1.121 + // [ argument word n-1 ] <--- sp 1.122 + // ... 1.123 + // [ argument word 0 ] 1.124 + // ... 1.125 + //-14 [ thread ] 1.126 + //-13 [ result_type ] <--- a2 1.127 + //-12 [ result ] <--- a1 1.128 + //-11 [ ptr. to call wrapper ] <--- a0 1.129 + //-10 [ S6 ] 1.130 + // -9 [ S5 ] 1.131 + // -8 [ S4 ] 1.132 + // -7 [ S3 ] 1.133 + // -6 [ S0 ] 1.134 + // -5 [ TSR(S2) ] 1.135 + // -4 [ LVP(S7) ] 1.136 + // -3 [ BCP(S1) ] 1.137 + // -2 [ saved fp ] <--- fp_after_call 1.138 + // -1 [ return address ] 1.139 + // 0 [ ] <--- old sp 1.140 + /* 1.141 + * 2014/01/16 Fu: Find a right place in the call_stub for GP. 1.142 + * GP will point to the starting point of Interpreter::dispatch_table(itos). 1.143 + * It should be saved/restored before/after Java calls. 1.144 + * 1.145 + */ 1.146 + enum call_stub_layout { 1.147 + RA_off = -1, 1.148 + FP_off = -2, 1.149 + BCP_off = -3, 1.150 + LVP_off = -4, 1.151 + TSR_off = -5, 1.152 + S1_off = -6, 1.153 + S3_off = -7, 1.154 + S4_off = -8, 1.155 + S5_off = -9, 1.156 + S6_off = -10, 1.157 + result_off = -11, 1.158 + result_type_off = -12, 1.159 + thread_off = -13, 1.160 + total_off = thread_off - 3, 1.161 + GP_off = -16, 1.162 + }; 1.163 + 1.164 + address generate_call_stub(address& return_address) { 1.165 + 1.166 + StubCodeMark mark(this, "StubRoutines", "call_stub"); 1.167 + address start = __ pc(); 1.168 + 1.169 + // same as in generate_catch_exception()! 
1.170 + 1.171 + // stub code 1.172 + // save ra and fp 1.173 + __ sd(RA, SP, RA_off * wordSize); 1.174 + __ sd(FP, SP, FP_off * wordSize); 1.175 + __ sd(BCP, SP, BCP_off * wordSize); 1.176 + __ sd(LVP, SP, LVP_off * wordSize); 1.177 + __ sd(GP, SP, GP_off * wordSize); 1.178 + __ sd(TSR, SP, TSR_off * wordSize); 1.179 + __ sd(S1, SP, S1_off * wordSize); 1.180 + __ sd(S3, SP, S3_off * wordSize); 1.181 + __ sd(S4, SP, S4_off * wordSize); 1.182 + __ sd(S5, SP, S5_off * wordSize); 1.183 + __ sd(S6, SP, S6_off * wordSize); 1.184 + 1.185 + 1.186 + __ li48(GP, (long)Interpreter::dispatch_table(itos)); 1.187 + 1.188 + // I think 14 is the max gap between argument and callee saved register 1.189 + __ daddi(FP, SP, (-2) * wordSize); 1.190 + __ daddi(SP, SP, total_off * wordSize); 1.191 +//FIXME, aoqi. find a suitable place to save A1 & A2. 1.192 + /* 1.193 + __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize); 1.194 + __ sd(A1, FP, 3 * wordSize); 1.195 + __ sd(A2, FP, 4 * wordSize); 1.196 + __ sd(A3, FP, 5 * wordSize); 1.197 + __ sd(A4, FP, 6 * wordSize); 1.198 + __ sd(A5, FP, 7 * wordSize); 1.199 + __ sd(A6, FP, 8 * wordSize); 1.200 + __ sd(A7, FP, 9 * wordSize); 1.201 + */ 1.202 + __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize); 1.203 + __ sd(A1, FP, result_off * wordSize); 1.204 + __ sd(A2, FP, result_type_off * wordSize); 1.205 + __ sd(A7, FP, thread_off * wordSize); 1.206 + 1.207 +#ifdef OPT_THREAD 1.208 + //__ get_thread(TREG); 1.209 + __ move(TREG, A7); 1.210 + 1.211 + //__ ld(TREG, FP, thread_off * wordSize); 1.212 +#endif 1.213 + //add for compressedoops 1.214 + __ reinit_heapbase(); 1.215 + 1.216 +#ifdef ASSERT 1.217 + // make sure we have no pending exceptions 1.218 + { 1.219 + Label L; 1.220 + __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); 1.221 + __ beq(AT, R0, L); 1.222 + __ delayed()->nop(); 1.223 + /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ 1.224 + __ 
stop("StubRoutines::call_stub: entered with pending exception"); 1.225 + __ bind(L); 1.226 + } 1.227 +#endif 1.228 + 1.229 + // pass parameters if any 1.230 + // A5: parameter 1.231 + // A6: parameter_size 1.232 + // T0: parameter_size_tmp(--) 1.233 + // T2: offset(++) 1.234 + // T3: tmp 1.235 + Label parameters_done; 1.236 + // judge if the parameter_size equals 0 1.237 + __ beq(A6, R0, parameters_done); 1.238 + __ delayed()->nop(); 1.239 + __ dsll(AT, A6, Interpreter::logStackElementSize); 1.240 + __ dsub(SP, SP, AT); 1.241 + __ move(AT, -StackAlignmentInBytes); 1.242 + __ andr(SP, SP , AT); 1.243 + // Copy Java parameters in reverse order (receiver last) 1.244 + // Note that the argument order is inverted in the process 1.245 + // source is edx[ecx: N-1..0] 1.246 + // dest is esp[ebx: 0..N-1] 1.247 + Label loop; 1.248 + __ move(T0, A6); 1.249 + __ move(T2, R0); 1.250 + __ bind(loop); 1.251 + 1.252 + // get parameter 1.253 + __ dsll(T3, T0, LogBytesPerWord); 1.254 + __ dadd(T3, T3, A5); 1.255 + __ ld(AT, T3, -wordSize); 1.256 + __ dsll(T3, T2, LogBytesPerWord); 1.257 + __ dadd(T3, T3, SP); 1.258 + __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); 1.259 + __ daddi(T2, T2, 1); 1.260 + __ daddi(T0, T0, -1); 1.261 + __ bne(T0, R0, loop); 1.262 + __ delayed()->nop(); 1.263 + // advance to next parameter 1.264 + 1.265 + // call Java function 1.266 + __ bind(parameters_done); 1.267 + 1.268 + // receiver in V0, methodOop in Rmethod 1.269 + 1.270 + __ move(Rmethod, A3); 1.271 + __ move(Rsender, SP); //set sender sp 1.272 + __ jalr(A4); 1.273 + __ delayed()->nop(); 1.274 + return_address = __ pc(); 1.275 + 1.276 + Label common_return; 1.277 + __ bind(common_return); 1.278 + 1.279 + // store result depending on type 1.280 + // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) 1.281 + __ ld(T0, FP, result_off * wordSize); // result --> T0 1.282 + Label is_long, is_float, is_double, exit; 1.283 + __ ld(T2, FP, result_type_off * wordSize); // 
result_type --> T2 1.284 + __ daddi(T3, T2, (-1) * T_LONG); 1.285 + __ beq(T3, R0, is_long); 1.286 + __ delayed()->daddi(T3, T2, (-1) * T_FLOAT); 1.287 + __ beq(T3, R0, is_float); 1.288 + __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE); 1.289 + __ beq(T3, R0, is_double); 1.290 + __ delayed()->nop(); 1.291 + 1.292 + // handle T_INT case 1.293 + __ sd(V0, T0, 0 * wordSize); 1.294 + __ bind(exit); 1.295 + 1.296 + // restore 1.297 + __ daddi(SP, FP, 2 * wordSize ); 1.298 + __ ld(RA, SP, RA_off * wordSize); 1.299 + __ ld(FP, SP, FP_off * wordSize); 1.300 + __ ld(BCP, SP, BCP_off * wordSize); 1.301 + __ ld(LVP, SP, LVP_off * wordSize); 1.302 + __ ld(GP, SP, GP_off * wordSize); 1.303 + __ ld(TSR, SP, TSR_off * wordSize); 1.304 + 1.305 + __ ld(S1, SP, S1_off * wordSize); 1.306 + __ ld(S3, SP, S3_off * wordSize); 1.307 + __ ld(S4, SP, S4_off * wordSize); 1.308 + __ ld(S5, SP, S5_off * wordSize); 1.309 + __ ld(S6, SP, S6_off * wordSize); 1.310 + 1.311 + // return 1.312 + __ jr(RA); 1.313 + __ delayed()->nop(); 1.314 + 1.315 + // handle return types different from T_INT 1.316 + __ bind(is_long); 1.317 + __ sd(V0, T0, 0 * wordSize); 1.318 + //__ sd(V1, T0, 1 * wordSize); 1.319 + __ sd(R0, T0, 1 * wordSize); 1.320 + __ b(exit); 1.321 + __ delayed()->nop(); 1.322 + 1.323 + __ bind(is_float); 1.324 + __ swc1(F0, T0, 0 * wordSize); 1.325 + __ b(exit); 1.326 + __ delayed()->nop(); 1.327 + 1.328 + __ bind(is_double); 1.329 + __ sdc1(F0, T0, 0 * wordSize); 1.330 + //__ sdc1(F1, T0, 1 * wordSize); 1.331 + __ sd(R0, T0, 1 * wordSize); 1.332 + __ b(exit); 1.333 + __ delayed()->nop(); 1.334 + //FIXME, 1.6 mips version add operation of fpu here 1.335 + StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); 1.336 + __ b(common_return); 1.337 + __ delayed()->nop(); 1.338 + return start; 1.339 + } 1.340 + 1.341 + // Return point for a Java call if there's an exception thrown in 1.342 + // Java code. 
The exception is caught and transformed into a 1.343 + // pending exception stored in JavaThread that can be tested from 1.344 + // within the VM. 1.345 + // 1.346 + // Note: Usually the parameters are removed by the callee. In case 1.347 + // of an exception crossing an activation frame boundary, that is 1.348 + // not the case if the callee is compiled code => need to setup the 1.349 + // rsp. 1.350 + // 1.351 + // rax: exception oop 1.352 + 1.353 + address generate_catch_exception() { 1.354 + StubCodeMark mark(this, "StubRoutines", "catch_exception"); 1.355 + address start = __ pc(); 1.356 + 1.357 + Register thread = TREG; 1.358 + 1.359 + // get thread directly 1.360 +#ifndef OPT_THREAD 1.361 + __ ld(thread, FP, thread_off * wordSize); 1.362 +#endif 1.363 + 1.364 +#ifdef ASSERT 1.365 + // verify that threads correspond 1.366 + { Label L; 1.367 + __ get_thread(T8); 1.368 + __ beq(T8, thread, L); 1.369 + __ delayed()->nop(); 1.370 + __ stop("StubRoutines::catch_exception: threads must correspond"); 1.371 + __ bind(L); 1.372 + } 1.373 +#endif 1.374 + // set pending exception 1.375 + __ verify_oop(V0); 1.376 + __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); 1.377 + __ li(AT, (long)__FILE__); 1.378 + __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); 1.379 + __ li(AT, (long)__LINE__); 1.380 + __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); 1.381 + 1.382 + // complete return to VM 1.383 + assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); 1.384 + __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); 1.385 + __ delayed()->nop(); 1.386 + 1.387 + return start; 1.388 + } 1.389 + 1.390 + // Continuation point for runtime calls returning with a pending 1.391 + // exception. The pending exception check happened in the runtime 1.392 + // or native call stub. The pending exception in Thread is 1.393 + // converted into a Java-level exception. 
1.394 + // 1.395 + // Contract with Java-level exception handlers: 1.396 + // rax: exception 1.397 + // rdx: throwing pc 1.398 + // 1.399 + // NOTE: At entry of this stub, exception-pc must be on stack !! 1.400 + 1.401 + address generate_forward_exception() { 1.402 + StubCodeMark mark(this, "StubRoutines", "forward exception"); 1.403 + //Register thread = TREG; 1.404 + Register thread = TREG; 1.405 + address start = __ pc(); 1.406 + 1.407 + // Upon entry, the sp points to the return address returning into Java 1.408 + // (interpreted or compiled) code; i.e., the return address becomes the 1.409 + // throwing pc. 1.410 + // 1.411 + // Arguments pushed before the runtime call are still on the stack but 1.412 + // the exception handler will reset the stack pointer -> ignore them. 1.413 + // A potential result in registers can be ignored as well. 1.414 + 1.415 +#ifdef ASSERT 1.416 + // make sure this code is only executed if there is a pending exception 1.417 +#ifndef OPT_THREAD 1.418 + __ get_thread(thread); 1.419 +#endif 1.420 + { Label L; 1.421 + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 1.422 + __ bne(AT, R0, L); 1.423 + __ delayed()->nop(); 1.424 + __ stop("StubRoutines::forward exception: no pending exception (1)"); 1.425 + __ bind(L); 1.426 + } 1.427 +#endif 1.428 + 1.429 + // compute exception handler into T9 1.430 + __ ld(A1, SP, 0); 1.431 + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); 1.432 + __ move(T9, V0); 1.433 + __ pop(V1); 1.434 + 1.435 +#ifndef OPT_THREAD 1.436 + __ get_thread(thread); 1.437 +#endif 1.438 + __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); 1.439 + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); 1.440 + 1.441 +#ifdef ASSERT 1.442 + // make sure exception is set 1.443 + { Label L; 1.444 + __ bne(V0, R0, L); 1.445 + __ delayed()->nop(); 1.446 + __ stop("StubRoutines::forward exception: no pending exception (2)"); 1.447 + 
__ bind(L); 1.448 + } 1.449 +#endif 1.450 + 1.451 + // continue at exception handler (return address removed) 1.452 + // V0: exception 1.453 + // T9: exception handler 1.454 + // V1: throwing pc 1.455 + __ verify_oop(V0); 1.456 + __ jr(T9); 1.457 + __ delayed()->nop(); 1.458 + 1.459 + return start; 1.460 + } 1.461 + 1.462 + // Support for intptr_t get_previous_fp() 1.463 + // 1.464 + // This routine is used to find the previous frame pointer for the 1.465 + // caller (current_frame_guess). This is used as part of debugging 1.466 + // ps() is seemingly lost trying to find frames. 1.467 + // This code assumes that caller current_frame_guess) has a frame. 1.468 + address generate_get_previous_fp() { 1.469 + StubCodeMark mark(this, "StubRoutines", "get_previous_fp"); 1.470 + const Address old_fp (FP, 0); 1.471 + const Address older_fp (V0, 0); 1.472 + address start = __ pc(); 1.473 + __ enter(); 1.474 + __ lw(V0, old_fp); // callers fp 1.475 + __ lw(V0, older_fp); // the frame for ps() 1.476 + __ leave(); 1.477 + __ jr(RA); 1.478 + __ delayed()->nop(); 1.479 + return start; 1.480 + } 1.481 + // The following routine generates a subroutine to throw an 1.482 + // asynchronous UnknownError when an unsafe access gets a fault that 1.483 + // could not be reasonably prevented by the programmer. (Example: 1.484 + // SIGBUS/OBJERR.) 
1.485 + address generate_handler_for_unsafe_access() { 1.486 + StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); 1.487 + address start = __ pc(); 1.488 + __ pushad(); // push registers 1.489 + // Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord); 1.490 + __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); 1.491 + __ delayed()->nop(); 1.492 + __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord); 1.493 + __ popad(); 1.494 + __ jr(RA); 1.495 + __ delayed()->nop(); 1.496 + return start; 1.497 + } 1.498 + 1.499 + // Non-destructive plausibility checks for oops 1.500 + // 1.501 + // Arguments: 1.502 + // all args on stack! 1.503 + // 1.504 + // Stack after saving c_rarg3: 1.505 + // [tos + 0]: saved c_rarg3 1.506 + // [tos + 1]: saved c_rarg2 1.507 + // [tos + 2]: saved r12 (several TemplateTable methods use it) 1.508 + // [tos + 3]: saved flags 1.509 + // [tos + 4]: return address 1.510 + // * [tos + 5]: error message (char*) 1.511 + // * [tos + 6]: object to verify (oop) 1.512 + // * [tos + 7]: saved rax - saved by caller and bashed 1.513 + // * = popped on exit 1.514 + address generate_verify_oop() { 1.515 + StubCodeMark mark(this, "StubRoutines", "verify_oop"); 1.516 + address start = __ pc(); 1.517 + __ reinit_heapbase(); 1.518 + __ verify_oop_subroutine(); 1.519 + address end = __ pc(); 1.520 + return start; 1.521 + } 1.522 + 1.523 + // 1.524 + // Generate overlap test for array copy stubs 1.525 + // 1.526 + // Input: 1.527 + // A0 - array1 1.528 + // A1 - array2 1.529 + // A2 - element count 1.530 + // 1.531 + // Note: this code can only use %eax, %ecx, and %edx 1.532 + // 1.533 + 1.534 + // use T9 as temp 1.535 + void array_overlap_test(address no_overlap_target, int log2_elem_size) { 1.536 + int elem_size = 1 << log2_elem_size; 1.537 + Address::ScaleFactor sf = Address::times_1; 1.538 + 1.539 + switch (log2_elem_size) { 1.540 + case 0: sf = Address::times_1; break; 1.541 + 
case 1: sf = Address::times_2; break; 1.542 + case 2: sf = Address::times_4; break; 1.543 + case 3: sf = Address::times_8; break; 1.544 + } 1.545 + 1.546 + __ dsll(AT, A2, sf); 1.547 + __ dadd(AT, AT, A0); 1.548 + __ lea(T9, Address(AT, -elem_size)); 1.549 + __ dsub(AT, A1, A0); 1.550 + __ blez(AT, no_overlap_target); 1.551 + __ delayed()->nop(); 1.552 + __ dsub(AT, A1, T9); 1.553 + __ bgtz(AT, no_overlap_target); 1.554 + __ delayed()->nop(); 1.555 + 1.556 + } 1.557 + 1.558 + // 1.559 + // Generate store check for array 1.560 + // 1.561 + // Input: 1.562 + // %edi - starting address 1.563 + // %ecx - element count 1.564 + // 1.565 + // The 2 input registers are overwritten 1.566 + // 1.567 + 1.568 + // 1.569 + // Generate store check for array 1.570 + // 1.571 + // Input: 1.572 + // T0 - starting address(edi) 1.573 + // T1 - element count (ecx) 1.574 + // 1.575 + // The 2 input registers are overwritten 1.576 + // 1.577 + 1.578 +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) 1.579 + 1.580 + void array_store_check() { 1.581 + BarrierSet* bs = Universe::heap()->barrier_set(); 1.582 + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 1.583 + CardTableModRefBS* ct = (CardTableModRefBS*)bs; 1.584 + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 1.585 + Label l_0; 1.586 + 1.587 + __ dsll(AT, T1, TIMES_OOP); 1.588 + __ dadd(AT, T0, AT); 1.589 + __ daddiu(T1, AT, - BytesPerHeapOop); 1.590 + 1.591 + __ shr(T0, CardTableModRefBS::card_shift); 1.592 + __ shr(T1, CardTableModRefBS::card_shift); 1.593 + 1.594 + __ dsub(T1, T1, T0); // end --> cards count 1.595 + __ bind(l_0); 1.596 + 1.597 + __ li48(AT, (long)ct->byte_map_base); 1.598 + __ dadd(AT, AT, T0); 1.599 + __ dadd(AT, AT, T1); 1.600 + __ sb(R0, AT, 0); 1.601 + //__ daddi(T1, T1, -4); 1.602 + __ daddi(T1, T1, - 1); 1.603 + __ bgez(T1, l_0); 1.604 + __ delayed()->nop(); 1.605 + } 1.606 + 1.607 + // Arguments: 1.608 + // aligned - true => 
Input and output aligned on a HeapWord == 8-byte boundary 1.609 + // ignored 1.610 + // name - stub name string 1.611 + // 1.612 + // Inputs: 1.613 + // c_rarg0 - source array address 1.614 + // c_rarg1 - destination array address 1.615 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.616 + // 1.617 + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, 1.618 + // we let the hardware handle it. The one to eight bytes within words, 1.619 + // dwords or qwords that span cache line boundaries will still be loaded 1.620 + // and stored atomically. 1.621 + // 1.622 + // Side Effects: 1.623 + // disjoint_byte_copy_entry is set to the no-overlap entry point 1.624 + // used by generate_conjoint_byte_copy(). 1.625 + // 1.626 + address generate_disjoint_byte_copy(bool aligned, const char *name) { 1.627 + StubCodeMark mark(this, "StubRoutines", name); 1.628 + __ align(CodeEntryAlignment); 1.629 + address start = __ pc(); 1.630 + Label l_0, l_1, l_2, l_3, l_4, l_5, l_6; 1.631 + 1.632 + __ push(T3); 1.633 + __ push(T0); 1.634 + __ push(T1); 1.635 + __ push(T8); 1.636 + __ move(T3, A0); 1.637 + __ move(T0, A1); 1.638 + __ move(T1, A2); 1.639 + __ move(T8, T1); // original count in T1 1.640 + __ daddi(AT, T1, -3); 1.641 + __ blez(AT, l_4); 1.642 + __ delayed()->nop(); 1.643 + if (!aligned) { 1.644 + // align source address at dword address boundary 1.645 + __ move(T1, 4); 1.646 + __ sub(T1, T1, T3); 1.647 + __ andi(T1, T1, 3); 1.648 + __ beq(T1, R0, l_1); 1.649 + __ delayed()->nop(); 1.650 + __ sub(T8,T8,T1); 1.651 + __ bind(l_0); 1.652 + __ lb(AT, T3, 0); 1.653 + __ sb(AT, T0, 0); 1.654 + __ addi(T3, T3, 1); 1.655 + __ addi(T0, T0, 1); 1.656 + __ addi(T1 ,T1, -1); 1.657 + __ bne(T1, R0, l_0); 1.658 + __ delayed()->nop(); 1.659 + __ bind(l_1); 1.660 + __ move(T1, T8); 1.661 + } 1.662 + __ shr(T1, 2); 1.663 + __ beq(T1, R0, l_4); // no dwords to move 1.664 + __ delayed()->nop(); 1.665 + // copy aligned dwords 1.666 + __ bind(l_2); 1.667 + __ 
align(16); 1.668 + __ bind(l_3); 1.669 + __ lw(AT, T3, 0); 1.670 + __ sw(AT, T0, 0 ); 1.671 + __ addi(T3, T3, 4); 1.672 + __ addi(T0, T0, 4); 1.673 + __ addi(T1, T1, -1); 1.674 + __ bne(T1, R0, l_3); 1.675 + __ delayed()->nop(); 1.676 + __ bind(l_4); 1.677 + __ move(T1, T8); 1.678 + __ andi(T1, T1, 3); 1.679 + __ beq(T1, R0, l_6); 1.680 + __ delayed()->nop(); 1.681 + // copy suffix 1.682 + __ bind(l_5); 1.683 + __ lb(AT, T3, 0); 1.684 + __ sb(AT, T0, 0); 1.685 + __ addi(T3, T3, 1); 1.686 + __ addi(T0, T0, 1); 1.687 + __ addi(T1, T1, -1); 1.688 + __ bne(T1, R0, l_5 ); 1.689 + __ delayed()->nop(); 1.690 + __ bind(l_6); 1.691 + __ pop(T8); 1.692 + __ pop(T1); 1.693 + __ pop(T0); 1.694 + __ pop(T3); 1.695 + __ jr(RA); 1.696 + __ delayed()->nop(); 1.697 + return start; 1.698 + } 1.699 + 1.700 + // Arguments: 1.701 + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1.702 + // ignored 1.703 + // name - stub name string 1.704 + // 1.705 + // Inputs: 1.706 + // c_rarg0 - source array address 1.707 + // c_rarg1 - destination array address 1.708 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.709 + // 1.710 + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, 1.711 + // we let the hardware handle it. The one to eight bytes within words, 1.712 + // dwords or qwords that span cache line boundaries will still be loaded 1.713 + // and stored atomically. 1.714 + // 1.715 + address generate_conjoint_byte_copy(bool aligned, const char *name) { 1.716 + Label l_1, l_2, l_3, l_4, l_5; 1.717 + StubCodeMark mark(this, "StubRoutines", name); 1.718 + __ align(CodeEntryAlignment); 1.719 + address start = __ pc(); 1.720 + address nooverlap_target = aligned ? 
1.721 + StubRoutines::arrayof_jbyte_disjoint_arraycopy() : 1.722 + StubRoutines::jbyte_disjoint_arraycopy(); 1.723 + 1.724 + array_overlap_test(nooverlap_target, 0); 1.725 + 1.726 + __ push(T3); 1.727 + __ push(T0); 1.728 + __ push(T1); 1.729 + __ push(T8); 1.730 + 1.731 + 1.732 + // copy from high to low 1.733 + __ move(T3, A0); 1.734 + __ move(T0, A1); 1.735 + __ move(T1, A2); 1.736 + __ dadd(AT, T3, T1); 1.737 + __ lea(T3, Address(AT, -4)); 1.738 + __ dadd(AT, T0, T1); 1.739 + __ lea(T0, Address(AT, -4)); 1.740 + __ move(T8, T1); 1.741 + __ daddi(AT, T1, -3); 1.742 + __ blez(AT, l_3); 1.743 + __ delayed()->nop(); 1.744 + __ dsrl(T1, T1, 2); 1.745 + __ align(16); 1.746 + __ bind(l_1); 1.747 + __ lw(AT, T3, 0); 1.748 + __ sw(AT, T0, 0); 1.749 + __ addi(T3, T3, -4); 1.750 + __ addi(T0, T0, -4); 1.751 + __ addi(T1, T1, -1); 1.752 + __ bne(T1, R0, l_1); 1.753 + __ delayed()->nop(); 1.754 + __ b(l_3); 1.755 + __ delayed()->nop(); 1.756 + // copy dwords aligned or not with repeat move 1.757 + __ bind(l_2); 1.758 + __ bind(l_3); 1.759 + // copy suffix (0-3 bytes) 1.760 + __ andi(T8, T8, 3); 1.761 + __ beq(T8, R0, l_5); 1.762 + __ delayed()->nop(); 1.763 + __ addi(T3, T3, 3); 1.764 + __ addi(T0, T0, 3); 1.765 + __ bind(l_4); 1.766 + __ lb(AT, T3, 0); 1.767 + __ sb(AT, T0, 0); 1.768 + __ addi(T3, T3, -1); 1.769 + __ addi(T0, T0, -1); 1.770 + __ addi(T8, T8, -1); 1.771 + __ bne(T8, R0, l_4); 1.772 + __ delayed()->nop(); 1.773 + __ bind(l_5); 1.774 + __ pop(T8); 1.775 + __ pop(T1); 1.776 + __ pop(T0); 1.777 + __ pop(T3); 1.778 + __ jr(RA); 1.779 + __ delayed()->nop(); 1.780 + return start; 1.781 + } 1.782 + 1.783 + // Arguments: 1.784 + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1.785 + // ignored 1.786 + // name - stub name string 1.787 + // 1.788 + // Inputs: 1.789 + // c_rarg0 - source array address 1.790 + // c_rarg1 - destination array address 1.791 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.792 + // 1.793 + // 
If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we 1.794 + // let the hardware handle it. The two or four words within dwords 1.795 + // or qwords that span cache line boundaries will still be loaded 1.796 + // and stored atomically. 1.797 + // 1.798 + // Side Effects: 1.799 + // disjoint_short_copy_entry is set to the no-overlap entry point 1.800 + // used by generate_conjoint_short_copy(). 1.801 + // 1.802 + address generate_disjoint_short_copy(bool aligned, const char *name) { 1.803 + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8; 1.804 + StubCodeMark mark(this, "StubRoutines", name); 1.805 + __ align(CodeEntryAlignment); 1.806 + address start = __ pc(); 1.807 + 1.808 + __ push(T3); 1.809 + __ push(T0); 1.810 + __ push(T1); 1.811 + __ push(T8); 1.812 + __ move(T1, A2); 1.813 + __ move(T3, A0); 1.814 + __ move(T0, A1); 1.815 + 1.816 + if (!aligned) { 1.817 + __ beq(T1, R0, l_5); 1.818 + __ delayed()->nop(); 1.819 + // align source address at dword address boundary 1.820 + __ move(T8, T3); // original from 1.821 + __ andi(T8, T8, 3); // either 0 or 2 1.822 + __ beq(T8, R0, l_1); // no prefix 1.823 + __ delayed()->nop(); 1.824 + // copy prefix 1.825 + __ lh(AT, T3, 0); 1.826 + __ sh(AT, T0, 0); 1.827 + __ add(T3, T3, T8); 1.828 + __ add(T0, T0, T8); 1.829 + __ addi(T1, T1, -1); 1.830 + __ bind(l_1); 1.831 + } 1.832 + __ move(T8, T1); // word count less prefix 1.833 + __ sra(T1, T1, 1); 1.834 + __ beq(T1, R0, l_4); 1.835 + __ delayed()->nop(); 1.836 + // copy aligned dwords 1.837 + __ bind(l_2); 1.838 + __ align(16); 1.839 + __ bind(l_3); 1.840 + __ lw(AT, T3, 0); 1.841 + __ sw(AT, T0, 0 ); 1.842 + __ addi(T3, T3, 4); 1.843 + __ addi(T0, T0, 4); 1.844 + __ addi(T1, T1, -1); 1.845 + __ bne(T1, R0, l_3); 1.846 + __ delayed()->nop(); 1.847 + __ bind(l_4); 1.848 + __ andi(T8, T8, 1); 1.849 + __ beq(T8, R0, l_5); 1.850 + __ delayed()->nop(); 1.851 + // copy suffix 1.852 + __ lh(AT, T3, 0); 1.853 + __ sh(AT, T0, 0); 1.854 + __ bind(l_5); 1.855 + __ pop(T8); 
1.856 + __ pop(T1); 1.857 + __ pop(T0); 1.858 + __ pop(T3); 1.859 + __ jr(RA); 1.860 + __ delayed()->nop(); 1.861 + return start; 1.862 + } 1.863 + 1.864 + // Arguments: 1.865 + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1.866 + // ignored 1.867 + // name - stub name string 1.868 + // 1.869 + // Inputs: 1.870 + // c_rarg0 - source array address 1.871 + // c_rarg1 - destination array address 1.872 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.873 + // 1.874 + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we 1.875 + // let the hardware handle it. The two or four words within dwords 1.876 + // or qwords that span cache line boundaries will still be loaded 1.877 + // and stored atomically. 1.878 + // 1.879 + address generate_conjoint_short_copy(bool aligned, const char *name) { 1.880 + Label l_1, l_2, l_3, l_4, l_5; 1.881 + StubCodeMark mark(this, "StubRoutines", name); 1.882 + __ align(CodeEntryAlignment); 1.883 + address start = __ pc(); 1.884 + address nooverlap_target = aligned ? 
1.885 + StubRoutines::arrayof_jshort_disjoint_arraycopy() : 1.886 + StubRoutines::jshort_disjoint_arraycopy(); 1.887 + 1.888 + array_overlap_test(nooverlap_target, 1); 1.889 + 1.890 + __ push(T3); 1.891 + __ push(T0); 1.892 + __ push(T1); 1.893 + __ push(T8); 1.894 + 1.895 + /* 1.896 + __ pushl(esi); 1.897 + __ movl(ecx, Address(esp, 4+12)); // count 1.898 + __ pushl(edi); 1.899 + __ movl(esi, Address(esp, 8+ 4)); // from 1.900 + __ movl(edi, Address(esp, 8+ 8)); // to 1.901 + */ 1.902 + __ move(T1, A2); 1.903 + __ move(T3, A0); 1.904 + __ move(T0, A1); 1.905 + 1.906 + 1.907 + // copy dwords from high to low 1.908 + // __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4 1.909 + __ sll(AT, T1, Address::times_2); 1.910 + __ add(AT, T3, AT); 1.911 + __ lea(T3, Address( AT, -4)); 1.912 + //__ std(); 1.913 + //__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4 1.914 + __ sll(AT,T1 , Address::times_2); 1.915 + __ add(AT, T0, AT); 1.916 + __ lea(T0, Address( AT, -4)); 1.917 + // __ movl(eax, ecx); 1.918 + __ move(T8, T1); 1.919 + __ bind(l_1); 1.920 + // __ sarl(ecx, 1); // dword count 1.921 + __ sra(T1,T1, 1); 1.922 + //__ jcc(Assembler::equal, l_4); // no dwords to move 1.923 + __ beq(T1, R0, l_4); 1.924 + __ delayed()->nop(); 1.925 + /* __ cmpl(ecx, 32); 1.926 + __ jcc(Assembler::above, l_3); // > 32 dwords 1.927 + // copy dwords with loop 1.928 + __ subl(edi, esi); 1.929 + */ __ align(16); 1.930 + __ bind(l_2); 1.931 + //__ movl(edx, Address(esi)); 1.932 + __ lw(AT, T3, 0); 1.933 + //__ movl(Address(edi, esi, Address::times_1), edx); 1.934 + __ sw(AT, T0, 0); 1.935 + //__ subl(esi, 4); 1.936 + __ addi(T3, T3, -4); 1.937 + __ addi(T0, T0, -4); 1.938 + //__ decl(ecx); 1.939 + __ addi(T1, T1, -1); 1.940 + // __ jcc(Assembler::notEqual, l_2); 1.941 + __ bne(T1, R0, l_2); 1.942 + __ delayed()->nop(); 1.943 + // __ addl(edi, esi); 1.944 + // __ jmp(l_4); 1.945 + __ b(l_4); 1.946 + __ delayed()->nop(); 1.947 + // copy dwords 
with repeat move 1.948 + __ bind(l_3); 1.949 + // __ rep_movl(); 1.950 + __ bind(l_4); 1.951 + // __ andl(eax, 1); // suffix count 1.952 + __ andi(T8, T8, 1); // suffix count 1.953 + //__ jcc(Assembler::equal, l_5); // no suffix 1.954 + __ beq(T8, R0, l_5 ); 1.955 + __ delayed()->nop(); 1.956 + // copy suffix 1.957 + // __ movw(edx, Address(esi, 2)); 1.958 + __ lh(AT, T3, 2); 1.959 + // __ movw(Address(edi, 2), edx); 1.960 + __ sh(AT, T0, 2); 1.961 + __ bind(l_5); 1.962 + // __ cld(); 1.963 + // __ popl(edi); 1.964 + // __ popl(esi); 1.965 + // __ ret(0); 1.966 + __ pop(T8); 1.967 + __ pop(T1); 1.968 + __ pop(T0); 1.969 + __ pop(T3); 1.970 + __ jr(RA); 1.971 + __ delayed()->nop(); 1.972 + return start; 1.973 + } 1.974 + 1.975 + // Arguments: 1.976 + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1.977 + // ignored 1.978 + // is_oop - true => oop array, so generate store check code 1.979 + // name - stub name string 1.980 + // 1.981 + // Inputs: 1.982 + // c_rarg0 - source array address 1.983 + // c_rarg1 - destination array address 1.984 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.985 + // 1.986 + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1.987 + // the hardware handle it. The two dwords within qwords that span 1.988 + // cache line boundaries will still be loaded and stored atomicly. 1.989 + // 1.990 + // Side Effects: 1.991 + // disjoint_int_copy_entry is set to the no-overlap entry point 1.992 + // used by generate_conjoint_int_oop_copy(). 
1.993 + // 1.994 + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { 1.995 + Label l_2, l_3, l_4, l_stchk; 1.996 + StubCodeMark mark(this, "StubRoutines", name); 1.997 + __ align(CodeEntryAlignment); 1.998 + address start = __ pc(); 1.999 + /* 1.1000 + __ pushl(esi); 1.1001 + __ movl(ecx, Address(esp, 4+12)); // count 1.1002 + __ pushl(edi); 1.1003 + __ movl(esi, Address(esp, 8+ 4)); // from 1.1004 + __ movl(edi, Address(esp, 8+ 8)); // to 1.1005 + */ 1.1006 + __ push(T3); 1.1007 + __ push(T0); 1.1008 + __ push(T1); 1.1009 + __ push(T8); 1.1010 + __ move(T1, A2); 1.1011 + __ move(T3, A0); 1.1012 + __ move(T0, A1); 1.1013 + 1.1014 + // __ cmpl(ecx, 32); 1.1015 + // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords 1.1016 + // __ rep_movl(); 1.1017 + __ b(l_2); 1.1018 + __ delayed()->nop(); 1.1019 + if (is_oop) { 1.1020 + // __ jmp(l_stchk); 1.1021 + __ b(l_stchk); 1.1022 + __ delayed()->nop(); 1.1023 + } 1.1024 + // __ popl(edi); 1.1025 + // __ popl(esi); 1.1026 + // __ ret(0); 1.1027 + __ pop(T8); 1.1028 + __ pop(T1); 1.1029 + __ pop(T0); 1.1030 + __ pop(T3); 1.1031 + __ jr(RA); 1.1032 + __ delayed()->nop(); 1.1033 + 1.1034 + __ bind(l_2); 1.1035 + // __ subl(edi, esi); 1.1036 + // __ testl(ecx, ecx); 1.1037 + // __ jcc(Assembler::zero, l_4); 1.1038 + __ beq(T1, R0, l_4); 1.1039 + __ delayed()->nop(); 1.1040 + __ align(16); 1.1041 + __ bind(l_3); 1.1042 + //__ movl(edx, Address(esi)); 1.1043 + __ lw(AT, T3, 0); 1.1044 + // __ movl(Address(edi, esi, Address::times_1), edx); 1.1045 + __ sw(AT, T0, 0); 1.1046 + // __ addl(esi, 4); 1.1047 + __ addi(T3, T3, 4); 1.1048 + __ addi(T0, T0, 4); 1.1049 + // __ decl(ecx); 1.1050 + __ addi(T1, T1, -1); 1.1051 + // __ jcc(Assembler::notEqual, l_3); 1.1052 + __ bne(T1, R0, l_3); 1.1053 + __ delayed()->nop(); 1.1054 + if (is_oop) { 1.1055 + __ bind(l_stchk); 1.1056 + // __ movl(edi, Address(esp, 8+ 8)); 1.1057 + // __ movl(ecx, Address(esp, 8+ 12)); 1.1058 + __ move(T0, A1); 1.1059 + __ 
move(T1, A2); 1.1060 + array_store_check(); 1.1061 + } 1.1062 + __ bind(l_4); 1.1063 + // __ popl(edi); 1.1064 + // __ popl(esi); 1.1065 + // __ ret(0); 1.1066 + __ pop(T8); 1.1067 + __ pop(T1); 1.1068 + __ pop(T0); 1.1069 + __ pop(T3); 1.1070 + __ jr(RA); 1.1071 + __ delayed()->nop(); 1.1072 + return start; 1.1073 + } 1.1074 + 1.1075 + // Arguments: 1.1076 + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1.1077 + // ignored 1.1078 + // is_oop - true => oop array, so generate store check code 1.1079 + // name - stub name string 1.1080 + // 1.1081 + // Inputs: 1.1082 + // c_rarg0 - source array address 1.1083 + // c_rarg1 - destination array address 1.1084 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.1085 + // 1.1086 + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1.1087 + // the hardware handle it. The two dwords within qwords that span 1.1088 + // cache line boundaries will still be loaded and stored atomicly. 1.1089 + // 1.1090 + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { 1.1091 + Label l_2, l_3, l_4, l_stchk; 1.1092 + StubCodeMark mark(this, "StubRoutines", name); 1.1093 + __ align(CodeEntryAlignment); 1.1094 + address start = __ pc(); 1.1095 + address nooverlap_target; 1.1096 + 1.1097 + if (is_oop) { 1.1098 + nooverlap_target = aligned ? 1.1099 + StubRoutines::arrayof_oop_disjoint_arraycopy() : 1.1100 + StubRoutines::oop_disjoint_arraycopy(); 1.1101 + }else { 1.1102 + nooverlap_target = aligned ? 
1.1103 + StubRoutines::arrayof_jint_disjoint_arraycopy() : 1.1104 + StubRoutines::jint_disjoint_arraycopy(); 1.1105 + } 1.1106 + 1.1107 + array_overlap_test(nooverlap_target, 2); 1.1108 + 1.1109 + __ push(T3); 1.1110 + __ push(T0); 1.1111 + __ push(T1); 1.1112 + __ push(T8); 1.1113 + 1.1114 + /* 1.1115 + __ pushl(esi); 1.1116 + __ movl(ecx, Address(esp, 4+12)); // count 1.1117 + __ pushl(edi); 1.1118 + __ movl(esi, Address(esp, 8+ 4)); // from 1.1119 + __ movl(edi, Address(esp, 8+ 8)); // to 1.1120 + */ 1.1121 + __ move(T1, A2); 1.1122 + __ move(T3, A0); 1.1123 + __ move(T0, A1); 1.1124 + 1.1125 + //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4 1.1126 + __ sll(AT, T1, Address::times_4); 1.1127 + __ add(AT, T3, AT); 1.1128 + __ lea(T3 , Address(AT, -4)); 1.1129 + //__ std(); 1.1130 + //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4 1.1131 + __ sll(AT, T1, Address::times_4); 1.1132 + __ add(AT, T0, AT); 1.1133 + __ lea(T0 , Address(AT, -4)); 1.1134 + 1.1135 + // __ cmpl(ecx, 32); 1.1136 + // __ jcc(Assembler::above, l_3); // > 32 dwords 1.1137 + // __ testl(ecx, ecx); 1.1138 + //__ jcc(Assembler::zero, l_4); 1.1139 + __ beq(T1, R0, l_4); 1.1140 + __ delayed()->nop(); 1.1141 + // __ subl(edi, esi); 1.1142 + __ align(16); 1.1143 + __ bind(l_2); 1.1144 + // __ movl(edx, Address(esi)); 1.1145 + __ lw(AT, T3, 0); 1.1146 + // __ movl(Address(esi, edi, Address::times_1), edx); 1.1147 + __ sw(AT, T0, 0); 1.1148 + // __ subl(esi, 4); 1.1149 + __ addi(T3, T3, -4); 1.1150 + __ addi(T0, T0, -4); 1.1151 + // __ decl(ecx); 1.1152 + __ addi(T1, T1, -1); 1.1153 + //__ jcc(Assembler::notEqual, l_2); 1.1154 + __ bne(T1, R0, l_2); 1.1155 + __ delayed()->nop(); 1.1156 + if (is_oop) { 1.1157 + // __ jmp(l_stchk); 1.1158 + __ b( l_stchk); 1.1159 + __ delayed()->nop(); 1.1160 + } 1.1161 + __ bind(l_4); 1.1162 + // __ cld(); 1.1163 + // __ popl(edi); 1.1164 + // __ popl(esi); 1.1165 + // __ ret(0); 1.1166 + __ pop(T8); 1.1167 + __ 
pop(T1); 1.1168 + __ pop(T0); 1.1169 + __ pop(T3); 1.1170 + __ jr(RA); 1.1171 + __ delayed()->nop(); 1.1172 + __ bind(l_3); 1.1173 + // __ rep_movl(); 1.1174 + if (is_oop) { 1.1175 + __ bind(l_stchk); 1.1176 + // __ movl(edi, Address(esp, 8+ 8)); 1.1177 + __ move(T0, A1); 1.1178 + // __ movl(ecx, Address(esp, 8+ 12)); 1.1179 + __ move(T1, A2); 1.1180 + array_store_check(); 1.1181 + } 1.1182 + // __ cld(); 1.1183 + // __ popl(edi); 1.1184 + // __ popl(esi); 1.1185 + // __ ret(0); 1.1186 + __ pop(T8); 1.1187 + __ pop(T1); 1.1188 + __ pop(T0); 1.1189 + __ pop(T3); 1.1190 + __ jr(RA); 1.1191 + __ delayed()->nop(); 1.1192 + return start; 1.1193 + } 1.1194 + 1.1195 + // Arguments: 1.1196 + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1.1197 + // ignored 1.1198 + // is_oop - true => oop array, so generate store check code 1.1199 + // name - stub name string 1.1200 + // 1.1201 + // Inputs: 1.1202 + // c_rarg0 - source array address 1.1203 + // c_rarg1 - destination array address 1.1204 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.1205 + // 1.1206 + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1.1207 + // the hardware handle it. The two dwords within qwords that span 1.1208 + // cache line boundaries will still be loaded and stored atomicly. 1.1209 + // 1.1210 + // Side Effects: 1.1211 + // disjoint_int_copy_entry is set to the no-overlap entry point 1.1212 + // used by generate_conjoint_int_oop_copy(). 
1.1213 + // 1.1214 + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 1.1215 + Label l_2, l_3, l_4, l_stchk; 1.1216 + StubCodeMark mark(this, "StubRoutines", name); 1.1217 + __ align(CodeEntryAlignment); 1.1218 + address start = __ pc(); 1.1219 + __ push(T3); 1.1220 + __ push(T0); 1.1221 + __ push(T1); 1.1222 + __ push(T8); 1.1223 + __ move(T1, A2); 1.1224 + __ move(T3, A0); 1.1225 + __ move(T0, A1); 1.1226 + 1.1227 + // __ cmpl(ecx, 32); 1.1228 + // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords 1.1229 + // __ rep_movl(); 1.1230 + __ b(l_2); 1.1231 + __ delayed()->nop(); 1.1232 + if (is_oop) { 1.1233 + // __ jmp(l_stchk); 1.1234 + __ b(l_stchk); 1.1235 + __ delayed()->nop(); 1.1236 + } 1.1237 + // __ popl(edi); 1.1238 + // __ popl(esi); 1.1239 + // __ ret(0); 1.1240 + __ pop(T8); 1.1241 + __ pop(T1); 1.1242 + __ pop(T0); 1.1243 + __ pop(T3); 1.1244 + __ jr(RA); 1.1245 + __ delayed()->nop(); 1.1246 + 1.1247 + __ bind(l_2); 1.1248 + // __ subl(edi, esi); 1.1249 + // __ testl(ecx, ecx); 1.1250 + // __ jcc(Assembler::zero, l_4); 1.1251 + __ beq(T1, R0, l_4); 1.1252 + __ delayed()->nop(); 1.1253 + __ align(16); 1.1254 + __ bind(l_3); 1.1255 + //__ movl(edx, Address(esi)); 1.1256 + __ ld(AT, T3, 0); 1.1257 + // __ movl(Address(edi, esi, Address::times_1), edx); 1.1258 + __ sd(AT, T0, 0); 1.1259 + // __ addl(esi, 4); 1.1260 + __ addi(T3, T3, 8); 1.1261 + __ addi(T0, T0, 8); 1.1262 + // __ decl(ecx); 1.1263 + __ addi(T1, T1, -1); 1.1264 + // __ jcc(Assembler::notEqual, l_3); 1.1265 + __ bne(T1, R0, l_3); 1.1266 + __ delayed()->nop(); 1.1267 + if (is_oop) { 1.1268 + __ bind(l_stchk); 1.1269 + // __ movl(edi, Address(esp, 8+ 8)); 1.1270 + // __ movl(ecx, Address(esp, 8+ 12)); 1.1271 + __ move(T0, A1); 1.1272 + __ move(T1, A2); 1.1273 + array_store_check(); 1.1274 + } 1.1275 + __ bind(l_4); 1.1276 + // __ popl(edi); 1.1277 + // __ popl(esi); 1.1278 + // __ ret(0); 1.1279 + __ pop(T8); 1.1280 + __ pop(T1); 1.1281 + __ pop(T0); 1.1282 
+ __ pop(T3); 1.1283 + __ jr(RA); 1.1284 + __ delayed()->nop(); 1.1285 + return start; 1.1286 + } 1.1287 + 1.1288 + // Arguments: 1.1289 + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1.1290 + // ignored 1.1291 + // is_oop - true => oop array, so generate store check code 1.1292 + // name - stub name string 1.1293 + // 1.1294 + // Inputs: 1.1295 + // c_rarg0 - source array address 1.1296 + // c_rarg1 - destination array address 1.1297 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.1298 + // 1.1299 + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1.1300 + // the hardware handle it. The two dwords within qwords that span 1.1301 + // cache line boundaries will still be loaded and stored atomicly. 1.1302 + // 1.1303 + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 1.1304 + Label l_2, l_3, l_4, l_stchk; 1.1305 + StubCodeMark mark(this, "StubRoutines", name); 1.1306 + __ align(CodeEntryAlignment); 1.1307 + address start = __ pc(); 1.1308 + address nooverlap_target; 1.1309 + 1.1310 + if (is_oop) { 1.1311 + nooverlap_target = aligned ? 1.1312 + StubRoutines::arrayof_oop_disjoint_arraycopy() : 1.1313 + StubRoutines::oop_disjoint_arraycopy(); 1.1314 + }else { 1.1315 + nooverlap_target = aligned ? 
1.1316 + StubRoutines::arrayof_jlong_disjoint_arraycopy() : 1.1317 + StubRoutines::jlong_disjoint_arraycopy(); 1.1318 + } 1.1319 + 1.1320 + array_overlap_test(nooverlap_target, 3); 1.1321 + 1.1322 + __ push(T3); 1.1323 + __ push(T0); 1.1324 + __ push(T1); 1.1325 + __ push(T8); 1.1326 + 1.1327 + __ move(T1, A2); 1.1328 + __ move(T3, A0); 1.1329 + __ move(T0, A1); 1.1330 + 1.1331 + //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4 1.1332 + __ sll(AT, T1, Address::times_8); 1.1333 + __ add(AT, T3, AT); 1.1334 + __ lea(T3 , Address(AT, -8)); 1.1335 + //__ std(); 1.1336 + //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4 1.1337 + __ sll(AT, T1, Address::times_8); 1.1338 + __ add(AT, T0, AT); 1.1339 + __ lea(T0 , Address(AT, -8)); 1.1340 + 1.1341 + // __ cmpl(ecx, 32); 1.1342 + // __ jcc(Assembler::above, l_3); // > 32 dwords 1.1343 + // __ testl(ecx, ecx); 1.1344 + //__ jcc(Assembler::zero, l_4); 1.1345 + __ beq(T1, R0, l_4); 1.1346 + __ delayed()->nop(); 1.1347 + // __ subl(edi, esi); 1.1348 + __ align(16); 1.1349 + __ bind(l_2); 1.1350 + // __ movl(edx, Address(esi)); 1.1351 + __ ld(AT, T3, 0); 1.1352 + // __ movl(Address(esi, edi, Address::times_1), edx); 1.1353 + __ sd(AT, T0, 0); 1.1354 + // __ subl(esi, 4); 1.1355 + __ addi(T3, T3, -8); 1.1356 + __ addi(T0, T0, -8); 1.1357 + // __ decl(ecx); 1.1358 + __ addi(T1, T1, -1); 1.1359 + //__ jcc(Assembler::notEqual, l_2); 1.1360 + __ bne(T1, R0, l_2); 1.1361 + __ delayed()->nop(); 1.1362 + if (is_oop) { 1.1363 + // __ jmp(l_stchk); 1.1364 + __ b( l_stchk); 1.1365 + __ delayed()->nop(); 1.1366 + } 1.1367 + __ bind(l_4); 1.1368 + // __ cld(); 1.1369 + // __ popl(edi); 1.1370 + // __ popl(esi); 1.1371 + // __ ret(0); 1.1372 + __ pop(T8); 1.1373 + __ pop(T1); 1.1374 + __ pop(T0); 1.1375 + __ pop(T3); 1.1376 + __ jr(RA); 1.1377 + __ delayed()->nop(); 1.1378 + __ bind(l_3); 1.1379 + // __ rep_movl(); 1.1380 + if (is_oop) { 1.1381 + __ bind(l_stchk); 1.1382 + // __ 
movl(edi, Address(esp, 8+ 8)); 1.1383 + __ move(T0, A1); 1.1384 + // __ movl(ecx, Address(esp, 8+ 12)); 1.1385 + __ move(T1, A2); 1.1386 + array_store_check(); 1.1387 + } 1.1388 + // __ cld(); 1.1389 + // __ popl(edi); 1.1390 + // __ popl(esi); 1.1391 + // __ ret(0); 1.1392 + __ pop(T8); 1.1393 + __ pop(T1); 1.1394 + __ pop(T0); 1.1395 + __ pop(T3); 1.1396 + __ jr(RA); 1.1397 + __ delayed()->nop(); 1.1398 + return start; 1.1399 + } 1.1400 +#if 0 1.1401 + // Arguments: 1.1402 + // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes 1.1403 + // ignored 1.1404 + // is_oop - true => oop array, so generate store check code 1.1405 + // name - stub name string 1.1406 + // 1.1407 + // Inputs: 1.1408 + // c_rarg0 - source array address 1.1409 + // c_rarg1 - destination array address 1.1410 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.1411 + // 1.1412 + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 1.1413 + __ align(CodeEntryAlignment); 1.1414 + StubCodeMark mark(this, "StubRoutines", name); 1.1415 + address start = __ pc(); 1.1416 + 1.1417 + Label L_copy_32_bytes, L_copy_8_bytes, L_exit; 1.1418 + const Register from = rdi; // source array address 1.1419 + const Register to = rsi; // destination array address 1.1420 + const Register qword_count = rdx; // elements count 1.1421 + const Register saved_count = rcx; 1.1422 + 1.1423 + __ enter(); // required for proper stackwalking of RuntimeStub frame 1.1424 + assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 
1.1425 + 1.1426 + address disjoint_copy_entry = NULL; 1.1427 + if (is_oop) { 1.1428 + assert(!UseCompressedOops, "shouldn't be called for compressed oops"); 1.1429 + disjoint_copy_entry = disjoint_oop_copy_entry; 1.1430 + oop_copy_entry = __ pc(); 1.1431 + array_overlap_test(disjoint_oop_copy_entry, Address::times_8); 1.1432 + } else { 1.1433 + disjoint_copy_entry = disjoint_long_copy_entry; 1.1434 + long_copy_entry = __ pc(); 1.1435 + array_overlap_test(disjoint_long_copy_entry, Address::times_8); 1.1436 + } 1.1437 + BLOCK_COMMENT("Entry:"); 1.1438 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1.1439 + 1.1440 + array_overlap_test(disjoint_copy_entry, Address::times_8); 1.1441 + setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1.1442 + // r9 and r10 may be used to save non-volatile registers 1.1443 + 1.1444 + // 'from', 'to' and 'qword_count' are now valid 1.1445 + 1.1446 + if (is_oop) { 1.1447 + // Save to and count for store barrier 1.1448 + __ movptr(saved_count, qword_count); 1.1449 + // No registers are destroyed by this call 1.1450 + gen_write_ref_array_pre_barrier(to, saved_count); 1.1451 + } 1.1452 + 1.1453 + __ jmp(L_copy_32_bytes); 1.1454 + 1.1455 + // Copy trailing qwords 1.1456 + __ BIND(L_copy_8_bytes); 1.1457 + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 1.1458 + __ movq(Address(to, qword_count, Address::times_8, -8), rax); 1.1459 + __ decrement(qword_count); 1.1460 + __ jcc(Assembler::notZero, L_copy_8_bytes); 1.1461 + 1.1462 + if (is_oop) { 1.1463 + __ jmp(L_exit); 1.1464 + } else { 1.1465 + inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); 1.1466 + restore_arg_regs(); 1.1467 + __ xorptr(rax, rax); // return 0 1.1468 + __ leave(); // required for proper stackwalking of RuntimeStub frame 1.1469 + __ ret(0); 1.1470 + } 1.1471 + 1.1472 + // Copy in 32-bytes chunks 1.1473 + copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); 1.1474 + 1.1475 + if (is_oop) { 1.1476 
+ __ BIND(L_exit); 1.1477 + __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); 1.1478 + gen_write_ref_array_post_barrier(to, rcx, rax); 1.1479 + inc_counter_np(SharedRuntime::_oop_array_copy_ctr); 1.1480 + } else { 1.1481 + inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); 1.1482 + } 1.1483 + restore_arg_regs(); 1.1484 + __ xorptr(rax, rax); // return 0 1.1485 + __ leave(); // required for proper stackwalking of RuntimeStub frame 1.1486 + __ ret(0); 1.1487 + 1.1488 + return start; 1.1489 + } 1.1490 + 1.1491 + 1.1492 + // Helper for generating a dynamic type check. 1.1493 + // Smashes no registers. 1.1494 + void generate_type_check(Register sub_klass, 1.1495 + Register super_check_offset, 1.1496 + Register super_klass, 1.1497 + Label& L_success) { 1.1498 + assert_different_registers(sub_klass, super_check_offset, super_klass); 1.1499 + 1.1500 + BLOCK_COMMENT("type_check:"); 1.1501 + 1.1502 + Label L_miss; 1.1503 + 1.1504 + // a couple of useful fields in sub_klass: 1.1505 + int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 1.1506 + Klass::secondary_supers_offset_in_bytes()); 1.1507 + int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 1.1508 + Klass::secondary_super_cache_offset_in_bytes()); 1.1509 + Address secondary_supers_addr(sub_klass, ss_offset); 1.1510 + Address super_cache_addr( sub_klass, sc_offset); 1.1511 + 1.1512 + // if the pointers are equal, we are done (e.g., String[] elements) 1.1513 + __ cmpptr(super_klass, sub_klass); 1.1514 + __ jcc(Assembler::equal, L_success); 1.1515 + 1.1516 + // check the supertype display: 1.1517 + Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 1.1518 + __ cmpptr(super_klass, super_check_addr); // test the super type 1.1519 + __ jcc(Assembler::equal, L_success); 1.1520 + 1.1521 + // if it was a primary super, we can just fail immediately 1.1522 + __ cmpl(super_check_offset, sc_offset); 1.1523 + __ jcc(Assembler::notEqual, L_miss); 1.1524 + 1.1525 + // Now do 
a linear scan of the secondary super-klass chain. 1.1526 + // The repne_scan instruction uses fixed registers, which we must spill. 1.1527 + // (We need a couple more temps in any case.) 1.1528 + // This code is rarely used, so simplicity is a virtue here. 1.1529 + inc_counter_np(SharedRuntime::_partial_subtype_ctr); 1.1530 + { 1.1531 + __ push(rax); 1.1532 + __ push(rcx); 1.1533 + __ push(rdi); 1.1534 + assert_different_registers(sub_klass, super_klass, rax, rcx, rdi); 1.1535 + 1.1536 + __ movptr(rdi, secondary_supers_addr); 1.1537 + // Load the array length. 1.1538 + __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 1.1539 + // Skip to start of data. 1.1540 + __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 1.1541 + // Scan rcx words at [rdi] for occurance of rax 1.1542 + // Set NZ/Z based on last compare 1.1543 + __ movptr(rax, super_klass); 1.1544 + if (UseCompressedOops) { 1.1545 + // Compare against compressed form. Don't need to uncompress because 1.1546 + // looks like orig rax is restored in popq below. 1.1547 + __ encode_heap_oop(rax); 1.1548 + __ repne_scanl(); 1.1549 + } else { 1.1550 + __ repne_scan(); 1.1551 + } 1.1552 + 1.1553 + // Unspill the temp. registers: 1.1554 + __ pop(rdi); 1.1555 + __ pop(rcx); 1.1556 + __ pop(rax); 1.1557 + 1.1558 + __ jcc(Assembler::notEqual, L_miss); 1.1559 + } 1.1560 + 1.1561 + // Success. Cache the super we found and proceed in triumph. 1.1562 + __ movptr(super_cache_addr, super_klass); // note: rax is dead 1.1563 + __ jmp(L_success); 1.1564 + 1.1565 + // Fall through on failure! 
1.1566 + __ BIND(L_miss); 1.1567 + } 1.1568 + 1.1569 + // 1.1570 + // Generate checkcasting array copy stub 1.1571 + // 1.1572 + // Input: 1.1573 + // c_rarg0 - source array address 1.1574 + // c_rarg1 - destination array address 1.1575 + // c_rarg2 - element count, treated as ssize_t, can be zero 1.1576 + // c_rarg3 - size_t ckoff (super_check_offset) 1.1577 + // not Win64 1.1578 + // c_rarg4 - oop ckval (super_klass) 1.1579 + // Win64 1.1580 + // rsp+40 - oop ckval (super_klass) 1.1581 + // 1.1582 + // Output: 1.1583 + // rax == 0 - success 1.1584 + // rax == -1^K - failure, where K is partial transfer count 1.1585 + // 1.1586 + address generate_checkcast_copy(const char *name) { 1.1587 + 1.1588 + Label L_load_element, L_store_element, L_do_card_marks, L_done; 1.1589 + 1.1590 + // Input registers (after setup_arg_regs) 1.1591 + const Register from = rdi; // source array address 1.1592 + const Register to = rsi; // destination array address 1.1593 + const Register length = rdx; // elements count 1.1594 + const Register ckoff = rcx; // super_check_offset 1.1595 + const Register ckval = r8; // super_klass 1.1596 + 1.1597 + // Registers used as temps (r13, r14 are save-on-entry) 1.1598 + const Register end_from = from; // source array end address 1.1599 + const Register end_to = r13; // destination array end address 1.1600 + const Register count = rdx; // -(count_remaining) 1.1601 + const Register r14_length = r14; // saved copy of length 1.1602 + // End pointers are inclusive, and if length is not zero they point 1.1603 + // to the last unit copied: end_to[0] := end_from[0] 1.1604 + 1.1605 + const Register rax_oop = rax; // actual oop copied 1.1606 + const Register r11_klass = r11; // oop._klass 1.1607 + 1.1608 + //--------------------------------------------------------------- 1.1609 + // Assembler stub will be used for this call to arraycopy 1.1610 + // if the two arrays are subtypes of Object[] but the 1.1611 + // destination array type is not equal to or a 
supertype 1.1612 + // of the source type. Each element must be separately 1.1613 + // checked. 1.1614 + 1.1615 + __ align(CodeEntryAlignment); 1.1616 + StubCodeMark mark(this, "StubRoutines", name); 1.1617 + address start = __ pc(); 1.1618 + 1.1619 + __ enter(); // required for proper stackwalking of RuntimeStub frame 1.1620 + 1.1621 + checkcast_copy_entry = __ pc(); 1.1622 + BLOCK_COMMENT("Entry:"); 1.1623 + 1.1624 +#ifdef ASSERT 1.1625 + // caller guarantees that the arrays really are different 1.1626 + // otherwise, we would have to make conjoint checks 1.1627 + { Label L; 1.1628 + array_overlap_test(L, TIMES_OOP); 1.1629 + __ stop("checkcast_copy within a single array"); 1.1630 + __ bind(L); 1.1631 + } 1.1632 +#endif //ASSERT 1.1633 + 1.1634 + // allocate spill slots for r13, r14 1.1635 + enum { 1.1636 + saved_r13_offset, 1.1637 + saved_r14_offset, 1.1638 + saved_rbp_offset, 1.1639 + saved_rip_offset, 1.1640 + saved_rarg0_offset 1.1641 + }; 1.1642 + __ subptr(rsp, saved_rbp_offset * wordSize); 1.1643 + __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); 1.1644 + __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); 1.1645 + setup_arg_regs(4); // from => rdi, to => rsi, length => rdx 1.1646 + // ckoff => rcx, ckval => r8 1.1647 + // r9 and r10 may be used to save non-volatile registers 1.1648 +#ifdef _WIN64 1.1649 + // last argument (#4) is on stack on Win64 1.1650 + const int ckval_offset = saved_rarg0_offset + 4; 1.1651 + __ movptr(ckval, Address(rsp, ckval_offset * wordSize)); 1.1652 +#endif 1.1653 + 1.1654 + // check that int operands are properly extended to size_t 1.1655 + assert_clean_int(length, rax); 1.1656 + assert_clean_int(ckoff, rax); 1.1657 + 1.1658 +#ifdef ASSERT 1.1659 + BLOCK_COMMENT("assert consistent ckoff/ckval"); 1.1660 + // The ckoff and ckval must be mutually consistent, 1.1661 + // even though caller generates both. 
1.1662 + { Label L; 1.1663 + int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 1.1664 + Klass::super_check_offset_offset_in_bytes()); 1.1665 + __ cmpl(ckoff, Address(ckval, sco_offset)); 1.1666 + __ jcc(Assembler::equal, L); 1.1667 + __ stop("super_check_offset inconsistent"); 1.1668 + __ bind(L); 1.1669 + } 1.1670 +#endif //ASSERT 1.1671 + 1.1672 + // Loop-invariant addresses. They are exclusive end pointers. 1.1673 + Address end_from_addr(from, length, TIMES_OOP, 0); 1.1674 + Address end_to_addr(to, length, TIMES_OOP, 0); 1.1675 + // Loop-variant addresses. They assume post-incremented count < 0. 1.1676 + Address from_element_addr(end_from, count, TIMES_OOP, 0); 1.1677 + Address to_element_addr(end_to, count, TIMES_OOP, 0); 1.1678 + 1.1679 + gen_write_ref_array_pre_barrier(to, count); 1.1680 + 1.1681 + // Copy from low to high addresses, indexed from the end of each array. 1.1682 + __ lea(end_from, end_from_addr); 1.1683 + __ lea(end_to, end_to_addr); 1.1684 + __ movptr(r14_length, length); // save a copy of the length 1.1685 + assert(length == count, ""); // else fix next line: 1.1686 + __ negptr(count); // negate and test the length 1.1687 + __ jcc(Assembler::notZero, L_load_element); 1.1688 + 1.1689 + // Empty array: Nothing to do. 1.1690 + __ xorptr(rax, rax); // return 0 on (trivial) success 1.1691 + __ jmp(L_done); 1.1692 + 1.1693 + // ======== begin loop ======== 1.1694 + // (Loop is rotated; its entry is L_load_element.) 1.1695 + // Loop control: 1.1696 + // for (count = -count; count != 0; count++) 1.1697 + // Base pointers src, dst are biased by 8*(count-1),to last element. 
1.1698 + __ align(16); 1.1699 + 1.1700 + __ BIND(L_store_element); 1.1701 + __ store_heap_oop(rax_oop, to_element_addr); // store the oop 1.1702 + __ increment(count); // increment the count toward zero 1.1703 + __ jcc(Assembler::zero, L_do_card_marks); 1.1704 + 1.1705 + // ======== loop entry is here ======== 1.1706 + __ BIND(L_load_element); 1.1707 + __ load_heap_oop(rax_oop, from_element_addr); // load the oop 1.1708 + __ testptr(rax_oop, rax_oop); 1.1709 + __ jcc(Assembler::zero, L_store_element); 1.1710 + 1.1711 + __ load_klass(r11_klass, rax_oop);// query the object klass 1.1712 + generate_type_check(r11_klass, ckoff, ckval, L_store_element); 1.1713 + // ======== end loop ======== 1.1714 + 1.1715 + // It was a real error; we must depend on the caller to finish the job. 1.1716 + // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. 1.1717 + // Emit GC store barriers for the oops we have copied (r14 + rdx), 1.1718 + // and report their number to the caller. 1.1719 + assert_different_registers(rax, r14_length, count, to, end_to, rcx); 1.1720 + __ lea(end_to, to_element_addr); 1.1721 + gen_write_ref_array_post_barrier(to, end_to, rscratch1); 1.1722 + __ movptr(rax, r14_length); // original oops 1.1723 + __ addptr(rax, count); // K = (original - remaining) oops 1.1724 + __ notptr(rax); // report (-1^K) to caller 1.1725 + __ jmp(L_done); 1.1726 + 1.1727 + // Come here on success only. 1.1728 + __ BIND(L_do_card_marks); 1.1729 + __ addptr(end_to, -wordSize); // make an inclusive end pointer 1.1730 + gen_write_ref_array_post_barrier(to, end_to, rscratch1); 1.1731 + __ xorptr(rax, rax); // return 0 on success 1.1732 + 1.1733 + // Common exit point (success or failure). 
1.1734 + __ BIND(L_done); 1.1735 + __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); 1.1736 + __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); 1.1737 + inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); 1.1738 + restore_arg_regs(); 1.1739 + __ leave(); // required for proper stackwalking of RuntimeStub frame 1.1740 + __ ret(0); 1.1741 + 1.1742 + return start; 1.1743 + } 1.1744 + 1.1745 + // 1.1746 + // Generate 'unsafe' array copy stub 1.1747 + // Though just as safe as the other stubs, it takes an unscaled 1.1748 + // size_t argument instead of an element count. 1.1749 + // 1.1750 + // Input: 1.1751 + // c_rarg0 - source array address 1.1752 + // c_rarg1 - destination array address 1.1753 + // c_rarg2 - byte count, treated as ssize_t, can be zero 1.1754 + // 1.1755 + // Examines the alignment of the operands and dispatches 1.1756 + // to a long, int, short, or byte copy loop. 1.1757 + // 1.1758 + address generate_unsafe_copy(const char *name) { 1.1759 + 1.1760 + Label L_long_aligned, L_int_aligned, L_short_aligned; 1.1761 + 1.1762 + // Input registers (before setup_arg_regs) 1.1763 + const Register from = c_rarg0; // source array address 1.1764 + const Register to = c_rarg1; // destination array address 1.1765 + const Register size = c_rarg2; // byte count (size_t) 1.1766 + 1.1767 + // Register used as a temp 1.1768 + const Register bits = rax; // test copy of low bits 1.1769 + 1.1770 + __ align(CodeEntryAlignment); 1.1771 + StubCodeMark mark(this, "StubRoutines", name); 1.1772 + address start = __ pc(); 1.1773 + 1.1774 + __ enter(); // required for proper stackwalking of RuntimeStub frame 1.1775 + 1.1776 + // bump this on entry, not on exit: 1.1777 + inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); 1.1778 + 1.1779 + __ mov(bits, from); 1.1780 + __ orptr(bits, to); 1.1781 + __ orptr(bits, size); 1.1782 + 1.1783 + __ testb(bits, BytesPerLong-1); 1.1784 + __ jccb(Assembler::zero, L_long_aligned); 1.1785 + 1.1786 + __ testb(bits, 
BytesPerInt-1); 1.1787 + __ jccb(Assembler::zero, L_int_aligned); 1.1788 + 1.1789 + __ testb(bits, BytesPerShort-1); 1.1790 + __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); 1.1791 + 1.1792 + __ BIND(L_short_aligned); 1.1793 + __ shrptr(size, LogBytesPerShort); // size => short_count 1.1794 + __ jump(RuntimeAddress(short_copy_entry)); 1.1795 + 1.1796 + __ BIND(L_int_aligned); 1.1797 + __ shrptr(size, LogBytesPerInt); // size => int_count 1.1798 + __ jump(RuntimeAddress(int_copy_entry)); 1.1799 + 1.1800 + __ BIND(L_long_aligned); 1.1801 + __ shrptr(size, LogBytesPerLong); // size => qword_count 1.1802 + __ jump(RuntimeAddress(long_copy_entry)); 1.1803 + 1.1804 + return start; 1.1805 + } 1.1806 + 1.1807 + // Perform range checks on the proposed arraycopy. 1.1808 + // Kills temp, but nothing else. 1.1809 + // Also, clean the sign bits of src_pos and dst_pos. 1.1810 + void arraycopy_range_checks(Register src, // source array oop (c_rarg0) 1.1811 + Register src_pos, // source position (c_rarg1) 1.1812 + Register dst, // destination array oo (c_rarg2) 1.1813 + Register dst_pos, // destination position (c_rarg3) 1.1814 + Register length, 1.1815 + Register temp, 1.1816 + Label& L_failed) { 1.1817 + BLOCK_COMMENT("arraycopy_range_checks:"); 1.1818 + 1.1819 + // if (src_pos + length > arrayOop(src)->length()) FAIL; 1.1820 + __ movl(temp, length); 1.1821 + __ addl(temp, src_pos); // src_pos + length 1.1822 + __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); 1.1823 + __ jcc(Assembler::above, L_failed); 1.1824 + 1.1825 + // if (dst_pos + length > arrayOop(dst)->length()) FAIL; 1.1826 + __ movl(temp, length); 1.1827 + __ addl(temp, dst_pos); // dst_pos + length 1.1828 + __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); 1.1829 + __ jcc(Assembler::above, L_failed); 1.1830 + 1.1831 + // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. 1.1832 + // Move with sign extension can be used since they are positive. 
1.1833 + __ movslq(src_pos, src_pos); 1.1834 + __ movslq(dst_pos, dst_pos); 1.1835 + 1.1836 + BLOCK_COMMENT("arraycopy_range_checks done"); 1.1837 + } 1.1838 + 1.1839 + // 1.1840 + // Generate generic array copy stubs 1.1841 + // 1.1842 + // Input: 1.1843 + // c_rarg0 - src oop 1.1844 + // c_rarg1 - src_pos (32-bits) 1.1845 + // c_rarg2 - dst oop 1.1846 + // c_rarg3 - dst_pos (32-bits) 1.1847 + // not Win64 1.1848 + // c_rarg4 - element count (32-bits) 1.1849 + // Win64 1.1850 + // rsp+40 - element count (32-bits) 1.1851 + // 1.1852 + // Output: 1.1853 + // rax == 0 - success 1.1854 + // rax == -1^K - failure, where K is partial transfer count 1.1855 + // 1.1856 + address generate_generic_copy(const char *name) { 1.1857 + 1.1858 + Label L_failed, L_failed_0, L_objArray; 1.1859 + Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; 1.1860 + 1.1861 + // Input registers 1.1862 + const Register src = c_rarg0; // source array oop 1.1863 + const Register src_pos = c_rarg1; // source position 1.1864 + const Register dst = c_rarg2; // destination array oop 1.1865 + const Register dst_pos = c_rarg3; // destination position 1.1866 + // elements count is on stack on Win64 1.1867 +#ifdef _WIN64 1.1868 +#define C_RARG4 Address(rsp, 6 * wordSize) 1.1869 +#else 1.1870 +#define C_RARG4 c_rarg4 1.1871 +#endif 1.1872 + 1.1873 + { int modulus = CodeEntryAlignment; 1.1874 + int target = modulus - 5; // 5 = sizeof jmp(L_failed) 1.1875 + int advance = target - (__ offset() % modulus); 1.1876 + if (advance < 0) advance += modulus; 1.1877 + if (advance > 0) __ nop(advance); 1.1878 + } 1.1879 + StubCodeMark mark(this, "StubRoutines", name); 1.1880 + 1.1881 + // Short-hop target to L_failed. Makes for denser prologue code. 
__ BIND(L_failed_0);
    __ jmp(L_failed);
    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");

    __ align(CodeEntryAlignment);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);

    //-----------------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the following conditions are met:
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
    //

    //  if (src == NULL) return -1;
    __ testptr(src, src);         // src oop
    size_t j1off = __ offset();   // code offset of the 1st short jump (checked below)
    __ jccb(Assembler::zero, L_failed_0);

    //  if (src_pos < 0) return -1;
    __ testl(src_pos, src_pos); // src_pos (32-bits)
    __ jccb(Assembler::negative, L_failed_0);

    //  if (dst == NULL) return -1;
    __ testptr(dst, dst);         // dst oop
    __ jccb(Assembler::zero, L_failed_0);

    //  if (dst_pos < 0) return -1;
    __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
    size_t j4off = __ offset();   // code offset of the 4th short jump (checked below)
    __ jccb(Assembler::negative, L_failed_0);

    // The first four tests are very dense code,
    // but not quite dense enough to put four
    // jumps in a 16-byte instruction fetch buffer.
    // That's good, because some branch predictors
    // do not like jumps so close together.
    // Make sure of this.
    guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");

    // registers used as temp
    const Register r11_length    = r11; // elements count to copy
    const Register r10_src_klass = r10; // array klass
    const Register r9_dst_klass  = r9;  // dest array klass

    //  if (length < 0) return -1;
    __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
    __ testl(r11_length, r11_length);
    __ jccb(Assembler::negative, L_failed_0);

    __ load_klass(r10_src_klass, src);
#ifdef ASSERT
    //  assert(src->klass() != NULL);
    BLOCK_COMMENT("assert klasses not null");
    { Label L1, L2;
      __ testptr(r10_src_klass, r10_src_klass);
      __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
      __ bind(L1);
      __ stop("broken null klass");
      __ bind(L2);
      __ load_klass(r9_dst_klass, dst);
      __ cmpq(r9_dst_klass, 0);
      __ jcc(Assembler::equal, L1);     // this would be broken also
      BLOCK_COMMENT("assert done");
    }
#endif

    // Load layout helper (32-bits)
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //

    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
                    Klass::layout_helper_offset_in_bytes();

    const Register rax_lh = rax;  // layout helper

    __ movl(rax_lh, Address(r10_src_klass, lh_offset));

    // Handle objArrays completely differently...
    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ cmpl(rax_lh, objArray_lh);
    __ jcc(Assembler::equal, L_objArray);

    //  if (src->klass() != dst->klass()) return -1;
    __ load_klass(r9_dst_klass, dst);
    __ cmpq(r10_src_klass, r9_dst_klass);
    __ jcc(Assembler::notEqual, L_failed);

    //  if (!src->is_Array()) return -1;
    __ cmpl(rax_lh, Klass::_lh_neutral_value);
    __ jcc(Assembler::greaterEqual, L_failed);

    // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
    { Label L;
      __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
      __ jcc(Assembler::greaterEqual, L);
      __ stop("must be a primitive array");
      __ bind(L);
    }
#endif

    // r10 is passed as the scratch register: r10_src_klass is not read again
    // on this path, and r10 is re-purposed as r10_offset right below.
    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
                           r10, L_failed);

    // typeArrayKlass
    //
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
    //

    const Register r10_offset = r10;    // array offset
    const Register rax_elsize = rax_lh; // element size

    __ movl(r10_offset, rax_lh);
    __ shrl(r10_offset, Klass::_lh_header_size_shift);
    __ andptr(r10_offset, Klass::_lh_header_size_mask);   // array_offset
    __ addptr(src, r10_offset);           // src array offset
    __ addptr(dst, r10_offset);           // dst array offset
    BLOCK_COMMENT("choose copy loop based on element size");
    __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize

    // next registers should be set before the jump to corresponding stub
    const Register from     = c_rarg0;  // source array address
    const Register to       = c_rarg1;  // destination array address
    const Register count    = c_rarg2;  // elements count

    // 'from', 'to', 'count' registers should be set in such order
    // since they are the same as 'src', 'src_pos', 'dst'.

  __ BIND(L_copy_bytes);
    __ cmpl(rax_elsize, 0);
    __ jccb(Assembler::notEqual, L_copy_shorts);
    __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
    __ lea(to,   Address(dst, dst_pos, Address::times_1, 0));// dst_addr
    __ movl2ptr(count, r11_length); // length
    __ jump(RuntimeAddress(byte_copy_entry));

  __ BIND(L_copy_shorts);
    __ cmpl(rax_elsize, LogBytesPerShort);
    __ jccb(Assembler::notEqual, L_copy_ints);
    __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
    __ lea(to,   Address(dst, dst_pos, Address::times_2, 0));// dst_addr
    __ movl2ptr(count, r11_length); // length
    __ jump(RuntimeAddress(short_copy_entry));

  __ BIND(L_copy_ints);
    __ cmpl(rax_elsize, LogBytesPerInt);
    __ jccb(Assembler::notEqual, L_copy_longs);
    __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
    __ lea(to,   Address(dst, dst_pos, Address::times_4, 0));// dst_addr
    __ movl2ptr(count, r11_length); // length
    __ jump(RuntimeAddress(int_copy_entry));

  __ BIND(L_copy_longs);
#ifdef ASSERT
    { Label L;
      __ cmpl(rax_elsize, LogBytesPerLong);
      __ jcc(Assembler::equal, L);
      __ stop("must be long copy, but elsize is wrong");
      __ bind(L);
    }
#endif
    __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
    __ lea(to,   Address(dst, dst_pos, Address::times_8, 0));// dst_addr
    __ movl2ptr(count, r11_length); // length
    __ jump(RuntimeAddress(long_copy_entry));

    // objArrayKlass
  __ BIND(L_objArray);
    // live at this point:  r10_src_klass, src[_pos], dst[_pos]

    Label L_plain_copy, L_checkcast_copy;
    //  test array classes for subtyping
    __ load_klass(r9_dst_klass, dst);
    __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
    __ jcc(Assembler::notEqual, L_checkcast_copy);

    // Identically typed arrays can be copied without element-wise checks.
    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
                           r10, L_failed);

    __ lea(from, Address(src, src_pos, TIMES_OOP,
                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
    __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
    __ movl2ptr(count, r11_length); // length
  __ BIND(L_plain_copy);
    __ jump(RuntimeAddress(oop_copy_entry));

  __ BIND(L_checkcast_copy);
    // live at this point:  r10_src_klass, !r11_length
    {
      // assert(r11_length == C_RARG4); // will reload from here
      // r11 is re-used for the destination klass, so the length it held is
      // gone from here on and is re-read from C_RARG4 where needed.
      Register r11_dst_klass = r11;
      __ load_klass(r11_dst_klass, dst);

      // Before looking at dst.length, make sure dst is also an objArray.
      __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
      __ jcc(Assembler::notEqual, L_failed);

      // It is safe to examine both src.length and dst.length.
#ifndef _WIN64
      arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
                             rax, L_failed);
#else
      __ movl(r11_length, C_RARG4); // reload
      arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
                             rax, L_failed);
      __ load_klass(r11_dst_klass, dst); // reload
#endif

      // Marshal the base address arguments now, freeing registers.
      __ lea(from, Address(src, src_pos, TIMES_OOP,
                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ movl(count, C_RARG4);      // length (reloaded)
      Register sco_temp = c_rarg3;      // this register is free now
      assert_different_registers(from, to, count, sco_temp,
                                 r11_dst_klass, r10_src_klass);
      assert_clean_int(count, sco_temp);

      // Generate the type check.
      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                        Klass::super_check_offset_offset_in_bytes());
      __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
      assert_clean_int(sco_temp, rax);
      generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);

      // Fetch destination element klass from the objArrayKlass header.
      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
                       objArrayKlass::element_klass_offset_in_bytes());
      __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
      __ movl(sco_temp,      Address(r11_dst_klass, sco_offset));
      assert_clean_int(sco_temp, rax);

      // the checkcast_copy loop needs two extra arguments:
      assert(c_rarg3 == sco_temp, "#3 already in place");
      __ movptr(C_RARG4, r11_dst_klass);  // dst.klass.element_klass
      __ jump(RuntimeAddress(checkcast_copy_entry));
    }

  __ BIND(L_failed);
    __ xorptr(rax, rax);
    __ notptr(rax); // return -1
    __ leave();   // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

// NOTE(review): the macro defined at the top of generate_generic_copy is
// C_RARG4, not length_arg, so this #undef does not remove it -- confirm
// whether C_RARG4 should be #undef'd here instead.
#undef length_arg
// NOTE(review): the #if that this #endif closes is not visible in this part
// of the file; presumably it disables the x86-derived code above -- confirm.
#endif

//FIXME
  // Generates a stub that copies A2 jlong elements forward from the address
  // in A0 to the address in A1 (see the loop in the body).
  // NOTE(review): 'aligned' is not read anywhere in this function.
  address generate_disjoint_long_copy(bool aligned, const char *name) {
    Label l_1, l_2;
    StubCodeMark mark(this, "StubRoutines", name);
1.2157 + __ align(CodeEntryAlignment); 1.2158 + address start = __ pc(); 1.2159 + 1.2160 + // __ movl(ecx, Address(esp, 4+8)); // count 1.2161 + // __ movl(eax, Address(esp, 4+0)); // from 1.2162 + // __ movl(edx, Address(esp, 4+4)); // to 1.2163 + __ move(T1, A2); 1.2164 + __ move(T3, A0); 1.2165 + __ move(T0, A1); 1.2166 + __ push(T3); 1.2167 + __ push(T0); 1.2168 + __ push(T1); 1.2169 + //__ subl(edx, eax); 1.2170 + //__ jmp(l_2); 1.2171 + __ b(l_2); 1.2172 + __ delayed()->nop(); 1.2173 + __ align(16); 1.2174 + __ bind(l_1); 1.2175 + // if (VM_Version::supports_mmx()) { 1.2176 + // __ movq(mmx0, Address(eax)); 1.2177 + // __ movq(Address(eax, edx, Address::times_1), mmx0); 1.2178 + // } else { 1.2179 + // __ fild_d(Address(eax)); 1.2180 + __ ld(AT, T3, 0); 1.2181 + // __ fistp_d(Address(eax, edx, Address::times_1)); 1.2182 + __ sd (AT, T0, 0); 1.2183 + // } 1.2184 + // __ addl(eax, 8); 1.2185 + __ addi(T3, T3, 8); 1.2186 + __ addi(T0, T0, 8); 1.2187 + __ bind(l_2); 1.2188 + // __ decl(ecx); 1.2189 + __ addi(T1, T1, -1); 1.2190 + // __ jcc(Assembler::greaterEqual, l_1); 1.2191 + __ bgez(T1, l_1); 1.2192 + __ delayed()->nop(); 1.2193 + // if (VM_Version::supports_mmx()) { 1.2194 + // __ emms(); 1.2195 + // } 1.2196 + // __ ret(0); 1.2197 + __ pop(T1); 1.2198 + __ pop(T0); 1.2199 + __ pop(T3); 1.2200 + __ jr(RA); 1.2201 + __ delayed()->nop(); 1.2202 + return start; 1.2203 + } 1.2204 + 1.2205 + 1.2206 + address generate_conjoint_long_copy(bool aligned, const char *name) { 1.2207 + Label l_1, l_2; 1.2208 + StubCodeMark mark(this, "StubRoutines", name); 1.2209 + __ align(CodeEntryAlignment); 1.2210 + address start = __ pc(); 1.2211 + address nooverlap_target = aligned ? 
StubRoutines::arrayof_jlong_disjoint_arraycopy() :
      StubRoutines::jlong_disjoint_arraycopy();
    // NOTE(review): array_overlap_test is defined outside this view;
    // presumably it branches to nooverlap_target when a forward copy is safe,
    // with 3 being log2 of the element size -- confirm.
    array_overlap_test(nooverlap_target, 3);

    // Save the scratch registers before they are overwritten below.
    __ push(T3);
    __ push(T0);
    __ push(T1);

    /*      __ movl(ecx, Address(esp, 4+8));       // count
            __ movl(eax, Address(esp, 4+0));       // from
            __ movl(edx, Address(esp, 4+4));       // to
            __ jmp(l_2);

     */
    __ move(T1, A2);   // T1 = count
    __ move(T3, A0);   // T3 = from
    __ move(T0, A1);   // T0 = to
    // T3 = from + (count << times_8) - 8 and T0 = to + (count << times_8) - 8,
    // i.e. both pointers start at the last element
    // (assumes Address::times_8 == 3 -- TODO confirm).
    __ sll(AT, T1, Address::times_8);
    __ add(AT, T3, AT);
    __ lea(T3 , Address(AT, -8));
    __ sll(AT, T1, Address::times_8);
    __ add(AT, T0, AT);
    __ lea(T0 , Address(AT, -8));



    // Enter the loop at its test so that count == 0 copies nothing.
    __ b(l_2);
    __ delayed()->nop();
    __ align(16);
    __ bind(l_1);
    /*   if (VM_Version::supports_mmx()) {
         __ movq(mmx0, Address(eax, ecx, Address::times_8));
         __ movq(Address(edx, ecx,Address::times_8), mmx0);
         } else {
         __ fild_d(Address(eax, ecx, Address::times_8));
         __ fistp_d(Address(edx, ecx,Address::times_8));
         }
     */
    // Copy one 8-byte element and step both pointers down by one element.
    __ ld(AT, T3, 0);
    __ sd (AT, T0, 0);
    __ addi(T3, T3, -8);
    __ addi(T0, T0,-8);
    __ bind(l_2);
    //    __ decl(ecx);
    __ addi(T1, T1, -1);
    //__ jcc(Assembler::greaterEqual, l_1);
    __ bgez(T1, l_1);
    __ delayed()->nop();
    //  if (VM_Version::supports_mmx()) {
    //    __ emms();
    //  }
    //  __ ret(0);
    // Restore in reverse push order and return.
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }

  // Generates every arraycopy stub and stores its entry point in the
  // corresponding StubRoutines field.
  void generate_arraycopy_stubs() {
    // The oop copy stubs use the int-sized generators under UseCompressedOops
    // and the long-sized generators otherwise.
    if (UseCompressedOops) {
      StubRoutines::_oop_disjoint_arraycopy          = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy   =
generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
    } else {
      StubRoutines::_oop_disjoint_arraycopy          = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy   = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
    }

    // Disjoint stubs first. generate_conjoint_long_copy reads
    // StubRoutines::[arrayof_]jlong_disjoint_arraycopy() while it is being
    // generated, so those entries must already be set when the conjoint
    // stubs below are built.
    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");

    //  if (VM_Version::supports_mmx())
    //if (false)
    //  StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_mmx_copy_aligned("arrayof_jshort_disjoint_arraycopy");
    //  else
    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_disjoint_arraycopy  = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy");
    //StubRoutines::_arrayof_oop_disjoint_arraycopy   = generate_disjoint_int_oop_copy(true, true, "arrayof_oop_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");

    // Conjoint (possibly overlapping) stubs.
    StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
    StubRoutines::_jint_arraycopy   = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
    StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");

    StubRoutines::_arrayof_jbyte_arraycopy  = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy   = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy");
    //StubRoutines::_arrayof_oop_arraycopy    = generate_conjoint_int_oop_copy(true, true, "arrayof_oop_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy  = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy");

    // The arrayof_ oop entries reuse the plain oop stubs generated above.
    StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
  }

//Wang: add a function to implement SafeFetch32 and SafeFetchN
  // Emits one SafeFetch stub and reports three code addresses through the
  // out-parameters:
  //   *entry           - first instruction of the stub
  //   *fault_pc        - the load instruction that may fault
  //   *continuation_pc - the instruction following that load
  // 'size' must be 4 or 8 and selects lw vs ld for the load.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   A0 = adr
    //   A1 = errValue
    //
    // result:
    //   V0  = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into A1, may fault.
*fault_pc = __ pc();
    // The loaded value overwrites errValue in A1. NOTE(review): presumably
    // the fault handler resumes at *continuation_pc when the load traps, so
    // that A1 still holds errValue in that case -- confirm against the
    // platform signal handler.
    switch (size) {
      case 4:
        // int32_t
        __ lw(A1, A0, 0);
        break;
      case 8:
        // int64_t
        __ ld(A1, A0, 0);
        break;
      default:
        ShouldNotReachHere();
    }

    // return errValue or *adr
    *continuation_pc = __ pc();
    __ addu(V0,A1,R0);   // V0 = A1 + 0
    __ jr(RA);
    __ delayed()->nop();
  }


// From here on '__' emits through the local 'masm' that
// generate_throw_exception creates, instead of the generator's _masm.
#undef __
#define __ masm->

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs.  If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.
  //
  // name                        - name given to the CodeBuffer and RuntimeStub
  // runtime_entry               - address of the runtime routine the stub calls
  // restore_saved_exception_pc  - when true, RA is first reloaded from
  //                               JavaThread::saved_exception_pc
  // Returns the entry point of the newly created RuntimeStub.
  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   bool restore_saved_exception_pc) {
    // Information about frame layout at time of blocking runtime call.
1.2375 + // Note that we only have to preserve callee-saved registers since 1.2376 + // the compilers are responsible for supplying a continuation point 1.2377 + // if they expect all registers to be preserved. 1.2378 +//#define aoqi_test 1.2379 +#ifdef aoqi_test 1.2380 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); 1.2381 +#endif 1.2382 + enum layout { 1.2383 + thread_off, // last_java_sp 1.2384 + S7_off, // callee saved register sp + 1 1.2385 + S6_off, // callee saved register sp + 2 1.2386 + S5_off, // callee saved register sp + 3 1.2387 + S4_off, // callee saved register sp + 4 1.2388 + S3_off, // callee saved register sp + 5 1.2389 + S2_off, // callee saved register sp + 6 1.2390 + S1_off, // callee saved register sp + 7 1.2391 + S0_off, // callee saved register sp + 8 1.2392 + FP_off, 1.2393 + ret_address, 1.2394 + framesize 1.2395 + }; 1.2396 + 1.2397 + int insts_size = 2048; 1.2398 + int locs_size = 32; 1.2399 + 1.2400 + // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, 1.2401 + // NULL, NULL, NULL, false, NULL, name, false); 1.2402 + CodeBuffer code (name , insts_size, locs_size); 1.2403 +#ifdef aoqi_test 1.2404 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); 1.2405 +#endif 1.2406 + OopMapSet* oop_maps = new OopMapSet(); 1.2407 +#ifdef aoqi_test 1.2408 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); 1.2409 +#endif 1.2410 + MacroAssembler* masm = new MacroAssembler(&code); 1.2411 +#ifdef aoqi_test 1.2412 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); 1.2413 +#endif 1.2414 + 1.2415 + address start = __ pc(); 1.2416 + //__ stop("generate_throw_exception"); 1.2417 + /* 1.2418 + __ move(AT, (int)&jerome1 ); 1.2419 + __ sw(SP, AT, 0); 1.2420 + __ move(AT, (int)&jerome2 ); 1.2421 + __ sw(FP, AT, 0); 1.2422 + __ move(AT, (int)&jerome3 ); 1.2423 + __ sw(RA, AT, 0); 1.2424 + __ move(AT, (int)&jerome4 ); 1.2425 + __ sw(R0, AT, 0); 1.2426 + __ move(AT, (int)&jerome5 ); 1.2427 + __ sw(R0, AT, 0); 
1.2428 + __ move(AT, (int)&jerome6 ); 1.2429 + __ sw(R0, AT, 0); 1.2430 + __ move(AT, (int)&jerome7 ); 1.2431 + __ sw(R0, AT, 0); 1.2432 + __ move(AT, (int)&jerome10 ); 1.2433 + __ sw(R0, AT, 0); 1.2434 + 1.2435 + __ pushad(); 1.2436 + 1.2437 + //__ enter(); 1.2438 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 1.2439 + relocInfo::runtime_call_type); 1.2440 + __ delayed()->nop(); 1.2441 + 1.2442 + //__ leave(); 1.2443 + __ popad(); 1.2444 + 1.2445 + */ 1.2446 + 1.2447 + // This is an inlined and slightly modified version of call_VM 1.2448 + // which has the ability to fetch the return PC out of 1.2449 + // thread-local storage and also sets up last_Java_sp slightly 1.2450 + // differently than the real call_VM 1.2451 +#ifndef OPT_THREAD 1.2452 + Register java_thread = TREG; 1.2453 + __ get_thread(java_thread); 1.2454 +#else 1.2455 + Register java_thread = TREG; 1.2456 +#endif 1.2457 +#ifdef aoqi_test 1.2458 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); 1.2459 +#endif 1.2460 + if (restore_saved_exception_pc) { 1.2461 + __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // eax 1.2462 + } 1.2463 + 1.2464 + __ enter(); // required for proper stackwalking of RuntimeStub frame 1.2465 + 1.2466 + __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog 1.2467 + __ sd(S0, SP, S0_off * wordSize); 1.2468 + __ sd(S1, SP, S1_off * wordSize); 1.2469 + __ sd(S2, SP, S2_off * wordSize); 1.2470 + __ sd(S3, SP, S3_off * wordSize); 1.2471 + __ sd(S4, SP, S4_off * wordSize); 1.2472 + __ sd(S5, SP, S5_off * wordSize); 1.2473 + __ sd(S6, SP, S6_off * wordSize); 1.2474 + __ sd(S7, SP, S7_off * wordSize); 1.2475 + 1.2476 + int frame_complete = __ pc() - start; 1.2477 + // push java thread (becomes first argument of C function) 1.2478 + __ sd(java_thread, SP, thread_off * wordSize); 1.2479 + if (java_thread!=A0) 1.2480 + __ move(A0, java_thread); 1.2481 + 1.2482 + // Set up last_Java_sp and last_Java_fp 1.2483 + __ 
set_last_Java_frame(java_thread, SP, FP, NULL); 1.2484 + __ relocate(relocInfo::internal_pc_type); 1.2485 + { 1.2486 + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4; 1.2487 + __ li48(AT, save_pc); 1.2488 + } 1.2489 + __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); 1.2490 + 1.2491 + // Call runtime 1.2492 + __ call(runtime_entry); 1.2493 + __ delayed()->nop(); 1.2494 + // Generate oop map 1.2495 + OopMap* map = new OopMap(framesize, 0); 1.2496 + oop_maps->add_gc_map(__ offset(), map); 1.2497 + 1.2498 + // restore the thread (cannot use the pushed argument since arguments 1.2499 + // may be overwritten by C code generated by an optimizing compiler); 1.2500 + // however can use the register value directly if it is callee saved. 1.2501 +#ifndef OPT_THREAD 1.2502 + __ get_thread(java_thread); 1.2503 +#endif 1.2504 + 1.2505 + __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); 1.2506 + // __ reset_last_Java_frame(java_thread, true); 1.2507 + __ reset_last_Java_frame(java_thread, true, true); 1.2508 + 1.2509 + // Restore callee save registers. 
This must be done after resetting the Java frame 1.2510 + __ ld(S0, SP, S0_off * wordSize); 1.2511 + __ ld(S1, SP, S1_off * wordSize); 1.2512 + __ ld(S2, SP, S2_off * wordSize); 1.2513 + __ ld(S3, SP, S3_off * wordSize); 1.2514 + __ ld(S4, SP, S4_off * wordSize); 1.2515 + __ ld(S5, SP, S5_off * wordSize); 1.2516 + __ ld(S6, SP, S6_off * wordSize); 1.2517 + __ ld(S7, SP, S7_off * wordSize); 1.2518 + 1.2519 + // discard arguments 1.2520 + __ addi(SP, SP, (framesize-2) * wordSize); // epilog 1.2521 + // __ leave(); // required for proper stackwalking of RuntimeStub frame 1.2522 + __ addi(SP, FP, wordSize); 1.2523 + __ ld(FP, SP, -1*wordSize); 1.2524 + // check for pending exceptions 1.2525 +#ifdef ASSERT 1.2526 + Label L; 1.2527 + __ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset())); 1.2528 + __ bne(AT, R0, L); 1.2529 + __ delayed()->nop(); 1.2530 + __ should_not_reach_here(); 1.2531 + __ bind(L); 1.2532 +#endif //ASSERT 1.2533 + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); 1.2534 + __ delayed()->nop(); 1.2535 +#ifdef aoqi_test 1.2536 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); 1.2537 +#endif 1.2538 + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete, 1.2539 + framesize, oop_maps, false); 1.2540 +#ifdef aoqi_test 1.2541 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); 1.2542 +#endif 1.2543 + return stub->entry_point(); 1.2544 + } 1.2545 + 1.2546 + // Initialization 1.2547 + void generate_initial() { 1.2548 +/* 1.2549 + // Generates all stubs and initializes the entry points 1.2550 + 1.2551 + // This platform-specific stub is needed by generate_call_stub() 1.2552 + StubRoutines::mips::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80); 1.2553 + 1.2554 + // entry points that exist in all platforms Note: This is code 1.2555 + // that could be shared among different platforms - however the 1.2556 + // benefit seems to be smaller than the disadvantage of having 
a 1.2557 + // much more complicated generator structure. See also comment in 1.2558 + // stubRoutines.hpp. 1.2559 + 1.2560 + StubRoutines::_forward_exception_entry = generate_forward_exception(); 1.2561 + 1.2562 + StubRoutines::_call_stub_entry = 1.2563 + generate_call_stub(StubRoutines::_call_stub_return_address); 1.2564 + 1.2565 + // is referenced by megamorphic call 1.2566 + StubRoutines::_catch_exception_entry = generate_catch_exception(); 1.2567 + 1.2568 + // atomic calls 1.2569 + StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); 1.2570 + StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr(); 1.2571 + StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); 1.2572 + StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); 1.2573 + StubRoutines::_atomic_add_entry = generate_atomic_add(); 1.2574 + StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr(); 1.2575 + StubRoutines::_fence_entry = generate_orderaccess_fence(); 1.2576 + 1.2577 + StubRoutines::_handler_for_unsafe_access_entry = 1.2578 + generate_handler_for_unsafe_access(); 1.2579 + 1.2580 + // platform dependent 1.2581 + StubRoutines::mips::_get_previous_fp_entry = generate_get_previous_fp(); 1.2582 + 1.2583 + StubRoutines::mips::_verify_mxcsr_entry = generate_verify_mxcsr(); 1.2584 +*/ 1.2585 + // Generates all stubs and initializes the entry points 1.2586 + 1.2587 + //------------------------------------------------------------- 1.2588 + //----------------------------------------------------------- 1.2589 + // entry points that exist in all platforms 1.2590 + // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller 1.2591 + // than the disadvantage of having a much more complicated generator structure. 1.2592 + // See also comment in stubRoutines.hpp. 
1.2593 + StubRoutines::_forward_exception_entry = generate_forward_exception(); 1.2594 + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); 1.2595 + // is referenced by megamorphic call 1.2596 + StubRoutines::_catch_exception_entry = generate_catch_exception(); 1.2597 + 1.2598 + StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); 1.2599 + 1.2600 + // platform dependent 1.2601 + StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp(); 1.2602 + } 1.2603 + 1.2604 +void generate_all() { 1.2605 +#ifdef aoqi_test 1.2606 +tty->print_cr("%s:%d", __func__, __LINE__); 1.2607 +#endif 1.2608 + // Generates all stubs and initializes the entry points 1.2609 + 1.2610 + // These entry points require SharedInfo::stack0 to be set up in 1.2611 + // non-core builds and need to be relocatable, so they each 1.2612 + // fabricate a RuntimeStub internally. 1.2613 + /* 1.2614 + StubRoutines::_throw_AbstractMethodError_entry = 1.2615 + generate_throw_exception("AbstractMethodError throw_exception", 1.2616 + CAST_FROM_FN_PTR(address, 1.2617 + SharedRuntime:: 1.2618 + throw_AbstractMethodError), 1.2619 + false); 1.2620 + 1.2621 + StubRoutines::_throw_IncompatibleClassChangeError_entry = 1.2622 + generate_throw_exception("IncompatibleClassChangeError throw_exception", 1.2623 + CAST_FROM_FN_PTR(address, 1.2624 + SharedRuntime:: 1.2625 + throw_IncompatibleClassChangeError), 1.2626 + false); 1.2627 + 1.2628 + StubRoutines::_throw_ArithmeticException_entry = 1.2629 + generate_throw_exception("ArithmeticException throw_exception", 1.2630 + CAST_FROM_FN_PTR(address, 1.2631 + SharedRuntime:: 1.2632 + throw_ArithmeticException), 1.2633 + true); 1.2634 + 1.2635 + StubRoutines::_throw_NullPointerException_entry = 1.2636 + generate_throw_exception("NullPointerException throw_exception", 1.2637 + CAST_FROM_FN_PTR(address, 1.2638 + SharedRuntime:: 1.2639 + throw_NullPointerException), 1.2640 + true); 1.2641 + 
StubRoutines::_throw_NullPointerException_at_call_entry =
      generate_throw_exception("NullPointerException at call throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_NullPointerException_at_call),
                               false);

    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_StackOverflowError),
                               false);

    // entry points that are platform specific
    StubRoutines::mips::_f2i_fixup = generate_f2i_fixup();
    StubRoutines::mips::_f2l_fixup = generate_f2l_fixup();
    StubRoutines::mips::_d2i_fixup = generate_d2i_fixup();
    StubRoutines::mips::_d2l_fixup = generate_d2l_fixup();

    StubRoutines::mips::_float_sign_mask  = generate_fp_mask("float_sign_mask",  0x7FFFFFFF7FFFFFFF);
    StubRoutines::mips::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
    StubRoutines::mips::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
    StubRoutines::mips::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();
    */
    // Everything above this point inside the block comment is disabled; the
    // statements below are the ones that actually run.
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    // NOTE(review): the ArithmeticException and NullPointerException throw
    // stubs are commented out below, so those two StubRoutines entries are
    // not set by this function -- confirm that this is intended.
//  StubRoutines::_throw_ArithmeticException_entry         = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
//  StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif

    //------------------------------------------------------
    //------------------------------------------------------------------
    // entry points that are platform specific

    // support for verify_oop (must happen after universe_init)
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
#ifndef CORE
    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();
#ifdef aoqi_test
tty->print_cr("%s:%d", __func__, __LINE__);
#endif
#endif

    // Safefetch stubs.
    // The size argument (sizeof(int) / sizeof(intptr_t)) selects the load
    // width inside generate_safefetch.
    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                       &StubRoutines::_safefetch32_fault_pc,
                                                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);
  }

 public:
  // Runs generate_all() when 'all' is true and generate_initial() otherwise;
  // 'code' is handed to the StubCodeGenerator base class.
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }
}; // end class declaration
/*
address StubGenerator::disjoint_byte_copy_entry  = NULL;
address StubGenerator::disjoint_short_copy_entry = NULL;
address StubGenerator::disjoint_int_copy_entry   = NULL;
address StubGenerator::disjoint_long_copy_entry  = NULL;
address StubGenerator::disjoint_oop_copy_entry   = NULL;

address StubGenerator::byte_copy_entry  = NULL;
address StubGenerator::short_copy_entry = NULL;
address StubGenerator::int_copy_entry   = NULL;
address StubGenerator::long_copy_entry  = NULL;
address StubGenerator::oop_copy_entry   = NULL;

address StubGenerator::checkcast_copy_entry = NULL;
*/
// Free-function entry point: constructing the StubGenerator performs the
// generation as a side effect of its constructor; the object itself is
// discarded when this function returns.
void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}