src/cpu/mips/vm/stubGenerator_mips_64.cpp

changeset 1
2d8a650513c2
child 7
e26ad49b7194
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Fri Apr 29 00:06:10 2016 +0800
     1.3 @@ -0,0 +1,2749 @@
     1.4 +/*
     1.5 + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
     1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8 + *
     1.9 + * This code is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License version 2 only, as
    1.11 + * published by the Free Software Foundation.
    1.12 + *
    1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.16 + * version 2 for more details (a copy is included in the LICENSE file that
    1.17 + * accompanied this code).
    1.18 + *
    1.19 + * You should have received a copy of the GNU General Public License version
    1.20 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.22 + *
    1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.24 + * or visit www.oracle.com if you need additional information or have any
    1.25 + * questions.
    1.26 + *
    1.27 + */
    1.28 +
    1.29 +#include "precompiled.hpp"
    1.30 +#include "asm/macroAssembler.hpp"
    1.31 +#include "asm/macroAssembler.inline.hpp"
    1.32 +#include "interpreter/interpreter.hpp"
    1.33 +#include "nativeInst_mips.hpp"
    1.34 +#include "oops/instanceOop.hpp"
    1.35 +#include "oops/method.hpp"
    1.36 +#include "oops/objArrayKlass.hpp"
    1.37 +#include "oops/oop.inline.hpp"
    1.38 +#include "prims/methodHandles.hpp"
    1.39 +#include "runtime/frame.inline.hpp"
    1.40 +#include "runtime/handles.inline.hpp"
    1.41 +#include "runtime/sharedRuntime.hpp"
    1.42 +#include "runtime/stubCodeGenerator.hpp"
    1.43 +#include "runtime/stubRoutines.hpp"
    1.44 +#include "runtime/thread.inline.hpp"
    1.45 +#include "utilities/top.hpp"
    1.46 +#ifdef COMPILER2
    1.47 +#include "opto/runtime.hpp"
    1.48 +#endif
    1.49 +
    1.50 +
    1.51 +// Declaration and definition of StubGenerator (no .hpp file).
    1.52 +// For a more detailed description of the stub routine structure
    1.53 +// see the comment in stubRoutines.hpp
    1.54 +
    1.55 +#define __ _masm->
    1.56 +//#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
    1.57 +//#define a__ ((Assembler*)_masm)->
    1.58 +
    1.59 +//#ifdef PRODUCT
    1.60 +//#define BLOCK_COMMENT(str) /* nothing */
    1.61 +//#else
    1.62 +//#define BLOCK_COMMENT(str) __ block_comment(str)
    1.63 +//#endif
    1.64 +
    1.65 +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
    1.66 +const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
    1.67 +
    1.68 +// Stub Code definitions
    1.69 +
    1.70 +static address handle_unsafe_access() {
    1.71 +  JavaThread* thread = JavaThread::current();
    1.72 +  address pc = thread->saved_exception_pc();
    1.73 +  // pc is the instruction which we must emulate
    1.74 +  // doing a no-op is fine:  return garbage from the load
    1.75 +  // therefore, compute npc
    1.76 +  //address npc = Assembler::locate_next_instruction(pc);
    1.77 +	address npc = (address)((unsigned long)pc + sizeof(unsigned long));
    1.78 +
    1.79 +  // request an async exception
    1.80 +  thread->set_pending_unsafe_access_error();
    1.81 +
    1.82 +  // return address of next instruction to execute
    1.83 +  return npc;
    1.84 +}
    1.85 +
    1.86 +class StubGenerator: public StubCodeGenerator {
    1.87 + private:
    1.88 +
    1.89 +  // ABI mips n64
     1.90 +  // This figure is not the MIPS ABI; it shows calling Java from C.
    1.91 +  // Call stubs are used to call Java from C
    1.92 +  //
    1.93 +  //    [ return_from_Java     ]
    1.94 +  //    [ argument word n-1    ] <--- sp
    1.95 +  //      ...
    1.96 +  //    [ argument word 0      ]
    1.97 +  //      ...
    1.98 +  //-10 [ S6     	       ]
    1.99 +  // -9 [ S5		       ] 
   1.100 +  // -8 [ S4		       ]
   1.101 +  // -7 [ S3                   ]
   1.102 +  // -6 [ S0  		       ]
   1.103 +  // -5 [ TSR(S2)	       ]
   1.104 +  // -4 [ LVP(S7)              ]
   1.105 +  // -3 [ BCP(S1)              ]
   1.106 +  // -2 [ saved fp             ] <--- fp_after_call
   1.107 +  // -1 [ return address       ] 
   1.108 +  //  0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
   1.109 +  //  1 [ result               ] <--- a1
   1.110 +  //  2 [ result_type          ] <--- a2
   1.111 +  //  3 [ method               ] <--- a3
   1.112 +  //  4 [ entry_point          ] <--- a4
   1.113 +  //  5 [ parameters           ] <--- a5
   1.114 +  //  6 [ parameter_size       ] <--- a6
   1.115 +  //  7 [ thread               ] <--- a7
   1.116 +
   1.117 +  //
    1.118 +  // _LP64: n64 does not save params in sp.
   1.119 +  //
   1.120 +  //    [ return_from_Java     ]
   1.121 +  //    [ argument word n-1    ] <--- sp
   1.122 +  //      ...
   1.123 +  //    [ argument word 0      ]
   1.124 +  //      ...
   1.125 +  //-14 [ thread               ]
   1.126 +  //-13 [ result_type          ] <--- a2
   1.127 +  //-12 [ result               ] <--- a1
   1.128 +  //-11 [ ptr. to call wrapper ] <--- a0
   1.129 +  //-10 [ S6     	       ]
   1.130 +  // -9 [ S5		       ] 
   1.131 +  // -8 [ S4		       ]
   1.132 +  // -7 [ S3                   ]
   1.133 +  // -6 [ S0  		       ]
   1.134 +  // -5 [ TSR(S2)	       ]
   1.135 +  // -4 [ LVP(S7)              ]
   1.136 +  // -3 [ BCP(S1)              ]
   1.137 +  // -2 [ saved fp             ] <--- fp_after_call
   1.138 +  // -1 [ return address       ] 
   1.139 +  //  0 [        	       ] <--- old sp
   1.140 +  /*
   1.141 +   * 2014/01/16 Fu: Find a right place in the call_stub for GP.
   1.142 +   * GP will point to the starting point of Interpreter::dispatch_table(itos). 
   1.143 +   * It should be saved/restored before/after Java calls. 
   1.144 +   *
   1.145 +   */
   1.146 +   enum call_stub_layout {
   1.147 +     RA_off		  = -1,
   1.148 +     FP_off		  = -2,
   1.149 +     BCP_off		  = -3,
   1.150 +     LVP_off		  = -4,
   1.151 +     TSR_off		  = -5,
   1.152 +     S1_off		  = -6,
   1.153 +     S3_off		  = -7,
   1.154 +     S4_off		  = -8,
   1.155 +     S5_off		  = -9,
   1.156 +     S6_off		  = -10,
   1.157 +     result_off		  = -11,
   1.158 +     result_type_off	  = -12,
   1.159 +     thread_off		  = -13,
   1.160 +     total_off		  = thread_off - 3,
   1.161 +     GP_off               = -16,
   1.162 +   };
   1.163 +
   1.164 +  address generate_call_stub(address& return_address) {
   1.165 +
   1.166 +    StubCodeMark mark(this, "StubRoutines", "call_stub");
   1.167 +    address start = __ pc();
   1.168 +
   1.169 +    // same as in generate_catch_exception()!
   1.170 +
   1.171 +    // stub code
   1.172 +    // save ra and fp
   1.173 +    __ sd(RA, SP, RA_off * wordSize);
   1.174 +    __ sd(FP, SP, FP_off * wordSize);
   1.175 +    __ sd(BCP, SP, BCP_off * wordSize);
   1.176 +    __ sd(LVP, SP, LVP_off * wordSize);
   1.177 +    __ sd(GP, SP, GP_off * wordSize);
   1.178 +    __ sd(TSR, SP, TSR_off * wordSize);
   1.179 +    __ sd(S1, SP, S1_off * wordSize);
   1.180 +    __ sd(S3, SP, S3_off * wordSize);
   1.181 +    __ sd(S4, SP, S4_off * wordSize);
   1.182 +    __ sd(S5, SP, S5_off * wordSize);
   1.183 +    __ sd(S6, SP, S6_off * wordSize);
   1.184 +
   1.185 +
   1.186 +    __ li48(GP, (long)Interpreter::dispatch_table(itos));
   1.187 +    
   1.188 +    // I think 14 is the max gap between argument and callee saved register
   1.189 +    __ daddi(FP, SP, (-2) * wordSize);
   1.190 +    __ daddi(SP, SP, total_off * wordSize);
   1.191 +//FIXME, aoqi. find a suitable place to save A1 & A2.
   1.192 +    /*
   1.193 +    __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
   1.194 +    __ sd(A1, FP, 3 * wordSize);
   1.195 +    __ sd(A2, FP, 4 * wordSize);
   1.196 +    __ sd(A3, FP, 5 * wordSize);
   1.197 +    __ sd(A4, FP, 6 * wordSize);
   1.198 +    __ sd(A5, FP, 7 * wordSize);
   1.199 +    __ sd(A6, FP, 8 * wordSize);
   1.200 +    __ sd(A7, FP, 9 * wordSize);
   1.201 +    */
   1.202 +    __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
   1.203 +    __ sd(A1, FP, result_off * wordSize);
   1.204 +    __ sd(A2, FP, result_type_off * wordSize);
   1.205 +    __ sd(A7, FP, thread_off * wordSize);
   1.206 +
   1.207 +#ifdef OPT_THREAD
   1.208 +    //__ get_thread(TREG);
   1.209 +    __ move(TREG, A7);
   1.210 +
   1.211 +    //__ ld(TREG, FP, thread_off * wordSize);
   1.212 +#endif
   1.213 +    //add for compressedoops
   1.214 +    __ reinit_heapbase();
   1.215 +
   1.216 +#ifdef ASSERT
   1.217 +    // make sure we have no pending exceptions
   1.218 +    { 
   1.219 +      Label L;
   1.220 +    	__ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
   1.221 +    	__ beq(AT, R0, L); 
   1.222 +    	__ delayed()->nop();
   1.223 +    	/* FIXME: I do not know how to realize stop in mips arch, do it in the future */
   1.224 +    	__ stop("StubRoutines::call_stub: entered with pending exception");
   1.225 +    	__ bind(L);
   1.226 +    }
   1.227 +#endif
   1.228 +
   1.229 +    // pass parameters if any
   1.230 +    // A5: parameter
   1.231 +    // A6: parameter_size
   1.232 +    // T0: parameter_size_tmp(--)
   1.233 +    // T2: offset(++)
   1.234 +    // T3: tmp
   1.235 +    Label parameters_done;
   1.236 +    // judge if the parameter_size equals 0
   1.237 +    __ beq(A6, R0, parameters_done);
   1.238 +    __ delayed()->nop();
   1.239 +    __ dsll(AT, A6, Interpreter::logStackElementSize);
   1.240 +    __ dsub(SP, SP, AT); 
   1.241 +    __ move(AT, -StackAlignmentInBytes); 
   1.242 +    __ andr(SP, SP , AT); 
   1.243 +    // Copy Java parameters in reverse order (receiver last)
   1.244 +    // Note that the argument order is inverted in the process
   1.245 +    // source is edx[ecx: N-1..0]
   1.246 +    // dest   is esp[ebx: 0..N-1]
   1.247 +    Label loop;
   1.248 +    __ move(T0, A6);
   1.249 +    __ move(T2, R0);
   1.250 +    __ bind(loop);
   1.251 +    
   1.252 +    // get parameter
   1.253 +    __ dsll(T3, T0, LogBytesPerWord);   
   1.254 +    __ dadd(T3, T3, A5);	    
   1.255 +    __ ld(AT, T3,  -wordSize);
   1.256 +    __ dsll(T3, T2, LogBytesPerWord); 
   1.257 +    __ dadd(T3, T3, SP); 
   1.258 +    __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
   1.259 +    __ daddi(T2, T2, 1); 
   1.260 +    __ daddi(T0, T0, -1); 
   1.261 +    __ bne(T0, R0, loop);
   1.262 +    __ delayed()->nop();
   1.263 +    // advance to next parameter
   1.264 +    
   1.265 +    // call Java function
   1.266 +    __ bind(parameters_done);
   1.267 +    
   1.268 +    // receiver in V0, methodOop in Rmethod
   1.269 +    
   1.270 +    __ move(Rmethod, A3);
   1.271 +    __ move(Rsender, SP);             //set sender sp
   1.272 +    __ jalr(A4);
   1.273 +    __ delayed()->nop();
   1.274 +    return_address = __ pc();
   1.275 +    
   1.276 +    Label common_return;
   1.277 +    __ bind(common_return);
   1.278 +    
   1.279 +    // store result depending on type
   1.280 +    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
   1.281 +    __ ld(T0, FP, result_off * wordSize); 	// result --> T0
   1.282 +    Label is_long, is_float, is_double, exit;
   1.283 +    __ ld(T2, FP, result_type_off * wordSize);	// result_type --> T2
   1.284 +    __ daddi(T3, T2, (-1) * T_LONG);
   1.285 +    __ beq(T3, R0, is_long);
   1.286 +    __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
   1.287 +    __ beq(T3, R0, is_float);
   1.288 +    __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
   1.289 +    __ beq(T3, R0, is_double);
   1.290 +    __ delayed()->nop();
   1.291 +    
   1.292 +    // handle T_INT case
   1.293 +    __ sd(V0, T0, 0 * wordSize);
   1.294 +    __ bind(exit);
   1.295 +    
   1.296 +    // restore 
   1.297 +    __ daddi(SP, FP, 2 * wordSize );
   1.298 +    __ ld(RA, SP, RA_off * wordSize);
   1.299 +    __ ld(FP, SP, FP_off * wordSize);
   1.300 +    __ ld(BCP, SP, BCP_off * wordSize);
   1.301 +    __ ld(LVP, SP, LVP_off * wordSize);
   1.302 +    __ ld(GP, SP, GP_off * wordSize);
   1.303 +    __ ld(TSR, SP, TSR_off * wordSize);
   1.304 +
   1.305 +    __ ld(S1, SP, S1_off * wordSize);
   1.306 +    __ ld(S3, SP, S3_off * wordSize);
   1.307 +    __ ld(S4, SP, S4_off * wordSize);
   1.308 +    __ ld(S5, SP, S5_off * wordSize);
   1.309 +    __ ld(S6, SP, S6_off * wordSize);
   1.310 +
   1.311 +    // return
   1.312 +    __ jr(RA);
   1.313 +    __ delayed()->nop();
   1.314 +    
   1.315 +    // handle return types different from T_INT
   1.316 +    __ bind(is_long);
   1.317 +    __ sd(V0, T0, 0 * wordSize);
   1.318 +    //__ sd(V1, T0, 1 * wordSize);
   1.319 +    __ sd(R0, T0, 1 * wordSize);
   1.320 +    __ b(exit);
   1.321 +    __ delayed()->nop();
   1.322 +    
   1.323 +    __ bind(is_float);
   1.324 +    __ swc1(F0, T0, 0 * wordSize);
   1.325 +    __ b(exit);
   1.326 +    __ delayed()->nop();
   1.327 +    
   1.328 +    __ bind(is_double);
   1.329 +    __ sdc1(F0, T0, 0 * wordSize);
   1.330 +    //__ sdc1(F1, T0, 1 * wordSize);
   1.331 +    __ sd(R0, T0, 1 * wordSize);
   1.332 +    __ b(exit);
   1.333 +    __ delayed()->nop();
   1.334 +    //FIXME, 1.6 mips version add operation of fpu here
   1.335 +    StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
   1.336 +    __ b(common_return);
   1.337 +    __ delayed()->nop(); 
   1.338 +    return start;
   1.339 +  }
   1.340 +
   1.341 +  // Return point for a Java call if there's an exception thrown in
   1.342 +  // Java code.  The exception is caught and transformed into a
   1.343 +  // pending exception stored in JavaThread that can be tested from
   1.344 +  // within the VM.
   1.345 +  //
   1.346 +  // Note: Usually the parameters are removed by the callee. In case
   1.347 +  // of an exception crossing an activation frame boundary, that is
   1.348 +  // not the case if the callee is compiled code => need to setup the
   1.349 +  // rsp.
   1.350 +  //
    1.351 +  // V0: exception oop
   1.352 +
   1.353 +  address generate_catch_exception() {
   1.354 +    StubCodeMark mark(this, "StubRoutines", "catch_exception");
   1.355 +    address start = __ pc();
   1.356 +
   1.357 +    Register thread = TREG;
   1.358 +
   1.359 +    // get thread directly
   1.360 +#ifndef OPT_THREAD
   1.361 +    __ ld(thread, FP, thread_off * wordSize);
   1.362 +#endif
   1.363 +
   1.364 +#ifdef ASSERT
   1.365 +    // verify that threads correspond
   1.366 +    { Label L;
   1.367 +      __ get_thread(T8);
   1.368 +      __ beq(T8, thread, L);
   1.369 +      __ delayed()->nop();
   1.370 +      __ stop("StubRoutines::catch_exception: threads must correspond");
   1.371 +      __ bind(L);
   1.372 +    }
   1.373 +#endif
   1.374 +    // set pending exception
   1.375 +    __ verify_oop(V0);
   1.376 +    __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
   1.377 +    __ li(AT, (long)__FILE__);
   1.378 +    __ sd(AT, thread, in_bytes(Thread::exception_file_offset   ()));
   1.379 +    __ li(AT, (long)__LINE__);
   1.380 +    __ sd(AT, thread, in_bytes(Thread::exception_line_offset   ()));
   1.381 +
   1.382 +    // complete return to VM
   1.383 +    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
   1.384 +    __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
   1.385 +    __ delayed()->nop();
   1.386 +
   1.387 +    return start;
   1.388 +  }
   1.389 +
   1.390 +  // Continuation point for runtime calls returning with a pending
   1.391 +  // exception.  The pending exception check happened in the runtime
   1.392 +  // or native call stub.  The pending exception in Thread is
   1.393 +  // converted into a Java-level exception.
   1.394 +  //
   1.395 +  // Contract with Java-level exception handlers:
    1.396 +  // V0: exception
    1.397 +  // V1: throwing pc
   1.398 +  //
   1.399 +  // NOTE: At entry of this stub, exception-pc must be on stack !!
   1.400 +
   1.401 +  address generate_forward_exception() {
   1.402 +    StubCodeMark mark(this, "StubRoutines", "forward exception");
   1.403 +    //Register thread = TREG;
   1.404 +    Register thread = TREG;
   1.405 +    address start = __ pc();
   1.406 +
   1.407 +    // Upon entry, the sp points to the return address returning into Java
   1.408 +    // (interpreted or compiled) code; i.e., the return address becomes the
   1.409 +    // throwing pc.
   1.410 +    //
   1.411 +    // Arguments pushed before the runtime call are still on the stack but
   1.412 +    // the exception handler will reset the stack pointer -> ignore them.
   1.413 +    // A potential result in registers can be ignored as well.
   1.414 +
   1.415 +#ifdef ASSERT
   1.416 +    // make sure this code is only executed if there is a pending exception
   1.417 +#ifndef OPT_THREAD
   1.418 +    __ get_thread(thread);
   1.419 +#endif
   1.420 +    { Label L;
   1.421 +      __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
   1.422 +      __ bne(AT, R0, L);
   1.423 +      __ delayed()->nop();
   1.424 +      __ stop("StubRoutines::forward exception: no pending exception (1)");
   1.425 +      __ bind(L);
   1.426 +    }
   1.427 +#endif
   1.428 +
   1.429 +    // compute exception handler into T9
   1.430 +    __ ld(A1, SP, 0);
   1.431 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
   1.432 +    __ move(T9, V0);
   1.433 +    __ pop(V1);
   1.434 +
   1.435 +#ifndef OPT_THREAD
   1.436 +    __ get_thread(thread);
   1.437 +#endif
   1.438 +    __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
   1.439 +    __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
   1.440 +
   1.441 +#ifdef ASSERT
   1.442 +    // make sure exception is set
   1.443 +    { Label L;
   1.444 +      __ bne(V0, R0, L);
   1.445 +      __ delayed()->nop();
   1.446 +      __ stop("StubRoutines::forward exception: no pending exception (2)");
   1.447 +      __ bind(L);
   1.448 +    }
   1.449 +#endif
   1.450 +
   1.451 +    // continue at exception handler (return address removed)
   1.452 +    // V0: exception
   1.453 +    // T9: exception handler
   1.454 +    // V1: throwing pc
   1.455 +    __ verify_oop(V0);
   1.456 +    __ jr(T9);
   1.457 +    __ delayed()->nop();
   1.458 +
   1.459 +    return start;
   1.460 +  }
   1.461 +
   1.462 +  // Support for intptr_t get_previous_fp()
   1.463 +  //
   1.464 +  // This routine is used to find the previous frame pointer for the
    1.465 +  // caller (current_frame_guess). This is used as part of debugging when
   1.466 +  // ps() is seemingly lost trying to find frames.
    1.467 +  // This code assumes that the caller (current_frame_guess) has a frame.
   1.468 +  address generate_get_previous_fp() {
   1.469 +    StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
   1.470 +    const Address old_fp       (FP,  0);
   1.471 +    const Address older_fp       (V0,  0);
   1.472 +    address start = __ pc();
   1.473 +    __ enter();    
   1.474 +    __ lw(V0, old_fp); // callers fp
   1.475 +    __ lw(V0, older_fp); // the frame for ps()
   1.476 +    __ leave();
   1.477 +    __ jr(RA);
   1.478 +    __ delayed()->nop();
   1.479 +    return start;
   1.480 +  }
   1.481 +  // The following routine generates a subroutine to throw an
   1.482 +  // asynchronous UnknownError when an unsafe access gets a fault that
   1.483 +  // could not be reasonably prevented by the programmer.  (Example:
   1.484 +  // SIGBUS/OBJERR.)
   1.485 +  address generate_handler_for_unsafe_access() {
   1.486 +		StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
   1.487 +		address start = __ pc();
   1.488 +		__ pushad();                      // push registers
   1.489 +		//  Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
   1.490 +		__ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
   1.491 +		__ delayed()->nop(); 
   1.492 +		__ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord); 
   1.493 +		__ popad();
   1.494 +		__ jr(RA);
   1.495 +		__ delayed()->nop();  
   1.496 +		return start;
   1.497 +  }
   1.498 +
   1.499 +  // Non-destructive plausibility checks for oops
   1.500 +  //
   1.501 +  // Arguments:
   1.502 +  //    all args on stack!
   1.503 +  //
   1.504 +  // Stack after saving c_rarg3:
   1.505 +  //    [tos + 0]: saved c_rarg3
   1.506 +  //    [tos + 1]: saved c_rarg2
   1.507 +  //    [tos + 2]: saved r12 (several TemplateTable methods use it)
   1.508 +  //    [tos + 3]: saved flags
   1.509 +  //    [tos + 4]: return address
   1.510 +  //  * [tos + 5]: error message (char*)
   1.511 +  //  * [tos + 6]: object to verify (oop)
   1.512 +  //  * [tos + 7]: saved rax - saved by caller and bashed
   1.513 +  //  * = popped on exit
   1.514 +  address generate_verify_oop() {
   1.515 +	  StubCodeMark mark(this, "StubRoutines", "verify_oop");
   1.516 +	  address start = __ pc();
   1.517 +	  __ reinit_heapbase();
   1.518 +	  __ verify_oop_subroutine(); 
   1.519 +    address end = __ pc();
   1.520 +	  return start;
   1.521 +  }
   1.522 +
   1.523 +  //
   1.524 +  //  Generate overlap test for array copy stubs
   1.525 +  //
   1.526 +  //  Input:
   1.527 +  //     A0    -  array1
   1.528 +  //     A1    -  array2
   1.529 +  //     A2    -  element count
   1.530 +  //
    1.531 +  //  Note: stale x86 comment; the MIPS code below uses AT and T9 as temps
   1.532 +  //
   1.533 +
   1.534 + // use T9 as temp 
   1.535 +  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
   1.536 +    int elem_size = 1 << log2_elem_size;
   1.537 +    Address::ScaleFactor sf = Address::times_1;
   1.538 +
   1.539 +    switch (log2_elem_size) {
   1.540 +      case 0: sf = Address::times_1; break;
   1.541 +      case 1: sf = Address::times_2; break;
   1.542 +      case 2: sf = Address::times_4; break;
   1.543 +      case 3: sf = Address::times_8; break;
   1.544 +    }
   1.545 +
   1.546 +    __ dsll(AT, A2, sf);
   1.547 +    __ dadd(AT, AT, A0); 
   1.548 +    __ lea(T9, Address(AT, -elem_size)); 
   1.549 +    __ dsub(AT, A1, A0); 
   1.550 +    __ blez(AT, no_overlap_target); 
   1.551 +    __ delayed()->nop(); 
   1.552 +    __ dsub(AT, A1, T9); 
   1.553 +    __ bgtz(AT, no_overlap_target); 
   1.554 +    __ delayed()->nop(); 
   1.555 +
   1.556 +  }
   1.557 +
   1.558 +  //
   1.559 +  //  Generate store check for array
   1.560 +  //
   1.561 +  //  Input:
   1.562 +  //     %edi    -  starting address
   1.563 +  //     %ecx    -  element count
   1.564 +  //
   1.565 +  //  The 2 input registers are overwritten
   1.566 +  //
   1.567 + 
   1.568 +  //
   1.569 +  //  Generate store check for array
   1.570 +  //
   1.571 +  //  Input:
   1.572 +  //     T0    -  starting address(edi)
   1.573 +  //     T1    -  element count  (ecx)
   1.574 +  //
   1.575 +  //  The 2 input registers are overwritten
   1.576 +  //
   1.577 + 
   1.578 +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
   1.579 +
   1.580 +	void array_store_check() {
   1.581 +		BarrierSet* bs = Universe::heap()->barrier_set();
   1.582 +		assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
   1.583 +		CardTableModRefBS* ct = (CardTableModRefBS*)bs;
   1.584 +		assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
   1.585 +		Label l_0;
   1.586 +
   1.587 +		__ dsll(AT, T1, TIMES_OOP);
   1.588 +		__ dadd(AT, T0, AT); 
   1.589 +		__ daddiu(T1, AT, - BytesPerHeapOop);
   1.590 +
   1.591 +		__ shr(T0, CardTableModRefBS::card_shift); 
   1.592 +		__ shr(T1, CardTableModRefBS::card_shift);
   1.593 +
   1.594 +		__ dsub(T1, T1, T0);   // end --> cards count
   1.595 +		__ bind(l_0);
   1.596 +
   1.597 +		__ li48(AT, (long)ct->byte_map_base); 
   1.598 +		__ dadd(AT, AT, T0); 
   1.599 +		__ dadd(AT, AT, T1); 
   1.600 +		__ sb(R0, AT, 0);
   1.601 +		//__ daddi(T1, T1, -4);  
   1.602 +		__ daddi(T1, T1, - 1);
   1.603 +		__ bgez(T1, l_0);
   1.604 +		__ delayed()->nop(); 
   1.605 +	}
   1.606 +
   1.607 +  // Arguments:
   1.608 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   1.609 +  //             ignored
   1.610 +  //   name    - stub name string
   1.611 +  //
   1.612 +  // Inputs:
   1.613 +  //   c_rarg0   - source array address
   1.614 +  //   c_rarg1   - destination array address
   1.615 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
   1.616 +  //
   1.617 +  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
   1.618 +  // we let the hardware handle it.  The one to eight bytes within words,
   1.619 +  // dwords or qwords that span cache line boundaries will still be loaded
   1.620 +  // and stored atomically.
   1.621 +  //
   1.622 +  // Side Effects:
   1.623 +  //   disjoint_byte_copy_entry is set to the no-overlap entry point
   1.624 +  //   used by generate_conjoint_byte_copy().
   1.625 +  //
   1.626 +	address generate_disjoint_byte_copy(bool aligned, const char *name) {
   1.627 +	  StubCodeMark mark(this, "StubRoutines", name);
   1.628 +	  __ align(CodeEntryAlignment);
   1.629 +	  address start = __ pc();
   1.630 +	  Label l_0, l_1, l_2, l_3, l_4, l_5, l_6;
   1.631 +
   1.632 +	  __ push(T3);
   1.633 +	  __ push(T0);
   1.634 +	  __ push(T1);
   1.635 +	  __ push(T8);
   1.636 +	  __ move(T3, A0); 
   1.637 +	  __ move(T0, A1);
   1.638 +	  __ move(T1, A2);  
   1.639 +	  __ move(T8, T1);             // original count in T1
   1.640 +	  __ daddi(AT, T1, -3); 
   1.641 +	  __ blez(AT, l_4);  
   1.642 +	  __ delayed()->nop();	
   1.643 +	  if (!aligned) {
   1.644 +	    // align source address at dword address boundary
   1.645 +	    __ move(T1, 4); 
   1.646 +	    __ sub(T1, T1, T3); 
   1.647 +	    __ andi(T1, T1, 3); 
   1.648 +	    __ beq(T1, R0, l_1); 
   1.649 +	    __ delayed()->nop();	
   1.650 +	    __ sub(T8,T8,T1); 
   1.651 +	    __ bind(l_0);
   1.652 +	    __ lb(AT, T3, 0); 
   1.653 +	    __ sb(AT, T0, 0); 
   1.654 +	    __ addi(T3, T3, 1); 
   1.655 +	    __ addi(T0, T0, 1); 
   1.656 +	    __ addi(T1 ,T1, -1);  
   1.657 +	    __ bne(T1, R0, l_0); 
   1.658 +	    __ delayed()->nop(); 
   1.659 +	    __ bind(l_1);
   1.660 +	    __ move(T1, T8); 
   1.661 +	  }
   1.662 +	  __ shr(T1, 2); 
   1.663 +	  __ beq(T1, R0, l_4);     // no dwords to move
   1.664 +	  __ delayed()->nop(); 
   1.665 +	  // copy aligned dwords
   1.666 +	  __ bind(l_2);
   1.667 +	  __ align(16);
   1.668 +	  __ bind(l_3);
   1.669 +	  __ lw(AT, T3, 0);   
   1.670 +	  __ sw(AT, T0, 0 ); 
   1.671 +	  __ addi(T3, T3, 4); 
   1.672 +	  __ addi(T0, T0, 4); 
   1.673 +	  __ addi(T1, T1, -1); 
   1.674 +	  __ bne(T1, R0, l_3); 
   1.675 +	  __ delayed()->nop(); 
   1.676 +	  __ bind(l_4);
   1.677 +	  __ move(T1, T8); 
   1.678 +	  __ andi(T1, T1, 3); 
   1.679 +	  __ beq(T1, R0, l_6);  
   1.680 +	  __ delayed()->nop(); 
   1.681 +	  // copy suffix
   1.682 +	  __ bind(l_5);
   1.683 +	  __ lb(AT, T3, 0); 
   1.684 +	  __ sb(AT, T0, 0); 
   1.685 +	  __ addi(T3, T3, 1);  
   1.686 +	  __ addi(T0, T0, 1);  
   1.687 +	  __ addi(T1, T1, -1); 
   1.688 +	  __ bne(T1, R0, l_5 ); 
   1.689 +	  __ delayed()->nop(); 
   1.690 +	  __ bind(l_6);
   1.691 +	  __ pop(T8); 
   1.692 +	  __ pop(T1); 
   1.693 +	  __ pop(T0); 
   1.694 +	  __ pop(T3); 
   1.695 +	  __ jr(RA); 
   1.696 +	  __ delayed()->nop(); 
   1.697 +	  return start;
   1.698 +  }
   1.699 +
   1.700 +  // Arguments:
   1.701 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   1.702 +  //             ignored
   1.703 +  //   name    - stub name string
   1.704 +  //
   1.705 +  // Inputs:
   1.706 +  //   c_rarg0   - source array address
   1.707 +  //   c_rarg1   - destination array address
   1.708 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
   1.709 +  //
   1.710 +  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
   1.711 +  // we let the hardware handle it.  The one to eight bytes within words,
   1.712 +  // dwords or qwords that span cache line boundaries will still be loaded
   1.713 +  // and stored atomically.
   1.714 +  //
   1.715 +  address generate_conjoint_byte_copy(bool aligned, const char *name) {
   1.716 +		Label l_1, l_2, l_3, l_4, l_5;
   1.717 +		StubCodeMark mark(this, "StubRoutines", name);
   1.718 +		__ align(CodeEntryAlignment);
   1.719 +		address start = __ pc();
   1.720 +		address nooverlap_target = aligned ?
   1.721 +		StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
   1.722 +		StubRoutines::jbyte_disjoint_arraycopy();
   1.723 +
   1.724 +		array_overlap_test(nooverlap_target, 0);
   1.725 +
   1.726 +		__ push(T3);	
   1.727 +		__ push(T0);	
   1.728 +		__ push(T1);	
   1.729 +		__ push(T8);	
   1.730 +
   1.731 +
   1.732 +		// copy from high to low
   1.733 +		__ move(T3, A0); 
   1.734 +		__ move(T0, A1);
   1.735 +		__ move(T1, A2);  
   1.736 +		__ dadd(AT, T3, T1);  
   1.737 +		__ lea(T3, Address(AT, -4));
   1.738 +		__ dadd(AT, T0, T1);  
   1.739 +		__ lea(T0, Address(AT, -4));
   1.740 +		__ move(T8, T1); 
   1.741 +		__ daddi(AT, T1, -3); 
   1.742 +		__ blez(AT, l_3); 
   1.743 +		__ delayed()->nop();	
   1.744 +		__ dsrl(T1, T1, 2); 
   1.745 +		__ align(16);
   1.746 +		__ bind(l_1);
   1.747 +		__ lw(AT, T3, 0);   
   1.748 +		__ sw(AT, T0, 0); 
   1.749 +		__ addi(T3, T3, -4);    
   1.750 +		__ addi(T0, T0, -4);    
   1.751 +		__ addi(T1, T1, -1);  
   1.752 +		__ bne(T1, R0, l_1); 
   1.753 +		__ delayed()->nop(); 
   1.754 +		__ b(l_3);  
   1.755 +		__ delayed()->nop(); 
   1.756 +		// copy dwords aligned or not with repeat move
   1.757 +		__ bind(l_2);
   1.758 +		__ bind(l_3);
   1.759 +		// copy suffix (0-3 bytes)
   1.760 +		__ andi(T8, T8, 3); 
   1.761 +		__ beq(T8, R0, l_5); 
   1.762 +		__ delayed()->nop(); 
   1.763 +		__ addi(T3, T3, 3); 
   1.764 +		__ addi(T0, T0, 3); 
   1.765 +		__ bind(l_4);
   1.766 +		__ lb(AT, T3, 0);  
   1.767 +		__ sb(AT, T0, 0); 
   1.768 +		__ addi(T3, T3, -1);  
   1.769 +		__ addi(T0, T0, -1);  
   1.770 +		__ addi(T8, T8, -1); 
   1.771 +		__ bne(T8, R0, l_4); 
   1.772 +		__ delayed()->nop(); 
   1.773 +		__ bind(l_5);
   1.774 +		__ pop(T8);	
   1.775 +		__ pop(T1);	
   1.776 +		__ pop(T0);	
   1.777 +		__ pop(T3);	
   1.778 +		__ jr(RA); 
   1.779 +		__ delayed()->nop(); 
   1.780 +		return start;
   1.781 +  }
   1.782 +
   1.783 +  // Arguments:
   1.784 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   1.785 +  //             ignored
   1.786 +  //   name    - stub name string
   1.787 +  //
   1.788 +  // Inputs:
   1.789 +  //   c_rarg0   - source array address
   1.790 +  //   c_rarg1   - destination array address
   1.791 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
   1.792 +  //
   1.793 +  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
   1.794 +  // let the hardware handle it.  The two or four words within dwords
   1.795 +  // or qwords that span cache line boundaries will still be loaded
   1.796 +  // and stored atomically.
   1.797 +  //
   1.798 +  // Side Effects:
   1.799 +  //   disjoint_short_copy_entry is set to the no-overlap entry point
   1.800 +  //   used by generate_conjoint_short_copy().
   1.801 +  //
  address generate_disjoint_short_copy(bool aligned, const char *name) {
		Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
		StubCodeMark mark(this, "StubRoutines", name);
		__ align(CodeEntryAlignment);
		address start = __ pc();

		// Save the temporaries we clobber; they are restored at l_5 before return.
		__ push(T3);
		__ push(T0);
		__ push(T1);
		__ push(T8);
		// Register roles: T3 = source (A0), T0 = destination (A1),
		// T1 = element count in jshorts (A2).  AT is scratch throughout.
		__ move(T1, A2);
		__ move(T3, A0);
		__ move(T0, A1);

		if (!aligned) {
			__ beq(T1, R0, l_5);            // zero elements: nothing to do
			__ delayed()->nop();
			// align source address at dword address boundary
			__ move(T8, T3); // original from
			__ andi(T8, T8, 3); // either 0 or 2
			__ beq(T8, R0, l_1); // no prefix
			__ delayed()->nop();
			// copy prefix: one jshort, so the source becomes 4-byte aligned
			__ lh(AT, T3, 0);
			__ sh(AT, T0, 0);
			__ add(T3, T3, T8);             // advance both pointers by 2 (T8 == 2 here)
			__ add(T0, T0, T8);
			__ addi(T1, T1, -1);            // one element consumed by the prefix
			__ bind(l_1);
		}
		__ move(T8, T1);            // word count less prefix
		__ sra(T1, T1, 1);          // T1 = number of 32-bit words (2 jshorts each)
		__ beq(T1, R0, l_4);        // fewer than 2 elements left: skip main loop
		__ delayed()->nop();
    // copy aligned dwords
		__ bind(l_2);
		__ align(16);
		__ bind(l_3);
		// Main loop: move two jshorts per iteration as one 32-bit word.
		__ lw(AT, T3, 0);
		__ sw(AT, T0, 0 );
		__ addi(T3, T3, 4);
		__ addi(T0, T0, 4);
		__ addi(T1, T1, -1);
		__ bne(T1, R0, l_3);
		__ delayed()->nop();
		__ bind(l_4);
		__ andi(T8, T8, 1);         // odd element count => one trailing jshort
		__ beq(T8, R0, l_5);
		__ delayed()->nop();
		// copy suffix (the final jshort)
		__ lh(AT, T3, 0);
		__ sh(AT, T0, 0);
		__ bind(l_5);
		// Restore saved registers and return to caller.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		return start;
  }
   1.863 +
   1.864 +  // Arguments:
   1.865 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   1.866 +  //             ignored
   1.867 +  //   name    - stub name string
   1.868 +  //
   1.869 +  // Inputs:
   1.870 +  //   c_rarg0   - source array address
   1.871 +  //   c_rarg1   - destination array address
   1.872 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
   1.873 +  //
   1.874 +  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
   1.875 +  // let the hardware handle it.  The two or four words within dwords
   1.876 +  // or qwords that span cache line boundaries will still be loaded
   1.877 +  // and stored atomically.
   1.878 +  //
  address generate_conjoint_short_copy(bool aligned, const char *name) {
		Label l_1, l_2, l_3, l_4, l_5;
		StubCodeMark mark(this, "StubRoutines", name);
		__ align(CodeEntryAlignment);
		address start = __ pc();
		// If the regions do not overlap backwards, tail-jump to the disjoint
		// (forward-copying) stub instead of copying descending here.
		address nooverlap_target = aligned ?
						StubRoutines::arrayof_jshort_disjoint_arraycopy() :
						StubRoutines::jshort_disjoint_arraycopy();

		array_overlap_test(nooverlap_target, 1);

		// Save the temporaries we clobber; restored at l_5 before return.
		__ push(T3);
		__ push(T0);
		__ push(T1);
		__ push(T8);

		// Original x86 template this stub was ported from:
		/*
			 __ pushl(esi);
			 __ movl(ecx, Address(esp, 4+12));      // count
			 __ pushl(edi);
			 __ movl(esi, Address(esp, 8+ 4));      // from
			 __ movl(edi, Address(esp, 8+ 8));      // to
		 */
		// Register roles: T3 = source (A0), T0 = destination (A1),
		// T1 = element count in jshorts (A2).
		__ move(T1, A2);
		__ move(T3, A0);
		__ move(T0, A1);


		// copy dwords from high to low
		// __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4
		__ sll(AT, T1, Address::times_2);
		__ add(AT, T3, AT);
		__ lea(T3, Address( AT, -4));     // T3 = from + count*2 - 4 (last dword)
		//__ std();
		//__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4
		__ sll(AT,T1 , Address::times_2);
		__ add(AT, T0, AT);
		__ lea(T0, Address( AT, -4));     // T0 = to + count*2 - 4
		//  __ movl(eax, ecx);
		__ move(T8, T1);                  // T8 keeps the original count for the suffix test
		__ bind(l_1);
		//   __ sarl(ecx, 1);              // dword count
		__ sra(T1,T1, 1);
		//__ jcc(Assembler::equal, l_4);                   // no dwords to move
		__ beq(T1, R0, l_4);
		__ delayed()->nop();
		/*    __ cmpl(ecx, 32);
					__ jcc(Assembler::above, l_3);                   // > 32 dwords
		// copy dwords with loop
		__ subl(edi, esi);
		 */     __ align(16);
		__ bind(l_2);
		// Main loop: move two jshorts per iteration, descending addresses.
		//__ movl(edx, Address(esi));
		__ lw(AT, T3, 0);
		//__ movl(Address(edi, esi, Address::times_1), edx);
		__ sw(AT, T0, 0);
		//__ subl(esi, 4);
		__ addi(T3, T3, -4);
		__ addi(T0, T0, -4);
		//__ decl(ecx);
		__ addi(T1, T1, -1);
		//  __ jcc(Assembler::notEqual, l_2);
		__ bne(T1, R0, l_2);
		__ delayed()->nop();
		//  __ addl(edi, esi);
		// __ jmp(l_4);
		__ b(l_4);
		__ delayed()->nop();
		// copy dwords with repeat move (x86 fast path; empty on MIPS)
		__ bind(l_3);
		//   __ rep_movl();
		__ bind(l_4);
		//  __ andl(eax, 1);              // suffix count
		__ andi(T8, T8, 1);              // suffix count
		//__ jcc(Assembler::equal, l_5);                   // no suffix
		__ beq(T8, R0, l_5 );
		__ delayed()->nop();
		// copy suffix: the lowest-addressed jshort (offset 2 compensates for
		// the loop having moved the pointers 4 below the remaining element)
		//   __ movw(edx, Address(esi, 2));
		__ lh(AT, T3, 2);
		//  __ movw(Address(edi, 2), edx);
		__ sh(AT, T0, 2);
		__ bind(l_5);
		//    __ cld();
		//    __ popl(edi);
		//    __ popl(esi);
		//   __ ret(0);
		// Restore saved registers and return.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		return start;
  }
   1.974 +
   1.975 +  // Arguments:
   1.976 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   1.977 +  //             ignored
   1.978 +  //   is_oop  - true => oop array, so generate store check code
   1.979 +  //   name    - stub name string
   1.980 +  //
   1.981 +  // Inputs:
   1.982 +  //   c_rarg0   - source array address
   1.983 +  //   c_rarg1   - destination array address
   1.984 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
   1.985 +  //
   1.986 +  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
   1.987 +  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
   1.989 +  //
   1.990 +  // Side Effects:
   1.991 +  //   disjoint_int_copy_entry is set to the no-overlap entry point
   1.992 +  //   used by generate_conjoint_int_oop_copy().
   1.993 +  //
  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
		Label l_2, l_3, l_4, l_stchk;
		StubCodeMark mark(this, "StubRoutines", name);
		__ align(CodeEntryAlignment);
		address start = __ pc();
		// Original x86 template this stub was ported from:
		/*
			 __ pushl(esi);
			 __ movl(ecx, Address(esp, 4+12));      // count
			 __ pushl(edi);
			 __ movl(esi, Address(esp, 8+ 4));      // from
			 __ movl(edi, Address(esp, 8+ 8));      // to
		 */
		// Save the temporaries we clobber; T3 = source (A0), T0 = destination
		// (A1), T1 = element count in jints (A2).
		__ push(T3);
		__ push(T0);
		__ push(T1);
		__ push(T8);
		__ move(T1, A2);
		__ move(T3, A0);
		__ move(T0, A1);

		// __ cmpl(ecx, 32);
		// __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
		// __ rep_movl();
		__ b(l_2);
		__ delayed()->nop();
		// NOTE(review): everything between here and bind(l_2) is emitted but
		// unreachable -- the unconditional branch above always skips it.  It
		// is a leftover of the x86 rep_movl fast-path shown in the comments.
		if (is_oop) {
		//  __ jmp(l_stchk);
			__ b(l_stchk);
			__ delayed()->nop();
		}
		//    __ popl(edi);
		//   __ popl(esi);
		//  __ ret(0);
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();

		__ bind(l_2);
		//  __ subl(edi, esi);
		//  __ testl(ecx, ecx);
		// __ jcc(Assembler::zero, l_4);
		__ beq(T1, R0, l_4);             // zero elements: skip the copy loop
		__ delayed()->nop();
		__ align(16);
		__ bind(l_3);
		// Main loop: copy one jint per iteration, ascending addresses.
		//__ movl(edx, Address(esi));
		__ lw(AT, T3, 0);
		// __ movl(Address(edi, esi, Address::times_1), edx);
		__ sw(AT, T0, 0);
		// __ addl(esi, 4);
		__ addi(T3, T3, 4);
		__ addi(T0, T0, 4);
		//   __ decl(ecx);
		__ addi(T1, T1, -1);
		//    __ jcc(Assembler::notEqual, l_3);
		__ bne(T1, R0, l_3);
		__ delayed()->nop();
		if (is_oop) {
			// For oop arrays: fall through into the store check so the card
			// table is updated for the destination range.
			__ bind(l_stchk);
			//      __ movl(edi, Address(esp, 8+ 8));
			//     __ movl(ecx, Address(esp, 8+ 12));
			__ move(T0, A1);               // reload dest and count clobbered by the loop
			__ move(T1, A2);
			array_store_check();
		}
		__ bind(l_4);
		//    __ popl(edi);
		//   __ popl(esi);
		//  __ ret(0);
		// Restore saved registers and return.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		return start;
	}
  1.1074 +
  1.1075 +  // Arguments:
  1.1076 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1.1077 +  //             ignored
  1.1078 +  //   is_oop  - true => oop array, so generate store check code
  1.1079 +  //   name    - stub name string
  1.1080 +  //
  1.1081 +  // Inputs:
  1.1082 +  //   c_rarg0   - source array address
  1.1083 +  //   c_rarg1   - destination array address
  1.1084 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
  1.1085 +  //
  1.1086 +  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1.1087 +  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  1.1089 +  //
  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
		Label l_2, l_3, l_4, l_stchk;
		StubCodeMark mark(this, "StubRoutines", name);
		__ align(CodeEntryAlignment);
		address start = __ pc();
		// If the regions do not overlap backwards, tail-jump to the matching
		// disjoint (forward-copying) stub instead of copying descending here.
		address nooverlap_target;

		if (is_oop) {
			nooverlap_target = aligned ?
							StubRoutines::arrayof_oop_disjoint_arraycopy() :
							StubRoutines::oop_disjoint_arraycopy();
		}else {
			nooverlap_target = aligned ?
							StubRoutines::arrayof_jint_disjoint_arraycopy() :
							StubRoutines::jint_disjoint_arraycopy();
		}

		array_overlap_test(nooverlap_target, 2);

		// Save the temporaries we clobber; restored before each return below.
		__ push(T3);
		__ push(T0);
		__ push(T1);
		__ push(T8);

		// Original x86 template this stub was ported from:
		/*
			 __ pushl(esi);
			 __ movl(ecx, Address(esp, 4+12));      // count
			 __ pushl(edi);
			 __ movl(esi, Address(esp, 8+ 4));      // from
			 __ movl(edi, Address(esp, 8+ 8));      // to
		 */
		// Register roles: T3 = source (A0), T0 = destination (A1),
		// T1 = element count in jints (A2).
		__ move(T1, A2);
		__ move(T3, A0);
		__ move(T0, A1);

		// Point both registers at the last element, for a descending copy.
		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
		__ sll(AT, T1, Address::times_4);
		__ add(AT, T3, AT);
		__ lea(T3 , Address(AT, -4));
		//__ std();
		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
		__ sll(AT, T1, Address::times_4);
		__ add(AT, T0, AT);
		__ lea(T0 , Address(AT, -4));

		//    __ cmpl(ecx, 32);
		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
		//  __ testl(ecx, ecx);
		//__ jcc(Assembler::zero, l_4);
		__ beq(T1, R0, l_4);             // zero elements: skip the copy loop
		__ delayed()->nop();
		// __ subl(edi, esi);
		__ align(16);
		__ bind(l_2);
		// Main loop: copy one jint per iteration, descending addresses.
		// __ movl(edx, Address(esi));
		__ lw(AT, T3, 0);
		// __ movl(Address(esi, edi, Address::times_1), edx);
		__ sw(AT, T0, 0);
		// __ subl(esi, 4);
		__ addi(T3, T3, -4);
		__ addi(T0, T0, -4);
		//   __ decl(ecx);
		__ addi(T1, T1, -1);
		//__ jcc(Assembler::notEqual, l_2);
		__ bne(T1, R0, l_2);
		__ delayed()->nop();
		if (is_oop) {
			// Oop arrays: branch to the store check before returning.
			// __ jmp(l_stchk);
			__ b( l_stchk);
			__ delayed()->nop();
		}
		__ bind(l_4);
		//      __ cld();
		//     __ popl(edi);
		//    __ popl(esi);
		//   __ ret(0);
		// Non-oop (or zero-count) exit: restore and return.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		// NOTE(review): l_3 is never branched to on MIPS; it is a remnant of
		// the x86 rep_movl fast path.  For !is_oop the code below it is
		// unreachable (the epilogue above already returned).
		__ bind(l_3);
		//   __ rep_movl();
		if (is_oop) {
			__ bind(l_stchk);
			//  __ movl(edi, Address(esp, 8+ 8));
			__ move(T0, A1);               // reload dest and count clobbered by the loop
			// __ movl(ecx, Address(esp, 8+ 12));
			__ move(T1, A2);
			array_store_check();
		}
		//    __ cld();
		//   __ popl(edi);
		//   __ popl(esi);
		//  __ ret(0);
		// Oop-path exit: restore and return.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		return start;
  }
  1.1194 +
  1.1195 +  // Arguments:
  1.1196 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1.1197 +  //             ignored
  1.1198 +  //   is_oop  - true => oop array, so generate store check code
  1.1199 +  //   name    - stub name string
  1.1200 +  //
  1.1201 +  // Inputs:
  1.1202 +  //   c_rarg0   - source array address
  1.1203 +  //   c_rarg1   - destination array address
  1.1204 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
  1.1205 +  //
  1.1206 +  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1.1207 +  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_long_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_long_oop_copy().
  1.1213 +  //
  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
		Label l_2, l_3, l_4, l_stchk;
		StubCodeMark mark(this, "StubRoutines", name);
		__ align(CodeEntryAlignment);
		address start = __ pc();
		// Save the temporaries we clobber; T3 = source (A0), T0 = destination
		// (A1), T1 = element count in jlongs/oops (A2).  Same structure as
		// generate_disjoint_int_oop_copy, but with 8-byte loads and stores.
		__ push(T3);
		__ push(T0);
		__ push(T1);
		__ push(T8);
		__ move(T1, A2);
		__ move(T3, A0);
		__ move(T0, A1);

		// __ cmpl(ecx, 32);
		// __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
		// __ rep_movl();
		__ b(l_2);
		__ delayed()->nop();
		// NOTE(review): everything between here and bind(l_2) is emitted but
		// unreachable -- the unconditional branch above always skips it.  It
		// is a leftover of the x86 rep_movl fast-path shown in the comments.
		if (is_oop) {
		//  __ jmp(l_stchk);
			__ b(l_stchk);
			__ delayed()->nop();
		}
		//    __ popl(edi);
		//   __ popl(esi);
		//  __ ret(0);
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();

		__ bind(l_2);
		//  __ subl(edi, esi);
		//  __ testl(ecx, ecx);
		// __ jcc(Assembler::zero, l_4);
		__ beq(T1, R0, l_4);             // zero elements: skip the copy loop
		__ delayed()->nop();
		__ align(16);
		__ bind(l_3);
		// Main loop: copy one 8-byte element per iteration, ascending.
		//__ movl(edx, Address(esi));
		__ ld(AT, T3, 0);
		// __ movl(Address(edi, esi, Address::times_1), edx);
		__ sd(AT, T0, 0);
		// __ addl(esi, 4);
		__ addi(T3, T3, 8);
		__ addi(T0, T0, 8);
		//   __ decl(ecx);
		__ addi(T1, T1, -1);
		//    __ jcc(Assembler::notEqual, l_3);
		__ bne(T1, R0, l_3);
		__ delayed()->nop();
		if (is_oop) {
			// For oop arrays: fall through into the store check so the card
			// table is updated for the destination range.
			__ bind(l_stchk);
			//      __ movl(edi, Address(esp, 8+ 8));
			//     __ movl(ecx, Address(esp, 8+ 12));
			__ move(T0, A1);               // reload dest and count clobbered by the loop
			__ move(T1, A2);
			array_store_check();
		}
		__ bind(l_4);
		//    __ popl(edi);
		//   __ popl(esi);
		//  __ ret(0);
		// Restore saved registers and return.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		return start;
	}
  1.1287 +
  1.1288 +  // Arguments:
  1.1289 +  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1.1290 +  //             ignored
  1.1291 +  //   is_oop  - true => oop array, so generate store check code
  1.1292 +  //   name    - stub name string
  1.1293 +  //
  1.1294 +  // Inputs:
  1.1295 +  //   c_rarg0   - source array address
  1.1296 +  //   c_rarg1   - destination array address
  1.1297 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
  1.1298 +  //
  1.1299 +  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1.1300 +  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  1.1302 +  //
  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
		Label l_2, l_3, l_4, l_stchk;
		StubCodeMark mark(this, "StubRoutines", name);
		__ align(CodeEntryAlignment);
		address start = __ pc();
		// If the regions do not overlap backwards, tail-jump to the matching
		// disjoint (forward-copying) stub instead of copying descending here.
		// Same structure as generate_conjoint_int_oop_copy, with 8-byte moves.
		address nooverlap_target;

		if (is_oop) {
			nooverlap_target = aligned ?
							StubRoutines::arrayof_oop_disjoint_arraycopy() :
							StubRoutines::oop_disjoint_arraycopy();
		}else {
			nooverlap_target = aligned ?
							StubRoutines::arrayof_jlong_disjoint_arraycopy() :
							StubRoutines::jlong_disjoint_arraycopy();
		}

		array_overlap_test(nooverlap_target, 3);

		// Save the temporaries we clobber; restored before each return below.
		__ push(T3);
		__ push(T0);
		__ push(T1);
		__ push(T8);

		// Register roles: T3 = source (A0), T0 = destination (A1),
		// T1 = element count in jlongs/oops (A2).
		__ move(T1, A2);
		__ move(T3, A0);
		__ move(T0, A1);

		// Point both registers at the last element, for a descending copy.
		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
		__ sll(AT, T1, Address::times_8);
		__ add(AT, T3, AT);
		__ lea(T3 , Address(AT, -8));
		//__ std();
		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
		__ sll(AT, T1, Address::times_8);
		__ add(AT, T0, AT);
		__ lea(T0 , Address(AT, -8));

		//    __ cmpl(ecx, 32);
		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
		//  __ testl(ecx, ecx);
		//__ jcc(Assembler::zero, l_4);
		__ beq(T1, R0, l_4);             // zero elements: skip the copy loop
		__ delayed()->nop();
		// __ subl(edi, esi);
		__ align(16);
		__ bind(l_2);
		// Main loop: copy one 8-byte element per iteration, descending.
		// __ movl(edx, Address(esi));
		__ ld(AT, T3, 0);
		// __ movl(Address(esi, edi, Address::times_1), edx);
		__ sd(AT, T0, 0);
		// __ subl(esi, 4);
		__ addi(T3, T3, -8);
		__ addi(T0, T0, -8);
		//   __ decl(ecx);
		__ addi(T1, T1, -1);
		//__ jcc(Assembler::notEqual, l_2);
		__ bne(T1, R0, l_2);
		__ delayed()->nop();
		if (is_oop) {
			// Oop arrays: branch to the store check before returning.
			// __ jmp(l_stchk);
			__ b( l_stchk);
			__ delayed()->nop();
		}
		__ bind(l_4);
		//      __ cld();
		//     __ popl(edi);
		//    __ popl(esi);
		//   __ ret(0);
		// Non-oop (or zero-count) exit: restore and return.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		// NOTE(review): l_3 is never branched to on MIPS; it is a remnant of
		// the x86 rep_movl fast path.  For !is_oop the code below it is
		// unreachable (the epilogue above already returned).
		__ bind(l_3);
		//   __ rep_movl();
		if (is_oop) {
			__ bind(l_stchk);
			//  __ movl(edi, Address(esp, 8+ 8));
			__ move(T0, A1);               // reload dest and count clobbered by the loop
			// __ movl(ecx, Address(esp, 8+ 12));
			__ move(T1, A2);
			array_store_check();
		}
		//    __ cld();
		//   __ popl(edi);
		//   __ popl(esi);
		//  __ ret(0);
		// Oop-path exit: restore and return.
		__ pop(T8);
		__ pop(T1);
		__ pop(T0);
		__ pop(T3);
		__ jr(RA);
		__ delayed()->nop();
		return start;
  }
  1.1400 +#if 0
  1.1401 +  // Arguments:
  1.1402 +  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  1.1403 +  //             ignored
  1.1404 +  //   is_oop  - true => oop array, so generate store check code
  1.1405 +  //   name    - stub name string
  1.1406 +  //
  1.1407 +  // Inputs:
  1.1408 +  //   c_rarg0   - source array address
  1.1409 +  //   c_rarg1   - destination array address
  1.1410 +  //   c_rarg2   - element count, treated as ssize_t, can be zero
  1.1411 +  //
  // NOTE(review): dead code -- this is the original x86_64 implementation,
  // compiled out by the enclosing "#if 0" and kept only as a porting
  // reference.  The live MIPS implementation appears above.
  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register qword_count = rdx;  // elements count
    const Register saved_count = rcx;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    // Record the entry point and dispatch to the disjoint stub when the
    // regions do not overlap backwards.
    address disjoint_copy_entry = NULL;
    if (is_oop) {
      assert(!UseCompressedOops, "shouldn't be called for compressed oops");
      disjoint_copy_entry = disjoint_oop_copy_entry;
      oop_copy_entry  = __ pc();
      array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
    } else {
      disjoint_copy_entry = disjoint_long_copy_entry;
      long_copy_entry = __ pc();
      array_overlap_test(disjoint_long_copy_entry, Address::times_8);
    }
    BLOCK_COMMENT("Entry:");
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)

    array_overlap_test(disjoint_copy_entry, Address::times_8);
    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    // 'from', 'to' and 'qword_count' are now valid

    if (is_oop) {
      // Save to and count for store barrier
      __ movptr(saved_count, qword_count);
      // No registers are destroyed by this call
      gen_write_ref_array_pre_barrier(to, saved_count);
    }

    __ jmp(L_copy_32_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
    __ decrement(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    if (is_oop) {
      __ jmp(L_exit);
    } else {
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
      restore_arg_regs();
      __ xorptr(rax, rax); // return 0
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
    }

    // Copy in 32-bytes chunks
    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);

    if (is_oop) {
    __ BIND(L_exit);
      // Card-mark the written destination range, then return 0.
      __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
      gen_write_ref_array_post_barrier(to, rcx, rax);
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
    } else {
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
    }
    restore_arg_regs();
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
  1.1490 +
  1.1491 +
  1.1492 +  // Helper for generating a dynamic type check.
  1.1493 +  // Smashes no registers.
  // NOTE(review): dead code -- x86_64 helper inside the enclosing "#if 0",
  // kept only as a porting reference.
  // Branches to L_success if sub_klass is a subtype of super_klass;
  // falls through on failure.  Smashes no registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass);

    BLOCK_COMMENT("type_check:");

    Label L_miss;

    // a couple of useful fields in sub_klass:
    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                     Klass::secondary_supers_offset_in_bytes());
    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                     Klass::secondary_super_cache_offset_in_bytes());
    Address secondary_supers_addr(sub_klass, ss_offset);
    Address super_cache_addr(     sub_klass, sc_offset);

    // if the pointers are equal, we are done (e.g., String[] elements)
    __ cmpptr(super_klass, sub_klass);
    __ jcc(Assembler::equal, L_success);

    // check the supertype display:
    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
    __ cmpptr(super_klass, super_check_addr); // test the super type
    __ jcc(Assembler::equal, L_success);

    // if it was a primary super, we can just fail immediately
    __ cmpl(super_check_offset, sc_offset);
    __ jcc(Assembler::notEqual, L_miss);

    // Now do a linear scan of the secondary super-klass chain.
    // The repne_scan instruction uses fixed registers, which we must spill.
    // (We need a couple more temps in any case.)
    // This code is rarely used, so simplicity is a virtue here.
    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
    {
      __ push(rax);
      __ push(rcx);
      __ push(rdi);
      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);

      __ movptr(rdi, secondary_supers_addr);
      // Load the array length.
      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
      // Skip to start of data.
      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
      // Scan rcx words at [rdi] for occurrence of rax
      // Set NZ/Z based on last compare
      __ movptr(rax, super_klass);
      if (UseCompressedOops) {
        // Compare against compressed form.  Don't need to uncompress because
        // looks like orig rax is restored in popq below.
        __ encode_heap_oop(rax);
        __ repne_scanl();
      } else {
        __ repne_scan();
      }

      // Unspill the temp. registers:
      __ pop(rdi);
      __ pop(rcx);
      __ pop(rax);

      __ jcc(Assembler::notEqual, L_miss);
    }

    // Success.  Cache the super we found and proceed in triumph.
    __ movptr(super_cache_addr, super_klass); // note: rax is dead
    __ jmp(L_success);

    // Fall through on failure!
    __ BIND(L_miss);
  }
  1.1568 +
  1.1569 +  //
  1.1570 +  //  Generate checkcasting array copy stub
  1.1571 +  //
  1.1572 +  //  Input:
  1.1573 +  //    c_rarg0   - source array address
  1.1574 +  //    c_rarg1   - destination array address
  1.1575 +  //    c_rarg2   - element count, treated as ssize_t, can be zero
  1.1576 +  //    c_rarg3   - size_t ckoff (super_check_offset)
  1.1577 +  // not Win64
  1.1578 +  //    c_rarg4   - oop ckval (super_klass)
  1.1579 +  // Win64
  1.1580 +  //    rsp+40    - oop ckval (super_klass)
  1.1581 +  //
  1.1582 +  //  Output:
  1.1583 +  //    rax ==  0  -  success
  1.1584 +  //    rax == -1^K - failure, where K is partial transfer count
  1.1585 +  //
  1.1586 +  address generate_checkcast_copy(const char *name) {
  1.1587 +
  1.1588 +    Label L_load_element, L_store_element, L_do_card_marks, L_done;
  1.1589 +
  1.1590 +    // Input registers (after setup_arg_regs)
  1.1591 +    const Register from        = rdi;   // source array address
  1.1592 +    const Register to          = rsi;   // destination array address
  1.1593 +    const Register length      = rdx;   // elements count
  1.1594 +    const Register ckoff       = rcx;   // super_check_offset
  1.1595 +    const Register ckval       = r8;    // super_klass
  1.1596 +
  1.1597 +    // Registers used as temps (r13, r14 are save-on-entry)
  1.1598 +    const Register end_from    = from;  // source array end address
  1.1599 +    const Register end_to      = r13;   // destination array end address
  1.1600 +    const Register count       = rdx;   // -(count_remaining)
  1.1601 +    const Register r14_length  = r14;   // saved copy of length
  1.1602 +    // End pointers are inclusive, and if length is not zero they point
  1.1603 +    // to the last unit copied:  end_to[0] := end_from[0]
  1.1604 +
  1.1605 +    const Register rax_oop    = rax;    // actual oop copied
  1.1606 +    const Register r11_klass  = r11;    // oop._klass
  1.1607 +
  1.1608 +    //---------------------------------------------------------------
  1.1609 +    // Assembler stub will be used for this call to arraycopy
  1.1610 +    // if the two arrays are subtypes of Object[] but the
  1.1611 +    // destination array type is not equal to or a supertype
  1.1612 +    // of the source type.  Each element must be separately
  1.1613 +    // checked.
  1.1614 +
  1.1615 +    __ align(CodeEntryAlignment);
  1.1616 +    StubCodeMark mark(this, "StubRoutines", name);
  1.1617 +    address start = __ pc();
  1.1618 +
  1.1619 +    __ enter(); // required for proper stackwalking of RuntimeStub frame
  1.1620 +
  1.1621 +    checkcast_copy_entry  = __ pc();  // re-entry point jumped to from generate_generic_copy
  1.1622 +    BLOCK_COMMENT("Entry:");
  1.1623 +
  1.1624 +#ifdef ASSERT
  1.1625 +    // caller guarantees that the arrays really are different
  1.1626 +    // otherwise, we would have to make conjoint checks
  1.1627 +    { Label L;
  1.1628 +      array_overlap_test(L, TIMES_OOP);
  1.1629 +      __ stop("checkcast_copy within a single array");
  1.1630 +      __ bind(L);
  1.1631 +    }
  1.1632 +#endif //ASSERT
  1.1633 +
  1.1634 +    // allocate spill slots for r13, r14
  1.1635 +    enum {
  1.1636 +      saved_r13_offset,
  1.1637 +      saved_r14_offset,
  1.1638 +      saved_rbp_offset,
  1.1639 +      saved_rip_offset,
  1.1640 +      saved_rarg0_offset
  1.1641 +    };
  1.1642 +    __ subptr(rsp, saved_rbp_offset * wordSize);
  1.1643 +    __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
  1.1644 +    __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
  1.1645 +    setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
  1.1646 +                       // ckoff => rcx, ckval => r8
  1.1647 +                       // r9 and r10 may be used to save non-volatile registers
  1.1648 +#ifdef _WIN64
  1.1649 +    // last argument (#4) is on stack on Win64
  1.1650 +    const int ckval_offset = saved_rarg0_offset + 4;
  1.1651 +    __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
  1.1652 +#endif
  1.1653 +
  1.1654 +    // check that int operands are properly extended to size_t
  1.1655 +    assert_clean_int(length, rax);
  1.1656 +    assert_clean_int(ckoff, rax);
  1.1657 +
  1.1658 +#ifdef ASSERT
  1.1659 +    BLOCK_COMMENT("assert consistent ckoff/ckval");
  1.1660 +    // The ckoff and ckval must be mutually consistent,
  1.1661 +    // even though caller generates both.
  1.1662 +    { Label L;
  1.1663 +      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  1.1664 +                        Klass::super_check_offset_offset_in_bytes());
  1.1665 +      __ cmpl(ckoff, Address(ckval, sco_offset));
  1.1666 +      __ jcc(Assembler::equal, L);
  1.1667 +      __ stop("super_check_offset inconsistent");
  1.1668 +      __ bind(L);
  1.1669 +    }
  1.1670 +#endif //ASSERT
  1.1671 +
  1.1672 +    // Loop-invariant addresses.  They are exclusive end pointers.
  1.1673 +    Address end_from_addr(from, length, TIMES_OOP, 0);
  1.1674 +    Address   end_to_addr(to,   length, TIMES_OOP, 0);
  1.1675 +    // Loop-variant addresses.  They assume post-incremented count < 0.
  1.1676 +    Address from_element_addr(end_from, count, TIMES_OOP, 0);
  1.1677 +    Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
  1.1678 +
  1.1679 +    gen_write_ref_array_pre_barrier(to, count);
  1.1680 +
  1.1681 +    // Copy from low to high addresses, indexed from the end of each array.
  1.1682 +    __ lea(end_from, end_from_addr);
  1.1683 +    __ lea(end_to,   end_to_addr);
  1.1684 +    __ movptr(r14_length, length);        // save a copy of the length
  1.1685 +    assert(length == count, "");          // else fix next line:
  1.1686 +    __ negptr(count);                     // negate and test the length
  1.1687 +    __ jcc(Assembler::notZero, L_load_element);
  1.1688 +
  1.1689 +    // Empty array:  Nothing to do.
  1.1690 +    __ xorptr(rax, rax);                  // return 0 on (trivial) success
  1.1691 +    __ jmp(L_done);
  1.1692 +
  1.1693 +    // ======== begin loop ========
  1.1694 +    // (Loop is rotated; its entry is L_load_element.)
  1.1695 +    // Loop control:
  1.1696 +    //   for (count = -count; count != 0; count++)
  1.1697 +    // Base pointers src, dst are biased by 8*(count-1), to last element.
  1.1698 +    __ align(16);
  1.1699 +
  1.1700 +    __ BIND(L_store_element);
  1.1701 +    __ store_heap_oop(rax_oop, to_element_addr);  // store the oop
  1.1702 +    __ increment(count);               // increment the count toward zero
  1.1703 +    __ jcc(Assembler::zero, L_do_card_marks);
  1.1704 +
  1.1705 +    // ======== loop entry is here ========
  1.1706 +    __ BIND(L_load_element);
  1.1707 +    __ load_heap_oop(rax_oop, from_element_addr); // load the oop
  1.1708 +    __ testptr(rax_oop, rax_oop);
  1.1709 +    __ jcc(Assembler::zero, L_store_element);     // null oops need no type check
  1.1710 +
  1.1711 +    __ load_klass(r11_klass, rax_oop);// query the object klass
  1.1712 +    generate_type_check(r11_klass, ckoff, ckval, L_store_element);
  1.1713 +    // ======== end loop ========
  1.1714 +
  1.1715 +    // It was a real error; we must depend on the caller to finish the job.
  1.1716 +    // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
  1.1717 +    // Emit GC store barriers for the oops we have copied (r14 + rdx),
  1.1718 +    // and report their number to the caller.
  1.1719 +    assert_different_registers(rax, r14_length, count, to, end_to, rcx);
  1.1720 +    __ lea(end_to, to_element_addr);
  1.1721 +    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
  1.1722 +    __ movptr(rax, r14_length);           // original oops
  1.1723 +    __ addptr(rax, count);                // K = (original - remaining) oops
  1.1724 +    __ notptr(rax);                       // report (-1^K) to caller
  1.1725 +    __ jmp(L_done);
  1.1726 +
  1.1727 +    // Come here on success only.
  1.1728 +    __ BIND(L_do_card_marks);
  1.1729 +    __ addptr(end_to, -wordSize);         // make an inclusive end pointer
  1.1730 +    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
  1.1731 +    __ xorptr(rax, rax);                  // return 0 on success
  1.1732 +
  1.1733 +    // Common exit point (success or failure).
  1.1734 +    __ BIND(L_done);
  1.1735 +    __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
  1.1736 +    __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
  1.1737 +    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
  1.1738 +    restore_arg_regs();
  1.1739 +    __ leave(); // required for proper stackwalking of RuntimeStub frame
  1.1740 +    __ ret(0);
  1.1741 +
  1.1742 +    return start;
  1.1743 +  }
  1.1744 +
  1.1745 +  //
  1.1746 +  //  Generate 'unsafe' array copy stub
  1.1747 +  //  Though just as safe as the other stubs, it takes an unscaled
  1.1748 +  //  size_t argument instead of an element count.
  1.1749 +  //
  1.1750 +  //  Input:
  1.1751 +  //    c_rarg0   - source array address
  1.1752 +  //    c_rarg1   - destination array address
  1.1753 +  //    c_rarg2   - byte count, treated as ssize_t, can be zero
  1.1754 +  //
  1.1755 +  // Examines the alignment of the operands and dispatches
  1.1756 +  // to a long, int, short, or byte copy loop.
  1.1757 +  //
  1.1758 +  address generate_unsafe_copy(const char *name) {
  1.1759 +
  1.1760 +    Label L_long_aligned, L_int_aligned, L_short_aligned;
  1.1761 +
  1.1762 +    // Input registers (before setup_arg_regs)
  1.1763 +    const Register from        = c_rarg0;  // source array address
  1.1764 +    const Register to          = c_rarg1;  // destination array address
  1.1765 +    const Register size        = c_rarg2;  // byte count (size_t)
  1.1766 +
  1.1767 +    // Register used as a temp
  1.1768 +    const Register bits        = rax;      // test copy of low bits
  1.1769 +
  1.1770 +    __ align(CodeEntryAlignment);
  1.1771 +    StubCodeMark mark(this, "StubRoutines", name);
  1.1772 +    address start = __ pc();
  1.1773 +
  1.1774 +    __ enter(); // required for proper stackwalking of RuntimeStub frame
  1.1775 +
  1.1776 +    // bump this on entry, not on exit:
  1.1777 +    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
  1.1778 +
  1.1779 +    __ mov(bits, from);                // OR together from, to and size;
  1.1780 +    __ orptr(bits, to);                // the common low bits give the finest
  1.1781 +    __ orptr(bits, size);              // alignment shared by all three
  1.1782 +
  1.1783 +    __ testb(bits, BytesPerLong-1);    // all 8-byte aligned?
  1.1784 +    __ jccb(Assembler::zero, L_long_aligned);
  1.1785 +
  1.1786 +    __ testb(bits, BytesPerInt-1);     // all 4-byte aligned?
  1.1787 +    __ jccb(Assembler::zero, L_int_aligned);
  1.1788 +
  1.1789 +    __ testb(bits, BytesPerShort-1);   // odd alignment => byte copy
  1.1790 +    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
  1.1791 +
  1.1792 +    __ BIND(L_short_aligned);
  1.1793 +    __ shrptr(size, LogBytesPerShort); // size => short_count
  1.1794 +    __ jump(RuntimeAddress(short_copy_entry));
  1.1795 +
  1.1796 +    __ BIND(L_int_aligned);
  1.1797 +    __ shrptr(size, LogBytesPerInt); // size => int_count
  1.1798 +    __ jump(RuntimeAddress(int_copy_entry));
  1.1799 +
  1.1800 +    __ BIND(L_long_aligned);
  1.1801 +    __ shrptr(size, LogBytesPerLong); // size => qword_count
  1.1802 +    __ jump(RuntimeAddress(long_copy_entry));
  1.1803 +
  1.1804 +    return start;
  1.1805 +  }
  1.1806 +
  1.1807 +  // Perform range checks on the proposed arraycopy.
  1.1808 +  // Kills temp, but nothing else.
  1.1809 +  // Also, clean the sign bits of src_pos and dst_pos.
  1.1810 +  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
  1.1811 +                              Register src_pos, // source position (c_rarg1)
  1.1812 +                              Register dst,     // destination array oop (c_rarg2)
  1.1813 +                              Register dst_pos, // destination position (c_rarg3)
  1.1814 +                              Register length,
  1.1815 +                              Register temp,
  1.1816 +                              Label& L_failed) {
  1.1817 +    BLOCK_COMMENT("arraycopy_range_checks:");
  1.1818 +
  1.1819 +    //  if (src_pos + length > arrayOop(src)->length())  FAIL;
  1.1820 +    __ movl(temp, length);
  1.1821 +    __ addl(temp, src_pos);             // src_pos + length
  1.1822 +    __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
  1.1823 +    __ jcc(Assembler::above, L_failed);
  1.1824 +
  1.1825 +    //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
  1.1826 +    __ movl(temp, length);
  1.1827 +    __ addl(temp, dst_pos);             // dst_pos + length
  1.1828 +    __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
  1.1829 +    __ jcc(Assembler::above, L_failed);
  1.1830 +
  1.1831 +    // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
  1.1832 +    // Move with sign extension can be used since they are positive.
  1.1833 +    __ movslq(src_pos, src_pos);
  1.1834 +    __ movslq(dst_pos, dst_pos);
  1.1835 +
  1.1836 +    BLOCK_COMMENT("arraycopy_range_checks done");
  1.1837 +  }
  1.1838 +
  1.1839 +  //
  1.1840 +  //  Generate generic array copy stubs
  1.1841 +  //
  1.1842 +  //  Input:
  1.1843 +  //    c_rarg0    -  src oop
  1.1844 +  //    c_rarg1    -  src_pos (32-bits)
  1.1845 +  //    c_rarg2    -  dst oop
  1.1846 +  //    c_rarg3    -  dst_pos (32-bits)
  1.1847 +  // not Win64
  1.1848 +  //    c_rarg4    -  element count (32-bits)
  1.1849 +  // Win64
  1.1850 +  //    rsp+40     -  element count (32-bits)
  1.1851 +  //
  1.1852 +  //  Output:
  1.1853 +  //    rax ==  0  -  success
  1.1854 +  //    rax == -1^K - failure, where K is partial transfer count
  1.1855 +  //
  1.1856 +  address generate_generic_copy(const char *name) {
  1.1857 +
  1.1858 +    Label L_failed, L_failed_0, L_objArray;
  1.1859 +    Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
  1.1860 +
  1.1861 +    // Input registers
  1.1862 +    const Register src        = c_rarg0;  // source array oop
  1.1863 +    const Register src_pos    = c_rarg1;  // source position
  1.1864 +    const Register dst        = c_rarg2;  // destination array oop
  1.1865 +    const Register dst_pos    = c_rarg3;  // destination position
  1.1866 +    // elements count is on stack on Win64
  1.1867 +#ifdef _WIN64
  1.1868 +#define C_RARG4 Address(rsp, 6 * wordSize)
  1.1869 +#else
  1.1870 +#define C_RARG4 c_rarg4
  1.1871 +#endif
  1.1872 +
  1.1873 +    { int modulus = CodeEntryAlignment;
  1.1874 +      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
  1.1875 +      int advance = target - (__ offset() % modulus);
  1.1876 +      if (advance < 0)  advance += modulus;
  1.1877 +      if (advance > 0)  __ nop(advance);
  1.1878 +    }
  1.1879 +    StubCodeMark mark(this, "StubRoutines", name);
  1.1880 +
  1.1881 +    // Short-hop target to L_failed.  Makes for denser prologue code.
  1.1882 +    __ BIND(L_failed_0);
  1.1883 +    __ jmp(L_failed);
  1.1884 +    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
  1.1885 +
  1.1886 +    __ align(CodeEntryAlignment);
  1.1887 +    address start = __ pc();
  1.1888 +
  1.1889 +    __ enter(); // required for proper stackwalking of RuntimeStub frame
  1.1890 +
  1.1891 +    // bump this on entry, not on exit:
  1.1892 +    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
  1.1893 +
  1.1894 +    //-----------------------------------------------------------------------
  1.1895 +    // Assembler stub will be used for this call to arraycopy
  1.1896 +    // if the following conditions are met:
  1.1897 +    //
  1.1898 +    // (1) src and dst must not be null.
  1.1899 +    // (2) src_pos must not be negative.
  1.1900 +    // (3) dst_pos must not be negative.
  1.1901 +    // (4) length  must not be negative.
  1.1902 +    // (5) src klass and dst klass should be the same and not NULL.
  1.1903 +    // (6) src and dst should be arrays.
  1.1904 +    // (7) src_pos + length must not exceed length of src.
  1.1905 +    // (8) dst_pos + length must not exceed length of dst.
  1.1906 +    //
  1.1907 +
  1.1908 +    //  if (src == NULL) return -1;
  1.1909 +    __ testptr(src, src);         // src oop
  1.1910 +    size_t j1off = __ offset();
  1.1911 +    __ jccb(Assembler::zero, L_failed_0);
  1.1912 +
  1.1913 +    //  if (src_pos < 0) return -1;
  1.1914 +    __ testl(src_pos, src_pos); // src_pos (32-bits)
  1.1915 +    __ jccb(Assembler::negative, L_failed_0);
  1.1916 +
  1.1917 +    //  if (dst == NULL) return -1;
  1.1918 +    __ testptr(dst, dst);         // dst oop
  1.1919 +    __ jccb(Assembler::zero, L_failed_0);
  1.1920 +
  1.1921 +    //  if (dst_pos < 0) return -1;
  1.1922 +    __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
  1.1923 +    size_t j4off = __ offset();
  1.1924 +    __ jccb(Assembler::negative, L_failed_0);
  1.1925 +
  1.1926 +    // The first four tests are very dense code,
  1.1927 +    // but not quite dense enough to put four
  1.1928 +    // jumps in a 16-byte instruction fetch buffer.
  1.1929 +    // That's good, because some branch predictors
  1.1930 +    // do not like jumps so close together.
  1.1931 +    // Make sure of this.
  1.1932 +    guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
  1.1933 +
  1.1934 +    // registers used as temp
  1.1935 +    const Register r11_length    = r11; // elements count to copy
  1.1936 +    const Register r10_src_klass = r10; // array klass
  1.1937 +    const Register r9_dst_klass  = r9;  // dest array klass
  1.1938 +
  1.1939 +    //  if (length < 0) return -1;
  1.1940 +    __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
  1.1941 +    __ testl(r11_length, r11_length);
  1.1942 +    __ jccb(Assembler::negative, L_failed_0);
  1.1943 +
  1.1944 +    __ load_klass(r10_src_klass, src);
  1.1945 +#ifdef ASSERT
  1.1946 +    //  assert(src->klass() != NULL);
  1.1947 +    BLOCK_COMMENT("assert klasses not null");
  1.1948 +    { Label L1, L2;
  1.1949 +      __ testptr(r10_src_klass, r10_src_klass);
  1.1950 +      __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
  1.1951 +      __ bind(L1);
  1.1952 +      __ stop("broken null klass");
  1.1953 +      __ bind(L2);
  1.1954 +      __ load_klass(r9_dst_klass, dst);
  1.1955 +      __ cmpq(r9_dst_klass, 0);
  1.1956 +      __ jcc(Assembler::equal, L1);     // this would be broken also
  1.1957 +      BLOCK_COMMENT("assert done");
  1.1958 +    }
  1.1959 +#endif
  1.1960 +
  1.1961 +    // Load layout helper (32-bits)
  1.1962 +    //
  1.1963 +    //  |array_tag|     | header_size | element_type |     |log2_element_size|
  1.1964 +    // 32        30    24            16              8     2                 0
  1.1965 +    //
  1.1966 +    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
  1.1967 +    //
  1.1968 +
  1.1969 +    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
  1.1970 +                    Klass::layout_helper_offset_in_bytes();
  1.1971 +
  1.1972 +    const Register rax_lh = rax;  // layout helper
  1.1973 +
  1.1974 +    __ movl(rax_lh, Address(r10_src_klass, lh_offset));
  1.1975 +
  1.1976 +    // Handle objArrays completely differently...
  1.1977 +    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
  1.1978 +    __ cmpl(rax_lh, objArray_lh);
  1.1979 +    __ jcc(Assembler::equal, L_objArray);
  1.1980 +
  1.1981 +    //  if (src->klass() != dst->klass()) return -1;
  1.1982 +    __ load_klass(r9_dst_klass, dst);
  1.1983 +    __ cmpq(r10_src_klass, r9_dst_klass);
  1.1984 +    __ jcc(Assembler::notEqual, L_failed);
  1.1985 +
  1.1986 +    //  if (!src->is_Array()) return -1;
  1.1987 +    __ cmpl(rax_lh, Klass::_lh_neutral_value);
  1.1988 +    __ jcc(Assembler::greaterEqual, L_failed);
  1.1989 +
  1.1990 +    // At this point, it is known to be a typeArray (array_tag 0x3).
  1.1991 +#ifdef ASSERT
  1.1992 +    { Label L;
  1.1993 +      __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
  1.1994 +      __ jcc(Assembler::greaterEqual, L);
  1.1995 +      __ stop("must be a primitive array");
  1.1996 +      __ bind(L);
  1.1997 +    }
  1.1998 +#endif
  1.1999 +
  1.2000 +    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  1.2001 +                           r10, L_failed);
  1.2002 +
  1.2003 +    // typeArrayKlass
  1.2004 +    //
  1.2005 +    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
  1.2006 +    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
  1.2007 +    //
  1.2008 +
  1.2009 +    const Register r10_offset = r10;    // array offset
  1.2010 +    const Register rax_elsize = rax_lh; // element size
  1.2011 +
  1.2012 +    __ movl(r10_offset, rax_lh);
  1.2013 +    __ shrl(r10_offset, Klass::_lh_header_size_shift);
  1.2014 +    __ andptr(r10_offset, Klass::_lh_header_size_mask);   // array_offset
  1.2015 +    __ addptr(src, r10_offset);           // src array offset
  1.2016 +    __ addptr(dst, r10_offset);           // dst array offset
  1.2017 +    BLOCK_COMMENT("choose copy loop based on element size");
  1.2018 +    __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
  1.2019 +
  1.2020 +    // next registers should be set before the jump to corresponding stub
  1.2021 +    const Register from     = c_rarg0;  // source array address
  1.2022 +    const Register to       = c_rarg1;  // destination array address
  1.2023 +    const Register count    = c_rarg2;  // elements count
  1.2024 +
  1.2025 +    // 'from', 'to', 'count' registers should be set in such order
  1.2026 +    // since they are the same as 'src', 'src_pos', 'dst'.
  1.2027 +
  1.2028 +  __ BIND(L_copy_bytes);
  1.2029 +    __ cmpl(rax_elsize, 0);
  1.2030 +    __ jccb(Assembler::notEqual, L_copy_shorts);
  1.2031 +    __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
  1.2032 +    __ lea(to,   Address(dst, dst_pos, Address::times_1, 0));// dst_addr
  1.2033 +    __ movl2ptr(count, r11_length); // length
  1.2034 +    __ jump(RuntimeAddress(byte_copy_entry));
  1.2035 +
  1.2036 +  __ BIND(L_copy_shorts);
  1.2037 +    __ cmpl(rax_elsize, LogBytesPerShort);
  1.2038 +    __ jccb(Assembler::notEqual, L_copy_ints);
  1.2039 +    __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
  1.2040 +    __ lea(to,   Address(dst, dst_pos, Address::times_2, 0));// dst_addr
  1.2041 +    __ movl2ptr(count, r11_length); // length
  1.2042 +    __ jump(RuntimeAddress(short_copy_entry));
  1.2043 +
  1.2044 +  __ BIND(L_copy_ints);
  1.2045 +    __ cmpl(rax_elsize, LogBytesPerInt);
  1.2046 +    __ jccb(Assembler::notEqual, L_copy_longs);
  1.2047 +    __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
  1.2048 +    __ lea(to,   Address(dst, dst_pos, Address::times_4, 0));// dst_addr
  1.2049 +    __ movl2ptr(count, r11_length); // length
  1.2050 +    __ jump(RuntimeAddress(int_copy_entry));
  1.2051 +
  1.2052 +  __ BIND(L_copy_longs);
  1.2053 +#ifdef ASSERT
  1.2054 +    { Label L;
  1.2055 +      __ cmpl(rax_elsize, LogBytesPerLong);
  1.2056 +      __ jcc(Assembler::equal, L);
  1.2057 +      __ stop("must be long copy, but elsize is wrong");
  1.2058 +      __ bind(L);
  1.2059 +    }
  1.2060 +#endif
  1.2061 +    __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
  1.2062 +    __ lea(to,   Address(dst, dst_pos, Address::times_8, 0));// dst_addr
  1.2063 +    __ movl2ptr(count, r11_length); // length
  1.2064 +    __ jump(RuntimeAddress(long_copy_entry));
  1.2065 +
  1.2066 +    // objArrayKlass
  1.2067 +  __ BIND(L_objArray);
  1.2068 +    // live at this point:  r10_src_klass, src[_pos], dst[_pos]
  1.2069 +
  1.2070 +    Label L_plain_copy, L_checkcast_copy;
  1.2071 +    //  test array classes for subtyping
  1.2072 +    __ load_klass(r9_dst_klass, dst);
  1.2073 +    __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
  1.2074 +    __ jcc(Assembler::notEqual, L_checkcast_copy);
  1.2075 +
  1.2076 +    // Identically typed arrays can be copied without element-wise checks.
  1.2077 +    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  1.2078 +                           r10, L_failed);
  1.2079 +
  1.2080 +    __ lea(from, Address(src, src_pos, TIMES_OOP,
  1.2081 +                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
  1.2082 +    __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
  1.2083 +                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
  1.2084 +    __ movl2ptr(count, r11_length); // length
  1.2085 +  __ BIND(L_plain_copy);
  1.2086 +    __ jump(RuntimeAddress(oop_copy_entry));
  1.2087 +
  1.2088 +  __ BIND(L_checkcast_copy);
  1.2089 +    // live at this point:  r10_src_klass, !r11_length
  1.2090 +    {
  1.2091 +      // assert(r11_length == C_RARG4); // will reload from here
  1.2092 +      Register r11_dst_klass = r11;
  1.2093 +      __ load_klass(r11_dst_klass, dst);
  1.2094 +
  1.2095 +      // Before looking at dst.length, make sure dst is also an objArray.
  1.2096 +      __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
  1.2097 +      __ jcc(Assembler::notEqual, L_failed);
  1.2098 +
  1.2099 +      // It is safe to examine both src.length and dst.length.
  1.2100 +#ifndef _WIN64
  1.2101 +      arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
  1.2102 +                             rax, L_failed);
  1.2103 +#else
  1.2104 +      __ movl(r11_length, C_RARG4);     // reload
  1.2105 +      arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  1.2106 +                             rax, L_failed);
  1.2107 +      __ load_klass(r11_dst_klass, dst); // reload
  1.2108 +#endif
  1.2109 +
  1.2110 +      // Marshal the base address arguments now, freeing registers.
  1.2111 +      __ lea(from, Address(src, src_pos, TIMES_OOP,
  1.2112 +                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  1.2113 +      __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
  1.2114 +                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  1.2115 +      __ movl(count, C_RARG4);          // length (reloaded)
  1.2116 +      Register sco_temp = c_rarg3;      // this register is free now
  1.2117 +      assert_different_registers(from, to, count, sco_temp,
  1.2118 +                                 r11_dst_klass, r10_src_klass);
  1.2119 +      assert_clean_int(count, sco_temp);
  1.2120 +
  1.2121 +      // Generate the type check.
  1.2122 +      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  1.2123 +                        Klass::super_check_offset_offset_in_bytes());
  1.2124 +      __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
  1.2125 +      assert_clean_int(sco_temp, rax);
  1.2126 +      generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
  1.2127 +
  1.2128 +      // Fetch destination element klass from the objArrayKlass header.
  1.2129 +      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
  1.2130 +                       objArrayKlass::element_klass_offset_in_bytes());
  1.2131 +      __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
  1.2132 +      __ movl(sco_temp,      Address(r11_dst_klass, sco_offset));
  1.2133 +      assert_clean_int(sco_temp, rax);
  1.2134 +
  1.2135 +      // the checkcast_copy loop needs two extra arguments:
  1.2136 +      assert(c_rarg3 == sco_temp, "#3 already in place");
  1.2137 +      __ movptr(C_RARG4, r11_dst_klass);  // dst.klass.element_klass
  1.2138 +      __ jump(RuntimeAddress(checkcast_copy_entry));
  1.2139 +    }
  1.2140 +
  1.2141 +  __ BIND(L_failed);
  1.2142 +    __ xorptr(rax, rax);
  1.2143 +    __ notptr(rax); // return -1
  1.2144 +    __ leave();   // required for proper stackwalking of RuntimeStub frame
  1.2145 +    __ ret(0);
  1.2146 +
  1.2147 +    return start;
  1.2148 +  }
  1.2149 +
  1.2150 +#undef length_arg
  1.2151 +#endif
  1.2152 +
  1.2153 +//FIXME
  1.2154 +  address generate_disjoint_long_copy(bool aligned, const char *name) {
  1.2155 +	  Label l_1, l_2;
  1.2156 +	  StubCodeMark mark(this, "StubRoutines", name);
  1.2157 +	  __ align(CodeEntryAlignment);
  1.2158 +	  address start = __ pc();
  1.2159 +
  1.2160 +	  //      __ movl(ecx, Address(esp, 4+8));       // count
  1.2161 +	  //     __ movl(eax, Address(esp, 4+0));       // from
  1.2162 +	  //    __ movl(edx, Address(esp, 4+4));       // to
  1.2163 +	  __ move(T1, A2);  // T1 = count of jlongs
  1.2164 +	  __ move(T3, A0); // T3 = source address
  1.2165 +	  __ move(T0, A1); // T0 = destination address
  1.2166 +	  __ push(T3); 
  1.2167 +	  __ push(T0);
  1.2168 +	  __ push(T1);
  1.2169 +	  //__ subl(edx, eax);
  1.2170 +	  //__ jmp(l_2);
  1.2171 +	  __ b(l_2);  // enter at the bottom test so a zero count copies nothing
  1.2172 +	  __ delayed()->nop();   
  1.2173 +	  __ align(16);
  1.2174 +	  __ bind(l_1);
  1.2175 +	  //   if (VM_Version::supports_mmx()) {
  1.2176 +	  //     __ movq(mmx0, Address(eax));
  1.2177 +	  //     __ movq(Address(eax, edx, Address::times_1), mmx0);
  1.2178 +	  //   } else {
  1.2179 +	  //   __ fild_d(Address(eax));
  1.2180 +	  __ ld(AT, T3, 0);   // load one jlong from source
  1.2181 +	  // __ fistp_d(Address(eax, edx, Address::times_1));
  1.2182 +	  __ sd (AT, T0, 0); // store it to destination
  1.2183 +	  //   }
  1.2184 +	  //   __ addl(eax, 8);
  1.2185 +	  __ addi(T3, T3, 8); 
  1.2186 +	  __ addi(T0, T0, 8); 
  1.2187 +	  __ bind(l_2);
  1.2188 +	  //    __ decl(ecx);
  1.2189 +	  __ addi(T1, T1, -1); 
  1.2190 +	  //    __ jcc(Assembler::greaterEqual, l_1);
  1.2191 +	  __ bgez(T1, l_1);    // loop until count goes negative
  1.2192 +	  __ delayed()->nop(); 
  1.2193 +	  //  if (VM_Version::supports_mmx()) {
  1.2194 +	  //    __ emms();
  1.2195 +	  //  }
  1.2196 +	  //  __ ret(0);
  1.2197 +	  __ pop(T1); 
  1.2198 +	  __ pop(T0); 
  1.2199 +	  __ pop(T3); 
  1.2200 +	  __ jr(RA); 
  1.2201 +	  __ delayed()->nop(); 
  1.2202 +	  return start;
  1.2203 +  }
  1.2204 +
  1.2205 +
  1.2206 +  address generate_conjoint_long_copy(bool aligned, const char *name) {
  1.2207 +	  Label l_1, l_2;
  1.2208 +	  StubCodeMark mark(this, "StubRoutines", name);
  1.2209 +	  __ align(CodeEntryAlignment);
  1.2210 +	  address start = __ pc();
  1.2211 +	  address nooverlap_target = aligned ?
  1.2212 +		  StubRoutines::arrayof_jlong_disjoint_arraycopy() :
  1.2213 +		  StubRoutines::jlong_disjoint_arraycopy();
  1.2214 +	  array_overlap_test(nooverlap_target, 3);
  1.2215 +
  1.2216 +	  __ push(T3); 
  1.2217 +	  __ push(T0); 
  1.2218 +	  __ push(T1); 
  1.2219 +
  1.2220 +		/*      __ movl(ecx, Address(esp, 4+8));       // count
  1.2221 +						__ movl(eax, Address(esp, 4+0));       // from
  1.2222 +						__ movl(edx, Address(esp, 4+4));       // to
  1.2223 +						__ jmp(l_2);
  1.2224 +
  1.2225 +		 */
  1.2226 +	  __ move(T1, A2);  // T1 = count of jlongs
  1.2227 +	  __ move(T3, A0); // T3 = source address
  1.2228 +	  __ move(T0, A1); // T0 = destination address
  1.2229 +	  __ sll(AT, T1, Address::times_8); // AT = byte size of the copy
  1.2230 +	  __ add(AT, T3, AT); 
  1.2231 +	  __ lea(T3 , Address(AT, -8)); // T3 = address of last source element
  1.2232 +	  __ sll(AT, T1, Address::times_8); 
  1.2233 +	  __ add(AT, T0, AT); 
  1.2234 +	  __ lea(T0 , Address(AT, -8)); // T0 = address of last destination element
  1.2235 +
  1.2236 +
  1.2237 +
  1.2238 +	  __ b(l_2); // enter at the bottom test so a zero count copies nothing
  1.2239 +	  __ delayed()->nop(); 
  1.2240 +	  __ align(16);
  1.2241 +		__ bind(l_1);
  1.2242 +		/*      if (VM_Version::supports_mmx()) {
  1.2243 +						__ movq(mmx0, Address(eax, ecx, Address::times_8));
  1.2244 +						__ movq(Address(edx, ecx,Address::times_8), mmx0);
  1.2245 +						} else {
  1.2246 +						__ fild_d(Address(eax, ecx, Address::times_8));
  1.2247 +						__ fistp_d(Address(edx, ecx,Address::times_8));
  1.2248 +						}
  1.2249 +		 */    
  1.2250 +		__ ld(AT, T3, 0);   // copy one jlong, walking downward
  1.2251 +		__ sd (AT, T0, 0); 
  1.2252 +	  __ addi(T3, T3, -8); 
  1.2253 +	  __ addi(T0, T0,-8); 
  1.2254 +	  __ bind(l_2);
  1.2255 +	  //	    __ decl(ecx);
  1.2256 +	  __ addi(T1, T1, -1); 
  1.2257 +	  //__ jcc(Assembler::greaterEqual, l_1);
  1.2258 +	  __ bgez(T1, l_1); // loop until count goes negative
  1.2259 +	  __ delayed()->nop(); 
  1.2260 +	  //      if (VM_Version::supports_mmx()) {
  1.2261 +	  //      __ emms();
  1.2262 +	  //   }
  1.2263 +	  //  __ ret(0);
  1.2264 +	  __ pop(T1); 
  1.2265 +	  __ pop(T0); 
  1.2266 +	  __ pop(T3); 
  1.2267 +	  __ jr(RA); 
  1.2268 +	  __ delayed()->nop();  
  1.2269 +	  return start;
  1.2270 +  }
  1.2271 +
  1.2272 +  void generate_arraycopy_stubs() {
  1.2273 +    if (UseCompressedOops) { // narrow oops are 32-bit wide: reuse the int copy loops
  1.2274 +      StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
  1.2275 +      StubRoutines::_oop_arraycopy   	= generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
  1.2276 +    } else {
  1.2277 +      StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
  1.2278 +      StubRoutines::_oop_arraycopy   	= generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
  1.2279 +    }
  1.2280 +
  1.2281 +    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
  1.2282 +    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
  1.2283 +    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
  1.2284 +    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
  1.2285 +    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
  1.2286 +
  1.2287 +    //  if (VM_Version::supports_mmx())
  1.2288 +    //if (false)
  1.2289 +    // StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_mmx_copy_aligned("arrayof_jshort_disjoint_arraycopy");
  1.2290 +    // else
  1.2291 +    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
  1.2292 +    StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy");
  1.2293 +    //StubRoutines::_arrayof_oop_disjoint_arraycopy   = generate_disjoint_int_oop_copy(true, true, "arrayof_oop_disjoint_arraycopy");
  1.2294 +    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
  1.2295 +
  1.2296 +    StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); // conjoint stubs read the disjoint entries set above
  1.2297 +    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
  1.2298 +    StubRoutines::_jint_arraycopy   = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
  1.2299 +    StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");
  1.2300 +
  1.2301 +    StubRoutines::_arrayof_jbyte_arraycopy  = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
  1.2302 +    StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
  1.2303 +    StubRoutines::_arrayof_jint_arraycopy   = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy");
  1.2304 +    //StubRoutines::_arrayof_oop_arraycopy    = generate_conjoint_int_oop_copy(true, true, "arrayof_oop_arraycopy");
  1.2305 +    StubRoutines::_arrayof_jlong_arraycopy  = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy");
  1.2306 +
  1.2307 +    StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy; // arrayof forms alias the plain oop stubs
  1.2308 +    StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
  1.2309 +  }
  1.2310 +
// Support for SafeFetch32 and SafeFetchN: load a word from an address that
// may be unmapped.  If the load at *fault_pc faults, the signal handler
// resumes execution at *continuation_pc, leaving errValue (A1) as the result.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   A0 = adr
    //   A1 = errValue
    //
    // result:
    //   V0 = *adr or errValue
    //
    // 'size' selects the load width: 4 for SafeFetch32, 8 for SafeFetchN.

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into A1, may fault.  This PC is published as *fault_pc so
    // the fault handler can recognize the access and redirect to the
    // continuation below.
    *fault_pc = __ pc();
    switch (size) {
      case 4:
        // int32_t
        __ lw(A1, A0, 0); 
        break;
      case 8:
        // int64_t
        __ ld(A1, A0, 0); 
        break;
      default:
        ShouldNotReachHere();
    }

    // return errValue or *adr
    *continuation_pc = __ pc();
    // V0 = A1 (addu with R0 is the conventional MIPS register-to-register move)
    __ addu(V0,A1,R0);
    __ jr(RA);
    __ delayed()->nop();  // branch delay slot
  }
  1.2351 +
  1.2352 +
  1.2353 +#undef __
  1.2354 +#define __ masm->
  1.2355 +
  1.2356 +  // Continuation point for throwing of implicit exceptions that are
  1.2357 +  // not handled in the current activation. Fabricates an exception
  1.2358 +  // oop and initiates normal exception dispatching in this
  1.2359 +  // frame. Since we need to preserve callee-saved values (currently
  1.2360 +  // only for C2, but done for C1 as well) we need a callee-saved oop
  1.2361 +  // map and therefore have to make these stubs into RuntimeStubs
  1.2362 +  // rather than BufferBlobs.  If the compiler needs all registers to
  1.2363 +  // be preserved between the fault point and the exception handler
  1.2364 +  // then it must assume responsibility for that in
  1.2365 +  // AbstractCompiler::continuation_for_implicit_null_exception or
  1.2366 +  // continuation_for_implicit_division_by_zero_exception. All other
  1.2367 +  // implicit exceptions (e.g., NullPointerException or
  1.2368 +  // AbstractMethodError on entry) are either at call sites or
  1.2369 +  // otherwise assume that stack unwinding will be initiated, so
  1.2370 +  // caller saved registers were assumed volatile in the compiler.
  1.2371 +  address generate_throw_exception(const char* name,
  1.2372 +                                   address runtime_entry,
  1.2373 +                                   bool restore_saved_exception_pc) {
  1.2374 +    // Information about frame layout at time of blocking runtime call.
  1.2375 +    // Note that we only have to preserve callee-saved registers since
  1.2376 +    // the compilers are responsible for supplying a continuation point
  1.2377 +		// if they expect all registers to be preserved.
  1.2378 +//#define aoqi_test
  1.2379 +#ifdef aoqi_test
  1.2380 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  1.2381 +#endif
  1.2382 +		enum layout {
  1.2383 +			thread_off,    // last_java_sp                
  1.2384 +			S7_off,        // callee saved register      sp + 1
  1.2385 +			S6_off,        // callee saved register      sp + 2
  1.2386 +			S5_off,        // callee saved register      sp + 3
  1.2387 +			S4_off,        // callee saved register      sp + 4
  1.2388 +			S3_off,        // callee saved register      sp + 5
  1.2389 +			S2_off,        // callee saved register      sp + 6
  1.2390 +			S1_off,        // callee saved register      sp + 7
  1.2391 +			S0_off,        // callee saved register      sp + 8
  1.2392 +			FP_off,
  1.2393 +			ret_address,
  1.2394 +			framesize
  1.2395 +		};
  1.2396 +
  1.2397 +		int insts_size = 2048;
  1.2398 +		int locs_size  = 32;
  1.2399 +
  1.2400 +		//  CodeBuffer* code     = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, 
  1.2401 +		//  NULL, NULL, NULL, false, NULL, name, false);
  1.2402 +		CodeBuffer code (name , insts_size, locs_size);
  1.2403 +#ifdef aoqi_test
  1.2404 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  1.2405 +#endif
  1.2406 +		OopMapSet* oop_maps  = new OopMapSet();
  1.2407 +#ifdef aoqi_test
  1.2408 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  1.2409 +#endif
  1.2410 +		MacroAssembler* masm = new MacroAssembler(&code);
  1.2411 +#ifdef aoqi_test
  1.2412 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  1.2413 +#endif
  1.2414 +
  1.2415 +		address start = __ pc();
  1.2416 +    	//__ stop("generate_throw_exception");
  1.2417 +		/*
  1.2418 +			 __ move(AT, (int)&jerome1 );
  1.2419 +			 __ sw(SP, AT, 0); 	
  1.2420 +			 __ move(AT, (int)&jerome2 );
  1.2421 +			 __ sw(FP, AT, 0); 	
  1.2422 +			 __ move(AT, (int)&jerome3 );
  1.2423 +			 __ sw(RA, AT, 0); 	
  1.2424 +			 __ move(AT, (int)&jerome4 );
  1.2425 +			 __ sw(R0, AT, 0); 	
  1.2426 +			 __ move(AT, (int)&jerome5 );
  1.2427 +			 __ sw(R0, AT, 0); 	
  1.2428 +			 __ move(AT, (int)&jerome6 );
  1.2429 +			 __ sw(R0, AT, 0); 	
  1.2430 +			 __ move(AT, (int)&jerome7 );
  1.2431 +			 __ sw(R0, AT, 0); 	
  1.2432 +			 __ move(AT, (int)&jerome10 );
  1.2433 +			 __ sw(R0, AT, 0); 	
  1.2434 +
  1.2435 +			 __ pushad();
  1.2436 +
  1.2437 +		//__ enter();
  1.2438 +		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
  1.2439 +		relocInfo::runtime_call_type);
  1.2440 +		__ delayed()->nop();
  1.2441 +
  1.2442 +		//__ leave();
  1.2443 +		__ popad();
  1.2444 +
  1.2445 +		 */
  1.2446 +
  1.2447 +		// This is an inlined and slightly modified version of call_VM
  1.2448 +		// which has the ability to fetch the return PC out of
  1.2449 +		// thread-local storage and also sets up last_Java_sp slightly
  1.2450 +		// differently than the real call_VM
  1.2451 +#ifndef OPT_THREAD	
  1.2452 +		Register java_thread = TREG;
  1.2453 +		__ get_thread(java_thread);
  1.2454 +#else
  1.2455 +		Register java_thread = TREG;
  1.2456 +#endif
  1.2457 +#ifdef aoqi_test
  1.2458 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  1.2459 +#endif
  1.2460 +		if (restore_saved_exception_pc) {
  1.2461 +			__ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // eax
  1.2462 +		}
  1.2463 +
  1.2464 +		__ enter(); // required for proper stackwalking of RuntimeStub frame
  1.2465 +
  1.2466 +		__ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
  1.2467 +		__ sd(S0, SP, S0_off * wordSize);
  1.2468 +		__ sd(S1, SP, S1_off * wordSize);
  1.2469 +		__ sd(S2, SP, S2_off * wordSize);
  1.2470 +		__ sd(S3, SP, S3_off * wordSize);
  1.2471 +		__ sd(S4, SP, S4_off * wordSize);
  1.2472 +		__ sd(S5, SP, S5_off * wordSize);
  1.2473 +		__ sd(S6, SP, S6_off * wordSize);
  1.2474 +		__ sd(S7, SP, S7_off * wordSize);
  1.2475 +
  1.2476 +		int frame_complete = __ pc() - start;
  1.2477 +		// push java thread (becomes first argument of C function)
  1.2478 +		__ sd(java_thread, SP, thread_off * wordSize);
  1.2479 +		if (java_thread!=A0)
  1.2480 +			__ move(A0, java_thread);
  1.2481 +
  1.2482 +		// Set up last_Java_sp and last_Java_fp
  1.2483 +		__ set_last_Java_frame(java_thread, SP, FP, NULL);
  1.2484 +		__ relocate(relocInfo::internal_pc_type);
  1.2485 +		{
  1.2486 +			intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
  1.2487 +			__ li48(AT, save_pc);
  1.2488 +		}
  1.2489 +		__ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); 
  1.2490 +
  1.2491 +		// Call runtime
  1.2492 +		__ call(runtime_entry);
  1.2493 +		__ delayed()->nop();
  1.2494 +		// Generate oop map
  1.2495 +		OopMap* map =  new OopMap(framesize, 0);        
  1.2496 +		oop_maps->add_gc_map(__ offset(),  map);
  1.2497 +
  1.2498 +		// restore the thread (cannot use the pushed argument since arguments
  1.2499 +		// may be overwritten by C code generated by an optimizing compiler);
  1.2500 +		// however can use the register value directly if it is callee saved.
  1.2501 +#ifndef OPT_THREAD
  1.2502 +		__ get_thread(java_thread);
  1.2503 +#endif
  1.2504 +
  1.2505 +		__ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1.2506 +		//  __ reset_last_Java_frame(java_thread, true);
  1.2507 +		__ reset_last_Java_frame(java_thread, true, true);
  1.2508 +
  1.2509 +		// Restore callee save registers.  This must be done after resetting the Java frame
  1.2510 +		__ ld(S0, SP, S0_off * wordSize);
  1.2511 +		__ ld(S1, SP, S1_off * wordSize);
  1.2512 +		__ ld(S2, SP, S2_off * wordSize);
  1.2513 +		__ ld(S3, SP, S3_off * wordSize);
  1.2514 +		__ ld(S4, SP, S4_off * wordSize);
  1.2515 +		__ ld(S5, SP, S5_off * wordSize);
  1.2516 +		__ ld(S6, SP, S6_off * wordSize);
  1.2517 +		__ ld(S7, SP, S7_off * wordSize);
  1.2518 +
  1.2519 +		// discard arguments
  1.2520 +		__ addi(SP, SP, (framesize-2) * wordSize); // epilog
  1.2521 +		//	__ leave(); // required for proper stackwalking of RuntimeStub frame
  1.2522 +		__ addi(SP, FP, wordSize);
  1.2523 +		__ ld(FP, SP, -1*wordSize);
  1.2524 +		// check for pending exceptions
  1.2525 +#ifdef ASSERT
  1.2526 +		Label L;
  1.2527 +		__ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  1.2528 +		__ bne(AT, R0, L);
  1.2529 +		__ delayed()->nop();
  1.2530 +		__ should_not_reach_here();
  1.2531 +		__ bind(L);
  1.2532 +#endif //ASSERT
  1.2533 +		__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1.2534 +		__ delayed()->nop();
  1.2535 +#ifdef aoqi_test
  1.2536 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  1.2537 +#endif
  1.2538 +		RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete, 
  1.2539 +										framesize, oop_maps, false);
  1.2540 +#ifdef aoqi_test
  1.2541 +tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  1.2542 +#endif
  1.2543 +		return stub->entry_point();
  1.2544 +  }
  1.2545 +
  1.2546 +  // Initialization
  1.2547 +  void generate_initial() {
  1.2548 +/*
  1.2549 +		// Generates all stubs and initializes the entry points
  1.2550 +
  1.2551 +    // This platform-specific stub is needed by generate_call_stub()
  1.2552 +    StubRoutines::mips::_mxcsr_std        = generate_fp_mask("mxcsr_std",        0x0000000000001F80);
  1.2553 +
  1.2554 +    // entry points that exist in all platforms Note: This is code
  1.2555 +    // that could be shared among different platforms - however the
  1.2556 +    // benefit seems to be smaller than the disadvantage of having a
  1.2557 +    // much more complicated generator structure. See also comment in
  1.2558 +    // stubRoutines.hpp.
  1.2559 +
  1.2560 +    StubRoutines::_forward_exception_entry = generate_forward_exception();
  1.2561 +
  1.2562 +    StubRoutines::_call_stub_entry =
  1.2563 +      generate_call_stub(StubRoutines::_call_stub_return_address);
  1.2564 +
  1.2565 +    // is referenced by megamorphic call
  1.2566 +    StubRoutines::_catch_exception_entry = generate_catch_exception();
  1.2567 +
  1.2568 +    // atomic calls
  1.2569 +    StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
  1.2570 +    StubRoutines::_atomic_xchg_ptr_entry     = generate_atomic_xchg_ptr();
  1.2571 +    StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
  1.2572 +    StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
  1.2573 +    StubRoutines::_atomic_add_entry          = generate_atomic_add();
  1.2574 +    StubRoutines::_atomic_add_ptr_entry      = generate_atomic_add_ptr();
  1.2575 +    StubRoutines::_fence_entry               = generate_orderaccess_fence();
  1.2576 +
  1.2577 +    StubRoutines::_handler_for_unsafe_access_entry =
  1.2578 +      generate_handler_for_unsafe_access();
  1.2579 +
  1.2580 +    // platform dependent
  1.2581 +    StubRoutines::mips::_get_previous_fp_entry = generate_get_previous_fp();
  1.2582 +
  1.2583 +    StubRoutines::mips::_verify_mxcsr_entry    = generate_verify_mxcsr();
  1.2584 +*/
  1.2585 +		// Generates all stubs and initializes the entry points
  1.2586 +
  1.2587 +		//-------------------------------------------------------------
  1.2588 +		//-----------------------------------------------------------
  1.2589 +		// entry points that exist in all platforms
  1.2590 +		// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller 
  1.2591 +		// than the disadvantage of having a much more complicated generator structure. 
  1.2592 +		// See also comment in stubRoutines.hpp.
  1.2593 +		StubRoutines::_forward_exception_entry = generate_forward_exception();    
  1.2594 +		StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
  1.2595 +		// is referenced by megamorphic call    
  1.2596 +		StubRoutines::_catch_exception_entry = generate_catch_exception();    
  1.2597 +
  1.2598 +		StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
  1.2599 +
  1.2600 +		// platform dependent
  1.2601 +		StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
  1.2602 +	}
  1.2603 +
  1.2604 +void generate_all() {
  1.2605 +#ifdef aoqi_test
  1.2606 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2607 +#endif
  1.2608 +    // Generates all stubs and initializes the entry points
  1.2609 +
  1.2610 +    // These entry points require SharedInfo::stack0 to be set up in
  1.2611 +    // non-core builds and need to be relocatable, so they each
  1.2612 +    // fabricate a RuntimeStub internally.
  1.2613 +	/*
  1.2614 +    StubRoutines::_throw_AbstractMethodError_entry =
  1.2615 +      generate_throw_exception("AbstractMethodError throw_exception",
  1.2616 +                               CAST_FROM_FN_PTR(address,
  1.2617 +                                                SharedRuntime::
  1.2618 +                                                throw_AbstractMethodError),
  1.2619 +                               false);
  1.2620 +
  1.2621 +    StubRoutines::_throw_IncompatibleClassChangeError_entry =
  1.2622 +      generate_throw_exception("IncompatibleClassChangeError throw_exception",
  1.2623 +                               CAST_FROM_FN_PTR(address,
  1.2624 +                                                SharedRuntime::
  1.2625 +                                                throw_IncompatibleClassChangeError),
  1.2626 +                               false);
  1.2627 +
  1.2628 +    StubRoutines::_throw_ArithmeticException_entry =
  1.2629 +      generate_throw_exception("ArithmeticException throw_exception",
  1.2630 +                               CAST_FROM_FN_PTR(address,
  1.2631 +                                                SharedRuntime::
  1.2632 +                                                throw_ArithmeticException),
  1.2633 +                               true);
  1.2634 +
  1.2635 +    StubRoutines::_throw_NullPointerException_entry =
  1.2636 +      generate_throw_exception("NullPointerException throw_exception",
  1.2637 +                               CAST_FROM_FN_PTR(address,
  1.2638 +                                                SharedRuntime::
  1.2639 +                                                throw_NullPointerException),
  1.2640 +                               true);
  1.2641 +
  1.2642 +    StubRoutines::_throw_NullPointerException_at_call_entry =
  1.2643 +      generate_throw_exception("NullPointerException at call throw_exception",
  1.2644 +                               CAST_FROM_FN_PTR(address,
  1.2645 +                                                SharedRuntime::
  1.2646 +                                                throw_NullPointerException_at_call),
  1.2647 +                               false);
  1.2648 +
  1.2649 +    StubRoutines::_throw_StackOverflowError_entry =
  1.2650 +      generate_throw_exception("StackOverflowError throw_exception",
  1.2651 +                               CAST_FROM_FN_PTR(address,
  1.2652 +                                                SharedRuntime::
  1.2653 +                                                throw_StackOverflowError),
  1.2654 +                               false);
  1.2655 +
  1.2656 +    // entry points that are platform specific
  1.2657 +    StubRoutines::mips::_f2i_fixup = generate_f2i_fixup();
  1.2658 +    StubRoutines::mips::_f2l_fixup = generate_f2l_fixup();
  1.2659 +    StubRoutines::mips::_d2i_fixup = generate_d2i_fixup();
  1.2660 +    StubRoutines::mips::_d2l_fixup = generate_d2l_fixup();
  1.2661 +
  1.2662 +    StubRoutines::mips::_float_sign_mask  = generate_fp_mask("float_sign_mask",  0x7FFFFFFF7FFFFFFF);
  1.2663 +    StubRoutines::mips::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
  1.2664 +    StubRoutines::mips::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
  1.2665 +    StubRoutines::mips::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
  1.2666 +
  1.2667 +    // support for verify_oop (must happen after universe_init)
  1.2668 +    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
  1.2669 +
  1.2670 +    // arraycopy stubs used by compilers
  1.2671 +    generate_arraycopy_stubs();
  1.2672 +	*/
  1.2673 +#ifdef aoqi_test
  1.2674 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2675 +#endif
  1.2676 +		StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
  1.2677 +#ifdef aoqi_test
  1.2678 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2679 +#endif
  1.2680 +//		StubRoutines::_throw_ArithmeticException_entry         = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
  1.2681 +#ifdef aoqi_test
  1.2682 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2683 +#endif
  1.2684 +//		StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
  1.2685 +#ifdef aoqi_test
  1.2686 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2687 +#endif
  1.2688 +		StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
  1.2689 +#ifdef aoqi_test
  1.2690 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2691 +#endif
  1.2692 +		StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);
  1.2693 +#ifdef aoqi_test
  1.2694 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2695 +#endif
  1.2696 +
  1.2697 +		//------------------------------------------------------
  1.2698 +		//------------------------------------------------------------------
  1.2699 +		// entry points that are platform specific  
  1.2700 +
  1.2701 +		// support for verify_oop (must happen after universe_init)
  1.2702 +#ifdef aoqi_test
  1.2703 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2704 +#endif
  1.2705 +		StubRoutines::_verify_oop_subroutine_entry	   = generate_verify_oop();
  1.2706 +#ifdef aoqi_test
  1.2707 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2708 +#endif
  1.2709 +#ifndef CORE
  1.2710 +		// arraycopy stubs used by compilers
  1.2711 +		generate_arraycopy_stubs();
  1.2712 +#ifdef aoqi_test
  1.2713 +tty->print_cr("%s:%d", __func__, __LINE__);
  1.2714 +#endif
  1.2715 +#endif
  1.2716 +
  1.2717 +    // Safefetch stubs.
  1.2718 +    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
  1.2719 +                                                       &StubRoutines::_safefetch32_fault_pc,
  1.2720 +                                                       &StubRoutines::_safefetch32_continuation_pc);
  1.2721 +    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
  1.2722 +                                                       &StubRoutines::_safefetchN_fault_pc,
  1.2723 +                                                       &StubRoutines::_safefetchN_continuation_pc);
  1.2724 +	}
  1.2725 +
  1.2726 + public:
  1.2727 +  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
  1.2728 +    if (all) {
  1.2729 +      generate_all();
  1.2730 +    } else {
  1.2731 +      generate_initial();
  1.2732 +    }
  1.2733 +  }
  1.2734 +}; // end class declaration
  1.2735 +/*
  1.2736 +address StubGenerator::disjoint_byte_copy_entry  = NULL;
  1.2737 +address StubGenerator::disjoint_short_copy_entry = NULL;
  1.2738 +address StubGenerator::disjoint_int_copy_entry   = NULL;
  1.2739 +address StubGenerator::disjoint_long_copy_entry  = NULL;
  1.2740 +address StubGenerator::disjoint_oop_copy_entry   = NULL;
  1.2741 +
  1.2742 +address StubGenerator::byte_copy_entry  = NULL;
  1.2743 +address StubGenerator::short_copy_entry = NULL;
  1.2744 +address StubGenerator::int_copy_entry   = NULL;
  1.2745 +address StubGenerator::long_copy_entry  = NULL;
  1.2746 +address StubGenerator::oop_copy_entry   = NULL;
  1.2747 +
  1.2748 +address StubGenerator::checkcast_copy_entry = NULL;
  1.2749 +*/
  1.2750 +void StubGenerator_generate(CodeBuffer* code, bool all) {
  1.2751 +  StubGenerator g(code, all);
  1.2752 +}

mercurial