src/cpu/mips/vm/stubGenerator_mips_64.cpp

Tue, 10 May 2016 15:08:51 -0400

author
aoqi
date
Tue, 10 May 2016 15:08:51 -0400
changeset 8
cf5765c81f87
parent 7
e26ad49b7194
child 13
bc227c49eaae
permissions
-rw-r--r--

Fixed changeset 108.

     1 /*
     2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
     4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5  *
     6  * This code is free software; you can redistribute it and/or modify it
     7  * under the terms of the GNU General Public License version 2 only, as
     8  * published by the Free Software Foundation.
     9  *
    10  * This code is distributed in the hope that it will be useful, but WITHOUT
    11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    13  * version 2 for more details (a copy is included in the LICENSE file that
    14  * accompanied this code).
    15  *
    16  * You should have received a copy of the GNU General Public License version
    17  * 2 along with this work; if not, write to the Free Software Foundation,
    18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    19  *
    20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    21  * or visit www.oracle.com if you need additional information or have any
    22  * questions.
    23  *
    24  */
    26 #include "precompiled.hpp"
    27 #include "asm/macroAssembler.hpp"
    28 #include "asm/macroAssembler.inline.hpp"
    29 #include "interpreter/interpreter.hpp"
    30 #include "nativeInst_mips.hpp"
    31 #include "oops/instanceOop.hpp"
    32 #include "oops/method.hpp"
    33 #include "oops/objArrayKlass.hpp"
    34 #include "oops/oop.inline.hpp"
    35 #include "prims/methodHandles.hpp"
    36 #include "runtime/frame.inline.hpp"
    37 #include "runtime/handles.inline.hpp"
    38 #include "runtime/sharedRuntime.hpp"
    39 #include "runtime/stubCodeGenerator.hpp"
    40 #include "runtime/stubRoutines.hpp"
    41 #include "runtime/thread.inline.hpp"
    42 #include "utilities/top.hpp"
    43 #ifdef COMPILER2
    44 #include "opto/runtime.hpp"
    45 #endif
    48 // Declaration and definition of StubGenerator (no .hpp file).
    49 // For a more detailed description of the stub routine structure
    50 // see the comment in stubRoutines.hpp
    52 #define __ _masm->  // shorthand: "__ insn(...)" expands to "_masm->insn(...)"
    53 //#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
    54 //#define a__ ((Assembler*)_masm)->
    56 //#ifdef PRODUCT
    57 //#define BLOCK_COMMENT(str) /* nothing */
    58 //#else
    59 //#define BLOCK_COMMENT(str) __ block_comment(str)
    60 //#endif
    62 //#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
    63 const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions. NOTE(review): not referenced in this part of the file; the name suggests a leftover from the x86 port - confirm before relying on it.
    65 // Stub Code definitions
    67 static address handle_unsafe_access() {
    68   JavaThread* thread = JavaThread::current();
    69   address pc = thread->saved_exception_pc();
    70   // pc is the instruction which we must emulate
    71   // doing a no-op is fine:  return garbage from the load
    72   // therefore, compute npc
    73   //address npc = Assembler::locate_next_instruction(pc);
    74 	address npc = (address)((unsigned long)pc + sizeof(unsigned long));
    76   // request an async exception
    77   thread->set_pending_unsafe_access_error();
    79   // return address of next instruction to execute
    80   return npc;
    81 }
    83 class StubGenerator: public StubCodeGenerator {
    84  private:
    86   // ABI mips n64
    87   // This figure does not show the MIPS ABI; it shows the frame layout used when Java is called from C.
    88   // Call stubs are used to call Java from C
    89   //
    90   //    [ return_from_Java     ]
    91   //    [ argument word n-1    ] <--- sp
    92   //      ...
    93   //    [ argument word 0      ]
    94   //      ...
    95   //-10 [ S6     	       ]
    96   // -9 [ S5		       ] 
    97   // -8 [ S4		       ]
    98   // -7 [ S3                   ]
    99   // -6 [ S0  		       ]
   100   // -5 [ TSR(S2)	       ]
   101   // -4 [ LVP(S7)              ]
   102   // -3 [ BCP(S1)              ]
   103   // -2 [ saved fp             ] <--- fp_after_call
   104   // -1 [ return address       ] 
   105   //  0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
   106   //  1 [ result               ] <--- a1
   107   //  2 [ result_type          ] <--- a2
   108   //  3 [ method               ] <--- a3
   109   //  4 [ entry_point          ] <--- a4
   110   //  5 [ parameters           ] <--- a5
   111   //  6 [ parameter_size       ] <--- a6
   112   //  7 [ thread               ] <--- a7
   114   //
   115   // _LP64: n64 does not save paras in sp.
   116   //
   117   //    [ return_from_Java     ]
   118   //    [ argument word n-1    ] <--- sp
   119   //      ...
   120   //    [ argument word 0      ]
   121   //      ...
   122   //-14 [ thread               ]
   123   //-13 [ result_type          ] <--- a2
   124   //-12 [ result               ] <--- a1
   125   //-11 [ ptr. to call wrapper ] <--- a0
   126   //-10 [ S6     	       ]
   127   // -9 [ S5		       ] 
   128   // -8 [ S4		       ]
   129   // -7 [ S3                   ]
   130   // -6 [ S0  		       ]
   131   // -5 [ TSR(S2)	       ]
   132   // -4 [ LVP(S7)              ]
   133   // -3 [ BCP(S1)              ]
   134   // -2 [ saved fp             ] <--- fp_after_call
   135   // -1 [ return address       ] 
   136   //  0 [        	       ] <--- old sp
   137   /*
   138    * 2014/01/16 Fu: Find a right place in the call_stub for GP.
   139    * GP will point to the starting point of Interpreter::dispatch_table(itos). 
   140    * It should be saved/restored before/after Java calls. 
   141    *
   142    */
   143    enum call_stub_layout {  // word offsets used by generate_call_stub(); multiplied by wordSize at each use
   144      RA_off		  = -1,  // return address, saved relative to SP as it is on entry
   145      FP_off		  = -2,  // caller's FP, relative to entry SP
   146      BCP_off		  = -3,  // BCP, relative to entry SP
   147      LVP_off		  = -4,  // LVP, relative to entry SP
   148      TSR_off		  = -5,  // TSR, relative to entry SP
   149      S1_off		  = -6,  // S1, relative to entry SP
   150      S3_off		  = -7,  // S3, relative to entry SP
   151      S4_off		  = -8,  // S4, relative to entry SP
   152      S5_off		  = -9,  // S5, relative to entry SP
   153      S6_off		  = -10,  // S6, relative to entry SP
   154      result_off		  = -11,  // A1 (result address); relative to FP, where FP = entry SP - 2 words
   155      result_type_off	  = -12,  // A2 (result type); relative to FP
   156      thread_off		  = -13,  // A7 (thread); relative to FP
   157      total_off		  = thread_off - 3,  // SP adjustment in words (-16) applied after the register saves
   158      GP_off               = -16,  // GP, relative to entry SP; equals total_off, i.e. the word the adjusted SP points at
   159    };
   161   address generate_call_stub(address& return_address) {
   163     StubCodeMark mark(this, "StubRoutines", "call_stub");
   164     address start = __ pc();
   166     // same as in generate_catch_exception()!
   168     // stub code
   169     // save ra and fp
   170     __ sd(RA, SP, RA_off * wordSize);
   171     __ sd(FP, SP, FP_off * wordSize);
   172     __ sd(BCP, SP, BCP_off * wordSize);
   173     __ sd(LVP, SP, LVP_off * wordSize);
   174     __ sd(GP, SP, GP_off * wordSize);
   175     __ sd(TSR, SP, TSR_off * wordSize);
   176     __ sd(S1, SP, S1_off * wordSize);
   177     __ sd(S3, SP, S3_off * wordSize);
   178     __ sd(S4, SP, S4_off * wordSize);
   179     __ sd(S5, SP, S5_off * wordSize);
   180     __ sd(S6, SP, S6_off * wordSize);
   183     __ li48(GP, (long)Interpreter::dispatch_table(itos));
   185     // I think 14 is the max gap between argument and callee saved register
   186     __ daddi(FP, SP, (-2) * wordSize);
   187     __ daddi(SP, SP, total_off * wordSize);
   188 //FIXME, aoqi. find a suitable place to save A1 & A2.
   189     /*
   190     __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
   191     __ sd(A1, FP, 3 * wordSize);
   192     __ sd(A2, FP, 4 * wordSize);
   193     __ sd(A3, FP, 5 * wordSize);
   194     __ sd(A4, FP, 6 * wordSize);
   195     __ sd(A5, FP, 7 * wordSize);
   196     __ sd(A6, FP, 8 * wordSize);
   197     __ sd(A7, FP, 9 * wordSize);
   198     */
   199     __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
   200     __ sd(A1, FP, result_off * wordSize);
   201     __ sd(A2, FP, result_type_off * wordSize);
   202     __ sd(A7, FP, thread_off * wordSize);
   204 #ifdef OPT_THREAD
   205     //__ get_thread(TREG);
   206     __ move(TREG, A7);
   208     //__ ld(TREG, FP, thread_off * wordSize);
   209 #endif
   210     //add for compressedoops
   211     __ reinit_heapbase();
   213 #ifdef ASSERT
   214     // make sure we have no pending exceptions
   215     { 
   216       Label L;
   217     	__ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
   218     	__ beq(AT, R0, L); 
   219     	__ delayed()->nop();
   220     	/* FIXME: I do not know how to realize stop in mips arch, do it in the future */
   221     	__ stop("StubRoutines::call_stub: entered with pending exception");
   222     	__ bind(L);
   223     }
   224 #endif
   226     // pass parameters if any
   227     // A5: parameter
   228     // A6: parameter_size
   229     // T0: parameter_size_tmp(--)
   230     // T2: offset(++)
   231     // T3: tmp
   232     Label parameters_done;
   233     // judge if the parameter_size equals 0
   234     __ beq(A6, R0, parameters_done);
   235     __ delayed()->nop();
   236     __ dsll(AT, A6, Interpreter::logStackElementSize);
   237     __ dsub(SP, SP, AT); 
   238     __ move(AT, -StackAlignmentInBytes); 
   239     __ andr(SP, SP , AT); 
   240     // Copy Java parameters in reverse order (receiver last)
   241     // Note that the argument order is inverted in the process
   242     // source is edx[ecx: N-1..0]
   243     // dest   is esp[ebx: 0..N-1]
   244     Label loop;
   245     __ move(T0, A6);
   246     __ move(T2, R0);
   247     __ bind(loop);
   249     // get parameter
   250     __ dsll(T3, T0, LogBytesPerWord);   
   251     __ dadd(T3, T3, A5);	    
   252     __ ld(AT, T3,  -wordSize);
   253     __ dsll(T3, T2, LogBytesPerWord); 
   254     __ dadd(T3, T3, SP); 
   255     __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
   256     __ daddi(T2, T2, 1); 
   257     __ daddi(T0, T0, -1); 
   258     __ bne(T0, R0, loop);
   259     __ delayed()->nop();
   260     // advance to next parameter
   262     // call Java function
   263     __ bind(parameters_done);
   265     // receiver in V0, methodOop in Rmethod
   267     __ move(Rmethod, A3);
   268     __ move(Rsender, SP);             //set sender sp
   269     __ jalr(A4);
   270     __ delayed()->nop();
   271     return_address = __ pc();
   273     Label common_return;
   274     __ bind(common_return);
   276     // store result depending on type
   277     // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
   278     __ ld(T0, FP, result_off * wordSize); 	// result --> T0
   279     Label is_long, is_float, is_double, exit;
   280     __ ld(T2, FP, result_type_off * wordSize);	// result_type --> T2
   281     __ daddi(T3, T2, (-1) * T_LONG);
   282     __ beq(T3, R0, is_long);
   283     __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
   284     __ beq(T3, R0, is_float);
   285     __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
   286     __ beq(T3, R0, is_double);
   287     __ delayed()->nop();
   289     // handle T_INT case
   290     __ sd(V0, T0, 0 * wordSize);
   291     __ bind(exit);
   293     // restore 
   294     __ daddi(SP, FP, 2 * wordSize );
   295     __ ld(RA, SP, RA_off * wordSize);
   296     __ ld(FP, SP, FP_off * wordSize);
   297     __ ld(BCP, SP, BCP_off * wordSize);
   298     __ ld(LVP, SP, LVP_off * wordSize);
   299     __ ld(GP, SP, GP_off * wordSize);
   300     __ ld(TSR, SP, TSR_off * wordSize);
   302     __ ld(S1, SP, S1_off * wordSize);
   303     __ ld(S3, SP, S3_off * wordSize);
   304     __ ld(S4, SP, S4_off * wordSize);
   305     __ ld(S5, SP, S5_off * wordSize);
   306     __ ld(S6, SP, S6_off * wordSize);
   308     // return
   309     __ jr(RA);
   310     __ delayed()->nop();
   312     // handle return types different from T_INT
   313     __ bind(is_long);
   314     __ sd(V0, T0, 0 * wordSize);
   315     //__ sd(V1, T0, 1 * wordSize);
   316     __ sd(R0, T0, 1 * wordSize);
   317     __ b(exit);
   318     __ delayed()->nop();
   320     __ bind(is_float);
   321     __ swc1(F0, T0, 0 * wordSize);
   322     __ b(exit);
   323     __ delayed()->nop();
   325     __ bind(is_double);
   326     __ sdc1(F0, T0, 0 * wordSize);
   327     //__ sdc1(F1, T0, 1 * wordSize);
   328     __ sd(R0, T0, 1 * wordSize);
   329     __ b(exit);
   330     __ delayed()->nop();
   331     //FIXME, 1.6 mips version add operation of fpu here
   332     StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
   333     __ b(common_return);
   334     __ delayed()->nop(); 
   335     return start;
   336   }
   338   // Return point for a Java call if there's an exception thrown in
   339   // Java code.  The exception is caught and transformed into a
   340   // pending exception stored in JavaThread that can be tested from
   341   // within the VM.
   342   //
   343   // Note: Usually the parameters are removed by the callee. In case
   344   // of an exception crossing an activation frame boundary, that is
   345   // not the case if the callee is compiled code => need to setup the
   346   // rsp.
   347   //
   348   // rax: exception oop
   350   address generate_catch_exception() {
   351     StubCodeMark mark(this, "StubRoutines", "catch_exception");
   352     address start = __ pc();
   354     Register thread = TREG;
   356     // get thread directly
   357 #ifndef OPT_THREAD
   358     __ ld(thread, FP, thread_off * wordSize);
   359 #endif
   361 #ifdef ASSERT
   362     // verify that threads correspond
   363     { Label L;
   364       __ get_thread(T8);
   365       __ beq(T8, thread, L);
   366       __ delayed()->nop();
   367       __ stop("StubRoutines::catch_exception: threads must correspond");
   368       __ bind(L);
   369     }
   370 #endif
   371     // set pending exception
   372     __ verify_oop(V0);
   373     __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
   374     __ li(AT, (long)__FILE__);
   375     __ sd(AT, thread, in_bytes(Thread::exception_file_offset   ()));
   376     __ li(AT, (long)__LINE__);
   377     __ sd(AT, thread, in_bytes(Thread::exception_line_offset   ()));
   379     // complete return to VM
   380     assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
   381     __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
   382     __ delayed()->nop();
   384     return start;
   385   }
   387   // Continuation point for runtime calls returning with a pending
   388   // exception.  The pending exception check happened in the runtime
   389   // or native call stub.  The pending exception in Thread is
   390   // converted into a Java-level exception.
   391   //
   392   // Contract with Java-level exception handlers:
   393   // rax: exception
   394   // rdx: throwing pc
   395   //
   396   // NOTE: At entry of this stub, exception-pc must be on stack !!
   398   address generate_forward_exception() {
   399     StubCodeMark mark(this, "StubRoutines", "forward exception");
   400     //Register thread = TREG;
   401     Register thread = TREG;
   402     address start = __ pc();
   404     // Upon entry, the sp points to the return address returning into Java
   405     // (interpreted or compiled) code; i.e., the return address becomes the
   406     // throwing pc.
   407     //
   408     // Arguments pushed before the runtime call are still on the stack but
   409     // the exception handler will reset the stack pointer -> ignore them.
   410     // A potential result in registers can be ignored as well.
   412 #ifdef ASSERT
   413     // make sure this code is only executed if there is a pending exception
   414 #ifndef OPT_THREAD
   415     __ get_thread(thread);
   416 #endif
   417     { Label L;
   418       __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
   419       __ bne(AT, R0, L);
   420       __ delayed()->nop();
   421       __ stop("StubRoutines::forward exception: no pending exception (1)");
   422       __ bind(L);
   423     }
   424 #endif
   426     // compute exception handler into T9
   427     __ ld(A1, SP, 0);
   428     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
   429     __ move(T9, V0);
   430     __ pop(V1);
   432 #ifndef OPT_THREAD
   433     __ get_thread(thread);
   434 #endif
   435     __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
   436     __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
   438 #ifdef ASSERT
   439     // make sure exception is set
   440     { Label L;
   441       __ bne(V0, R0, L);
   442       __ delayed()->nop();
   443       __ stop("StubRoutines::forward exception: no pending exception (2)");
   444       __ bind(L);
   445     }
   446 #endif
   448     // continue at exception handler (return address removed)
   449     // V0: exception
   450     // T9: exception handler
   451     // V1: throwing pc
   452     __ verify_oop(V0);
   453     __ jr(T9);
   454     __ delayed()->nop();
   456     return start;
   457   }
   459   // Support for intptr_t get_previous_fp()
   460   //
   461   // This routine is used to find the previous frame pointer for the
   462   // caller (current_frame_guess). This is used as part of debugging
   463   // ps() is seemingly lost trying to find frames.
   464   // This code assumes that caller current_frame_guess) has a frame.
   465   address generate_get_previous_fp() {
   466     StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
   467     const Address old_fp       (FP,  0);
   468     const Address older_fp       (V0,  0);
   469     address start = __ pc();
   470     __ enter();    
   471     __ lw(V0, old_fp); // callers fp
   472     __ lw(V0, older_fp); // the frame for ps()
   473     __ leave();
   474     __ jr(RA);
   475     __ delayed()->nop();
   476     return start;
   477   }
   478   // The following routine generates a subroutine to throw an
   479   // asynchronous UnknownError when an unsafe access gets a fault that
   480   // could not be reasonably prevented by the programmer.  (Example:
   481   // SIGBUS/OBJERR.)  The generated code saves the registers with pushad(),
   481   // calls handle_unsafe_access(), restores them with popad() and returns via RA.
   482   address generate_handler_for_unsafe_access() {
   483 		StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
   484 		address start = __ pc();
   485 		__ pushad();                      // push registers
   486 		//  Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
   487 		__ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);  // marks the thread and computes the continuation pc
   488 		__ delayed()->nop(); 
   489 		__ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord);  // stores V0 just above the pushad area. NOTE(review): V0 presumably holds the 64-bit npc, yet sw writes only 32 bits, and no slot is reserved here - confirm intent
   490 		__ popad();
   491 		__ jr(RA);  // NOTE(review): returns through RA; the value stored above is not reloaded in this stub - confirm who consumes it
   492 		__ delayed()->nop();  
   493 		return start;
   494   }
   496   // Non-destructive plausibility checks for oops
   497   //
   498   // Arguments:
   499   //    all args on stack!
   500   //
   501   // Stack after saving c_rarg3:
   502   //    [tos + 0]: saved c_rarg3
   503   //    [tos + 1]: saved c_rarg2
   504   //    [tos + 2]: saved r12 (several TemplateTable methods use it)
   505   //    [tos + 3]: saved flags
   506   //    [tos + 4]: return address
   507   //  * [tos + 5]: error message (char*)
   508   //  * [tos + 6]: object to verify (oop)
   509   //  * [tos + 7]: saved rax - saved by caller and bashed
   510   //  * = popped on exit
   511   address generate_verify_oop() {
   512 	  StubCodeMark mark(this, "StubRoutines", "verify_oop");
   513 	  address start = __ pc();
   514 	  __ reinit_heapbase();
   515 	  __ verify_oop_subroutine(); 
   516     address end = __ pc();
   517 	  return start;
   518   }
   520   //
   521   //  Generate overlap test for array copy stubs
   522   //
   523   //  Input:
   524   //     A0    -  array1
   525   //     A1    -  array2
   526   //     A2    -  element count
   527   //
   528   //  Note: this code can only use %eax, %ecx, and %edx
   529   //
   531  // use T9 as temp 
   532   void array_overlap_test(address no_overlap_target, int log2_elem_size) {
   533     int elem_size = 1 << log2_elem_size;
   534     Address::ScaleFactor sf = Address::times_1;
   536     switch (log2_elem_size) {
   537       case 0: sf = Address::times_1; break;
   538       case 1: sf = Address::times_2; break;
   539       case 2: sf = Address::times_4; break;
   540       case 3: sf = Address::times_8; break;
   541     }
   543     __ dsll(AT, A2, sf);
   544     __ dadd(AT, AT, A0); 
   545     __ lea(T9, Address(AT, -elem_size)); 
   546     __ dsub(AT, A1, A0); 
   547     __ blez(AT, no_overlap_target); 
   548     __ delayed()->nop(); 
   549     __ dsub(AT, A1, T9); 
   550     __ bgtz(AT, no_overlap_target); 
   551     __ delayed()->nop(); 
   553     // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target 
   554     Label L;
   555     __ bgez(A0, L);
   556     __ delayed()->nop(); 
   557     __ bgtz(A1, no_overlap_target);
   558     __ delayed()->nop(); 
   559     __ bind(L);
   561   }
   563   //
   564   //  Generate store check for array
   565   //
   566   //  Input:
   567   //     %edi    -  starting address
   568   //     %ecx    -  element count
   569   //
   570   //  The 2 input registers are overwritten
   571   //
   573   //
   574   //  Generate store check for array
   575   //
   576   //  Input:
   577   //     T0    -  starting address(edi)
   578   //     T1    -  element count  (ecx)
   579   //
   580   //  The 2 input registers are overwritten
   581   //
   583 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
   585 	void array_store_check() {
   586 		BarrierSet* bs = Universe::heap()->barrier_set();
   587 		assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
   588 		CardTableModRefBS* ct = (CardTableModRefBS*)bs;
   589 		assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
   590 		Label l_0;
   592 		__ dsll(AT, T1, TIMES_OOP);
   593 		__ dadd(AT, T0, AT); 
   594 		__ daddiu(T1, AT, - BytesPerHeapOop);
   596 		__ shr(T0, CardTableModRefBS::card_shift); 
   597 		__ shr(T1, CardTableModRefBS::card_shift);
   599 		__ dsub(T1, T1, T0);   // end --> cards count
   600 		__ bind(l_0);
   602 		__ li48(AT, (long)ct->byte_map_base); 
   603 		__ dadd(AT, AT, T0); 
   604 		__ dadd(AT, AT, T1); 
   605 		__ sb(R0, AT, 0);
   606 		//__ daddi(T1, T1, -4);  
   607 		__ daddi(T1, T1, - 1);
   608 		__ bgez(T1, l_0);
   609 		__ delayed()->nop(); 
   610 	}
   612   // Arguments:
   613   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   614   //             ignored
   615   //   name    - stub name string
   616   //
   617   // Inputs:
   618   //   c_rarg0   - source array address
   619   //   c_rarg1   - destination array address
   620   //   c_rarg2   - element count, treated as ssize_t, can be zero
   621   //
   622   // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
   623   // we let the hardware handle it.  The one to eight bytes within words,
   624   // dwords or qwords that span cache line boundaries will still be loaded
   625   // and stored atomically.
   626   //
   627   // Side Effects:
   628   //   disjoint_byte_copy_entry is set to the no-overlap entry point
   629   //   used by generate_conjoint_byte_copy().
   630   //
   631 	address generate_disjoint_byte_copy(bool aligned, const char *name) {
   632 	  StubCodeMark mark(this, "StubRoutines", name);
   633 	  __ align(CodeEntryAlignment);
   634 	  address start = __ pc();
   635 	  Label l_0, l_1, l_2, l_3, l_4, l_5, l_6;
   637 	  __ push(T3);
   638 	  __ push(T0);
   639 	  __ push(T1);
   640 	  __ push(T8);
   641 	  __ move(T3, A0); 
   642 	  __ move(T0, A1);
   643 	  __ move(T1, A2);  
   644 	  __ move(T8, T1);             // original count in T1
   645 	  __ daddi(AT, T1, -3); 
   646 	  __ blez(AT, l_4);  
   647 	  __ delayed()->nop();	
   648 	  if (!aligned) {
   649           //TODO: copy 8 bytes at one time
   650 	    // 2016/5/8 Jin: only when src and dest has the same alignment can we do lw/sw */
   651 	    __ andi(AT, T3, 3); 
   652 	    __ andi(T9, T0, 3); 
   653 	    __ bne(AT, T9, l_5); 
   654 	    __ delayed()->nop();	
   656 	    // align source address at dword address boundary
   657 	    __ move(T1, 4); 
   658 	    __ sub(T1, T1, T3); 
   659 	    __ andi(T1, T1, 3); 
   660 	    __ beq(T1, R0, l_1); 
   661 	    __ delayed()->nop();	
   662 	    __ sub(T8,T8,T1); 
   663 	    __ bind(l_0);
   664 	    __ lb(AT, T3, 0); 
   665 	    __ sb(AT, T0, 0); 
   666 	    __ addi(T3, T3, 1); 
   667 	    __ addi(T0, T0, 1); 
   668 	    __ addi(T1 ,T1, -1);  
   669 	    __ bne(T1, R0, l_0); 
   670 	    __ delayed()->nop(); 
   671 	    __ bind(l_1);
   672 	    __ move(T1, T8); 
   673 	  }
   674 	  __ shr(T1, 2); 
   675 	  __ beq(T1, R0, l_4);     // no dwords to move
   676 	  __ delayed()->nop(); 
   677 	  // copy aligned dwords
   678 	  __ bind(l_2);
   679 	  __ align(16);
   680 	  __ bind(l_3);
   681 	  __ lw(AT, T3, 0);   
   682 	  __ sw(AT, T0, 0 ); 
   683 	  __ addi(T3, T3, 4); 
   684 	  __ addi(T0, T0, 4); 
   685 	  __ addi(T1, T1, -1); 
   686 	  __ bne(T1, R0, l_3); 
   687 	  __ delayed()->nop(); 
   688 	  __ bind(l_4);
   689 	  __ move(T1, T8); 
   690 	  __ andi(T1, T1, 3); 
   691 	  __ beq(T1, R0, l_6);  
   692 	  __ delayed()->nop(); 
   693 	  // copy suffix
   694 	  __ bind(l_5);
   695 	  __ lb(AT, T3, 0); 
   696 	  __ sb(AT, T0, 0); 
   697 	  __ addi(T3, T3, 1);  
   698 	  __ addi(T0, T0, 1);  
   699 	  __ addi(T1, T1, -1); 
   700 	  __ bne(T1, R0, l_5 ); 
   701 	  __ delayed()->nop(); 
   702 	  __ bind(l_6);
   703 	  __ pop(T8); 
   704 	  __ pop(T1); 
   705 	  __ pop(T0); 
   706 	  __ pop(T3); 
   707 	  __ jr(RA); 
   708 	  __ delayed()->nop(); 
   709 	  return start;
   710   }
   712   // Arguments:
   713   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   714   //             ignored
   715   //   name    - stub name string
   716   //
   717   // Inputs:
   718   //   A0   - source array address
   719   //   A1   - destination array address
   720   //   A2   - element count, treated as ssize_t, can be zero
   721   //
   722   // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
   723   // we let the hardware handle it.  The one to eight bytes within words,
   724   // dwords or qwords that span cache line boundaries will still be loaded
   725   // and stored atomically.
   726   //
   727   address generate_conjoint_byte_copy(bool aligned, const char *name) {
   728     __ align(CodeEntryAlignment);
   729     StubCodeMark mark(this, "StubRoutines", name);
   730     address start = __ pc();
   732     Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
   733     Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;
   735     address nooverlap_target = aligned ?
   736 	    StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
   737 	    StubRoutines::jbyte_disjoint_arraycopy();
   739     array_overlap_test(nooverlap_target, 0);
   741     const Register from      = A0;   // source array address
   742     const Register to        = A1;   // destination array address
   743     const Register count     = A2;   // elements count
   744     const Register end_from  = T3;   // source array end address
   745     const Register end_to    = T0;   // destination array end address
   746     const Register end_count = T1;   // destination array end address
   748     __ push(end_from);	
   749     __ push(end_to);	
   750     __ push(end_count);	
   751     __ push(T8);	
   753     // copy from high to low
   754     __ move(end_count, count);  
   755     __ dadd(end_from, from, end_count);  
   756     __ dadd(end_to, to, end_count);  
   758     // 2016/05/08 aoqi: If end_from and end_to has differante alignment, unaligned copy is performed.
   759     __ andi(AT, end_from, 3); 
   760     __ andi(T8, end_to, 3); 
   761     __ bne(AT, T8, l_copy_byte); 
   762     __ delayed()->nop();	
   764     // First deal with the unaligned data at the top.
   765     __ bind(l_unaligned);
   766     __ beq(end_count, R0, l_exit); 
   767     __ delayed()->nop(); 
   769     __ andi(AT, end_from, 3);    
   770     __ bne(AT, R0, l_from_unaligned); 
   771     __ delayed()->nop(); 
   773     __ andi(AT, end_to, 3);    
   774     __ beq(AT, R0, l_4_bytes_aligned); 
   775     __ delayed()->nop(); 
   777     __ bind(l_from_unaligned);
   778     __ lb(AT, end_from, -1);   
   779     __ sb(AT, end_to, -1); 
   780     __ daddi(end_from, end_from, -1); 
   781     __ daddi(end_to, end_to, -1); 
   782     __ daddi(end_count, end_count, -1); 
   783     __ b(l_unaligned); 
   784     __ delayed()->nop(); 
   786     // now end_to, end_from point to 4-byte aligned high-ends
   787     //     end_count contains byte count that is not copied.
   788     // copy 4 bytes at a time
   789     __ bind(l_4_bytes_aligned);
   791     __ move(T8, end_count); 
   792     __ daddi(AT, end_count, -3); 
   793     __ blez(AT, l_copy_suffix); 
   794     __ delayed()->nop();	
   796     //__ andi(T8, T8, 3); 
   797     __ lea(end_from, Address(end_from, -4));
   798     __ lea(end_to, Address(end_to, -4));
   800     __ dsrl(end_count, end_count, 2); 
   801     __ align(16);
   802     __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes
   803     __ lw(AT, end_from, 0);   
   804     __ sw(AT, end_to, 0); 
   805     __ addi(end_from, end_from, -4);    
   806     __ addi(end_to, end_to, -4);    
   807     __ addi(end_count, end_count, -1);  
   808     __ bne(end_count, R0, l_copy_4_bytes_loop); 
   809     __ delayed()->nop(); 
   811     __ b(l_copy_suffix);  
   812     __ delayed()->nop(); 
   813     // copy dwords aligned or not with repeat move
   814     // l_copy_suffix
   815     // copy suffix (0-3 bytes)
   816     __ bind(l_copy_suffix); 
   817     __ andi(T8, T8, 3); 
   818     __ beq(T8, R0, l_exit); 
   819     __ delayed()->nop(); 
   820     __ addi(end_from, end_from, 3); 
   821     __ addi(end_to, end_to, 3); 
   822     __ bind(l_copy_suffix_loop);
   823     __ lb(AT, end_from, 0);  
   824     __ sb(AT, end_to, 0); 
   825     __ addi(end_from, end_from, -1);  
   826     __ addi(end_to, end_to, -1);  
   827     __ addi(T8, T8, -1); 
   828     __ bne(T8, R0, l_copy_suffix_loop); 
   829     __ delayed()->nop(); 
   831     __ bind(l_copy_byte);
   832     __ beq(end_count, R0, l_exit); 
   833     __ delayed()->nop(); 
   834     __ lb(AT, end_from, -1);   
   835     __ sb(AT, end_to, -1); 
   836     __ daddi(end_from, end_from, -1); 
   837     __ daddi(end_to, end_to, -1); 
   838     __ daddi(end_count, end_count, -1); 
   839     __ b(l_copy_byte); 
   840     __ delayed()->nop(); 
   842     __ bind(l_exit);
   843     __ pop(T8);	
   844     __ pop(end_count);	
   845     __ pop(end_to);	
   846     __ pop(end_from);	
   847     __ jr(RA); 
   848     __ delayed()->nop(); 
   849     return start;
   850   }
   852   // Arguments:
   853   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   854   //             ignored
   855   //   name    - stub name string
   856   //
   857   // Inputs:
   858   //   c_rarg0   - source array address
   859   //   c_rarg1   - destination array address
   860   //   c_rarg2   - element count, treated as ssize_t, can be zero
   861   //
   862   // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
   863   // let the hardware handle it.  The two or four words within dwords
   864   // or qwords that span cache line boundaries will still be loaded
   865   // and stored atomically.
   866   //
   867   // Side Effects:
   868   //   disjoint_short_copy_entry is set to the no-overlap entry point
   869   //   used by generate_conjoint_short_copy().
   870   //
   871   address generate_disjoint_short_copy(bool aligned, const char *name) {
   872 		Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
   873 		StubCodeMark mark(this, "StubRoutines", name);
   874 		__ align(CodeEntryAlignment);
   875 		address start = __ pc();
   877 		__ push(T3);	
   878 		__ push(T0);	
   879 		__ push(T1);	
   880 		__ push(T8);	
   881 		__ move(T1, A2);  
   882 		__ move(T3, A0); 
   883 		__ move(T0, A1);
   885 		if (!aligned) {
   886 			__ beq(T1, R0, l_5);
   887 			__ delayed()->nop(); 
   888 			// align source address at dword address boundary
   889 			__ move(T8, T3); // original from
   890 			__ andi(T8, T8, 3); // either 0 or 2
   891 			__ beq(T8, R0, l_1); // no prefix
   892 			__ delayed()->nop();
   893 			// copy prefix
   894 			__ lh(AT, T3, 0);
   895 			__ sh(AT, T0, 0); 
   896 			__ add(T3, T3, T8); 
   897 			__ add(T0, T0, T8);
   898 			__ addi(T1, T1, -1); 
   899 			__ bind(l_1);
   900 		}
   901 		__ move(T8, T1);            // word count less prefix
   902 		__ sra(T1, T1, 1); 
   903 		__ beq(T1, R0, l_4); 
   904 		__ delayed()->nop(); 
   905     // copy aligned dwords
   906 		__ bind(l_2);
   907 		__ align(16);
   908 		__ bind(l_3);
   909 		__ lw(AT, T3, 0);   
   910 		__ sw(AT, T0, 0 ); 
   911 		__ addi(T3, T3, 4); 
   912 		__ addi(T0, T0, 4); 
   913 		__ addi(T1, T1, -1); 
   914 		__ bne(T1, R0, l_3); 
   915 		__ delayed()->nop(); 
   916 		__ bind(l_4);
   917 		__ andi(T8, T8, 1); 
   918 		__ beq(T8, R0, l_5);  
   919 		__ delayed()->nop(); 
   920 		// copy suffix
   921 		__ lh(AT, T3, 0); 
   922 		__ sh(AT, T0, 0); 
   923 		__ bind(l_5);
   924 		__ pop(T8);	
   925 		__ pop(T1);	
   926 		__ pop(T0);	
   927 		__ pop(T3);	
   928 		__ jr(RA); 
   929 		__ delayed()->nop();  
   930 		return start;
   931   }
   933   // Arguments:
   934   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   935   //             ignored
   936   //   name    - stub name string
   937   //
   938   // Inputs:
   939   //   c_rarg0   - source array address
   940   //   c_rarg1   - destination array address
   941   //   c_rarg2   - element count, treated as ssize_t, can be zero
   942   //
   943   // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
   944   // let the hardware handle it.  The two or four words within dwords
   945   // or qwords that span cache line boundaries will still be loaded
   946   // and stored atomically.
   947   //
   948   address generate_conjoint_short_copy(bool aligned, const char *name) {
   949 		Label l_1, l_2, l_3, l_4, l_5;
   950 		StubCodeMark mark(this, "StubRoutines", name);
   951 		__ align(CodeEntryAlignment);
   952 		address start = __ pc();
   953 		address nooverlap_target = aligned ?
   954 						StubRoutines::arrayof_jshort_disjoint_arraycopy() :
   955 						StubRoutines::jshort_disjoint_arraycopy();
   957 		array_overlap_test(nooverlap_target, 1);
   959 		__ push(T3);	
   960 		__ push(T0);	
   961 		__ push(T1);	
   962 		__ push(T8);	
   964 		/*
   965 			 __ pushl(esi);
   966 			 __ movl(ecx, Address(esp, 4+12));      // count
   967 			 __ pushl(edi);
   968 			 __ movl(esi, Address(esp, 8+ 4));      // from
   969 			 __ movl(edi, Address(esp, 8+ 8));      // to
   970 		 */ 
   971 		__ move(T1, A2);  
   972 		__ move(T3, A0); 
   973 		__ move(T0, A1);
   976 		// copy dwords from high to low
   977 		// __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4
   978 		__ sll(AT, T1, Address::times_2); 
   979 		__ add(AT, T3, AT); 
   980 		__ lea(T3, Address( AT, -4)); 
   981 		//__ std();
   982 		//__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4
   983 		__ sll(AT,T1 , Address::times_2); 
   984 		__ add(AT, T0, AT); 
   985 		__ lea(T0, Address( AT, -4)); 
   986 		//  __ movl(eax, ecx);
   987 		__ move(T8, T1); 
   988 		__ bind(l_1);
   989 		//   __ sarl(ecx, 1);              // dword count
   990 		__ sra(T1,T1, 1); 
   991 		//__ jcc(Assembler::equal, l_4);                   // no dwords to move
   992 		__ beq(T1, R0, l_4);  
   993 		__ delayed()->nop(); 
   994 		/*    __ cmpl(ecx, 32);
   995 					__ jcc(Assembler::above, l_3);                   // > 32 dwords
   996 		// copy dwords with loop
   997 		__ subl(edi, esi);
   998 		 */     __ align(16);
   999 		__ bind(l_2);
  1000 		//__ movl(edx, Address(esi));
  1001 		__ lw(AT, T3, 0);   
  1002 		//__ movl(Address(edi, esi, Address::times_1), edx);
  1003 		__ sw(AT, T0, 0); 
  1004 		//__ subl(esi, 4);
  1005 		__ addi(T3, T3, -4); 
  1006 		__ addi(T0, T0, -4); 
  1007 		//__ decl(ecx);
  1008 		__ addi(T1, T1, -1); 
  1009 		//  __ jcc(Assembler::notEqual, l_2);
  1010 		__ bne(T1, R0, l_2); 
  1011 		__ delayed()->nop(); 
  1012 		//  __ addl(edi, esi);
  1013 		// __ jmp(l_4);
  1014 		__ b(l_4);
  1015 		__ delayed()->nop();
  1016 		// copy dwords with repeat move
  1017 		__ bind(l_3);
  1018 		//   __ rep_movl();
  1019 		__ bind(l_4);
  1020 		//  __ andl(eax, 1);              // suffix count
  1021 		__ andi(T8, T8, 1);              // suffix count
  1022 		//__ jcc(Assembler::equal, l_5);                   // no suffix
  1023 		__ beq(T8, R0, l_5 );  
  1024 		__ delayed()->nop(); 
  1025 		// copy suffix
  1026 		//   __ movw(edx, Address(esi, 2));
  1027 		__ lh(AT, T3, 2); 
  1028 		//  __ movw(Address(edi, 2), edx);
  1029 		__ sh(AT, T0, 2); 
  1030 		__ bind(l_5);
  1031 		//    __ cld();
  1032 		//    __ popl(edi);
  1033 		//    __ popl(esi);
  1034 		//   __ ret(0);
  1035 		__ pop(T8);	
  1036 		__ pop(T1);	
  1037 		__ pop(T0);	
  1038 		__ pop(T3);	
  1039 		__ jr(RA); 
  1040 		__ delayed()->nop();   
  1041 		return start;
  1044   // Arguments:
  1045   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1046   //             ignored
  1047   //   is_oop  - true => oop array, so generate store check code
  1048   //   name    - stub name string
  1049   //
  1050   // Inputs:
  1051   //   c_rarg0   - source array address
  1052   //   c_rarg1   - destination array address
  1053   //   c_rarg2   - element count, treated as ssize_t, can be zero
  1054   //
  1055   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1056   // the hardware handle it.  The two dwords within qwords that span
  1057   // cache line boundaries will still be loaded and stored atomically.
  1058   //
  1059   // Side Effects:
  1060   //   disjoint_int_copy_entry is set to the no-overlap entry point
  1061   //   used by generate_conjoint_int_oop_copy().
  1062   //
  1063   address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
  1064 		Label l_2, l_3, l_4, l_stchk;
  1065 		StubCodeMark mark(this, "StubRoutines", name);
  1066 		__ align(CodeEntryAlignment);
  1067 		address start = __ pc();
  1068 		/*
  1069 			 __ pushl(esi);
  1070 			 __ movl(ecx, Address(esp, 4+12));      // count
  1071 			 __ pushl(edi);
  1072 			 __ movl(esi, Address(esp, 8+ 4));      // from
  1073 			 __ movl(edi, Address(esp, 8+ 8));      // to
  1074 		 */
  1075 		__ push(T3);	
  1076 		__ push(T0);	
  1077 		__ push(T1);	
  1078 		__ push(T8);	
  1079 		__ move(T1, A2);  
  1080 		__ move(T3, A0); 
  1081 		__ move(T0, A1);
  1083 		// __ cmpl(ecx, 32);
  1084 		// __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
  1085 		// __ rep_movl();
  1086 		__ b(l_2); 	
  1087 		__ delayed()->nop();	
  1088 		if (is_oop) {
  1089 		//  __ jmp(l_stchk);
  1090 			__ b(l_stchk); 
  1091 			__ delayed()->nop(); 
  1093 		//    __ popl(edi);
  1094 		//   __ popl(esi);
  1095 		//  __ ret(0);
  1096 		__ pop(T8);	
  1097 		__ pop(T1);	
  1098 		__ pop(T0);	
  1099 		__ pop(T3);	
  1100 		__ jr(RA); 
  1101 		__ delayed()->nop(); 
  1103 		__ bind(l_2);
  1104 		//  __ subl(edi, esi);
  1105 		//  __ testl(ecx, ecx);
  1106 		// __ jcc(Assembler::zero, l_4);
  1107 		__ beq(T1, R0, l_4);  
  1108 		__ delayed()->nop(); 
  1109 		__ align(16);
  1110 		__ bind(l_3);
  1111 		//__ movl(edx, Address(esi));
  1112 		__ lw(AT, T3, 0);   
  1113 		// __ movl(Address(edi, esi, Address::times_1), edx);
  1114 		__ sw(AT, T0, 0); 
  1115 		// __ addl(esi, 4);
  1116 		__ addi(T3, T3, 4);
  1117 		__ addi(T0, T0, 4);
  1118 		//   __ decl(ecx);
  1119 		__ addi(T1, T1, -1); 
  1120 		//    __ jcc(Assembler::notEqual, l_3);
  1121 		__ bne(T1, R0, l_3); 
  1122 		__ delayed()->nop(); 
  1123 		if (is_oop) {
  1124 			__ bind(l_stchk);
  1125 			//      __ movl(edi, Address(esp, 8+ 8));
  1126 			//     __ movl(ecx, Address(esp, 8+ 12));
  1127 			__ move(T0, A1); 
  1128 			__ move(T1, A2); 
  1129 			array_store_check();
  1131 		__ bind(l_4);
  1132 		//    __ popl(edi);
  1133 		//   __ popl(esi);
  1134 		//  __ ret(0);
  1135 		__ pop(T8);
  1136 		__ pop(T1);
  1137 		__ pop(T0);
  1138 		__ pop(T3);
  1139 		__ jr(RA); 
  1140 		__ delayed()->nop(); 
  1141 		return start;
  1144   // Arguments:
  1145   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1146   //             ignored
  1147   //   is_oop  - true => oop array, so generate store check code
  1148   //   name    - stub name string
  1149   //
  1150   // Inputs:
  1151   //   c_rarg0   - source array address
  1152   //   c_rarg1   - destination array address
  1153   //   c_rarg2   - element count, treated as ssize_t, can be zero
  1154   //
  1155   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1156   // the hardware handle it.  The two dwords within qwords that span
  1157   // cache line boundaries will still be loaded and stored atomically.
  1158   //
  1159   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
  1160 		Label l_2, l_3, l_4, l_stchk;
  1161 		StubCodeMark mark(this, "StubRoutines", name);
  1162 		__ align(CodeEntryAlignment);
  1163 		address start = __ pc();
  1164 		address nooverlap_target;
  1166 		if (is_oop) {
  1167 			nooverlap_target = aligned ?
  1168 							StubRoutines::arrayof_oop_disjoint_arraycopy() :
  1169 							StubRoutines::oop_disjoint_arraycopy();
  1170 		}else {
  1171 			nooverlap_target = aligned ?
  1172 							StubRoutines::arrayof_jint_disjoint_arraycopy() :
  1173 							StubRoutines::jint_disjoint_arraycopy();
  1176 		array_overlap_test(nooverlap_target, 2);
  1178 		__ push(T3);
  1179 		__ push(T0);
  1180 		__ push(T1);
  1181 		__ push(T8);
  1183 		/*
  1184 			 __ pushl(esi);
  1185 			 __ movl(ecx, Address(esp, 4+12));      // count
  1186 			 __ pushl(edi);
  1187 			 __ movl(esi, Address(esp, 8+ 4));      // from
  1188 			 __ movl(edi, Address(esp, 8+ 8));      // to
  1189 		 */ 
  1190 		__ move(T1, A2);  
  1191 		__ move(T3, A0); 
  1192 		__ move(T0, A1);
  1194 		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
  1195 		__ sll(AT, T1, Address::times_4); 
  1196 		__ add(AT, T3, AT); 
  1197 		__ lea(T3 , Address(AT, -4)); 
  1198 		//__ std();
  1199 		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
  1200 		__ sll(AT, T1, Address::times_4); 
  1201 		__ add(AT, T0, AT); 
  1202 		__ lea(T0 , Address(AT, -4)); 
  1204 		//    __ cmpl(ecx, 32);
  1205 		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
  1206 		//  __ testl(ecx, ecx);
  1207 		//__ jcc(Assembler::zero, l_4);
  1208 		__ beq(T1, R0, l_4); 
  1209 		__ delayed()->nop();  
  1210 		// __ subl(edi, esi);
  1211 		__ align(16);
  1212 		__ bind(l_2);
  1213 		// __ movl(edx, Address(esi));
  1214 		__ lw(AT, T3, 0);   
  1215 		// __ movl(Address(esi, edi, Address::times_1), edx);
  1216 		__ sw(AT, T0, 0); 
  1217 		// __ subl(esi, 4);
  1218 		__ addi(T3, T3, -4); 
  1219 		__ addi(T0, T0, -4); 
  1220 		//   __ decl(ecx);
  1221 		__ addi(T1, T1, -1); 
  1222 		//__ jcc(Assembler::notEqual, l_2);
  1223 		__ bne(T1, R0, l_2);  
  1224 		__ delayed()->nop(); 
  1225 		if (is_oop) {
  1226 			// __ jmp(l_stchk);
  1227 			__ b( l_stchk); 
  1228 			__ delayed()->nop(); 
  1230 		__ bind(l_4);
  1231 		//      __ cld();
  1232 		//     __ popl(edi);
  1233 		//    __ popl(esi);
  1234 		//   __ ret(0);
  1235 		__ pop(T8); 
  1236 		__ pop(T1); 
  1237 		__ pop(T0); 
  1238 		__ pop(T3); 
  1239 		__ jr(RA); 
  1240 		__ delayed()->nop(); 
  1241 		__ bind(l_3);
  1242 		//   __ rep_movl();
  1243 		if (is_oop) {
  1244 			__ bind(l_stchk);
  1245 			//  __ movl(edi, Address(esp, 8+ 8));
  1246 			__ move(T0, A1);  
  1247 			// __ movl(ecx, Address(esp, 8+ 12));
  1248 			__ move(T1, A2);  
  1249 			array_store_check();
  1251 		//    __ cld();
  1252 		//   __ popl(edi);
  1253 		//   __ popl(esi);
  1254 		//  __ ret(0);
  1255 		__ pop(T8);	
  1256 		__ pop(T1);	
  1257 		__ pop(T0);	
  1258 		__ pop(T3);	
  1259 		__ jr(RA);	
  1260 		__ delayed()->nop(); 
  1261 		return start;
  1264   // Arguments:
  1265   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1266   //             ignored
  1267   //   is_oop  - true => oop array, so generate store check code
  1268   //   name    - stub name string
  1269   //
  1270   // Inputs:
  1271   //   c_rarg0   - source array address
  1272   //   c_rarg1   - destination array address
  1273   //   c_rarg2   - element count, treated as ssize_t, can be zero
  1274   //
  1275   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1276   // the hardware handle it.  The two dwords within qwords that span
  1277   // cache line boundaries will still be loaded and stored atomically.
  1278   //
  1279   // Side Effects:
  1280   //   This generator does not record an entry point itself;
  1281   //   generate_conjoint_long_oop_copy() obtains its no-overlap target from the StubRoutines jlong/oop disjoint arraycopy accessors.
  1282   //
  1283   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
  1284 		Label l_2, l_3, l_4, l_stchk;
  1285 		StubCodeMark mark(this, "StubRoutines", name);
  1286 		__ align(CodeEntryAlignment);
  1287 		address start = __ pc();
  1288 		__ push(T3);	
  1289 		__ push(T0);	
  1290 		__ push(T1);	
  1291 		__ push(T8);	
  1292 		__ move(T1, A2);  
  1293 		__ move(T3, A0); 
  1294 		__ move(T0, A1);
  1296 		// __ cmpl(ecx, 32);
  1297 		// __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
  1298 		// __ rep_movl();
  1299 		__ b(l_2); 	
  1300 		__ delayed()->nop();	
  1301 		if (is_oop) {
  1302 		//  __ jmp(l_stchk);
  1303 			__ b(l_stchk); 
  1304 			__ delayed()->nop(); 
  1306 		//    __ popl(edi);
  1307 		//   __ popl(esi);
  1308 		//  __ ret(0);
  1309 		__ pop(T8);	
  1310 		__ pop(T1);	
  1311 		__ pop(T0);	
  1312 		__ pop(T3);	
  1313 		__ jr(RA); 
  1314 		__ delayed()->nop(); 
  1316 		__ bind(l_2);
  1317 		//  __ subl(edi, esi);
  1318 		//  __ testl(ecx, ecx);
  1319 		// __ jcc(Assembler::zero, l_4);
  1320 		__ beq(T1, R0, l_4);  
  1321 		__ delayed()->nop(); 
  1322 		__ align(16);
  1323 		__ bind(l_3);
  1324 		//__ movl(edx, Address(esi));
  1325 		__ ld(AT, T3, 0);   
  1326 		// __ movl(Address(edi, esi, Address::times_1), edx);
  1327 		__ sd(AT, T0, 0); 
  1328 		// __ addl(esi, 4);
  1329 		__ addi(T3, T3, 8);
  1330 		__ addi(T0, T0, 8);
  1331 		//   __ decl(ecx);
  1332 		__ addi(T1, T1, -1); 
  1333 		//    __ jcc(Assembler::notEqual, l_3);
  1334 		__ bne(T1, R0, l_3); 
  1335 		__ delayed()->nop(); 
  1336 		if (is_oop) {
  1337 			__ bind(l_stchk);
  1338 			//      __ movl(edi, Address(esp, 8+ 8));
  1339 			//     __ movl(ecx, Address(esp, 8+ 12));
  1340 			__ move(T0, A1); 
  1341 			__ move(T1, A2); 
  1342 			array_store_check();
  1344 		__ bind(l_4);
  1345 		//    __ popl(edi);
  1346 		//   __ popl(esi);
  1347 		//  __ ret(0);
  1348 		__ pop(T8);
  1349 		__ pop(T1);
  1350 		__ pop(T0);
  1351 		__ pop(T3);
  1352 		__ jr(RA); 
  1353 		__ delayed()->nop(); 
  1354 		return start;
  1357   // Arguments:
  1358   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1359   //             ignored
  1360   //   is_oop  - true => oop array, so generate store check code
  1361   //   name    - stub name string
  1362   //
  1363   // Inputs:
  1364   //   c_rarg0   - source array address
  1365   //   c_rarg1   - destination array address
  1366   //   c_rarg2   - element count, treated as ssize_t, can be zero
  1367   //
  1368   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1369   // the hardware handle it.  The two dwords within qwords that span
  1370   // cache line boundaries will still be loaded and stored atomically.
  1371   //
  1372   address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
  1373 		Label l_2, l_3, l_4, l_stchk;
  1374 		StubCodeMark mark(this, "StubRoutines", name);
  1375 		__ align(CodeEntryAlignment);
  1376 		address start = __ pc();
  1377 		address nooverlap_target;
  1379 		if (is_oop) {
  1380 			nooverlap_target = aligned ?
  1381 							StubRoutines::arrayof_oop_disjoint_arraycopy() :
  1382 							StubRoutines::oop_disjoint_arraycopy();
  1383 		}else {
  1384 			nooverlap_target = aligned ?
  1385 							StubRoutines::arrayof_jlong_disjoint_arraycopy() :
  1386 							StubRoutines::jlong_disjoint_arraycopy();
  1389 		array_overlap_test(nooverlap_target, 3);
  1391 		__ push(T3);
  1392 		__ push(T0);
  1393 		__ push(T1);
  1394 		__ push(T8);
  1396 		__ move(T1, A2);  
  1397 		__ move(T3, A0); 
  1398 		__ move(T0, A1);
  1400 		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
  1401 		__ sll(AT, T1, Address::times_8); 
  1402 		__ add(AT, T3, AT); 
  1403 		__ lea(T3 , Address(AT, -8)); 
  1404 		//__ std();
  1405 		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
  1406 		__ sll(AT, T1, Address::times_8); 
  1407 		__ add(AT, T0, AT); 
  1408 		__ lea(T0 , Address(AT, -8)); 
  1410 		//    __ cmpl(ecx, 32);
  1411 		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
  1412 		//  __ testl(ecx, ecx);
  1413 		//__ jcc(Assembler::zero, l_4);
  1414 		__ beq(T1, R0, l_4); 
  1415 		__ delayed()->nop();  
  1416 		// __ subl(edi, esi);
  1417 		__ align(16);
  1418 		__ bind(l_2);
  1419 		// __ movl(edx, Address(esi));
  1420 		__ ld(AT, T3, 0);   
  1421 		// __ movl(Address(esi, edi, Address::times_1), edx);
  1422 		__ sd(AT, T0, 0); 
  1423 		// __ subl(esi, 4);
  1424 		__ addi(T3, T3, -8); 
  1425 		__ addi(T0, T0, -8); 
  1426 		//   __ decl(ecx);
  1427 		__ addi(T1, T1, -1); 
  1428 		//__ jcc(Assembler::notEqual, l_2);
  1429 		__ bne(T1, R0, l_2);  
  1430 		__ delayed()->nop(); 
  1431 		if (is_oop) {
  1432 			// __ jmp(l_stchk);
  1433 			__ b( l_stchk); 
  1434 			__ delayed()->nop(); 
  1436 		__ bind(l_4);
  1437 		//      __ cld();
  1438 		//     __ popl(edi);
  1439 		//    __ popl(esi);
  1440 		//   __ ret(0);
  1441 		__ pop(T8); 
  1442 		__ pop(T1); 
  1443 		__ pop(T0); 
  1444 		__ pop(T3); 
  1445 		__ jr(RA); 
  1446 		__ delayed()->nop(); 
  1447 		__ bind(l_3);
  1448 		//   __ rep_movl();
  1449 		if (is_oop) {
  1450 			__ bind(l_stchk);
  1451 			//  __ movl(edi, Address(esp, 8+ 8));
  1452 			__ move(T0, A1);  
  1453 			// __ movl(ecx, Address(esp, 8+ 12));
  1454 			__ move(T1, A2);  
  1455 			array_store_check();
  1457 		//    __ cld();
  1458 		//   __ popl(edi);
  1459 		//   __ popl(esi);
  1460 		//  __ ret(0);
  1461 		__ pop(T8);	
  1462 		__ pop(T1);	
  1463 		__ pop(T0);	
  1464 		__ pop(T3);	
  1465 		__ jr(RA);	
  1466 		__ delayed()->nop(); 
  1467 		return start;
  1469 #if 0
  1470   // Arguments:
  1471   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  1472   //             ignored
  1473   //   is_oop  - true => oop array, so generate store check code
  1474   //   name    - stub name string
  1475   //
  1476   // Inputs:
  1477   //   c_rarg0   - source array address
  1478   //   c_rarg1   - destination array address
  1479   //   c_rarg2   - element count, treated as ssize_t, can be zero
  1480   //
  // Disabled variant: this definition follows the `#if 0` above and is written
  // with x86-64 register names (rdi, rsi, rdx, rcx, rax) rather than the MIPS
  // registers used by the live generators in this file.
  // Per the existing comments it copies 32-byte chunks first and then the
  // trailing qwords one at a time; for oop arrays it wraps the copy in the
  // write_ref_array pre/post barriers.
  // NOTE(review): several lines (closing braces, blank lines) are missing from
  // this listing; do not re-enable without restoring them from the original.
  1481   address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
  1482     __ align(CodeEntryAlignment);
  1483     StubCodeMark mark(this, "StubRoutines", name);
  1484     address start = __ pc();
  1486     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
  1487     const Register from        = rdi;  // source array address
  1488     const Register to          = rsi;  // destination array address
  1489     const Register qword_count = rdx;  // elements count
  1490     const Register saved_count = rcx;
  1492     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1493     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  1495     address disjoint_copy_entry = NULL;
         // Pick the disjoint stub to fall back to and record this stub's entry pc.
         // NOTE(review): array_overlap_test() is emitted in each branch here and
         // again after the "Entry:" block comment below -- looks redundant; confirm.
  1496     if (is_oop) {
  1497       assert(!UseCompressedOops, "shouldn't be called for compressed oops");
  1498       disjoint_copy_entry = disjoint_oop_copy_entry;
  1499       oop_copy_entry  = __ pc();
  1500       array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
  1501     } else {
  1502       disjoint_copy_entry = disjoint_long_copy_entry;
  1503       long_copy_entry = __ pc();
  1504       array_overlap_test(disjoint_long_copy_entry, Address::times_8);
  1506     BLOCK_COMMENT("Entry:");
  1507     // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  1509     array_overlap_test(disjoint_copy_entry, Address::times_8);
  1510     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  1511                       // r9 and r10 may be used to save non-volatile registers
  1513     // 'from', 'to' and 'qword_count' are now valid
  1515     if (is_oop) {
  1516       // Save to and count for store barrier
  1517       __ movptr(saved_count, qword_count);
  1518       // No registers are destroyed by this call
  1519       gen_write_ref_array_pre_barrier(to, saved_count);
  1522     __ jmp(L_copy_32_bytes);
  1524     // Copy trailing qwords
         // Each iteration moves the qword at index qword_count-1, then decrements
         // qword_count until it reaches zero.
  1525   __ BIND(L_copy_8_bytes);
  1526     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
  1527     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
  1528     __ decrement(qword_count);
  1529     __ jcc(Assembler::notZero, L_copy_8_bytes);
  1531     if (is_oop) {
  1532       __ jmp(L_exit);
  1533     } else {
  1534       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
  1535       restore_arg_regs();
  1536       __ xorptr(rax, rax); // return 0
  1537       __ leave(); // required for proper stackwalking of RuntimeStub frame
  1538       __ ret(0);
  1541     // Copy in 32-bytes chunks
  1542     copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
  1544     if (is_oop) {
  1545     __ BIND(L_exit);
         // rcx := address of the last destination element, handed to the post barrier.
  1546       __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
  1547       gen_write_ref_array_post_barrier(to, rcx, rax);
  1548       inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
  1549     } else {
  1550       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
  1552     restore_arg_regs();
  1553     __ xorptr(rax, rax); // return 0
  1554     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1555     __ ret(0);
  1557     return start;
  1561   // Helper for generating a dynamic type check.
  1562   // Smashes no registers.
  // Emits a subtype check of sub_klass against super_klass and jumps to
  // L_success when it passes; on a miss, control falls through past L_miss.
  //   sub_klass          - register holding the klass being tested
  //   super_check_offset - register holding the offset into sub_klass to probe
  //   super_klass        - register holding the expected super klass
  //   L_success          - label to branch to on success
  // Written with x86-64 register names (rax, rcx, rdi).
  // NOTE(review): no #endif is visible between the `#if 0` above and this
  // point, so this helper presumably sits in the disabled region -- confirm.
  1563   void generate_type_check(Register sub_klass,
  1564                            Register super_check_offset,
  1565                            Register super_klass,
  1566                            Label& L_success) {
  1567     assert_different_registers(sub_klass, super_check_offset, super_klass);
  1569     BLOCK_COMMENT("type_check:");
  1571     Label L_miss;
  1573     // a couple of useful fields in sub_klass:
  1574     int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
  1575                      Klass::secondary_supers_offset_in_bytes());
  1576     int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  1577                      Klass::secondary_super_cache_offset_in_bytes());
  1578     Address secondary_supers_addr(sub_klass, ss_offset);
  1579     Address super_cache_addr(     sub_klass, sc_offset);
  1581     // if the pointers are equal, we are done (e.g., String[] elements)
  1582     __ cmpptr(super_klass, sub_klass);
  1583     __ jcc(Assembler::equal, L_success);
  1585     // check the supertype display:
  1586     Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  1587     __ cmpptr(super_klass, super_check_addr); // test the super type
  1588     __ jcc(Assembler::equal, L_success);
  1590     // if it was a primary super, we can just fail immediately
         // (the probe offset differs from the secondary-super-cache offset)
  1591     __ cmpl(super_check_offset, sc_offset);
  1592     __ jcc(Assembler::notEqual, L_miss);
  1594     // Now do a linear scan of the secondary super-klass chain.
  1595     // The repne_scan instruction uses fixed registers, which we must spill.
  1596     // (We need a couple more temps in any case.)
  1597     // This code is rarely used, so simplicity is a virtue here.
  1598     inc_counter_np(SharedRuntime::_partial_subtype_ctr);
  1600       __ push(rax);
  1601       __ push(rcx);
  1602       __ push(rdi);
  1603       assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
  1605       __ movptr(rdi, secondary_supers_addr);
  1606       // Load the array length.
  1607       __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  1608       // Skip to start of data.
  1609       __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  1610       // Scan rcx words at [rdi] for occurrence of rax
  1611       // Set NZ/Z based on last compare
  1612       __ movptr(rax, super_klass);
  1613       if (UseCompressedOops) {
  1614         // Compare against compressed form.  Don't need to uncompress because
  1615         // looks like orig rax is restored in popq below.
  1616         __ encode_heap_oop(rax);
  1617         __ repne_scanl();
  1618       } else {
  1619         __ repne_scan();
  1622       // Unspill the temp. registers:
  1623       __ pop(rdi);
  1624       __ pop(rcx);
  1625       __ pop(rax);
  1627       __ jcc(Assembler::notEqual, L_miss);
  1630     // Success.  Cache the super we found and proceed in triumph.
  1631     __ movptr(super_cache_addr, super_klass); // note: rax is dead
  1632     __ jmp(L_success);
  1634     // Fall through on failure!
  1635     __ BIND(L_miss);
  1638   //
  1639   //  Generate checkcasting array copy stub
  1640   //
  1641   //  Input:
  1642   //    c_rarg0   - source array address
  1643   //    c_rarg1   - destination array address
  1644   //    c_rarg2   - element count, treated as ssize_t, can be zero
  1645   //    c_rarg3   - size_t ckoff (super_check_offset)
  1646   // not Win64
  1647   //    c_rarg4   - oop ckval (super_klass)
  1648   // Win64
  1649   //    rsp+40    - oop ckval (super_klass)
  1650   //
  1651   //  Output:
  1652   //    rax ==  0  -  success
  1653   //    rax == -1^K - failure, where K is partial transfer count
  1654   //
  1655   address generate_checkcast_copy(const char *name) {
  1657     Label L_load_element, L_store_element, L_do_card_marks, L_done;
  1659     // Input registers (after setup_arg_regs)
  1660     const Register from        = rdi;   // source array address
  1661     const Register to          = rsi;   // destination array address
  1662     const Register length      = rdx;   // elements count
  1663     const Register ckoff       = rcx;   // super_check_offset
  1664     const Register ckval       = r8;    // super_klass
  1666     // Registers used as temps (r13, r14 are save-on-entry)
  1667     const Register end_from    = from;  // source array end address
  1668     const Register end_to      = r13;   // destination array end address
  1669     const Register count       = rdx;   // -(count_remaining)
  1670     const Register r14_length  = r14;   // saved copy of length
  1671     // End pointers are inclusive, and if length is not zero they point
  1672     // to the last unit copied:  end_to[0] := end_from[0]
  1674     const Register rax_oop    = rax;    // actual oop copied
  1675     const Register r11_klass  = r11;    // oop._klass
  1677     //---------------------------------------------------------------
  1678     // Assembler stub will be used for this call to arraycopy
  1679     // if the two arrays are subtypes of Object[] but the
  1680     // destination array type is not equal to or a supertype
  1681     // of the source type.  Each element must be separately
  1682     // checked.
  1684     __ align(CodeEntryAlignment);
  1685     StubCodeMark mark(this, "StubRoutines", name);
  1686     address start = __ pc();
  1688     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1690     checkcast_copy_entry  = __ pc();
  1691     BLOCK_COMMENT("Entry:");
  1693 #ifdef ASSERT
  1694     // caller guarantees that the arrays really are different
  1695     // otherwise, we would have to make conjoint checks
  1696     { Label L;
  1697       array_overlap_test(L, TIMES_OOP);
  1698       __ stop("checkcast_copy within a single array");
  1699       __ bind(L);
  1701 #endif //ASSERT
  1703     // allocate spill slots for r13, r14
  1704     enum {
  1705       saved_r13_offset,
  1706       saved_r14_offset,
  1707       saved_rbp_offset,
  1708       saved_rip_offset,
  1709       saved_rarg0_offset
  1710     };
  1711     __ subptr(rsp, saved_rbp_offset * wordSize);
  1712     __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
  1713     __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
  1714     setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
  1715                        // ckoff => rcx, ckval => r8
  1716                        // r9 and r10 may be used to save non-volatile registers
  1717 #ifdef _WIN64
  1718     // last argument (#4) is on stack on Win64
  1719     const int ckval_offset = saved_rarg0_offset + 4;
  1720     __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
  1721 #endif
  1723     // check that int operands are properly extended to size_t
  1724     assert_clean_int(length, rax);
  1725     assert_clean_int(ckoff, rax);
  1727 #ifdef ASSERT
  1728     BLOCK_COMMENT("assert consistent ckoff/ckval");
  1729     // The ckoff and ckval must be mutually consistent,
  1730     // even though caller generates both.
  1731     { Label L;
  1732       int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  1733                         Klass::super_check_offset_offset_in_bytes());
  1734       __ cmpl(ckoff, Address(ckval, sco_offset));
  1735       __ jcc(Assembler::equal, L);
  1736       __ stop("super_check_offset inconsistent");
  1737       __ bind(L);
  1739 #endif //ASSERT
  1741     // Loop-invariant addresses.  They are exclusive end pointers.
  1742     Address end_from_addr(from, length, TIMES_OOP, 0);
  1743     Address   end_to_addr(to,   length, TIMES_OOP, 0);
  1744     // Loop-variant addresses.  They assume post-incremented count < 0.
  1745     Address from_element_addr(end_from, count, TIMES_OOP, 0);
  1746     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
  1748     gen_write_ref_array_pre_barrier(to, count);
  1750     // Copy from low to high addresses, indexed from the end of each array.
  1751     __ lea(end_from, end_from_addr);
  1752     __ lea(end_to,   end_to_addr);
  1753     __ movptr(r14_length, length);        // save a copy of the length
  1754     assert(length == count, "");          // else fix next line:
  1755     __ negptr(count);                     // negate and test the length
  1756     __ jcc(Assembler::notZero, L_load_element);
  1758     // Empty array:  Nothing to do.
  1759     __ xorptr(rax, rax);                  // return 0 on (trivial) success
  1760     __ jmp(L_done);
  1762     // ======== begin loop ========
  1763     // (Loop is rotated; its entry is L_load_element.)
  1764     // Loop control:
  1765     //   for (count = -count; count != 0; count++)
  1766     // Base pointers src, dst are biased by 8*(count-1),to last element.
  1767     __ align(16);
  1769     __ BIND(L_store_element);
  1770     __ store_heap_oop(rax_oop, to_element_addr);  // store the oop
  1771     __ increment(count);               // increment the count toward zero
  1772     __ jcc(Assembler::zero, L_do_card_marks);
  1774     // ======== loop entry is here ========
  1775     __ BIND(L_load_element);
  1776     __ load_heap_oop(rax_oop, from_element_addr); // load the oop
  1777     __ testptr(rax_oop, rax_oop);
  1778     __ jcc(Assembler::zero, L_store_element);
  1780     __ load_klass(r11_klass, rax_oop);// query the object klass
  1781     generate_type_check(r11_klass, ckoff, ckval, L_store_element);
  1782     // ======== end loop ========
  1784     // It was a real error; we must depend on the caller to finish the job.
  1785     // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
  1786     // Emit GC store barriers for the oops we have copied (r14 + rdx),
  1787     // and report their number to the caller.
  1788     assert_different_registers(rax, r14_length, count, to, end_to, rcx);
  1789     __ lea(end_to, to_element_addr);
  1790     gen_write_ref_array_post_barrier(to, end_to, rscratch1);
  1791     __ movptr(rax, r14_length);           // original oops
  1792     __ addptr(rax, count);                // K = (original - remaining) oops
  1793     __ notptr(rax);                       // report (-1^K) to caller
  1794     __ jmp(L_done);
  1796     // Come here on success only.
  1797     __ BIND(L_do_card_marks);
  1798     __ addptr(end_to, -wordSize);         // make an inclusive end pointer
  1799     gen_write_ref_array_post_barrier(to, end_to, rscratch1);
  1800     __ xorptr(rax, rax);                  // return 0 on success
  1802     // Common exit point (success or failure).
  1803     __ BIND(L_done);
  1804     __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
  1805     __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
  1806     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
  1807     restore_arg_regs();
  1808     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1809     __ ret(0);
  1811     return start;
  1814   //
  1815   //  Generate 'unsafe' array copy stub
  1816   //  Though just as safe as the other stubs, it takes an unscaled
  1817   //  size_t argument instead of an element count.
  1818   //
  1819   //  Input:
  1820   //    c_rarg0   - source array address
  1821   //    c_rarg1   - destination array address
  1822   //    c_rarg2   - byte count, treated as ssize_t, can be zero
  1823   //
  1824   // Examines the alignment of the operands and dispatches
  1825   // to a long, int, short, or byte copy loop.
  1826   //
         // Returns the stub entry address, i.e. the pc captured right after
         // aligning to CodeEntryAlignment.
         //
         // NOTE(review): the body uses x86_64 register names and instructions
         // (rax, testb, jccb, shrptr); presumably it is reference code kept
         // from the x86_64 port and is not built for MIPS -- confirm.
  1827   address generate_unsafe_copy(const char *name) {
  1829     Label L_long_aligned, L_int_aligned, L_short_aligned;
  1831     // Input registers (before setup_arg_regs)
  1832     const Register from        = c_rarg0;  // source array address
  1833     const Register to          = c_rarg1;  // destination array address
  1834     const Register size        = c_rarg2;  // byte count (size_t)
  1836     // Register used as a temp
  1837     const Register bits        = rax;      // test copy of low bits
  1839     __ align(CodeEntryAlignment);
  1840     StubCodeMark mark(this, "StubRoutines", name);
  1841     address start = __ pc();
  1843     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1845     // bump this on entry, not on exit:
  1846     inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
           // OR both addresses and the byte count together so that a single
           // test of the low bits covers the alignment of all three operands.
  1848     __ mov(bits, from);
  1849     __ orptr(bits, to);
  1850     __ orptr(bits, size);
           // Try the widest element size first and narrow on each failed test.
  1852     __ testb(bits, BytesPerLong-1);
  1853     __ jccb(Assembler::zero, L_long_aligned);
  1855     __ testb(bits, BytesPerInt-1);
  1856     __ jccb(Assembler::zero, L_int_aligned);
           // Not even short-aligned: hand the unscaled byte count straight to
           // the byte copy entry.
  1858     __ testb(bits, BytesPerShort-1);
  1859     __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
           // No branch in this function targets L_short_aligned; it is reached
           // by falling through from the test above.
           // Each case below scales the byte count down to an element count
           // and then jumps to the matching copy entry.
  1861     __ BIND(L_short_aligned);
  1862     __ shrptr(size, LogBytesPerShort); // size => short_count
  1863     __ jump(RuntimeAddress(short_copy_entry));
  1865     __ BIND(L_int_aligned);
  1866     __ shrptr(size, LogBytesPerInt); // size => int_count
  1867     __ jump(RuntimeAddress(int_copy_entry));
  1869     __ BIND(L_long_aligned);
  1870     __ shrptr(size, LogBytesPerLong); // size => qword_count
  1871     __ jump(RuntimeAddress(long_copy_entry));
  1873     return start;
  1876   // Perform range checks on the proposed arraycopy.
  1877   // Kills temp, but nothing else.
  1878   // Also, clean the sign bits of src_pos and dst_pos.
         //
         // Emits code that jumps to L_failed when either
         //   src_pos + length > src.length   or   dst_pos + length > dst.length.
         // Both sums are formed with 32-bit instructions (movl/addl) in temp.
         //
         // NOTE(review): x86_64 instructions are used here; presumably this is
         // reference code kept from the x86_64 port -- confirm it is not built
         // for MIPS.
  1879   void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
  1880                               Register src_pos, // source position (c_rarg1)
  1881                               Register dst,     // destination array oop (c_rarg2)
  1882                               Register dst_pos, // destination position (c_rarg3)
  1883                               Register length,
  1884                               Register temp,
  1885                               Label& L_failed) {
  1886     BLOCK_COMMENT("arraycopy_range_checks:");
  1888     //  if (src_pos + length > arrayOop(src)->length())  FAIL;
  1889     __ movl(temp, length);
  1890     __ addl(temp, src_pos);             // src_pos + length
  1891     __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
           // presumably 'above' is the unsigned greater-than condition, so a sum
           // that wrapped past 2^31 is rejected as well -- confirm.
  1892     __ jcc(Assembler::above, L_failed);
  1894     //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
  1895     __ movl(temp, length);
  1896     __ addl(temp, dst_pos);             // dst_pos + length
  1897     __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
  1898     __ jcc(Assembler::above, L_failed);
  1900     // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
  1901     // Move with sign extension can be used since they are positive.
  1902     __ movslq(src_pos, src_pos);
  1903     __ movslq(dst_pos, dst_pos);
  1905     BLOCK_COMMENT("arraycopy_range_checks done");
  1908   //
  1909   //  Generate generic array copy stubs
  1910   //
  1911   //  Input:
  1912   //    c_rarg0    -  src oop
  1913   //    c_rarg1    -  src_pos (32-bits)
  1914   //    c_rarg2    -  dst oop
  1915   //    c_rarg3    -  dst_pos (32-bits)
  1916   // not Win64
  1917   //    c_rarg4    -  element count (32-bits)
  1918   // Win64
  1919   //    rsp+40     -  element count (32-bits)
  1920   //
  1921   //  Output:
  1922   //    rax ==  0  -  success
  1923   //    rax == -1^K - failure, where K is partial transfer count
  1924   //
         //  The stub validates its arguments, then tail-jumps to one of the
         //  typed copy entries (byte/short/int/long/oop/checkcast).  Every
         //  failure detected in this stub itself goes to L_failed, which
         //  returns -1 in rax.
         //
         //  NOTE(review): x86_64 registers and instructions are used
         //  throughout; presumably this is reference code kept from the x86_64
         //  port and excluded from the MIPS build by the conditional that the
         //  #endif after this function closes -- confirm.
  1925   address generate_generic_copy(const char *name) {
  1927     Label L_failed, L_failed_0, L_objArray;
  1928     Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
  1930     // Input registers
  1931     const Register src        = c_rarg0;  // source array oop
  1932     const Register src_pos    = c_rarg1;  // source position
  1933     const Register dst        = c_rarg2;  // destination array oop
  1934     const Register dst_pos    = c_rarg3;  // destination position
  1935     // elements count is on stack on Win64
  1936 #ifdef _WIN64
  1937 #define C_RARG4 Address(rsp, 6 * wordSize)
  1938 #else
  1939 #define C_RARG4 c_rarg4
  1940 #endif
           // Pad with nops so that the 5-byte jmp bound at L_failed_0 ends
           // exactly on a CodeEntryAlignment boundary; the assert after the jmp
           // checks this.
  1942     { int modulus = CodeEntryAlignment;
  1943       int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
  1944       int advance = target - (__ offset() % modulus);
  1945       if (advance < 0)  advance += modulus;
  1946       if (advance > 0)  __ nop(advance);
  1948     StubCodeMark mark(this, "StubRoutines", name);
  1950     // Short-hop target to L_failed.  Makes for denser prologue code.
  1951     __ BIND(L_failed_0);
  1952     __ jmp(L_failed);
  1953     assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
  1955     __ align(CodeEntryAlignment);
  1956     address start = __ pc();
  1958     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1960     // bump this on entry, not on exit:
  1961     inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
  1963     //-----------------------------------------------------------------------
  1964     // Assembler stub will be used for this call to arraycopy
  1965     // if the following conditions are met:
  1966     //
  1967     // (1) src and dst must not be null.
  1968     // (2) src_pos must not be negative.
  1969     // (3) dst_pos must not be negative.
  1970     // (4) length  must not be negative.
  1971     // (5) src klass and dst klass should be the same and not NULL.
  1972     // (6) src and dst should be arrays.
  1973     // (7) src_pos + length must not exceed length of src.
  1974     // (8) dst_pos + length must not exceed length of dst.
  1975     //
  1977     //  if (src == NULL) return -1;
  1978     __ testptr(src, src);         // src oop
  1979     size_t j1off = __ offset();
  1980     __ jccb(Assembler::zero, L_failed_0);
  1982     //  if (src_pos < 0) return -1;
  1983     __ testl(src_pos, src_pos); // src_pos (32-bits)
  1984     __ jccb(Assembler::negative, L_failed_0);
  1986     //  if (dst == NULL) return -1;
  1987     __ testptr(dst, dst);         // dst oop
  1988     __ jccb(Assembler::zero, L_failed_0);
  1990     //  if (dst_pos < 0) return -1;
  1991     __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
  1992     size_t j4off = __ offset();
  1993     __ jccb(Assembler::negative, L_failed_0);
  1995     // The first four tests are very dense code,
  1996     // but not quite dense enough to put four
  1997     // jumps in a 16-byte instruction fetch buffer.
  1998     // That's good, because some branch predictors
  1999     // do not like jumps so close together.
  2000     // Make sure of this.
           // (The check below requires the offsets of the 1st and 4th jccb to
           // differ in some bit above the low four, i.e. to lie in different
           // 16-byte-aligned blocks of the code buffer.)
  2001     guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
  2003     // registers used as temp
  2004     const Register r11_length    = r11; // elements count to copy
  2005     const Register r10_src_klass = r10; // array klass
  2006     const Register r9_dst_klass  = r9;  // dest array klass
  2008     //  if (length < 0) return -1;
  2009     __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
  2010     __ testl(r11_length, r11_length);
  2011     __ jccb(Assembler::negative, L_failed_0);
  2013     __ load_klass(r10_src_klass, src);
  2014 #ifdef ASSERT
  2015     //  assert(src->klass() != NULL);
  2016     BLOCK_COMMENT("assert klasses not null");
           // L1 is the shared "broken" exit: the src check falls into it and
           // the dst check jumps back to it.
  2017     { Label L1, L2;
  2018       __ testptr(r10_src_klass, r10_src_klass);
  2019       __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
  2020       __ bind(L1);
  2021       __ stop("broken null klass");
  2022       __ bind(L2);
  2023       __ load_klass(r9_dst_klass, dst);
  2024       __ cmpq(r9_dst_klass, 0);
  2025       __ jcc(Assembler::equal, L1);     // this would be broken also
  2026       BLOCK_COMMENT("assert done");
  2028 #endif
  2030     // Load layout helper (32-bits)
  2031     //
  2032     //  |array_tag|     | header_size | element_type |     |log2_element_size|
  2033     // 32        30    24            16              8     2                 0
  2034     //
  2035     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
  2036     //
  2038     int lh_offset = klassOopDesc::header_size() * HeapWordSize +
  2039                     Klass::layout_helper_offset_in_bytes();
  2041     const Register rax_lh = rax;  // layout helper
  2043     __ movl(rax_lh, Address(r10_src_klass, lh_offset));
  2045     // Handle objArrays completely differently...
  2046     jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
  2047     __ cmpl(rax_lh, objArray_lh);
  2048     __ jcc(Assembler::equal, L_objArray);
  2050     //  if (src->klass() != dst->klass()) return -1;
  2051     __ load_klass(r9_dst_klass, dst);
  2052     __ cmpq(r10_src_klass, r9_dst_klass);
  2053     __ jcc(Assembler::notEqual, L_failed);
  2055     //  if (!src->is_Array()) return -1;
  2056     __ cmpl(rax_lh, Klass::_lh_neutral_value);
  2057     __ jcc(Assembler::greaterEqual, L_failed);
  2059     // At this point, it is known to be a typeArray (array_tag 0x3).
  2060 #ifdef ASSERT
  2061     { Label L;
  2062       __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
  2063       __ jcc(Assembler::greaterEqual, L);
  2064       __ stop("must be a primitive array");
  2065       __ bind(L);
  2067 #endif
           // r10 is handed over as the scratch register here, so r10_src_klass
           // is dead from this call on (the helper kills its temp).
  2069     arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  2070                            r10, L_failed);
  2072     // typeArrayKlass
  2073     //
  2074     // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
  2075     // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
  2076     //
  2078     const Register r10_offset = r10;    // array offset
  2079     const Register rax_elsize = rax_lh; // element size
           // Extract the header-size field of the layout helper and add it to
           // both array oops, turning them into addresses of element 0.
  2081     __ movl(r10_offset, rax_lh);
  2082     __ shrl(r10_offset, Klass::_lh_header_size_shift);
  2083     __ andptr(r10_offset, Klass::_lh_header_size_mask);   // array_offset
  2084     __ addptr(src, r10_offset);           // src array offset
  2085     __ addptr(dst, r10_offset);           // dst array offset
  2086     BLOCK_COMMENT("choose copy loop based on element size");
  2087     __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
  2089     // next registers should be set before the jump to corresponding stub
  2090     const Register from     = c_rarg0;  // source array address
  2091     const Register to       = c_rarg1;  // destination array address
  2092     const Register count    = c_rarg2;  // elements count
  2094     // 'from', 'to', 'count' registers should be set in such order
  2095     // since they are the same as 'src', 'src_pos', 'dst'.
           // Each case below compares the log2 element size, falls to the next
           // case on mismatch, and otherwise marshals (from, to, count) and
           // jumps to the matching copy entry.
  2097   __ BIND(L_copy_bytes);
  2098     __ cmpl(rax_elsize, 0);
  2099     __ jccb(Assembler::notEqual, L_copy_shorts);
  2100     __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
  2101     __ lea(to,   Address(dst, dst_pos, Address::times_1, 0));// dst_addr
  2102     __ movl2ptr(count, r11_length); // length
  2103     __ jump(RuntimeAddress(byte_copy_entry));
  2105   __ BIND(L_copy_shorts);
  2106     __ cmpl(rax_elsize, LogBytesPerShort);
  2107     __ jccb(Assembler::notEqual, L_copy_ints);
  2108     __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
  2109     __ lea(to,   Address(dst, dst_pos, Address::times_2, 0));// dst_addr
  2110     __ movl2ptr(count, r11_length); // length
  2111     __ jump(RuntimeAddress(short_copy_entry));
  2113   __ BIND(L_copy_ints);
  2114     __ cmpl(rax_elsize, LogBytesPerInt);
  2115     __ jccb(Assembler::notEqual, L_copy_longs);
  2116     __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
  2117     __ lea(to,   Address(dst, dst_pos, Address::times_4, 0));// dst_addr
  2118     __ movl2ptr(count, r11_length); // length
  2119     __ jump(RuntimeAddress(int_copy_entry));
  2121   __ BIND(L_copy_longs);
  2122 #ifdef ASSERT
  2123     { Label L;
  2124       __ cmpl(rax_elsize, LogBytesPerLong);
  2125       __ jcc(Assembler::equal, L);
  2126       __ stop("must be long copy, but elsize is wrong");
  2127       __ bind(L);
  2129 #endif
  2130     __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
  2131     __ lea(to,   Address(dst, dst_pos, Address::times_8, 0));// dst_addr
  2132     __ movl2ptr(count, r11_length); // length
  2133     __ jump(RuntimeAddress(long_copy_entry));
  2135     // objArrayKlass
  2136   __ BIND(L_objArray);
  2137     // live at this point:  r10_src_klass, src[_pos], dst[_pos]
  2139     Label L_plain_copy, L_checkcast_copy;
  2140     //  test array classes for subtyping
  2141     __ load_klass(r9_dst_klass, dst);
  2142     __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
  2143     __ jcc(Assembler::notEqual, L_checkcast_copy);
  2145     // Identically typed arrays can be copied without element-wise checks.
  2146     arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  2147                            r10, L_failed);
  2149     __ lea(from, Address(src, src_pos, TIMES_OOP,
  2150                  arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
  2151     __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
  2152                  arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
  2153     __ movl2ptr(count, r11_length); // length
           // L_plain_copy is also the label handed to generate_type_check in
           // the checkcast path below; by then from/to/count are already set.
  2154   __ BIND(L_plain_copy);
  2155     __ jump(RuntimeAddress(oop_copy_entry));
  2157   __ BIND(L_checkcast_copy);
  2158     // live at this point:  r10_src_klass, !r11_length
  2160       // assert(r11_length == C_RARG4); // will reload from here
             // r11 is reused for the destination klass from here on, which is
             // why the length is re-read from C_RARG4 below.
  2161       Register r11_dst_klass = r11;
  2162       __ load_klass(r11_dst_klass, dst);
  2164       // Before looking at dst.length, make sure dst is also an objArray.
  2165       __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
  2166       __ jcc(Assembler::notEqual, L_failed);
  2168       // It is safe to examine both src.length and dst.length.
  2169 #ifndef _WIN64
  2170       arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
  2171                              rax, L_failed);
  2172 #else
  2173       __ movl(r11_length, C_RARG4);     // reload
  2174       arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  2175                              rax, L_failed);
  2176       __ load_klass(r11_dst_klass, dst); // reload
  2177 #endif
  2179       // Marshal the base address arguments now, freeing registers.
  2180       __ lea(from, Address(src, src_pos, TIMES_OOP,
  2181                    arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  2182       __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
  2183                    arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  2184       __ movl(count, C_RARG4);          // length (reloaded)
  2185       Register sco_temp = c_rarg3;      // this register is free now
  2186       assert_different_registers(from, to, count, sco_temp,
  2187                                  r11_dst_klass, r10_src_klass);
  2188       assert_clean_int(count, sco_temp);
  2190       // Generate the type check.
             // presumably generate_type_check branches to L_plain_copy when the
             // src array klass is a subtype of the dst array klass and falls
             // through otherwise -- confirm against its definition.
  2191       int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  2192                         Klass::super_check_offset_offset_in_bytes());
  2193       __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
  2194       assert_clean_int(sco_temp, rax);
  2195       generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
  2197       // Fetch destination element klass from the objArrayKlass header.
  2198       int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
  2199                        objArrayKlass::element_klass_offset_in_bytes());
  2200       __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
  2201       __ movl(sco_temp,      Address(r11_dst_klass, sco_offset));
  2202       assert_clean_int(sco_temp, rax);
  2204       // the checkcast_copy loop needs two extra arguments:
  2205       assert(c_rarg3 == sco_temp, "#3 already in place");
  2206       __ movptr(C_RARG4, r11_dst_klass);  // dst.klass.element_klass
  2207       __ jump(RuntimeAddress(checkcast_copy_entry));
           // Common failure exit: rax = ~0 (-1), then tear down the frame set
           // up by enter() and return.
  2210   __ BIND(L_failed);
  2211     __ xorptr(rax, rax);
  2212     __ notptr(rax); // return -1
  2213     __ leave();   // required for proper stackwalking of RuntimeStub frame
  2214     __ ret(0);
  2216     return start;
  2219 #undef length_arg
  2220 #endif
  2222 //FIXME
         // Generates a stub that copies A2 8-byte elements from the address in
         // A0 to the address in A1, in ascending address order, and returns to
         // RA.  Returns the stub entry address.
         //
         // The 'aligned' parameter is not referenced in the body.
         // The commented-out lines are the x86 instructions each MIPS
         // instruction was translated from.
         //
         // NOTE(review): addi is applied to the address registers T3/T0;
         // confirm that this assembler's addi is safe for 64-bit addresses.
  2223   address generate_disjoint_long_copy(bool aligned, const char *name) {
  2224 	  Label l_1, l_2;
  2225 	  StubCodeMark mark(this, "StubRoutines", name);
  2226 	  __ align(CodeEntryAlignment);
  2227 	  address start = __ pc();
  2229 	  //      __ movl(ecx, Address(esp, 4+8));       // count
  2230 	  //     __ movl(eax, Address(esp, 4+0));       // from
  2231 	  //    __ movl(edx, Address(esp, 4+4));       // to
         // T1 = count, T3 = from, T0 = to.
  2232 	  __ move(T1, A2);  
  2233 	  __ move(T3, A0); 
  2234 	  __ move(T0, A1);
         // The pushes come after the moves, so what is saved (and popped back
         // at the end) is the argument values, not the previous contents of
         // T3/T0/T1.
  2235 	  __ push(T3); 
  2236 	  __ push(T0);
  2237 	  __ push(T1);
  2238 	  //__ subl(edx, eax);
  2239 	  //__ jmp(l_2);
         // Enter the loop at its decrement-and-test (l_2), so a count of zero
         // copies nothing.
  2240 	  __ b(l_2);  
  2241 	  __ delayed()->nop();   
  2242 	  __ align(16);
  2243 	  __ bind(l_1);
  2244 	  //   if (VM_Version::supports_mmx()) {
  2245 	  //     __ movq(mmx0, Address(eax));
  2246 	  //     __ movq(Address(eax, edx, Address::times_1), mmx0);
  2247 	  //   } else {
  2248 	  //   __ fild_d(Address(eax));
         // Copy one 8-byte element through AT, then advance both pointers.
  2249 	  __ ld(AT, T3, 0);   
  2250 	  // __ fistp_d(Address(eax, edx, Address::times_1));
  2251 	  __ sd (AT, T0, 0); 
  2252 	  //   }
  2253 	  //   __ addl(eax, 8);
  2254 	  __ addi(T3, T3, 8); 
  2255 	  __ addi(T0, T0, 8); 
  2256 	  __ bind(l_2);
  2257 	  //    __ decl(ecx);
         // Decrement the remaining count and loop while it is still >= 0.
  2258 	  __ addi(T1, T1, -1); 
  2259 	  //    __ jcc(Assembler::greaterEqual, l_1);
  2260 	  __ bgez(T1, l_1);    
  2261 	  __ delayed()->nop(); 
  2262 	  //  if (VM_Version::supports_mmx()) {
  2263 	  //    __ emms();
  2264 	  //  }
  2265 	  //  __ ret(0);
         // Pop in the reverse order of the pushes above, then return.
  2266 	  __ pop(T1); 
  2267 	  __ pop(T0); 
  2268 	  __ pop(T3); 
  2269 	  __ jr(RA); 
  2270 	  __ delayed()->nop(); 
  2271 	  return start;
  2275   address generate_conjoint_long_copy(bool aligned, const char *name) {
         // Generates a stub that copies A2 8-byte elements from A0 to A1,
         // walking from the last element down to the first, and returns to RA.
         // Returns the stub entry address.
         //
         // 'aligned' only selects which disjoint stub is passed to
         // array_overlap_test.  That stub's address is read while this stub is
         // being generated, so the disjoint long copy stub has to exist
         // already.
         //
         // NOTE(review): sll/add are used to form the end addresses; confirm
         // they are safe for 64-bit addresses and large counts.
  2276 	  Label l_1, l_2;
  2277 	  StubCodeMark mark(this, "StubRoutines", name);
  2278 	  __ align(CodeEntryAlignment);
  2279 	  address start = __ pc();
  2280 	  address nooverlap_target = aligned ?
  2281 		  StubRoutines::arrayof_jlong_disjoint_arraycopy() :
  2282 		  StubRoutines::jlong_disjoint_arraycopy();
         // presumably this emits a branch to nooverlap_target when the two
         // ranges do not overlap, with 3 being log2 of the element size --
         // confirm against array_overlap_test.
  2283 	  array_overlap_test(nooverlap_target, 3);
         // Unlike the disjoint stub, the temporaries are pushed before they
         // are overwritten.
  2285 	  __ push(T3); 
  2286 	  __ push(T0); 
  2287 	  __ push(T1); 
  2289 		/*      __ movl(ecx, Address(esp, 4+8));       // count
  2290 						__ movl(eax, Address(esp, 4+0));       // from
  2291 						__ movl(edx, Address(esp, 4+4));       // to
  2292 						__ jmp(l_2);
  2294 		 */
         // T1 = count, T3 = from, T0 = to.
  2295 	  __ move(T1, A2);  
  2296 	  __ move(T3, A0); 
  2297 	  __ move(T0, A1);
         // T3 = from + (count << times_8) - 8 and T0 = to + (count << times_8) - 8,
         // i.e. both point at the last element to copy.
  2298 	  __ sll(AT, T1, Address::times_8); 
  2299 	  __ add(AT, T3, AT); 
  2300 	  __ lea(T3 , Address(AT, -8)); 
  2301 	  __ sll(AT, T1, Address::times_8); 
  2302 	  __ add(AT, T0, AT); 
  2303 	  __ lea(T0 , Address(AT, -8)); 
         // Enter the loop at its decrement-and-test (l_2), so a count of zero
         // copies nothing.
  2307 	  __ b(l_2); 
  2308 	  __ delayed()->nop(); 
  2309 	  __ align(16);
  2310 		__ bind(l_1);
  2311 		/*      if (VM_Version::supports_mmx()) {
  2312 						__ movq(mmx0, Address(eax, ecx, Address::times_8));
  2313 						__ movq(Address(edx, ecx,Address::times_8), mmx0);
  2314 						} else {
  2315 						__ fild_d(Address(eax, ecx, Address::times_8));
  2316 						__ fistp_d(Address(edx, ecx,Address::times_8));
  2318 		 */    
         // Copy one 8-byte element through AT, then step both pointers down.
  2319 		__ ld(AT, T3, 0);   
  2320 		__ sd (AT, T0, 0); 
  2321 	  __ addi(T3, T3, -8); 
  2322 	  __ addi(T0, T0,-8); 
  2323 	  __ bind(l_2);
  2324 	  //	    __ decl(ecx);
         // Decrement the remaining count and loop while it is still >= 0.
  2325 	  __ addi(T1, T1, -1); 
  2326 	  //__ jcc(Assembler::greaterEqual, l_1);
  2327 	  __ bgez(T1, l_1); 
  2328 	  __ delayed()->nop(); 
  2329 	  //      if (VM_Version::supports_mmx()) {
  2330 	  //      __ emms();
  2331 	  //   }
  2332 	  //  __ ret(0);
         // Pop in the reverse order of the pushes above, then return.
  2333 	  __ pop(T1); 
  2334 	  __ pop(T0); 
  2335 	  __ pop(T3); 
  2336 	  __ jr(RA); 
  2337 	  __ delayed()->nop();  
  2338 	  return start;
  2341   void generate_arraycopy_stubs() {
         // Generates every arraycopy stub and stores its entry address in the
         // corresponding StubRoutines field.
         //
         // Order matters for at least the long copies:
         // generate_conjoint_long_copy reads the jlong disjoint stub address
         // from StubRoutines while it runs, so the disjoint stubs are
         // generated first.
         //
         // Oop copies use the int-sized generator when UseCompressedOops is
         // set and the long-sized one otherwise.  presumably the second bool
         // argument of the *_oop_copy generators is an "is oop" flag (true for
         // the oop stubs, false for the jint stubs) -- confirm.
  2342     if (UseCompressedOops) {
  2343       StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
  2344       StubRoutines::_oop_arraycopy   	= generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
  2345     } else {
  2346       StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
  2347       StubRoutines::_oop_arraycopy   	= generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
           // Disjoint (non-overlapping) primitive copies.
  2350     StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
  2351     StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
  2352     StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
  2353     StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
  2354     StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
  2356     //  if (VM_Version::supports_mmx())
  2357     //if (false)
  2358     // StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_mmx_copy_aligned("arrayof_jshort_disjoint_arraycopy");
  2359     // else
  2360     StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
  2361     StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy");
  2362     //StubRoutines::_arrayof_oop_disjoint_arraycopy   = generate_disjoint_int_oop_copy(true, true, "arrayof_oop_disjoint_arraycopy");
  2363     StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
           // Conjoint (possibly overlapping) primitive copies.
  2365     StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
  2366     StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
  2367     StubRoutines::_jint_arraycopy   = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
  2368     StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");
  2370     StubRoutines::_arrayof_jbyte_arraycopy  = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
  2371     StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
  2372     StubRoutines::_arrayof_jint_arraycopy   = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy");
  2373     //StubRoutines::_arrayof_oop_arraycopy    = generate_conjoint_int_oop_copy(true, true, "arrayof_oop_arraycopy");
  2374     StubRoutines::_arrayof_jlong_arraycopy  = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy");
           // No separate arrayof_oop stubs are generated; those entries reuse
           // the plain oop stubs created at the top of this function.
  2376     StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
  2377     StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
  2380 //Wang: add a function to implement SafeFetch32 and SafeFetchN
         // Emits one stub that loads a 4-byte (size == 4) or 8-byte (size == 8)
         // value from the address in A0 into A1 and returns A1 in V0.  Any
         // other size hits ShouldNotReachHere().
         //
         // Three addresses are reported through the out parameters:
         //   *entry           - start of the stub
         //   *fault_pc        - pc of the load instruction (same address as
         //                      *entry, since nothing is emitted in between)
         //   *continuation_pc - pc of the first instruction after the load
         //
         // presumably the fault handler resumes at *continuation_pc when the
         // load at *fault_pc faults; A1 then still holds errValue, which is
         // what gets returned -- confirm against the signal-handling code.
  2381   void generate_safefetch(const char* name, int size, address* entry,
  2382                           address* fault_pc, address* continuation_pc) {
  2383     // safefetch signatures:
  2384     //   int      SafeFetch32(int*      adr, int      errValue);
  2385     //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
  2386     //
  2387     // arguments:
  2388     //   A0 = adr
  2389     //   A1 = errValue
  2390     //
  2391     // result:
  2392     //   V0  = *adr or errValue
  2394     StubCodeMark mark(this, "StubRoutines", name);
  2396     // Entry point, pc or function descriptor.
  2397     *entry = __ pc();
  2399     // Load *adr into A1, may fault.
  2400     *fault_pc = __ pc();
  2401     switch (size) {
  2402       case 4:
  2403         // int32_t
  2404         __ lw(A1, A0, 0); 
  2405         break;
  2406       case 8:
  2407         // int64_t
  2408         __ ld(A1, A0, 0); 
  2409         break;
  2410       default:
  2411         ShouldNotReachHere();
  2414     // return errValue or *adr
  2415     *continuation_pc = __ pc();
           // V0 = A1 + R0, i.e. copy A1 into the result register.
  2416     __ addu(V0,A1,R0);
  2417     __ jr(RA);
  2418     __ delayed()->nop();
  2422 #undef __
  2423 #define __ masm->
  2425   // Continuation point for throwing of implicit exceptions that are
  2426   // not handled in the current activation. Fabricates an exception
  2427   // oop and initiates normal exception dispatching in this
  2428   // frame. Since we need to preserve callee-saved values (currently
  2429   // only for C2, but done for C1 as well) we need a callee-saved oop
  2430   // map and therefore have to make these stubs into RuntimeStubs
  2431   // rather than BufferBlobs.  If the compiler needs all registers to
  2432   // be preserved between the fault point and the exception handler
  2433   // then it must assume responsibility for that in
  2434   // AbstractCompiler::continuation_for_implicit_null_exception or
  2435   // continuation_for_implicit_division_by_zero_exception. All other
  2436   // implicit exceptions (e.g., NullPointerException or
  2437   // AbstractMethodError on entry) are either at call sites or
  2438   // otherwise assume that stack unwinding will be initiated, so
  2439   // caller saved registers were assumed volatile in the compiler.
  2440   address generate_throw_exception(const char* name,
  2441                                    address runtime_entry,
  2442                                    bool restore_saved_exception_pc) {
  2443     // Information about frame layout at time of blocking runtime call.
  2444     // Note that we only have to preserve callee-saved registers since
  2445     // the compilers are responsible for supplying a continuation point
  2446 		// if they expect all registers to be preserved.
  2447 //#define aoqi_test
  2448 #ifdef aoqi_test
  2449 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  2450 #endif
  2451 		enum layout {
  2452 			thread_off,    // last_java_sp                
  2453 			S7_off,        // callee saved register      sp + 1
  2454 			S6_off,        // callee saved register      sp + 2
  2455 			S5_off,        // callee saved register      sp + 3
  2456 			S4_off,        // callee saved register      sp + 4
  2457 			S3_off,        // callee saved register      sp + 5
  2458 			S2_off,        // callee saved register      sp + 6
  2459 			S1_off,        // callee saved register      sp + 7
  2460 			S0_off,        // callee saved register      sp + 8
  2461 			FP_off,
  2462 			ret_address,
  2463 			framesize
  2464 		};
  2466 		int insts_size = 2048;
  2467 		int locs_size  = 32;
  2469 		//  CodeBuffer* code     = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, 
  2470 		//  NULL, NULL, NULL, false, NULL, name, false);
  2471 		CodeBuffer code (name , insts_size, locs_size);
  2472 #ifdef aoqi_test
  2473 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  2474 #endif
  2475 		OopMapSet* oop_maps  = new OopMapSet();
  2476 #ifdef aoqi_test
  2477 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  2478 #endif
  2479 		MacroAssembler* masm = new MacroAssembler(&code);
  2480 #ifdef aoqi_test
  2481 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  2482 #endif
  2484 		address start = __ pc();
  2485     	//__ stop("generate_throw_exception");
  2486 		/*
  2487 			 __ move(AT, (int)&jerome1 );
  2488 			 __ sw(SP, AT, 0); 	
  2489 			 __ move(AT, (int)&jerome2 );
  2490 			 __ sw(FP, AT, 0); 	
  2491 			 __ move(AT, (int)&jerome3 );
  2492 			 __ sw(RA, AT, 0); 	
  2493 			 __ move(AT, (int)&jerome4 );
  2494 			 __ sw(R0, AT, 0); 	
  2495 			 __ move(AT, (int)&jerome5 );
  2496 			 __ sw(R0, AT, 0); 	
  2497 			 __ move(AT, (int)&jerome6 );
  2498 			 __ sw(R0, AT, 0); 	
  2499 			 __ move(AT, (int)&jerome7 );
  2500 			 __ sw(R0, AT, 0); 	
  2501 			 __ move(AT, (int)&jerome10 );
  2502 			 __ sw(R0, AT, 0); 	
  2504 			 __ pushad();
  2506 		//__ enter();
  2507 		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
  2508 		relocInfo::runtime_call_type);
  2509 		__ delayed()->nop();
  2511 		//__ leave();
  2512 		__ popad();
  2514 		 */
  2516 		// This is an inlined and slightly modified version of call_VM
  2517 		// which has the ability to fetch the return PC out of
  2518 		// thread-local storage and also sets up last_Java_sp slightly
  2519 		// differently than the real call_VM
  2520 #ifndef OPT_THREAD	
  2521 		Register java_thread = TREG;
  2522 		__ get_thread(java_thread);
  2523 #else
  2524 		Register java_thread = TREG;
  2525 #endif
  2526 #ifdef aoqi_test
  2527 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  2528 #endif
  2529 		if (restore_saved_exception_pc) {
  2530 			__ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // eax
  2533 		__ enter(); // required for proper stackwalking of RuntimeStub frame
  2535 		__ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
  2536 		__ sd(S0, SP, S0_off * wordSize);
  2537 		__ sd(S1, SP, S1_off * wordSize);
  2538 		__ sd(S2, SP, S2_off * wordSize);
  2539 		__ sd(S3, SP, S3_off * wordSize);
  2540 		__ sd(S4, SP, S4_off * wordSize);
  2541 		__ sd(S5, SP, S5_off * wordSize);
  2542 		__ sd(S6, SP, S6_off * wordSize);
  2543 		__ sd(S7, SP, S7_off * wordSize);
  2545 		int frame_complete = __ pc() - start;
  2546 		// push java thread (becomes first argument of C function)
  2547 		__ sd(java_thread, SP, thread_off * wordSize);
  2548 		if (java_thread!=A0)
  2549 			__ move(A0, java_thread);
  2551 		// Set up last_Java_sp and last_Java_fp
  2552 		__ set_last_Java_frame(java_thread, SP, FP, NULL);
  2553 		__ relocate(relocInfo::internal_pc_type);
  2555 			intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
  2556 			__ li48(AT, save_pc);
  2558 		__ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); 
  2560 		// Call runtime
  2561 		__ call(runtime_entry);
  2562 		__ delayed()->nop();
  2563 		// Generate oop map
  2564 		OopMap* map =  new OopMap(framesize, 0);        
  2565 		oop_maps->add_gc_map(__ offset(),  map);
  2567 		// restore the thread (cannot use the pushed argument since arguments
  2568 		// may be overwritten by C code generated by an optimizing compiler);
  2569 		// however can use the register value directly if it is callee saved.
  2570 #ifndef OPT_THREAD
  2571 		__ get_thread(java_thread);
  2572 #endif
  2574 		__ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  2575 		//  __ reset_last_Java_frame(java_thread, true);
  2576 		__ reset_last_Java_frame(java_thread, true, true);
  2578 		// Restore callee save registers.  This must be done after resetting the Java frame
  2579 		__ ld(S0, SP, S0_off * wordSize);
  2580 		__ ld(S1, SP, S1_off * wordSize);
  2581 		__ ld(S2, SP, S2_off * wordSize);
  2582 		__ ld(S3, SP, S3_off * wordSize);
  2583 		__ ld(S4, SP, S4_off * wordSize);
  2584 		__ ld(S5, SP, S5_off * wordSize);
  2585 		__ ld(S6, SP, S6_off * wordSize);
  2586 		__ ld(S7, SP, S7_off * wordSize);
  2588 		// discard arguments
  2589 		__ addi(SP, SP, (framesize-2) * wordSize); // epilog
  2590 		//	__ leave(); // required for proper stackwalking of RuntimeStub frame
  2591 		__ addi(SP, FP, wordSize);
  2592 		__ ld(FP, SP, -1*wordSize);
  2593 		// check for pending exceptions
  2594 #ifdef ASSERT
  2595 		Label L;
  2596 		__ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
  2597 		__ bne(AT, R0, L);
  2598 		__ delayed()->nop();
  2599 		__ should_not_reach_here();
  2600 		__ bind(L);
  2601 #endif //ASSERT
  2602 		__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  2603 		__ delayed()->nop();
  2604 #ifdef aoqi_test
  2605 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  2606 #endif
  2607 		RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete, 
  2608 										framesize, oop_maps, false);
  2609 #ifdef aoqi_test
  2610 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
  2611 #endif
  2612 		return stub->entry_point();
  2615   // Initialization
         // generate_initial() is the path taken by the StubGenerator constructor
         // when its `all` argument is false.
         // It fills in the StubRoutines entry points for forward_exception,
         // call_stub, catch_exception and handler_for_unsafe_access, plus the
         // gs2-specific get_previous_fp entry, by calling the matching
         // generate_*() routine for each one and storing the value it returns.
  2616   void generate_initial() {
       // NOTE: everything inside the block comment below is a disabled list of
       // stub assignments (mxcsr, atomic_*, fence, verify_mxcsr, ...); none of it
       // is compiled. Only the statements after the closing "*/" are live.
  2617 /*
  2618 		// Generates all stubs and initializes the entry points
  2620     // This platform-specific stub is needed by generate_call_stub()
  2621     StubRoutines::mips::_mxcsr_std        = generate_fp_mask("mxcsr_std",        0x0000000000001F80);
  2623     // entry points that exist in all platforms Note: This is code
  2624     // that could be shared among different platforms - however the
  2625     // benefit seems to be smaller than the disadvantage of having a
  2626     // much more complicated generator structure. See also comment in
  2627     // stubRoutines.hpp.
  2629     StubRoutines::_forward_exception_entry = generate_forward_exception();
  2631     StubRoutines::_call_stub_entry =
  2632       generate_call_stub(StubRoutines::_call_stub_return_address);
  2634     // is referenced by megamorphic call
  2635     StubRoutines::_catch_exception_entry = generate_catch_exception();
  2637     // atomic calls
  2638     StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
  2639     StubRoutines::_atomic_xchg_ptr_entry     = generate_atomic_xchg_ptr();
  2640     StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
  2641     StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
  2642     StubRoutines::_atomic_add_entry          = generate_atomic_add();
  2643     StubRoutines::_atomic_add_ptr_entry      = generate_atomic_add_ptr();
  2644     StubRoutines::_fence_entry               = generate_orderaccess_fence();
  2646     StubRoutines::_handler_for_unsafe_access_entry =
  2647       generate_handler_for_unsafe_access();
  2649     // platform dependent
  2650     StubRoutines::mips::_get_previous_fp_entry = generate_get_previous_fp();
  2652     StubRoutines::mips::_verify_mxcsr_entry    = generate_verify_mxcsr();
  2653 */
  2654 		// Generates all stubs and initializes the entry points
  2656 		//-------------------------------------------------------------
  2657 		//-----------------------------------------------------------
  2658 		// entry points that exist in all platforms
  2659 		// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller 
  2660 		// than the disadvantage of having a much more complicated generator structure. 
  2661 		// See also comment in stubRoutines.hpp.
  2662 		StubRoutines::_forward_exception_entry = generate_forward_exception();    
       		// generate_call_stub() is handed StubRoutines::_call_stub_return_address;
       		// presumably it records the call stub's return address there -- confirm
       		// against generate_call_stub().
  2663 		StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
  2664 		// is referenced by megamorphic call    
  2665 		StubRoutines::_catch_exception_entry = generate_catch_exception();    
  2667 		StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
  2669 		// platform dependent
       		// (the live code stores this under StubRoutines::gs2, whereas the disabled
       		// list above used StubRoutines::mips)
  2670 		StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
  2673 void generate_all() {
       // generate_all() is the path taken by the StubGenerator constructor when its
       // `all` argument is true.
       // In order, it installs: the throw stubs for AbstractMethodError,
       // NullPointerException-at-call and StackOverflowError, the verify_oop
       // subroutine, the arraycopy stubs (only when CORE is not defined) and the
       // SafeFetch32 / SafeFetchN stubs.
       // When aoqi_test is defined, a "<function>:<line>" trace line is printed on
       // tty between the individual steps.
  2674 #ifdef aoqi_test
  2675 tty->print_cr("%s:%d", __func__, __LINE__);
  2676 #endif
  2677     // Generates all stubs and initializes the entry points
  2679     // These entry points require SharedInfo::stack0 to be set up in
  2680     // non-core builds and need to be relocatable, so they each
  2681     // fabricate a RuntimeStub internally.
       // NOTE: the block comment below is a disabled list of stub assignments
       // (throw stubs, f2i/f2l/d2i/d2l fixups, FP sign masks, verify_oop,
       // arraycopy); none of it is compiled. The live code follows the "*/".
  2682 	/*
  2683     StubRoutines::_throw_AbstractMethodError_entry =
  2684       generate_throw_exception("AbstractMethodError throw_exception",
  2685                                CAST_FROM_FN_PTR(address,
  2686                                                 SharedRuntime::
  2687                                                 throw_AbstractMethodError),
  2688                                false);
  2690     StubRoutines::_throw_IncompatibleClassChangeError_entry =
  2691       generate_throw_exception("IncompatibleClassChangeError throw_exception",
  2692                                CAST_FROM_FN_PTR(address,
  2693                                                 SharedRuntime::
  2694                                                 throw_IncompatibleClassChangeError),
  2695                                false);
  2697     StubRoutines::_throw_ArithmeticException_entry =
  2698       generate_throw_exception("ArithmeticException throw_exception",
  2699                                CAST_FROM_FN_PTR(address,
  2700                                                 SharedRuntime::
  2701                                                 throw_ArithmeticException),
  2702                                true);
  2704     StubRoutines::_throw_NullPointerException_entry =
  2705       generate_throw_exception("NullPointerException throw_exception",
  2706                                CAST_FROM_FN_PTR(address,
  2707                                                 SharedRuntime::
  2708                                                 throw_NullPointerException),
  2709                                true);
  2711     StubRoutines::_throw_NullPointerException_at_call_entry =
  2712       generate_throw_exception("NullPointerException at call throw_exception",
  2713                                CAST_FROM_FN_PTR(address,
  2714                                                 SharedRuntime::
  2715                                                 throw_NullPointerException_at_call),
  2716                                false);
  2718     StubRoutines::_throw_StackOverflowError_entry =
  2719       generate_throw_exception("StackOverflowError throw_exception",
  2720                                CAST_FROM_FN_PTR(address,
  2721                                                 SharedRuntime::
  2722                                                 throw_StackOverflowError),
  2723                                false);
  2725     // entry points that are platform specific
  2726     StubRoutines::mips::_f2i_fixup = generate_f2i_fixup();
  2727     StubRoutines::mips::_f2l_fixup = generate_f2l_fixup();
  2728     StubRoutines::mips::_d2i_fixup = generate_d2i_fixup();
  2729     StubRoutines::mips::_d2l_fixup = generate_d2l_fixup();
  2731     StubRoutines::mips::_float_sign_mask  = generate_fp_mask("float_sign_mask",  0x7FFFFFFF7FFFFFFF);
  2732     StubRoutines::mips::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
  2733     StubRoutines::mips::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
  2734     StubRoutines::mips::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
  2736     // support for verify_oop (must happen after universe_init)
  2737     StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
  2739     // arraycopy stubs used by compilers
  2740     generate_arraycopy_stubs();
  2741 	*/
  2742 #ifdef aoqi_test
  2743 tty->print_cr("%s:%d", __func__, __LINE__);
  2744 #endif
       		// Each throw stub is built from a stub name, the address of the
       		// SharedRuntime routine to call, and a trailing bool (presumably
       		// restore_saved_exception_pc -- confirm against the signature of
       		// generate_throw_exception).
  2745 		StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
  2746 #ifdef aoqi_test
  2747 tty->print_cr("%s:%d", __func__, __LINE__);
  2748 #endif
       		// NOTE(review): the ArithmeticException and NullPointerException throw
       		// stubs below are commented out, so this function does not assign
       		// StubRoutines::_throw_ArithmeticException_entry or
       		// StubRoutines::_throw_NullPointerException_entry -- confirm nothing
       		// relies on them being set here.
  2749 //		StubRoutines::_throw_ArithmeticException_entry         = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
  2750 #ifdef aoqi_test
  2751 tty->print_cr("%s:%d", __func__, __LINE__);
  2752 #endif
  2753 //		StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
  2754 #ifdef aoqi_test
  2755 tty->print_cr("%s:%d", __func__, __LINE__);
  2756 #endif
  2757 		StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
  2758 #ifdef aoqi_test
  2759 tty->print_cr("%s:%d", __func__, __LINE__);
  2760 #endif
  2761 		StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);
  2762 #ifdef aoqi_test
  2763 tty->print_cr("%s:%d", __func__, __LINE__);
  2764 #endif
  2766 		//------------------------------------------------------
  2767 		//------------------------------------------------------------------
  2768 		// entry points that are platform specific  
       		// (no platform-specific entry point is assigned in the live code here;
       		// the fixup / sign-mask stubs appear only in the disabled list above)
  2770 		// support for verify_oop (must happen after universe_init)
  2771 #ifdef aoqi_test
  2772 tty->print_cr("%s:%d", __func__, __LINE__);
  2773 #endif
  2774 		StubRoutines::_verify_oop_subroutine_entry	   = generate_verify_oop();
  2775 #ifdef aoqi_test
  2776 tty->print_cr("%s:%d", __func__, __LINE__);
  2777 #endif
  2778 #ifndef CORE
  2779 		// arraycopy stubs used by compilers
  2780 		generate_arraycopy_stubs();
  2781 #ifdef aoqi_test
  2782 tty->print_cr("%s:%d", __func__, __LINE__);
  2783 #endif
  2784 #endif
  2786     // Safefetch stubs.
           // generate_safefetch() is handed the stub name, a size (sizeof(int) for
           // SafeFetch32, sizeof(intptr_t) for SafeFetchN) and the addresses of the
           // StubRoutines entry / fault_pc / continuation_pc fields, presumably so
           // that it can fill those fields in -- confirm against generate_safefetch().
  2787     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
  2788                                                        &StubRoutines::_safefetch32_fault_pc,
  2789                                                        &StubRoutines::_safefetch32_continuation_pc);
  2790     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
  2791                                                        &StubRoutines::_safefetchN_fault_pc,
  2792                                                        &StubRoutines::_safefetchN_continuation_pc);
  2795  public:
         // Constructor: forwards `code` to the StubCodeGenerator base class and
         // then generates stubs right away -- generate_all() when `all` is true,
         // generate_initial() otherwise. Constructing the object is therefore
         // what triggers stub generation.
  2796   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
  2797     if (all) {
  2798       generate_all();
  2799     } else {
  2800       generate_initial();
  2803 }; // end class declaration
  2804 /*
  2805 address StubGenerator::disjoint_byte_copy_entry  = NULL;
  2806 address StubGenerator::disjoint_short_copy_entry = NULL;
  2807 address StubGenerator::disjoint_int_copy_entry   = NULL;
  2808 address StubGenerator::disjoint_long_copy_entry  = NULL;
  2809 address StubGenerator::disjoint_oop_copy_entry   = NULL;
  2811 address StubGenerator::byte_copy_entry  = NULL;
  2812 address StubGenerator::short_copy_entry = NULL;
  2813 address StubGenerator::int_copy_entry   = NULL;
  2814 address StubGenerator::long_copy_entry  = NULL;
  2815 address StubGenerator::oop_copy_entry   = NULL;
  2817 address StubGenerator::checkcast_copy_entry = NULL;
  2818 */
  2819 void StubGenerator_generate(CodeBuffer* code, bool all) {
         // Free function declared after the StubGenerator class. Building the
         // local StubGenerator is the whole job: its constructor calls
         // generate_all() when `all` is true and generate_initial() otherwise.
  2820   StubGenerator g(code, all);

mercurial