Fri, 29 Apr 2016 00:06:10 +0800
Added MIPS 64-bit port.
1 /*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "nativeInst_mips.hpp"
31 #include "oops/instanceOop.hpp"
32 #include "oops/method.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/frame.inline.hpp"
37 #include "runtime/handles.inline.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "runtime/thread.inline.hpp"
42 #include "utilities/top.hpp"
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #endif
48 // Declaration and definition of StubGenerator (no .hpp file).
49 // For a more detailed description of the stub routine structure
50 // see the comment in stubRoutines.hpp
// Shorthand so stub code reads like assembly: '__ insn(...)' emits through
// the current MacroAssembler.
#define __ _masm->
//#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
//#define a__ ((Assembler*)_masm)->

//#ifdef PRODUCT
//#define BLOCK_COMMENT(str) /* nothing */
//#else
//#define BLOCK_COMMENT(str) __ block_comment(str)
//#endif

//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// NOTE(review): MXCSR is an x86 SSE control/status register; this constant
// looks like a leftover from the x86 stub generator and appears unused in the
// MIPS code visible here -- confirm and consider removing.
const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
65 // Stub Code definitions
// Runtime helper called (via generate_handler_for_unsafe_access's stub) after
// a fault inside an unsafe memory access. Marks the thread so an asynchronous
// UnknownError is posted later, and returns the pc at which to resume.
static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc = thread->saved_exception_pc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine: return garbage from the load
  // therefore, compute npc
  //address npc = Assembler::locate_next_instruction(pc);
  // NOTE(review): MIPS instructions are 4 bytes, but sizeof(unsigned long) is
  // 8 on an LP64 build, so this resumes TWO instructions past the fault --
  // confirm this is intended (e.g. to also skip a delay-slot instruction).
  address npc = (address)((unsigned long)pc + sizeof(unsigned long));

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}
83 class StubGenerator: public StubCodeGenerator {
84 private:
86 // ABI mips n64
87 // This fig is not MIPS ABI. It is call Java from C ABI.
88 // Call stubs are used to call Java from C
89 //
90 // [ return_from_Java ]
91 // [ argument word n-1 ] <--- sp
92 // ...
93 // [ argument word 0 ]
94 // ...
95 //-10 [ S6 ]
96 // -9 [ S5 ]
97 // -8 [ S4 ]
98 // -7 [ S3 ]
99 // -6 [ S0 ]
100 // -5 [ TSR(S2) ]
101 // -4 [ LVP(S7) ]
102 // -3 [ BCP(S1) ]
103 // -2 [ saved fp ] <--- fp_after_call
104 // -1 [ return address ]
105 // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
106 // 1 [ result ] <--- a1
107 // 2 [ result_type ] <--- a2
108 // 3 [ method ] <--- a3
109 // 4 [ entry_point ] <--- a4
110 // 5 [ parameters ] <--- a5
111 // 6 [ parameter_size ] <--- a6
112 // 7 [ thread ] <--- a7
114 //
115 // _LP64: n64 does not save paras in sp.
116 //
117 // [ return_from_Java ]
118 // [ argument word n-1 ] <--- sp
119 // ...
120 // [ argument word 0 ]
121 // ...
122 //-14 [ thread ]
123 //-13 [ result_type ] <--- a2
124 //-12 [ result ] <--- a1
125 //-11 [ ptr. to call wrapper ] <--- a0
126 //-10 [ S6 ]
127 // -9 [ S5 ]
128 // -8 [ S4 ]
129 // -7 [ S3 ]
130 // -6 [ S0 ]
131 // -5 [ TSR(S2) ]
132 // -4 [ LVP(S7) ]
133 // -3 [ BCP(S1) ]
134 // -2 [ saved fp ] <--- fp_after_call
135 // -1 [ return address ]
136 // 0 [ ] <--- old sp
137 /*
138 * 2014/01/16 Fu: Find a right place in the call_stub for GP.
139 * GP will point to the starting point of Interpreter::dispatch_table(itos).
140 * It should be saved/restored before/after Java calls.
141 *
142 */
  // Frame-slot offsets (in words, relative to FP after the call-stub
  // prologue) used by generate_call_stub() to save callee-saved registers
  // and to spill the incoming C arguments it needs after the Java call.
  enum call_stub_layout {
    RA_off          = -1,   // return address
    FP_off          = -2,   // saved frame pointer
    BCP_off         = -3,
    LVP_off         = -4,
    TSR_off         = -5,
    S1_off          = -6,
    S3_off          = -7,
    S4_off          = -8,
    S5_off          = -9,
    S6_off          = -10,
    result_off      = -11,  // A1: result address
    result_type_off = -12,  // A2: result type
    thread_off      = -13,  // A7: thread
    total_off       = thread_off - 3,  // = -16 words reserved below old SP
    // NOTE(review): GP_off equals total_off (-16), i.e. GP is saved in the
    // lowest reserved slot and words -14/-15 appear unused -- confirm this
    // padding/aliasing is intentional.
    GP_off          = -16,
  };
  // Call stub: the entry point the VM uses to call into Java code.
  //
  // Incoming C arguments (n64 ABI, see the layout comment above):
  //   A0: pointer to call wrapper
  //   A1: result address
  //   A2: result type
  //   A3: method
  //   A4: entry point (interpreter/compiled entry to jump to)
  //   A5: pointer to the Java parameter array
  //   A6: parameter count (stack slots)
  //   A7: thread
  //
  // Sets 'return_address' to the pc immediately after the call into Java;
  // generate_catch_exception() jumps back to that pc when an exception is
  // thrown in Java code.
  address generate_call_stub(address& return_address) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // same as in generate_catch_exception()!

    // Save RA, FP and all callee-saved registers into the negative-offset
    // slots described by call_stub_layout.
    __ sd(RA, SP, RA_off * wordSize);
    __ sd(FP, SP, FP_off * wordSize);
    __ sd(BCP, SP, BCP_off * wordSize);
    __ sd(LVP, SP, LVP_off * wordSize);
    __ sd(GP, SP, GP_off * wordSize);
    __ sd(TSR, SP, TSR_off * wordSize);
    __ sd(S1, SP, S1_off * wordSize);
    __ sd(S3, SP, S3_off * wordSize);
    __ sd(S4, SP, S4_off * wordSize);
    __ sd(S5, SP, S5_off * wordSize);
    __ sd(S6, SP, S6_off * wordSize);

    // GP points at the interpreter's itos dispatch table while in Java code
    // (see the 2014/01/16 note above).
    __ li48(GP, (long)Interpreter::dispatch_table(itos));

    // I think 14 is the max gap between argument and callee saved register
    __ daddi(FP, SP, (-2) * wordSize);   // FP -> saved-fp slot
    __ daddi(SP, SP, total_off * wordSize); // reserve the whole layout
    //FIXME, aoqi. find a suitable place to save A1 & A2.
    /*
    __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
    __ sd(A1, FP, 3 * wordSize);
    __ sd(A2, FP, 4 * wordSize);
    __ sd(A3, FP, 5 * wordSize);
    __ sd(A4, FP, 6 * wordSize);
    __ sd(A5, FP, 7 * wordSize);
    __ sd(A6, FP, 8 * wordSize);
    __ sd(A7, FP, 9 * wordSize);
    */
    // Spill the arguments that are needed again after the Java call returns.
    __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
    __ sd(A1, FP, result_off * wordSize);
    __ sd(A2, FP, result_type_off * wordSize);
    __ sd(A7, FP, thread_off * wordSize);

#ifdef OPT_THREAD
    //__ get_thread(TREG);
    __ move(TREG, A7);

    //__ ld(TREG, FP, thread_off * wordSize);
#endif
    //add for compressedoops
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
      __ beq(AT, R0, L);
      __ delayed()->nop();
      /* FIXME: I do not know how to realize stop in mips arch, do it in the future */
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // pass parameters if any
    // A5: parameter pointer
    // A6: parameter_size
    // T0: parameter_size_tmp (counts down)
    // T2: offset (counts up)
    // T3: tmp
    Label parameters_done;
    // judge if the parameter_size equals 0
    __ beq(A6, R0, parameters_done);
    __ delayed()->nop();
    // Reserve stack space for the Java arguments and re-align SP.
    __ dsll(AT, A6, Interpreter::logStackElementSize);
    __ dsub(SP, SP, AT);
    __ move(AT, -StackAlignmentInBytes);
    __ andr(SP, SP , AT);
    // Copy Java parameters in reverse order (receiver last)
    // Note that the argument order is inverted in the process
    Label loop;
    __ move(T0, A6);
    __ move(T2, R0);
    __ bind(loop);

    // get parameter: AT = A5[T0 - 1], then SP[T2] = AT
    __ dsll(T3, T0, LogBytesPerWord);
    __ dadd(T3, T3, A5);
    __ ld(AT, T3, -wordSize);
    __ dsll(T3, T2, LogBytesPerWord);
    __ dadd(T3, T3, SP);
    __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
    __ daddi(T2, T2, 1);
    __ daddi(T0, T0, -1);
    __ bne(T0, R0, loop);
    __ delayed()->nop();
    // advance to next parameter

    // call Java function
    __ bind(parameters_done);

    // receiver in V0, methodOop in Rmethod
    __ move(Rmethod, A3);
    __ move(Rsender, SP); //set sender sp
    __ jalr(A4);
    __ delayed()->nop();
    return_address = __ pc();

    Label common_return;
    __ bind(common_return);

    // store result depending on type
    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ ld(T0, FP, result_off * wordSize);       // result address --> T0
    Label is_long, is_float, is_double, exit;
    __ ld(T2, FP, result_type_off * wordSize);  // result_type --> T2
    // Compare result type against T_LONG / T_FLOAT / T_DOUBLE, computing the
    // next comparison in the branch delay slot.
    __ daddi(T3, T2, (-1) * T_LONG);
    __ beq(T3, R0, is_long);
    __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
    __ beq(T3, R0, is_float);
    __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
    __ beq(T3, R0, is_double);
    __ delayed()->nop();

    // handle T_INT case
    __ sd(V0, T0, 0 * wordSize);
    __ bind(exit);

    // Restore callee-saved registers and tear down the frame.
    __ daddi(SP, FP, 2 * wordSize );
    __ ld(RA, SP, RA_off * wordSize);
    __ ld(FP, SP, FP_off * wordSize);
    __ ld(BCP, SP, BCP_off * wordSize);
    __ ld(LVP, SP, LVP_off * wordSize);
    __ ld(GP, SP, GP_off * wordSize);
    __ ld(TSR, SP, TSR_off * wordSize);

    __ ld(S1, SP, S1_off * wordSize);
    __ ld(S3, SP, S3_off * wordSize);
    __ ld(S4, SP, S4_off * wordSize);
    __ ld(S5, SP, S5_off * wordSize);
    __ ld(S6, SP, S6_off * wordSize);

    // return
    __ jr(RA);
    __ delayed()->nop();

    // handle return types different from T_INT
    __ bind(is_long);
    __ sd(V0, T0, 0 * wordSize);
    //__ sd(V1, T0, 1 * wordSize);
    __ sd(R0, T0, 1 * wordSize);   // zero the second result word
    __ b(exit);
    __ delayed()->nop();

    __ bind(is_float);
    __ swc1(F0, T0, 0 * wordSize);
    __ b(exit);
    __ delayed()->nop();

    __ bind(is_double);
    __ sdc1(F0, T0, 0 * wordSize);
    //__ sdc1(F1, T0, 1 * wordSize);
    __ sd(R0, T0, 1 * wordSize);   // zero the second result word
    __ b(exit);
    __ delayed()->nop();
    //FIXME, 1.6 mips version add operation of fpu here
    // Separate return point used when compiled code returns into the call
    // stub; it joins the common result-storing path above.
    StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
    __ b(common_return);
    __ delayed()->nop();
    return start;
  }
  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to setup the
  // stack pointer.
  //
  // V0: exception oop
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    Register thread = TREG;

    // get thread directly (saved in the call stub frame by generate_call_stub)
#ifndef OPT_THREAD
    __ ld(thread, FP, thread_off * wordSize);
#endif

#ifdef ASSERT
    // verify that threads correspond
    { Label L;
      __ get_thread(T8);
      __ beq(T8, thread, L);
      __ delayed()->nop();
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif
    // set pending exception
    __ verify_oop(V0);
    __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
    // Record where the exception was installed, for diagnostics.
    __ li(AT, (long)__FILE__);
    __ sd(AT, thread, in_bytes(Thread::exception_file_offset ()));
    __ li(AT, (long)__LINE__);
    __ sd(AT, thread, in_bytes(Thread::exception_line_offset ()));

    // complete return to VM: jump back into the call stub's epilogue.
    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
    __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
    __ delayed()->nop();

    return start;
  }
  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  //   V0: exception oop
  //   V1: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be on stack !!
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    Register thread = TREG;
    address start = __ pc();

    // Upon entry, the sp points to the return address returning into Java
    // (interpreted or compiled) code; i.e., the return address becomes the
    // throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack but
    // the exception handler will reset the stack pointer -> ignore them.
    // A potential result in registers can be ignored as well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
#ifndef OPT_THREAD
    __ get_thread(thread);
#endif
    { Label L;
      __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
      __ bne(AT, R0, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into T9
    __ ld(A1, SP, 0);   // throwing pc = return address on stack
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
    __ move(T9, V0);
    __ pop(V1);         // pop throwing pc into V1

#ifndef OPT_THREAD
    __ get_thread(thread);
#endif
    // Load, then clear, the pending exception.
    __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
    __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ bne(V0, R0, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // continue at exception handler (return address removed)
    // V0: exception
    // T9: exception handler
    // V1: throwing pc
    __ verify_oop(V0);
    __ jr(T9);
    __ delayed()->nop();

    return start;
  }
459 // Support for intptr_t get_previous_fp()
460 //
461 // This routine is used to find the previous frame pointer for the
462 // caller (current_frame_guess). This is used as part of debugging
463 // ps() is seemingly lost trying to find frames.
464 // This code assumes that caller current_frame_guess) has a frame.
465 address generate_get_previous_fp() {
466 StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
467 const Address old_fp (FP, 0);
468 const Address older_fp (V0, 0);
469 address start = __ pc();
470 __ enter();
471 __ lw(V0, old_fp); // callers fp
472 __ lw(V0, older_fp); // the frame for ps()
473 __ leave();
474 __ jr(RA);
475 __ delayed()->nop();
476 return start;
477 }
  // The following routine generates a subroutine to throw an
  // asynchronous UnknownError when an unsafe access gets a fault that
  // could not be reasonably prevented by the programmer. (Example:
  // SIGBUS/OBJERR.)
  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();
    __ pushad();                      // push registers
    // Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
    // Call the C helper above; it returns the pc to resume at in V0.
    __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
    __ delayed()->nop();
    // Store the resume pc into the stack slot just above the register save
    // area (presumably the saved pc slot used on the return path).
    // NOTE(review): sw stores only the low 32 bits; the pc and the slot are
    // 64-bit on MIPS64, so sd would appear to be required -- confirm against
    // pushad()/popad()'s save layout.
    __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord);
    __ popad();
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
496 // Non-destructive plausibility checks for oops
497 //
498 // Arguments:
499 // all args on stack!
500 //
501 // Stack after saving c_rarg3:
502 // [tos + 0]: saved c_rarg3
503 // [tos + 1]: saved c_rarg2
504 // [tos + 2]: saved r12 (several TemplateTable methods use it)
505 // [tos + 3]: saved flags
506 // [tos + 4]: return address
507 // * [tos + 5]: error message (char*)
508 // * [tos + 6]: object to verify (oop)
509 // * [tos + 7]: saved rax - saved by caller and bashed
510 // * = popped on exit
511 address generate_verify_oop() {
512 StubCodeMark mark(this, "StubRoutines", "verify_oop");
513 address start = __ pc();
514 __ reinit_heapbase();
515 __ verify_oop_subroutine();
516 address end = __ pc();
517 return start;
518 }
  //
  // Generate overlap test for array copy stubs
  //
  // Input:
  //   A0 - source array address
  //   A1 - destination array address
  //   A2 - element count
  //
  // Branches to no_overlap_target when a forward (disjoint) copy is safe:
  // either dest <= src, or dest starts beyond the last source element.
  // Otherwise falls through so the caller emits the backward copy.
  // Clobbers AT and T9.
  //
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    int elem_size = 1 << log2_elem_size;
    Address::ScaleFactor sf = Address::times_1;

    switch (log2_elem_size) {
      case 0: sf = Address::times_1; break;
      case 1: sf = Address::times_2; break;
      case 2: sf = Address::times_4; break;
      case 3: sf = Address::times_8; break;
    }

    // T9 = address of the last source element.
    __ dsll(AT, A2, sf);
    __ dadd(AT, AT, A0);
    __ lea(T9, Address(AT, -elem_size));
    // dest <= src: forward copy cannot clobber unread source bytes.
    __ dsub(AT, A1, A0);
    __ blez(AT, no_overlap_target);
    __ delayed()->nop();
    // dest > last source element: regions are disjoint.
    __ dsub(AT, A1, T9);
    __ bgtz(AT, no_overlap_target);
    __ delayed()->nop();
  }
  //
  // Generate store check for array
  //
  // Input:
  //   T0 - starting address
  //   T1 - element count
  //
  // The 2 input registers are overwritten.
  // Dirties every card-table byte covering [T0, T0 + T1 * heapOopSize).
  //

#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)

  void array_store_check() {
    BarrierSet* bs = Universe::heap()->barrier_set();
    assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
    CardTableModRefBS* ct = (CardTableModRefBS*)bs;
    assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
    Label l_0;

    // T1 = address of the last oop stored.
    __ dsll(AT, T1, TIMES_OOP);
    __ dadd(AT, T0, AT);
    __ daddiu(T1, AT, - BytesPerHeapOop);

    // Convert first/last addresses into card indexes.
    __ shr(T0, CardTableModRefBS::card_shift);
    __ shr(T1, CardTableModRefBS::card_shift);

    __ dsub(T1, T1, T0); // end --> cards count
    __ bind(l_0);
    // Dirty card byte_map_base[T0 + T1], counting T1 down to 0 so the whole
    // range [first, last] is covered.
    __ li48(AT, (long)ct->byte_map_base);
    __ dadd(AT, AT, T0);
    __ dadd(AT, AT, T1);
    __ sb(R0, AT, 0);
    __ daddi(T1, T1, - 1);
    __ bgez(T1, l_0);
    __ delayed()->nop();
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   A0 - source array address
  //   A1 - destination array address
  //   A2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it. The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_byte_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_byte_copy().
  //
  address generate_disjoint_byte_copy(bool aligned, const char *name) {
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    Label l_0, l_1, l_2, l_3, l_4, l_5, l_6;

    // Working registers: T3 = from, T0 = to, T1 = count, T8 = original count.
    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);
    __ move(T3, A0);
    __ move(T0, A1);
    __ move(T1, A2);
    __ move(T8, T1); // original count in T1
    // Fewer than 4 bytes: go straight to the byte tail loop.
    __ daddi(AT, T1, -3);
    __ blez(AT, l_4);
    __ delayed()->nop();
    if (!aligned) {
      // align source address at dword address boundary
      __ move(T1, 4);
      __ sub(T1, T1, T3);
      __ andi(T1, T1, 3);   // prefix byte count (0..3)
      __ beq(T1, R0, l_1);
      __ delayed()->nop();
      __ sub(T8,T8,T1);     // remaining count after the prefix
      // Copy the prefix one byte at a time.
      __ bind(l_0);
      __ lb(AT, T3, 0);
      __ sb(AT, T0, 0);
      __ addi(T3, T3, 1);
      __ addi(T0, T0, 1);
      __ addi(T1 ,T1, -1);
      __ bne(T1, R0, l_0);
      __ delayed()->nop();
      __ bind(l_1);
      __ move(T1, T8);
    }
    __ shr(T1, 2);         // dword count
    __ beq(T1, R0, l_4);   // no dwords to move
    __ delayed()->nop();
    // copy aligned dwords, 4 bytes per iteration
    __ bind(l_2);
    __ align(16);
    __ bind(l_3);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0 );
    __ addi(T3, T3, 4);
    __ addi(T0, T0, 4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_3);
    __ delayed()->nop();
    __ bind(l_4);
    // Copy the remaining 0..3 suffix bytes.
    __ move(T1, T8);
    __ andi(T1, T1, 3);
    __ beq(T1, R0, l_6);
    __ delayed()->nop();
    __ bind(l_5);
    __ lb(AT, T3, 0);
    __ sb(AT, T0, 0);
    __ addi(T3, T3, 1);
    __ addi(T0, T0, 1);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_5 );
    __ delayed()->nop();
    __ bind(l_6);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   A0 - source array address
  //   A1 - destination array address
  //   A2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it. The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_byte_copy(bool aligned, const char *name) {
    Label l_1, l_2, l_3, l_4, l_5;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
        StubRoutines::jbyte_disjoint_arraycopy();

    // When the regions cannot overlap, branch to the forward copy stub.
    array_overlap_test(nooverlap_target, 0);

    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);

    // copy from high to low (backward) because the regions may overlap.
    __ move(T3, A0);
    __ move(T0, A1);
    __ move(T1, A2);
    // Point T3/T0 at the last dword of source/destination.
    __ dadd(AT, T3, T1);
    __ lea(T3, Address(AT, -4));
    __ dadd(AT, T0, T1);
    __ lea(T0, Address(AT, -4));
    __ move(T8, T1);        // original count, for the suffix
    // Fewer than 4 bytes: only the byte tail loop runs.
    __ daddi(AT, T1, -3);
    __ blez(AT, l_3);
    __ delayed()->nop();
    __ dsrl(T1, T1, 2);     // dword count
    __ align(16);
    // Backward dword copy loop.
    __ bind(l_1);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0);
    __ addi(T3, T3, -4);
    __ addi(T0, T0, -4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_1);
    __ delayed()->nop();
    __ b(l_3);
    __ delayed()->nop();
    // copy dwords aligned or not with repeat move (x86 leftover; no code)
    __ bind(l_2);
    __ bind(l_3);
    // copy suffix (0-3 bytes)
    __ andi(T8, T8, 3);
    __ beq(T8, R0, l_5);
    __ delayed()->nop();
    // Re-point at the last byte of each region for the byte loop.
    __ addi(T3, T3, 3);
    __ addi(T0, T0, 3);
    __ bind(l_4);
    __ lb(AT, T3, 0);
    __ sb(AT, T0, 0);
    __ addi(T3, T3, -1);
    __ addi(T0, T0, -1);
    __ addi(T8, T8, -1);
    __ bne(T8, R0, l_4);
    __ delayed()->nop();
    __ bind(l_5);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   A0 - source array address
  //   A1 - destination array address
  //   A2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it. The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_short_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_short_copy().
  //
  address generate_disjoint_short_copy(bool aligned, const char *name) {
    Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();

    // Working registers: T3 = from, T0 = to, T1 = jshort count.
    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);
    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    if (!aligned) {
      __ beq(T1, R0, l_5);  // nothing to copy
      __ delayed()->nop();
      // align source address at dword address boundary
      __ move(T8, T3); // original from
      __ andi(T8, T8, 3); // either 0 or 2
      __ beq(T8, R0, l_1); // no prefix
      __ delayed()->nop();
      // copy prefix (a single jshort)
      __ lh(AT, T3, 0);
      __ sh(AT, T0, 0);
      __ add(T3, T3, T8);
      __ add(T0, T0, T8);
      __ addi(T1, T1, -1);
      __ bind(l_1);
    }
    __ move(T8, T1); // word count less prefix
    __ sra(T1, T1, 1);   // dword count (two jshorts per dword)
    __ beq(T1, R0, l_4);
    __ delayed()->nop();
    // copy aligned dwords
    __ bind(l_2);
    __ align(16);
    __ bind(l_3);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0 );
    __ addi(T3, T3, 4);
    __ addi(T0, T0, 4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_3);
    __ delayed()->nop();
    __ bind(l_4);
    // An odd jshort left over?
    __ andi(T8, T8, 1);
    __ beq(T8, R0, l_5);
    __ delayed()->nop();
    // copy suffix
    __ lh(AT, T3, 0);
    __ sh(AT, T0, 0);
    __ bind(l_5);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   A0 - source array address
  //   A1 - destination array address
  //   A2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it. The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // (Translated from the x86 stub; the x86 template instructions that were
  // carried along as comments have been removed for readability.)
  address generate_conjoint_short_copy(bool aligned, const char *name) {
    Label l_1, l_2, l_3, l_4, l_5;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jshort_disjoint_arraycopy() :
        StubRoutines::jshort_disjoint_arraycopy();

    // When the regions cannot overlap, branch to the forward copy stub.
    array_overlap_test(nooverlap_target, 1);

    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);

    // Working registers: T3 = from, T0 = to, T1 = jshort count.
    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    // copy dwords from high to low
    // T3 = from + count*2 - 4 (last dword of source)
    __ sll(AT, T1, Address::times_2);
    __ add(AT, T3, AT);
    __ lea(T3, Address( AT, -4));
    // T0 = to + count*2 - 4 (last dword of destination)
    __ sll(AT,T1 , Address::times_2);
    __ add(AT, T0, AT);
    __ lea(T0, Address( AT, -4));
    __ move(T8, T1);   // original count, for the suffix test
    __ bind(l_1);
    __ sra(T1,T1, 1);  // dword count
    __ beq(T1, R0, l_4); // no dwords to move
    __ delayed()->nop();
    __ align(16);
    // Backward dword copy loop.
    __ bind(l_2);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0);
    __ addi(T3, T3, -4);
    __ addi(T0, T0, -4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_2);
    __ delayed()->nop();
    __ b(l_4);
    __ delayed()->nop();
    // copy dwords with repeat move (x86 leftover; nothing is emitted here)
    __ bind(l_3);
    __ bind(l_4);
    __ andi(T8, T8, 1); // suffix count
    __ beq(T8, R0, l_5 ); // no suffix
    __ delayed()->nop();
    // copy suffix: the odd jshort sits just above the last copied dword
    __ lh(AT, T3, 2);
    __ sh(AT, T0, 2);
    __ bind(l_5);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   A0 - source array address
  //   A1 - destination array address
  //   A2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it. The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomicly.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
    Label l_2, l_3, l_4, l_stchk;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();

    // Working registers: T3 = from, T0 = to, T1 = element count.
    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);
    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    // Jump straight to the copy loop.
    __ b(l_2);
    __ delayed()->nop();
    // NOTE(review): everything emitted between the unconditional branch above
    // and bind(l_2) below is unreachable -- it is a dead translation of the
    // x86 rep-move fast path and a candidate for removal.
    if (is_oop) {
      __ b(l_stchk);
      __ delayed()->nop();
    }
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();

    __ bind(l_2);
    __ beq(T1, R0, l_4);  // zero elements: nothing to copy
    __ delayed()->nop();
    __ align(16);
    // Forward dword (jint) copy loop.
    __ bind(l_3);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0);
    __ addi(T3, T3, 4);
    __ addi(T0, T0, 4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_3);
    __ delayed()->nop();
    if (is_oop) {
      // Fall through into the card-table store check covering the whole
      // destination range (array_store_check takes T0 = start, T1 = count).
      __ bind(l_stchk);
      __ move(T0, A1);
      __ move(T1, A2);
      array_store_check();
    }
    __ bind(l_4);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
1072 // Arguments:
1073 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1074 // ignored
1075 // is_oop - true => oop array, so generate store check code
1076 // name - stub name string
1077 //
1078 // Inputs:
1079 // c_rarg0 - source array address
1080 // c_rarg1 - destination array address
1081 // c_rarg2 - element count, treated as ssize_t, can be zero
1082 //
1083 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1084 // the hardware handle it. The two dwords within qwords that span
1085 // cache line boundaries will still be loaded and stored atomicly.
1086 //
// Generate a stub that copies 'count' 32-bit elements (jints or, when
// is_oop, narrow array slots) between arrays that may overlap.
//
// Inputs (MIPS o64/N64 argument registers):
//   A0 - source array address
//   A1 - destination array address
//   A2 - element count
//
// Strategy: first jump to the matching *disjoint* stub if the overlap
// test proves a forward copy is safe; otherwise copy backwards, one
// element at a time, from the last element down to the first, so an
// overlapping forward region is never clobbered before it is read.
// If is_oop, a store check (card marking) pass runs after the copy.
// Commented-out x86 instructions are kept as a porting reference.
1087 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
1088 Label l_2, l_3, l_4, l_stchk;
1089 StubCodeMark mark(this, "StubRoutines", name);
1090 __ align(CodeEntryAlignment);
1091 address start = __ pc();
1092 address nooverlap_target;
// Select the disjoint stub to branch to when no backward copy is needed.
1094 if (is_oop) {
1095 nooverlap_target = aligned ?
1096 StubRoutines::arrayof_oop_disjoint_arraycopy() :
1097 StubRoutines::oop_disjoint_arraycopy();
1098 }else {
1099 nooverlap_target = aligned ?
1100 StubRoutines::arrayof_jint_disjoint_arraycopy() :
1101 StubRoutines::jint_disjoint_arraycopy();
1102 }
// 2 = log2(element size in bytes) used to scale the count in the test.
1104 array_overlap_test(nooverlap_target, 2);
// Preserve the temps this stub clobbers.
1106 __ push(T3);
1107 __ push(T0);
1108 __ push(T1);
1109 __ push(T8);
1111 /*
1112 __ pushl(esi);
1113 __ movl(ecx, Address(esp, 4+12)); // count
1114 __ pushl(edi);
1115 __ movl(esi, Address(esp, 8+ 4)); // from
1116 __ movl(edi, Address(esp, 8+ 8)); // to
1117 */
// T1 = count, T3 = src cursor, T0 = dst cursor.
1118 __ move(T1, A2);
1119 __ move(T3, A0);
1120 __ move(T0, A1);
// Point both cursors at the LAST element: base + count*4 - 4.
1122 //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
1123 __ sll(AT, T1, Address::times_4);
1124 __ add(AT, T3, AT);
1125 __ lea(T3 , Address(AT, -4));
1126 //__ std();
1127 //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
1128 __ sll(AT, T1, Address::times_4);
1129 __ add(AT, T0, AT);
1130 __ lea(T0 , Address(AT, -4));
// Zero-length copy: skip straight to the exit.
1132 // __ cmpl(ecx, 32);
1133 // __ jcc(Assembler::above, l_3); // > 32 dwords
1134 // __ testl(ecx, ecx);
1135 //__ jcc(Assembler::zero, l_4);
1136 __ beq(T1, R0, l_4);
1137 __ delayed()->nop();
1138 // __ subl(edi, esi);
1139 __ align(16);
// Backward copy loop: one 32-bit word per iteration, cursors step -4.
1140 __ bind(l_2);
1141 // __ movl(edx, Address(esi));
1142 __ lw(AT, T3, 0);
1143 // __ movl(Address(esi, edi, Address::times_1), edx);
1144 __ sw(AT, T0, 0);
1145 // __ subl(esi, 4);
1146 __ addi(T3, T3, -4);
1147 __ addi(T0, T0, -4);
1148 // __ decl(ecx);
1149 __ addi(T1, T1, -1);
1150 //__ jcc(Assembler::notEqual, l_2);
1151 __ bne(T1, R0, l_2);
1152 __ delayed()->nop();
// Oop arrays need a store check over the whole destination range.
1153 if (is_oop) {
1154 // __ jmp(l_stchk);
1155 __ b( l_stchk);
1156 __ delayed()->nop();
1157 }
// Exit path for the zero-length (non-oop fall-through) case.
1158 __ bind(l_4);
1159 // __ cld();
1160 // __ popl(edi);
1161 // __ popl(esi);
1162 // __ ret(0);
1163 __ pop(T8);
1164 __ pop(T1);
1165 __ pop(T0);
1166 __ pop(T3);
1167 __ jr(RA);
1168 __ delayed()->nop();
// NOTE(review): l_3 is bound but nothing branches to it on MIPS; it is a
// leftover from the x86 ">32 dwords" rep_movl fast path above.
1169 __ bind(l_3);
1170 // __ rep_movl();
// Store-check pass: reload dst/count from the argument registers
// (A1/A2 are presumably still live across the copy loop — the loop
// above does not write them).
1171 if (is_oop) {
1172 __ bind(l_stchk);
1173 // __ movl(edi, Address(esp, 8+ 8));
1174 __ move(T0, A1);
1175 // __ movl(ecx, Address(esp, 8+ 12));
1176 __ move(T1, A2);
1177 array_store_check();
1178 }
// Restore temps and return to the caller.
1179 // __ cld();
1180 // __ popl(edi);
1181 // __ popl(esi);
1182 // __ ret(0);
1183 __ pop(T8);
1184 __ pop(T1);
1185 __ pop(T0);
1186 __ pop(T3);
1187 __ jr(RA);
1188 __ delayed()->nop();
1189 return start;
1190 }
1192 // Arguments:
1193 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1194 // ignored
1195 // is_oop - true => oop array, so generate store check code
1196 // name - stub name string
1197 //
1198 // Inputs:
1199 // c_rarg0 - source array address
1200 // c_rarg1 - destination array address
1201 // c_rarg2 - element count, treated as ssize_t, can be zero
1202 //
1203 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1204 // the hardware handle it. The two dwords within qwords that span
1205 // cache line boundaries will still be loaded and stored atomicly.
1206 //
1207 // Side Effects:
1208 // disjoint_int_copy_entry is set to the no-overlap entry point
1209 // used by generate_conjoint_int_oop_copy().
1210 //
// Generate a stub that copies 'count' 64-bit elements (jlongs or, when
// is_oop, oops) between non-overlapping arrays, copying forwards.
//
// Inputs:
//   A0 - source array address
//   A1 - destination array address
//   A2 - element count
//
// Register use: T1 = count, T3 = src cursor, T0 = dst cursor,
// AT = scratch for the word in flight.  T3/T0/T1/T8 are saved/restored.
// The header comment above mentions disjoint_int_copy_entry, copied from
// the int version; this stub itself does not set any entry variable here.
1211 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
1212 Label l_2, l_3, l_4, l_stchk;
1213 StubCodeMark mark(this, "StubRoutines", name);
1214 __ align(CodeEntryAlignment);
1215 address start = __ pc();
1216 __ push(T3);
1217 __ push(T0);
1218 __ push(T1);
1219 __ push(T8);
1220 __ move(T1, A2);
1221 __ move(T3, A0);
1222 __ move(T0, A1);
// Unconditionally enter the per-element loop at l_2; the x86 original
// dispatched between a rep_movl fast path and this slow path here.
1224 // __ cmpl(ecx, 32);
1225 // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords
1226 // __ rep_movl();
1227 __ b(l_2);
1228 __ delayed()->nop();
// NOTE(review): everything from here to bind(l_2) is emitted but
// unreachable — dead remnant of the removed x86 fast path. Harmless,
// but candidate for deletion.
1229 if (is_oop) {
1230 // __ jmp(l_stchk);
1231 __ b(l_stchk);
1232 __ delayed()->nop();
1233 }
1234 // __ popl(edi);
1235 // __ popl(esi);
1236 // __ ret(0);
1237 __ pop(T8);
1238 __ pop(T1);
1239 __ pop(T0);
1240 __ pop(T3);
1241 __ jr(RA);
1242 __ delayed()->nop();
1244 __ bind(l_2);
// Zero-length copy: skip the loop entirely.
1245 // __ subl(edi, esi);
1246 // __ testl(ecx, ecx);
1247 // __ jcc(Assembler::zero, l_4);
1248 __ beq(T1, R0, l_4);
1249 __ delayed()->nop();
1250 __ align(16);
// Forward copy loop: one 64-bit word per iteration, cursors step +8.
1251 __ bind(l_3);
1252 //__ movl(edx, Address(esi));
1253 __ ld(AT, T3, 0);
1254 // __ movl(Address(edi, esi, Address::times_1), edx);
1255 __ sd(AT, T0, 0);
1256 // __ addl(esi, 4);
1257 __ addi(T3, T3, 8);
1258 __ addi(T0, T0, 8);
1259 // __ decl(ecx);
1260 __ addi(T1, T1, -1);
1261 // __ jcc(Assembler::notEqual, l_3);
1262 __ bne(T1, R0, l_3);
1263 __ delayed()->nop();
// Oop arrays: run the store check (card marking) over the destination.
1264 if (is_oop) {
1265 __ bind(l_stchk);
1266 // __ movl(edi, Address(esp, 8+ 8));
1267 // __ movl(ecx, Address(esp, 8+ 12));
1268 __ move(T0, A1);
1269 __ move(T1, A2);
1270 array_store_check();
1271 }
// Common exit: restore temps, return.
1272 __ bind(l_4);
1273 // __ popl(edi);
1274 // __ popl(esi);
1275 // __ ret(0);
1276 __ pop(T8);
1277 __ pop(T1);
1278 __ pop(T0);
1279 __ pop(T3);
1280 __ jr(RA);
1281 __ delayed()->nop();
1282 return start;
1283 }
1285 // Arguments:
1286 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1287 // ignored
1288 // is_oop - true => oop array, so generate store check code
1289 // name - stub name string
1290 //
1291 // Inputs:
1292 // c_rarg0 - source array address
1293 // c_rarg1 - destination array address
1294 // c_rarg2 - element count, treated as ssize_t, can be zero
1295 //
1296 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1297 // the hardware handle it. The two dwords within qwords that span
1298 // cache line boundaries will still be loaded and stored atomicly.
1299 //
// Generate a stub that copies 'count' 64-bit elements (jlongs or, when
// is_oop, oops) between arrays that may overlap.
//
// Inputs:
//   A0 - source array address
//   A1 - destination array address
//   A2 - element count
//
// Mirrors generate_conjoint_int_oop_copy but with an 8-byte element:
// jump to the matching disjoint stub when a forward copy is safe,
// otherwise copy backwards from the last element.  When is_oop, a
// store-check pass follows the copy.
1300 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
1301 Label l_2, l_3, l_4, l_stchk;
1302 StubCodeMark mark(this, "StubRoutines", name);
1303 __ align(CodeEntryAlignment);
1304 address start = __ pc();
1305 address nooverlap_target;
// Select the disjoint stub to branch to when no backward copy is needed.
1307 if (is_oop) {
1308 nooverlap_target = aligned ?
1309 StubRoutines::arrayof_oop_disjoint_arraycopy() :
1310 StubRoutines::oop_disjoint_arraycopy();
1311 }else {
1312 nooverlap_target = aligned ?
1313 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
1314 StubRoutines::jlong_disjoint_arraycopy();
1315 }
// 3 = log2(element size in bytes) for the overlap test's scaling.
1317 array_overlap_test(nooverlap_target, 3);
1319 __ push(T3);
1320 __ push(T0);
1321 __ push(T1);
1322 __ push(T8);
// T1 = count, T3 = src cursor, T0 = dst cursor.
1324 __ move(T1, A2);
1325 __ move(T3, A0);
1326 __ move(T0, A1);
// Point both cursors at the LAST element: base + count*8 - 8.
1328 //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
1329 __ sll(AT, T1, Address::times_8);
1330 __ add(AT, T3, AT);
1331 __ lea(T3 , Address(AT, -8));
1332 //__ std();
1333 //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
1334 __ sll(AT, T1, Address::times_8);
1335 __ add(AT, T0, AT);
1336 __ lea(T0 , Address(AT, -8));
// Zero-length copy: skip straight to the exit.
1338 // __ cmpl(ecx, 32);
1339 // __ jcc(Assembler::above, l_3); // > 32 dwords
1340 // __ testl(ecx, ecx);
1341 //__ jcc(Assembler::zero, l_4);
1342 __ beq(T1, R0, l_4);
1343 __ delayed()->nop();
1344 // __ subl(edi, esi);
1345 __ align(16);
// Backward copy loop: one 64-bit word per iteration, cursors step -8.
1346 __ bind(l_2);
1347 // __ movl(edx, Address(esi));
1348 __ ld(AT, T3, 0);
1349 // __ movl(Address(esi, edi, Address::times_1), edx);
1350 __ sd(AT, T0, 0);
1351 // __ subl(esi, 4);
1352 __ addi(T3, T3, -8);
1353 __ addi(T0, T0, -8);
1354 // __ decl(ecx);
1355 __ addi(T1, T1, -1);
1356 //__ jcc(Assembler::notEqual, l_2);
1357 __ bne(T1, R0, l_2);
1358 __ delayed()->nop();
// Oop arrays need a store check over the destination range.
1359 if (is_oop) {
1360 // __ jmp(l_stchk);
1361 __ b( l_stchk);
1362 __ delayed()->nop();
1363 }
// Exit path for the zero-length (non-oop fall-through) case.
1364 __ bind(l_4);
1365 // __ cld();
1366 // __ popl(edi);
1367 // __ popl(esi);
1368 // __ ret(0);
1369 __ pop(T8);
1370 __ pop(T1);
1371 __ pop(T0);
1372 __ pop(T3);
1373 __ jr(RA);
1374 __ delayed()->nop();
// NOTE(review): l_3 is bound but never branched to on MIPS — leftover
// from the x86 rep_movl fast path.
1375 __ bind(l_3);
1376 // __ rep_movl();
// Store-check pass: reload dst/count from the argument registers.
1377 if (is_oop) {
1378 __ bind(l_stchk);
1379 // __ movl(edi, Address(esp, 8+ 8));
1380 __ move(T0, A1);
1381 // __ movl(ecx, Address(esp, 8+ 12));
1382 __ move(T1, A2);
1383 array_store_check();
1384 }
// Restore temps and return.
1385 // __ cld();
1386 // __ popl(edi);
1387 // __ popl(esi);
1388 // __ ret(0);
1389 __ pop(T8);
1390 __ pop(T1);
1391 __ pop(T0);
1392 __ pop(T3);
1393 __ jr(RA);
1394 __ delayed()->nop();
1395 return start;
1396 }
1397 #if 0
1398 // Arguments:
1399 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1400 // ignored
1401 // is_oop - true => oop array, so generate store check code
1402 // name - stub name string
1403 //
1404 // Inputs:
1405 // c_rarg0 - source array address
1406 // c_rarg1 - destination array address
1407 // c_rarg2 - element count, treated as ssize_t, can be zero
1408 //
// DISABLED (#if 0): retained x86_64 reference implementation of the
// conjoint long/oop copy stub, kept only as porting documentation for
// the MIPS version above.  Copies qwords backwards via an 8-byte tail
// loop plus a 32-byte chunked loop; emits GC pre/post barriers for oops.
1409 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
1410 __ align(CodeEntryAlignment);
1411 StubCodeMark mark(this, "StubRoutines", name);
1412 address start = __ pc();
1414 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1415 const Register from = rdi; // source array address
1416 const Register to = rsi; // destination array address
1417 const Register qword_count = rdx; // elements count
1418 const Register saved_count = rcx;
1420 __ enter(); // required for proper stackwalking of RuntimeStub frame
1421 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
// Publish the entry point and branch to the disjoint stub if the
// ranges do not actually overlap backwards.
1423 address disjoint_copy_entry = NULL;
1424 if (is_oop) {
1425 assert(!UseCompressedOops, "shouldn't be called for compressed oops");
1426 disjoint_copy_entry = disjoint_oop_copy_entry;
1427 oop_copy_entry = __ pc();
1428 array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
1429 } else {
1430 disjoint_copy_entry = disjoint_long_copy_entry;
1431 long_copy_entry = __ pc();
1432 array_overlap_test(disjoint_long_copy_entry, Address::times_8);
1433 }
1434 BLOCK_COMMENT("Entry:");
1435 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
// NOTE(review): the overlap test is performed a second time here, after
// the branch-specific tests above — looks redundant; verify upstream.
1437 array_overlap_test(disjoint_copy_entry, Address::times_8);
1438 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1439 // r9 and r10 may be used to save non-volatile registers
1441 // 'from', 'to' and 'qword_count' are now valid
1443 if (is_oop) {
1444 // Save to and count for store barrier
1445 __ movptr(saved_count, qword_count);
1446 // No registers are destroyed by this call
1447 gen_write_ref_array_pre_barrier(to, saved_count);
1448 }
1450 __ jmp(L_copy_32_bytes);
1452 // Copy trailing qwords
1453 __ BIND(L_copy_8_bytes);
1454 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1455 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1456 __ decrement(qword_count);
1457 __ jcc(Assembler::notZero, L_copy_8_bytes);
// Non-oop copies can return directly; oop copies still owe a barrier.
1459 if (is_oop) {
1460 __ jmp(L_exit);
1461 } else {
1462 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
1463 restore_arg_regs();
1464 __ xorptr(rax, rax); // return 0
1465 __ leave(); // required for proper stackwalking of RuntimeStub frame
1466 __ ret(0);
1467 }
1469 // Copy in 32-bytes chunks
1470 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
// Oop exit: card-mark the copied range [to, to + count*8 - 8].
1472 if (is_oop) {
1473 __ BIND(L_exit);
1474 __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
1475 gen_write_ref_array_post_barrier(to, rcx, rax);
1476 inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
1477 } else {
1478 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
1479 }
1480 restore_arg_regs();
1481 __ xorptr(rax, rax); // return 0
1482 __ leave(); // required for proper stackwalking of RuntimeStub frame
1483 __ ret(0);
1485 return start;
1486 }
1489 // Helper for generating a dynamic type check.
1490 // Smashes no registers.
// DISABLED (#if 0): x86_64 reference implementation of a dynamic subtype
// check.  Jumps to L_success if sub_klass is a subtype of super_klass,
// falls through to L_miss otherwise.  Checks, in order: pointer equality,
// the primary supertype display at super_check_offset, and finally a
// linear scan of the secondary-supers array (caching a hit).
1491 void generate_type_check(Register sub_klass,
1492 Register super_check_offset,
1493 Register super_klass,
1494 Label& L_success) {
1495 assert_different_registers(sub_klass, super_check_offset, super_klass);
1497 BLOCK_COMMENT("type_check:");
1499 Label L_miss;
1501 // a couple of useful fields in sub_klass:
1502 int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
1503 Klass::secondary_supers_offset_in_bytes());
1504 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
1505 Klass::secondary_super_cache_offset_in_bytes());
1506 Address secondary_supers_addr(sub_klass, ss_offset);
1507 Address super_cache_addr( sub_klass, sc_offset);
1509 // if the pointers are equal, we are done (e.g., String[] elements)
1510 __ cmpptr(super_klass, sub_klass);
1511 __ jcc(Assembler::equal, L_success);
1513 // check the supertype display:
1514 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
1515 __ cmpptr(super_klass, super_check_addr); // test the super type
1516 __ jcc(Assembler::equal, L_success);
// If super_check_offset is not the secondary-super-cache slot, the display
// miss is definitive: the super is primary and sub is not below it.
1518 // if it was a primary super, we can just fail immediately
1519 __ cmpl(super_check_offset, sc_offset);
1520 __ jcc(Assembler::notEqual, L_miss);
1522 // Now do a linear scan of the secondary super-klass chain.
1523 // The repne_scan instruction uses fixed registers, which we must spill.
1524 // (We need a couple more temps in any case.)
1525 // This code is rarely used, so simplicity is a virtue here.
1526 inc_counter_np(SharedRuntime::_partial_subtype_ctr);
1527 {
// Spill the fixed registers repne_scan consumes (rax/rcx/rdi).
1528 __ push(rax);
1529 __ push(rcx);
1530 __ push(rdi);
1531 assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
1533 __ movptr(rdi, secondary_supers_addr);
1534 // Load the array length.
1535 __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
1536 // Skip to start of data.
1537 __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
1538 // Scan rcx words at [rdi] for occurance of rax
1539 // Set NZ/Z based on last compare
1540 __ movptr(rax, super_klass);
1541 if (UseCompressedOops) {
1542 // Compare against compressed form. Don't need to uncompress because
1543 // looks like orig rax is restored in popq below.
1544 __ encode_heap_oop(rax);
1545 __ repne_scanl();
1546 } else {
1547 __ repne_scan();
1548 }
1550 // Unspill the temp. registers:
1551 __ pop(rdi);
1552 __ pop(rcx);
1553 __ pop(rax);
// ZF survives the pops: not-equal means the scan exhausted without a hit.
1555 __ jcc(Assembler::notEqual, L_miss);
1556 }
1558 // Success. Cache the super we found and proceed in triumph.
1559 __ movptr(super_cache_addr, super_klass); // note: rax is dead
1560 __ jmp(L_success);
1562 // Fall through on failure!
1563 __ BIND(L_miss);
1564 }
1566 //
1567 // Generate checkcasting array copy stub
1568 //
1569 // Input:
1570 // c_rarg0 - source array address
1571 // c_rarg1 - destination array address
1572 // c_rarg2 - element count, treated as ssize_t, can be zero
1573 // c_rarg3 - size_t ckoff (super_check_offset)
1574 // not Win64
1575 // c_rarg4 - oop ckval (super_klass)
1576 // Win64
1577 // rsp+40 - oop ckval (super_klass)
1578 //
1579 // Output:
1580 // rax == 0 - success
1581 // rax == -1^K - failure, where K is partial transfer count
1582 //
// DISABLED (#if 0): x86_64 reference implementation of the checkcasting
// oop-array copy stub.  Copies each element individually, type-checking
// it against the destination element klass (ckoff/ckval) via
// generate_type_check.  Returns rax == 0 on full success, or -1^K where
// K is the number of elements copied before the first failing element.
1583 address generate_checkcast_copy(const char *name) {
1585 Label L_load_element, L_store_element, L_do_card_marks, L_done;
1587 // Input registers (after setup_arg_regs)
1588 const Register from = rdi; // source array address
1589 const Register to = rsi; // destination array address
1590 const Register length = rdx; // elements count
1591 const Register ckoff = rcx; // super_check_offset
1592 const Register ckval = r8; // super_klass
1594 // Registers used as temps (r13, r14 are save-on-entry)
1595 const Register end_from = from; // source array end address
1596 const Register end_to = r13; // destination array end address
1597 const Register count = rdx; // -(count_remaining)
1598 const Register r14_length = r14; // saved copy of length
1599 // End pointers are inclusive, and if length is not zero they point
1600 // to the last unit copied: end_to[0] := end_from[0]
1602 const Register rax_oop = rax; // actual oop copied
1603 const Register r11_klass = r11; // oop._klass
1605 //---------------------------------------------------------------
1606 // Assembler stub will be used for this call to arraycopy
1607 // if the two arrays are subtypes of Object[] but the
1608 // destination array type is not equal to or a supertype
1609 // of the source type. Each element must be separately
1610 // checked.
1612 __ align(CodeEntryAlignment);
1613 StubCodeMark mark(this, "StubRoutines", name);
1614 address start = __ pc();
1616 __ enter(); // required for proper stackwalking of RuntimeStub frame
1618 checkcast_copy_entry = __ pc();
1619 BLOCK_COMMENT("Entry:");
1621 #ifdef ASSERT
1622 // caller guarantees that the arrays really are different
1623 // otherwise, we would have to make conjoint checks
1624 { Label L;
1625 array_overlap_test(L, TIMES_OOP);
1626 __ stop("checkcast_copy within a single array");
1627 __ bind(L);
1628 }
1629 #endif //ASSERT
1631 // allocate spill slots for r13, r14
// Enum values double as word offsets from rsp after the subptr below.
1632 enum {
1633 saved_r13_offset,
1634 saved_r14_offset,
1635 saved_rbp_offset,
1636 saved_rip_offset,
1637 saved_rarg0_offset
1638 };
1639 __ subptr(rsp, saved_rbp_offset * wordSize);
1640 __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
1641 __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
1642 setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
1643 // ckoff => rcx, ckval => r8
1644 // r9 and r10 may be used to save non-volatile registers
1645 #ifdef _WIN64
1646 // last argument (#4) is on stack on Win64
1647 const int ckval_offset = saved_rarg0_offset + 4;
1648 __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
1649 #endif
1651 // check that int operands are properly extended to size_t
1652 assert_clean_int(length, rax);
1653 assert_clean_int(ckoff, rax);
1655 #ifdef ASSERT
1656 BLOCK_COMMENT("assert consistent ckoff/ckval");
1657 // The ckoff and ckval must be mutually consistent,
1658 // even though caller generates both.
1659 { Label L;
1660 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
1661 Klass::super_check_offset_offset_in_bytes());
1662 __ cmpl(ckoff, Address(ckval, sco_offset));
1663 __ jcc(Assembler::equal, L);
1664 __ stop("super_check_offset inconsistent");
1665 __ bind(L);
1666 }
1667 #endif //ASSERT
1669 // Loop-invariant addresses. They are exclusive end pointers.
1670 Address end_from_addr(from, length, TIMES_OOP, 0);
1671 Address end_to_addr(to, length, TIMES_OOP, 0);
1672 // Loop-variant addresses. They assume post-incremented count < 0.
1673 Address from_element_addr(end_from, count, TIMES_OOP, 0);
1674 Address to_element_addr(end_to, count, TIMES_OOP, 0);
1676 gen_write_ref_array_pre_barrier(to, count);
1678 // Copy from low to high addresses, indexed from the end of each array.
1679 __ lea(end_from, end_from_addr);
1680 __ lea(end_to, end_to_addr);
1681 __ movptr(r14_length, length); // save a copy of the length
1682 assert(length == count, ""); // else fix next line:
1683 __ negptr(count); // negate and test the length
1684 __ jcc(Assembler::notZero, L_load_element);
1686 // Empty array: Nothing to do.
1687 __ xorptr(rax, rax); // return 0 on (trivial) success
1688 __ jmp(L_done);
1690 // ======== begin loop ========
1691 // (Loop is rotated; its entry is L_load_element.)
1692 // Loop control:
1693 // for (count = -count; count != 0; count++)
1694 // Base pointers src, dst are biased by 8*(count-1),to last element.
1695 __ align(16);
1697 __ BIND(L_store_element);
1698 __ store_heap_oop(rax_oop, to_element_addr); // store the oop
1699 __ increment(count); // increment the count toward zero
1700 __ jcc(Assembler::zero, L_do_card_marks);
1702 // ======== loop entry is here ========
1703 __ BIND(L_load_element);
1704 __ load_heap_oop(rax_oop, from_element_addr); // load the oop
// NULL elements need no type check; store them directly.
1705 __ testptr(rax_oop, rax_oop);
1706 __ jcc(Assembler::zero, L_store_element);
1708 __ load_klass(r11_klass, rax_oop);// query the object klass
// Falls through to the failure path below when the check misses.
1709 generate_type_check(r11_klass, ckoff, ckval, L_store_element);
1710 // ======== end loop ========
1712 // It was a real error; we must depend on the caller to finish the job.
1713 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
1714 // Emit GC store barriers for the oops we have copied (r14 + rdx),
1715 // and report their number to the caller.
1716 assert_different_registers(rax, r14_length, count, to, end_to, rcx);
1717 __ lea(end_to, to_element_addr);
1718 gen_write_ref_array_post_barrier(to, end_to, rscratch1);
1719 __ movptr(rax, r14_length); // original oops
1720 __ addptr(rax, count); // K = (original - remaining) oops
1721 __ notptr(rax); // report (-1^K) to caller
1722 __ jmp(L_done);
1724 // Come here on success only.
1725 __ BIND(L_do_card_marks);
1726 __ addptr(end_to, -wordSize); // make an inclusive end pointer
1727 gen_write_ref_array_post_barrier(to, end_to, rscratch1);
1728 __ xorptr(rax, rax); // return 0 on success
1730 // Common exit point (success or failure).
1731 __ BIND(L_done);
1732 __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
1733 __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
1734 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1735 restore_arg_regs();
1736 __ leave(); // required for proper stackwalking of RuntimeStub frame
1737 __ ret(0);
1739 return start;
1740 }
1742 //
1743 // Generate 'unsafe' array copy stub
1744 // Though just as safe as the other stubs, it takes an unscaled
1745 // size_t argument instead of an element count.
1746 //
1747 // Input:
1748 // c_rarg0 - source array address
1749 // c_rarg1 - destination array address
1750 // c_rarg2 - byte count, treated as ssize_t, can be zero
1751 //
1752 // Examines the alignment of the operands and dispatches
1753 // to a long, int, short, or byte copy loop.
1754 //
// DISABLED (#if 0): x86_64 reference implementation of the 'unsafe'
// array copy stub.  Takes a raw byte count (not an element count), ORs
// the two addresses and the size together, and dispatches to the widest
// copy stub (long/int/short/byte) whose alignment all three satisfy.
1755 address generate_unsafe_copy(const char *name) {
1757 Label L_long_aligned, L_int_aligned, L_short_aligned;
1759 // Input registers (before setup_arg_regs)
1760 const Register from = c_rarg0; // source array address
1761 const Register to = c_rarg1; // destination array address
1762 const Register size = c_rarg2; // byte count (size_t)
1764 // Register used as a temp
1765 const Register bits = rax; // test copy of low bits
1767 __ align(CodeEntryAlignment);
1768 StubCodeMark mark(this, "StubRoutines", name);
1769 address start = __ pc();
1771 __ enter(); // required for proper stackwalking of RuntimeStub frame
1773 // bump this on entry, not on exit:
1774 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
// The OR of all three values has a low bit set iff ANY of them is
// misaligned for the corresponding width.
1776 __ mov(bits, from);
1777 __ orptr(bits, to);
1778 __ orptr(bits, size);
1780 __ testb(bits, BytesPerLong-1);
1781 __ jccb(Assembler::zero, L_long_aligned);
1783 __ testb(bits, BytesPerInt-1);
1784 __ jccb(Assembler::zero, L_int_aligned);
// Not even short-aligned: fall back to the byte copy stub.
1786 __ testb(bits, BytesPerShort-1);
1787 __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
// Each target stub expects an element count, so scale the byte size down.
1789 __ BIND(L_short_aligned);
1790 __ shrptr(size, LogBytesPerShort); // size => short_count
1791 __ jump(RuntimeAddress(short_copy_entry));
1793 __ BIND(L_int_aligned);
1794 __ shrptr(size, LogBytesPerInt); // size => int_count
1795 __ jump(RuntimeAddress(int_copy_entry));
1797 __ BIND(L_long_aligned);
1798 __ shrptr(size, LogBytesPerLong); // size => qword_count
1799 __ jump(RuntimeAddress(long_copy_entry));
1801 return start;
1802 }
1804 // Perform range checks on the proposed arraycopy.
1805 // Kills temp, but nothing else.
1806 // Also, clean the sign bits of src_pos and dst_pos.
// DISABLED (#if 0): x86_64 helper emitting the arraycopy range checks:
// branches to L_failed unless src_pos+length and dst_pos+length both fit
// within their arrays' lengths.  Clobbers only 'temp'; also sign-extends
// src_pos and dst_pos to 64 bits (safe because they were checked >= 0).
1807 void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
1808 Register src_pos, // source position (c_rarg1)
1809 Register dst, // destination array oo (c_rarg2)
1810 Register dst_pos, // destination position (c_rarg3)
1811 Register length,
1812 Register temp,
1813 Label& L_failed) {
1814 BLOCK_COMMENT("arraycopy_range_checks:");
1816 // if (src_pos + length > arrayOop(src)->length()) FAIL;
1817 __ movl(temp, length);
1818 __ addl(temp, src_pos); // src_pos + length
// Unsigned 'above' also catches a wrapped (overflowed) sum.
1819 __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
1820 __ jcc(Assembler::above, L_failed);
1822 // if (dst_pos + length > arrayOop(dst)->length()) FAIL;
1823 __ movl(temp, length);
1824 __ addl(temp, dst_pos); // dst_pos + length
1825 __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
1826 __ jcc(Assembler::above, L_failed);
1828 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
1829 // Move with sign extension can be used since they are positive.
1830 __ movslq(src_pos, src_pos);
1831 __ movslq(dst_pos, dst_pos);
1833 BLOCK_COMMENT("arraycopy_range_checks done");
1834 }
1836 //
1837 // Generate generic array copy stubs
1838 //
1839 // Input:
1840 // c_rarg0 - src oop
1841 // c_rarg1 - src_pos (32-bits)
1842 // c_rarg2 - dst oop
1843 // c_rarg3 - dst_pos (32-bits)
1844 // not Win64
1845 // c_rarg4 - element count (32-bits)
1846 // Win64
1847 // rsp+40 - element count (32-bits)
1848 //
1849 // Output:
1850 // rax == 0 - success
1851 // rax == -1^K - failure, where K is partial transfer count
1852 //
1853 address generate_generic_copy(const char *name) {
1855 Label L_failed, L_failed_0, L_objArray;
1856 Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
1858 // Input registers
1859 const Register src = c_rarg0; // source array oop
1860 const Register src_pos = c_rarg1; // source position
1861 const Register dst = c_rarg2; // destination array oop
1862 const Register dst_pos = c_rarg3; // destination position
1863 // elements count is on stack on Win64
1864 #ifdef _WIN64
1865 #define C_RARG4 Address(rsp, 6 * wordSize)
1866 #else
1867 #define C_RARG4 c_rarg4
1868 #endif
1870 { int modulus = CodeEntryAlignment;
1871 int target = modulus - 5; // 5 = sizeof jmp(L_failed)
1872 int advance = target - (__ offset() % modulus);
1873 if (advance < 0) advance += modulus;
1874 if (advance > 0) __ nop(advance);
1875 }
1876 StubCodeMark mark(this, "StubRoutines", name);
1878 // Short-hop target to L_failed. Makes for denser prologue code.
1879 __ BIND(L_failed_0);
1880 __ jmp(L_failed);
1881 assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
1883 __ align(CodeEntryAlignment);
1884 address start = __ pc();
1886 __ enter(); // required for proper stackwalking of RuntimeStub frame
1888 // bump this on entry, not on exit:
1889 inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
1891 //-----------------------------------------------------------------------
1892 // Assembler stub will be used for this call to arraycopy
1893 // if the following conditions are met:
1894 //
1895 // (1) src and dst must not be null.
1896 // (2) src_pos must not be negative.
1897 // (3) dst_pos must not be negative.
1898 // (4) length must not be negative.
1899 // (5) src klass and dst klass should be the same and not NULL.
1900 // (6) src and dst should be arrays.
1901 // (7) src_pos + length must not exceed length of src.
1902 // (8) dst_pos + length must not exceed length of dst.
1903 //
1905 // if (src == NULL) return -1;
1906 __ testptr(src, src); // src oop
1907 size_t j1off = __ offset();
1908 __ jccb(Assembler::zero, L_failed_0);
1910 // if (src_pos < 0) return -1;
1911 __ testl(src_pos, src_pos); // src_pos (32-bits)
1912 __ jccb(Assembler::negative, L_failed_0);
1914 // if (dst == NULL) return -1;
1915 __ testptr(dst, dst); // dst oop
1916 __ jccb(Assembler::zero, L_failed_0);
1918 // if (dst_pos < 0) return -1;
1919 __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
1920 size_t j4off = __ offset();
1921 __ jccb(Assembler::negative, L_failed_0);
1923 // The first four tests are very dense code,
1924 // but not quite dense enough to put four
1925 // jumps in a 16-byte instruction fetch buffer.
1926 // That's good, because some branch predicters
1927 // do not like jumps so close together.
1928 // Make sure of this.
1929 guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
1931 // registers used as temp
1932 const Register r11_length = r11; // elements count to copy
1933 const Register r10_src_klass = r10; // array klass
1934 const Register r9_dst_klass = r9; // dest array klass
1936 // if (length < 0) return -1;
1937 __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value)
1938 __ testl(r11_length, r11_length);
1939 __ jccb(Assembler::negative, L_failed_0);
1941 __ load_klass(r10_src_klass, src);
1942 #ifdef ASSERT
1943 // assert(src->klass() != NULL);
1944 BLOCK_COMMENT("assert klasses not null");
1945 { Label L1, L2;
1946 __ testptr(r10_src_klass, r10_src_klass);
1947 __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL
1948 __ bind(L1);
1949 __ stop("broken null klass");
1950 __ bind(L2);
1951 __ load_klass(r9_dst_klass, dst);
1952 __ cmpq(r9_dst_klass, 0);
1953 __ jcc(Assembler::equal, L1); // this would be broken also
1954 BLOCK_COMMENT("assert done");
1955 }
1956 #endif
1958 // Load layout helper (32-bits)
1959 //
1960 // |array_tag| | header_size | element_type | |log2_element_size|
1961 // 32 30 24 16 8 2 0
1962 //
1963 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
1964 //
1966 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
1967 Klass::layout_helper_offset_in_bytes();
1969 const Register rax_lh = rax; // layout helper
1971 __ movl(rax_lh, Address(r10_src_klass, lh_offset));
1973 // Handle objArrays completely differently...
1974 jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
1975 __ cmpl(rax_lh, objArray_lh);
1976 __ jcc(Assembler::equal, L_objArray);
1978 // if (src->klass() != dst->klass()) return -1;
1979 __ load_klass(r9_dst_klass, dst);
1980 __ cmpq(r10_src_klass, r9_dst_klass);
1981 __ jcc(Assembler::notEqual, L_failed);
1983 // if (!src->is_Array()) return -1;
1984 __ cmpl(rax_lh, Klass::_lh_neutral_value);
1985 __ jcc(Assembler::greaterEqual, L_failed);
1987 // At this point, it is known to be a typeArray (array_tag 0x3).
1988 #ifdef ASSERT
1989 { Label L;
1990 __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
1991 __ jcc(Assembler::greaterEqual, L);
1992 __ stop("must be a primitive array");
1993 __ bind(L);
1994 }
1995 #endif
1997 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
1998 r10, L_failed);
2000 // typeArrayKlass
2001 //
2002 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
2003 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
2004 //
2006 const Register r10_offset = r10; // array offset
2007 const Register rax_elsize = rax_lh; // element size
2009 __ movl(r10_offset, rax_lh);
2010 __ shrl(r10_offset, Klass::_lh_header_size_shift);
2011 __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset
2012 __ addptr(src, r10_offset); // src array offset
2013 __ addptr(dst, r10_offset); // dst array offset
2014 BLOCK_COMMENT("choose copy loop based on element size");
2015 __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
2017 // next registers should be set before the jump to corresponding stub
2018 const Register from = c_rarg0; // source array address
2019 const Register to = c_rarg1; // destination array address
2020 const Register count = c_rarg2; // elements count
2022 // 'from', 'to', 'count' registers should be set in such order
2023 // since they are the same as 'src', 'src_pos', 'dst'.
2025 __ BIND(L_copy_bytes);
2026 __ cmpl(rax_elsize, 0);
2027 __ jccb(Assembler::notEqual, L_copy_shorts);
2028 __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
2029 __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr
2030 __ movl2ptr(count, r11_length); // length
2031 __ jump(RuntimeAddress(byte_copy_entry));
2033 __ BIND(L_copy_shorts);
2034 __ cmpl(rax_elsize, LogBytesPerShort);
2035 __ jccb(Assembler::notEqual, L_copy_ints);
2036 __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
2037 __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr
2038 __ movl2ptr(count, r11_length); // length
2039 __ jump(RuntimeAddress(short_copy_entry));
2041 __ BIND(L_copy_ints);
2042 __ cmpl(rax_elsize, LogBytesPerInt);
2043 __ jccb(Assembler::notEqual, L_copy_longs);
2044 __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
2045 __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr
2046 __ movl2ptr(count, r11_length); // length
2047 __ jump(RuntimeAddress(int_copy_entry));
2049 __ BIND(L_copy_longs);
2050 #ifdef ASSERT
2051 { Label L;
2052 __ cmpl(rax_elsize, LogBytesPerLong);
2053 __ jcc(Assembler::equal, L);
2054 __ stop("must be long copy, but elsize is wrong");
2055 __ bind(L);
2056 }
2057 #endif
2058 __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
2059 __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr
2060 __ movl2ptr(count, r11_length); // length
2061 __ jump(RuntimeAddress(long_copy_entry));
2063 // objArrayKlass
2064 __ BIND(L_objArray);
2065 // live at this point: r10_src_klass, src[_pos], dst[_pos]
2067 Label L_plain_copy, L_checkcast_copy;
2068 // test array classes for subtyping
2069 __ load_klass(r9_dst_klass, dst);
2070 __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
2071 __ jcc(Assembler::notEqual, L_checkcast_copy);
2073 // Identically typed arrays can be copied without element-wise checks.
2074 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
2075 r10, L_failed);
2077 __ lea(from, Address(src, src_pos, TIMES_OOP,
2078 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
2079 __ lea(to, Address(dst, dst_pos, TIMES_OOP,
2080 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
2081 __ movl2ptr(count, r11_length); // length
2082 __ BIND(L_plain_copy);
2083 __ jump(RuntimeAddress(oop_copy_entry));
2085 __ BIND(L_checkcast_copy);
2086 // live at this point: r10_src_klass, !r11_length
2087 {
2088 // assert(r11_length == C_RARG4); // will reload from here
2089 Register r11_dst_klass = r11;
2090 __ load_klass(r11_dst_klass, dst);
2092 // Before looking at dst.length, make sure dst is also an objArray.
2093 __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
2094 __ jcc(Assembler::notEqual, L_failed);
2096 // It is safe to examine both src.length and dst.length.
2097 #ifndef _WIN64
2098 arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
2099 rax, L_failed);
2100 #else
2101 __ movl(r11_length, C_RARG4); // reload
2102 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
2103 rax, L_failed);
2104 __ load_klass(r11_dst_klass, dst); // reload
2105 #endif
2107 // Marshal the base address arguments now, freeing registers.
2108 __ lea(from, Address(src, src_pos, TIMES_OOP,
2109 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
2110 __ lea(to, Address(dst, dst_pos, TIMES_OOP,
2111 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
2112 __ movl(count, C_RARG4); // length (reloaded)
2113 Register sco_temp = c_rarg3; // this register is free now
2114 assert_different_registers(from, to, count, sco_temp,
2115 r11_dst_klass, r10_src_klass);
2116 assert_clean_int(count, sco_temp);
2118 // Generate the type check.
2119 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2120 Klass::super_check_offset_offset_in_bytes());
2121 __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
2122 assert_clean_int(sco_temp, rax);
2123 generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
2125 // Fetch destination element klass from the objArrayKlass header.
2126 int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
2127 objArrayKlass::element_klass_offset_in_bytes());
2128 __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
2129 __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
2130 assert_clean_int(sco_temp, rax);
2132 // the checkcast_copy loop needs two extra arguments:
2133 assert(c_rarg3 == sco_temp, "#3 already in place");
2134 __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass
2135 __ jump(RuntimeAddress(checkcast_copy_entry));
2136 }
2138 __ BIND(L_failed);
2139 __ xorptr(rax, rax);
2140 __ notptr(rax); // return -1
2141 __ leave(); // required for proper stackwalking of RuntimeStub frame
2142 __ ret(0);
2144 return start;
2145 }
2147 #undef length_arg
2148 #endif
  // Generates a stub that copies 'count' 64-bit elements front-to-back
  // between non-overlapping arrays.
  //
  // Arguments (standard MIPS argument registers):
  //   A0 - source address
  //   A1 - destination address
  //   A2 - element count
  //
  // NOTE(review): the 'aligned' flag is not used anywhere in the body — no
  // wide/unrolled fast path is selected for the aligned case; confirm
  // whether that is intentional for this port.
  address generate_disjoint_long_copy(bool aligned, const char *name) {
    Label l_1, l_2;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    __ move(T1, A2);     // T1 = remaining element count
    __ move(T3, A0);     // T3 = running source pointer
    __ move(T0, A1);     // T0 = running destination pointer
    // Preserve the scratch registers across the stub.
    __ push(T3);
    __ push(T0);
    __ push(T1);
    // Enter at the count check so a zero count copies nothing.
    __ b(l_2);
    __ delayed()->nop();
    __ align(16);
    __ bind(l_1);
    // Copy one 64-bit element and advance both pointers.
    __ ld(AT, T3, 0);
    __ sd (AT, T0, 0);
    __ addi(T3, T3, 8);
    __ addi(T0, T0, 8);
    __ bind(l_2);
    // Decrement first, then loop while the count stays >= 0: the body
    // executes exactly 'count' times.
    __ addi(T1, T1, -1);
    __ bgez(T1, l_1);
    __ delayed()->nop();
    // Restore scratch registers (reverse of the pushes above) and return.
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Generates a stub that copies 'count' 64-bit elements between possibly
  // overlapping arrays.  If the ranges do not actually overlap, control is
  // forwarded to the corresponding disjoint stub; otherwise the copy runs
  // back-to-front so an overlapping destination never clobbers unread
  // source elements.
  //
  // Arguments:
  //   A0 - source address
  //   A1 - destination address
  //   A2 - element count
  address generate_conjoint_long_copy(bool aligned, const char *name) {
    Label l_1, l_2;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    // Branch to the no-overlap fast path when the ranges are disjoint
    // (shift of 3 == log2 of the 8-byte element size).
    address nooverlap_target = aligned ?
            StubRoutines::arrayof_jlong_disjoint_arraycopy() :
            StubRoutines::jlong_disjoint_arraycopy();
    array_overlap_test(nooverlap_target, 3);
    // Preserve the scratch registers across the stub.
    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ move(T1, A2);     // T1 = remaining element count
    __ move(T3, A0);     // T3 = source pointer
    __ move(T0, A1);     // T0 = destination pointer
    // Point T3/T0 at the LAST element of each range: base + count*8 - 8.
    __ sll(AT, T1, Address::times_8);
    __ add(AT, T3, AT);
    __ lea(T3 , Address(AT, -8));
    __ sll(AT, T1, Address::times_8);
    __ add(AT, T0, AT);
    __ lea(T0 , Address(AT, -8));
    // Enter at the count check so a zero count copies nothing.
    __ b(l_2);
    __ delayed()->nop();
    __ align(16);
    __ bind(l_1);
    // Copy one 64-bit element and step both pointers backward.
    __ ld(AT, T3, 0);
    __ sd (AT, T0, 0);
    __ addi(T3, T3, -8);
    __ addi(T0, T0,-8);
    __ bind(l_2);
    // Decrement first, then loop while the count stays >= 0: the body
    // executes exactly 'count' times.
    __ addi(T1, T1, -1);
    __ bgez(T1, l_1);
    __ delayed()->nop();
    // Restore scratch registers and return.
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Generates all arraycopy stubs and records their entry points in
  // StubRoutines.
  //
  // Order matters: the conjoint generators fetch the matching disjoint
  // entry point for their no-overlap fast path (see
  // generate_conjoint_long_copy), so each disjoint stub must be generated
  // before its conjoint counterpart.
  void generate_arraycopy_stubs() {
    // Oop copies: a compressed oop is int-sized, otherwise long-sized.
    if (UseCompressedOops) {
      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy  = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
    } else {
      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy  = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
    }

    // Disjoint (non-overlapping) primitive copies, unaligned variants.
    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
    // Disjoint copies of known-aligned ("arrayof") arrays.
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");

    // Conjoint (possibly overlapping) primitive copies.
    StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
    StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
    StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");

    StubRoutines::_arrayof_jbyte_arraycopy  = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy");

    // The aligned oop entries reuse the unaligned oop stubs on this port.
    StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
  }
  // Wang: added generate_safefetch() below to implement the SafeFetch32 and
  // SafeFetchN stubs.
2309 void generate_safefetch(const char* name, int size, address* entry,
2310 address* fault_pc, address* continuation_pc) {
2311 // safefetch signatures:
2312 // int SafeFetch32(int* adr, int errValue);
2313 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
2314 //
2315 // arguments:
2316 // A0 = adr
2317 // A1 = errValue
2318 //
2319 // result:
2320 // PPC_RET = *adr or errValue
2322 StubCodeMark mark(this, "StubRoutines", name);
2324 // Entry point, pc or function descriptor.
2325 *entry = __ pc();
2327 // Load *adr into A1, may fault.
2328 *fault_pc = __ pc();
2329 switch (size) {
2330 case 4:
2331 // int32_t
2332 __ lw(A1, A0, 0);
2333 break;
2334 case 8:
2335 // int64_t
2336 __ ld(A1, A0, 0);
2337 break;
2338 default:
2339 ShouldNotReachHere();
2340 }
2342 // return errValue or *adr
2343 *continuation_pc = __ pc();
2344 __ addu(V0,A1,R0);
2345 __ jr(RA);
2346 __ delayed()->nop();
2347 }
2350 #undef __
2351 #define __ masm->
2353 // Continuation point for throwing of implicit exceptions that are
2354 // not handled in the current activation. Fabricates an exception
2355 // oop and initiates normal exception dispatching in this
2356 // frame. Since we need to preserve callee-saved values (currently
2357 // only for C2, but done for C1 as well) we need a callee-saved oop
2358 // map and therefore have to make these stubs into RuntimeStubs
2359 // rather than BufferBlobs. If the compiler needs all registers to
2360 // be preserved between the fault point and the exception handler
2361 // then it must assume responsibility for that in
2362 // AbstractCompiler::continuation_for_implicit_null_exception or
2363 // continuation_for_implicit_division_by_zero_exception. All other
2364 // implicit exceptions (e.g., NullPointerException or
2365 // AbstractMethodError on entry) are either at call sites or
2366 // otherwise assume that stack unwinding will be initiated, so
2367 // caller saved registers were assumed volatile in the compiler.
  // Fabricates a RuntimeStub that calls 'runtime_entry' (a C++ function
  // taking the current JavaThread) to create and post an exception, then
  // forwards control to the shared exception-forwarding entry.
  //   name                       - stub name (debugging / RuntimeStub label)
  //   runtime_entry              - runtime function to call
  //   restore_saved_exception_pc - if true, reload RA from the thread's
  //                                saved_exception_pc so the stub appears to
  //                                have been called from the faulting pc
  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   bool restore_saved_exception_pc) {
    // Information about frame layout at time of blocking runtime call.
    // Note that we only have to preserve callee-saved registers since
    // the compilers are responsible for supplying a continuation point
    // if they expect all registers to be preserved.
    //#define aoqi_test
#ifdef aoqi_test
    tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
#endif
    // Stack-slot layout (in words) of the fabricated frame.
    enum layout {
      thread_off,    // last_java_sp
      S7_off,        // callee saved register sp + 1
      S6_off,        // callee saved register sp + 2
      S5_off,        // callee saved register sp + 3
      S4_off,        // callee saved register sp + 4
      S3_off,        // callee saved register sp + 5
      S2_off,        // callee saved register sp + 6
      S1_off,        // callee saved register sp + 7
      S0_off,        // callee saved register sp + 8
      FP_off,
      ret_address,
      framesize
    };

    int insts_size = 2048;
    int locs_size = 32;

    CodeBuffer code (name , insts_size, locs_size);
#ifdef aoqi_test
    tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
#endif
    OopMapSet* oop_maps = new OopMapSet();
#ifdef aoqi_test
    tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
#endif
    MacroAssembler* masm = new MacroAssembler(&code);
#ifdef aoqi_test
    tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
#endif

    address start = __ pc();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of
    // thread-local storage and also sets up last_Java_sp slightly
    // differently than the real call_VM
#ifndef OPT_THREAD
    Register java_thread = TREG;
    __ get_thread(java_thread);
#else
    // With OPT_THREAD the thread register is maintained globally.
    Register java_thread = TREG;
#endif
#ifdef aoqi_test
    tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
#endif
    if (restore_saved_exception_pc) {
      // Make the stub look as if it was called from the faulting pc.
      __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset()));
    }

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    // Allocate the frame and save the callee-saved registers into it.
    __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
    __ sd(S0, SP, S0_off * wordSize);
    __ sd(S1, SP, S1_off * wordSize);
    __ sd(S2, SP, S2_off * wordSize);
    __ sd(S3, SP, S3_off * wordSize);
    __ sd(S4, SP, S4_off * wordSize);
    __ sd(S5, SP, S5_off * wordSize);
    __ sd(S6, SP, S6_off * wordSize);
    __ sd(S7, SP, S7_off * wordSize);

    int frame_complete = __ pc() - start;
    // push java thread (becomes first argument of C function)
    __ sd(java_thread, SP, thread_off * wordSize);
    if (java_thread!=A0)
      __ move(A0, java_thread);

    // Set up last_Java_sp and last_Java_fp
    __ set_last_Java_frame(java_thread, SP, FP, NULL);
    __ relocate(relocInfo::internal_pc_type);
    {
      // Compute the pc the runtime call will appear to return to, from the
      // sizes of the li48/call instruction sequence emitted below, and
      // record it as last_Java_pc.
      intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
      __ li48(AT, save_pc);
    }
    __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));

    // Call runtime
    __ call(runtime_entry);
    __ delayed()->nop();
    // Generate oop map
    OopMap* map = new OopMap(framesize, 0);
    oop_maps->add_gc_map(__ offset(), map);

    // restore the thread (cannot use the pushed argument since arguments
    // may be overwritten by C code generated by an optimizing compiler);
    // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
    __ get_thread(java_thread);
#endif

    __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
    __ reset_last_Java_frame(java_thread, true, true);

    // Restore callee save registers.  This must be done after resetting the
    // Java frame.
    __ ld(S0, SP, S0_off * wordSize);
    __ ld(S1, SP, S1_off * wordSize);
    __ ld(S2, SP, S2_off * wordSize);
    __ ld(S3, SP, S3_off * wordSize);
    __ ld(S4, SP, S4_off * wordSize);
    __ ld(S5, SP, S5_off * wordSize);
    __ ld(S6, SP, S6_off * wordSize);
    __ ld(S7, SP, S7_off * wordSize);

    // discard arguments
    __ addi(SP, SP, (framesize-2) * wordSize); // epilog
    // Manual equivalent of leave(): required for proper stackwalking of the
    // RuntimeStub frame.
    __ addi(SP, FP, wordSize);
    __ ld(FP, SP, -1*wordSize);
    // check for pending exceptions
#ifdef ASSERT
    Label L;
    // NOTE(review): this is a 32-bit load of what appears to be a
    // pointer-sized field on a 64-bit port; a non-null value whose low
    // word is zero would be missed.  Confirm whether this should be ld.
    __ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
    __ bne(AT, R0, L);
    __ delayed()->nop();
    __ should_not_reach_here();
    __ bind(L);
#endif //ASSERT
    __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    __ delayed()->nop();
#ifdef aoqi_test
    tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
#endif
    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete,
                                                      framesize, oop_maps, false);
#ifdef aoqi_test
    tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
#endif
    return stub->entry_point();
  }
2543 // Initialization
  // Generates the stubs needed during early VM startup and installs their
  // entry points in StubRoutines; the remaining stubs are produced later by
  // generate_all().
  void generate_initial() {
    // Entry points that exist in all platforms.  Note: this is code that
    // could be shared among different platforms - however the benefit seems
    // to be smaller than the disadvantage of having a much more complicated
    // generator structure.  See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry = generate_forward_exception();
    StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
    // is referenced by megamorphic call
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();

    // Platform dependent entry (published under the gs2 namespace on this
    // port).
    StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
  }
  // Generates all remaining stubs (those not needed during early startup)
  // and installs their entry points in StubRoutines.
  void generate_all() {
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    // These entry points require SharedInfo::stack0 to be set up in
    // non-core builds and need to be relocatable, so they each
    // fabricate a RuntimeStub internally.
    //
    // NOTE(review): _throw_AbstractMethodError is wired up but
    // _throw_IncompatibleClassChangeError is not, and the ArithmeticException
    // and NullPointerException entries below are commented out — confirm
    // these omissions are intentional for this port.
    StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    // StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    // StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif

    // support for verify_oop (must happen after universe_init)
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
#ifndef CORE
    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();
#ifdef aoqi_test
    tty->print_cr("%s:%d", __func__, __LINE__);
#endif
#endif

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                       &StubRoutines::_safefetch32_fault_pc,
                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                       &StubRoutines::_safefetchN_fault_pc,
                       &StubRoutines::_safefetchN_continuation_pc);
  }
2723 public:
2724 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2725 if (all) {
2726 generate_all();
2727 } else {
2728 generate_initial();
2729 }
2730 }
2731 }; // end class declaration
2732 /*
2733 address StubGenerator::disjoint_byte_copy_entry = NULL;
2734 address StubGenerator::disjoint_short_copy_entry = NULL;
2735 address StubGenerator::disjoint_int_copy_entry = NULL;
2736 address StubGenerator::disjoint_long_copy_entry = NULL;
2737 address StubGenerator::disjoint_oop_copy_entry = NULL;
2739 address StubGenerator::byte_copy_entry = NULL;
2740 address StubGenerator::short_copy_entry = NULL;
2741 address StubGenerator::int_copy_entry = NULL;
2742 address StubGenerator::long_copy_entry = NULL;
2743 address StubGenerator::oop_copy_entry = NULL;
2745 address StubGenerator::checkcast_copy_entry = NULL;
2746 */
2747 void StubGenerator_generate(CodeBuffer* code, bool all) {
2748 StubGenerator g(code, all);
2749 }