Tue, 12 Jun 2018 13:58:17 +0800
#7157 Fix all places that fill a delay slot without saying delayed()
Summary: enable check_delay and guarantee that delay_state is at_delay_slot when filling a delay slot
Reviewed-by: aoqi
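The pattern this patch enforces, sketched below: every MIPS branch has one architectural
delay slot, and the port fills it through delayed() (e.g. "__ beq(AT, R0, L);
__ delayed()->nop();"). A minimal model of the bookkeeping follows; the member names
(delay_state, at_delay_slot, delayed(), check_delay()) are taken from the summary and
modeled on HotSpot's SPARC assembler — this is an illustrative sketch, not the patched
sources:

#include <cassert>

class DelaySlotAssembler {
 public:
  enum DelayState { no_delay, at_delay_slot, filling_delay_slot };

  DelaySlotAssembler() : _delay_state(no_delay) {}

  // Every branch emitter calls this: the next instruction emitted
  // will occupy the branch's delay slot.
  void emit_branch() {
    check_delay();                  // a branch may not itself sit in a slot
    _delay_state = at_delay_slot;
  }

  // Must be chained before the instruction that fills the slot:
  //   a.emit_branch(); a.delayed()->emit_nop();
  DelaySlotAssembler* delayed() {
    assert(_delay_state == at_delay_slot,
           "delayed() called with no branch pending");
    _delay_state = filling_delay_slot;
    return this;
  }

  // Runs before every emitted instruction; with checking enabled this
  // is what catches a slot being filled without saying delayed().
  void check_delay() {
    assert(_delay_state != at_delay_slot,
           "filling delay slot without delayed()");
    _delay_state = no_delay;        // the slot, if any, is now filled
  }

  void emit_nop() { check_delay(); /* emit the nop encoding here */ }

 private:
  DelayState _delay_state;
};

With check_delay enabled, a bare instruction emitted right after a branch trips the
assert, which is how such missed-delayed() sites get caught.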
/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_mips.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp

#define __ _masm->
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
//#define a__ ((Assembler*)_masm)->

//#ifdef PRODUCT
//#define BLOCK_COMMENT(str) /* nothing */
//#else
//#define BLOCK_COMMENT(str) __ block_comment(str)
//#endif

//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
// Stub Code definitions

static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc = thread->saved_exception_pc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine:  return garbage from the load
  // therefore, compute npc
  address npc = (address)((unsigned long)pc + sizeof(unsigned long));

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}
class StubGenerator: public StubCodeGenerator {
 private:

  // ABI mips n64
  // This figure does not describe the MIPS ABI; it shows the call-Java-from-C ABI.
  // Call stubs are used to call Java from C.
  //
  //     [ return_from_Java      ]
  //     [ argument word n-1     ] <--- sp
  //       ...
  //     [ argument word 0       ]
  //       ...
  // -10 [ S6                    ]
  //  -9 [ S5                    ]
  //  -8 [ S4                    ]
  //  -7 [ S3                    ]
  //  -6 [ S0                    ]
  //  -5 [ TSR(S2)               ]
  //  -4 [ LVP(S7)               ]
  //  -3 [ BCP(S1)               ]
  //  -2 [ saved fp              ] <--- fp_after_call
  //  -1 [ return address        ]
  //   0 [ ptr. to call wrapper  ] <--- a0 (old sp --> fp)
  //   1 [ result                ] <--- a1
  //   2 [ result_type           ] <--- a2
  //   3 [ method                ] <--- a3
  //   4 [ entry_point           ] <--- a4
  //   5 [ parameters            ] <--- a5
  //   6 [ parameter_size        ] <--- a6
  //   7 [ thread                ] <--- a7
  //
  // _LP64: n64 does not pass parameters on the stack.
  //
  //     [ return_from_Java      ]
  //     [ argument word n-1     ] <--- sp
  //       ...
  //     [ argument word 0       ]
  //       ...
  // -14 [ thread                ]
  // -13 [ result_type           ] <--- a2
  // -12 [ result                ] <--- a1
  // -11 [ ptr. to call wrapper  ] <--- a0
  // -10 [ S6                    ]
  //  -9 [ S5                    ]
  //  -8 [ S4                    ]
  //  -7 [ S3                    ]
  //  -6 [ S0                    ]
  //  -5 [ TSR(S2)               ]
  //  -4 [ LVP(S7)               ]
  //  -3 [ BCP(S1)               ]
  //  -2 [ saved fp              ] <--- fp_after_call
  //  -1 [ return address        ]
  //   0 [                       ] <--- old sp
  /*
   * 2014/01/16 Fu: Find the right place in the call_stub for GP.
   * GP will point to the starting point of Interpreter::dispatch_table(itos).
   * It should be saved/restored before/after Java calls.
   *
   */
  enum call_stub_layout {
    RA_off          = -1,
    FP_off          = -2,
    BCP_off         = -3,
    LVP_off         = -4,
    TSR_off         = -5,
    S1_off          = -6,
    S3_off          = -7,
    S4_off          = -8,
    S5_off          = -9,
    S6_off          = -10,
    result_off      = -11,
    result_type_off = -12,
    thread_off      = -13,
    total_off       = thread_off - 3,
    GP_off          = -16,
  };
  address generate_call_stub(address& return_address) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // same as in generate_catch_exception()!

    // stub code
    // save ra and fp
    __ sd(RA, SP, RA_off * wordSize);
    __ sd(FP, SP, FP_off * wordSize);
    __ sd(BCP, SP, BCP_off * wordSize);
    __ sd(LVP, SP, LVP_off * wordSize);
    __ sd(GP, SP, GP_off * wordSize);
    __ sd(TSR, SP, TSR_off * wordSize);
    __ sd(S1, SP, S1_off * wordSize);
    __ sd(S3, SP, S3_off * wordSize);
    __ sd(S4, SP, S4_off * wordSize);
    __ sd(S5, SP, S5_off * wordSize);
    __ sd(S6, SP, S6_off * wordSize);

    __ set64(GP, (long)Interpreter::dispatch_table(itos));

    // I think 14 is the maximum gap between an argument and a callee-saved register.
    __ daddi(FP, SP, (-2) * wordSize);
    __ daddi(SP, SP, total_off * wordSize);
    __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
    __ sd(A1, FP, result_off * wordSize);
    __ sd(A2, FP, result_type_off * wordSize);
    __ sd(A7, FP, thread_off * wordSize);

#ifdef OPT_THREAD
    __ move(TREG, A7);
#endif
    // added for compressed oops
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
      __ beq(AT, R0, L);
      __ delayed()->nop();
      /* FIXME: stop() is not fully implemented on the MIPS port yet; revisit in the future. */
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif
    // pass parameters if any
    // A5: parameter
    // A6: parameter_size
    // T0: parameter_size_tmp(--)
    // T2: offset(++)
    // T3: tmp
    Label parameters_done;
    // check whether parameter_size is 0
    __ beq(A6, R0, parameters_done);
    __ delayed()->nop();
    __ dsll(AT, A6, Interpreter::logStackElementSize);
    __ dsub(SP, SP, AT);
    __ move(AT, -StackAlignmentInBytes);
    __ andr(SP, SP, AT);
    // Copy Java parameters in reverse order (receiver last)
    // Note that the argument order is inverted in the process
    // source is edx[ecx: N-1..0]
    // dest is esp[ebx: 0..N-1]
    Label loop;
    __ move(T0, A6);
    __ move(T2, R0);
    __ bind(loop);

    // get parameter
    __ dsll(T3, T0, LogBytesPerWord);
    __ dadd(T3, T3, A5);
    __ ld(AT, T3, -wordSize);
    __ dsll(T3, T2, LogBytesPerWord);
    __ dadd(T3, T3, SP);
    __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
    __ daddi(T2, T2, 1);
    __ daddi(T0, T0, -1);
    __ bne(T0, R0, loop);
    __ delayed()->nop();
    // advance to next parameter

    // call Java function
    __ bind(parameters_done);

    // receiver in V0, methodOop in Rmethod
    __ move(Rmethod, A3);
    __ move(Rsender, SP);  // set sender sp
    __ jalr(A4);
    __ delayed()->nop();
    return_address = __ pc();
    Label common_return;
    __ bind(common_return);

    // store result depending on type
    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ ld(T0, FP, result_off * wordSize);        // result --> T0
    Label is_long, is_float, is_double, exit;
    __ ld(T2, FP, result_type_off * wordSize);   // result_type --> T2
    __ daddi(T3, T2, (-1) * T_LONG);
    __ beq(T3, R0, is_long);
    __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
    __ beq(T3, R0, is_float);
    __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
    __ beq(T3, R0, is_double);
    __ delayed()->nop();

    // handle T_INT case
    __ sd(V0, T0, 0 * wordSize);
    __ bind(exit);

    // restore
    __ daddi(SP, FP, 2 * wordSize);
    __ ld(RA, SP, RA_off * wordSize);
    __ ld(FP, SP, FP_off * wordSize);
    __ ld(BCP, SP, BCP_off * wordSize);
    __ ld(LVP, SP, LVP_off * wordSize);
    __ ld(GP, SP, GP_off * wordSize);
    __ ld(TSR, SP, TSR_off * wordSize);

    __ ld(S1, SP, S1_off * wordSize);
    __ ld(S3, SP, S3_off * wordSize);
    __ ld(S4, SP, S4_off * wordSize);
    __ ld(S5, SP, S5_off * wordSize);
    __ ld(S6, SP, S6_off * wordSize);

    // return
    __ jr(RA);
    __ delayed()->nop();

    // handle return types different from T_INT
    __ bind(is_long);
    __ sd(V0, T0, 0 * wordSize);
    //__ sd(V1, T0, 1 * wordSize);
    //__ sd(R0, T0, 1 * wordSize);
    __ b(exit);
    __ delayed()->nop();

    __ bind(is_float);
    __ swc1(F0, T0, 0 * wordSize);
    __ b(exit);
    __ delayed()->nop();

    __ bind(is_double);
    __ sdc1(F0, T0, 0 * wordSize);
    __ b(exit);
    __ delayed()->nop();
    // FIXME: the 1.6 MIPS version adds FPU handling here.
    StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
    __ b(common_return);
    __ delayed()->nop();
    return start;
  }
  // Return point for a Java call if there's an exception thrown in
  // Java code.  The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to setup the
  // rsp.
  //
  // rax: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    Register thread = TREG;

    // get thread directly
#ifndef OPT_THREAD
    __ ld(thread, FP, thread_off * wordSize);
#endif

#ifdef ASSERT
    // verify that threads correspond
    { Label L;
      __ get_thread(T8);
      __ beq(T8, thread, L);
      __ delayed()->nop();
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif
    // set pending exception
    __ verify_oop(V0);
    __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
    __ li(AT, (long)__FILE__);
    __ sd(AT, thread, in_bytes(Thread::exception_file_offset()));
    __ li(AT, (long)__LINE__);
    __ sd(AT, thread, in_bytes(Thread::exception_line_offset()));

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
    __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
    __ delayed()->nop();

    return start;
  }
  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime
  // or native call stub.  The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // rax: exception
  // rdx: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be on stack !!

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    //Register thread = TREG;
    Register thread = TREG;
    address start = __ pc();

    // Upon entry, the sp points to the return address returning into
    // Java (interpreted or compiled) code; i.e., the return address
    // throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack
    // but the exception handler will reset the stack pointer ->
    // ignore them.  A potential result in registers can be ignored as
    // well.

#ifndef OPT_THREAD
    __ get_thread(thread);
#endif
#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    {
      Label L;
      __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
      __ bne(AT, R0, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into T9
    __ ld(A1, SP, 0);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
    __ move(T9, V0);
    __ pop(V1);

#ifndef OPT_THREAD
    __ get_thread(thread);
#endif
    __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
    __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));

#ifdef ASSERT
    // make sure exception is set
    {
      Label L;
      __ bne(V0, R0, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // continue at exception handler (return address removed)
    // V0: exception
    // T9: exception handler
    // V1: throwing pc
    __ verify_oop(V0);
    __ jr(T9);
    __ delayed()->nop();

    return start;
  }
  // Support for intptr_t get_previous_fp()
  //
  // This routine is used to find the previous frame pointer for the
  // caller (current_frame_guess). This is used as part of debugging
  // when ps() is seemingly lost trying to find frames.
  // This code assumes that the caller (current_frame_guess) has a frame.
  address generate_get_previous_fp() {
    StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
    const Address old_fp(FP, 0);
    const Address older_fp(V0, 0);
    address start = __ pc();
    __ enter();
    __ lw(V0, old_fp);    // caller's fp
    __ lw(V0, older_fp);  // the frame for ps()
    __ leave();
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // The following routine generates a subroutine to throw an
  // asynchronous UnknownError when an unsafe access gets a fault that
  // could not be reasonably prevented by the programmer.  (Example:
  // SIGBUS/OBJERR.)
  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();
    __ pushad();  // push registers
    // Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
    __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
    __ delayed()->nop();
    __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord);
    __ popad();
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Non-destructive plausibility checks for oops
  //
  // Arguments:
  //   all args on stack!
  //
  // Stack after saving c_rarg3:
  //    [tos + 0]: saved c_rarg3
  //    [tos + 1]: saved c_rarg2
  //    [tos + 2]: saved r12 (several TemplateTable methods use it)
  //    [tos + 3]: saved flags
  //    [tos + 4]: return address
  //  * [tos + 5]: error message (char*)
  //  * [tos + 6]: object to verify (oop)
  //  * [tos + 7]: saved rax - saved by caller and bashed
  //  * = popped on exit
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();
    __ reinit_heapbase();
    __ verify_oop_subroutine();
    address end = __ pc();
    return start;
  }
  //
  //  Generate overlap test for array copy stubs
  //
  //  Input:
  //    A0  - array1
  //    A1  - array2
  //    A2  - element count
  //
  //  Note: this code can only use %eax, %ecx, and %edx
  //

  // use T9 as temp
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    int elem_size = 1 << log2_elem_size;
    Address::ScaleFactor sf = Address::times_1;

    switch (log2_elem_size) {
      case 0: sf = Address::times_1; break;
      case 1: sf = Address::times_2; break;
      case 2: sf = Address::times_4; break;
      case 3: sf = Address::times_8; break;
    }

    __ dsll(AT, A2, sf);
    __ dadd(AT, AT, A0);
    __ lea(T9, Address(AT, -elem_size));
    __ dsub(AT, A1, A0);
    __ blez(AT, no_overlap_target);
    __ delayed()->nop();
    __ dsub(AT, A1, T9);
    __ bgtz(AT, no_overlap_target);
    __ delayed()->nop();

    // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., then goto no_overlap_target
    Label L;
    __ bgez(A0, L);
    __ delayed()->nop();
    __ bgtz(A1, no_overlap_target);
    __ delayed()->nop();
    __ bind(L);
  }
  //
  //  Generate store check for array
  //
  //  Input:
  //    T0 - starting address (edi)
  //    T1 - element count (ecx)
  //
  //  The 2 input registers are overwritten
  //

  void array_store_check(Register tmp) {
    assert_different_registers(tmp, AT, T0, T1);
    BarrierSet* bs = Universe::heap()->barrier_set();
    assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
    CardTableModRefBS* ct = (CardTableModRefBS*)bs;
    assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
    Label l_0;

    if (UseConcMarkSweepGC) __ sync();

    __ set64(tmp, (long)ct->byte_map_base);

    __ dsll(AT, T1, TIMES_OOP);
    __ dadd(AT, T0, AT);
    __ daddiu(T1, AT, -BytesPerHeapOop);

    __ shr(T0, CardTableModRefBS::card_shift);
    __ shr(T1, CardTableModRefBS::card_shift);

    __ dsub(T1, T1, T0);  // end --> cards count
    __ bind(l_0);

    __ dadd(AT, tmp, T0);
    if (UseLoongsonISA) {
      __ gssbx(R0, AT, T1, 0);
    } else {
      __ dadd(AT, AT, T1);
      __ sb(R0, AT, 0);
    }

    __ bgtz(T1, l_0);
    __ delayed()->daddi(T1, T1, -1);
  }
  // Generate code for an array write pre barrier
  //
  //     addr  - starting address
  //     count - element count
  //     tmp   - scratch register
  //
  //     Destroy no registers!
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        // With G1, don't generate the call if we statically know that the target is uninitialized
        if (!dest_uninitialized) {
          __ pushad();  // push registers
          if (count == A0) {
            if (addr == A1) {
              // exactly backwards!!
              //__ xchgptr(c_rarg1, c_rarg0);
              __ move(AT, A0);
              __ move(A0, A1);
              __ move(A1, AT);
            } else {
              __ move(A1, count);
              __ move(A0, addr);
            }
          } else {
            __ move(A0, addr);
            __ move(A1, count);
          }
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
          __ popad();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();
    }
  }
  //
  // Generate code for an array write post barrier
  //
  //  Input:
  //    start   - register containing starting address of destination array
  //    count   - elements count
  //    scratch - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
    assert_different_registers(start, count, scratch, AT);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          __ pushad();  // push registers (overkill)
          if (count == A0) {
            if (start == A1) {
              // exactly backwards!!
              //__ xchgptr(c_rarg1, c_rarg0);
              __ move(AT, A0);
              __ move(A0, A1);
              __ move(A1, AT);
            } else {
              __ move(A1, count);
              __ move(A0, start);
            }
          } else {
            __ move(A0, start);
            __ move(A1, count);
          }
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
          __ popad();
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;
          const Register end = count;

          if (UseConcMarkSweepGC) __ sync();

          int64_t disp = (int64_t) ct->byte_map_base;
          __ set64(scratch, disp);

          __ lea(end, Address(start, count, TIMES_OOP, 0));  // end == start+count*oop_size
          __ daddiu(end, end, -BytesPerHeapOop);             // end - 1 to make inclusive
          __ shr(start, CardTableModRefBS::card_shift);
          __ shr(end, CardTableModRefBS::card_shift);
          __ dsubu(end, end, start);                         // end --> cards count

          __ daddu(start, start, scratch);

          __ bind(L_loop);
          if (UseLoongsonISA) {
            __ gssbx(R0, start, count, 0);
          } else {
            __ daddu(AT, start, count);
            __ sb(R0, AT, 0);
          }
          __ daddiu(count, count, -1);
          __ slt(AT, count, R0);
          __ beq(AT, R0, L_loop);
          __ delayed()->nop();
        }
        break;
      default:
        ShouldNotReachHere();
    }
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0 - source array address
  //   c_rarg1 - destination array address
  //   c_rarg2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_byte_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_byte_copy().
  //
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);

    Register tmp1 = T0;
    Register tmp2 = T1;
    Register tmp3 = T3;

    address start = __ pc();

    __ push(tmp1);
    __ push(tmp2);
    __ push(tmp3);
    __ move(tmp1, A0);
    __ move(tmp2, A1);
    __ move(tmp3, A2);

    Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11;
    Label l_debug;

    __ daddi(AT, tmp3, -9);  // why is the number 9?
    __ blez(AT, l_9);
    __ delayed()->nop();

    if (!aligned) {
      __ xorr(AT, tmp1, tmp2);
      __ andi(AT, AT, 1);
      __ bne(AT, R0, l_9);  // if arrays don't have the same alignment mod 2, do 1 element copy
      __ delayed()->nop();

      __ andi(AT, tmp1, 1);
      __ beq(AT, R0, l_10);  // copy 1 element if necessary to align to 2 bytes
      __ delayed()->nop();

      __ lb(AT, tmp1, 0);
      __ daddi(tmp1, tmp1, 1);
      __ sb(AT, tmp2, 0);
      __ daddi(tmp2, tmp2, 1);
      __ daddi(tmp3, tmp3, -1);
      __ bind(l_10);

      __ xorr(AT, tmp1, tmp2);
      __ andi(AT, AT, 3);
      __ bne(AT, R0, l_1);  // if arrays don't have the same alignment mod 4, do 2 elements copy
      __ delayed()->nop();

      // At this point it is guaranteed that both, from and to, have the same alignment mod 4.

      // Copy 2 elements if necessary to align to 4 bytes.
      __ andi(AT, tmp1, 3);
      __ beq(AT, R0, l_2);
      __ delayed()->nop();

      __ lhu(AT, tmp1, 0);
      __ daddi(tmp1, tmp1, 2);
      __ sh(AT, tmp2, 0);
      __ daddi(tmp2, tmp2, 2);
      __ daddi(tmp3, tmp3, -2);
      __ bind(l_2);

      // At this point the positions of both, from and to, are at least 4 byte aligned.

      // Copy 4 elements at a time.
      // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
      __ xorr(AT, tmp1, tmp2);
      __ andi(AT, AT, 7);
      __ bne(AT, R0, l_6);  // not same alignment mod 8 -> copy 4 bytes at a time, either from or to will be unaligned
      __ delayed()->nop();

      // Copy 4 elements if necessary to align to 8 bytes.
      __ andi(AT, tmp1, 7);
      __ beq(AT, R0, l_7);
      __ delayed()->nop();

      __ lw(AT, tmp1, 0);
      __ daddi(tmp3, tmp3, -4);
      __ sw(AT, tmp2, 0);
      { // FasterArrayCopy
        __ daddi(tmp1, tmp1, 4);
        __ daddi(tmp2, tmp2, 4);
      }
    }

    __ bind(l_7);

    // Copy 8 elements at a time; either the loads or the stores can
    // be unaligned if aligned == false.

    { // FasterArrayCopy
      __ daddi(AT, tmp3, -7);
      __ blez(AT, l_6);  // copy 4 bytes at a time if fewer than 8 elements remain
      __ delayed()->nop();

      __ bind(l_8);
      // For Loongson, there is 128-bit memory access. TODO
      __ ld(AT, tmp1, 0);
      __ sd(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 8);
      __ daddi(tmp2, tmp2, 8);
      __ daddi(tmp3, tmp3, -8);
      __ daddi(AT, tmp3, -8);
      __ bgez(AT, l_8);
      __ delayed()->nop();
    }
    __ bind(l_6);

    // copy 4 bytes at a time
    { // FasterArrayCopy
      __ daddi(AT, tmp3, -3);
      __ blez(AT, l_1);
      __ delayed()->nop();

      __ bind(l_3);
      __ lw(AT, tmp1, 0);
      __ sw(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 4);
      __ daddi(tmp2, tmp2, 4);
      __ daddi(tmp3, tmp3, -4);
      __ daddi(AT, tmp3, -4);
      __ bgez(AT, l_3);
      __ delayed()->nop();
    }

    // do 2 bytes copy
    __ bind(l_1);
    {
      __ daddi(AT, tmp3, -1);
      __ blez(AT, l_9);
      __ delayed()->nop();

      __ bind(l_5);
      __ lhu(AT, tmp1, 0);
      __ daddi(tmp3, tmp3, -2);
      __ sh(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 2);
      __ daddi(tmp2, tmp2, 2);
      __ daddi(AT, tmp3, -2);
      __ bgez(AT, l_5);
      __ delayed()->nop();
    }

    // do 1 element copy -- byte
    __ bind(l_9);
    __ beq(R0, tmp3, l_4);
    __ delayed()->nop();

    {
      __ bind(l_11);
      __ lb(AT, tmp1, 0);
      __ daddi(tmp3, tmp3, -1);
      __ sb(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 1);
      __ daddi(tmp2, tmp2, 1);
      __ daddi(AT, tmp3, -1);
      __ bgez(AT, l_11);
      __ delayed()->nop();
    }

    __ bind(l_4);
    __ pop(tmp3);
    __ pop(tmp2);
    __ pop(tmp1);

    __ jr(RA);
    __ delayed()->nop();

    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   A0 - source array address
  //   A1 - destination array address
  //   A2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_byte_copy(bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
    Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;

    address nooverlap_target = aligned ?
            StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
            StubRoutines::jbyte_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 0);

    const Register from      = A0;  // source array address
    const Register to        = A1;  // destination array address
    const Register count     = A2;  // elements count
    const Register end_from  = T3;  // source array end address
    const Register end_to    = T0;  // destination array end address
    const Register end_count = T1;  // remaining elements count

    __ push(end_from);
    __ push(end_to);
    __ push(end_count);
    __ push(T8);

    // copy from high to low
    __ move(end_count, count);
    __ dadd(end_from, from, end_count);
    __ dadd(end_to, to, end_count);

    // 2016/05/08 aoqi: If end_from and end_to have different alignments, an unaligned copy is performed.
    __ andi(AT, end_from, 3);
    __ andi(T8, end_to, 3);
    __ bne(AT, T8, l_copy_byte);
    __ delayed()->nop();

    // First deal with the unaligned data at the top.
    __ bind(l_unaligned);
    __ beq(end_count, R0, l_exit);
    __ delayed()->nop();

    __ andi(AT, end_from, 3);
    __ bne(AT, R0, l_from_unaligned);
    __ delayed()->nop();

    __ andi(AT, end_to, 3);
    __ beq(AT, R0, l_4_bytes_aligned);
    __ delayed()->nop();

    __ bind(l_from_unaligned);
    __ lb(AT, end_from, -1);
    __ sb(AT, end_to, -1);
    __ daddi(end_from, end_from, -1);
    __ daddi(end_to, end_to, -1);
    __ daddi(end_count, end_count, -1);
    __ b(l_unaligned);
    __ delayed()->nop();

    // now end_to and end_from point to 4-byte aligned high-ends;
    // end_count contains the byte count that is not yet copied.
    // copy 4 bytes at a time
    __ bind(l_4_bytes_aligned);

    __ move(T8, end_count);
    __ daddi(AT, end_count, -3);
    __ blez(AT, l_copy_suffix);
    __ delayed()->nop();

    //__ andi(T8, T8, 3);
    __ lea(end_from, Address(end_from, -4));
    __ lea(end_to, Address(end_to, -4));

    __ dsrl(end_count, end_count, 2);
    __ align(16);
    __ bind(l_copy_4_bytes_loop);  // l_copy_4_bytes
    __ lw(AT, end_from, 0);
    __ sw(AT, end_to, 0);
    __ addi(end_from, end_from, -4);
    __ addi(end_to, end_to, -4);
    __ addi(end_count, end_count, -1);
    __ bne(end_count, R0, l_copy_4_bytes_loop);
    __ delayed()->nop();

    __ b(l_copy_suffix);
    __ delayed()->nop();
    // copy dwords aligned or not with repeat move
    // l_copy_suffix
    // copy suffix (0-3 bytes)
    __ bind(l_copy_suffix);
    __ andi(T8, T8, 3);
    __ beq(T8, R0, l_exit);
    __ delayed()->nop();
    __ addi(end_from, end_from, 3);
    __ addi(end_to, end_to, 3);
    __ bind(l_copy_suffix_loop);
    __ lb(AT, end_from, 0);
    __ sb(AT, end_to, 0);
    __ addi(end_from, end_from, -1);
    __ addi(end_to, end_to, -1);
    __ addi(T8, T8, -1);
    __ bne(T8, R0, l_copy_suffix_loop);
    __ delayed()->nop();

    __ bind(l_copy_byte);
    __ beq(end_count, R0, l_exit);
    __ delayed()->nop();
    __ lb(AT, end_from, -1);
    __ sb(AT, end_to, -1);
    __ daddi(end_from, end_from, -1);
    __ daddi(end_to, end_to, -1);
    __ daddi(end_count, end_count, -1);
    __ b(l_copy_byte);
    __ delayed()->nop();

    __ bind(l_exit);
    __ pop(T8);
    __ pop(end_count);
    __ pop(end_to);
    __ pop(end_from);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Generate stub for disjoint short copy.  If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:        A0
  //   to:          A1
  //   elm.count:   A2 treated as signed
  //   one element: 2 bytes
  //
  // Strategy for aligned==true:
  //
  //  If length <= 9:
  //     1. copy 1 element at a time (l_5)
  //
  //  If length > 9:
  //     1. copy 4 elements at a time until less than 4 elements are left (l_7)
  //     2. copy 2 elements at a time until less than 2 elements are left (l_6)
  //     3. copy last element if one was left in step 2. (l_1)
  //
  //
  // Strategy for aligned==false:
  //
  //  If length <= 9: same as aligned==true case
  //
  //  If length > 9:
  //     1. continue with step 7. if the alignment of from and to mod 4
  //        is different.
  //     2. align from and to to 4 bytes by copying 1 element if necessary
  //     3. at l_2 from and to are 4 byte aligned; continue with
  //        6. if they cannot be aligned to 8 bytes because they have
  //        got different alignment mod 8.
  //     4. at this point we know that both, from and to, have the same
  //        alignment mod 8, now copy one element if necessary to get
  //        8 byte alignment of from and to.
  //     5. copy 4 elements at a time until less than 4 elements are
  //        left; depending on step 3. all load/stores are aligned.
  //     6. copy 2 elements at a time until less than 2 elements are
  //        left. (l_6)
  //     7. copy 1 element at a time. (l_5)
  //     8. copy last element if one was left in step 6. (l_1)
  address generate_disjoint_short_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);

    Register tmp1 = T0;
    Register tmp2 = T1;
    Register tmp3 = T3;
    Register tmp4 = T8;
    Register tmp5 = T9;
    Register tmp6 = T2;

    address start = __ pc();

    __ push(tmp1);
    __ push(tmp2);
    __ push(tmp3);
    __ move(tmp1, A0);
    __ move(tmp2, A1);
    __ move(tmp3, A2);

    Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14;
    Label l_debug;
    // don't try anything fancy if arrays don't have many elements
    __ daddi(AT, tmp3, -23);
    __ blez(AT, l_14);
    __ delayed()->nop();
    // move push here
    __ push(tmp4);
    __ push(tmp5);
    __ push(tmp6);

    if (!aligned) {
      __ xorr(AT, A0, A1);
      __ andi(AT, AT, 1);
      __ bne(AT, R0, l_debug);  // if arrays don't have the same alignment mod 2, can this happen?
      __ delayed()->nop();

      __ xorr(AT, A0, A1);
      __ andi(AT, AT, 3);
      __ bne(AT, R0, l_1);  // if arrays don't have the same alignment mod 4, do 1 element copy
      __ delayed()->nop();

      // At this point it is guaranteed that both, from and to, have the same alignment mod 4.

      // Copy 1 element if necessary to align to 4 bytes.
      __ andi(AT, A0, 3);
      __ beq(AT, R0, l_2);
      __ delayed()->nop();

      __ lhu(AT, tmp1, 0);
      __ daddi(tmp1, tmp1, 2);
      __ sh(AT, tmp2, 0);
      __ daddi(tmp2, tmp2, 2);
      __ daddi(tmp3, tmp3, -1);
      __ bind(l_2);

      // At this point the positions of both, from and to, are at least 4 byte aligned.

      // Copy 4 elements at a time.
      // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
      __ xorr(AT, tmp1, tmp2);
      __ andi(AT, AT, 7);
      __ bne(AT, R0, l_6);  // not same alignment mod 8 -> copy 2, either from or to will be unaligned
      __ delayed()->nop();

      // Copy a 2-element word if necessary to align to 8 bytes.
      __ andi(AT, tmp1, 7);
      __ beq(AT, R0, l_7);
      __ delayed()->nop();

      __ lw(AT, tmp1, 0);
      __ daddi(tmp3, tmp3, -2);
      __ sw(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 4);
      __ daddi(tmp2, tmp2, 4);
    }  // end of if (!aligned)

    __ bind(l_7);
    // At this point the positions of both, from and to, are at least 8 byte aligned.
    // Copy 8 elements at a time.
    // Align to 16 bytes, but only if both, from and to, have same alignment mod 16.
    __ xorr(AT, tmp1, tmp2);
    __ andi(AT, AT, 15);
    __ bne(AT, R0, l_9);
    __ delayed()->nop();

    // Copy a 4-element word if necessary to align to 16 bytes.
    __ andi(AT, tmp1, 15);
    __ beq(AT, R0, l_10);
    __ delayed()->nop();

    __ ld(AT, tmp1, 0);
    __ daddi(tmp3, tmp3, -4);
    __ sd(AT, tmp2, 0);
    __ daddi(tmp1, tmp1, 8);
    __ daddi(tmp2, tmp2, 8);

    __ bind(l_10);

    // Copy 8 elements at a time; either the loads or the stores can
    // be unaligned if aligned == false

    { // FasterArrayCopy
      __ bind(l_11);
      // For Loongson, the 128-bit memory access instructions are gslq/gssq
      if (UseLoongsonISA) {
        __ gslq(AT, tmp4, tmp1, 0);
        __ gslq(tmp5, tmp6, tmp1, 16);
        __ daddi(tmp1, tmp1, 32);
        __ daddi(tmp2, tmp2, 32);
        __ gssq(AT, tmp4, tmp2, -32);
        __ gssq(tmp5, tmp6, tmp2, -16);
      } else {
        __ ld(AT, tmp1, 0);
        __ ld(tmp4, tmp1, 8);
        __ ld(tmp5, tmp1, 16);
        __ ld(tmp6, tmp1, 24);
        __ daddi(tmp1, tmp1, 32);
        __ sd(AT, tmp2, 0);
        __ sd(tmp4, tmp2, 8);
        __ sd(tmp5, tmp2, 16);
        __ sd(tmp6, tmp2, 24);
        __ daddi(tmp2, tmp2, 32);
      }
      __ daddi(tmp3, tmp3, -16);
      __ daddi(AT, tmp3, -16);
      __ bgez(AT, l_11);
      __ delayed()->nop();
    }
    __ bind(l_9);

    // Copy 4 elements at a time; either the loads or the stores can
    // be unaligned if aligned == false.
    { // FasterArrayCopy
      __ daddi(AT, tmp3, -15);  // loop unrolled 4 times, so the element count should not be less than 16
      __ blez(AT, l_4);         // copy 2 at a time if fewer than 16 elements remain
      __ delayed()->nop();

      __ bind(l_8);
      __ ld(AT, tmp1, 0);
      __ ld(tmp4, tmp1, 8);
      __ ld(tmp5, tmp1, 16);
      __ ld(tmp6, tmp1, 24);
      __ sd(AT, tmp2, 0);
      __ sd(tmp4, tmp2, 8);
      __ sd(tmp5, tmp2, 16);
      __ daddi(tmp1, tmp1, 32);
      __ daddi(tmp2, tmp2, 32);
      __ daddi(tmp3, tmp3, -16);
      __ daddi(AT, tmp3, -16);
      __ bgez(AT, l_8);
      __ delayed()->sd(tmp6, tmp2, -8);
    }
    __ bind(l_6);

    // copy 2 elements at a time
    { // FasterArrayCopy
      __ daddi(AT, tmp3, -7);
      __ blez(AT, l_4);
      __ delayed()->nop();

      __ bind(l_3);
      __ lw(AT, tmp1, 0);
      __ lw(tmp4, tmp1, 4);
      __ lw(tmp5, tmp1, 8);
      __ lw(tmp6, tmp1, 12);
      __ sw(AT, tmp2, 0);
      __ sw(tmp4, tmp2, 4);
      __ sw(tmp5, tmp2, 8);
      __ daddi(tmp1, tmp1, 16);
      __ daddi(tmp2, tmp2, 16);
      __ daddi(tmp3, tmp3, -8);
      __ daddi(AT, tmp3, -8);
      __ bgez(AT, l_3);
      __ delayed()->sw(tmp6, tmp2, -4);
    }

    __ bind(l_1);
    // copy 1 element (2 bytes) at a time, unrolled 4 times
    { // FasterArrayCopy
      __ daddi(AT, tmp3, -3);
      __ blez(AT, l_4);
      __ delayed()->nop();

      __ bind(l_5);
      __ lhu(AT, tmp1, 0);
      __ lhu(tmp4, tmp1, 2);
      __ lhu(tmp5, tmp1, 4);
      __ lhu(tmp6, tmp1, 6);
      __ sh(AT, tmp2, 0);
      __ sh(tmp4, tmp2, 2);
      __ sh(tmp5, tmp2, 4);
      __ daddi(tmp1, tmp1, 8);
      __ daddi(tmp2, tmp2, 8);
      __ daddi(tmp3, tmp3, -4);
      __ daddi(AT, tmp3, -4);
      __ bgez(AT, l_5);
      __ delayed()->sh(tmp6, tmp2, -2);
    }
    // single element
    __ bind(l_4);

    __ pop(tmp6);
    __ pop(tmp5);
    __ pop(tmp4);

    __ bind(l_14);
    { // FasterArrayCopy
      __ beq(R0, tmp3, l_13);
      __ delayed()->nop();

      __ bind(l_12);
      __ lhu(AT, tmp1, 0);
      __ sh(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 2);
      __ daddi(tmp2, tmp2, 2);
      __ daddi(tmp3, tmp3, -1);
      __ daddi(AT, tmp3, -1);
      __ bgez(AT, l_12);
      __ delayed()->nop();
    }

    __ bind(l_13);
    __ pop(tmp3);
    __ pop(tmp2);
    __ pop(tmp1);

    __ jr(RA);
    __ delayed()->nop();

    __ bind(l_debug);
    __ stop("generate_disjoint_short_copy should not reach here");
    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0 - source array address
  //   c_rarg1 - destination array address
  //   c_rarg2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it.  The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_short_copy(bool aligned, const char *name) {
    Label l_1, l_2, l_3, l_4, l_5;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    address nooverlap_target = aligned ?
            StubRoutines::arrayof_jshort_disjoint_arraycopy() :
            StubRoutines::jshort_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 1);

    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);

    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    // copy dwords from high to low
    __ sll(AT, T1, Address::times_2);
    __ add(AT, T3, AT);
    __ lea(T3, Address(AT, -4));
    __ sll(AT, T1, Address::times_2);
    __ add(AT, T0, AT);
    __ lea(T0, Address(AT, -4));
    __ move(T8, T1);
    __ bind(l_1);
    __ sra(T1, T1, 1);
    __ beq(T1, R0, l_4);
    __ delayed()->nop();
    __ align(16);
    __ bind(l_2);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0);
    __ addi(T3, T3, -4);
    __ addi(T0, T0, -4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_2);
    __ delayed()->nop();
    __ b(l_4);
    __ delayed()->nop();
    // copy dwords with repeat move
    __ bind(l_3);
    __ bind(l_4);
    __ andi(T8, T8, 1);  // suffix count
    __ beq(T8, R0, l_5);
    __ delayed()->nop();
    // copy suffix
    __ lh(AT, T3, 2);
    __ sh(AT, T0, 2);
    __ bind(l_5);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0 - source array address
  //   c_rarg1 - destination array address
  //   c_rarg2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
    Label l_3, l_4, l_5, l_6, l_7;
    StubCodeMark mark(this, "StubRoutines", name);

    __ align(CodeEntryAlignment);
    address start = __ pc();
    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);
    __ push(T9);
    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    if (is_oop) {
      gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
    }

    if (!aligned) {
      __ xorr(AT, T3, T0);
      __ andi(AT, AT, 7);
      __ bne(AT, R0, l_5);  // not same alignment mod 8 -> copy 1 element each time
      __ delayed()->nop();

      __ andi(AT, T3, 7);
      __ beq(AT, R0, l_6);  // copy 2 elements each time
      __ delayed()->nop();

      __ lw(AT, T3, 0);
      __ daddi(T1, T1, -1);
      __ sw(AT, T0, 0);
      __ daddi(T3, T3, 4);
      __ daddi(T0, T0, 4);
    }

    {
      __ bind(l_6);
      __ daddi(AT, T1, -1);
      __ blez(AT, l_5);
      __ delayed()->nop();

      __ bind(l_7);
      __ ld(AT, T3, 0);
      __ sd(AT, T0, 0);
      __ daddi(T3, T3, 8);
      __ daddi(T0, T0, 8);
      __ daddi(T1, T1, -2);
      __ daddi(AT, T1, -2);
      __ bgez(AT, l_7);
      __ delayed()->nop();
    }

    __ bind(l_5);
    __ beq(T1, R0, l_4);
    __ delayed()->nop();

    __ align(16);
    __ bind(l_3);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0);
    __ addi(T3, T3, 4);
    __ addi(T0, T0, 4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_3);
    __ delayed()->nop();

    // exit
    __ bind(l_4);
    if (is_oop) {
      gen_write_ref_array_post_barrier(A1, A2, T1);
    }
    __ pop(T9);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();

    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0 - source array address
  //   c_rarg1 - destination array address
  //   c_rarg2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
    Label l_2, l_4;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    address nooverlap_target;

    if (is_oop) {
      nooverlap_target = aligned ?
              StubRoutines::arrayof_oop_disjoint_arraycopy() :
              StubRoutines::oop_disjoint_arraycopy();
    } else {
      nooverlap_target = aligned ?
              StubRoutines::arrayof_jint_disjoint_arraycopy() :
              StubRoutines::jint_disjoint_arraycopy();
    }

    array_overlap_test(nooverlap_target, 2);

    if (is_oop) {
      gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
    }

    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);
    __ push(T9);

    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    // T3: source array address
    // T0: destination array address
    // T1: element count

    __ sll(AT, T1, Address::times_4);
    __ add(AT, T3, AT);
    __ lea(T3, Address(AT, -4));
    __ sll(AT, T1, Address::times_4);
    __ add(AT, T0, AT);
    __ lea(T0, Address(AT, -4));

    __ beq(T1, R0, l_4);
    __ delayed()->nop();

    __ align(16);
    __ bind(l_2);
    __ lw(AT, T3, 0);
    __ sw(AT, T0, 0);
    __ addi(T3, T3, -4);
    __ addi(T0, T0, -4);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_2);
    __ delayed()->nop();

    __ bind(l_4);
    if (is_oop) {
      gen_write_ref_array_post_barrier(A1, A2, T1);
    }
    __ pop(T9);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();

    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0 - source array address
  //   c_rarg1 - destination array address
  //   c_rarg2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
    Label l_3, l_4;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();

    if (is_oop) {
      gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
    }

    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);
    __ push(T9);

    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    // T3: source array address
    // T0: destination array address
    // T1: element count

    __ beq(T1, R0, l_4);
    __ delayed()->nop();

    __ align(16);
    __ bind(l_3);
    __ ld(AT, T3, 0);
    __ sd(AT, T0, 0);
    __ addi(T3, T3, 8);
    __ addi(T0, T0, 8);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_3);
    __ delayed()->nop();

    // exit
    __ bind(l_4);
    if (is_oop) {
      gen_write_ref_array_post_barrier(A1, A2, T1);
    }
    __ pop(T9);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0 - source array address
  //   c_rarg1 - destination array address
  //   c_rarg2 - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) {
    Label l_2, l_4;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    address nooverlap_target;

    if (is_oop) {
      nooverlap_target = aligned ?
              StubRoutines::arrayof_oop_disjoint_arraycopy() :
              StubRoutines::oop_disjoint_arraycopy();
    } else {
      nooverlap_target = aligned ?
              StubRoutines::arrayof_jlong_disjoint_arraycopy() :
              StubRoutines::jlong_disjoint_arraycopy();
    }

    array_overlap_test(nooverlap_target, 3);

    if (is_oop) {
      gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized);
    }

    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ push(T8);
    __ push(T9);

    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);

    __ sll(AT, T1, Address::times_8);
    __ add(AT, T3, AT);
    __ lea(T3, Address(AT, -8));
    __ sll(AT, T1, Address::times_8);
    __ add(AT, T0, AT);
    __ lea(T0, Address(AT, -8));

    __ beq(T1, R0, l_4);
    __ delayed()->nop();

    __ align(16);
    __ bind(l_2);
    __ ld(AT, T3, 0);
    __ sd(AT, T0, 0);
    __ addi(T3, T3, -8);
    __ addi(T0, T0, -8);
    __ addi(T1, T1, -1);
    __ bne(T1, R0, l_2);
    __ delayed()->nop();

    // exit
    __ bind(l_4);
    if (is_oop) {
      gen_write_ref_array_post_barrier(A1, A2, T1);
    }
    __ pop(T9);
    __ pop(T8);
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  //FIXME
  address generate_disjoint_long_copy(bool aligned, const char *name) {
    Label l_1, l_2;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();

    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);
    __ push(T3);
    __ push(T0);
    __ push(T1);
    __ b(l_2);
    __ delayed()->nop();
    __ align(16);
    __ bind(l_1);
    __ ld(AT, T3, 0);
    __ sd(AT, T0, 0);
    __ addi(T3, T3, 8);
    __ addi(T0, T0, 8);
    __ bind(l_2);
    __ addi(T1, T1, -1);
    __ bgez(T1, l_1);
    __ delayed()->nop();
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  address generate_conjoint_long_copy(bool aligned, const char *name) {
    Label l_1, l_2;
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);
    address start = __ pc();
    address nooverlap_target = aligned ?
            StubRoutines::arrayof_jlong_disjoint_arraycopy() :
            StubRoutines::jlong_disjoint_arraycopy();
    array_overlap_test(nooverlap_target, 3);

    __ push(T3);
    __ push(T0);
    __ push(T1);

    __ move(T1, A2);
    __ move(T3, A0);
    __ move(T0, A1);
    __ sll(AT, T1, Address::times_8);
    __ add(AT, T3, AT);
    __ lea(T3, Address(AT, -8));
    __ sll(AT, T1, Address::times_8);
    __ add(AT, T0, AT);
    __ lea(T0, Address(AT, -8));

    __ b(l_2);
    __ delayed()->nop();
    __ align(16);
    __ bind(l_1);
    __ ld(AT, T3, 0);
    __ sd(AT, T0, 0);
    __ addi(T3, T3, -8);
    __ addi(T0, T0, -8);
    __ bind(l_2);
    __ addi(T1, T1, -1);
    __ bgez(T1, l_1);
    __ delayed()->nop();
    __ pop(T1);
    __ pop(T0);
    __ pop(T3);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  void generate_arraycopy_stubs() {
    if (UseCompressedOops) {
      StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_int_oop_copy(false, true,
                                                       "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy                 = generate_conjoint_int_oop_copy(false, true,
                                                       "oop_arraycopy");
      StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true,
                                                       "oop_disjoint_arraycopy_uninit", true);
      StubRoutines::_oop_arraycopy_uninit          = generate_conjoint_int_oop_copy(false, true,
                                                       "oop_arraycopy_uninit", true);
    } else {
      StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_long_oop_copy(false, true,
                                                       "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy                 = generate_conjoint_long_oop_copy(false, true,
                                                       "oop_arraycopy");
      StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true,
                                                       "oop_disjoint_arraycopy_uninit", true);
      StubRoutines::_oop_arraycopy_uninit          = generate_conjoint_long_oop_copy(false, true,
                                                       "oop_arraycopy_uninit", true);
    }

    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");

    StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
    StubRoutines::_jint_arraycopy   = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
    StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");

    // We don't generate specialized code for HeapWord-aligned source
    // arrays, so just use the code we've already generated
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = StubRoutines::_jbyte_disjoint_arraycopy;
    StubRoutines::_arrayof_jbyte_arraycopy           = StubRoutines::_jbyte_arraycopy;

    StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
    StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;

    StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
    StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;

    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
    StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;

    StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;

    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
    StubRoutines::_arrayof_oop_arraycopy_uninit          = StubRoutines::_oop_arraycopy_uninit;
  }
  // Wang: add a function to implement SafeFetch32 and SafeFetchN
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   A0 = adr
    //   A1 = errValue
    //
    // result:
    //   V0 = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into A1, may fault.
    *fault_pc = __ pc();
    switch (size) {
      case 4:
        // int32_t
        __ lw(A1, A0, 0);
        break;
      case 8:
        // int64_t
        __ ld(A1, A0, 0);
        break;
      default:
        ShouldNotReachHere();
    }

    // return errValue or *adr
    *continuation_pc = __ pc();
    __ addu(V0, A1, R0);
    __ jr(RA);
    __ delayed()->nop();
  }
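  // Usage sketch (illustrative, not part of this file): the runtime calls
  // the generated stub as
  //   int v = SafeFetch32(addr, 0xDEADBEEF);
  // If the load at *fault_pc faults, the signal handler resumes execution
  // at *continuation_pc, so the stub falls through to "return errValue"
  // (A1, copied into V0) instead of crashing the VM.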
#undef __
#define __ masm->

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs.  If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception.  All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.
1934 address generate_throw_exception(const char* name,
1935 address runtime_entry,
1936 bool restore_saved_exception_pc) {
1937 // Information about frame layout at time of blocking runtime call.
1938 // Note that we only have to preserve callee-saved registers since
1939 // the compilers are responsible for supplying a continuation point
1940 // if they expect all registers to be preserved.
1941 enum layout {
1942 thread_off, // last_java_sp
1943 S7_off, // callee saved register sp + 1
1944 S6_off, // callee saved register sp + 2
1945 S5_off, // callee saved register sp + 3
1946 S4_off, // callee saved register sp + 4
1947 S3_off, // callee saved register sp + 5
1948 S2_off, // callee saved register sp + 6
1949 S1_off, // callee saved register sp + 7
1950 S0_off, // callee saved register sp + 8
1951 FP_off,
1952 ret_address,
1953 framesize
1954 };
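    // Frame sketch implied by the enum above (word offsets from SP after
    // the prolog; the stack grows toward lower addresses):
    //   SP + 10 : return address      (ret_address)
    //   SP +  9 : saved FP            (FP_off)
    //   SP + 1-8: S7 .. S0            (callee-saved)
    //   SP +  0 : thread              (last_java_sp / first C argument)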
1956 int insts_size = 2048;
1957 int locs_size = 32;
1961 CodeBuffer code(name, insts_size, locs_size);
1962 OopMapSet* oop_maps = new OopMapSet();
1963 MacroAssembler* masm = new MacroAssembler(&code);
1965 address start = __ pc();
1967 // This is an inlined and slightly modified version of call_VM
1968 // which has the ability to fetch the return PC out of
1969 // thread-local storage and also sets up last_Java_sp slightly
1970 // differently than the real call_VM
1971 #ifndef OPT_THREAD
1972 Register java_thread = TREG;
1973 __ get_thread(java_thread);
1974 #else
1975 Register java_thread = TREG;
1976 #endif
1977 if (restore_saved_exception_pc) {
1978 __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // load the saved exception pc into RA
1979 }
1981 __ enter(); // required for proper stackwalking of RuntimeStub frame
1983 __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
1984 __ sd(S0, SP, S0_off * wordSize);
1985 __ sd(S1, SP, S1_off * wordSize);
1986 __ sd(S2, SP, S2_off * wordSize);
1987 __ sd(S3, SP, S3_off * wordSize);
1988 __ sd(S4, SP, S4_off * wordSize);
1989 __ sd(S5, SP, S5_off * wordSize);
1990 __ sd(S6, SP, S6_off * wordSize);
1991 __ sd(S7, SP, S7_off * wordSize);
1993 int frame_complete = __ pc() - start;
1994 // push java thread (becomes first argument of C function)
1995 __ sd(java_thread, SP, thread_off * wordSize);
1996 if (java_thread != A0)
1997 __ move(A0, java_thread);
1999 // Set up last_Java_sp and last_Java_fp
2000 __ set_last_Java_frame(java_thread, SP, FP, NULL);
2001 // Align stack
2002 __ set64(AT, -(StackAlignmentInBytes));
2003 __ andr(SP, SP, AT);
2005 __ relocate(relocInfo::internal_pc_type);
2006 {
2007 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
2008 __ patchable_set48(AT, save_pc);
2009 }
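    // Hedged assumption: the extra 28 bytes are taken to cover the sd
    // below plus the expansion of call() (target load, jalr and its
    // delay-slot nop), so save_pc is the address just past the call.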
2010 __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
2012 // Call runtime
2013 __ call(runtime_entry);
2014 __ delayed()->nop();
2015 // Generate oop map
2016 OopMap* map = new OopMap(framesize, 0);
2017 oop_maps->add_gc_map(__ offset(), map);
2019 // restore the thread (cannot use the pushed argument since arguments
2020 // may be overwritten by C code generated by an optimizing compiler);
2021 // however can use the register value directly if it is callee saved.
2022 #ifndef OPT_THREAD
2023 __ get_thread(java_thread);
2024 #endif
2026 __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
2027 __ reset_last_Java_frame(java_thread, true, true);
2029 // Restore callee-saved registers. This must be done after resetting the Java frame.
2030 __ ld(S0, SP, S0_off * wordSize);
2031 __ ld(S1, SP, S1_off * wordSize);
2032 __ ld(S2, SP, S2_off * wordSize);
2033 __ ld(S3, SP, S3_off * wordSize);
2034 __ ld(S4, SP, S4_off * wordSize);
2035 __ ld(S5, SP, S5_off * wordSize);
2036 __ ld(S6, SP, S6_off * wordSize);
2037 __ ld(S7, SP, S7_off * wordSize);
2039 // discard arguments
2040 __ addi(SP, SP, (framesize-2) * wordSize); // epilog
2041 __ addi(SP, FP, wordSize);
2042 __ ld(FP, SP, -1 * wordSize);
2043 // check for pending exceptions
2044 #ifdef ASSERT
2045 Label L;
2046 __ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
2047 __ bne(AT, R0, L);
2048 __ delayed()->nop();
2049 __ should_not_reach_here();
2050 __ bind(L);
2051 #endif //ASSERT
2052 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
2053 __ delayed()->nop();
2054 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name,
2055 &code,
2056 frame_complete,
2057 framesize,
2058 oop_maps, false);
2059 return stub->entry_point();
2060 }
2062 // Initialization
2063 void generate_initial() {
2064 // Generates all stubs and initializes the entry points
2066 //-------------------------------------------------------------
2068 // entry points that exist in all platforms
2069 // Note: this code could be shared among platforms; however, the benefit seems smaller
2070 // than the cost of a much more complicated generator structure.
2071 // See also comment in stubRoutines.hpp.
2072 StubRoutines::_forward_exception_entry = generate_forward_exception();
2073 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
2074 // is referenced by megamorphic call
2075 StubRoutines::_catch_exception_entry = generate_catch_exception();
2077 StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
2079 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception",
2080 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
2081 // platform dependent
2082 StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
2083 }
2085 void generate_all() {
2086 // Generates all stubs and initializes the entry points
2088 // These entry points require SharedInfo::stack0 to be set up in
2089 // non-core builds and need to be relocatable, so they each
2090 // fabricate a RuntimeStub internally.
2091 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception",
2092 CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
2094 StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception",
2095 CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
2097 StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception",
2098 CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
2100 //------------------------------------------------------------------
2101 // entry points that are platform specific
2103 // support for verify_oop (must happen after universe_init)
2104 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
2105 #ifndef CORE
2106 // arraycopy stubs used by compilers
2107 generate_arraycopy_stubs();
2108 #endif
2110 // Safefetch stubs.
2111 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
2112 &StubRoutines::_safefetch32_fault_pc,
2113 &StubRoutines::_safefetch32_continuation_pc);
2114 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
2115 &StubRoutines::_safefetchN_fault_pc,
2116 &StubRoutines::_safefetchN_continuation_pc);
2117 }
2119 public:
2120 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2121 if (all) {
2122 generate_all();
2123 } else {
2124 generate_initial();
2125 }
2126 }
2127 }; // end class declaration
2129 void StubGenerator_generate(CodeBuffer* code, bool all) {
2130 StubGenerator g(code, all);
2131 }