Wed, 29 Mar 2017 09:41:51 +0800
#4662 TieredCompilation is turned off.
TieredCompilation is not supported yet.
1 /*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "nativeInst_mips.hpp"
31 #include "oops/instanceOop.hpp"
32 #include "oops/method.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/frame.inline.hpp"
37 #include "runtime/handles.inline.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "runtime/thread.inline.hpp"
42 #include "utilities/top.hpp"
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #endif
48 // Declaration and definition of StubGenerator (no .hpp file).
49 // For a more detailed description of the stub routine structure
50 // see the comment in stubRoutines.hpp
52 #define __ _masm->
53 //#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
54 //#define a__ ((Assembler*)_masm)->
56 //#ifdef PRODUCT
57 //#define BLOCK_COMMENT(str) /* nothing */
58 //#else
59 //#define BLOCK_COMMENT(str) __ block_comment(str)
60 //#endif
62 //#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Mask out any pending exceptions in the x87/SSE MXCSR control word.
// NOTE(review): leftover from the x86 stub generator this file was ported
// from; nothing in the MIPS stubs visible below references it.
const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
65 // Stub Code definitions
67 static address handle_unsafe_access() {
68 JavaThread* thread = JavaThread::current();
69 address pc = thread->saved_exception_pc();
70 // pc is the instruction which we must emulate
71 // doing a no-op is fine: return garbage from the load
72 // therefore, compute npc
73 //address npc = Assembler::locate_next_instruction(pc);
74 address npc = (address)((unsigned long)pc + sizeof(unsigned long));
76 // request an async exception
77 thread->set_pending_unsafe_access_error();
79 // return address of next instruction to execute
80 return npc;
81 }
83 class StubGenerator: public StubCodeGenerator {
84 private:
86 // ABI mips n64
  // This diagram is not the MIPS C ABI; it is the frame layout used when
  // calling Java from C.
88 // Call stubs are used to call Java from C
89 //
90 // [ return_from_Java ]
91 // [ argument word n-1 ] <--- sp
92 // ...
93 // [ argument word 0 ]
94 // ...
95 //-10 [ S6 ]
96 // -9 [ S5 ]
97 // -8 [ S4 ]
98 // -7 [ S3 ]
99 // -6 [ S0 ]
100 // -5 [ TSR(S2) ]
101 // -4 [ LVP(S7) ]
102 // -3 [ BCP(S1) ]
103 // -2 [ saved fp ] <--- fp_after_call
104 // -1 [ return address ]
105 // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
106 // 1 [ result ] <--- a1
107 // 2 [ result_type ] <--- a2
108 // 3 [ method ] <--- a3
109 // 4 [ entry_point ] <--- a4
110 // 5 [ parameters ] <--- a5
111 // 6 [ parameter_size ] <--- a6
112 // 7 [ thread ] <--- a7
114 //
115 // _LP64: n64 does not save paras in sp.
116 //
117 // [ return_from_Java ]
118 // [ argument word n-1 ] <--- sp
119 // ...
120 // [ argument word 0 ]
121 // ...
122 //-14 [ thread ]
123 //-13 [ result_type ] <--- a2
124 //-12 [ result ] <--- a1
125 //-11 [ ptr. to call wrapper ] <--- a0
126 //-10 [ S6 ]
127 // -9 [ S5 ]
128 // -8 [ S4 ]
129 // -7 [ S3 ]
130 // -6 [ S0 ]
131 // -5 [ TSR(S2) ]
132 // -4 [ LVP(S7) ]
133 // -3 [ BCP(S1) ]
134 // -2 [ saved fp ] <--- fp_after_call
135 // -1 [ return address ]
136 // 0 [ ] <--- old sp
137 /*
138 * 2014/01/16 Fu: Find a right place in the call_stub for GP.
139 * GP will point to the starting point of Interpreter::dispatch_table(itos).
140 * It should be saved/restored before/after Java calls.
141 *
142 */
  // Frame layout of the call stub, as word offsets relative to FP
  // (generate_call_stub sets FP = incoming SP - 2 words).  Matches the
  // n64 diagram above.
  enum call_stub_layout {
    RA_off          = -1,   // return address
    FP_off          = -2,   // caller's frame pointer
    BCP_off         = -3,
    LVP_off         = -4,
    TSR_off         = -5,
    S1_off          = -6,
    S3_off          = -7,
    S4_off          = -8,
    S5_off          = -9,
    S6_off          = -10,
    result_off      = -11,  // slot for the result address (incoming A1)
    result_type_off = -12,  // slot for the result BasicType (incoming A2)
    thread_off      = -13,  // slot for the JavaThread* (incoming A7)
    total_off       = thread_off - 3,  // = -16: total words carved off SP
    GP_off          = -16,  // GP save slot; last slot of the frame (same value as total_off)
  };
  // Entry stub used by the VM to call Java from C++ (see the n64 diagram
  // above).  On entry:
  //   A0 - call wrapper address     A4 - entry point
  //   A1 - result address           A5 - parameter block
  //   A2 - result BasicType         A6 - parameter count (words)
  //   A3 - Method*                  A7 - JavaThread*
  // Also publishes return_address (normal Java return point) and the
  // compiled-code return point via StubRoutines::gs2.
  address generate_call_stub(address& return_address) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // same as in generate_catch_exception()!

    // Save RA, FP and the callee-saved registers the interpreter uses, at
    // negative word offsets from the incoming SP (see call_stub_layout).
    __ sd(RA, SP, RA_off * wordSize);
    __ sd(FP, SP, FP_off * wordSize);
    __ sd(BCP, SP, BCP_off * wordSize);
    __ sd(LVP, SP, LVP_off * wordSize);
    __ sd(GP, SP, GP_off * wordSize);
    __ sd(TSR, SP, TSR_off * wordSize);
    __ sd(S1, SP, S1_off * wordSize);
    __ sd(S3, SP, S3_off * wordSize);
    __ sd(S4, SP, S4_off * wordSize);
    __ sd(S5, SP, S5_off * wordSize);
    __ sd(S6, SP, S6_off * wordSize);

    // GP holds the interpreter dispatch table for the 'itos' TOS state;
    // it must be (re)established around Java calls (see the 2014/01/16
    // note above the layout enum).
    __ set64(GP, (long)Interpreter::dispatch_table(itos));

    // Establish the stub frame: FP = SP - 2 words, then carve the whole
    // save area (|total_off| words) off SP.
    __ daddi(FP, SP, (-2) * wordSize);
    __ daddi(SP, SP, total_off * wordSize);
    // Spill the incoming arguments that are needed after the Java call
    // returns (call wrapper, result address, result type, thread).
    __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
    __ sd(A1, FP, result_off * wordSize);
    __ sd(A2, FP, result_type_off * wordSize);
    __ sd(A7, FP, thread_off * wordSize);

#ifdef OPT_THREAD
    // Cache the current JavaThread* in its dedicated register.
    __ move(TREG, A7);
#endif
    // Re-establish the compressed-oops heap base register.
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
      __ beq(AT, R0, L);
      __ delayed()->nop();
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // Pass parameters, if any:
    //   A5: parameter block       A6: parameter count
    //   T0: remaining count (counts down)  T2: destination index (counts up)
    //   T3: scratch
    Label parameters_done;
    // Skip the copy loop entirely when the parameter count is zero.
    __ beq(A6, R0, parameters_done);
    __ delayed()->nop();
    // Reserve expression-stack space for the parameters, keeping SP aligned.
    __ dsll(AT, A6, Interpreter::logStackElementSize);
    __ dsub(SP, SP, AT);
    __ move(AT, -StackAlignmentInBytes);
    __ andr(SP, SP, AT);
    // Copy Java parameters in reverse order (receiver last);
    // the argument order is inverted in the process.
    Label loop;
    __ move(T0, A6);
    __ move(T2, R0);
    __ bind(loop);

    // Get parameter: AT = A5[T0 - 1] ...
    __ dsll(T3, T0, LogBytesPerWord);
    __ dadd(T3, T3, A5);
    __ ld(AT, T3, -wordSize);
    // ... and store it into expression-stack slot SP[T2].
    __ dsll(T3, T2, LogBytesPerWord);
    __ dadd(T3, T3, SP);
    __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
    __ daddi(T2, T2, 1);
    __ daddi(T0, T0, -1);
    __ bne(T0, R0, loop);
    __ delayed()->nop();

    // call Java function
    __ bind(parameters_done);

    // receiver in V0, methodOop in Rmethod
    __ move(Rmethod, A3);
    __ move(Rsender, SP);  // set sender sp
    __ jalr(A4);
    __ delayed()->nop();
    return_address = __ pc();

    // Both the interpreted return (fall-through) and the compiled return
    // (branch at the bottom) funnel through this point.
    Label common_return;
    __ bind(common_return);

    // Store the result depending on its type
    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT).
    __ ld(T0, FP, result_off * wordSize);       // result address --> T0
    Label is_long, is_float, is_double, exit;
    __ ld(T2, FP, result_type_off * wordSize);  // result_type --> T2
    // Compare T2 against T_LONG / T_FLOAT / T_DOUBLE, computing the next
    // comparison in each branch's delay slot.
    __ daddi(T3, T2, (-1) * T_LONG);
    __ beq(T3, R0, is_long);
    __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
    __ beq(T3, R0, is_float);
    __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
    __ beq(T3, R0, is_double);
    __ delayed()->nop();

    // handle T_INT case
    __ sd(V0, T0, 0 * wordSize);
    __ bind(exit);

    // Tear down the frame and restore every register saved above.
    __ daddi(SP, FP, 2 * wordSize);
    __ ld(RA, SP, RA_off * wordSize);
    __ ld(FP, SP, FP_off * wordSize);
    __ ld(BCP, SP, BCP_off * wordSize);
    __ ld(LVP, SP, LVP_off * wordSize);
    __ ld(GP, SP, GP_off * wordSize);
    __ ld(TSR, SP, TSR_off * wordSize);

    __ ld(S1, SP, S1_off * wordSize);
    __ ld(S3, SP, S3_off * wordSize);
    __ ld(S4, SP, S4_off * wordSize);
    __ ld(S5, SP, S5_off * wordSize);
    __ ld(S6, SP, S6_off * wordSize);

    // return
    __ jr(RA);
    __ delayed()->nop();

    // handle return types different from T_INT
    __ bind(is_long);
    __ sd(V0, T0, 0 * wordSize);
    __ b(exit);
    __ delayed()->nop();

    __ bind(is_float);
    __ swc1(F0, T0, 0 * wordSize);
    __ b(exit);
    __ delayed()->nop();

    __ bind(is_double);
    __ sdc1(F0, T0, 0 * wordSize);
    __ b(exit);
    __ delayed()->nop();

    // Return point for compiled code: re-join the common epilogue above.
    StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
    __ b(common_return);
    __ delayed()->nop();
    return start;
  }
338 // Return point for a Java call if there's an exception thrown in
339 // Java code. The exception is caught and transformed into a
340 // pending exception stored in JavaThread that can be tested from
341 // within the VM.
342 //
343 // Note: Usually the parameters are removed by the callee. In case
344 // of an exception crossing an activation frame boundary, that is
345 // not the case if the callee is compiled code => need to setup the
346 // rsp.
347 //
348 // rax: exception oop
  // Return point for a Java call if there's an exception thrown in Java
  // code.  The exception oop (in V0) is recorded as the thread's pending
  // exception, then control returns to the call stub's normal return
  // address so the VM can test for it.
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    Register thread = TREG;

    // Without OPT_THREAD, TREG is not live — reload the JavaThread* from
    // the call stub frame slot (see call_stub_layout).
#ifndef OPT_THREAD
    __ ld(thread, FP, thread_off * wordSize);
#endif

#ifdef ASSERT
    // verify that threads correspond
    { Label L;
      __ get_thread(T8);
      __ beq(T8, thread, L);
      __ delayed()->nop();
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif
    // Set the pending exception, recording this file/line as its origin.
    __ verify_oop(V0);
    __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
    __ li(AT, (long)__FILE__);
    __ sd(AT, thread, in_bytes(Thread::exception_file_offset ()));
    __ li(AT, (long)__LINE__);
    __ sd(AT, thread, in_bytes(Thread::exception_line_offset ()));

    // Complete the return to the VM via the call stub's return address.
    assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
    __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
    __ delayed()->nop();

    return start;
  }
387 // Continuation point for runtime calls returning with a pending
388 // exception. The pending exception check happened in the runtime
389 // or native call stub. The pending exception in Thread is
390 // converted into a Java-level exception.
391 //
392 // Contract with Java-level exception handlers:
393 // rax: exception
394 // rdx: throwing pc
395 //
396 // NOTE: At entry of this stub, exception-pc must be on stack !!
  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime or
  // native call stub; the pending exception in Thread is converted into a
  // Java-level exception here.
  //
  // Contract with Java-level exception handlers:
  //   V0: exception oop
  //   V1: throwing pc
  //
  // NOTE: At entry of this stub, the exception pc must be on the stack!
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    Register thread = TREG;
    address start = __ pc();

    // Upon entry, the sp points to the return address returning into Java
    // (interpreted or compiled) code; i.e., the return address becomes the
    // throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack but
    // the exception handler will reset the stack pointer -> ignore them.
    // A potential result in registers can be ignored as well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
#ifndef OPT_THREAD
    __ get_thread(thread);
#endif
    { Label L;
      __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
      __ bne(AT, R0, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // Compute the exception handler for the throwing pc (at SP[0]) into T9.
    __ ld(A1, SP, 0);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
    __ move(T9, V0);
    __ pop(V1);  // pop the throwing pc into V1, removing the return address

#ifndef OPT_THREAD
    __ get_thread(thread);  // call_VM_leaf may have clobbered TREG
#endif
    // Load and clear the pending exception.
    __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
    __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ bne(V0, R0, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // Continue at the exception handler (return address removed):
    //   V0: exception
    //   T9: exception handler
    //   V1: throwing pc
    __ verify_oop(V0);
    __ jr(T9);
    __ delayed()->nop();

    return start;
  }
459 // Support for intptr_t get_previous_fp()
460 //
461 // This routine is used to find the previous frame pointer for the
462 // caller (current_frame_guess). This is used as part of debugging
463 // ps() is seemingly lost trying to find frames.
464 // This code assumes that caller current_frame_guess) has a frame.
465 address generate_get_previous_fp() {
466 StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
467 const Address old_fp (FP, 0);
468 const Address older_fp (V0, 0);
469 address start = __ pc();
470 __ enter();
471 __ lw(V0, old_fp); // callers fp
472 __ lw(V0, older_fp); // the frame for ps()
473 __ leave();
474 __ jr(RA);
475 __ delayed()->nop();
476 return start;
477 }
478 // The following routine generates a subroutine to throw an
479 // asynchronous UnknownError when an unsafe access gets a fault that
480 // could not be reasonably prevented by the programmer. (Example:
481 // SIGBUS/OBJERR.)
  // The following routine generates a subroutine to throw an asynchronous
  // UnknownError when an unsafe access gets a fault that could not be
  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();
    __ pushad();                                                      // push registers
    // Ask the VM (handle_unsafe_access above) for the pc to resume at.
    __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
    __ delayed()->nop();
    // Store the resume pc into the slot just above the pushad save area.
    // NOTE(review): V0 holds a 64-bit address but is stored with sw
    // (32 bits) — looks like this should be sd; confirm against pushad()'s
    // frame layout and how the signal handler consumes this slot.
    __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord);
    __ popad();
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
  // Non-destructive plausibility checks for oops.
  //
  // (The MIPS version takes no stack arguments: it re-initializes the heap
  // base register and emits MacroAssembler::verify_oop_subroutine(), which
  // performs the actual checks.)
511 address generate_verify_oop() {
512 StubCodeMark mark(this, "StubRoutines", "verify_oop");
513 address start = __ pc();
514 __ reinit_heapbase();
515 __ verify_oop_subroutine();
516 address end = __ pc();
517 return start;
518 }
520 //
521 // Generate overlap test for array copy stubs
522 //
523 // Input:
524 // A0 - array1
525 // A1 - array2
526 // A2 - element count
527 //
// Note: clobbers AT and T9 as temporaries; A0-A2 are preserved.
//
531 // use T9 as temp
  // Emits an overlap test: branches to no_overlap_target when the ranges
  // [A0, A0 + A2*elem_size) and [A1, ...) do not overlap in a way that
  // requires a backwards (conjoint) copy.  Falls through otherwise.
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    int elem_size = 1 << log2_elem_size;
    Address::ScaleFactor sf = Address::times_1;

    switch (log2_elem_size) {
      case 0: sf = Address::times_1; break;
      case 1: sf = Address::times_2; break;
      case 2: sf = Address::times_4; break;
      case 3: sf = Address::times_8; break;
    }

    // T9 = address of the last source element: A0 + A2*elem_size - elem_size.
    __ dsll(AT, A2, sf);
    __ dadd(AT, AT, A0);
    __ lea(T9, Address(AT, -elem_size));
    // dst <= src: a forward copy is always safe.
    __ dsub(AT, A1, A0);
    __ blez(AT, no_overlap_target);
    __ delayed()->nop();
    // dst > last source element: ranges cannot overlap.
    __ dsub(AT, A1, T9);
    __ bgtz(AT, no_overlap_target);
    __ delayed()->nop();

    // 2016/05/10 aoqi: the signed subtractions above can wrap when src and
    // dst sit in opposite halves of the address space; if A0 is "negative"
    // (e.g. 0xf...) and A1 is "positive" (0x0...), go to no_overlap_target.
    Label L;
    __ bgez(A0, L);
    __ delayed()->nop();
    __ bgtz(A1, no_overlap_target);
    __ delayed()->nop();
    __ bind(L);
  }
563 //
564 // Generate store check for array
565 //
566 // Input:
567 // T0 - starting address(edi)
568 // T1 - element count (ecx)
569 //
570 // The 2 input registers are overwritten
571 //
573 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
  // Emits the card-table post-barrier for a span of oop stores.
  //
  // Input:
  //   T0 - starting address
  //   T1 - element count
  //
  // The 2 input registers are overwritten.
  void array_store_check() {
    BarrierSet* bs = Universe::heap()->barrier_set();
    assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
    CardTableModRefBS* ct = (CardTableModRefBS*)bs;
    assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
    Label l_0;

    // T1 = address of the last oop stored.
    __ dsll(AT, T1, TIMES_OOP);
    __ dadd(AT, T0, AT);
    __ daddiu(T1, AT, - BytesPerHeapOop);

    // Convert the first and last addresses into card indexes.
    __ shr(T0, CardTableModRefBS::card_shift);
    __ shr(T1, CardTableModRefBS::card_shift);

    __ dsub(T1, T1, T0); // end --> cards count
    __ bind(l_0);

    // Dirty card byte_map_base[T0 + T1], walking T1 down to 0.
    __ set64(AT, (long)ct->byte_map_base);
    __ dadd(AT, AT, T0);
    __ dadd(AT, AT, T1);
    __ sb(R0, AT, 0);
    // Memory barrier after each card store.
    __ sync();
    __ bgez(T1, l_0);
    __ delayed()->daddi(T1, T1, - 1);  // decrement the card counter in the delay slot
  }
601 // Arguments:
602 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
603 // ignored
604 // name - stub name string
605 //
606 // Inputs:
607 // c_rarg0 - source array address
608 // c_rarg1 - destination array address
609 // c_rarg2 - element count, treated as ssize_t, can be zero
610 //
611 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
612 // we let the hardware handle it. The one to eight bytes within words,
613 // dwords or qwords that span cache line boundaries will still be loaded
614 // and stored atomically.
615 //
616 // Side Effects:
617 // disjoint_byte_copy_entry is set to the no-overlap entry point
618 // used by generate_conjoint_byte_copy().
619 //
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    __ align(CodeEntryAlignment);

    Register tmp1 = T0;
    Register tmp2 = T1;
    Register tmp3 = T3;

    address start = __ pc();

    __ push(tmp1);
    __ push(tmp2);
    __ push(tmp3);
    __ move(tmp1, A0);  // from
    __ move(tmp2, A1);  // to
    __ move(tmp3, A2);  // byte count

    Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11;
    Label l_debug;

    // Copies of <= 9 bytes go straight to the byte-at-a-time loop;
    // alignment games would cost more than they save.
    __ daddi(AT, tmp3, -9);
    __ blez(AT, l_9);
    __ delayed()->nop();

    if (!aligned) {
      __ xorr(AT, tmp1, tmp2);
      __ andi(AT, AT, 1);
      __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy
      __ delayed()->nop();

      // Copy 1 element if necessary to align to 2 bytes.
      __ andi(AT, tmp1, 1);
      __ beq(AT, R0, l_10);
      __ delayed()->nop();

      __ lb(AT, tmp1, 0);
      __ daddi(tmp1, tmp1, 1);
      __ sb(AT, tmp2, 0);
      __ daddi(tmp2, tmp2, 1);
      __ daddi(tmp3, tmp3, -1);
      __ bind(l_10);

      __ xorr(AT, tmp1, tmp2);
      __ andi(AT, AT, 3);
      __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy
      __ delayed()->nop();

      // At this point it is guaranteed that both, from and to, have the same alignment mod 4.

      // Copy 2 elements if necessary to align to 4 bytes.
      __ andi(AT, tmp1, 3);
      __ beq(AT, R0, l_2);
      __ delayed()->nop();

      __ lhu(AT, tmp1, 0);
      __ daddi(tmp1, tmp1, 2);
      __ sh(AT, tmp2, 0);
      __ daddi(tmp2, tmp2, 2);
      __ daddi(tmp3, tmp3, -2);
      __ bind(l_2);

      // At this point the positions of both, from and to, are at least 4 byte aligned.

      // Align to 8 bytes, but only if both, from and to, have the same alignment mod 8.
      __ xorr(AT, tmp1, tmp2);
      __ andi(AT, AT, 7);
      __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 4 at a time, either from or to will be unaligned
      __ delayed()->nop();

      // Copy 4 elements if necessary to align to 8 bytes.
      __ andi(AT, tmp1, 7);
      __ beq(AT, R0, l_7);
      __ delayed()->nop();

      __ lw(AT, tmp1, 0);
      __ daddi(tmp3, tmp3, -4);
      __ sw(AT, tmp2, 0);
      { // FasterArrayCopy
        __ daddi(tmp1, tmp1, 4);
        __ daddi(tmp2, tmp2, 4);
      }
    }

    __ bind(l_7);

    // Copy 8 bytes (one dword) at a time; either the loads or the stores
    // can be unaligned if aligned == false.
    { // FasterArrayCopy
      __ daddi(AT, tmp3, -7);
      __ blez(AT, l_6); // fall back to the 4-byte loop if fewer than 8 bytes remain
      __ delayed()->nop();

      __ bind(l_8);
      // For Loongson, there is 128-bit memory access. TODO
      __ ld(AT, tmp1, 0);
      __ sd(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 8);
      __ daddi(tmp2, tmp2, 8);
      __ daddi(tmp3, tmp3, -8);
      __ daddi(AT, tmp3, -8);
      __ bgez(AT, l_8);
      __ delayed()->nop();
    }
    __ bind(l_6);

    // copy 4 bytes at a time
    { // FasterArrayCopy
      __ daddi(AT, tmp3, -3);
      __ blez(AT, l_1);
      __ delayed()->nop();

      __ bind(l_3);
      __ lw(AT, tmp1, 0);
      __ sw(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 4);
      __ daddi(tmp2, tmp2, 4);
      __ daddi(tmp3, tmp3, -4);
      __ daddi(AT, tmp3, -4);
      __ bgez(AT, l_3);
      __ delayed()->nop();
    }

    // do 2 bytes copy
    __ bind(l_1);
    {
      __ daddi(AT, tmp3, -1);
      __ blez(AT, l_9);
      __ delayed()->nop();

      __ bind(l_5);
      __ lhu(AT, tmp1, 0);
      __ daddi(tmp3, tmp3, -2);
      __ sh(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 2);
      __ daddi(tmp2, tmp2, 2);
      __ daddi(AT, tmp3, -2);
      __ bgez(AT, l_5);
      __ delayed()->nop();
    }

    // do 1 element copy -- byte
    __ bind(l_9);
    __ beq(R0, tmp3, l_4);
    __ delayed()->nop();

    {
      __ bind(l_11);
      __ lb(AT, tmp1, 0);
      __ daddi(tmp3, tmp3, -1);
      __ sb(AT, tmp2, 0);
      __ daddi(tmp1, tmp1, 1);
      __ daddi(tmp2, tmp2, 1);
      __ daddi(AT, tmp3, -1);
      __ bgez(AT, l_11);
      __ delayed()->nop();
    }

    __ bind(l_4);
    __ pop(tmp3);
    __ pop(tmp2);
    __ pop(tmp1);

    __ jr(RA);
    __ delayed()->nop();

    return start;
  }
792 // Arguments:
793 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
794 // ignored
795 // name - stub name string
796 //
797 // Inputs:
798 // A0 - source array address
799 // A1 - destination array address
800 // A2 - element count, treated as ssize_t, can be zero
801 //
802 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
803 // we let the hardware handle it. The one to eight bytes within words,
804 // dwords or qwords that span cache line boundaries will still be loaded
805 // and stored atomically.
806 //
  address generate_conjoint_byte_copy(bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
    Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;

    // If the ranges cannot overlap, tail-jump to the disjoint stub.
    address nooverlap_target = aligned ?
            StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
            StubRoutines::jbyte_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 0);

    const Register from      = A0;  // source array address
    const Register to        = A1;  // destination array address
    const Register count     = A2;  // elements count
    const Register end_from  = T3;  // source array end address
    const Register end_to    = T0;  // destination array end address
    const Register end_count = T1;  // remaining byte count

    __ push(end_from);
    __ push(end_to);
    __ push(end_count);
    __ push(T8);

    // Overlapping ranges: copy from high addresses down to low ones.
    __ move(end_count, count);
    __ dadd(end_from, from, end_count);
    __ dadd(end_to, to, end_count);

    // 2016/05/08 aoqi: if end_from and end_to have different alignment
    // mod 4, a pure byte-by-byte copy is performed.
    __ andi(AT, end_from, 3);
    __ andi(T8, end_to, 3);
    __ bne(AT, T8, l_copy_byte);
    __ delayed()->nop();

    // First deal with the unaligned data at the top.
    __ bind(l_unaligned);
    __ beq(end_count, R0, l_exit);
    __ delayed()->nop();

    __ andi(AT, end_from, 3);
    __ bne(AT, R0, l_from_unaligned);
    __ delayed()->nop();

    __ andi(AT, end_to, 3);
    __ beq(AT, R0, l_4_bytes_aligned);
    __ delayed()->nop();

    // Copy single bytes (backwards) until both ends are 4-byte aligned.
    __ bind(l_from_unaligned);
    __ lb(AT, end_from, -1);
    __ sb(AT, end_to, -1);
    __ daddi(end_from, end_from, -1);
    __ daddi(end_to, end_to, -1);
    __ daddi(end_count, end_count, -1);
    __ b(l_unaligned);
    __ delayed()->nop();

    // Now end_to and end_from point to 4-byte-aligned high ends and
    // end_count holds the byte count not yet copied: copy 4 bytes at a time.
    __ bind(l_4_bytes_aligned);

    __ move(T8, end_count);  // T8 keeps the count; its low 2 bits are the suffix length
    __ daddi(AT, end_count, -3);
    __ blez(AT, l_copy_suffix);
    __ delayed()->nop();

    // Bias the pointers by -4 so each iteration can use offset 0.
    __ lea(end_from, Address(end_from, -4));
    __ lea(end_to, Address(end_to, -4));

    __ dsrl(end_count, end_count, 2);  // byte count -> word count
    __ align(16);
    __ bind(l_copy_4_bytes_loop);
    __ lw(AT, end_from, 0);
    __ sw(AT, end_to, 0);
    __ addi(end_from, end_from, -4);
    __ addi(end_to, end_to, -4);
    __ addi(end_count, end_count, -1);
    __ bne(end_count, R0, l_copy_4_bytes_loop);
    __ delayed()->nop();

    __ b(l_copy_suffix);
    __ delayed()->nop();

    // copy suffix (0-3 bytes)
    __ bind(l_copy_suffix);
    __ andi(T8, T8, 3);
    __ beq(T8, R0, l_exit);
    __ delayed()->nop();
    // Undo the -4 bias (net -1) so the pointers address the suffix bytes.
    __ addi(end_from, end_from, 3);
    __ addi(end_to, end_to, 3);
    __ bind(l_copy_suffix_loop);
    __ lb(AT, end_from, 0);
    __ sb(AT, end_to, 0);
    __ addi(end_from, end_from, -1);
    __ addi(end_to, end_to, -1);
    __ addi(T8, T8, -1);
    __ bne(T8, R0, l_copy_suffix_loop);
    __ delayed()->nop();

    // Pure byte-by-byte backwards copy (mismatched alignment mod 4).
    __ bind(l_copy_byte);
    __ beq(end_count, R0, l_exit);
    __ delayed()->nop();
    __ lb(AT, end_from, -1);
    __ sb(AT, end_to, -1);
    __ daddi(end_from, end_from, -1);
    __ daddi(end_to, end_to, -1);
    __ daddi(end_count, end_count, -1);
    __ b(l_copy_byte);
    __ delayed()->nop();

    __ bind(l_exit);
    __ pop(T8);
    __ pop(end_count);
    __ pop(end_to);
    __ pop(end_from);
    __ jr(RA);
    __ delayed()->nop();
    return start;
  }
932 // Generate stub for disjoint short copy. If "aligned" is true, the
933 // "from" and "to" addresses are assumed to be heapword aligned.
934 //
935 // Arguments for generated stub:
936 // from: A0
937 // to: A1
938 // elm.count: A2 treated as signed
939 // one element: 2 bytes
940 //
941 // Strategy for aligned==true:
942 //
943 // If length <= 9:
944 // 1. copy 1 elements at a time (l_5)
945 //
946 // If length > 9:
947 // 1. copy 4 elements at a time until less than 4 elements are left (l_7)
948 // 2. copy 2 elements at a time until less than 2 elements are left (l_6)
949 // 3. copy last element if one was left in step 2. (l_1)
950 //
951 //
952 // Strategy for aligned==false:
953 //
954 // If length <= 9: same as aligned==true case
955 //
956 // If length > 9:
957 // 1. continue with step 7. if the alignment of from and to mod 4
958 // is different.
959 // 2. align from and to to 4 bytes by copying 1 element if necessary
960 // 3. at l_2 from and to are 4 byte aligned; continue with
961 // 6. if they cannot be aligned to 8 bytes because they have
962 // got different alignment mod 8.
963 // 4. at this point we know that both, from and to, have the same
964 // alignment mod 8, now copy one element if necessary to get
965 // 8 byte alignment of from and to.
966 // 5. copy 4 elements at a time until less than 4 elements are
967 // left; depending on step 3. all load/stores are aligned.
968 // 6. copy 2 elements at a time until less than 2 elements are
969 // left. (l_6)
970 // 7. copy 1 element at a time. (l_5)
971 // 8. copy last element if one was left in step 6. (l_1)
973 address generate_disjoint_short_copy(bool aligned, const char * name) {
974 StubCodeMark mark(this, "StubRoutines", name);
975 __ align(CodeEntryAlignment);
977 Register tmp1 = T0;
978 Register tmp2 = T1;
979 Register tmp3 = T3;
980 Register tmp4 = T8;
981 Register tmp5 = T9;
982 Register tmp6 = T2;
984 address start = __ pc();
986 __ push(tmp1);
987 __ push(tmp2);
988 __ push(tmp3);
989 __ move(tmp1, A0);
990 __ move(tmp2, A1);
991 __ move(tmp3, A2);
993 Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14;
994 Label l_debug;
995 // don't try anything fancy if arrays don't have many elements
996 __ daddi(AT, tmp3, -23);
997 __ blez(AT, l_14);
998 __ delayed()->nop();
999 // move push here
1000 __ push(tmp4);
1001 __ push(tmp5);
1002 __ push(tmp6);
1004 if (!aligned) {
1005 __ xorr(AT, A0, A1);
1006 __ andi(AT, AT, 1);
1007 __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen?
1008 __ delayed()->nop();
1010 __ xorr(AT, A0, A1);
1011 __ andi(AT, AT, 3);
1012 __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy
1013 __ delayed()->nop();
1015 // At this point it is guaranteed that both, from and to have the same alignment mod 4.
1017 // Copy 1 element if necessary to align to 4 bytes.
1018 __ andi(AT, A0, 3);
1019 __ beq(AT, R0, l_2);
1020 __ delayed()->nop();
1022 __ lhu(AT, tmp1, 0);
1023 __ daddi(tmp1, tmp1, 2);
1024 __ sh(AT, tmp2, 0);
1025 __ daddi(tmp2, tmp2, 2);
1026 __ daddi(tmp3, tmp3, -1);
1027 __ bind(l_2);
1029 // At this point the positions of both, from and to, are at least 4 byte aligned.
1031 // Copy 4 elements at a time.
1032 // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
1033 __ xorr(AT, tmp1, tmp2);
1034 __ andi(AT, AT, 7);
1035 __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned
1036 __ delayed()->nop();
1038 // Copy a 2-element word if necessary to align to 8 bytes.
1039 __ andi(AT, tmp1, 7);
1040 __ beq(AT, R0, l_7);
1041 __ delayed()->nop();
1043 __ lw(AT, tmp1, 0);
1044 __ daddi(tmp3, tmp3, -2);
1045 __ sw(AT, tmp2, 0);
1046 __ daddi(tmp1, tmp1, 4);
1047 __ daddi(tmp2, tmp2, 4);
1048 }// end of if (!aligned)
1050 __ bind(l_7);
1051 // At this time the position of both, from and to, are at least 8 byte aligned.
1052 // Copy 8 elemnets at a time.
1053 // Align to 16 bytes, but only if both from and to have same alignment mod 8.
1054 __ xorr(AT, tmp1, tmp2);
1055 __ andi(AT, AT, 15);
1056 __ bne(AT, R0, l_9);
1057 __ delayed()->nop();
1059 // Copy 4-element word if necessary to align to 16 bytes,
1060 __ andi(AT, tmp1, 15);
1061 __ beq(AT, R0, l_10);
1062 __ delayed()->nop();
1064 __ ld(AT, tmp1, 0);
1065 __ daddi(tmp3, tmp3, -4);
1066 __ sd(AT, tmp2, 0);
1067 __ daddi(tmp1, tmp1, 8);
1068 __ daddi(tmp2, tmp2, 8);
1070 __ bind(l_10);
1072 // Copy 8 elements at a time; either the loads or the stores can
1073 // be unalligned if aligned == false
1075 { // FasterArrayCopy
1076 __ bind(l_11);
1077 // For loongson the 128-bit memory access instruction is gslq/gssq
1078 if (UseLoongsonISA) {
1079 __ gslq(AT, tmp4, tmp1, 0);
1080 __ gslq(tmp5, tmp6, tmp1, 16);
1081 __ daddi(tmp1, tmp1, 32);
1082 __ daddi(tmp2, tmp2, 32);
1083 __ gssq(AT, tmp4, tmp2, -32);
1084 __ gssq(tmp5, tmp6, tmp2, -16);
1085 } else {
1086 __ ld(AT, tmp1, 0);
1087 __ ld(tmp4, tmp1, 8);
1088 __ ld(tmp5, tmp1, 16);
1089 __ ld(tmp6, tmp1, 24);
1090 __ daddi(tmp1, tmp1, 32);
1091 __ sd(AT, tmp2, 0);
1092 __ sd(tmp4, tmp2, 8);
1093 __ sd(tmp5, tmp2, 16);
1094 __ sd(tmp6, tmp2, 24);
1095 __ daddi(tmp2, tmp2, 32);
1096 }
1097 __ daddi(tmp3, tmp3, -16);
1098 __ daddi(AT, tmp3, -16);
1099 __ bgez(AT, l_11);
1100 __ delayed()->nop();
1101 }
1102 __ bind(l_9);
1104 // Copy 4 elements at a time; either the loads or the stores can
1105 // be unaligned if aligned == false.
1106 { // FasterArrayCopy
1107 __ daddi(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16
1108 __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain
1109 __ delayed()->nop();
1111 __ bind(l_8);
1112 __ ld(AT, tmp1, 0);
1113 __ ld(tmp4, tmp1, 8);
1114 __ ld(tmp5, tmp1, 16);
1115 __ ld(tmp6, tmp1, 24);
1116 __ sd(AT, tmp2, 0);
1117 __ sd(tmp4, tmp2, 8);
1118 __ sd(tmp5, tmp2,16);
1119 __ daddi(tmp1, tmp1, 32);
1120 __ daddi(tmp2, tmp2, 32);
1121 __ daddi(tmp3, tmp3, -16);
1122 __ daddi(AT, tmp3, -16);
1123 __ bgez(AT, l_8);
1124 __ sd(tmp6, tmp2, -8);
1125 }
1126 __ bind(l_6);
1128 // copy 2 element at a time
1129 { // FasterArrayCopy
1130 __ daddi(AT, tmp3, -7);
1131 __ blez(AT, l_4);
1132 __ delayed()->nop();
1134 __ bind(l_3);
1135 __ lw(AT, tmp1, 0);
1136 __ lw(tmp4, tmp1, 4);
1137 __ lw(tmp5, tmp1, 8);
1138 __ lw(tmp6, tmp1, 12);
1139 __ sw(AT, tmp2, 0);
1140 __ sw(tmp4, tmp2, 4);
1141 __ sw(tmp5, tmp2, 8);
1142 __ daddi(tmp1, tmp1, 16);
1143 __ daddi(tmp2, tmp2, 16);
1144 __ daddi(tmp3, tmp3, -8);
1145 __ daddi(AT, tmp3, -8);
1146 __ bgez(AT, l_3);
1147 __ sw(tmp6, tmp2, -4);
1148 }
1150 __ bind(l_1);
1151 // do single element copy (8 bit), can this happen?
1152 { // FasterArrayCopy
1153 __ daddi(AT, tmp3, -3);
1154 __ blez(AT, l_4);
1155 __ delayed()->nop();
1157 __ bind(l_5);
1158 __ lhu(AT, tmp1, 0);
1159 __ lhu(tmp4, tmp1, 2);
1160 __ lhu(tmp5, tmp1, 4);
1161 __ lhu(tmp6, tmp1, 6);
1162 __ sh(AT, tmp2, 0);
1163 __ sh(tmp4, tmp2, 2);
1164 __ sh(tmp5, tmp2, 4);
1165 __ daddi(tmp1, tmp1, 8);
1166 __ daddi(tmp2, tmp2, 8);
1167 __ daddi(tmp3, tmp3, -4);
1168 __ daddi(AT, tmp3, -4);
1169 __ bgez(AT, l_5);
1170 __ sh(tmp6, tmp2, -2);
1171 }
1172 // single element
1173 __ bind(l_4);
1175 __ pop(tmp6);
1176 __ pop(tmp5);
1177 __ pop(tmp4);
1179 __ bind(l_14);
1180 { // FasterArrayCopy
1181 __ beq(R0, tmp3, l_13);
1182 __ delayed()->nop();
1184 __ bind(l_12);
1185 __ lhu(AT, tmp1, 0);
1186 __ sh(AT, tmp2, 0);
1187 __ daddi(tmp1, tmp1, 2);
1188 __ daddi(tmp2, tmp2, 2);
1189 __ daddi(tmp3, tmp3, -1);
1190 __ daddi(AT, tmp3, -1);
1191 __ bgez(AT, l_12);
1192 __ delayed()->nop();
1193 }
1195 __ bind(l_13);
1196 __ pop(tmp3);
1197 __ pop(tmp2);
1198 __ pop(tmp1);
1200 __ jr(RA);
1201 __ delayed()->nop();
1203 __ bind(l_debug);
1204 __ stop("generate_disjoint_short_copy should not reach here");
1205 return start;
1206 }
1208 // Arguments:
1209 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1210 // ignored
1211 // name - stub name string
1212 //
1213 // Inputs:
1214 // c_rarg0 - source array address
1215 // c_rarg1 - destination array address
1216 // c_rarg2 - element count, treated as ssize_t, can be zero
1217 //
1218 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
1219 // let the hardware handle it. The two or four words within dwords
1220 // or qwords that span cache line boundaries will still be loaded
1221 // and stored atomically.
1222 //
// Generate a stub that copies 'count' jshorts from A0 (from) to A1 (to)
// where the ranges may overlap.  If they do NOT overlap in the dangerous
// direction, array_overlap_test tail-calls the disjoint stub instead.
// The overlapping case copies backwards (high addresses to low) so that
// source elements are read before they are overwritten.
// Registers: T3 = from, T0 = to, T1 = count (jshorts), T8 = saved count
// for the odd-element suffix; AT is the scratch register.  Interleaved
// x86 comments are the original code this port was translated from.
1223 address generate_conjoint_short_copy(bool aligned, const char *name) {
1224 Label l_1, l_2, l_3, l_4, l_5;
1225 StubCodeMark mark(this, "StubRoutines", name);
1226 __ align(CodeEntryAlignment);
1227 address start = __ pc();
// Entry of the matching disjoint stub, used when overlap is impossible.
1228 address nooverlap_target = aligned ?
1229 StubRoutines::arrayof_jshort_disjoint_arraycopy() :
1230 StubRoutines::jshort_disjoint_arraycopy();
// log2(element size) == 1 for jshort.
1232 array_overlap_test(nooverlap_target, 1);
1234 __ push(T3);
1235 __ push(T0);
1236 __ push(T1);
1237 __ push(T8);
1239 /*
1240 __ pushl(esi);
1241 __ movl(ecx, Address(esp, 4+12)); // count
1242 __ pushl(edi);
1243 __ movl(esi, Address(esp, 8+ 4)); // from
1244 __ movl(edi, Address(esp, 8+ 8)); // to
1245 */
1246 __ move(T1, A2);
1247 __ move(T3, A0);
1248 __ move(T0, A1);
1251 // copy dwords from high to low
1252 // __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4
// Point T3/T0 at the last 4-byte dword of each range.
1253 __ sll(AT, T1, Address::times_2);
1254 __ add(AT, T3, AT);
1255 __ lea(T3, Address( AT, -4));
1256 //__ std();
1257 //__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4
1258 __ sll(AT,T1 , Address::times_2);
1259 __ add(AT, T0, AT);
1260 __ lea(T0, Address( AT, -4));
1261 // __ movl(eax, ecx);
// Keep the original element count in T8 for the suffix test below.
1262 __ move(T8, T1);
1263 __ bind(l_1);
1264 // __ sarl(ecx, 1); // dword count
// T1 becomes the number of whole dwords (pairs of jshorts) to copy.
1265 __ sra(T1,T1, 1);
1266 //__ jcc(Assembler::equal, l_4); // no dwords to move
1267 __ beq(T1, R0, l_4);
1268 __ delayed()->nop();
1269 /* __ cmpl(ecx, 32);
1270 __ jcc(Assembler::above, l_3); // > 32 dwords
1271 // copy dwords with loop
1272 __ subl(edi, esi);
1273 */ __ align(16);
// Backwards dword-copy loop: one 4-byte load/store per iteration.
1274 __ bind(l_2);
1275 //__ movl(edx, Address(esi));
1276 __ lw(AT, T3, 0);
1277 //__ movl(Address(edi, esi, Address::times_1), edx);
1278 __ sw(AT, T0, 0);
1279 //__ subl(esi, 4);
1280 __ addi(T3, T3, -4);
1281 __ addi(T0, T0, -4);
1282 //__ decl(ecx);
1283 __ addi(T1, T1, -1);
1284 // __ jcc(Assembler::notEqual, l_2);
1285 __ bne(T1, R0, l_2);
1286 __ delayed()->nop();
1287 // __ addl(edi, esi);
1288 // __ jmp(l_4);
1289 __ b(l_4);
1290 __ delayed()->nop();
1291 // copy dwords with repeat move
// l_3 is a leftover from the x86 rep_movl fast path; nothing branches
// here on MIPS, it falls through to l_4.
1292 __ bind(l_3);
1293 // __ rep_movl();
1294 __ bind(l_4);
1295 // __ andl(eax, 1); // suffix count
1296 __ andi(T8, T8, 1); // suffix count
1297 //__ jcc(Assembler::equal, l_5); // no suffix
1298 __ beq(T8, R0, l_5 );
1299 __ delayed()->nop();
1300 // copy suffix
// Odd count: copy the single remaining (lowest-addressed) jshort.
// T3/T0 have been decremented past it, hence the +2 offset.
1301 // __ movw(edx, Address(esi, 2));
1302 __ lh(AT, T3, 2);
1303 // __ movw(Address(edi, 2), edx);
1304 __ sh(AT, T0, 2);
1305 __ bind(l_5);
1306 // __ cld();
1307 // __ popl(edi);
1308 // __ popl(esi);
1309 // __ ret(0);
1310 __ pop(T8);
1311 __ pop(T1);
1312 __ pop(T0);
1313 __ pop(T3);
1314 __ jr(RA);
1315 __ delayed()->nop();
1316 return start;
1317 }
1319 // Arguments:
1320 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1321 // ignored
1322 // is_oop - true => oop array, so generate store check code
1323 // name - stub name string
1324 //
1325 // Inputs:
1326 // c_rarg0 - source array address
1327 // c_rarg1 - destination array address
1328 // c_rarg2 - element count, treated as ssize_t, can be zero
1329 //
1330 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1331 // the hardware handle it. The two dwords within qwords that span
1332 // cache line boundaries will still be loaded and stored atomically.
1333 //
1334 // Side Effects:
1335 // disjoint_int_copy_entry is set to the no-overlap entry point
1336 // used by generate_conjoint_int_oop_copy().
1337 //
// Generate a stub copying 'count' 4-byte elements (jints, or compressed
// oops when is_oop) from A0 to A1, ranges assumed disjoint.
// Registers: T3 = from, T0 = to, T1 = remaining element count; AT scratch.
// When source and destination share alignment mod 8, copies two elements
// per iteration with ld/sd; otherwise falls back to one lw/sw per element.
// For oop arrays a card-table store check runs after the copy.
1338 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
1339 Label l_3, l_4, l_5, l_6, l_7;
1340 StubCodeMark mark(this, "StubRoutines", name);
1342 __ align(CodeEntryAlignment);
1343 address start = __ pc();
1344 __ push(T3);
1345 __ push(T0);
1346 __ push(T1);
1347 __ push(T8);
1348 __ move(T1, A2);
1349 __ move(T3, A0);
1350 __ move(T0, A1);
1352 if (is_oop) {
// Memory fence for oop copies on Loongson 3A2000 (flag-gated);
// the precise hardware rationale is not visible in this file.
1353 if (Use3A2000) __ sync();
1354 }
1356 if(!aligned) {
1357 __ xorr(AT, T3, T0);
1358 __ andi(AT, AT, 7);
1359 __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time
1360 __ delayed()->nop();
1362 __ andi(AT, T3, 7);
1363 __ beq(AT, R0, l_6); //copy 2 elements each time
1364 __ delayed()->nop();
// Copy one leading element to reach 8-byte alignment.
1366 __ lw(AT, T3, 0);
1367 __ daddi(T1, T1, -1);
1368 __ sw(AT, T0, 0);
1369 __ daddi(T3, T3, 4);
1370 __ daddi(T0, T0, 4);
1371 }
1373 {
// Main loop: two 4-byte elements per iteration via 8-byte ld/sd.
1374 __ bind(l_6);
1375 __ daddi(AT, T1, -1);
1376 __ blez(AT, l_5);
1377 __ delayed()->nop();
1379 __ bind(l_7);
1380 __ ld(AT, T3, 0);
1381 __ sd(AT, T0, 0);
1382 __ daddi(T3, T3, 8);
1383 __ daddi(T0, T0, 8);
1384 __ daddi(T1, T1, -2);
1385 __ daddi(AT, T1, -2);
1386 __ bgez(AT, l_7);
1387 __ delayed()->nop();
1388 }
// Residual loop: any remaining elements, one lw/sw at a time.
1390 __ bind(l_5);
1391 __ beq(T1, R0, l_4);
1392 __ delayed()->nop();
1394 __ align(16);
1395 __ bind(l_3);
1396 __ lw(AT, T3, 0);
1397 __ sw(AT, T0, 0);
1398 __ addi(T3, T3, 4);
1399 __ addi(T0, T0, 4);
1400 __ addi(T1, T1, -1);
1401 __ bne(T1, R0, l_3);
1402 __ delayed()->nop();
1404 if (is_oop) {
// array_store_check expects T0 = dest base, T1 = element count.
1405 __ move(T0, A1);
1406 __ move(T1, A2);
1407 array_store_check();
1408 }
1410 // exit
1411 __ bind(l_4);
1412 __ pop(T8);
1413 __ pop(T1);
1414 __ pop(T0);
1415 __ pop(T3);
1416 __ jr(RA);
1417 __ delayed()->nop();
1419 return start;
1420 }
1422 // Arguments:
1423 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1424 // ignored
1425 // is_oop - true => oop array, so generate store check code
1426 // name - stub name string
1427 //
1428 // Inputs:
1429 // c_rarg0 - source array address
1430 // c_rarg1 - destination array address
1431 // c_rarg2 - element count, treated as ssize_t, can be zero
1432 //
1433 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1434 // the hardware handle it. The two dwords within qwords that span
1435 // cache line boundaries will still be loaded and stored atomically.
1436 //
// Generate a stub copying 'count' 4-byte elements (jints or compressed
// oops) where source and destination may overlap.  Non-overlapping calls
// are redirected by array_overlap_test to the disjoint stub.  The
// overlapping case copies backwards, one element per iteration.
// Registers: T3 = from (last element), T0 = to (last element), T1 = count.
1437 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
1438 Label l_2, l_4;
1439 StubCodeMark mark(this, "StubRoutines", name);
1440 __ align(CodeEntryAlignment);
1441 address start = __ pc();
1442 address nooverlap_target;
1444 if (is_oop) {
1445 nooverlap_target = aligned ?
1446 StubRoutines::arrayof_oop_disjoint_arraycopy() :
1447 StubRoutines::oop_disjoint_arraycopy();
1448 }else {
1449 nooverlap_target = aligned ?
1450 StubRoutines::arrayof_jint_disjoint_arraycopy() :
1451 StubRoutines::jint_disjoint_arraycopy();
1452 }
// log2(element size) == 2 for 4-byte elements.
1454 array_overlap_test(nooverlap_target, 2);
1456 __ push(T3);
1457 __ push(T0);
1458 __ push(T1);
1459 __ push(T8);
1461 __ move(T1, A2);
1462 __ move(T3, A0);
1463 __ move(T0, A1);
1464 // T3: source array address
1465 // T0: destination array address
1466 // T1: element count
1468 if (is_oop) {
// Memory fence for oop copies on Loongson 3A2000 (flag-gated).
1469 if (Use3A2000) __ sync();
1470 }
// Point T3/T0 at the last element of each range: base + count*4 - 4.
1472 __ sll(AT, T1, Address::times_4);
1473 __ add(AT, T3, AT);
1474 __ lea(T3 , Address(AT, -4));
1475 __ sll(AT, T1, Address::times_4);
1476 __ add(AT, T0, AT);
1477 __ lea(T0 , Address(AT, -4));
1479 __ beq(T1, R0, l_4);
1480 __ delayed()->nop();
// Backwards copy loop, one 4-byte element per iteration.
1482 __ align(16);
1483 __ bind(l_2);
1484 __ lw(AT, T3, 0);
1485 __ sw(AT, T0, 0);
1486 __ addi(T3, T3, -4);
1487 __ addi(T0, T0, -4);
1488 __ addi(T1, T1, -1);
1489 __ bne(T1, R0, l_2);
1490 __ delayed()->nop();
1492 if (is_oop) {
// Restore dest base and count for the post-copy store check.
1493 __ move(T0, A1);
1494 __ move(T1, A2);
1495 array_store_check();
1496 }
1497 __ bind(l_4);
1498 __ pop(T8);
1499 __ pop(T1);
1500 __ pop(T0);
1501 __ pop(T3);
1502 __ jr(RA);
1503 __ delayed()->nop();
1505 return start;
1506 }
1508 // Arguments:
1509 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1510 // ignored
1511 // is_oop - true => oop array, so generate store check code
1512 // name - stub name string
1513 //
1514 // Inputs:
1515 // c_rarg0 - source array address
1516 // c_rarg1 - destination array address
1517 // c_rarg2 - element count, treated as ssize_t, can be zero
1518 //
1519 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1520 // the hardware handle it. The two dwords within qwords that span
1521 // cache line boundaries will still be loaded and stored atomically.
1522 //
1523 // Side Effects:
1524 // disjoint_int_copy_entry is set to the no-overlap entry point
1525 // used by generate_conjoint_int_oop_copy().
1526 //
// Generate a stub copying 'count' 8-byte elements (jlongs or uncompressed
// oops) from A0 to A1, ranges assumed disjoint.  Simple forward loop, one
// ld/sd pair per element.  For oop arrays a store check runs afterwards.
1527 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
1528 Label l_3, l_4;
1529 StubCodeMark mark(this, "StubRoutines", name);
1530 __ align(CodeEntryAlignment);
1531 address start = __ pc();
1533 __ push(T3);
1534 __ push(T0);
1535 __ push(T1);
1536 __ push(T8);
1538 __ move(T1, A2);
1539 __ move(T3, A0);
1540 __ move(T0, A1);
1541 // T3: source array address
1542 // T0: destination array address
1543 // T1: element count
1545 if (is_oop) {
// Memory fence for oop copies on Loongson 3A2000 (flag-gated).
1546 if (Use3A2000) __ sync();
1547 }
1549 __ beq(T1, R0, l_4);
1550 __ delayed()->nop();
// Forward copy loop, one 8-byte element per iteration.
1552 __ align(16);
1553 __ bind(l_3);
1554 __ ld(AT, T3, 0);
1555 __ sd(AT, T0, 0);
1556 __ addi(T3, T3, 8);
1557 __ addi(T0, T0, 8);
1558 __ addi(T1, T1, -1);
1559 __ bne(T1, R0, l_3);
1560 __ delayed()->nop();
1562 if (is_oop) {
// Restore dest base and count for the post-copy store check.
1563 __ move(T0, A1);
1564 __ move(T1, A2);
1565 array_store_check();
1566 }
1568 // exit
1569 __ bind(l_4);
1570 __ pop(T8);
1571 __ pop(T1);
1572 __ pop(T0);
1573 __ pop(T3);
1574 __ jr(RA);
1575 __ delayed()->nop();
1576 return start;
1577 }
1579 // Arguments:
1580 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1581 // ignored
1582 // is_oop - true => oop array, so generate store check code
1583 // name - stub name string
1584 //
1585 // Inputs:
1586 // c_rarg0 - source array address
1587 // c_rarg1 - destination array address
1588 // c_rarg2 - element count, treated as ssize_t, can be zero
1589 //
1590 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1591 // the hardware handle it. The two dwords within qwords that span
1592 // cache line boundaries will still be loaded and stored atomically.
1593 //
// Generate a stub copying 'count' 8-byte elements (jlongs or uncompressed
// oops) where source and destination may overlap.  Non-overlapping calls
// are redirected to the disjoint stub; the overlapping case copies
// backwards, one 8-byte element per iteration.
1594 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
1595 Label l_2, l_4;
1596 StubCodeMark mark(this, "StubRoutines", name);
1597 __ align(CodeEntryAlignment);
1598 address start = __ pc();
1599 address nooverlap_target;
1601 if (is_oop) {
1602 nooverlap_target = aligned ?
1603 StubRoutines::arrayof_oop_disjoint_arraycopy() :
1604 StubRoutines::oop_disjoint_arraycopy();
1605 }else {
1606 nooverlap_target = aligned ?
1607 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
1608 StubRoutines::jlong_disjoint_arraycopy();
1609 }
// log2(element size) == 3 for 8-byte elements.
1611 array_overlap_test(nooverlap_target, 3);
1613 __ push(T3);
1614 __ push(T0);
1615 __ push(T1);
1616 __ push(T8);
1618 __ move(T1, A2);
1619 __ move(T3, A0);
1620 __ move(T0, A1);
1622 if (is_oop) {
// Memory fence for oop copies on Loongson 3A2000 (flag-gated).
1623 if (Use3A2000) __ sync();
1624 }
// Point T3/T0 at the last element of each range: base + count*8 - 8.
1626 __ sll(AT, T1, Address::times_8);
1627 __ add(AT, T3, AT);
1628 __ lea(T3 , Address(AT, -8));
1629 __ sll(AT, T1, Address::times_8);
1630 __ add(AT, T0, AT);
1631 __ lea(T0 , Address(AT, -8));
1633 __ beq(T1, R0, l_4);
1634 __ delayed()->nop();
// Backwards copy loop.
1636 __ align(16);
1637 __ bind(l_2);
1638 __ ld(AT, T3, 0);
1639 __ sd(AT, T0, 0);
1640 __ addi(T3, T3, -8);
1641 __ addi(T0, T0, -8);
1642 __ addi(T1, T1, -1);
1643 __ bne(T1, R0, l_2);
1644 __ delayed()->nop();
1646 if (is_oop) {
// Restore dest base and count for the post-copy store check.
1647 __ move(T0, A1);
1648 __ move(T1, A2);
1649 array_store_check();
1650 }
1651 __ bind(l_4);
1652 __ pop(T8);
1653 __ pop(T1);
1654 __ pop(T0);
1655 __ pop(T3);
1656 __ jr(RA);
1657 __ delayed()->nop();
1658 return start;
1659 }
1661 //FIXME
// Generate a stub copying 'count' jlongs from A0 to A1, ranges assumed
// disjoint.  Forward loop, one ld/sd pair per element.  The interleaved
// commented-out code is the x86 original this was translated from.
// Note the loop structure: the count is decremented at l_2 BEFORE the
// first copy, and bgez loops while >= 0, so exactly 'count' elements
// are copied and a zero count falls straight through.
1662 address generate_disjoint_long_copy(bool aligned, const char *name) {
1663 Label l_1, l_2;
1664 StubCodeMark mark(this, "StubRoutines", name);
1665 __ align(CodeEntryAlignment);
1666 address start = __ pc();
1668 // __ movl(ecx, Address(esp, 4+8)); // count
1669 // __ movl(eax, Address(esp, 4+0)); // from
1670 // __ movl(edx, Address(esp, 4+4)); // to
1671 __ move(T1, A2);
1672 __ move(T3, A0);
1673 __ move(T0, A1);
1674 __ push(T3);
1675 __ push(T0);
1676 __ push(T1);
1677 //__ subl(edx, eax);
1678 //__ jmp(l_2);
// Jump to the loop test first (copy body runs only if count > 0).
1679 __ b(l_2);
1680 __ delayed()->nop();
1681 __ align(16);
1682 __ bind(l_1);
1683 // if (VM_Version::supports_mmx()) {
1684 // __ movq(mmx0, Address(eax));
1685 // __ movq(Address(eax, edx, Address::times_1), mmx0);
1686 // } else {
1687 // __ fild_d(Address(eax));
1688 __ ld(AT, T3, 0);
1689 // __ fistp_d(Address(eax, edx, Address::times_1));
1690 __ sd (AT, T0, 0);
1691 // }
1692 // __ addl(eax, 8);
1693 __ addi(T3, T3, 8);
1694 __ addi(T0, T0, 8);
1695 __ bind(l_2);
1696 // __ decl(ecx);
1697 __ addi(T1, T1, -1);
1698 // __ jcc(Assembler::greaterEqual, l_1);
1699 __ bgez(T1, l_1);
1700 __ delayed()->nop();
1701 // if (VM_Version::supports_mmx()) {
1702 // __ emms();
1703 // }
1704 // __ ret(0);
1705 __ pop(T1);
1706 __ pop(T0);
1707 __ pop(T3);
1708 __ jr(RA);
1709 __ delayed()->nop();
1710 return start;
1711 }
// Generate a stub copying 'count' jlongs where source and destination may
// overlap.  Non-overlapping calls are redirected by array_overlap_test to
// the disjoint stub; the overlapping case copies backwards from the last
// element.  Same decrement-before-copy loop shape as the disjoint version.
1714 address generate_conjoint_long_copy(bool aligned, const char *name) {
1715 Label l_1, l_2;
1716 StubCodeMark mark(this, "StubRoutines", name);
1717 __ align(CodeEntryAlignment);
1718 address start = __ pc();
1719 address nooverlap_target = aligned ?
1720 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
1721 StubRoutines::jlong_disjoint_arraycopy();
// log2(element size) == 3 for jlong.
1722 array_overlap_test(nooverlap_target, 3);
1724 __ push(T3);
1725 __ push(T0);
1726 __ push(T1);
1728 /* __ movl(ecx, Address(esp, 4+8)); // count
1729 __ movl(eax, Address(esp, 4+0)); // from
1730 __ movl(edx, Address(esp, 4+4)); // to
1731 __ jmp(l_2);
1733 */
1734 __ move(T1, A2);
1735 __ move(T3, A0);
1736 __ move(T0, A1);
// Point T3/T0 at the last element of each range: base + count*8 - 8.
1737 __ sll(AT, T1, Address::times_8);
1738 __ add(AT, T3, AT);
1739 __ lea(T3 , Address(AT, -8));
1740 __ sll(AT, T1, Address::times_8);
1741 __ add(AT, T0, AT);
1742 __ lea(T0 , Address(AT, -8));
// Jump to the loop test first (copy body runs only if count > 0).
1746 __ b(l_2);
1747 __ delayed()->nop();
1748 __ align(16);
1749 __ bind(l_1);
1750 /* if (VM_Version::supports_mmx()) {
1751 __ movq(mmx0, Address(eax, ecx, Address::times_8));
1752 __ movq(Address(edx, ecx,Address::times_8), mmx0);
1753 } else {
1754 __ fild_d(Address(eax, ecx, Address::times_8));
1755 __ fistp_d(Address(edx, ecx,Address::times_8));
1756 }
1757 */
1758 __ ld(AT, T3, 0);
1759 __ sd (AT, T0, 0);
1760 __ addi(T3, T3, -8);
1761 __ addi(T0, T0,-8);
1762 __ bind(l_2);
1763 // __ decl(ecx);
1764 __ addi(T1, T1, -1);
1765 //__ jcc(Assembler::greaterEqual, l_1);
1766 __ bgez(T1, l_1);
1767 __ delayed()->nop();
1768 // if (VM_Version::supports_mmx()) {
1769 // __ emms();
1770 // }
1771 // __ ret(0);
1772 __ pop(T1);
1773 __ pop(T0);
1774 __ pop(T3);
1775 __ jr(RA);
1776 __ delayed()->nop();
1777 return start;
1778 }
// Generate all arraycopy stubs and publish their entry points in
// StubRoutines.  Oop copies use the 4-byte-element generators when
// compressed oops are enabled (a narrow oop is 32 bits) and the
// 8-byte-element generators otherwise.  Note the *_uninit oop variants
// are generated with the same code as the normal ones here (no separate
// uninitialized-store fast path on this port).
// NB: assignment order matters — the arrayof_* aliases below read the
// StubRoutines fields assigned just above them.
1780 void generate_arraycopy_stubs() {
1781 if (UseCompressedOops) {
1782 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true,
1783 "oop_disjoint_arraycopy");
1784 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true,
1785 "oop_arraycopy");
1786 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true,
1787 "oop_disjoint_arraycopy_uninit");
1788 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true,
1789 "oop_arraycopy_uninit");
1790 } else {
1791 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true,
1792 "oop_disjoint_arraycopy");
1793 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true,
1794 "oop_arraycopy");
1795 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true,
1796 "oop_disjoint_arraycopy_uninit");
1797 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true,
1798 "oop_arraycopy_uninit");
1799 }
// Primitive-element stubs, disjoint then conjoint.
1801 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
1802 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
1803 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
1804 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
1806 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
1807 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
1808 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
1809 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
1811 // We don't generate specialized code for HeapWord-aligned source
1812 // arrays, so just use the code we've already generated
1813 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
1814 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy;
1816 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
1817 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy;
1819 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
1820 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
1822 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
1823 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
1825 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
1826 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
1828 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
1829 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
1830 }
1832 //Wang: add a function to implement SafeFetch32 and SafeFetchN
// Generate the SafeFetch32 / SafeFetchN stub: load a word that may fault.
// The VM's signal handler recognizes a fault at *fault_pc and resumes
// execution at *continuation_pc with the errValue (already in A1)
// becoming the result.  size selects a 4- or 8-byte load.
1833 void generate_safefetch(const char* name, int size, address* entry,
1834 address* fault_pc, address* continuation_pc) {
1835 // safefetch signatures:
1836 // int SafeFetch32(int* adr, int errValue);
1837 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
1838 //
1839 // arguments:
1840 // A0 = adr
1841 // A1 = errValue
1842 //
1843 // result:
1844 // V0 = *adr or errValue
1846 StubCodeMark mark(this, "StubRoutines", name);
1848 // Entry point, pc or function descriptor.
1849 *entry = __ pc();
1851 // Load *adr into A1, may fault.
1852 *fault_pc = __ pc();
1853 switch (size) {
1854 case 4:
1855 // int32_t
1856 __ lw(A1, A0, 0);
1857 break;
1858 case 8:
1859 // int64_t
1860 __ ld(A1, A0, 0);
1861 break;
1862 default:
1863 ShouldNotReachHere();
1864 }
1866 // return errValue or *adr
1867 *continuation_pc = __ pc();
// Move A1 into the return register (addu with R0 is a register move).
1868 __ addu(V0,A1,R0);
1869 __ jr(RA);
1870 __ delayed()->nop();
1871 }
1874 #undef __
1875 #define __ masm->
1877 // Continuation point for throwing of implicit exceptions that are
1878 // not handled in the current activation. Fabricates an exception
1879 // oop and initiates normal exception dispatching in this
1880 // frame. Since we need to preserve callee-saved values (currently
1881 // only for C2, but done for C1 as well) we need a callee-saved oop
1882 // map and therefore have to make these stubs into RuntimeStubs
1883 // rather than BufferBlobs. If the compiler needs all registers to
1884 // be preserved between the fault point and the exception handler
1885 // then it must assume responsibility for that in
1886 // AbstractCompiler::continuation_for_implicit_null_exception or
1887 // continuation_for_implicit_division_by_zero_exception. All other
1888 // implicit exceptions (e.g., NullPointerException or
1889 // AbstractMethodError on entry) are either at call sites or
1890 // otherwise assume that stack unwinding will be initiated, so
1891 // caller saved registers were assumed volatile in the compiler.
// Build a RuntimeStub that fabricates an implicit exception: it saves the
// callee-saved registers, records a walkable last_Java_frame, calls the
// given runtime_entry (which sets a pending exception in the thread),
// restores state, and jumps to the forward-exception stub.  When
// restore_saved_exception_pc is true, RA is first reloaded from the
// thread's saved_exception_pc so the exception appears to come from the
// faulting instruction.
1892 address generate_throw_exception(const char* name,
1893 address runtime_entry,
1894 bool restore_saved_exception_pc) {
1895 // Information about frame layout at time of blocking runtime call.
1896 // Note that we only have to preserve callee-saved registers since
1897 // the compilers are responsible for supplying a continuation point
1898 // if they expect all registers to be preserved.
1899 //#define aoqi_test
1900 #ifdef aoqi_test
1901 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
1902 #endif
// Stack-slot layout (in words from SP) of the stub frame built below.
1903 enum layout {
1904 thread_off, // last_java_sp
1905 S7_off, // callee saved register sp + 1
1906 S6_off, // callee saved register sp + 2
1907 S5_off, // callee saved register sp + 3
1908 S4_off, // callee saved register sp + 4
1909 S3_off, // callee saved register sp + 5
1910 S2_off, // callee saved register sp + 6
1911 S1_off, // callee saved register sp + 7
1912 S0_off, // callee saved register sp + 8
1913 FP_off,
1914 ret_address,
1915 framesize
1916 };
1918 int insts_size = 2048;
1919 int locs_size = 32;
1921 // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false,
1922 // NULL, NULL, NULL, false, NULL, name, false);
1923 CodeBuffer code (name , insts_size, locs_size);
1924 #ifdef aoqi_test
1925 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
1926 #endif
1927 OopMapSet* oop_maps = new OopMapSet();
1928 #ifdef aoqi_test
1929 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
1930 #endif
1931 MacroAssembler* masm = new MacroAssembler(&code);
1932 #ifdef aoqi_test
1933 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
1934 #endif
1936 address start = __ pc();
1937 //__ stop("generate_throw_exception");
1938 /*
1939 __ move(AT, (int)&jerome1 );
1940 __ sw(SP, AT, 0);
1941 __ move(AT, (int)&jerome2 );
1942 __ sw(FP, AT, 0);
1943 __ move(AT, (int)&jerome3 );
1944 __ sw(RA, AT, 0);
1945 __ move(AT, (int)&jerome4 );
1946 __ sw(R0, AT, 0);
1947 __ move(AT, (int)&jerome5 );
1948 __ sw(R0, AT, 0);
1949 __ move(AT, (int)&jerome6 );
1950 __ sw(R0, AT, 0);
1951 __ move(AT, (int)&jerome7 );
1952 __ sw(R0, AT, 0);
1953 __ move(AT, (int)&jerome10 );
1954 __ sw(R0, AT, 0);
1956 __ pushad();
1958 //__ enter();
1959 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics),
1960 relocInfo::runtime_call_type);
1961 __ delayed()->nop();
1963 //__ leave();
1964 __ popad();
1966 */
1968 // This is an inlined and slightly modified version of call_VM
1969 // which has the ability to fetch the return PC out of
1970 // thread-local storage and also sets up last_Java_sp slightly
1971 // differently than the real call_VM
1972 #ifndef OPT_THREAD
1973 Register java_thread = TREG;
1974 __ get_thread(java_thread);
1975 #else
1976 Register java_thread = TREG;
1977 #endif
1978 #ifdef aoqi_test
1979 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
1980 #endif
1981 if (restore_saved_exception_pc) {
1982 __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // eax
1983 }
1985 __ enter(); // required for proper stackwalking of RuntimeStub frame
// Allocate the stub frame (enter() already pushed RA/FP, hence -2).
1987 __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
1988 __ sd(S0, SP, S0_off * wordSize);
1989 __ sd(S1, SP, S1_off * wordSize);
1990 __ sd(S2, SP, S2_off * wordSize);
1991 __ sd(S3, SP, S3_off * wordSize);
1992 __ sd(S4, SP, S4_off * wordSize);
1993 __ sd(S5, SP, S5_off * wordSize);
1994 __ sd(S6, SP, S6_off * wordSize);
1995 __ sd(S7, SP, S7_off * wordSize);
1997 int frame_complete = __ pc() - start;
1998 // push java thread (becomes first argument of C function)
1999 __ sd(java_thread, SP, thread_off * wordSize);
2000 if (java_thread!=A0)
2001 __ move(A0, java_thread);
2003 // Set up last_Java_sp and last_Java_fp
2004 __ set_last_Java_frame(java_thread, SP, FP, NULL);
2005 __ relocate(relocInfo::internal_pc_type);
2006 {
// Record a last_Java_pc just past the runtime call below; the +28
// byte offset accounts for the instructions emitted between here
// and the call's return point — NOTE(review): must be kept in sync
// with that instruction sequence if it changes.
2007 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
2008 __ patchable_set48(AT, save_pc);
2009 }
2010 __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
2012 // Call runtime
2013 __ call(runtime_entry);
2014 __ delayed()->nop();
2015 // Generate oop map
2016 OopMap* map = new OopMap(framesize, 0);
2017 oop_maps->add_gc_map(__ offset(), map);
2019 // restore the thread (cannot use the pushed argument since arguments
2020 // may be overwritten by C code generated by an optimizing compiler);
2021 // however can use the register value directly if it is callee saved.
2022 #ifndef OPT_THREAD
2023 __ get_thread(java_thread);
2024 #endif
2026 __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
2027 // __ reset_last_Java_frame(java_thread, true);
2028 __ reset_last_Java_frame(java_thread, true, true);
2030 // Restore callee save registers. This must be done after resetting the Java frame
2031 __ ld(S0, SP, S0_off * wordSize);
2032 __ ld(S1, SP, S1_off * wordSize);
2033 __ ld(S2, SP, S2_off * wordSize);
2034 __ ld(S3, SP, S3_off * wordSize);
2035 __ ld(S4, SP, S4_off * wordSize);
2036 __ ld(S5, SP, S5_off * wordSize);
2037 __ ld(S6, SP, S6_off * wordSize);
2038 __ ld(S7, SP, S7_off * wordSize);
2040 // discard arguments
2041 __ addi(SP, SP, (framesize-2) * wordSize); // epilog
2042 // __ leave(); // required for proper stackwalking of RuntimeStub frame
// Manual leave(): pop FP and unwind to the caller's SP.
2043 __ addi(SP, FP, wordSize);
2044 __ ld(FP, SP, -1*wordSize);
2045 // check for pending exceptions
2046 #ifdef ASSERT
// The runtime entry must have set a pending exception; assert that.
2047 Label L;
2048 __ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
2049 __ bne(AT, R0, L);
2050 __ delayed()->nop();
2051 __ should_not_reach_here();
2052 __ bind(L);
2053 #endif //ASSERT
2054 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
2055 __ delayed()->nop();
2056 #ifdef aoqi_test
2057 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
2058 #endif
2059 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete,
2060 framesize, oop_maps, false);
2062 #ifdef aoqi_test
2063 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
2063 #endif
2064 return stub->entry_point();
2065 }
2067 // Initialization
// First-phase stub generation: entry points that must exist before the
// rest of the VM can bootstrap (exception forwarding, the call stub,
// catch-exception, unsafe-access handler, and the platform-specific
// get_previous_fp helper).  The large commented block is the original
// x86 version kept for reference.
2068 void generate_initial() {
2069 /*
2070 // Generates all stubs and initializes the entry points
2072 // This platform-specific stub is needed by generate_call_stub()
2073 StubRoutines::mips::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80);
2075 // entry points that exist in all platforms Note: This is code
2076 // that could be shared among different platforms - however the
2077 // benefit seems to be smaller than the disadvantage of having a
2078 // much more complicated generator structure. See also comment in
2079 // stubRoutines.hpp.
2081 StubRoutines::_forward_exception_entry = generate_forward_exception();
2083 StubRoutines::_call_stub_entry =
2084 generate_call_stub(StubRoutines::_call_stub_return_address);
2086 // is referenced by megamorphic call
2087 StubRoutines::_catch_exception_entry = generate_catch_exception();
2089 // atomic calls
2090 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
2091 StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr();
2092 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
2093 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
2094 StubRoutines::_atomic_add_entry = generate_atomic_add();
2095 StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr();
2096 StubRoutines::_fence_entry = generate_orderaccess_fence();
2098 StubRoutines::_handler_for_unsafe_access_entry =
2099 generate_handler_for_unsafe_access();
2101 // platform dependent
2102 StubRoutines::mips::_get_previous_fp_entry = generate_get_previous_fp();
2104 StubRoutines::mips::_verify_mxcsr_entry = generate_verify_mxcsr();
2105 */
2106 // Generates all stubs and initializes the entry points
2108 //-------------------------------------------------------------
2109 //-----------------------------------------------------------
2110 // entry points that exist in all platforms
2111 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller
2112 // than the disadvantage of having a much more complicated generator structure.
2113 // See also comment in stubRoutines.hpp.
2114 StubRoutines::_forward_exception_entry = generate_forward_exception();
2115 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
2116 // is referenced by megamorphic call
2117 StubRoutines::_catch_exception_entry = generate_catch_exception();
2119 StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
2121 // platform dependent
2122 StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
2123 }
2125 void generate_all() {
2126 #ifdef aoqi_test
2127 tty->print_cr("%s:%d", __func__, __LINE__);
2128 #endif
2129 // Generates all stubs and initializes the entry points
2131 // These entry points require SharedInfo::stack0 to be set up in
2132 // non-core builds and need to be relocatable, so they each
2133 // fabricate a RuntimeStub internally.
2134 /*
2135 StubRoutines::_throw_AbstractMethodError_entry =
2136 generate_throw_exception("AbstractMethodError throw_exception",
2137 CAST_FROM_FN_PTR(address,
2138 SharedRuntime::
2139 throw_AbstractMethodError),
2140 false);
2142 StubRoutines::_throw_IncompatibleClassChangeError_entry =
2143 generate_throw_exception("IncompatibleClassChangeError throw_exception",
2144 CAST_FROM_FN_PTR(address,
2145 SharedRuntime::
2146 throw_IncompatibleClassChangeError),
2147 false);
2149 StubRoutines::_throw_ArithmeticException_entry =
2150 generate_throw_exception("ArithmeticException throw_exception",
2151 CAST_FROM_FN_PTR(address,
2152 SharedRuntime::
2153 throw_ArithmeticException),
2154 true);
2156 StubRoutines::_throw_NullPointerException_entry =
2157 generate_throw_exception("NullPointerException throw_exception",
2158 CAST_FROM_FN_PTR(address,
2159 SharedRuntime::
2160 throw_NullPointerException),
2161 true);
2163 StubRoutines::_throw_NullPointerException_at_call_entry =
2164 generate_throw_exception("NullPointerException at call throw_exception",
2165 CAST_FROM_FN_PTR(address,
2166 SharedRuntime::
2167 throw_NullPointerException_at_call),
2168 false);
2170 StubRoutines::_throw_StackOverflowError_entry =
2171 generate_throw_exception("StackOverflowError throw_exception",
2172 CAST_FROM_FN_PTR(address,
2173 SharedRuntime::
2174 throw_StackOverflowError),
2175 false);
2177 // entry points that are platform specific
2178 StubRoutines::mips::_f2i_fixup = generate_f2i_fixup();
2179 StubRoutines::mips::_f2l_fixup = generate_f2l_fixup();
2180 StubRoutines::mips::_d2i_fixup = generate_d2i_fixup();
2181 StubRoutines::mips::_d2l_fixup = generate_d2l_fixup();
2183 StubRoutines::mips::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF);
2184 StubRoutines::mips::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
2185 StubRoutines::mips::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
2186 StubRoutines::mips::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
2188 // support for verify_oop (must happen after universe_init)
2189 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
2191 // arraycopy stubs used by compilers
2192 generate_arraycopy_stubs();
2193 */
2194 #ifdef aoqi_test
2195 tty->print_cr("%s:%d", __func__, __LINE__);
2196 #endif
2197 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
2198 #ifdef aoqi_test
2199 tty->print_cr("%s:%d", __func__, __LINE__);
2200 #endif
2201 // StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
2202 #ifdef aoqi_test
2203 tty->print_cr("%s:%d", __func__, __LINE__);
2204 #endif
2205 // StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
2206 #ifdef aoqi_test
2207 tty->print_cr("%s:%d", __func__, __LINE__);
2208 #endif
2209 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
2210 #ifdef aoqi_test
2211 tty->print_cr("%s:%d", __func__, __LINE__);
2212 #endif
2213 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
2214 #ifdef aoqi_test
2215 tty->print_cr("%s:%d", __func__, __LINE__);
2216 #endif
2218 //------------------------------------------------------
2219 //------------------------------------------------------------------
2220 // entry points that are platform specific
2222 // support for verify_oop (must happen after universe_init)
2223 #ifdef aoqi_test
2224 tty->print_cr("%s:%d", __func__, __LINE__);
2225 #endif
2226 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
2227 #ifdef aoqi_test
2228 tty->print_cr("%s:%d", __func__, __LINE__);
2229 #endif
2230 #ifndef CORE
2231 // arraycopy stubs used by compilers
2232 generate_arraycopy_stubs();
2233 #ifdef aoqi_test
2234 tty->print_cr("%s:%d", __func__, __LINE__);
2235 #endif
2236 #endif
2238 // Safefetch stubs.
2239 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
2240 &StubRoutines::_safefetch32_fault_pc,
2241 &StubRoutines::_safefetch32_continuation_pc);
2242 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
2243 &StubRoutines::_safefetchN_fault_pc,
2244 &StubRoutines::_safefetchN_continuation_pc);
2245 }
2247 public:
2248 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2249 if (all) {
2250 generate_all();
2251 } else {
2252 generate_initial();
2253 }
2254 }
2255 }; // end class declaration
2256 /*
2257 address StubGenerator::disjoint_byte_copy_entry = NULL;
2258 address StubGenerator::disjoint_short_copy_entry = NULL;
2259 address StubGenerator::disjoint_int_copy_entry = NULL;
2260 address StubGenerator::disjoint_long_copy_entry = NULL;
2261 address StubGenerator::disjoint_oop_copy_entry = NULL;
2263 address StubGenerator::byte_copy_entry = NULL;
2264 address StubGenerator::short_copy_entry = NULL;
2265 address StubGenerator::int_copy_entry = NULL;
2266 address StubGenerator::long_copy_entry = NULL;
2267 address StubGenerator::oop_copy_entry = NULL;
2269 address StubGenerator::checkcast_copy_entry = NULL;
2270 */
2271 void StubGenerator_generate(CodeBuffer* code, bool all) {
2272 StubGenerator g(code, all);
2273 }