Wed, 08 Nov 2017 09:28:23 +0800
[GC] 17 out of 18 jtreg tests for g1 have passed (the same as x86 with jdk8u60-b32).
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2017, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/assembler.hpp"
28 #include "asm/assembler.inline.hpp"
29 #include "asm/macroAssembler.inline.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "gc_interface/collectedHeap.inline.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "memory/cardTableModRefBS.hpp"
34 #include "memory/resourceArea.hpp"
35 #include "memory/universe.hpp"
36 #include "prims/methodHandles.hpp"
37 #include "runtime/biasedLocking.hpp"
38 #include "runtime/interfaceSupport.hpp"
39 #include "runtime/objectMonitor.hpp"
40 #include "runtime/os.hpp"
41 #include "runtime/sharedRuntime.hpp"
42 #include "runtime/stubRoutines.hpp"
43 #include "utilities/macros.hpp"
44 #if INCLUDE_ALL_GCS
45 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
46 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
47 #include "gc_implementation/g1/heapRegion.hpp"
48 #endif // INCLUDE_ALL_GCS
50 // Implementation of MacroAssembler
52 intptr_t MacroAssembler::i[32] = {0};
53 float MacroAssembler::f[32] = {0.0};
55 void MacroAssembler::print(outputStream *s) {
56 unsigned int k;
57 for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
58 s->print_cr("i%d = 0x%.16lx", k, i[k]);
59 }
60 s->cr();
62 for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
63 s->print_cr("f%d = %f", k, f[k]);
64 }
65 s->cr();
66 }
68 int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
69 int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
71 void MacroAssembler::save_registers(MacroAssembler *masm) {
72 #define __ masm->
73 for(int k=0; k<32; k++) {
74 __ sw (as_Register(k), A0, i_offset(k));
75 }
77 for(int k=0; k<32; k++) {
78 __ swc1 (as_FloatRegister(k), A0, f_offset(k));
79 }
80 #undef __
81 }
83 void MacroAssembler::restore_registers(MacroAssembler *masm) {
84 #define __ masm->
85 for(int k=0; k<32; k++) {
86 __ lw (as_Register(k), A0, i_offset(k));
87 }
89 for(int k=0; k<32; k++) {
90 __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
91 }
92 #undef __
93 }
96 void MacroAssembler::pd_patch_instruction(address branch, address target) {
97 jint& stub_inst = *(jint*) branch;
99 /* *
100 move(AT, RA); // dadd
101 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
102 nop();
103 lui(T9, 0); // to be patched
104 ori(T9, 0);
105 daddu(T9, T9, RA);
106 move(RA, AT);
107 jr(T9);
108 */
109 if(special(stub_inst) == dadd_op) {
110 jint *pc = (jint *)branch;
112 assert(opcode(pc[3]) == lui_op
113 && opcode(pc[4]) == ori_op
114 && special(pc[5]) == daddu_op, "Not a branch label patch");
115 if(!(opcode(pc[3]) == lui_op
116 && opcode(pc[4]) == ori_op
117 && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
119 int offset = target - branch;
120 if (!is_simm16(offset))
121 {
122 pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
123 pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
124 }
125 else
126 {
127 /* revert to "beq + nop" */
128 CodeBuffer cb(branch, 4 * 10);
129 MacroAssembler masm(&cb);
130 #define __ masm.
131 __ b(target);
132 __ nop();
133 __ nop();
134 __ nop();
135 __ nop();
136 __ nop();
137 __ nop();
138 __ nop();
139 }
140 return;
141 }
143 #ifndef PRODUCT
144 if (!is_simm16((target - branch - 4) >> 2))
145 {
146 tty->print_cr("Illegal patching: target=0x%lx", target);
147 int *p = (int *)branch;
148 for (int i = -10; i < 10; i++)
149 {
150 tty->print("0x%lx, ", p[i]);
151 }
152 tty->print_cr("");
153 }
154 #endif
156 stub_inst = patched_branch(target - branch, stub_inst, 0);
157 }
159 static inline address first_cache_address() {
160 return CodeCache::low_bound() + sizeof(HeapBlock::Header);
161 }
163 static inline address last_cache_address() {
164 return CodeCache::high_bound() - Assembler::InstructionSize;
165 }
167 int MacroAssembler::call_size(address target, bool far, bool patchable) {
168 if (patchable) return 6 << Assembler::LogInstructionSize;
169 if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
170 return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
171 }
173 // Can we reach target using jal/j from anywhere
174 // in the code cache (because code can be relocated)?
175 bool MacroAssembler::reachable_from_cache(address target) {
176 address cl = first_cache_address();
177 address ch = last_cache_address();
179 return fit_in_jal(target, cl) && fit_in_jal(target, ch);
180 }
182 void MacroAssembler::general_jump(address target) {
183 if (reachable_from_cache(target)) {
184 j(target);
185 nop();
186 } else {
187 set64(T9, (long)target);
188 jr(T9);
189 nop();
190 }
191 }
193 int MacroAssembler::insts_for_general_jump(address target) {
194 if (reachable_from_cache(target)) {
195 //j(target);
196 //nop();
197 return 2;
198 } else {
199 //set64(T9, (long)target);
200 //jr(T9);
201 //nop();
202 return insts_for_set64((jlong)target) + 2;
203 }
204 }
206 void MacroAssembler::patchable_jump(address target) {
207 if (reachable_from_cache(target)) {
208 nop();
209 nop();
210 nop();
211 nop();
212 j(target);
213 nop();
214 } else {
215 patchable_set48(T9, (long)target);
216 jr(T9);
217 nop();
218 }
219 }
221 int MacroAssembler::insts_for_patchable_jump(address target) {
222 return 6;
223 }
225 void MacroAssembler::general_call(address target) {
226 if (reachable_from_cache(target)) {
227 jal(target);
228 nop();
229 } else {
230 set64(T9, (long)target);
231 jalr(T9);
232 nop();
233 }
234 }
236 int MacroAssembler::insts_for_general_call(address target) {
237 if (reachable_from_cache(target)) {
238 //jal(target);
239 //nop();
240 return 2;
241 } else {
242 //set64(T9, (long)target);
243 //jalr(T9);
244 //nop();
245 return insts_for_set64((jlong)target) + 2;
246 }
247 }
249 void MacroAssembler::patchable_call(address target) {
250 if (reachable_from_cache(target)) {
251 nop();
252 nop();
253 nop();
254 nop();
255 jal(target);
256 nop();
257 } else {
258 patchable_set48(T9, (long)target);
259 jalr(T9);
260 nop();
261 }
262 }
264 int MacroAssembler::insts_for_patchable_call(address target) {
265 return 6;
266 }
268 void MacroAssembler::beq_far(Register rs, Register rt, address entry)
269 {
270 u_char * cur_pc = pc();
272 /* Jin: Near/Far jump */
273 if(is_simm16((entry - pc() - 4) / 4))
274 {
275 Assembler::beq(rs, rt, offset(entry));
276 }
277 else
278 {
279 Label not_jump;
280 bne(rs, rt, not_jump);
281 delayed()->nop();
283 b_far(entry);
284 delayed()->nop();
286 bind(not_jump);
287 has_delay_slot();
288 }
289 }
291 void MacroAssembler::beq_far(Register rs, Register rt, Label& L)
292 {
293 if (L.is_bound()) {
294 beq_far(rs, rt, target(L));
295 } else {
296 u_char * cur_pc = pc();
297 Label not_jump;
298 bne(rs, rt, not_jump);
299 delayed()->nop();
301 b_far(L);
302 delayed()->nop();
304 bind(not_jump);
305 has_delay_slot();
306 }
307 }
309 void MacroAssembler::bne_far(Register rs, Register rt, address entry)
310 {
311 u_char * cur_pc = pc();
313 /* Jin: Near/Far jump */
314 if(is_simm16((entry - pc() - 4) / 4))
315 {
316 Assembler::bne(rs, rt, offset(entry));
317 }
318 else
319 {
320 Label not_jump;
321 beq(rs, rt, not_jump);
322 delayed()->nop();
324 b_far(entry);
325 delayed()->nop();
327 bind(not_jump);
328 has_delay_slot();
329 }
330 }
332 void MacroAssembler::bne_far(Register rs, Register rt, Label& L)
333 {
334 if (L.is_bound()) {
335 bne_far(rs, rt, target(L));
336 } else {
337 u_char * cur_pc = pc();
338 Label not_jump;
339 beq(rs, rt, not_jump);
340 delayed()->nop();
342 b_far(L);
343 delayed()->nop();
345 bind(not_jump);
346 has_delay_slot();
347 }
348 }
350 void MacroAssembler::b_far(Label& L)
351 {
352 if (L.is_bound()) {
353 b_far(target(L));
354 } else {
355 volatile address dest = target(L);
356 /*
357 MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
358 0x00000055651ed514: dadd at, ra, zero
359 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
361 0x00000055651ed51c: sll zero, zero, 0
362 0x00000055651ed520: lui t9, 0x0
363 0x00000055651ed524: ori t9, t9, 0x21b8
364 0x00000055651ed528: daddu t9, t9, ra
365 0x00000055651ed52c: dadd ra, at, zero
366 0x00000055651ed530: jr t9
367 0x00000055651ed534: sll zero, zero, 0
368 */
369 move(AT, RA);
370 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
371 nop();
372 lui(T9, 0); // to be patched
373 ori(T9, T9, 0);
374 daddu(T9, T9, RA);
375 move(RA, AT);
376 jr(T9);
377 }
378 }
380 void MacroAssembler::b_far(address entry)
381 {
382 u_char * cur_pc = pc();
384 /* Jin: Near/Far jump */
385 if(is_simm16((entry - pc() - 4) / 4))
386 {
387 b(offset(entry));
388 }
389 else
390 {
391 /* address must be bounded */
392 move(AT, RA);
393 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
394 nop();
395 li32(T9, entry - pc());
396 daddu(T9, T9, RA);
397 move(RA, AT);
398 jr(T9);
399 }
400 }
402 void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
403 addu_long(AT, base, offset);
404 ld_ptr(rt, 0, AT);
405 }
407 void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
408 addu_long(AT, base, offset);
409 st_ptr(rt, 0, AT);
410 }
412 void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
413 addu_long(AT, base, offset);
414 ld_long(rt, 0, AT);
415 }
417 void MacroAssembler::st_long(Register rt, Register offset, Register base) {
418 addu_long(AT, base, offset);
419 st_long(rt, 0, AT);
420 }
422 Address MacroAssembler::as_Address(AddressLiteral adr) {
423 return Address(adr.target(), adr.rspec());
424 }
426 Address MacroAssembler::as_Address(ArrayAddress adr) {
427 return Address::make_array(adr);
428 }
430 // tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
431 void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
432 Label again;
434 li(tmp_reg1, counter_addr);
435 bind(again);
436 if(!Use3A2000) sync();
437 ll(tmp_reg2, tmp_reg1, 0);
438 addi(tmp_reg2, tmp_reg2, inc);
439 sc(tmp_reg2, tmp_reg1, 0);
440 beq(tmp_reg2, R0, again);
441 delayed()->nop();
442 }
444 int MacroAssembler::biased_locking_enter(Register lock_reg,
445 Register obj_reg,
446 Register swap_reg,
447 Register tmp_reg,
448 bool swap_reg_contains_mark,
449 Label& done,
450 Label* slow_case,
451 BiasedLockingCounters* counters) {
452 assert(UseBiasedLocking, "why call this otherwise?");
453 bool need_tmp_reg = false;
454 if (tmp_reg == noreg) {
455 need_tmp_reg = true;
456 tmp_reg = T9;
457 }
458 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
459 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
460 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
461 Address saved_mark_addr(lock_reg, 0);
463 // Biased locking
464 // See whether the lock is currently biased toward our thread and
465 // whether the epoch is still valid
466 // Note that the runtime guarantees sufficient alignment of JavaThread
467 // pointers to allow age to be placed into low bits
468 // First check to see whether biasing is even enabled for this object
469 Label cas_label;
470 int null_check_offset = -1;
471 if (!swap_reg_contains_mark) {
472 null_check_offset = offset();
473 ld_ptr(swap_reg, mark_addr);
474 }
476 if (need_tmp_reg) {
477 push(tmp_reg);
478 }
479 move(tmp_reg, swap_reg);
480 andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
481 #ifdef _LP64
482 daddi(AT, R0, markOopDesc::biased_lock_pattern);
483 dsub(AT, AT, tmp_reg);
484 #else
485 addi(AT, R0, markOopDesc::biased_lock_pattern);
486 sub(AT, AT, tmp_reg);
487 #endif
488 if (need_tmp_reg) {
489 pop(tmp_reg);
490 }
492 bne(AT, R0, cas_label);
493 delayed()->nop();
496 // The bias pattern is present in the object's header. Need to check
497 // whether the bias owner and the epoch are both still current.
498 // Note that because there is no current thread register on MIPS we
499 // need to store off the mark word we read out of the object to
500 // avoid reloading it and needing to recheck invariants below. This
501 // store is unfortunate but it makes the overall code shorter and
502 // simpler.
503 st_ptr(swap_reg, saved_mark_addr);
504 if (need_tmp_reg) {
505 push(tmp_reg);
506 }
507 if (swap_reg_contains_mark) {
508 null_check_offset = offset();
509 }
510 load_prototype_header(tmp_reg, obj_reg);
511 xorr(tmp_reg, tmp_reg, swap_reg);
512 get_thread(swap_reg);
513 xorr(swap_reg, swap_reg, tmp_reg);
515 move(AT, ~((int) markOopDesc::age_mask_in_place));
516 andr(swap_reg, swap_reg, AT);
518 if (PrintBiasedLockingStatistics) {
519 Label L;
520 bne(swap_reg, R0, L);
521 delayed()->nop();
522 push(tmp_reg);
523 push(A0);
524 atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
525 pop(A0);
526 pop(tmp_reg);
527 bind(L);
528 }
529 if (need_tmp_reg) {
530 pop(tmp_reg);
531 }
532 beq(swap_reg, R0, done);
533 delayed()->nop();
534 Label try_revoke_bias;
535 Label try_rebias;
537 // At this point we know that the header has the bias pattern and
538 // that we are not the bias owner in the current epoch. We need to
539 // figure out more details about the state of the header in order to
540 // know what operations can be legally performed on the object's
541 // header.
543 // If the low three bits in the xor result aren't clear, that means
544 // the prototype header is no longer biased and we have to revoke
545 // the bias on this object.
547 move(AT, markOopDesc::biased_lock_mask_in_place);
548 andr(AT, swap_reg, AT);
549 bne(AT, R0, try_revoke_bias);
550 delayed()->nop();
551 // Biasing is still enabled for this data type. See whether the
552 // epoch of the current bias is still valid, meaning that the epoch
553 // bits of the mark word are equal to the epoch bits of the
554 // prototype header. (Note that the prototype header's epoch bits
555 // only change at a safepoint.) If not, attempt to rebias the object
556 // toward the current thread. Note that we must be absolutely sure
557 // that the current epoch is invalid in order to do this because
558 // otherwise the manipulations it performs on the mark word are
559 // illegal.
561 move(AT, markOopDesc::epoch_mask_in_place);
562 andr(AT,swap_reg, AT);
563 bne(AT, R0, try_rebias);
564 delayed()->nop();
565 // The epoch of the current bias is still valid but we know nothing
566 // about the owner; it might be set or it might be clear. Try to
567 // acquire the bias of the object using an atomic operation. If this
568 // fails we will go in to the runtime to revoke the object's bias.
569 // Note that we first construct the presumed unbiased header so we
570 // don't accidentally blow away another thread's valid bias.
572 ld_ptr(swap_reg, saved_mark_addr);
574 move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
575 andr(swap_reg, swap_reg, AT);
577 if (need_tmp_reg) {
578 push(tmp_reg);
579 }
580 get_thread(tmp_reg);
581 orr(tmp_reg, tmp_reg, swap_reg);
582 //if (os::is_MP()) {
583 // sync();
584 //}
585 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
586 if (need_tmp_reg) {
587 pop(tmp_reg);
588 }
589 // If the biasing toward our thread failed, this means that
590 // another thread succeeded in biasing it toward itself and we
591 // need to revoke that bias. The revocation will occur in the
592 // interpreter runtime in the slow case.
593 if (PrintBiasedLockingStatistics) {
594 Label L;
595 bne(AT, R0, L);
596 delayed()->nop();
597 push(tmp_reg);
598 push(A0);
599 atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
600 pop(A0);
601 pop(tmp_reg);
602 bind(L);
603 }
604 if (slow_case != NULL) {
605 beq_far(AT, R0, *slow_case);
606 delayed()->nop();
607 }
608 b(done);
609 delayed()->nop();
611 bind(try_rebias);
612 // At this point we know the epoch has expired, meaning that the
613 // current "bias owner", if any, is actually invalid. Under these
614 // circumstances _only_, we are allowed to use the current header's
615 // value as the comparison value when doing the cas to acquire the
616 // bias in the current epoch. In other words, we allow transfer of
617 // the bias from one thread to another directly in this situation.
618 //
619 // FIXME: due to a lack of registers we currently blow away the age
620 // bits in this situation. Should attempt to preserve them.
621 if (need_tmp_reg) {
622 push(tmp_reg);
623 }
624 load_prototype_header(tmp_reg, obj_reg);
625 get_thread(swap_reg);
626 orr(tmp_reg, tmp_reg, swap_reg);
627 ld_ptr(swap_reg, saved_mark_addr);
629 //if (os::is_MP()) {
630 // sync();
631 //}
632 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
633 if (need_tmp_reg) {
634 pop(tmp_reg);
635 }
636 // If the biasing toward our thread failed, then another thread
637 // succeeded in biasing it toward itself and we need to revoke that
638 // bias. The revocation will occur in the runtime in the slow case.
639 if (PrintBiasedLockingStatistics) {
640 Label L;
641 bne(AT, R0, L);
642 delayed()->nop();
643 push(AT);
644 push(tmp_reg);
645 atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
646 pop(tmp_reg);
647 pop(AT);
648 bind(L);
649 }
650 if (slow_case != NULL) {
651 beq_far(AT, R0, *slow_case);
652 delayed()->nop();
653 }
655 b(done);
656 delayed()->nop();
657 bind(try_revoke_bias);
658 // The prototype mark in the klass doesn't have the bias bit set any
659 // more, indicating that objects of this data type are not supposed
660 // to be biased any more. We are going to try to reset the mark of
661 // this object to the prototype value and fall through to the
662 // CAS-based locking scheme. Note that if our CAS fails, it means
663 // that another thread raced us for the privilege of revoking the
664 // bias of this particular object, so it's okay to continue in the
665 // normal locking code.
666 //
667 // FIXME: due to a lack of registers we currently blow away the age
668 // bits in this situation. Should attempt to preserve them.
669 ld_ptr(swap_reg, saved_mark_addr);
671 if (need_tmp_reg) {
672 push(tmp_reg);
673 }
674 load_prototype_header(tmp_reg, obj_reg);
675 //if (os::is_MP()) {
676 // lock();
677 //}
678 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
679 if (need_tmp_reg) {
680 pop(tmp_reg);
681 }
682 // Fall through to the normal CAS-based lock, because no matter what
683 // the result of the above CAS, some thread must have succeeded in
684 // removing the bias bit from the object's header.
685 if (PrintBiasedLockingStatistics) {
686 Label L;
687 bne(AT, R0, L);
688 delayed()->nop();
689 push(AT);
690 push(tmp_reg);
691 atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
692 pop(tmp_reg);
693 pop(AT);
694 bind(L);
695 }
697 bind(cas_label);
698 return null_check_offset;
699 }
701 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
702 assert(UseBiasedLocking, "why call this otherwise?");
704 // Check for biased locking unlock case, which is a no-op
705 // Note: we do not have to check the thread ID for two reasons.
706 // First, the interpreter checks for IllegalMonitorStateException at
707 // a higher level. Second, if the bias was revoked while we held the
708 // lock, the object could not be rebiased toward another thread, so
709 // the bias bit would be clear.
710 #ifdef _LP64
711 ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
712 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
713 daddi(AT, R0, markOopDesc::biased_lock_pattern);
714 #else
715 lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
716 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
717 addi(AT, R0, markOopDesc::biased_lock_pattern);
718 #endif
720 beq(AT, temp_reg, done);
721 delayed()->nop();
722 }
724 // NOTE: we dont increment the SP after call like the x86 version, maybe this is a problem, FIXME.
725 // the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf
726 // this method will handle the stack problem, you need not to preserve the stack space for the argument now
727 void MacroAssembler::call_VM_leaf_base(address entry_point,
728 int number_of_arguments) {
729 //call(RuntimeAddress(entry_point));
730 //increment(rsp, number_of_arguments * wordSize);
731 Label L, E;
733 assert(number_of_arguments <= 4, "just check");
735 andi(AT, SP, 0xf);
736 beq(AT, R0, L);
737 delayed()->nop();
738 daddi(SP, SP, -8);
739 call(entry_point, relocInfo::runtime_call_type);
740 delayed()->nop();
741 daddi(SP, SP, 8);
742 b(E);
743 delayed()->nop();
745 bind(L);
746 call(entry_point, relocInfo::runtime_call_type);
747 delayed()->nop();
748 bind(E);
749 }
752 void MacroAssembler::jmp(address entry) {
753 patchable_set48(T9, (long)entry);
754 jr(T9);
755 }
757 void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
758 switch (rtype) {
759 case relocInfo::runtime_call_type:
760 case relocInfo::none:
761 jmp(entry);
762 break;
763 default:
764 {
765 InstructionMark im(this);
766 relocate(rtype);
767 patchable_set48(T9, (long)entry);
768 jr(T9);
769 }
770 break;
771 }
772 }
774 void MacroAssembler::call(address entry) {
775 // c/c++ code assume T9 is entry point, so we just always move entry to t9
776 // maybe there is some more graceful method to handle this. FIXME
777 // For more info, see class NativeCall.
778 #ifndef _LP64
779 move(T9, (int)entry);
780 #else
781 patchable_set48(T9, (long)entry);
782 #endif
783 jalr(T9);
784 }
786 void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
787 switch (rtype) {
788 case relocInfo::runtime_call_type:
789 case relocInfo::none:
790 call(entry);
791 break;
792 default:
793 {
794 InstructionMark im(this);
795 relocate(rtype);
796 call(entry);
797 }
798 break;
799 }
800 }
802 void MacroAssembler::call(address entry, RelocationHolder& rh)
803 {
804 switch (rh.type()) {
805 case relocInfo::runtime_call_type:
806 case relocInfo::none:
807 call(entry);
808 break;
809 default:
810 {
811 InstructionMark im(this);
812 relocate(rh);
813 call(entry);
814 }
815 break;
816 }
817 }
819 void MacroAssembler::ic_call(address entry) {
820 RelocationHolder rh = virtual_call_Relocation::spec(pc());
821 patchable_set48(IC_Klass, (long)Universe::non_oop_word());
822 assert(entry != NULL, "call most probably wrong");
823 InstructionMark im(this);
824 relocate(rh);
825 patchable_call(entry);
826 }
828 void MacroAssembler::c2bool(Register r) {
829 Label L;
830 Assembler::beq(r, R0, L);
831 delayed()->nop();
832 move(r, 1);
833 bind(L);
834 }
836 #ifndef PRODUCT
837 extern "C" void findpc(intptr_t x);
838 #endif
840 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
841 // In order to get locks to work, we need to fake a in_VM state
842 JavaThread* thread = JavaThread::current();
843 JavaThreadState saved_state = thread->thread_state();
844 thread->set_thread_state(_thread_in_vm);
845 if (ShowMessageBoxOnError) {
846 JavaThread* thread = JavaThread::current();
847 JavaThreadState saved_state = thread->thread_state();
848 thread->set_thread_state(_thread_in_vm);
849 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
850 ttyLocker ttyl;
851 BytecodeCounter::print();
852 }
853 // To see where a verify_oop failed, get $ebx+40/X for this frame.
854 // This is the value of eip which points to where verify_oop will return.
855 if (os::message_box(msg, "Execution stopped, print registers?")) {
856 ttyLocker ttyl;
857 tty->print_cr("eip = 0x%08x", eip);
858 #ifndef PRODUCT
859 tty->cr();
860 findpc(eip);
861 tty->cr();
862 #endif
863 tty->print_cr("rax, = 0x%08x", rax);
864 tty->print_cr("rbx, = 0x%08x", rbx);
865 tty->print_cr("rcx = 0x%08x", rcx);
866 tty->print_cr("rdx = 0x%08x", rdx);
867 tty->print_cr("rdi = 0x%08x", rdi);
868 tty->print_cr("rsi = 0x%08x", rsi);
869 tty->print_cr("rbp, = 0x%08x", rbp);
870 tty->print_cr("rsp = 0x%08x", rsp);
871 BREAKPOINT;
872 }
873 } else {
874 ttyLocker ttyl;
875 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
876 assert(false, "DEBUG MESSAGE");
877 }
878 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
879 }
881 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
882 if ( ShowMessageBoxOnError ) {
883 JavaThreadState saved_state = JavaThread::current()->thread_state();
884 JavaThread::current()->set_thread_state(_thread_in_vm);
885 {
886 // In order to get locks work, we need to fake a in_VM state
887 ttyLocker ttyl;
888 ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
889 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
890 BytecodeCounter::print();
891 }
893 // if (os::message_box(msg, "Execution stopped, print registers?"))
894 // regs->print(::tty);
895 }
896 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
897 }
898 else
899 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
900 }
903 void MacroAssembler::stop(const char* msg) {
904 li(A0, (long)msg);
905 #ifndef _LP64
906 //reserver space for argument. added by yjl 7/10/2005
907 addiu(SP, SP, - 1 * wordSize);
908 #endif
909 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
910 delayed()->nop();
911 #ifndef _LP64
912 //restore space for argument
913 addiu(SP, SP, 1 * wordSize);
914 #endif
915 brk(17);
916 }
918 void MacroAssembler::warn(const char* msg) {
919 #ifdef _LP64
920 pushad();
921 li(A0, (long)msg);
922 push(S2);
923 move(AT, -(StackAlignmentInBytes));
924 move(S2, SP); // use S2 as a sender SP holder
925 andr(SP, SP, AT); // align stack as required by ABI
926 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
927 delayed()->nop();
928 move(SP, S2); // use S2 as a sender SP holder
929 pop(S2);
930 popad();
931 #else
932 pushad();
933 addi(SP, SP, -4);
934 sw(A0, SP, -1 * wordSize);
935 li(A0, (long)msg);
936 addi(SP, SP, -1 * wordSize);
937 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
938 delayed()->nop();
939 addi(SP, SP, 1 * wordSize);
940 lw(A0, SP, -1 * wordSize);
941 addi(SP, SP, 4);
942 popad();
943 #endif
944 }
946 void MacroAssembler::print_reg(Register reg) {
947 /*
948 char *s = getenv("PRINT_REG");
949 if (s == NULL)
950 return;
951 if (strcmp(s, "1") != 0)
952 return;
953 */
954 void * cur_pc = pc();
955 pushad();
956 NOT_LP64(push(FP);)
958 li(A0, (long)reg->name());
959 if (reg == SP)
960 addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
961 else if (reg == A0)
962 ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
963 else
964 move(A1, reg);
965 li(A2, (long)cur_pc);
966 push(S2);
967 move(AT, -(StackAlignmentInBytes));
968 move(S2, SP); // use S2 as a sender SP holder
969 andr(SP, SP, AT); // align stack as required by ABI
970 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
971 delayed()->nop();
972 move(SP, S2); // use S2 as a sender SP holder
973 pop(S2);
974 NOT_LP64(pop(FP);)
975 popad();
977 /*
978 pushad();
979 #ifdef _LP64
980 if (reg == SP)
981 addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
982 else
983 move(A0, reg);
984 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
985 delayed()->nop();
986 #else
987 push(FP);
988 move(A0, reg);
989 dsrl32(A1, reg, 0);
990 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
991 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
992 delayed()->nop();
993 pop(FP);
994 #endif
995 popad();
996 pushad();
997 NOT_LP64(push(FP);)
998 char b[50];
999 sprintf((char *)b, " pc: %p\n",cur_pc);
1000 li(A0, (long)(char *)b);
1001 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1002 delayed()->nop();
1003 NOT_LP64(pop(FP);)
1004 popad();
1005 */
1006 }
1008 void MacroAssembler::print_reg(FloatRegister reg) {
1009 void * cur_pc = pc();
1010 pushad();
1011 NOT_LP64(push(FP);)
1012 li(A0, (long)reg->name());
1013 push(S2);
1014 move(AT, -(StackAlignmentInBytes));
1015 move(S2, SP); // use S2 as a sender SP holder
1016 andr(SP, SP, AT); // align stack as required by ABI
1017 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1018 delayed()->nop();
1019 move(SP, S2); // use S2 as a sender SP holder
1020 pop(S2);
1021 NOT_LP64(pop(FP);)
1022 popad();
1024 pushad();
1025 NOT_LP64(push(FP);)
1026 #if 1
1027 move(FP, SP);
1028 move(AT, -(StackAlignmentInBytes));
1029 andr(SP , SP , AT);
1030 mov_d(F12, reg);
1031 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
1032 delayed()->nop();
1033 move(SP, FP);
1034 #else
1035 mov_s(F12, reg);
1036 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
1037 //delayed()->nop();
1038 #endif
1039 NOT_LP64(pop(FP);)
1040 popad();
1042 #if 0
1043 pushad();
1044 NOT_LP64(push(FP);)
1045 char* b = new char[50];
1046 sprintf(b, " pc: %p\n", cur_pc);
1047 li(A0, (long)b);
1048 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1049 delayed()->nop();
1050 NOT_LP64(pop(FP);)
1051 popad();
1052 #endif
1053 }
1055 void MacroAssembler::increment(Register reg, int imm) {
1056 if (!imm) return;
1057 if (is_simm16(imm)) {
1058 #ifdef _LP64
1059 daddiu(reg, reg, imm);
1060 #else
1061 addiu(reg, reg, imm);
1062 #endif
1063 } else {
1064 move(AT, imm);
1065 #ifdef _LP64
1066 daddu(reg, reg, AT);
1067 #else
1068 addu(reg, reg, AT);
1069 #endif
1070 }
1071 }
1073 void MacroAssembler::decrement(Register reg, int imm) {
1074 increment(reg, -imm);
1075 }
1078 void MacroAssembler::call_VM(Register oop_result,
1079 address entry_point,
1080 bool check_exceptions) {
1081 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
1082 }
1084 void MacroAssembler::call_VM(Register oop_result,
1085 address entry_point,
1086 Register arg_1,
1087 bool check_exceptions) {
1088 if (arg_1!=A1) move(A1, arg_1);
1089 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
1090 }
1092 void MacroAssembler::call_VM(Register oop_result,
1093 address entry_point,
1094 Register arg_1,
1095 Register arg_2,
1096 bool check_exceptions) {
1097 if (arg_1!=A1) move(A1, arg_1);
1098 if (arg_2!=A2) move(A2, arg_2);
1099 assert(arg_2 != A1, "smashed argument");
1100 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
1101 }
1103 void MacroAssembler::call_VM(Register oop_result,
1104 address entry_point,
1105 Register arg_1,
1106 Register arg_2,
1107 Register arg_3,
1108 bool check_exceptions) {
1109 if (arg_1!=A1) move(A1, arg_1);
1110 if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1111 if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
1112 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
1113 }
1115 void MacroAssembler::call_VM(Register oop_result,
1116 Register last_java_sp,
1117 address entry_point,
1118 int number_of_arguments,
1119 bool check_exceptions) {
1120 call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
1121 }
1123 void MacroAssembler::call_VM(Register oop_result,
1124 Register last_java_sp,
1125 address entry_point,
1126 Register arg_1,
1127 bool check_exceptions) {
1128 if (arg_1 != A1) move(A1, arg_1);
1129 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
1130 }
1132 void MacroAssembler::call_VM(Register oop_result,
1133 Register last_java_sp,
1134 address entry_point,
1135 Register arg_1,
1136 Register arg_2,
1137 bool check_exceptions) {
1138 if (arg_1 != A1) move(A1, arg_1);
1139 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1140 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
1141 }
1143 void MacroAssembler::call_VM(Register oop_result,
1144 Register last_java_sp,
1145 address entry_point,
1146 Register arg_1,
1147 Register arg_2,
1148 Register arg_3,
1149 bool check_exceptions) {
1150 if (arg_1 != A1) move(A1, arg_1);
1151 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1152 if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
1153 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
1154 }
1156 void MacroAssembler::call_VM_base(Register oop_result,
1157 Register java_thread,
1158 Register last_java_sp,
1159 address entry_point,
1160 int number_of_arguments,
1161 bool check_exceptions) {
1163 address before_call_pc;
1164 // determine java_thread register
1165 if (!java_thread->is_valid()) {
1166 #ifndef OPT_THREAD
1167 java_thread = T2;
1168 get_thread(java_thread);
1169 #else
1170 java_thread = TREG;
1171 #endif
1172 }
1173 // determine last_java_sp register
1174 if (!last_java_sp->is_valid()) {
1175 last_java_sp = SP;
1176 }
1177 // debugging support
1178 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
1179 assert(number_of_arguments <= 4 , "cannot have negative number of arguments");
1180 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
1181 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
1183 assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
1185 // set last Java frame before call
1186 before_call_pc = (address)pc();
1187 set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
1189 // do the call
1190 move(A0, java_thread);
1191 call(entry_point, relocInfo::runtime_call_type);
1192 delayed()->nop();
1194 // restore the thread (cannot use the pushed argument since arguments
1195 // may be overwritten by C code generated by an optimizing compiler);
1196 // however can use the register value directly if it is callee saved.
1197 #ifndef OPT_THREAD
1198 get_thread(java_thread);
1199 #else
1200 #ifdef ASSERT
1201 {
1202 Label L;
1203 get_thread(AT);
1204 beq(java_thread, AT, L);
1205 delayed()->nop();
1206 stop("MacroAssembler::call_VM_base: edi not callee saved?");
1207 bind(L);
1208 }
1209 #endif
1210 #endif
1212 // discard thread and arguments
1213 ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1214 // reset last Java frame
1215 reset_last_Java_frame(java_thread, false, true);
1217 check_and_handle_popframe(java_thread);
1218 check_and_handle_earlyret(java_thread);
1219 if (check_exceptions) {
1220 // check for pending exceptions (java_thread is set upon return)
1221 Label L;
1222 #ifdef _LP64
1223 ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1224 #else
1225 lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1226 #endif
1227 beq(AT, R0, L);
1228 delayed()->nop();
1229 li(AT, before_call_pc);
1230 push(AT);
1231 jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1232 delayed()->nop();
1233 bind(L);
1234 }
1236 // get oop result if there is one and reset the value in the thread
1237 if (oop_result->is_valid()) {
1238 #ifdef _LP64
1239 ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1240 sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1241 #else
1242 lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1243 sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1244 #endif
1245 verify_oop(oop_result);
1246 }
1247 }
1249 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1251 move(V0, SP);
1252 //we also reserve space for java_thread here
1253 #ifndef _LP64
1254 daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
1255 #endif
1256 move(AT, -(StackAlignmentInBytes));
1257 andr(SP, SP, AT);
1258 call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
1260 }
1262 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
1263 call_VM_leaf_base(entry_point, number_of_arguments);
1264 }
1266 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
1267 if (arg_0 != A0) move(A0, arg_0);
1268 call_VM_leaf(entry_point, 1);
1269 }
1271 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1272 if (arg_0 != A0) move(A0, arg_0);
1273 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
1274 call_VM_leaf(entry_point, 2);
1275 }
1277 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1278 if (arg_0 != A0) move(A0, arg_0);
1279 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
1280 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
1281 call_VM_leaf(entry_point, 3);
1282 }
1283 void MacroAssembler::super_call_VM_leaf(address entry_point) {
1284 MacroAssembler::call_VM_leaf_base(entry_point, 0);
1285 }
1288 void MacroAssembler::super_call_VM_leaf(address entry_point,
1289 Register arg_1) {
1290 if (arg_1 != A0) move(A0, arg_1);
1291 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1292 }
1295 void MacroAssembler::super_call_VM_leaf(address entry_point,
1296 Register arg_1,
1297 Register arg_2) {
1298 if (arg_1 != A0) move(A0, arg_1);
1299 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
1300 MacroAssembler::call_VM_leaf_base(entry_point, 2);
1301 }
1302 void MacroAssembler::super_call_VM_leaf(address entry_point,
1303 Register arg_1,
1304 Register arg_2,
1305 Register arg_3) {
1306 if (arg_1 != A0) move(A0, arg_1);
1307 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
1308 if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
1309 MacroAssembler::call_VM_leaf_base(entry_point, 3);
1310 }
1312 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
1313 }
1315 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
1316 }
1318 void MacroAssembler::null_check(Register reg, int offset) {
1319 if (needs_explicit_null_check(offset)) {
1320 // provoke OS NULL exception if reg = NULL by
1321 // accessing M[reg] w/o changing any (non-CC) registers
1322 // NOTE: cmpl is plenty here to provoke a segv
1323 lw(AT, reg, 0);
1324 // Note: should probably use testl(rax, Address(reg, 0));
1325 // may be shorter code (however, this version of
1326 // testl needs to be implemented first)
1327 } else {
1328 // nothing to do, (later) access of M[reg + offset]
1329 // will provoke OS NULL exception if reg = NULL
1330 }
1331 }
1333 void MacroAssembler::enter() {
1334 push2(RA, FP);
1335 move(FP, SP);
1336 }
1338 void MacroAssembler::leave() {
1339 #ifndef _LP64
1340 //move(SP, FP);
1341 //pop2(FP, RA);
1342 addi(SP, FP, 2 * wordSize);
1343 lw(RA, SP, - 1 * wordSize);
1344 lw(FP, SP, - 2 * wordSize);
1345 #else
1346 daddi(SP, FP, 2 * wordSize);
1347 ld(RA, SP, - 1 * wordSize);
1348 ld(FP, SP, - 2 * wordSize);
1349 #endif
1350 }
1351 /*
1352 void MacroAssembler::os_breakpoint() {
1353 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
1354 // (e.g., MSVC can't call ps() otherwise)
1355 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
1356 }
1357 */
1358 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
1359 // determine java_thread register
1360 if (!java_thread->is_valid()) {
1361 #ifndef OPT_THREAD
1362 java_thread = T1;
1363 get_thread(java_thread);
1364 #else
1365 java_thread = TREG;
1366 #endif
1367 }
1368 // we must set sp to zero to clear frame
1369 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1370 // must clear fp, so that compiled frames are not confused; it is possible
1371 // that we need it only for debugging
1372 if(clear_fp)
1373 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
1375 if (clear_pc)
1376 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
1377 }
1379 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
1380 bool clear_pc) {
1381 Register thread = TREG;
1382 #ifndef OPT_THREAD
1383 get_thread(thread);
1384 #endif
1385 // we must set sp to zero to clear frame
1386 sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
1387 // must clear fp, so that compiled frames are not confused; it is
1388 // possible that we need it only for debugging
1389 if (clear_fp) {
1390 sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
1391 }
1393 if (clear_pc) {
1394 sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
1395 }
1396 }
1398 // Write serialization page so VM thread can do a pseudo remote membar.
1399 // We use the current thread pointer to calculate a thread specific
1400 // offset to write to within the page. This minimizes bus traffic
1401 // due to cache line collision.
1402 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
1403 move(tmp, thread);
1404 srl(tmp, tmp,os::get_serialize_page_shift_count());
1405 move(AT, (os::vm_page_size() - sizeof(int)));
1406 andr(tmp, tmp,AT);
1407 sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page()));
1408 }
1410 // Calls to C land
1411 //
1412 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
1413 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
1414 // has to be reset to 0. This is required to allow proper stack traversal.
1415 void MacroAssembler::set_last_Java_frame(Register java_thread,
1416 Register last_java_sp,
1417 Register last_java_fp,
1418 address last_java_pc) {
1419 // determine java_thread register
1420 if (!java_thread->is_valid()) {
1421 #ifndef OPT_THREAD
1422 java_thread = T2;
1423 get_thread(java_thread);
1424 #else
1425 java_thread = TREG;
1426 #endif
1427 }
1428 // determine last_java_sp register
1429 if (!last_java_sp->is_valid()) {
1430 last_java_sp = SP;
1431 }
1433 // last_java_fp is optional
1435 if (last_java_fp->is_valid()) {
1436 st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
1437 }
1439 // last_java_pc is optional
1441 if (last_java_pc != NULL) {
1442 relocate(relocInfo::internal_pc_type);
1443 patchable_set48(AT, (long)last_java_pc);
1444 st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
1445 }
1446 st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1447 }
1449 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
1450 Register last_java_fp,
1451 address last_java_pc) {
1452 // determine last_java_sp register
1453 if (!last_java_sp->is_valid()) {
1454 last_java_sp = SP;
1455 }
1457 Register thread = TREG;
1458 #ifndef OPT_THREAD
1459 get_thread(thread);
1460 #endif
1461 // last_java_fp is optional
1462 if (last_java_fp->is_valid()) {
1463 sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
1464 }
1466 // last_java_pc is optional
1467 if (last_java_pc != NULL) {
1468 Address java_pc(thread,
1469 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
1470 li(AT, (intptr_t)(last_java_pc));
1471 sd(AT, java_pc);
1472 }
1474 sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
1475 }
1478 //////////////////////////////////////////////////////////////////////////////////
1479 #if INCLUDE_ALL_GCS
1481 void MacroAssembler::g1_write_barrier_pre(Register obj,
1482 Register pre_val,
1483 Register thread,
1484 Register tmp,
1485 bool tosca_live,
1486 bool expand_call) {
1488 // If expand_call is true then we expand the call_VM_leaf macro
1489 // directly to skip generating the check by
1490 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
1492 #ifdef _LP64
1493 assert(thread == TREG, "must be");
1494 #endif // _LP64
1496 Label done;
1497 Label runtime;
1499 assert(pre_val != noreg, "check this code");
1501 if (obj != noreg) {
1502 assert_different_registers(obj, pre_val, tmp);
1503 assert(pre_val != V0, "check this code");
1504 }
1506 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1507 PtrQueue::byte_offset_of_active()));
1508 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1509 PtrQueue::byte_offset_of_index()));
1510 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1511 PtrQueue::byte_offset_of_buf()));
1514 // Is marking active?
1515 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
1516 lw(AT, in_progress);
1517 } else {
1518 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
1519 lb(AT, in_progress);
1520 }
1521 beq(AT, R0, done);
1522 nop();
1524 // Do we need to load the previous value?
1525 if (obj != noreg) {
1526 load_heap_oop(pre_val, Address(obj, 0));
1527 }
1529 // Is the previous value null?
1530 beq(pre_val, R0, done);
1531 nop();
1533 // Can we store original value in the thread's buffer?
1534 // Is index == 0?
1535 // (The index field is typed as size_t.)
1537 ld(tmp, index);
1538 beq(tmp, R0, runtime);
1539 nop();
1541 daddiu(tmp, tmp, -1 * wordSize);
1542 sd(tmp, index);
1543 ld(AT, buffer);
1544 daddu(tmp, tmp, AT);
1546 // Record the previous value
1547 sd(pre_val, tmp, 0);
1548 beq(R0, R0, done);
1549 nop();
1551 bind(runtime);
1552 // save the live input values
1553 if (tosca_live) push(V0);
1555 if (obj != noreg && obj != V0) push(obj);
1557 if (pre_val != V0) push(pre_val);
1559 // Calling the runtime using the regular call_VM_leaf mechanism generates
1560 // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
1561 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
1562 //
1563 // If we care generating the pre-barrier without a frame (e.g. in the
1564 // intrinsified Reference.get() routine) then ebp might be pointing to
1565 // the caller frame and so this check will most likely fail at runtime.
1566 //
1567 // Expanding the call directly bypasses the generation of the check.
1568 // So when we do not have have a full interpreter frame on the stack
1569 // expand_call should be passed true.
1571 NOT_LP64( push(thread); )
1573 if (expand_call) {
1574 LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
1575 if (thread != A1) move(A1, thread);
1576 if (pre_val != A0) move(A0, pre_val);
1577 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
1578 } else {
1579 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
1580 }
1582 NOT_LP64( pop(thread); )
1584 // save the live input values
1585 if (pre_val != V0)
1586 pop(pre_val);
1588 if (obj != noreg && obj != V0)
1589 pop(obj);
1591 if(tosca_live) pop(V0);
1593 bind(done);
1594 }
1596 void MacroAssembler::g1_write_barrier_post(Register store_addr,
1597 Register new_val,
1598 Register thread,
1599 Register tmp,
1600 Register tmp2) {
1601 assert(tmp != AT, "must be");
1602 assert(tmp2 != AT, "must be");
1603 #ifdef _LP64
1604 assert(thread == TREG, "must be");
1605 #endif // _LP64
1607 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1608 PtrQueue::byte_offset_of_index()));
1609 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1610 PtrQueue::byte_offset_of_buf()));
1612 BarrierSet* bs = Universe::heap()->barrier_set();
1613 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1614 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1616 Label done;
1617 Label runtime;
1619 // Does store cross heap regions?
1620 xorr(AT, store_addr, new_val);
1621 dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
1622 beq(AT, R0, done);
1623 nop();
1626 // crosses regions, storing NULL?
1627 beq(new_val, R0, done);
1628 nop();
1630 // storing region crossing non-NULL, is card already dirty?
1631 const Register card_addr = tmp;
1632 const Register cardtable = tmp2;
1634 move(card_addr, store_addr);
1635 dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
1636 // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
1637 // a valid address and therefore is not properly handled by the relocation code.
1638 set64(cardtable, (intptr_t)ct->byte_map_base);
1639 daddu(card_addr, card_addr, cardtable);
1641 lb(AT, card_addr, 0);
1642 daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
1643 beq(AT, R0, done);
1644 nop();
1646 sync();
1647 lb(AT, card_addr, 0);
1648 daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val());
1649 beq(AT, R0, done);
1650 nop();
1653 // storing a region crossing, non-NULL oop, card is clean.
1654 // dirty card and log.
1655 move(AT, (int)CardTableModRefBS::dirty_card_val());
1656 sb(AT, card_addr, 0);
1658 lw(AT, queue_index);
1659 beq(AT, R0, runtime);
1660 nop();
1661 daddiu(AT, AT, -1 * wordSize);
1662 sw(AT, queue_index);
1663 ld(tmp2, buffer);
1664 #ifdef _LP64
1665 ld(AT, queue_index);
1666 daddu(tmp2, tmp2, AT);
1667 sd(card_addr, tmp2, 0);
1668 #else
1669 lw(AT, queue_index);
1670 addu32(tmp2, tmp2, AT);
1671 sw(card_addr, tmp2, 0);
1672 #endif
1673 beq(R0, R0, done);
1674 nop();
1676 bind(runtime);
1677 // save the live input values
1678 push(store_addr);
1679 push(new_val);
1680 #ifdef _LP64
1681 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
1682 #else
1683 push(thread);
1684 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
1685 pop(thread);
1686 #endif
1687 pop(new_val);
1688 pop(store_addr);
1690 bind(done);
1691 }
1693 #endif // INCLUDE_ALL_GCS
1694 //////////////////////////////////////////////////////////////////////////////////
1697 void MacroAssembler::store_check(Register obj) {
1698 // Does a store check for the oop in register obj. The content of
1699 // register obj is destroyed afterwards.
1700 store_check_part_1(obj);
1701 store_check_part_2(obj);
1702 }
1704 void MacroAssembler::store_check(Register obj, Address dst) {
1705 store_check(obj);
1706 }
1709 // split the store check operation so that other instructions can be scheduled inbetween
1710 void MacroAssembler::store_check_part_1(Register obj) {
1711 BarrierSet* bs = Universe::heap()->barrier_set();
1712 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
1713 #ifdef _LP64
1714 dsrl(obj, obj, CardTableModRefBS::card_shift);
1715 #else
1716 shr(obj, CardTableModRefBS::card_shift);
1717 #endif
1718 }
1720 void MacroAssembler::store_check_part_2(Register obj) {
1721 BarrierSet* bs = Universe::heap()->barrier_set();
1722 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
1723 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1724 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1726 set64(AT, (long)ct->byte_map_base);
1727 #ifdef _LP64
1728 dadd(AT, AT, obj);
1729 #else
1730 add(AT, AT, obj);
1731 #endif
1732 if (UseConcMarkSweepGC) sync();
1733 sb(R0, AT, 0);
1734 }
1736 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
1737 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1738 Register t1, Register t2, Label& slow_case) {
1739 assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
1741 Register end = t2;
1742 #ifndef OPT_THREAD
1743 Register thread = t1;
1744 get_thread(thread);
1745 #else
1746 Register thread = TREG;
1747 #endif
1748 verify_tlab(t1, t2);//blows t1&t2
1750 ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
1752 if (var_size_in_bytes == NOREG) {
1753 // i dont think we need move con_size_in_bytes to a register first.
1754 // by yjl 8/17/2005
1755 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1756 addi(end, obj, con_size_in_bytes);
1757 } else {
1758 add(end, obj, var_size_in_bytes);
1759 }
1761 ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
1762 sltu(AT, AT, end);
1763 bne_far(AT, R0, slow_case);
1764 delayed()->nop();
1767 // update the tlab top pointer
1768 st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
1770 // recover var_size_in_bytes if necessary
1771 /*if (var_size_in_bytes == end) {
1772 sub(var_size_in_bytes, end, obj);
1773 }*/
1775 verify_tlab(t1, t2);
1776 }
1778 // Defines obj, preserves var_size_in_bytes
1779 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1780 Register t1, Register t2, Label& slow_case) {
1781 assert_different_registers(obj, var_size_in_bytes, t1, AT);
1782 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1783 // No allocation in the shared eden.
1784 b_far(slow_case);
1785 delayed()->nop();
1786 } else {
1788 #ifndef _LP64
1789 Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
1790 lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
1791 #else
1792 Address heap_top(t1);
1793 li(t1, (long)Universe::heap()->top_addr());
1794 #endif
1795 ld_ptr(obj, heap_top);
1797 Register end = t2;
1798 Label retry;
1800 bind(retry);
1801 if (var_size_in_bytes == NOREG) {
1802 // i dont think we need move con_size_in_bytes to a register first.
1803 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1804 addi(end, obj, con_size_in_bytes);
1805 } else {
1806 add(end, obj, var_size_in_bytes);
1807 }
1808 // if end < obj then we wrapped around => object too long => slow case
1809 sltu(AT, end, obj);
1810 bne_far(AT, R0, slow_case);
1811 delayed()->nop();
1813 li(AT, (long)Universe::heap()->end_addr());
1814 sltu(AT, AT, end);
1815 bne_far(AT, R0, slow_case);
1816 delayed()->nop();
1817 // Compare obj with the top addr, and if still equal, store the new top addr in
1818 // end at the address of the top addr pointer. Sets ZF if was equal, and clears
1819 // it otherwise. Use lock prefix for atomicity on MPs.
1820 //if (os::is_MP()) {
1821 // sync();
1822 //}
1824 // if someone beat us on the allocation, try again, otherwise continue
1825 cmpxchg(end, heap_top, obj);
1826 beq_far(AT, R0, retry); //by yyq
1827 delayed()->nop();
1829 }
1830 }
1832 // C2 doesn't invoke this one.
1833 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1834 Register top = T0;
1835 Register t1 = T1;
1836 /* Jin: tlab_refill() is called in
1838 [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
1840 In generate_code_for(), T2 has been assigned as a register(length), which is used
1841 after calling tlab_refill();
1842 Therefore, tlab_refill() should not use T2.
1844 Source:
1846 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
1847 at java.lang.System.arraycopy(Native Method)
1848 at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array
1849 at sun.misc.Resource.getBytes(Resource.java:117)
1850 at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
1851 at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
1852 at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
1853 */
1854 Register t2 = T9;
1855 Register t3 = T3;
1856 Register thread_reg = T8;
1857 Label do_refill, discard_tlab;
1858 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1859 // No allocation in the shared eden.
1860 b(slow_case);
1861 delayed()->nop();
1862 }
1864 get_thread(thread_reg);
1866 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1867 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1869 // calculate amount of free space
1870 sub(t1, t1, top);
1871 shr(t1, LogHeapWordSize);
1873 // Retain tlab and allocate object in shared space if
1874 // the amount free in the tlab is too large to discard.
1875 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1876 slt(AT, t2, t1);
1877 beq(AT, R0, discard_tlab);
1878 delayed()->nop();
1880 // Retain
1882 #ifndef _LP64
1883 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1884 #else
1885 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1886 #endif
1887 add(t2, t2, AT);
1888 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1890 if (TLABStats) {
1891 // increment number of slow_allocations
1892 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1893 addiu(AT, AT, 1);
1894 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1895 }
1896 b(try_eden);
1897 delayed()->nop();
1899 bind(discard_tlab);
1900 if (TLABStats) {
1901 // increment number of refills
1902 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1903 addi(AT, AT, 1);
1904 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1905 // accumulate wastage -- t1 is amount free in tlab
1906 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1907 add(AT, AT, t1);
1908 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1909 }
1911 // if tlab is currently allocated (top or end != null) then
1912 // fill [top, end + alignment_reserve) with array object
1913 beq(top, R0, do_refill);
1914 delayed()->nop();
1916 // set up the mark word
1917 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1918 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1920 // set the length to the remaining space
1921 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1922 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1923 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1924 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1926 // set klass to intArrayKlass
1927 #ifndef _LP64
1928 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
1929 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
1930 #else
1931 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
1932 ld_ptr(t1, AT, 0);
1933 #endif
1934 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
1935 store_klass(top, t1);
1937 // refill the tlab with an eden allocation
1938 bind(do_refill);
1939 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1940 shl(t1, LogHeapWordSize);
1941 // add object_size ??
1942 eden_allocate(top, t1, 0, t2, t3, slow_case);
1944 // Check that t1 was preserved in eden_allocate.
1945 #ifdef ASSERT
1946 if (UseTLAB) {
1947 Label ok;
1948 assert_different_registers(thread_reg, t1);
1949 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1950 shl(AT, LogHeapWordSize);
1951 beq(AT, t1, ok);
1952 delayed()->nop();
1953 stop("assert(t1 != tlab size)");
1954 should_not_reach_here();
1956 bind(ok);
1957 }
1958 #endif
1959 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
1960 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1961 add(top, top, t1);
1962 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
1963 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1964 verify_tlab(t1, t2);
1965 b(retry);
1966 delayed()->nop();
1967 }
1969 static const double pi_4 = 0.7853981633974483;
1971 // the x86 version is to clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME
1972 // must get argument(a double) in F12/F13
1973 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
1974 //We need to preseve the register which maybe modified during the Call @Jerome
1975 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
1976 //save all modified register here
1977 // if (preserve_cpu_regs) {
1978 // }
1979 //FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9
1980 pushad();
1981 //we should preserve the stack space before we call
1982 addi(SP, SP, -wordSize * 2);
1983 switch (trig){
1984 case 's' :
1985 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
1986 delayed()->nop();
1987 break;
1988 case 'c':
1989 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
1990 delayed()->nop();
1991 break;
1992 case 't':
1993 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
1994 delayed()->nop();
1995 break;
1996 default:assert (false, "bad intrinsic");
1997 break;
1999 }
2001 addi(SP, SP, wordSize * 2);
2002 popad();
2003 // if (preserve_cpu_regs) {
2004 // }
2005 }
2007 #ifdef _LP64
2008 void MacroAssembler::li(Register rd, long imm) {
2009 if (imm <= max_jint && imm >= min_jint) {
2010 li32(rd, (int)imm);
2011 } else if (julong(imm) <= 0xFFFFFFFF) {
2012 assert_not_delayed();
2013 // lui sign-extends, so we can't use that.
2014 ori(rd, R0, julong(imm) >> 16);
2015 dsll(rd, rd, 16);
2016 ori(rd, rd, split_low(imm));
2017 //aoqi_test
2018 //} else if ((imm > 0) && ((imm >> 48) == 0)) {
2019 } else if ((imm > 0) && is_simm16(imm >> 32)) {
2020 /* A 48-bit address */
2021 li48(rd, imm);
2022 } else {
2023 li64(rd, imm);
2024 }
2025 }
2026 #else
2027 void MacroAssembler::li(Register rd, long imm) {
2028 li32(rd, (int)imm);
2029 }
2030 #endif
2032 void MacroAssembler::li32(Register reg, int imm) {
2033 if (is_simm16(imm)) {
2034 /* Jin: for imm < 0, we should use addi instead of addiu.
2035 *
2036 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2037 *
2038 * 78 move [int:-1|I] [a0|I]
2039 * : daddi a0, zero, 0xffffffff (correct)
2040 * : daddiu a0, zero, 0xffffffff (incorrect)
2041 */
2042 if (imm >= 0)
2043 addiu(reg, R0, imm);
2044 else
2045 addi(reg, R0, imm);
2046 } else {
2047 lui(reg, split_low(imm >> 16));
2048 if (split_low(imm))
2049 ori(reg, reg, split_low(imm));
2050 }
2051 }
2053 #ifdef _LP64
2054 void MacroAssembler::set64(Register d, jlong value) {
2055 assert_not_delayed();
2057 int hi = (int)(value >> 32);
2058 int lo = (int)(value & ~0);
2060 if (value == lo) { // 32-bit integer
2061 if (is_simm16(value)) {
2062 daddiu(d, R0, value);
2063 } else {
2064 lui(d, split_low(value >> 16));
2065 if (split_low(value)) {
2066 ori(d, d, split_low(value));
2067 }
2068 }
2069 } else if (hi == 0) { // hardware zero-extends to upper 32
2070 ori(d, R0, julong(value) >> 16);
2071 dsll(d, d, 16);
2072 if (split_low(value)) {
2073 ori(d, d, split_low(value));
2074 }
2075 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2076 // 4 insts
2077 li48(d, value);
2078 } else { // li64
2079 // 6 insts
2080 li64(d, value);
2081 }
2082 }
2085 int MacroAssembler::insts_for_set64(jlong value) {
2086 int hi = (int)(value >> 32);
2087 int lo = (int)(value & ~0);
2089 int count = 0;
2091 if (value == lo) { // 32-bit integer
2092 if (is_simm16(value)) {
2093 //daddiu(d, R0, value);
2094 count++;
2095 } else {
2096 //lui(d, split_low(value >> 16));
2097 count++;
2098 if (split_low(value)) {
2099 //ori(d, d, split_low(value));
2100 count++;
2101 }
2102 }
2103 } else if (hi == 0) { // hardware zero-extends to upper 32
2104 //ori(d, R0, julong(value) >> 16);
2105 //dsll(d, d, 16);
2106 count += 2;
2107 if (split_low(value)) {
2108 //ori(d, d, split_low(value));
2109 count++;
2110 }
2111 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2112 // 4 insts
2113 //li48(d, value);
2114 count += 4;
2115 } else { // li64
2116 // 6 insts
2117 //li64(d, value);
2118 count += 6;
2119 }
2121 return count;
2122 }
2124 void MacroAssembler::patchable_set48(Register d, jlong value) {
2125 assert_not_delayed();
2127 int hi = (int)(value >> 32);
2128 int lo = (int)(value & ~0);
2130 int count = 0;
2132 if (value == lo) { // 32-bit integer
2133 if (is_simm16(value)) {
2134 daddiu(d, R0, value);
2135 count += 1;
2136 } else {
2137 lui(d, split_low(value >> 16));
2138 count += 1;
2139 if (split_low(value)) {
2140 ori(d, d, split_low(value));
2141 count += 1;
2142 }
2143 }
2144 } else if (hi == 0) { // hardware zero-extends to upper 32
2145 ori(d, R0, julong(value) >> 16);
2146 dsll(d, d, 16);
2147 count += 2;
2148 if (split_low(value)) {
2149 ori(d, d, split_low(value));
2150 count += 1;
2151 }
2152 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2153 // 4 insts
2154 li48(d, value);
2155 count += 4;
2156 } else { // li64
2157 tty->print_cr("value = 0x%x", value);
2158 guarantee(false, "Not supported yet !");
2159 }
2161 for (count; count < 4; count++) {
2162 nop();
2163 }
2164 }
2166 void MacroAssembler::patchable_set32(Register d, jlong value) {
2167 assert_not_delayed();
2169 int hi = (int)(value >> 32);
2170 int lo = (int)(value & ~0);
2172 int count = 0;
2174 if (value == lo) { // 32-bit integer
2175 if (is_simm16(value)) {
2176 daddiu(d, R0, value);
2177 count += 1;
2178 } else {
2179 lui(d, split_low(value >> 16));
2180 count += 1;
2181 if (split_low(value)) {
2182 ori(d, d, split_low(value));
2183 count += 1;
2184 }
2185 }
2186 } else if (hi == 0) { // hardware zero-extends to upper 32
2187 ori(d, R0, julong(value) >> 16);
2188 dsll(d, d, 16);
2189 count += 2;
2190 if (split_low(value)) {
2191 ori(d, d, split_low(value));
2192 count += 1;
2193 }
2194 } else {
2195 tty->print_cr("value = 0x%x", value);
2196 guarantee(false, "Not supported yet !");
2197 }
2199 for (count; count < 3; count++) {
2200 nop();
2201 }
2202 }
2204 void MacroAssembler::patchable_call32(Register d, jlong value) {
2205 assert_not_delayed();
2207 int hi = (int)(value >> 32);
2208 int lo = (int)(value & ~0);
2210 int count = 0;
2212 if (value == lo) { // 32-bit integer
2213 if (is_simm16(value)) {
2214 daddiu(d, R0, value);
2215 count += 1;
2216 } else {
2217 lui(d, split_low(value >> 16));
2218 count += 1;
2219 if (split_low(value)) {
2220 ori(d, d, split_low(value));
2221 count += 1;
2222 }
2223 }
2224 } else {
2225 tty->print_cr("value = 0x%x", value);
2226 guarantee(false, "Not supported yet !");
2227 }
2229 for (count; count < 2; count++) {
2230 nop();
2231 }
2232 }
2234 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2235 assert(UseCompressedClassPointers, "should only be used for compressed header");
2236 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2238 int klass_index = oop_recorder()->find_index(k);
2239 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2240 long narrowKlass = (long)Klass::encode_klass(k);
2242 relocate(rspec, Assembler::narrow_oop_operand);
2243 patchable_set48(dst, narrowKlass);
2244 }
2247 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2248 assert(UseCompressedOops, "should only be used for compressed header");
2249 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2251 int oop_index = oop_recorder()->find_index(obj);
2252 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2254 relocate(rspec, Assembler::narrow_oop_operand);
2255 patchable_set48(dst, oop_index);
2256 }
2258 void MacroAssembler::li64(Register rd, long imm) {
2259 assert_not_delayed();
2260 lui(rd, imm >> 48);
2261 ori(rd, rd, split_low(imm >> 32));
2262 dsll(rd, rd, 16);
2263 ori(rd, rd, split_low(imm >> 16));
2264 dsll(rd, rd, 16);
2265 ori(rd, rd, split_low(imm));
2266 }
2268 void MacroAssembler::li48(Register rd, long imm) {
2269 assert_not_delayed();
2270 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2271 lui(rd, imm >> 32);
2272 ori(rd, rd, split_low(imm >> 16));
2273 dsll(rd, rd, 16);
2274 ori(rd, rd, split_low(imm));
2275 }
2276 #endif
// Emit a call to the verify_oop stub for the oop held in `reg`.
// Emits nothing unless VerifyOops is set. The stub is entered with the
// message string in A0 and the oop in A1; its entry point is loaded
// indirectly through StubRoutines::verify_oop_subroutine_entry_address().
2277 // NOTE: unlike the i486 version, eax is not pushed here.
2278 // x86 saves eax because it uses eax as the jump register.
2279 void MacroAssembler::verify_oop(Register reg, const char* s) {
2280 /* x86 version, kept for reference:
2281 if (!VerifyOops) return;
2283 // Pass register number to verify_oop_subroutine
2284 char* b = new char[strlen(s) + 50];
2285 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
2286 push(rax); // save rax,
2287 push(reg); // pass register argument
2288 ExternalAddress buffer((address) b);
2289 // avoid using pushptr, as it modifies scratch registers
2290 // and our contract is not to modify anything
2291 movptr(rax, buffer.addr());
2292 push(rax);
2293 // call indirectly to solve generation ordering problem
2294 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2295 call(rax);
2296 */
2297 if (!VerifyOops) return;
// Build the diagnostic message "verify_oop: <register name>: <s>" and hand it
// to code_string(); the returned pointer is embedded in the emitted code.
2298 const char * b = NULL;
2299 stringStream ss;
2300 ss.print("verify_oop: %s: %s", reg->name(), s);
2301 b = code_string(ss.as_string());
2302 #ifdef _LP64
// 64-bit: save registers with pushad(), call the stub, restore with popad().
2303 pushad();
2304 move(A1, reg);
2305 li(A0, (long)b);
2306 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2307 ld(T9, AT, 0);
2308 jalr(T9);
2309 delayed()->nop();
2310 popad();
2311 #else
2312 // Pass register number to verify_oop_subroutine
// 32-bit: the seven registers are stored just below SP first and SP is
// lowered afterwards, so T9 ends up at SP+0 and T0 at SP+6*wordSize.
2313 sw(T0, SP, - wordSize);
2314 sw(T1, SP, - 2*wordSize);
2315 sw(RA, SP, - 3*wordSize);
2316 sw(A0, SP ,- 4*wordSize);
2317 sw(A1, SP ,- 5*wordSize);
2318 sw(AT, SP ,- 6*wordSize);
2319 sw(T9, SP ,- 7*wordSize);
2320 addiu(SP, SP, - 7 * wordSize);
2321 move(A1, reg);
2322 li(A0, (long)b);
2323 // call indirectly to solve generation ordering problem
2324 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2325 lw(T9, AT, 0);
2326 jalr(T9);
2327 delayed()->nop();
// Reload in the mirrored slot order, then release the 7-word frame.
2328 lw(T0, SP, 6* wordSize);
2329 lw(T1, SP, 5* wordSize);
2330 lw(RA, SP, 4* wordSize);
2331 lw(A0, SP, 3* wordSize);
2332 lw(A1, SP, 2* wordSize);
2333 lw(AT, SP, 1* wordSize);
2334 lw(T9, SP, 0* wordSize);
2335 addiu(SP, SP, 7 * wordSize);
2336 #endif
2337 }
2340 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2341 if (!VerifyOops) {
2342 nop();
2343 return;
2344 }
2345 // Pass register number to verify_oop_subroutine
2346 const char * b = NULL;
2347 stringStream ss;
2348 ss.print("verify_oop_addr: %s", s);
2349 b = code_string(ss.as_string());
2351 st_ptr(T0, SP, - wordSize);
2352 st_ptr(T1, SP, - 2*wordSize);
2353 st_ptr(RA, SP, - 3*wordSize);
2354 st_ptr(A0, SP, - 4*wordSize);
2355 st_ptr(A1, SP, - 5*wordSize);
2356 st_ptr(AT, SP, - 6*wordSize);
2357 st_ptr(T9, SP, - 7*wordSize);
2358 ld_ptr(A1, addr); // addr may use SP, so load from it before change SP
2359 addiu(SP, SP, - 7 * wordSize);
2361 li(A0, (long)b);
2362 // call indirectly to solve generation ordering problem
2363 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2364 ld_ptr(T9, AT, 0);
2365 jalr(T9);
2366 delayed()->nop();
2367 ld_ptr(T0, SP, 6* wordSize);
2368 ld_ptr(T1, SP, 5* wordSize);
2369 ld_ptr(RA, SP, 4* wordSize);
2370 ld_ptr(A0, SP, 3* wordSize);
2371 ld_ptr(A1, SP, 2* wordSize);
2372 ld_ptr(AT, SP, 1* wordSize);
2373 ld_ptr(T9, SP, 0* wordSize);
2374 addiu(SP, SP, 7 * wordSize);
2375 }
// Emit the body of the verify_oop stub.
// The generated code bumps the verify_oop counter, accepts a NULL oop,
// checks (oop & verify_oop_mask) == verify_oop_bits, and checks that the
// klass loaded from the oop is non-NULL. On success it returns through RA;
// on failure it calls MacroAssembler::debug between pushad()/popad() and then
// returns through RA.
2377 // used registers : T0, AT (T1 is only touched by the disabled #if 0 section)
2378 void MacroAssembler::verify_oop_subroutine() {
2379 // RA: ra
2380 // A0: char* error message
2381 // A1: oop object to verify
2383 Label exit, error;
2384 // increment counter
// The counter is read and written with 32-bit lw/sw.
2385 li(T0, (long)StubRoutines::verify_oop_count_addr());
2386 lw(AT, T0, 0);
2387 #ifdef _LP64
2388 daddi(AT, AT, 1);
2389 #else
2390 addi(AT, AT, 1);
2391 #endif
2392 sw(AT, T0, 0);
2394 // make sure object is 'reasonable'
2395 beq(A1, R0, exit); // if obj is NULL it is ok
2396 delayed()->nop();
2398 // Check if the oop is in the right area of memory
2399 //const int oop_mask = Universe::verify_oop_mask();
2400 //const int oop_bits = Universe::verify_oop_bits();
2401 const uintptr_t oop_mask = Universe::verify_oop_mask();
2402 const uintptr_t oop_bits = Universe::verify_oop_bits();
2403 li(AT, oop_mask);
2404 andr(T0, A1, AT);
2405 li(AT, oop_bits);
2406 bne(T0, AT, error);
2407 delayed()->nop();
2409 // make sure klass is 'reasonable'
2410 //add for compressedoops
2411 reinit_heapbase();
2412 //add for compressedoops
2413 load_klass(T0, A1);
2414 beq(T0, R0, error); // if klass is NULL it is broken
2415 delayed()->nop();
// The klass range checks below are compiled out (see the FIXME).
2416 #if 0
2417 //FIXME:wuhui.
2418 // Check if the klass is in the right area of memory
2419 //const int klass_mask = Universe::verify_klass_mask();
2420 //const int klass_bits = Universe::verify_klass_bits();
2421 const uintptr_t klass_mask = Universe::verify_klass_mask();
2422 const uintptr_t klass_bits = Universe::verify_klass_bits();
2424 li(AT, klass_mask);
2425 andr(T1, T0, AT);
2426 li(AT, klass_bits);
2427 bne(T1, AT, error);
2428 delayed()->nop();
2429 // make sure klass' klass is 'reasonable'
2430 //add for compressedoops
2431 load_klass(T0, T0);
2432 beq(T0, R0, error); // if klass' klass is NULL it is broken
2433 delayed()->nop();
2435 li(AT, klass_mask);
2436 andr(T1, T0, AT);
2437 li(AT, klass_bits);
2438 bne(T1, AT, error);
2439 delayed()->nop(); // if klass not in right area of memory it is broken too.
2440 #endif
2441 // return if everything seems ok
2442 bind(exit);
2444 jr(RA);
2445 delayed()->nop();
2447 // handle errors
// A0 still holds the message pointer passed in by the caller at this point.
// On the 32-bit build one extra word is reserved on the stack around the call.
2448 bind(error);
2449 pushad();
2450 #ifndef _LP64
2451 addi(SP, SP, (-1) * wordSize);
2452 #endif
2453 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2454 delayed()->nop();
2455 #ifndef _LP64
2456 addiu(SP, SP, 1 * wordSize);
2457 #endif
2458 popad();
2459 jr(RA);
2460 delayed()->nop();
2461 }
2463 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2464 #ifdef ASSERT
2465 assert_different_registers(t1, t2, AT);
2466 if (UseTLAB && VerifyOops) {
2467 Label next, ok;
2469 get_thread(t1);
2471 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2472 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2473 sltu(AT, t2, AT);
2474 beq(AT, R0, next);
2475 delayed()->nop();
2477 stop("assert(top >= start)");
2479 bind(next);
2480 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2481 sltu(AT, AT, t2);
2482 beq(AT, R0, ok);
2483 delayed()->nop();
2485 stop("assert(top <= end)");
2487 bind(ok);
2489 }
2490 #endif
2491 }
2492 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2493 Register tmp,
2494 int offset) {
2495 intptr_t value = *delayed_value_addr;
2496 if (value != 0)
2497 return RegisterOrConstant(value + offset);
2498 AddressLiteral a(delayed_value_addr);
2499 // load indirectly to solve generation ordering problem
2500 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2501 //ld(tmp, a);
2502 if (offset != 0)
2503 daddi(tmp,tmp, offset);
2505 return RegisterOrConstant(tmp);
2506 }
// Swap the two low bytes of reg (signed 16-bit variant). Clobbers AT.
// The old low byte is moved to bits 15..8 with an arithmetic shift, so it is
// sign-extended into the upper bits; the old bits 8 and up are shifted down
// into AT and OR-ed in.
// NOTE(review): the initial 0xffff mask is commented out, so this appears to
// assume bits above 15 are already clear on entry -- confirm with callers.
2508 void MacroAssembler::hswap(Register reg) {
2509 //short
2510 //andi(reg, reg, 0xffff);
2511 srl(AT, reg, 8);
2512 sll(reg, reg, 24);
2513 sra(reg, reg, 16);
2514 orr(reg, reg, AT);
2515 }
// Swap the two low bytes of reg (unsigned 16-bit variant). Clobbers AT.
// The _LP64 path masks the result to 16 bits at the end; the 32-bit path
// uses logical shifts and applies no mask.
// NOTE(review): without the mask the 32-bit path appears to assume bits above
// 15 are clear on entry -- confirm with callers.
2517 void MacroAssembler::huswap(Register reg) {
2518 #ifdef _LP64
2519 dsrl(AT, reg, 8);
2520 dsll(reg, reg, 24);
2521 dsrl(reg, reg, 16);
2522 orr(reg, reg, AT);
2523 andi(reg, reg, 0xffff);
2524 #else
2525 //andi(reg, reg, 0xffff);
2526 srl(AT, reg, 8);
2527 sll(reg, reg, 24);
2528 srl(reg, reg, 16);
2529 orr(reg, reg, AT);
2530 #endif
2531 }
// Reverse the four bytes of the 32-bit value in reg.
2533 // A small trick lets this be done with only one extra register, AT.
2534 // 32 bits
// The inline comments track byte positions, with the input written as 1 2 3 4
// from most to least significant byte: a rotate right by 8 gives 4 1 2 3, and
// the XOR steps then exchange the two bytes still out of place.
2535 void MacroAssembler::swap(Register reg) {
2536 srl(AT, reg, 8);
2537 sll(reg, reg, 24);
2538 orr(reg, reg, AT);
2539 //reg : 4 1 2 3
2540 srl(AT, AT, 16);
2541 xorr(AT, AT, reg);
2542 andi(AT, AT, 0xff);
2543 //AT : 0 0 0 1^3
2544 xorr(reg, reg, AT);
2545 //reg : 4 1 2 1
2546 sll(AT, AT, 16);
2547 xorr(reg, reg, AT);
2548 //reg : 4 3 2 1
2549 }
2551 #ifdef _LP64
2553 /* do 32-bit CAS using MIPS64 lld/scd
2555 Jin: cas_int should only compare 32 bits of the memory value.
2556 However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
2557 To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
2558 two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
2559 plus the high 32 bits of the memory value, are stored together with SCD.
2561 Example:
2563 double d = 3.1415926;
2564 System.err.println("hello" + d);
2566 sun.misc.FloatingDecimal$1.<init>()
2567 |
2568 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2570 38 cas_int [a7a7|J] [a0|I] [a6|I]
2571 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2572 // a6: 0x4ab325aa
2574 again:
2575 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2577 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2578 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2579 0x00000055647f3c68: dsll32 t8, t8, 0
2580 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2581 0x00000055647f3c70: sll zero, zero, 0
2583 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2584 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2585 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2586 0x00000055647f3c80: and v1, a6, v1
2587 0x00000055647f3c84: or at, t8, v1
2588 0x00000055647f3c88: scd at, 0x0(a7)
2589 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2590 0x00000055647f3c90: sll zero, zero, 0
2591 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2592 0x00000055647f3c98: sll zero, zero, 0
2593 nequal:
2594 0x00000055647f45a4: dadd a0, t9, zero
2595 0x00000055647f45a8: dadd at, zero, zero
2596 done:
2597 */
2599 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2600 /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
2601 Label done, again, nequal;
2603 bind(again);
2605 if(!Use3A2000) sync();
2606 ll(AT, dest);
2607 bne(AT, c_reg, nequal);
2608 delayed()->nop();
2610 move(AT, x_reg);
2611 sc(AT, dest);
2612 beq(AT, R0, again);
2613 delayed()->nop();
2614 b(done);
2615 delayed()->nop();
2617 // not xchged
2618 bind(nequal);
2619 sync();
2620 move(c_reg, AT);
2621 move(AT, R0);
2623 bind(done);
2624 }
2625 #endif // cmpxchg32
2627 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2628 Label done, again, nequal;
2630 bind(again);
2631 #ifdef _LP64
2632 if(!Use3A2000) sync();
2633 lld(AT, dest);
2634 #else
2635 if(!Use3A2000) sync();
2636 ll(AT, dest);
2637 #endif
2638 bne(AT, c_reg, nequal);
2639 delayed()->nop();
2641 move(AT, x_reg);
2642 #ifdef _LP64
2643 scd(AT, dest);
2644 #else
2645 sc(AT, dest);
2646 #endif
2647 beq(AT, R0, again);
2648 delayed()->nop();
2649 b(done);
2650 delayed()->nop();
2652 // not xchged
2653 bind(nequal);
2654 sync();
2655 move(c_reg, AT);
2656 move(AT, R0);
2658 bind(done);
2659 }
2661 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2662 Label done, again, nequal;
2664 Register x_reg = x_regLo;
2665 dsll32(x_regHi, x_regHi, 0);
2666 dsll32(x_regLo, x_regLo, 0);
2667 dsrl32(x_regLo, x_regLo, 0);
2668 orr(x_reg, x_regLo, x_regHi);
2670 Register c_reg = c_regLo;
2671 dsll32(c_regHi, c_regHi, 0);
2672 dsll32(c_regLo, c_regLo, 0);
2673 dsrl32(c_regLo, c_regLo, 0);
2674 orr(c_reg, c_regLo, c_regHi);
2676 bind(again);
2678 if(!Use3A2000) sync();
2679 lld(AT, dest);
2680 bne(AT, c_reg, nequal);
2681 delayed()->nop();
2683 //move(AT, x_reg);
2684 dadd(AT, x_reg, R0);
2685 scd(AT, dest);
2686 beq(AT, R0, again);
2687 delayed()->nop();
2688 b(done);
2689 delayed()->nop();
2691 // not xchged
2692 bind(nequal);
2693 sync();
2694 //move(c_reg, AT);
2695 //move(AT, R0);
2696 dadd(c_reg, AT, R0);
2697 dadd(AT, R0, R0);
2698 bind(done);
2699 }
// Single-precision remainder: fd = fs - trunc(fs / ft) * ft.
// The quotient is truncated by converting it to a 64-bit integer and back.
2701 // tmp must be a different register from fs and ft (asserted below).
2702 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2703 assert_different_registers(tmp, fs, ft);
2704 div_s(tmp, fs, ft);
2705 trunc_l_s(tmp, tmp);
2706 cvt_s_l(tmp, tmp);
2707 mul_s(tmp, tmp, ft);
2708 sub_s(fd, fs, tmp);
2709 }
// Double-precision remainder: fd = fs - trunc(fs / ft) * ft.
// The quotient is truncated by converting it to a 64-bit integer and back.
2711 // tmp must be a different register from fs and ft (asserted below).
2712 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2713 assert_different_registers(tmp, fs, ft);
2714 div_d(tmp, fs, ft);
2715 trunc_l_d(tmp, tmp);
2716 cvt_d_l(tmp, tmp);
2717 mul_d(tmp, tmp, ft);
2718 sub_d(fd, fs, tmp);
2719 }
2721 // Fast_Lock and Fast_Unlock used by C2
2723 // Because the transitions from emitted code to the runtime
2724 // monitorenter/exit helper stubs are so slow it's critical that
2725 // we inline both the stack-locking fast-path and the inflated fast path.
2726 //
2727 // See also: cmpFastLock and cmpFastUnlock.
2728 //
2729 // What follows is a specialized inline transliteration of the code
2730 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2731 // another option would be to emit TrySlowEnter and TrySlowExit methods
2732 // at startup-time. These methods would accept arguments as
2733 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2734 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2735 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2736 // In practice, however, the # of lock sites is bounded and is usually small.
2737 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2738 // if the processor uses simple bimodal branch predictors keyed by EIP
2739 // Since the helper routines would be called from multiple synchronization
2740 // sites.
2741 //
2742 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2743 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2744 // to those specialized methods. That'd give us a mostly platform-independent
2745 // implementation that the JITs could optimize and inline at their pleasure.
2746 // Done correctly, the only time we'd need to cross to native code would be
2747 // to park() or unpark() threads. We'd also need a few more unsafe operators
2748 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2749 // (b) explicit barriers or fence operations.
2750 //
2751 // TODO:
2752 //
2753 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2754 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2755 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2756 // the lock operators would typically be faster than reifying Self.
2757 //
2758 // * Ideally I'd define the primitives as:
2759 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2760 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2761 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2762 // Instead, we're stuck with a rather awkward and brittle register assignments below.
2763 // Furthermore the register assignments are overconstrained, possibly resulting in
2764 // sub-optimal code near the synchronization site.
2765 //
2766 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2767 // Alternately, use a better sp-proximity test.
2768 //
2769 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2770 // Either one is sufficient to uniquely identify a thread.
2771 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2772 //
2773 // * Intrinsify notify() and notifyAll() for the common cases where the
2774 // object is locked by the calling thread but the waitlist is empty.
2775 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2776 //
2777 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2778 // But beware of excessive branch density on AMD Opterons.
2779 //
2780 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2781 // or failure of the fast-path. If the fast-path fails then we pass
2782 // control to the slow-path, typically in C. In Fast_Lock and
2783 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2784 // will emit a conditional branch immediately after the node.
2785 // So we have branches to branches and lots of ICC.ZF games.
2786 // Instead, it might be better to have C2 pass a "FailureLabel"
2787 // into Fast_Lock and Fast_Unlock. In the case of success, control
2788 // will drop through the node. ICC.ZF is undefined at exit.
2789 // In the case of failure, the node will branch directly to the
2790 // FailureLabel
// Emit the inline fast path for monitor enter.
// The outcome is reported in AT (see the protocol note at DONE_LABEL):
// AT == 1 means the lock was taken, AT == 0 means the caller must go through
// the slow path.
2793 // obj: object to lock
2794 // box: on-stack box address (displaced header location) - KILLED
2795 // rax,: tmp -- KILLED
2796 // scr: tmp -- KILLED
2797 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2799 // Ensure the register assignments are disjoint
2800 guarantee (objReg != boxReg, "") ;
2801 guarantee (objReg != tmpReg, "") ;
2802 guarantee (objReg != scrReg, "") ;
2803 guarantee (boxReg != tmpReg, "") ;
2804 guarantee (boxReg != scrReg, "") ;
2807 block_comment("FastLock");
2808 /*
2809 move(AT, 0x0);
2810 return;
2811 */
2812 if (PrintBiasedLockingStatistics) {
2813 push(tmpReg);
2814 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2815 pop(tmpReg);
2816 }
// EmitSync bit 1: emit only "AT = 0", i.e. always report failure so that
// control goes through the slow path.
2818 if (EmitSync & 1) {
2819 move(AT, 0x0);
2820 return;
2821 } else
// EmitSync bit 2: simplified variant (optional biased locking, one CAS and
// the recursive stack-lock test; inflated monitors are not handled here).
2822 if (EmitSync & 2) {
2823 Label DONE_LABEL ;
2824 if (UseBiasedLocking) {
2825 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2826 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2827 }
2829 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2830 ori(tmpReg, tmpReg, 0x1);
2831 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2833 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2834 bne(AT, R0, DONE_LABEL);
2835 delayed()->nop();
2837 // Recursive locking
2838 dsubu(tmpReg, tmpReg, SP);
2839 li(AT, (7 - os::vm_page_size() ));
2840 andr(tmpReg, tmpReg, AT);
2841 sd(tmpReg, Address(boxReg, 0));
2842 bind(DONE_LABEL) ;
2843 } else {
2844 // Possible cases that we'll encounter in fast_lock
2845 // ------------------------------------------------
2846 // * Inflated
2847 // -- unlocked
2848 // -- Locked
2849 // = by self
2850 // = by other
2851 // * biased
2852 // -- by Self
2853 // -- by other
2854 // * neutral
2855 // * stack-locked
2856 // -- by self
2857 // = sp-proximity test hits
2858 // = sp-proximity test generates false-negative
2859 // -- by other
2860 //
// NOTE(review): PopDone is declared but never bound or targeted in this function.
2862 Label IsInflated, DONE_LABEL, PopDone ;
2864 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2865 // order to reduce the number of conditional branches in the most common cases.
2866 // Beware -- there's a subtle invariant that fetch of the markword
2867 // at [FETCH], below, will never observe a biased encoding (*101b).
2868 // If this invariant is not held we risk exclusion (safety) failure.
2869 if (UseBiasedLocking && !UseOptoBiasInlining) {
2870 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2871 }
2873 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2874 andi(AT, tmpReg, markOopDesc::monitor_value);
2875 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2876 delayed()->nop();
2878 // Attempt stack-locking ...
2879 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2880 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2881 //if (os::is_MP()) {
2882 // sync();
2883 //}
2885 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2886 //AT == 1: unlocked
2888 if (PrintBiasedLockingStatistics) {
2889 Label L;
2890 beq(AT, R0, L);
2891 delayed()->nop();
2892 push(T0);
2893 push(T1);
2894 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2895 pop(T1);
2896 pop(T0);
2897 bind(L);
2898 }
2899 bne(AT, R0, DONE_LABEL);
2900 delayed()->nop();
2902 // Recursive locking
2903 // The object is stack-locked: markword contains stack pointer to BasicLock.
2904 // Locked by current thread if difference with current SP is less than one page.
// Mask = 7 - page_size. For a power-of-two page size this keeps the low three
// bits and every bit at or above the page size, so the AND is zero only when
// (mark - SP) is 8-byte aligned and smaller than one page.
2905 dsubu(tmpReg, tmpReg, SP);
2906 li(AT, 7 - os::vm_page_size() );
2907 andr(tmpReg, tmpReg, AT);
2908 sd(tmpReg, Address(boxReg, 0));
2909 if (PrintBiasedLockingStatistics) {
2910 Label L;
2911 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2912 bne(tmpReg, R0, L);
2913 delayed()->nop();
2914 push(T0);
2915 push(T1);
2916 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2917 pop(T1);
2918 pop(T0);
2919 bind(L);
2920 }
2921 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
2923 b(DONE_LABEL) ;
2924 delayed()->nop();
2926 bind(IsInflated) ;
2927 // The object's monitor m is unlocked iff m->owner == NULL,
2928 // otherwise m->owner may contain a thread or a stack address.
2930 // TODO: someday avoid the ST-before-CAS penalty by
2931 // relocating (deferring) the following ST.
2932 // We should also think about trying a CAS without having
2933 // fetched _owner. If the CAS is successful we may
2934 // avoid an RTO->RTS upgrade on the $line.
2935 // Without cast to int32_t a movptr will destroy r10 which is typically obj
2936 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
2937 sd(AT, Address(boxReg, 0));
// From here on boxReg no longer addresses the box: it holds the mark word
// that was fetched above.
// NOTE(review): the "-2" in the field offsets presumably cancels the
// markOopDesc::monitor_value tag still present in that pointer -- confirm.
2939 move(boxReg, tmpReg) ;
2940 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2941 // if (m->owner != 0) => AT = 0, goto slow path.
2942 move(AT, R0);
2943 bne(tmpReg, R0, DONE_LABEL);
2944 delayed()->nop();
2946 #ifndef OPT_THREAD
2947 get_thread (TREG) ;
2948 #endif
2949 // It's inflated and appears unlocked
2950 //if (os::is_MP()) {
2951 // sync();
2952 //}
// Try to install TREG as owner; tmpReg is known to be 0 here (branch above).
2953 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
2954 // Intentional fall-through into DONE_LABEL ...
2957 // DONE_LABEL is a hot target - we'd really like to place it at the
2958 // start of cache line by padding with NOPs.
2959 // See the AMD and Intel software optimization manuals for the
2960 // most efficient "long" NOP encodings.
2961 // Unfortunately none of our alignment mechanisms suffice.
2962 bind(DONE_LABEL);
2964 // At DONE_LABEL the AT is set as follows ...
2965 // Fast_Unlock uses the same protocol.
2966 // AT == 1 -> Success
2967 // AT == 0 -> Failure - force control through the slow-path
2969 // Avoid branch-to-branch on AMD processors
2970 // This appears to be superstition.
2971 if (EmitSync & 32) nop() ;
2973 }
2974 }
// Emit the inline fast path for monitor exit.
// The outcome is reported in AT in the same way as fast_lock does: non-zero
// for success, 0 when the caller must go through the slow path.
2976 // obj: object to unlock
2977 // box: box address (displaced header location), killed. Must be EAX.
2978 // rbx,: killed tmp; cannot be obj nor box.
2979 //
2980 // Some commentary on balanced locking:
2981 //
2982 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
2983 // Methods that don't have provably balanced locking are forced to run in the
2984 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
2985 // The interpreter provides two properties:
2986 // I1: At return-time the interpreter automatically and quietly unlocks any
2987 // objects acquired the current activation (frame). Recall that the
2988 // interpreter maintains an on-stack list of locks currently held by
2989 // a frame.
2990 // I2: If a method attempts to unlock an object that is not held by the
2991 // the frame the interpreter throws IMSX.
2992 //
2993 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
2994 // B() doesn't have provably balanced locking so it runs in the interpreter.
2995 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
2996 // is still locked by A().
2997 //
2998 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
2999 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3000 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3001 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3003 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
3005 guarantee (objReg != boxReg, "") ;
3006 guarantee (objReg != tmpReg, "") ;
3007 guarantee (boxReg != tmpReg, "") ;
3011 block_comment("FastUnlock");
3014 if (EmitSync & 4) {
3015 // Disable - inhibit all inlining. Force control through the slow-path
3016 move(AT, 0x0);
3017 return;
3018 } else
3019 if (EmitSync & 8) {
3020 Label DONE_LABEL ;
3021 if (UseBiasedLocking) {
3022 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3023 }
3024 // classic stack-locking code ...
3025 ld(tmpReg, Address(boxReg, 0)) ;
3026 beq(tmpReg, R0, DONE_LABEL) ;
3027 move(AT, 0x1); // delay slot
3029 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
3030 bind(DONE_LABEL);
3031 } else {
// NOTE(review): CheckSucc is bound only when EmitSync & 65536 and is never a
// branch target here; Inflated is bound but likewise never targeted.
3032 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3034 // Critically, the biased locking test must have precedence over
3035 // and appear before the (box->dhw == 0) recursive stack-lock test.
3036 if (UseBiasedLocking && !UseOptoBiasInlining) {
3037 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3038 }
// The delay-slot instruction sets AT = 1, so a recursive exit reaches
// DONE_LABEL reporting success.
3040 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
3041 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
3042 delayed()->daddiu(AT, R0, 0x1);
3044 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3045 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
3046 beq(AT, R0, Stacked) ; // Inflated?
3047 delayed()->nop();
3049 bind(Inflated) ;
3050 // It's inflated.
3051 // Despite our balanced locking property we still check that m->_owner == Self
3052 // as java routines or native JNI code called by this thread might
3053 // have released the lock.
3054 // Refer to the comments in synchronizer.cpp for how we might encode extra
3055 // state in _succ so we can avoid fetching EntryList|cxq.
3056 //
3057 // I'd like to add more cases in fast_lock() and fast_unlock() --
3058 // such as recursive enter and exit -- but we have to be wary of
3059 // I$ bloat, T$ effects and BP$ effects.
3060 //
3061 // If there's no contention try a 1-0 exit. That is, exit without
3062 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3063 // we detect and recover from the race that the 1-0 exit admits.
3064 //
3065 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3066 // before it STs null into _owner, releasing the lock. Updates
3067 // to data protected by the critical section must be visible before
3068 // we drop the lock (and thus before any other thread could acquire
3069 // the lock and observe the fields protected by the lock).
3070 // IA32's memory-model is SPO, so STs are ordered with respect to
3071 // each other and there's no need for an explicit barrier (fence).
3072 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3073 #ifndef OPT_THREAD
3074 get_thread (TREG) ;
3075 #endif
3077 // It's inflated
// boxReg = (owner ^ TREG) | recursions: zero only when this thread owns the
// monitor and the recursion count is zero.
// NOTE(review): the "-2" in the field offsets presumably cancels the
// markOopDesc::monitor_value tag carried in tmpReg -- confirm.
3078 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3079 xorr(boxReg, boxReg, TREG);
3081 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3082 orr(boxReg, boxReg, AT);
3084 move(AT, R0);
3085 bne(boxReg, R0, DONE_LABEL);
3086 delayed()->nop();
// boxReg = cxq | EntryList: any non-zero value sends control to the slow path (AT = 0).
3088 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3089 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3090 orr(boxReg, boxReg, AT);
3092 move(AT, R0);
3093 bne(boxReg, R0, DONE_LABEL);
3094 delayed()->nop();
// A barrier is issued before the owner field is cleared, then AT = 1.
3096 sync();
3097 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3098 move(AT, 0x1);
3099 b(DONE_LABEL);
3100 delayed()->nop();
// Stack-locked: CAS the displaced header from the box back into the object's
// mark word, expecting the mark to equal boxReg; AT comes from cmpxchg.
3102 bind (Stacked);
3103 ld(tmpReg, Address(boxReg, 0)) ;
3104 //if (os::is_MP()) { sync(); }
3105 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3107 if (EmitSync & 65536) {
3108 bind (CheckSucc);
3109 }
3111 bind(DONE_LABEL);
3113 // Avoid branch to branch on AMD processors
3114 if (EmitSync & 32768) { nop() ; }
3115 }
3116 }
3118 void MacroAssembler::align(int modulus) {
3119 while (offset() % modulus != 0) nop();
3120 }
3123 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3124 //Unimplemented();
3125 }
3127 #ifdef _LP64
3128 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3130 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
3131 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3132 #else
3133 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3135 Register caller_saved_fpu_registers[] = {};
3136 #endif
3138 //We preserve all caller-saved register
3139 void MacroAssembler::pushad(){
3140 int i;
3142 /* Fixed-point registers */
3143 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3144 daddi(SP, SP, -1 * len * wordSize);
3145 for (i = 0; i < len; i++)
3146 {
3147 #ifdef _LP64
3148 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3149 #else
3150 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3151 #endif
3152 }
3154 /* Floating-point registers */
3155 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3156 daddi(SP, SP, -1 * len * wordSize);
3157 for (i = 0; i < len; i++)
3158 {
3159 #ifdef _LP64
3160 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3161 #else
3162 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3163 #endif
3164 }
3165 };
3167 void MacroAssembler::popad(){
3168 int i;
3170 /* Floating-point registers */
3171 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3172 for (i = 0; i < len; i++)
3173 {
3174 #ifdef _LP64
3175 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3176 #else
3177 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3178 #endif
3179 }
3180 daddi(SP, SP, len * wordSize);
3182 /* Fixed-point registers */
3183 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3184 for (i = 0; i < len; i++)
3185 {
3186 #ifdef _LP64
3187 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3188 #else
3189 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3190 #endif
3191 }
3192 daddi(SP, SP, len * wordSize);
3193 };
3195 void MacroAssembler::push2(Register reg1, Register reg2) {
3196 #ifdef _LP64
3197 daddi(SP, SP, -16);
3198 sd(reg2, SP, 0);
3199 sd(reg1, SP, 8);
3200 #else
3201 addi(SP, SP, -8);
3202 sw(reg2, SP, 0);
3203 sw(reg1, SP, 4);
3204 #endif
3205 }
3207 void MacroAssembler::pop2(Register reg1, Register reg2) {
3208 #ifdef _LP64
3209 ld(reg1, SP, 0);
3210 ld(reg2, SP, 8);
3211 daddi(SP, SP, 16);
3212 #else
3213 lw(reg1, SP, 0);
3214 lw(reg2, SP, 4);
3215 addi(SP, SP, 8);
3216 #endif
3217 }
3219 //for UseCompressedOops Option
3220 void MacroAssembler::load_klass(Register dst, Register src) {
3221 #ifdef _LP64
3222 if(UseCompressedClassPointers){
3223 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3224 decode_klass_not_null(dst);
3225 } else
3226 #endif
3227 ld(dst, src, oopDesc::klass_offset_in_bytes());
3228 }
3230 void MacroAssembler::store_klass(Register dst, Register src) {
3231 #ifdef _LP64
3232 if(UseCompressedClassPointers){
3233 encode_klass_not_null(src);
3234 sw(src, dst, oopDesc::klass_offset_in_bytes());
3235 } else {
3236 #endif
3237 sd(src, dst, oopDesc::klass_offset_in_bytes());
3238 }
3239 }
3241 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3242 load_klass(dst, src);
3243 ld(dst, Address(dst, Klass::prototype_header_offset()));
3244 }
3246 #ifdef _LP64
3247 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3248 if (UseCompressedClassPointers) {
3249 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3250 }
3251 }
3253 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3254 if(UseCompressedOops){
3255 lwu(dst, src);
3256 decode_heap_oop(dst);
3257 } else{
3258 ld(dst, src);
3259 }
3260 }
3262 void MacroAssembler::store_heap_oop(Address dst, Register src){
3263 if(UseCompressedOops){
3264 assert(!dst.uses(src), "not enough registers");
3265 encode_heap_oop(src);
3266 sw(src, dst);
3267 } else{
3268 sd(src, dst);
3269 }
3270 }
3272 void MacroAssembler::store_heap_oop_null(Address dst){
3273 if(UseCompressedOops){
3274 sw(R0, dst);
3275 } else{
3276 sd(R0, dst);
3277 }
3278 }
#ifdef ASSERT
// Debug-only sanity check run at code-generation time. It only asserts that
// some form of pointer compression is enabled and that the Java heap exists;
// it emits no code and does not use 'msg'.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert(UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert(Universe::heap() != NULL, "java heap should be initialized");
}
#endif
3288 // Algorithm must match oop.inline.hpp encode_heap_oop.
3289 void MacroAssembler::encode_heap_oop(Register r) {
3290 #ifdef ASSERT
3291 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3292 #endif
3293 verify_oop(r, "broken oop in encode_heap_oop");
3294 if (Universe::narrow_oop_base() == NULL) {
3295 if (Universe::narrow_oop_shift() != 0) {
3296 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3297 shr(r, LogMinObjAlignmentInBytes);
3298 }
3299 return;
3300 }
3302 movz(r, S5_heapbase, r);
3303 dsub(r, r, S5_heapbase);
3304 if (Universe::narrow_oop_shift() != 0) {
3305 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3306 shr(r, LogMinObjAlignmentInBytes);
3307 }
3308 }
3310 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3311 #ifdef ASSERT
3312 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3313 #endif
3314 verify_oop(src, "broken oop in encode_heap_oop");
3315 if (Universe::narrow_oop_base() == NULL) {
3316 if (Universe::narrow_oop_shift() != 0) {
3317 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3318 dsrl(dst, src, LogMinObjAlignmentInBytes);
3319 } else {
3320 if (dst != src) move(dst, src);
3321 }
3322 } else {
3323 if (dst == src) {
3324 movz(dst, S5_heapbase, dst);
3325 dsub(dst, dst, S5_heapbase);
3326 if (Universe::narrow_oop_shift() != 0) {
3327 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3328 shr(dst, LogMinObjAlignmentInBytes);
3329 }
3330 } else {
3331 dsub(dst, src, S5_heapbase);
3332 if (Universe::narrow_oop_shift() != 0) {
3333 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3334 shr(dst, LogMinObjAlignmentInBytes);
3335 }
3336 movz(dst, R0, src);
3337 }
3338 }
3339 }
3341 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3342 assert (UseCompressedOops, "should be compressed");
3343 #ifdef ASSERT
3344 if (CheckCompressedOops) {
3345 Label ok;
3346 bne(r, R0, ok);
3347 delayed()->nop();
3348 stop("null oop passed to encode_heap_oop_not_null");
3349 bind(ok);
3350 }
3351 #endif
3352 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3353 if (Universe::narrow_oop_base() != NULL) {
3354 dsub(r, r, S5_heapbase);
3355 }
3356 if (Universe::narrow_oop_shift() != 0) {
3357 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3358 shr(r, LogMinObjAlignmentInBytes);
3359 }
3361 }
3363 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3364 assert (UseCompressedOops, "should be compressed");
3365 #ifdef ASSERT
3366 if (CheckCompressedOops) {
3367 Label ok;
3368 bne(src, R0, ok);
3369 delayed()->nop();
3370 stop("null oop passed to encode_heap_oop_not_null2");
3371 bind(ok);
3372 }
3373 #endif
3374 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3376 if (Universe::narrow_oop_base() != NULL) {
3377 dsub(dst, src, S5_heapbase);
3378 if (Universe::narrow_oop_shift() != 0) {
3379 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3380 shr(dst, LogMinObjAlignmentInBytes);
3381 }
3382 } else {
3383 if (Universe::narrow_oop_shift() != 0) {
3384 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3385 dsrl(dst, src, LogMinObjAlignmentInBytes);
3386 } else {
3387 if (dst != src) move(dst, src);
3388 }
3389 }
3390 }
3392 void MacroAssembler::decode_heap_oop(Register r) {
3393 #ifdef ASSERT
3394 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3395 #endif
3396 if (Universe::narrow_oop_base() == NULL) {
3397 if (Universe::narrow_oop_shift() != 0) {
3398 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3399 shl(r, LogMinObjAlignmentInBytes);
3400 }
3401 } else {
3402 move(AT, r);
3403 if (Universe::narrow_oop_shift() != 0) {
3404 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3405 shl(r, LogMinObjAlignmentInBytes);
3406 }
3407 dadd(r, r, S5_heapbase);
3408 movz(r, R0, AT);
3409 }
3410 verify_oop(r, "broken oop in decode_heap_oop");
3411 }
3413 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3414 #ifdef ASSERT
3415 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3416 #endif
3417 if (Universe::narrow_oop_base() == NULL) {
3418 if (Universe::narrow_oop_shift() != 0) {
3419 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3420 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3421 dsll(dst, src, LogMinObjAlignmentInBytes);
3422 } else {
3423 if (dst != src) move(dst, src);
3424 }
3425 } else {
3426 if (dst == src) {
3427 move(AT, dst);
3428 if (Universe::narrow_oop_shift() != 0) {
3429 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3430 shl(dst, LogMinObjAlignmentInBytes);
3431 }
3432 dadd(dst, dst, S5_heapbase);
3433 movz(dst, R0, AT);
3434 } else {
3435 if (Universe::narrow_oop_shift() != 0) {
3436 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3437 dsll(dst, src, LogMinObjAlignmentInBytes);
3438 daddu(dst, dst, S5_heapbase);
3439 } else {
3440 daddu(dst, src, S5_heapbase);
3441 }
3442 movz(dst, R0, src);
3443 }
3444 }
3445 verify_oop(dst, "broken oop in decode_heap_oop");
3446 }
3448 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3449 // Note: it will change flags
3450 assert (UseCompressedOops, "should only be used for compressed headers");
3451 assert (Universe::heap() != NULL, "java heap should be initialized");
3452 // Cannot assert, unverified entry point counts instructions (see .ad file)
3453 // vtableStubs also counts instructions in pd_code_size_limit.
3454 // Also do not verify_oop as this is called by verify_oop.
3455 if (Universe::narrow_oop_shift() != 0) {
3456 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3457 shl(r, LogMinObjAlignmentInBytes);
3458 if (Universe::narrow_oop_base() != NULL) {
3459 daddu(r, r, S5_heapbase);
3460 }
3461 } else {
3462 assert (Universe::narrow_oop_base() == NULL, "sanity");
3463 }
3464 }
3466 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3467 assert (UseCompressedOops, "should only be used for compressed headers");
3468 assert (Universe::heap() != NULL, "java heap should be initialized");
3470 // Cannot assert, unverified entry point counts instructions (see .ad file)
3471 // vtableStubs also counts instructions in pd_code_size_limit.
3472 // Also do not verify_oop as this is called by verify_oop.
3473 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3474 if (Universe::narrow_oop_shift() != 0) {
3475 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3476 if (LogMinObjAlignmentInBytes == Address::times_8) {
3477 dsll(dst, src, LogMinObjAlignmentInBytes);
3478 daddu(dst, dst, S5_heapbase);
3479 } else {
3480 dsll(dst, src, LogMinObjAlignmentInBytes);
3481 if (Universe::narrow_oop_base() != NULL) {
3482 daddu(dst, dst, S5_heapbase);
3483 }
3484 }
3485 } else {
3486 assert (Universe::narrow_oop_base() == NULL, "sanity");
3487 if (dst != src) {
3488 move(dst, src);
3489 }
3490 }
3491 }
3493 void MacroAssembler::encode_klass_not_null(Register r) {
3494 if (Universe::narrow_klass_base() != NULL) {
3495 assert(r != AT, "Encoding a klass in AT");
3496 set64(AT, (int64_t)Universe::narrow_klass_base());
3497 dsub(r, r, AT);
3498 }
3499 if (Universe::narrow_klass_shift() != 0) {
3500 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3501 shr(r, LogKlassAlignmentInBytes);
3502 }
3503 // Not neccessary for MIPS at all.
3504 //if (Universe::narrow_klass_base() != NULL) {
3505 // reinit_heapbase();
3506 //}
3507 }
3509 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3510 if (dst == src) {
3511 encode_klass_not_null(src);
3512 } else {
3513 if (Universe::narrow_klass_base() != NULL) {
3514 set64(dst, (int64_t)Universe::narrow_klass_base());
3515 dsub(dst, src, dst);
3516 if (Universe::narrow_klass_shift() != 0) {
3517 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3518 shr(dst, LogKlassAlignmentInBytes);
3519 }
3520 } else {
3521 if (Universe::narrow_klass_shift() != 0) {
3522 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3523 dsrl(dst, src, LogKlassAlignmentInBytes);
3524 } else {
3525 move(dst, src);
3526 }
3527 }
3528 }
3529 }
3531 // Function instr_size_for_decode_klass_not_null() counts the instructions
3532 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3533 // when (Universe::heap() != NULL). Hence, if the instructions they
3534 // generate change, then this method needs to be updated.
3535 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3536 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3537 if (Universe::narrow_klass_base() != NULL) {
3538 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
3539 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3540 } else {
3541 // longest load decode klass function, mov64, leaq
3542 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3543 }
3544 }
3546 void MacroAssembler::decode_klass_not_null(Register r) {
3547 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3548 assert(r != AT, "Decoding a klass in AT");
3549 // Cannot assert, unverified entry point counts instructions (see .ad file)
3550 // vtableStubs also counts instructions in pd_code_size_limit.
3551 // Also do not verify_oop as this is called by verify_oop.
3552 if (Universe::narrow_klass_shift() != 0) {
3553 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3554 shl(r, LogKlassAlignmentInBytes);
3555 }
3556 if (Universe::narrow_klass_base() != NULL) {
3557 set64(AT, (int64_t)Universe::narrow_klass_base());
3558 daddu(r, r, AT);
3559 //Not neccessary for MIPS at all.
3560 //reinit_heapbase();
3561 }
3562 }
3564 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3565 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3567 if (dst == src) {
3568 decode_klass_not_null(dst);
3569 } else {
3570 // Cannot assert, unverified entry point counts instructions (see .ad file)
3571 // vtableStubs also counts instructions in pd_code_size_limit.
3572 // Also do not verify_oop as this is called by verify_oop.
3573 set64(dst, (int64_t)Universe::narrow_klass_base());
3574 if (Universe::narrow_klass_shift() != 0) {
3575 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3576 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3577 dsll(AT, src, Address::times_8);
3578 daddu(dst, dst, AT);
3579 } else {
3580 daddu(dst, src, dst);
3581 }
3582 }
3583 }
3585 void MacroAssembler::incrementl(Register reg, int value) {
3586 if (value == min_jint) {
3587 move(AT, value);
3588 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3589 return;
3590 }
3591 if (value < 0) { decrementl(reg, -value); return; }
3592 if (value == 0) { ; return; }
3594 if(Assembler::is_simm16(value)) {
3595 NOT_LP64(addiu(reg, reg, value));
3596 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
3597 } else {
3598 move(AT, value);
3599 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3600 }
3601 }
3603 void MacroAssembler::decrementl(Register reg, int value) {
3604 if (value == min_jint) {
3605 move(AT, value);
3606 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3607 return;
3608 }
3609 if (value < 0) { incrementl(reg, -value); return; }
3610 if (value == 0) { ; return; }
3612 if(Assembler::is_simm16(value)) {
3613 NOT_LP64(addiu(reg, reg, -value));
3614 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
3615 } else {
3616 move(AT, value);
3617 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3618 }
3619 }
3621 void MacroAssembler::reinit_heapbase() {
3622 if (UseCompressedOops || UseCompressedClassPointers) {
3623 if (Universe::heap() != NULL) {
3624 if (Universe::narrow_oop_base() == NULL) {
3625 move(S5_heapbase, R0);
3626 } else {
3627 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3628 }
3629 } else {
3630 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3631 ld(S5_heapbase, S5_heapbase, 0);
3632 }
3633 }
3634 }
3635 #endif // _LP64
3637 void MacroAssembler::check_klass_subtype(Register sub_klass,
3638 Register super_klass,
3639 Register temp_reg,
3640 Label& L_success) {
3641 //implement ind gen_subtype_check
3642 Label L_failure;
3643 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3644 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3645 bind(L_failure);
3646 }
3648 SkipIfEqual::SkipIfEqual(
3649 MacroAssembler* masm, const bool* flag_addr, bool value) {
3650 _masm = masm;
3651 _masm->li(AT, (address)flag_addr);
3652 _masm->lb(AT,AT,0);
3653 _masm->addi(AT,AT,-value);
3654 _masm->beq(AT,R0,_label);
3655 _masm->delayed()->nop();
3656 }
3657 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3658 Register super_klass,
3659 Register temp_reg,
3660 Label* L_success,
3661 Label* L_failure,
3662 Label* L_slow_path,
3663 RegisterOrConstant super_check_offset) {
3664 assert_different_registers(sub_klass, super_klass, temp_reg);
3665 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3666 if (super_check_offset.is_register()) {
3667 assert_different_registers(sub_klass, super_klass,
3668 super_check_offset.as_register());
3669 } else if (must_load_sco) {
3670 assert(temp_reg != noreg, "supply either a temp or a register offset");
3671 }
3673 Label L_fallthrough;
3674 int label_nulls = 0;
3675 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3676 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3677 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3678 assert(label_nulls <= 1, "at most one NULL in the batch");
3680 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3681 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3682 // If the pointers are equal, we are done (e.g., String[] elements).
3683 // This self-check enables sharing of secondary supertype arrays among
3684 // non-primary types such as array-of-interface. Otherwise, each such
3685 // type would need its own customized SSA.
3686 // We move this check to the front of the fast path because many
3687 // type checks are in fact trivially successful in this manner,
3688 // so we get a nicely predicted branch right at the start of the check.
3689 //cmpptr(sub_klass, super_klass);
3690 //local_jcc(Assembler::equal, *L_success);
3691 beq(sub_klass, super_klass, *L_success);
3692 delayed()->nop();
3693 // Check the supertype display:
3694 if (must_load_sco) {
3695 // Positive movl does right thing on LP64.
3696 lwu(temp_reg, super_klass, sco_offset);
3697 super_check_offset = RegisterOrConstant(temp_reg);
3698 }
3699 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
3700 daddu(AT, sub_klass, AT);
3701 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
3703 // This check has worked decisively for primary supers.
3704 // Secondary supers are sought in the super_cache ('super_cache_addr').
3705 // (Secondary supers are interfaces and very deeply nested subtypes.)
3706 // This works in the same check above because of a tricky aliasing
3707 // between the super_cache and the primary super display elements.
3708 // (The 'super_check_addr' can address either, as the case requires.)
3709 // Note that the cache is updated below if it does not help us find
3710 // what we need immediately.
3711 // So if it was a primary super, we can just fail immediately.
3712 // Otherwise, it's the slow path for us (no success at this point).
3714 if (super_check_offset.is_register()) {
3715 beq(super_klass, AT, *L_success);
3716 delayed()->nop();
3717 addi(AT, super_check_offset.as_register(), -sc_offset);
3718 if (L_failure == &L_fallthrough) {
3719 beq(AT, R0, *L_slow_path);
3720 delayed()->nop();
3721 } else {
3722 bne(AT, R0, *L_failure);
3723 delayed()->nop();
3724 b(*L_slow_path);
3725 delayed()->nop();
3726 }
3727 } else if (super_check_offset.as_constant() == sc_offset) {
3728 // Need a slow path; fast failure is impossible.
3729 if (L_slow_path == &L_fallthrough) {
3730 beq(super_klass, AT, *L_success);
3731 delayed()->nop();
3732 } else {
3733 bne(super_klass, AT, *L_slow_path);
3734 delayed()->nop();
3735 b(*L_success);
3736 delayed()->nop();
3737 }
3738 } else {
3739 // No slow path; it's a fast decision.
3740 if (L_failure == &L_fallthrough) {
3741 beq(super_klass, AT, *L_success);
3742 delayed()->nop();
3743 } else {
3744 bne(super_klass, AT, *L_failure);
3745 delayed()->nop();
3746 b(*L_success);
3747 delayed()->nop();
3748 }
3749 }
3751 bind(L_fallthrough);
3753 }
3756 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3757 Register super_klass,
3758 Register temp_reg,
3759 Register temp2_reg,
3760 Label* L_success,
3761 Label* L_failure,
3762 bool set_cond_codes) {
3763 assert_different_registers(sub_klass, super_klass, temp_reg);
3764 if (temp2_reg != noreg)
3765 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3766 else
3767 temp2_reg = T9;
3768 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3770 Label L_fallthrough;
3771 int label_nulls = 0;
3772 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3773 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3774 assert(label_nulls <= 1, "at most one NULL in the batch");
3776 // a couple of useful fields in sub_klass:
3777 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3778 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3779 Address secondary_supers_addr(sub_klass, ss_offset);
3780 Address super_cache_addr( sub_klass, sc_offset);
3782 // Do a linear scan of the secondary super-klass chain.
3783 // This code is rarely used, so simplicity is a virtue here.
3784 // The repne_scan instruction uses fixed registers, which we must spill.
3785 // Don't worry too much about pre-existing connections with the input regs.
3787 #if 0
3788 assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
3789 assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
3790 #endif
3792 // Get super_klass value into rax (even if it was in rdi or rcx).
3793 #ifndef PRODUCT
3794 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3795 ExternalAddress pst_counter_addr((address) pst_counter);
3796 NOT_LP64( incrementl(pst_counter_addr) );
3797 //LP64_ONLY( lea(rcx, pst_counter_addr) );
3798 //LP64_ONLY( incrementl(Address(rcx, 0)) );
3799 #endif //PRODUCT
3801 // We will consult the secondary-super array.
3802 ld(temp_reg, secondary_supers_addr);
3803 // Load the array length. (Positive movl does right thing on LP64.)
3804 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3805 // Skip to start of data.
3806 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
3808 // Scan RCX words at [RDI] for an occurrence of RAX.
3809 // Set NZ/Z based on last compare.
3810 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
3811 // not change flags (only scas instruction which is repeated sets flags).
3812 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
3814 /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
3815 Label Loop, subtype;
3816 bind(Loop);
3817 beq(temp2_reg, R0, *L_failure);
3818 delayed()->nop();
3819 ld(AT, temp_reg, 0);
3820 beq(AT, super_klass, subtype);
3821 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3822 b(Loop);
3823 delayed()->daddi(temp2_reg, temp2_reg, -1);
3825 bind(subtype);
3826 sd(super_klass, super_cache_addr);
3827 if (L_success != &L_fallthrough) {
3828 b(*L_success);
3829 delayed()->nop();
3830 }
3832 // Success. Cache the super we found and proceed in triumph.
3833 #undef IS_A_TEMP
3835 bind(L_fallthrough);
3836 }
3837 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3838 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3839 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3840 verify_oop(oop_result, "broken oop in call_VM_base");
3841 }
3843 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3844 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3845 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3846 }
3848 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3849 int extra_slot_offset) {
3850 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3851 int stackElementSize = Interpreter::stackElementSize;
3852 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3853 #ifdef ASSERT
3854 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3855 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3856 #endif
3857 Register scale_reg = NOREG;
3858 Address::ScaleFactor scale_factor = Address::no_scale;
3859 if (arg_slot.is_constant()) {
3860 offset += arg_slot.as_constant() * stackElementSize;
3861 } else {
3862 scale_reg = arg_slot.as_register();
3863 scale_factor = Address::times_8;
3864 }
3865 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
3866 // offset += wordSize; // return PC is on stack
3867 if(scale_reg==NOREG) return Address(SP, offset);
3868 else {
3869 dsll(scale_reg, scale_reg, scale_factor);
3870 daddu(scale_reg, SP, scale_reg);
3871 return Address(scale_reg, offset);
3872 }
3873 }
3875 SkipIfEqual::~SkipIfEqual() {
3876 _masm->bind(_label);
3877 }
3879 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3880 switch (size_in_bytes) {
3881 #ifndef _LP64
3882 case 8:
3883 assert(dst2 != noreg, "second dest register required");
3884 lw(dst, src);
3885 lw(dst2, src.plus_disp(BytesPerInt));
3886 break;
3887 #else
3888 case 8: ld(dst, src); break;
3889 #endif
3890 case 4: lw(dst, src); break;
3891 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
3892 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
3893 default: ShouldNotReachHere();
3894 }
3895 }
3897 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
3898 switch (size_in_bytes) {
3899 #ifndef _LP64
3900 case 8:
3901 assert(src2 != noreg, "second source register required");
3902 sw(src, dst);
3903 sw(src2, dst.plus_disp(BytesPerInt));
3904 break;
3905 #else
3906 case 8: sd(src, dst); break;
3907 #endif
3908 case 4: sw(src, dst); break;
3909 case 2: sh(src, dst); break;
3910 case 1: sb(src, dst); break;
3911 default: ShouldNotReachHere();
3912 }
3913 }
3915 // Look up the method for a megamorphic invokeinterface call.
3916 // The target method is determined by <intf_klass, itable_index>.
3917 // The receiver klass is in recv_klass.
3918 // On success, the result will be in method_result, and execution falls through.
3919 // On failure, execution transfers to the given label.
3920 void MacroAssembler::lookup_interface_method(Register recv_klass,
3921 Register intf_klass,
3922 RegisterOrConstant itable_index,
3923 Register method_result,
3924 Register scan_temp,
3925 Label& L_no_such_interface) {
3926 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
3927 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3928 "caller must use same register for non-constant itable index as for method");
3930 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3931 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
3932 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3933 int scan_step = itableOffsetEntry::size() * wordSize;
3934 int vte_size = vtableEntry::size() * wordSize;
3935 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3936 assert(vte_size == wordSize, "else adjust times_vte_scale");
3938 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
3940 // %%% Could store the aligned, prescaled offset in the klassoop.
3941 dsll(scan_temp, scan_temp, times_vte_scale);
3942 daddu(scan_temp, recv_klass, scan_temp);
3943 daddiu(scan_temp, scan_temp, vtable_base);
3944 if (HeapWordsPerLong > 1) {
3945 // Round up to align_object_offset boundary
3946 // see code for InstanceKlass::start_of_itable!
3947 round_to(scan_temp, BytesPerLong);
3948 }
3950 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
3951 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
3952 // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
3953 if (itable_index.is_constant()) {
3954 set64(AT, (int)itable_index.is_constant());
3955 dsll(AT, AT, (int)Address::times_ptr);
3956 } else {
3957 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
3958 }
3959 daddu(AT, AT, recv_klass);
3960 daddiu(recv_klass, AT, itentry_off);
3962 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
3963 // if (scan->interface() == intf) {
3964 // result = (klass + scan->offset() + itable_index);
3965 // }
3966 // }
3967 Label search, found_method;
3969 for (int peel = 1; peel >= 0; peel--) {
3970 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
3972 if (peel) {
3973 beq(intf_klass, method_result, found_method);
3974 nop();
3975 } else {
3976 bne(intf_klass, method_result, search);
3977 nop();
3978 // (invert the test to fall through to found_method...)
3979 }
3981 if (!peel) break;
3983 bind(search);
3985 // Check that the previous entry is non-null. A null entry means that
3986 // the receiver class doesn't implement the interface, and wasn't the
3987 // same as when the caller was compiled.
3988 beq(method_result, R0, L_no_such_interface);
3989 nop();
3990 daddiu(scan_temp, scan_temp, scan_step);
3991 }
3993 bind(found_method);
3995 // Got a hit.
3996 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
3997 //ld(method_result, Address(recv_klass, scan_temp, Address::times_1));
3998 if(UseLoongsonISA) {
3999 gsldx(method_result, recv_klass, scan_temp, 0);
4000 } else {
4001 daddu(AT, recv_klass, scan_temp);
4002 ld(method_result, AT);
4003 }
4004 }
4007 // virtual method calling
4008 void MacroAssembler::lookup_virtual_method(Register recv_klass,
4009 RegisterOrConstant vtable_index,
4010 Register method_result) {
4011 Register tmp = GP;
4012 push(tmp);
4014 if (vtable_index.is_constant()) {
4015 assert_different_registers(recv_klass, method_result, tmp);
4016 } else {
4017 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
4018 }
4019 const int base = InstanceKlass::vtable_start_offset() * wordSize;
4020 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
4021 /*
4022 Address vtable_entry_addr(recv_klass,
4023 vtable_index, Address::times_ptr,
4024 base + vtableEntry::method_offset_in_bytes());
4025 */
4026 if (vtable_index.is_constant()) {
4027 set64(AT, vtable_index.as_constant());
4028 dsll(AT, AT, (int)Address::times_ptr);
4029 } else {
4030 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
4031 }
4032 set64(tmp, base + vtableEntry::method_offset_in_bytes());
4033 daddu(tmp, tmp, AT);
4034 daddu(tmp, tmp, recv_klass);
4035 ld(method_result, tmp, 0);
4037 pop(tmp);
4038 }