Tue, 04 Sep 2018 21:25:12 +0800
#7517 mRegP match a0_RegP
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2017, 2018, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/assembler.hpp"
28 #include "asm/assembler.inline.hpp"
29 #include "asm/macroAssembler.inline.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "gc_interface/collectedHeap.inline.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "memory/cardTableModRefBS.hpp"
34 #include "memory/resourceArea.hpp"
35 #include "memory/universe.hpp"
36 #include "prims/methodHandles.hpp"
37 #include "runtime/biasedLocking.hpp"
38 #include "runtime/interfaceSupport.hpp"
39 #include "runtime/objectMonitor.hpp"
40 #include "runtime/os.hpp"
41 #include "runtime/sharedRuntime.hpp"
42 #include "runtime/stubRoutines.hpp"
43 #include "utilities/macros.hpp"
44 #if INCLUDE_ALL_GCS
45 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
46 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
47 #include "gc_implementation/g1/heapRegion.hpp"
48 #endif // INCLUDE_ALL_GCS
50 // Implementation of MacroAssembler
// Static scratch arrays with 32 integer and 32 float slots. print() dumps their
// contents, and i_offset()/f_offset() compute the location of an individual slot.
52 intptr_t MacroAssembler::i[32] = {0};
53 float MacroAssembler::f[32] = {0.0};
55 void MacroAssembler::print(outputStream *s) {
56 unsigned int k;
57 for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
58 s->print_cr("i%d = 0x%.16lx", k, i[k]);
59 }
60 s->cr();
62 for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
63 s->print_cr("f%d = %f", k, f[k]);
64 }
65 s->cr();
66 }
68 int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
69 int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
71 void MacroAssembler::save_registers(MacroAssembler *masm) {
72 #define __ masm->
73 for(int k=0; k<32; k++) {
74 __ sw (as_Register(k), A0, i_offset(k));
75 }
77 for(int k=0; k<32; k++) {
78 __ swc1 (as_FloatRegister(k), A0, f_offset(k));
79 }
80 #undef __
81 }
83 void MacroAssembler::restore_registers(MacroAssembler *masm) {
84 #define __ masm->
85 for(int k=0; k<32; k++) {
86 __ lw (as_Register(k), A0, i_offset(k));
87 }
89 for(int k=0; k<32; k++) {
90 __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
91 }
92 #undef __
93 }
96 void MacroAssembler::pd_patch_instruction(address branch, address target) {
97 jint& stub_inst = *(jint*) branch;
98 jint *pc = (jint *)branch;
100 if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
101 //b_far:
102 // move(AT, RA); // dadd
103 // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
104 // nop();
105 // lui(T9, 0); // to be patched
106 // ori(T9, 0);
107 // daddu(T9, T9, RA);
108 // move(RA, AT);
109 // jr(T9);
111 assert(opcode(pc[3]) == lui_op
112 && opcode(pc[4]) == ori_op
113 && special(pc[5]) == daddu_op, "Not a branch label patch");
114 if(!(opcode(pc[3]) == lui_op
115 && opcode(pc[4]) == ori_op
116 && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
118 int offset = target - branch;
119 if (!is_simm16(offset)) {
120 pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
121 pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
122 } else {
123 /* revert to "beq + nop" */
124 CodeBuffer cb(branch, 4 * 10);
125 MacroAssembler masm(&cb);
126 #define __ masm.
127 __ b(target);
128 __ delayed()->nop();
129 __ nop();
130 __ nop();
131 __ nop();
132 __ nop();
133 __ nop();
134 __ nop();
135 }
136 return;
137 } else if (special(pc[4]) == jr_op
138 && opcode(pc[4]) == special_op
139 && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
140 //jmp_far:
141 // patchable_set48(T9, target);
142 // jr(T9);
143 // nop();
145 CodeBuffer cb(branch, 4 * 4);
146 MacroAssembler masm(&cb);
147 masm.patchable_set48(T9, (long)(target));
148 return;
149 }
151 #ifndef PRODUCT
152 if (!is_simm16((target - branch - 4) >> 2)) {
153 tty->print_cr("Illegal patching: branch = 0x%lx, target = 0x%lx", branch, target);
154 tty->print_cr("======= Start decoding at branch = 0x%lx =======", branch);
155 Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty);
156 tty->print_cr("======= End of decoding =======");
157 }
158 #endif
160 stub_inst = patched_branch(target - branch, stub_inst, 0);
161 }
163 static inline address first_cache_address() {
164 return CodeCache::low_bound() + sizeof(HeapBlock::Header);
165 }
167 static inline address last_cache_address() {
168 return CodeCache::high_bound() - Assembler::InstructionSize;
169 }
171 int MacroAssembler::call_size(address target, bool far, bool patchable) {
172 if (patchable) return 6 << Assembler::LogInstructionSize;
173 if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
174 return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
175 }
177 // Can we reach target using jal/j from anywhere
178 // in the code cache (because code can be relocated)?
179 bool MacroAssembler::reachable_from_cache(address target) {
180 address cl = first_cache_address();
181 address ch = last_cache_address();
183 return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch);
184 }
186 void MacroAssembler::general_jump(address target) {
187 if (reachable_from_cache(target)) {
188 j(target);
189 delayed()->nop();
190 } else {
191 set64(T9, (long)target);
192 jr(T9);
193 delayed()->nop();
194 }
195 }
197 int MacroAssembler::insts_for_general_jump(address target) {
198 if (reachable_from_cache(target)) {
199 //j(target);
200 //nop();
201 return 2;
202 } else {
203 //set64(T9, (long)target);
204 //jr(T9);
205 //nop();
206 return insts_for_set64((jlong)target) + 2;
207 }
208 }
210 void MacroAssembler::patchable_jump(address target) {
211 if (reachable_from_cache(target)) {
212 nop();
213 nop();
214 nop();
215 nop();
216 j(target);
217 delayed()->nop();
218 } else {
219 patchable_set48(T9, (long)target);
220 jr(T9);
221 delayed()->nop();
222 }
223 }
225 int MacroAssembler::insts_for_patchable_jump(address target) {
226 return 6;
227 }
229 void MacroAssembler::general_call(address target) {
230 if (reachable_from_cache(target)) {
231 jal(target);
232 delayed()->nop();
233 } else {
234 set64(T9, (long)target);
235 jalr(T9);
236 delayed()->nop();
237 }
238 }
240 int MacroAssembler::insts_for_general_call(address target) {
241 if (reachable_from_cache(target)) {
242 //jal(target);
243 //nop();
244 return 2;
245 } else {
246 //set64(T9, (long)target);
247 //jalr(T9);
248 //nop();
249 return insts_for_set64((jlong)target) + 2;
250 }
251 }
253 void MacroAssembler::patchable_call(address target) {
254 if (reachable_from_cache(target)) {
255 nop();
256 nop();
257 nop();
258 nop();
259 jal(target);
260 delayed()->nop();
261 } else {
262 patchable_set48(T9, (long)target);
263 jalr(T9);
264 delayed()->nop();
265 }
266 }
268 int MacroAssembler::insts_for_patchable_call(address target) {
269 return 6;
270 }
272 void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
273 u_char * cur_pc = pc();
275 // Near/Far jump
276 if(is_simm16((entry - pc() - 4) / 4)) {
277 Assembler::beq(rs, rt, offset(entry));
278 } else {
279 Label not_jump;
280 bne(rs, rt, not_jump);
281 delayed()->nop();
283 b_far(entry);
284 delayed()->nop();
286 bind(not_jump);
287 has_delay_slot();
288 }
289 }
291 void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
292 if (L.is_bound()) {
293 beq_far(rs, rt, target(L));
294 } else {
295 u_char * cur_pc = pc();
296 Label not_jump;
297 bne(rs, rt, not_jump);
298 delayed()->nop();
300 b_far(L);
301 delayed()->nop();
303 bind(not_jump);
304 has_delay_slot();
305 }
306 }
308 void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
309 u_char * cur_pc = pc();
311 //Near/Far jump
312 if(is_simm16((entry - pc() - 4) / 4)) {
313 Assembler::bne(rs, rt, offset(entry));
314 } else {
315 Label not_jump;
316 beq(rs, rt, not_jump);
317 delayed()->nop();
319 b_far(entry);
320 delayed()->nop();
322 bind(not_jump);
323 has_delay_slot();
324 }
325 }
327 void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
328 if (L.is_bound()) {
329 bne_far(rs, rt, target(L));
330 } else {
331 u_char * cur_pc = pc();
332 Label not_jump;
333 beq(rs, rt, not_jump);
334 delayed()->nop();
336 b_far(L);
337 delayed()->nop();
339 bind(not_jump);
340 has_delay_slot();
341 }
342 }
344 void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
345 Label not_taken;
347 bne(rs, rt, not_taken);
348 delayed()->nop();
350 jmp_far(L);
352 bind(not_taken);
353 }
355 void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
356 Label not_taken;
358 beq(rs, rt, not_taken);
359 delayed()->nop();
361 jmp_far(L);
363 bind(not_taken);
364 }
366 void MacroAssembler::bc1t_long(Label& L) {
367 Label not_taken;
369 bc1f(not_taken);
370 delayed()->nop();
372 jmp_far(L);
374 bind(not_taken);
375 }
377 void MacroAssembler::bc1f_long(Label& L) {
378 Label not_taken;
380 bc1t(not_taken);
381 delayed()->nop();
383 jmp_far(L);
385 bind(not_taken);
386 }
388 void MacroAssembler::b_far(Label& L) {
389 if (L.is_bound()) {
390 b_far(target(L));
391 } else {
392 volatile address dest = target(L);
393 /*
394 MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
395 0x00000055651ed514: dadd at, ra, zero
396 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
398 0x00000055651ed51c: sll zero, zero, 0
399 0x00000055651ed520: lui t9, 0x0
400 0x00000055651ed524: ori t9, t9, 0x21b8
401 0x00000055651ed528: daddu t9, t9, ra
402 0x00000055651ed52c: dadd ra, at, zero
403 0x00000055651ed530: jr t9
404 0x00000055651ed534: sll zero, zero, 0
405 */
406 move(AT, RA);
407 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
408 nop();
409 lui(T9, 0); // to be patched
410 ori(T9, T9, 0);
411 daddu(T9, T9, RA);
412 move(RA, AT);
413 jr(T9);
414 }
415 }
417 void MacroAssembler::b_far(address entry) {
418 u_char * cur_pc = pc();
420 // Near/Far jump
421 if(is_simm16((entry - pc() - 4) / 4)) {
422 b(offset(entry));
423 } else {
424 // address must be bounded
425 move(AT, RA);
426 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
427 nop();
428 li32(T9, entry - pc());
429 daddu(T9, T9, RA);
430 move(RA, AT);
431 jr(T9);
432 }
433 }
435 void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
436 addu_long(AT, base, offset);
437 ld_ptr(rt, 0, AT);
438 }
440 void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
441 addu_long(AT, base, offset);
442 st_ptr(rt, 0, AT);
443 }
445 void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
446 addu_long(AT, base, offset);
447 ld_long(rt, 0, AT);
448 }
450 void MacroAssembler::st_long(Register rt, Register offset, Register base) {
451 addu_long(AT, base, offset);
452 st_long(rt, 0, AT);
453 }
455 Address MacroAssembler::as_Address(AddressLiteral adr) {
456 return Address(adr.target(), adr.rspec());
457 }
459 Address MacroAssembler::as_Address(ArrayAddress adr) {
460 return Address::make_array(adr);
461 }
463 // tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
464 void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
465 Label again;
467 li(tmp_reg1, counter_addr);
468 bind(again);
469 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
470 ll(tmp_reg2, tmp_reg1, 0);
471 addi(tmp_reg2, tmp_reg2, inc);
472 sc(tmp_reg2, tmp_reg1, 0);
473 beq(tmp_reg2, R0, again);
474 delayed()->nop();
475 }
// Emits the biased-locking entry sequence for the object in obj_reg.
//
//   lock_reg               - its address at displacement 0 is used to stash the
//                            mark word read from the object (saved_mark_addr)
//   obj_reg                - the object being locked
//   swap_reg               - holds (or receives) the object's mark word; it is
//                            overwritten during the sequence
//   tmp_reg                - scratch register; when noreg is passed, T9 is used
//                            instead and pushed/popped around each use
//   swap_reg_contains_mark - when false, the mark word is loaded into swap_reg
//                            first
//   done                   - branched to when the xor/mask check leaves swap_reg
//                            zero, and after the acquire / rebias CAS when the
//                            slow-case branch is not taken
//   slow_case              - when non-NULL, branched to if AT == 0 after the
//                            acquire / rebias CAS
//   counters               - not referenced in this body; the statistics code is
//                            guarded by PrintBiasedLockingStatistics instead
//
// Returns the code offset captured by offset(): just before the mark-word load
// when swap_reg_contains_mark is false, otherwise just before
// load_prototype_header.
// Control falls through to the end of the sequence (cas_label) when the bias
// pattern is absent or after the revoke attempt. AT is used as scratch.
477 int MacroAssembler::biased_locking_enter(Register lock_reg,
478 Register obj_reg,
479 Register swap_reg,
480 Register tmp_reg,
481 bool swap_reg_contains_mark,
482 Label& done,
483 Label* slow_case,
484 BiasedLockingCounters* counters) {
485 assert(UseBiasedLocking, "why call this otherwise?");
486 bool need_tmp_reg = false;
487 if (tmp_reg == noreg) {
488 need_tmp_reg = true;
489 tmp_reg = T9;
490 }
491 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
492 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
493 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
494 Address saved_mark_addr(lock_reg, 0);
496 // Biased locking
497 // See whether the lock is currently biased toward our thread and
498 // whether the epoch is still valid
499 // Note that the runtime guarantees sufficient alignment of JavaThread
500 // pointers to allow age to be placed into low bits
501 // First check to see whether biasing is even enabled for this object
502 Label cas_label;
503 int null_check_offset = -1;
504 if (!swap_reg_contains_mark) {
505 null_check_offset = offset();
506 ld_ptr(swap_reg, mark_addr);
507 }
509 if (need_tmp_reg) {
510 push(tmp_reg);
511 }
// AT = biased_lock_pattern - (mark & biased_lock_mask_in_place); a non-zero AT
// means the bias pattern is absent and the sequence ends at cas_label.
512 move(tmp_reg, swap_reg);
513 andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
514 #ifdef _LP64
515 daddi(AT, R0, markOopDesc::biased_lock_pattern);
516 dsub(AT, AT, tmp_reg);
517 #else
518 addi(AT, R0, markOopDesc::biased_lock_pattern);
519 sub(AT, AT, tmp_reg);
520 #endif
521 if (need_tmp_reg) {
522 pop(tmp_reg);
523 }
525 bne(AT, R0, cas_label);
526 delayed()->nop();
529 // The bias pattern is present in the object's header. Need to check
530 // whether the bias owner and the epoch are both still current.
531 // Note that because there is no current thread register on MIPS we
532 // need to store off the mark word we read out of the object to
533 // avoid reloading it and needing to recheck invariants below. This
534 // store is unfortunate but it makes the overall code shorter and
535 // simpler.
536 st_ptr(swap_reg, saved_mark_addr);
537 if (need_tmp_reg) {
538 push(tmp_reg);
539 }
540 if (swap_reg_contains_mark) {
541 null_check_offset = offset();
542 }
// swap_reg = (prototype header ^ mark ^ current thread) with the age bits
// masked off; zero means the object is already biased toward this thread.
543 load_prototype_header(tmp_reg, obj_reg);
544 xorr(tmp_reg, tmp_reg, swap_reg);
545 get_thread(swap_reg);
546 xorr(swap_reg, swap_reg, tmp_reg);
548 move(AT, ~((int) markOopDesc::age_mask_in_place));
549 andr(swap_reg, swap_reg, AT);
551 if (PrintBiasedLockingStatistics) {
552 Label L;
553 bne(swap_reg, R0, L);
554 delayed()->nop();
555 push(tmp_reg);
556 push(A0);
557 atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
558 pop(A0);
559 pop(tmp_reg);
560 bind(L);
561 }
562 if (need_tmp_reg) {
563 pop(tmp_reg);
564 }
565 beq(swap_reg, R0, done);
566 delayed()->nop();
567 Label try_revoke_bias;
568 Label try_rebias;
570 // At this point we know that the header has the bias pattern and
571 // that we are not the bias owner in the current epoch. We need to
572 // figure out more details about the state of the header in order to
573 // know what operations can be legally performed on the object's
574 // header.
576 // If the low three bits in the xor result aren't clear, that means
577 // the prototype header is no longer biased and we have to revoke
578 // the bias on this object.
580 move(AT, markOopDesc::biased_lock_mask_in_place);
581 andr(AT, swap_reg, AT);
582 bne(AT, R0, try_revoke_bias);
583 delayed()->nop();
584 // Biasing is still enabled for this data type. See whether the
585 // epoch of the current bias is still valid, meaning that the epoch
586 // bits of the mark word are equal to the epoch bits of the
587 // prototype header. (Note that the prototype header's epoch bits
588 // only change at a safepoint.) If not, attempt to rebias the object
589 // toward the current thread. Note that we must be absolutely sure
590 // that the current epoch is invalid in order to do this because
591 // otherwise the manipulations it performs on the mark word are
592 // illegal.
594 move(AT, markOopDesc::epoch_mask_in_place);
595 andr(AT,swap_reg, AT);
596 bne(AT, R0, try_rebias);
597 delayed()->nop();
598 // The epoch of the current bias is still valid but we know nothing
599 // about the owner; it might be set or it might be clear. Try to
600 // acquire the bias of the object using an atomic operation. If this
601 // fails we will go in to the runtime to revoke the object's bias.
602 // Note that we first construct the presumed unbiased header so we
603 // don't accidentally blow away another thread's valid bias.
605 ld_ptr(swap_reg, saved_mark_addr);
607 move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
608 andr(swap_reg, swap_reg, AT);
610 if (need_tmp_reg) {
611 push(tmp_reg);
612 }
613 get_thread(tmp_reg);
614 orr(tmp_reg, tmp_reg, swap_reg);
615 //if (os::is_MP()) {
616 // sync();
617 //}
618 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
619 if (need_tmp_reg) {
620 pop(tmp_reg);
621 }
622 // If the biasing toward our thread failed, this means that
623 // another thread succeeded in biasing it toward itself and we
624 // need to revoke that bias. The revocation will occur in the
625 // interpreter runtime in the slow case.
// NOTE(review): the statistics block below skips the increment when AT != 0,
// while the slow-case branch right after it is taken when AT == 0. If AT == 0
// is the CAS-failure case, the counter is bumped on failure rather than on
// success - confirm against cmpxchg's AT convention.
626 if (PrintBiasedLockingStatistics) {
627 Label L;
628 bne(AT, R0, L);
629 delayed()->nop();
630 push(tmp_reg);
631 push(A0);
632 atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
633 pop(A0);
634 pop(tmp_reg);
635 bind(L);
636 }
637 if (slow_case != NULL) {
638 beq_far(AT, R0, *slow_case);
639 delayed()->nop();
640 }
641 b(done);
642 delayed()->nop();
644 bind(try_rebias);
645 // At this point we know the epoch has expired, meaning that the
646 // current "bias owner", if any, is actually invalid. Under these
647 // circumstances _only_, we are allowed to use the current header's
648 // value as the comparison value when doing the cas to acquire the
649 // bias in the current epoch. In other words, we allow transfer of
650 // the bias from one thread to another directly in this situation.
651 //
652 // FIXME: due to a lack of registers we currently blow away the age
653 // bits in this situation. Should attempt to preserve them.
654 if (need_tmp_reg) {
655 push(tmp_reg);
656 }
657 load_prototype_header(tmp_reg, obj_reg);
658 get_thread(swap_reg);
659 orr(tmp_reg, tmp_reg, swap_reg);
660 ld_ptr(swap_reg, saved_mark_addr);
662 //if (os::is_MP()) {
663 // sync();
664 //}
665 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
666 if (need_tmp_reg) {
667 pop(tmp_reg);
668 }
669 // If the biasing toward our thread failed, then another thread
670 // succeeded in biasing it toward itself and we need to revoke that
671 // bias. The revocation will occur in the runtime in the slow case.
// NOTE(review): same AT polarity question as for the anonymously-biased
// counter above. AT itself is saved and restored around atomic_inc32 here
// because it is passed as one of the temporaries.
672 if (PrintBiasedLockingStatistics) {
673 Label L;
674 bne(AT, R0, L);
675 delayed()->nop();
676 push(AT);
677 push(tmp_reg);
678 atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
679 pop(tmp_reg);
680 pop(AT);
681 bind(L);
682 }
683 if (slow_case != NULL) {
684 beq_far(AT, R0, *slow_case);
685 delayed()->nop();
686 }
688 b(done);
689 delayed()->nop();
690 bind(try_revoke_bias);
691 // The prototype mark in the klass doesn't have the bias bit set any
692 // more, indicating that objects of this data type are not supposed
693 // to be biased any more. We are going to try to reset the mark of
694 // this object to the prototype value and fall through to the
695 // CAS-based locking scheme. Note that if our CAS fails, it means
696 // that another thread raced us for the privilege of revoking the
697 // bias of this particular object, so it's okay to continue in the
698 // normal locking code.
699 //
700 // FIXME: due to a lack of registers we currently blow away the age
701 // bits in this situation. Should attempt to preserve them.
702 ld_ptr(swap_reg, saved_mark_addr);
704 if (need_tmp_reg) {
705 push(tmp_reg);
706 }
707 load_prototype_header(tmp_reg, obj_reg);
708 //if (os::is_MP()) {
709 // lock();
710 //}
711 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
712 if (need_tmp_reg) {
713 pop(tmp_reg);
714 }
715 // Fall through to the normal CAS-based lock, because no matter what
716 // the result of the above CAS, some thread must have succeeded in
717 // removing the bias bit from the object's header.
718 if (PrintBiasedLockingStatistics) {
719 Label L;
720 bne(AT, R0, L);
721 delayed()->nop();
722 push(AT);
723 push(tmp_reg);
724 atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
725 pop(tmp_reg);
726 pop(AT);
727 bind(L);
728 }
730 bind(cas_label);
731 return null_check_offset;
732 }
734 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
735 assert(UseBiasedLocking, "why call this otherwise?");
737 // Check for biased locking unlock case, which is a no-op
738 // Note: we do not have to check the thread ID for two reasons.
739 // First, the interpreter checks for IllegalMonitorStateException at
740 // a higher level. Second, if the bias was revoked while we held the
741 // lock, the object could not be rebiased toward another thread, so
742 // the bias bit would be clear.
743 #ifdef _LP64
744 ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
745 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
746 daddi(AT, R0, markOopDesc::biased_lock_pattern);
747 #else
748 lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
749 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
750 addi(AT, R0, markOopDesc::biased_lock_pattern);
751 #endif
753 beq(AT, temp_reg, done);
754 delayed()->nop();
755 }
757 // the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf
758 // this method will handle the stack problem, you need not to preserve the stack space for the argument now
759 void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
760 Label L, E;
762 assert(number_of_arguments <= 4, "just check");
764 andi(AT, SP, 0xf);
765 beq(AT, R0, L);
766 delayed()->nop();
767 daddi(SP, SP, -8);
768 call(entry_point, relocInfo::runtime_call_type);
769 delayed()->nop();
770 daddi(SP, SP, 8);
771 b(E);
772 delayed()->nop();
774 bind(L);
775 call(entry_point, relocInfo::runtime_call_type);
776 delayed()->nop();
777 bind(E);
778 }
781 void MacroAssembler::jmp(address entry) {
782 patchable_set48(T9, (long)entry);
783 jr(T9);
784 }
786 void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
787 switch (rtype) {
788 case relocInfo::runtime_call_type:
789 case relocInfo::none:
790 jmp(entry);
791 break;
792 default:
793 {
794 InstructionMark im(this);
795 relocate(rtype);
796 patchable_set48(T9, (long)entry);
797 jr(T9);
798 }
799 break;
800 }
801 }
803 void MacroAssembler::jmp_far(Label& L) {
804 if (L.is_bound()) {
805 address entry = target(L);
806 assert(entry != NULL, "jmp most probably wrong");
807 InstructionMark im(this);
809 relocate(relocInfo::internal_word_type);
810 patchable_set48(T9, (long)entry);
811 } else {
812 InstructionMark im(this);
813 L.add_patch_at(code(), locator());
815 relocate(relocInfo::internal_word_type);
816 patchable_set48(T9, (long)pc());
817 }
819 jr(T9);
820 delayed()->nop();
821 }
822 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
823 int oop_index;
824 if (obj) {
825 oop_index = oop_recorder()->find_index(obj);
826 } else {
827 oop_index = oop_recorder()->allocate_metadata_index(obj);
828 }
829 relocate(metadata_Relocation::spec(oop_index));
830 patchable_set48(AT, (long)obj);
831 sd(AT, dst);
832 }
834 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
835 int oop_index;
836 if (obj) {
837 oop_index = oop_recorder()->find_index(obj);
838 } else {
839 oop_index = oop_recorder()->allocate_metadata_index(obj);
840 }
841 relocate(metadata_Relocation::spec(oop_index));
842 patchable_set48(dst, (long)obj);
843 }
845 void MacroAssembler::call(address entry) {
846 // c/c++ code assume T9 is entry point, so we just always move entry to t9
847 // maybe there is some more graceful method to handle this. FIXME
848 // For more info, see class NativeCall.
849 #ifndef _LP64
850 move(T9, (int)entry);
851 #else
852 patchable_set48(T9, (long)entry);
853 #endif
854 jalr(T9);
855 }
857 void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
858 switch (rtype) {
859 case relocInfo::runtime_call_type:
860 case relocInfo::none:
861 call(entry);
862 break;
863 default:
864 {
865 InstructionMark im(this);
866 relocate(rtype);
867 call(entry);
868 }
869 break;
870 }
871 }
873 void MacroAssembler::call(address entry, RelocationHolder& rh)
874 {
875 switch (rh.type()) {
876 case relocInfo::runtime_call_type:
877 case relocInfo::none:
878 call(entry);
879 break;
880 default:
881 {
882 InstructionMark im(this);
883 relocate(rh);
884 call(entry);
885 }
886 break;
887 }
888 }
890 void MacroAssembler::ic_call(address entry) {
891 RelocationHolder rh = virtual_call_Relocation::spec(pc());
892 patchable_set48(IC_Klass, (long)Universe::non_oop_word());
893 assert(entry != NULL, "call most probably wrong");
894 InstructionMark im(this);
895 relocate(rh);
896 patchable_call(entry);
897 }
899 void MacroAssembler::c2bool(Register r) {
900 Label L;
901 Assembler::beq(r, R0, L);
902 delayed()->nop();
903 move(r, 1);
904 bind(L);
905 }
// Declaration of the externally defined C-linkage helper findpc(), which
// debug32() calls with the reported eip value. Only declared in non-PRODUCT
// builds.
907 #ifndef PRODUCT
908 extern "C" void findpc(intptr_t x);
909 #endif
911 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
912 // In order to get locks to work, we need to fake a in_VM state
913 JavaThread* thread = JavaThread::current();
914 JavaThreadState saved_state = thread->thread_state();
915 thread->set_thread_state(_thread_in_vm);
916 if (ShowMessageBoxOnError) {
917 JavaThread* thread = JavaThread::current();
918 JavaThreadState saved_state = thread->thread_state();
919 thread->set_thread_state(_thread_in_vm);
920 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
921 ttyLocker ttyl;
922 BytecodeCounter::print();
923 }
924 // To see where a verify_oop failed, get $ebx+40/X for this frame.
925 // This is the value of eip which points to where verify_oop will return.
926 if (os::message_box(msg, "Execution stopped, print registers?")) {
927 ttyLocker ttyl;
928 tty->print_cr("eip = 0x%08x", eip);
929 #ifndef PRODUCT
930 tty->cr();
931 findpc(eip);
932 tty->cr();
933 #endif
934 tty->print_cr("rax, = 0x%08x", rax);
935 tty->print_cr("rbx, = 0x%08x", rbx);
936 tty->print_cr("rcx = 0x%08x", rcx);
937 tty->print_cr("rdx = 0x%08x", rdx);
938 tty->print_cr("rdi = 0x%08x", rdi);
939 tty->print_cr("rsi = 0x%08x", rsi);
940 tty->print_cr("rbp, = 0x%08x", rbp);
941 tty->print_cr("rsp = 0x%08x", rsp);
942 BREAKPOINT;
943 }
944 } else {
945 ttyLocker ttyl;
946 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
947 assert(false, "DEBUG MESSAGE");
948 }
949 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
950 }
952 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
953 if ( ShowMessageBoxOnError ) {
954 JavaThreadState saved_state = JavaThread::current()->thread_state();
955 JavaThread::current()->set_thread_state(_thread_in_vm);
956 {
957 // In order to get locks work, we need to fake a in_VM state
958 ttyLocker ttyl;
959 ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
960 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
961 BytecodeCounter::print();
962 }
964 // if (os::message_box(msg, "Execution stopped, print registers?"))
965 // regs->print(::tty);
966 }
967 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
968 }
969 else
970 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
971 }
974 void MacroAssembler::stop(const char* msg) {
975 li(A0, (long)msg);
976 #ifndef _LP64
977 //reserver space for argument.
978 addiu(SP, SP, - 1 * wordSize);
979 #endif
980 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
981 delayed()->nop();
982 #ifndef _LP64
983 //restore space for argument
984 addiu(SP, SP, 1 * wordSize);
985 #endif
986 brk(17);
987 }
989 void MacroAssembler::warn(const char* msg) {
990 #ifdef _LP64
991 pushad();
992 li(A0, (long)msg);
993 push(S2);
994 move(AT, -(StackAlignmentInBytes));
995 move(S2, SP); // use S2 as a sender SP holder
996 andr(SP, SP, AT); // align stack as required by ABI
997 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
998 delayed()->nop();
999 move(SP, S2); // use S2 as a sender SP holder
1000 pop(S2);
1001 popad();
1002 #else
1003 pushad();
1004 addi(SP, SP, -4);
1005 sw(A0, SP, -1 * wordSize);
1006 li(A0, (long)msg);
1007 addi(SP, SP, -1 * wordSize);
1008 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
1009 delayed()->nop();
1010 addi(SP, SP, 1 * wordSize);
1011 lw(A0, SP, -1 * wordSize);
1012 addi(SP, SP, 4);
1013 popad();
1014 #endif
1015 }
// Emits code that calls SharedRuntime::print_reg_with_pc with
//   A0 = reg->name(),
//   A1 = the register's value (taken from SP + 23 words for SP, reloaded from
//        the stack at SP + 19 words for A0 because A0 was just overwritten,
//        a plain move otherwise),
//   A2 = the pc captured when this sequence was generated.
// The sequence is wrapped in pushad()/popad(); S2 is pushed and holds the
// original SP while SP is masked with -(StackAlignmentInBytes) for the call.
// The two /* ... */ regions are disabled earlier variants kept for reference.
1017 void MacroAssembler::print_reg(Register reg) {
1018 /*
1019 char *s = getenv("PRINT_REG");
1020 if (s == NULL)
1021 return;
1022 if (strcmp(s, "1") != 0)
1023 return;
1024 */
1025 void * cur_pc = pc();
1026 pushad();
1027 NOT_LP64(push(FP);)
1029 li(A0, (long)reg->name());
1030 if (reg == SP)
1031 addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
1032 else if (reg == A0)
1033 ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
1034 else
1035 move(A1, reg);
1036 li(A2, (long)cur_pc);
1037 push(S2);
1038 move(AT, -(StackAlignmentInBytes));
1039 move(S2, SP); // use S2 as a sender SP holder
1040 andr(SP, SP, AT); // align stack as required by ABI
1041 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
1042 delayed()->nop();
1043 move(SP, S2); // use S2 as a sender SP holder
1044 pop(S2);
1045 NOT_LP64(pop(FP);)
1046 popad();
1048 /*
1049 pushad();
1050 #ifdef _LP64
1051 if (reg == SP)
1052 addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
1053 else
1054 move(A0, reg);
1055 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
1056 delayed()->nop();
1057 #else
1058 push(FP);
1059 move(A0, reg);
1060 dsrl32(A1, reg, 0);
1061 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
1062 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
1063 delayed()->nop();
1064 pop(FP);
1065 #endif
1066 popad();
1067 pushad();
1068 NOT_LP64(push(FP);)
1069 char b[50];
1070 sprintf((char *)b, " pc: %p\n",cur_pc);
1071 li(A0, (long)(char *)b);
1072 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1073 delayed()->nop();
1074 NOT_LP64(pop(FP);)
1075 popad();
1076 */
1077 }
// Emits code that prints a float register in two steps, each wrapped in its
// own pushad()/popad() pair:
//   1. SharedRuntime::print_str is called with A0 = reg->name(); S2 is pushed
//      and holds the original SP while SP is aligned for the call.
//   2. SharedRuntime::print_double is called with the value copied into F12;
//      here FP holds the original SP while SP is aligned.
// The '#else' branch of '#if 1' and the '#if 0' section are disabled code.
// cur_pc is only referenced from the disabled '#if 0' section.
1079 void MacroAssembler::print_reg(FloatRegister reg) {
1080 void * cur_pc = pc();
1081 pushad();
1082 NOT_LP64(push(FP);)
1083 li(A0, (long)reg->name());
1084 push(S2);
1085 move(AT, -(StackAlignmentInBytes));
1086 move(S2, SP); // use S2 as a sender SP holder
1087 andr(SP, SP, AT); // align stack as required by ABI
1088 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1089 delayed()->nop();
1090 move(SP, S2); // use S2 as a sender SP holder
1091 pop(S2);
1092 NOT_LP64(pop(FP);)
1093 popad();
1095 pushad();
1096 NOT_LP64(push(FP);)
1097 #if 1
1098 move(FP, SP);
1099 move(AT, -(StackAlignmentInBytes));
1100 andr(SP , SP , AT);
1101 mov_d(F12, reg);
1102 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
1103 delayed()->nop();
1104 move(SP, FP);
1105 #else
1106 mov_s(F12, reg);
1107 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
1108 //delayed()->nop();
1109 #endif
1110 NOT_LP64(pop(FP);)
1111 popad();
1113 #if 0
1114 pushad();
1115 NOT_LP64(push(FP);)
1116 char* b = new char[50];
1117 sprintf(b, " pc: %p\n", cur_pc);
1118 li(A0, (long)b);
1119 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1120 delayed()->nop();
1121 NOT_LP64(pop(FP);)
1122 popad();
1123 #endif
1124 }
1126 void MacroAssembler::increment(Register reg, int imm) {
1127 if (!imm) return;
1128 if (is_simm16(imm)) {
1129 #ifdef _LP64
1130 daddiu(reg, reg, imm);
1131 #else
1132 addiu(reg, reg, imm);
1133 #endif
1134 } else {
1135 move(AT, imm);
1136 #ifdef _LP64
1137 daddu(reg, reg, AT);
1138 #else
1139 addu(reg, reg, AT);
1140 #endif
1141 }
1142 }
1144 void MacroAssembler::decrement(Register reg, int imm) {
1145 increment(reg, -imm);
1146 }
1149 void MacroAssembler::call_VM(Register oop_result,
1150 address entry_point,
1151 bool check_exceptions) {
1152 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
1153 }
1155 void MacroAssembler::call_VM(Register oop_result,
1156 address entry_point,
1157 Register arg_1,
1158 bool check_exceptions) {
1159 if (arg_1!=A1) move(A1, arg_1);
1160 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
1161 }
1163 void MacroAssembler::call_VM(Register oop_result,
1164 address entry_point,
1165 Register arg_1,
1166 Register arg_2,
1167 bool check_exceptions) {
1168 if (arg_1!=A1) move(A1, arg_1);
1169 if (arg_2!=A2) move(A2, arg_2);
1170 assert(arg_2 != A1, "smashed argument");
1171 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
1172 }
1174 void MacroAssembler::call_VM(Register oop_result,
1175 address entry_point,
1176 Register arg_1,
1177 Register arg_2,
1178 Register arg_3,
1179 bool check_exceptions) {
1180 if (arg_1!=A1) move(A1, arg_1);
1181 if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1182 if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
1183 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
1184 }
1186 void MacroAssembler::call_VM(Register oop_result,
1187 Register last_java_sp,
1188 address entry_point,
1189 int number_of_arguments,
1190 bool check_exceptions) {
1191 call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
1192 }
1194 void MacroAssembler::call_VM(Register oop_result,
1195 Register last_java_sp,
1196 address entry_point,
1197 Register arg_1,
1198 bool check_exceptions) {
1199 if (arg_1 != A1) move(A1, arg_1);
1200 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
1201 }
1203 void MacroAssembler::call_VM(Register oop_result,
1204 Register last_java_sp,
1205 address entry_point,
1206 Register arg_1,
1207 Register arg_2,
1208 bool check_exceptions) {
1209 if (arg_1 != A1) move(A1, arg_1);
1210 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1211 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
1212 }
1214 void MacroAssembler::call_VM(Register oop_result,
1215 Register last_java_sp,
1216 address entry_point,
1217 Register arg_1,
1218 Register arg_2,
1219 Register arg_3,
1220 bool check_exceptions) {
1221 if (arg_1 != A1) move(A1, arg_1);
1222 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1223 if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
1224 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
1225 }
1227 void MacroAssembler::call_VM_base(Register oop_result,
1228 Register java_thread,
1229 Register last_java_sp,
1230 address entry_point,
1231 int number_of_arguments,
1232 bool check_exceptions) {
1234 address before_call_pc;
1235 // determine java_thread register
1236 if (!java_thread->is_valid()) {
1237 #ifndef OPT_THREAD
1238 java_thread = T2;
1239 get_thread(java_thread);
1240 #else
1241 java_thread = TREG;
1242 #endif
1243 }
1244 // determine last_java_sp register
1245 if (!last_java_sp->is_valid()) {
1246 last_java_sp = SP;
1247 }
1248 // debugging support
1249 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
1250 assert(number_of_arguments <= 4 , "cannot have negative number of arguments");
1251 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
1252 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
1254 assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
1256 // set last Java frame before call
1257 before_call_pc = (address)pc();
1258 set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
1260 // do the call
1261 move(A0, java_thread);
1262 call(entry_point, relocInfo::runtime_call_type);
1263 delayed()->nop();
1265 // restore the thread (cannot use the pushed argument since arguments
1266 // may be overwritten by C code generated by an optimizing compiler);
1267 // however can use the register value directly if it is callee saved.
1268 #ifndef OPT_THREAD
1269 get_thread(java_thread);
1270 #else
1271 #ifdef ASSERT
1272 {
1273 Label L;
1274 get_thread(AT);
1275 beq(java_thread, AT, L);
1276 delayed()->nop();
1277 stop("MacroAssembler::call_VM_base: TREG not callee saved?");
1278 bind(L);
1279 }
1280 #endif
1281 #endif
1283 // discard thread and arguments
1284 ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1285 // reset last Java frame
1286 reset_last_Java_frame(java_thread, false);
1288 check_and_handle_popframe(java_thread);
1289 check_and_handle_earlyret(java_thread);
1290 if (check_exceptions) {
1291 // check for pending exceptions (java_thread is set upon return)
1292 Label L;
1293 #ifdef _LP64
1294 ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1295 #else
1296 lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1297 #endif
1298 beq(AT, R0, L);
1299 delayed()->nop();
1300 li(AT, before_call_pc);
1301 push(AT);
1302 jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1303 delayed()->nop();
1304 bind(L);
1305 }
1307 // get oop result if there is one and reset the value in the thread
1308 if (oop_result->is_valid()) {
1309 #ifdef _LP64
1310 ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1311 sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1312 #else
1313 lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1314 sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1315 #endif
1316 verify_oop(oop_result);
1317 }
1318 }
1320 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1322 move(V0, SP);
1323 //we also reserve space for java_thread here
1324 #ifndef _LP64
1325 daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
1326 #endif
1327 move(AT, -(StackAlignmentInBytes));
1328 andr(SP, SP, AT);
1329 call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
1331 }
1333 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
1334 call_VM_leaf_base(entry_point, number_of_arguments);
1335 }
1337 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
1338 if (arg_0 != A0) move(A0, arg_0);
1339 call_VM_leaf(entry_point, 1);
1340 }
1342 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1343 if (arg_0 != A0) move(A0, arg_0);
1344 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
1345 call_VM_leaf(entry_point, 2);
1346 }
1348 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1349 if (arg_0 != A0) move(A0, arg_0);
1350 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
1351 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
1352 call_VM_leaf(entry_point, 3);
1353 }
1354 void MacroAssembler::super_call_VM_leaf(address entry_point) {
1355 MacroAssembler::call_VM_leaf_base(entry_point, 0);
1356 }
1359 void MacroAssembler::super_call_VM_leaf(address entry_point,
1360 Register arg_1) {
1361 if (arg_1 != A0) move(A0, arg_1);
1362 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1363 }
1366 void MacroAssembler::super_call_VM_leaf(address entry_point,
1367 Register arg_1,
1368 Register arg_2) {
1369 if (arg_1 != A0) move(A0, arg_1);
1370 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
1371 MacroAssembler::call_VM_leaf_base(entry_point, 2);
1372 }
1373 void MacroAssembler::super_call_VM_leaf(address entry_point,
1374 Register arg_1,
1375 Register arg_2,
1376 Register arg_3) {
1377 if (arg_1 != A0) move(A0, arg_1);
1378 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
1379 if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
1380 MacroAssembler::call_VM_leaf_base(entry_point, 3);
1381 }
1383 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
1384 }
1386 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
1387 }
1389 void MacroAssembler::null_check(Register reg, int offset) {
1390 if (needs_explicit_null_check(offset)) {
1391 // provoke OS NULL exception if reg = NULL by
1392 // accessing M[reg] w/o changing any (non-CC) registers
1393 // NOTE: cmpl is plenty here to provoke a segv
1394 lw(AT, reg, 0);
1395 // Note: should probably use testl(rax, Address(reg, 0));
1396 // may be shorter code (however, this version of
1397 // testl needs to be implemented first)
1398 } else {
1399 // nothing to do, (later) access of M[reg + offset]
1400 // will provoke OS NULL exception if reg = NULL
1401 }
1402 }
1404 void MacroAssembler::enter() {
1405 push2(RA, FP);
1406 move(FP, SP);
1407 }
1409 void MacroAssembler::leave() {
1410 #ifndef _LP64
1411 //move(SP, FP);
1412 //pop2(FP, RA);
1413 addi(SP, FP, 2 * wordSize);
1414 lw(RA, SP, - 1 * wordSize);
1415 lw(FP, SP, - 2 * wordSize);
1416 #else
1417 daddi(SP, FP, 2 * wordSize);
1418 ld(RA, SP, - 1 * wordSize);
1419 ld(FP, SP, - 2 * wordSize);
1420 #endif
1421 }
1422 /*
1423 void MacroAssembler::os_breakpoint() {
1424 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
1425 // (e.g., MSVC can't call ps() otherwise)
1426 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
1427 }
1428 */
1429 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) {
1430 // determine java_thread register
1431 if (!java_thread->is_valid()) {
1432 #ifndef OPT_THREAD
1433 java_thread = T1;
1434 get_thread(java_thread);
1435 #else
1436 java_thread = TREG;
1437 #endif
1438 }
1439 // we must set sp to zero to clear frame
1440 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1441 // must clear fp, so that compiled frames are not confused; it is possible
1442 // that we need it only for debugging
1443 if(clear_fp) {
1444 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
1445 }
1447 // Always clear the pc because it could have been set by make_walkable()
1448 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
1449 }
1451 void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
1452 Register thread = TREG;
1453 #ifndef OPT_THREAD
1454 get_thread(thread);
1455 #endif
1456 // we must set sp to zero to clear frame
1457 sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
1458 // must clear fp, so that compiled frames are not confused; it is
1459 // possible that we need it only for debugging
1460 if (clear_fp) {
1461 sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
1462 }
1464 // Always clear the pc because it could have been set by make_walkable()
1465 sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
1466 }
1468 // Write serialization page so VM thread can do a pseudo remote membar.
1469 // We use the current thread pointer to calculate a thread specific
1470 // offset to write to within the page. This minimizes bus traffic
1471 // due to cache line collision.
1472 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
1473 move(tmp, thread);
1474 srl(tmp, tmp,os::get_serialize_page_shift_count());
1475 move(AT, (os::vm_page_size() - sizeof(int)));
1476 andr(tmp, tmp,AT);
1477 sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page()));
1478 }
1480 // Calls to C land
1481 //
1482 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
1483 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
1484 // has to be reset to 0. This is required to allow proper stack traversal.
1485 void MacroAssembler::set_last_Java_frame(Register java_thread,
1486 Register last_java_sp,
1487 Register last_java_fp,
1488 address last_java_pc) {
1489 // determine java_thread register
1490 if (!java_thread->is_valid()) {
1491 #ifndef OPT_THREAD
1492 java_thread = T2;
1493 get_thread(java_thread);
1494 #else
1495 java_thread = TREG;
1496 #endif
1497 }
1498 // determine last_java_sp register
1499 if (!last_java_sp->is_valid()) {
1500 last_java_sp = SP;
1501 }
1503 // last_java_fp is optional
1504 if (last_java_fp->is_valid()) {
1505 st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
1506 }
1508 // last_java_pc is optional
1509 if (last_java_pc != NULL) {
1510 relocate(relocInfo::internal_word_type);
1511 patchable_set48(AT, (long)last_java_pc);
1512 st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
1513 }
1514 st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1515 }
1517 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
1518 Register last_java_fp,
1519 address last_java_pc) {
1520 // determine last_java_sp register
1521 if (!last_java_sp->is_valid()) {
1522 last_java_sp = SP;
1523 }
1525 Register thread = TREG;
1526 #ifndef OPT_THREAD
1527 get_thread(thread);
1528 #endif
1529 // last_java_fp is optional
1530 if (last_java_fp->is_valid()) {
1531 sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
1532 }
1534 // last_java_pc is optional
1535 if (last_java_pc != NULL) {
1536 relocate(relocInfo::internal_word_type);
1537 patchable_set48(AT, (long)last_java_pc);
1538 st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
1539 }
1541 sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
1542 }
1544 //////////////////////////////////////////////////////////////////////////////////
1545 #if INCLUDE_ALL_GCS
1547 void MacroAssembler::g1_write_barrier_pre(Register obj,
1548 Register pre_val,
1549 Register thread,
1550 Register tmp,
1551 bool tosca_live,
1552 bool expand_call) {
1554 // If expand_call is true then we expand the call_VM_leaf macro
1555 // directly to skip generating the check by
1556 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
1558 #ifdef _LP64
1559 assert(thread == TREG, "must be");
1560 #endif // _LP64
1562 Label done;
1563 Label runtime;
1565 assert(pre_val != noreg, "check this code");
1567 if (obj != noreg) {
1568 assert_different_registers(obj, pre_val, tmp);
1569 assert(pre_val != V0, "check this code");
1570 }
1572 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1573 PtrQueue::byte_offset_of_active()));
1574 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1575 PtrQueue::byte_offset_of_index()));
1576 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1577 PtrQueue::byte_offset_of_buf()));
1580 // Is marking active?
1581 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
1582 lw(AT, in_progress);
1583 } else {
1584 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
1585 lb(AT, in_progress);
1586 }
1587 beq(AT, R0, done);
1588 delayed()->nop();
1590 // Do we need to load the previous value?
1591 if (obj != noreg) {
1592 load_heap_oop(pre_val, Address(obj, 0));
1593 }
1595 // Is the previous value null?
1596 beq(pre_val, R0, done);
1597 delayed()->nop();
1599 // Can we store original value in the thread's buffer?
1600 // Is index == 0?
1601 // (The index field is typed as size_t.)
1603 ld(tmp, index);
1604 beq(tmp, R0, runtime);
1605 delayed()->nop();
1607 daddiu(tmp, tmp, -1 * wordSize);
1608 sd(tmp, index);
1609 ld(AT, buffer);
1610 daddu(tmp, tmp, AT);
1612 // Record the previous value
1613 sd(pre_val, tmp, 0);
1614 beq(R0, R0, done);
1615 delayed()->nop();
1617 bind(runtime);
1618 // save the live input values
1619 if (tosca_live) push(V0);
1621 if (obj != noreg && obj != V0) push(obj);
1623 if (pre_val != V0) push(pre_val);
1625 // Calling the runtime using the regular call_VM_leaf mechanism generates
1626 // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
1627 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
1628 //
1629 // If we care generating the pre-barrier without a frame (e.g. in the
1630 // intrinsified Reference.get() routine) then ebp might be pointing to
1631 // the caller frame and so this check will most likely fail at runtime.
1632 //
1633 // Expanding the call directly bypasses the generation of the check.
1634 // So when we do not have have a full interpreter frame on the stack
1635 // expand_call should be passed true.
1637 NOT_LP64( push(thread); )
1639 if (expand_call) {
1640 LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
1641 if (thread != A1) move(A1, thread);
1642 if (pre_val != A0) move(A0, pre_val);
1643 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
1644 } else {
1645 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
1646 }
1648 NOT_LP64( pop(thread); )
1650 // save the live input values
1651 if (pre_val != V0)
1652 pop(pre_val);
1654 if (obj != noreg && obj != V0)
1655 pop(obj);
1657 if(tosca_live) pop(V0);
1659 bind(done);
1660 }
1662 void MacroAssembler::g1_write_barrier_post(Register store_addr,
1663 Register new_val,
1664 Register thread,
1665 Register tmp,
1666 Register tmp2) {
1667 assert(tmp != AT, "must be");
1668 assert(tmp2 != AT, "must be");
1669 #ifdef _LP64
1670 assert(thread == TREG, "must be");
1671 #endif // _LP64
1673 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1674 PtrQueue::byte_offset_of_index()));
1675 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1676 PtrQueue::byte_offset_of_buf()));
1678 BarrierSet* bs = Universe::heap()->barrier_set();
1679 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1680 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1682 Label done;
1683 Label runtime;
1685 // Does store cross heap regions?
1686 xorr(AT, store_addr, new_val);
1687 dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
1688 beq(AT, R0, done);
1689 delayed()->nop();
1692 // crosses regions, storing NULL?
1693 beq(new_val, R0, done);
1694 delayed()->nop();
1696 // storing region crossing non-NULL, is card already dirty?
1697 const Register card_addr = tmp;
1698 const Register cardtable = tmp2;
1700 move(card_addr, store_addr);
1701 dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
1702 // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
1703 // a valid address and therefore is not properly handled by the relocation code.
1704 set64(cardtable, (intptr_t)ct->byte_map_base);
1705 daddu(card_addr, card_addr, cardtable);
1707 lb(AT, card_addr, 0);
1708 daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
1709 beq(AT, R0, done);
1710 delayed()->nop();
1712 sync();
1713 lb(AT, card_addr, 0);
1714 daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val());
1715 beq(AT, R0, done);
1716 delayed()->nop();
1719 // storing a region crossing, non-NULL oop, card is clean.
1720 // dirty card and log.
1721 move(AT, (int)CardTableModRefBS::dirty_card_val());
1722 sb(AT, card_addr, 0);
1724 lw(AT, queue_index);
1725 beq(AT, R0, runtime);
1726 delayed()->nop();
1727 daddiu(AT, AT, -1 * wordSize);
1728 sw(AT, queue_index);
1729 ld(tmp2, buffer);
1730 #ifdef _LP64
1731 ld(AT, queue_index);
1732 daddu(tmp2, tmp2, AT);
1733 sd(card_addr, tmp2, 0);
1734 #else
1735 lw(AT, queue_index);
1736 addu32(tmp2, tmp2, AT);
1737 sw(card_addr, tmp2, 0);
1738 #endif
1739 beq(R0, R0, done);
1740 delayed()->nop();
1742 bind(runtime);
1743 // save the live input values
1744 push(store_addr);
1745 push(new_val);
1746 #ifdef _LP64
1747 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
1748 #else
1749 push(thread);
1750 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
1751 pop(thread);
1752 #endif
1753 pop(new_val);
1754 pop(store_addr);
1756 bind(done);
1757 }
1759 #endif // INCLUDE_ALL_GCS
1760 //////////////////////////////////////////////////////////////////////////////////
1763 void MacroAssembler::store_check(Register obj) {
1764 // Does a store check for the oop in register obj. The content of
1765 // register obj is destroyed afterwards.
1766 store_check_part_1(obj);
1767 store_check_part_2(obj);
1768 }
1770 void MacroAssembler::store_check(Register obj, Address dst) {
1771 store_check(obj);
1772 }
1775 // split the store check operation so that other instructions can be scheduled inbetween
1776 void MacroAssembler::store_check_part_1(Register obj) {
1777 BarrierSet* bs = Universe::heap()->barrier_set();
1778 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
1779 #ifdef _LP64
1780 dsrl(obj, obj, CardTableModRefBS::card_shift);
1781 #else
1782 shr(obj, CardTableModRefBS::card_shift);
1783 #endif
1784 }
1786 void MacroAssembler::store_check_part_2(Register obj) {
1787 BarrierSet* bs = Universe::heap()->barrier_set();
1788 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
1789 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1790 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1792 set64(AT, (long)ct->byte_map_base);
1793 #ifdef _LP64
1794 dadd(AT, AT, obj);
1795 #else
1796 add(AT, AT, obj);
1797 #endif
1798 if (UseConcMarkSweepGC) sync();
1799 sb(R0, AT, 0);
1800 }
1802 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
1803 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1804 Register t1, Register t2, Label& slow_case) {
1805 assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
1807 Register end = t2;
1808 #ifndef OPT_THREAD
1809 Register thread = t1;
1810 get_thread(thread);
1811 #else
1812 Register thread = TREG;
1813 #endif
1814 verify_tlab(t1, t2);//blows t1&t2
1816 ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
1818 if (var_size_in_bytes == NOREG) {
1819 // i dont think we need move con_size_in_bytes to a register first.
1820 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1821 addi(end, obj, con_size_in_bytes);
1822 } else {
1823 add(end, obj, var_size_in_bytes);
1824 }
1826 ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
1827 sltu(AT, AT, end);
1828 bne_far(AT, R0, slow_case);
1829 delayed()->nop();
1832 // update the tlab top pointer
1833 st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
1835 verify_tlab(t1, t2);
1836 }
1838 // Defines obj, preserves var_size_in_bytes
1839 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1840 Register t1, Register t2, Label& slow_case) {
1841 assert_different_registers(obj, var_size_in_bytes, t1, AT);
1842 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
1843 // No allocation in the shared eden.
1844 b_far(slow_case);
1845 delayed()->nop();
1846 } else {
1848 #ifndef _LP64
1849 Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
1850 lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
1851 #else
1852 Address heap_top(t1);
1853 li(t1, (long)Universe::heap()->top_addr());
1854 #endif
1855 ld_ptr(obj, heap_top);
1857 Register end = t2;
1858 Label retry;
1860 bind(retry);
1861 if (var_size_in_bytes == NOREG) {
1862 // i dont think we need move con_size_in_bytes to a register first.
1863 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1864 addi(end, obj, con_size_in_bytes);
1865 } else {
1866 add(end, obj, var_size_in_bytes);
1867 }
1868 // if end < obj then we wrapped around => object too long => slow case
1869 sltu(AT, end, obj);
1870 bne_far(AT, R0, slow_case);
1871 delayed()->nop();
1873 li(AT, (long)Universe::heap()->end_addr());
1874 ld_ptr(AT, AT, 0);
1875 sltu(AT, AT, end);
1876 bne_far(AT, R0, slow_case);
1877 delayed()->nop();
1878 // Compare obj with the top addr, and if still equal, store the new top addr in
1879 // end at the address of the top addr pointer. Sets ZF if was equal, and clears
1880 // it otherwise. Use lock prefix for atomicity on MPs.
1881 //if (os::is_MP()) {
1882 // sync();
1883 //}
1885 // if someone beat us on the allocation, try again, otherwise continue
1886 cmpxchg(end, heap_top, obj);
1887 beq_far(AT, R0, retry);
1888 delayed()->nop();
1889 }
1890 }
1892 // C2 doesn't invoke this one.
1893 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1894 Register top = T0;
1895 Register t1 = T1;
1896 Register t2 = T9;
1897 Register t3 = T3;
1898 Register thread_reg = T8;
1899 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4);
1900 Label do_refill, discard_tlab;
1902 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
1903 // No allocation in the shared eden.
1904 b(slow_case);
1905 delayed()->nop();
1906 }
1908 get_thread(thread_reg);
1910 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1911 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1913 // calculate amount of free space
1914 sub(t1, t1, top);
1915 shr(t1, LogHeapWordSize);
1917 // Retain tlab and allocate object in shared space if
1918 // the amount free in the tlab is too large to discard.
1919 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1920 slt(AT, t2, t1);
1921 beq(AT, R0, discard_tlab);
1922 delayed()->nop();
1924 // Retain
1925 #ifndef _LP64
1926 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1927 #else
1928 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1929 #endif
1930 add(t2, t2, AT);
1931 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1933 if (TLABStats) {
1934 // increment number of slow_allocations
1935 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1936 addiu(AT, AT, 1);
1937 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1938 }
1939 b(try_eden);
1940 delayed()->nop();
1942 bind(discard_tlab);
1943 if (TLABStats) {
1944 // increment number of refills
1945 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1946 addi(AT, AT, 1);
1947 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1948 // accumulate wastage -- t1 is amount free in tlab
1949 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1950 add(AT, AT, t1);
1951 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1952 }
1954 // if tlab is currently allocated (top or end != null) then
1955 // fill [top, end + alignment_reserve) with array object
1956 beq(top, R0, do_refill);
1957 delayed()->nop();
1959 // set up the mark word
1960 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1961 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1963 // set the length to the remaining space
1964 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1965 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1966 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1967 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1969 // set klass to intArrayKlass
1970 #ifndef _LP64
1971 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
1972 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
1973 #else
1974 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
1975 ld_ptr(t1, AT, 0);
1976 #endif
1977 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
1978 store_klass(top, t1);
1980 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
1981 subu(t1, top, t1);
1982 incr_allocated_bytes(thread_reg, t1, 0);
1984 // refill the tlab with an eden allocation
1985 bind(do_refill);
1986 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1987 shl(t1, LogHeapWordSize);
1988 // add object_size ??
1989 eden_allocate(top, t1, 0, t2, t3, slow_case);
1991 // Check that t1 was preserved in eden_allocate.
1992 #ifdef ASSERT
1993 if (UseTLAB) {
1994 Label ok;
1995 assert_different_registers(thread_reg, t1);
1996 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1997 shl(AT, LogHeapWordSize);
1998 beq(AT, t1, ok);
1999 delayed()->nop();
2000 stop("assert(t1 != tlab size)");
2001 should_not_reach_here();
2003 bind(ok);
2004 }
2005 #endif
2006 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
2007 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
2008 add(top, top, t1);
2009 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
2010 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
2011 verify_tlab(t1, t2);
2012 b(retry);
2013 delayed()->nop();
2014 }
2016 void MacroAssembler::incr_allocated_bytes(Register thread,
2017 Register var_size_in_bytes,
2018 int con_size_in_bytes,
2019 Register t1) {
2020 if (!thread->is_valid()) {
2021 #ifndef OPT_THREAD
2022 assert(t1->is_valid(), "need temp reg");
2023 thread = t1;
2024 get_thread(thread);
2025 #else
2026 thread = TREG;
2027 #endif
2028 }
2030 ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
2031 if (var_size_in_bytes->is_valid()) {
2032 addu(AT, AT, var_size_in_bytes);
2033 } else {
2034 addiu(AT, AT, con_size_in_bytes);
2035 }
2036 st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
2037 }
2039 static const double pi_4 = 0.7853981633974483;
2041 // the x86 version is to clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME
2042 // must get argument(a double) in F12/F13
2043 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
2044 //We need to preseve the register which maybe modified during the Call
2045 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
2046 //save all modified register here
2047 //FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9
2048 pushad();
2049 //we should preserve the stack space before we call
2050 addi(SP, SP, -wordSize * 2);
2051 switch (trig){
2052 case 's' :
2053 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
2054 delayed()->nop();
2055 break;
2056 case 'c':
2057 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
2058 delayed()->nop();
2059 break;
2060 case 't':
2061 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
2062 delayed()->nop();
2063 break;
2064 default:assert (false, "bad intrinsic");
2065 break;
2067 }
2069 addi(SP, SP, wordSize * 2);
2070 popad();
2071 }
2073 #ifdef _LP64
2074 void MacroAssembler::li(Register rd, long imm) {
2075 if (imm <= max_jint && imm >= min_jint) {
2076 li32(rd, (int)imm);
2077 } else if (julong(imm) <= 0xFFFFFFFF) {
2078 assert_not_delayed();
2079 // lui sign-extends, so we can't use that.
2080 ori(rd, R0, julong(imm) >> 16);
2081 dsll(rd, rd, 16);
2082 ori(rd, rd, split_low(imm));
2083 } else if ((imm > 0) && is_simm16(imm >> 32)) {
2084 /* A 48-bit address */
2085 li48(rd, imm);
2086 } else {
2087 li64(rd, imm);
2088 }
2089 }
2090 #else
2091 void MacroAssembler::li(Register rd, long imm) {
2092 li32(rd, (int)imm);
2093 }
2094 #endif
2096 void MacroAssembler::li32(Register reg, int imm) {
2097 if (is_simm16(imm)) {
2098 /* for imm < 0, we should use addi instead of addiu.
2099 *
2100 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2101 *
2102 * 78 move [int:-1|I] [a0|I]
2103 * : daddi a0, zero, 0xffffffff (correct)
2104 * : daddiu a0, zero, 0xffffffff (incorrect)
2105 */
2106 if (imm >= 0)
2107 addiu(reg, R0, imm);
2108 else
2109 addi(reg, R0, imm);
2110 } else {
2111 lui(reg, split_low(imm >> 16));
2112 if (split_low(imm))
2113 ori(reg, reg, split_low(imm));
2114 }
2115 }
2117 #ifdef _LP64
2118 void MacroAssembler::set64(Register d, jlong value) {
2119 assert_not_delayed();
2121 int hi = (int)(value >> 32);
2122 int lo = (int)(value & ~0);
2124 if (value == lo) { // 32-bit integer
2125 if (is_simm16(value)) {
2126 daddiu(d, R0, value);
2127 } else {
2128 lui(d, split_low(value >> 16));
2129 if (split_low(value)) {
2130 ori(d, d, split_low(value));
2131 }
2132 }
2133 } else if (hi == 0) { // hardware zero-extends to upper 32
2134 ori(d, R0, julong(value) >> 16);
2135 dsll(d, d, 16);
2136 if (split_low(value)) {
2137 ori(d, d, split_low(value));
2138 }
2139 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2140 // 4 insts
2141 li48(d, value);
2142 } else { // li64
2143 // 6 insts
2144 li64(d, value);
2145 }
2146 }
2149 int MacroAssembler::insts_for_set64(jlong value) {
2150 int hi = (int)(value >> 32);
2151 int lo = (int)(value & ~0);
2153 int count = 0;
2155 if (value == lo) { // 32-bit integer
2156 if (is_simm16(value)) {
2157 //daddiu(d, R0, value);
2158 count++;
2159 } else {
2160 //lui(d, split_low(value >> 16));
2161 count++;
2162 if (split_low(value)) {
2163 //ori(d, d, split_low(value));
2164 count++;
2165 }
2166 }
2167 } else if (hi == 0) { // hardware zero-extends to upper 32
2168 //ori(d, R0, julong(value) >> 16);
2169 //dsll(d, d, 16);
2170 count += 2;
2171 if (split_low(value)) {
2172 //ori(d, d, split_low(value));
2173 count++;
2174 }
2175 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2176 // 4 insts
2177 //li48(d, value);
2178 count += 4;
2179 } else { // li64
2180 // 6 insts
2181 //li64(d, value);
2182 count += 6;
2183 }
2185 return count;
2186 }
2188 void MacroAssembler::patchable_set48(Register d, jlong value) {
2189 assert_not_delayed();
2191 int hi = (int)(value >> 32);
2192 int lo = (int)(value & ~0);
2194 int count = 0;
2196 if (value == lo) { // 32-bit integer
2197 if (is_simm16(value)) {
2198 daddiu(d, R0, value);
2199 count += 1;
2200 } else {
2201 lui(d, split_low(value >> 16));
2202 count += 1;
2203 if (split_low(value)) {
2204 ori(d, d, split_low(value));
2205 count += 1;
2206 }
2207 }
2208 } else if (hi == 0) { // hardware zero-extends to upper 32
2209 ori(d, R0, julong(value) >> 16);
2210 dsll(d, d, 16);
2211 count += 2;
2212 if (split_low(value)) {
2213 ori(d, d, split_low(value));
2214 count += 1;
2215 }
2216 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2217 // 4 insts
2218 li48(d, value);
2219 count += 4;
2220 } else { // li64
2221 tty->print_cr("value = 0x%x", value);
2222 guarantee(false, "Not supported yet !");
2223 }
2225 for (count; count < 4; count++) {
2226 nop();
2227 }
2228 }
2230 void MacroAssembler::patchable_set32(Register d, jlong value) {
2231 assert_not_delayed();
2233 int hi = (int)(value >> 32);
2234 int lo = (int)(value & ~0);
2236 int count = 0;
2238 if (value == lo) { // 32-bit integer
2239 if (is_simm16(value)) {
2240 daddiu(d, R0, value);
2241 count += 1;
2242 } else {
2243 lui(d, split_low(value >> 16));
2244 count += 1;
2245 if (split_low(value)) {
2246 ori(d, d, split_low(value));
2247 count += 1;
2248 }
2249 }
2250 } else if (hi == 0) { // hardware zero-extends to upper 32
2251 ori(d, R0, julong(value) >> 16);
2252 dsll(d, d, 16);
2253 count += 2;
2254 if (split_low(value)) {
2255 ori(d, d, split_low(value));
2256 count += 1;
2257 }
2258 } else {
2259 tty->print_cr("value = 0x%x", value);
2260 guarantee(false, "Not supported yet !");
2261 }
2263 for (count; count < 3; count++) {
2264 nop();
2265 }
2266 }
2268 void MacroAssembler::patchable_call32(Register d, jlong value) {
2269 assert_not_delayed();
2271 int hi = (int)(value >> 32);
2272 int lo = (int)(value & ~0);
2274 int count = 0;
2276 if (value == lo) { // 32-bit integer
2277 if (is_simm16(value)) {
2278 daddiu(d, R0, value);
2279 count += 1;
2280 } else {
2281 lui(d, split_low(value >> 16));
2282 count += 1;
2283 if (split_low(value)) {
2284 ori(d, d, split_low(value));
2285 count += 1;
2286 }
2287 }
2288 } else {
2289 tty->print_cr("value = 0x%x", value);
2290 guarantee(false, "Not supported yet !");
2291 }
2293 for (count; count < 2; count++) {
2294 nop();
2295 }
2296 }
2298 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2299 assert(UseCompressedClassPointers, "should only be used for compressed header");
2300 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2302 int klass_index = oop_recorder()->find_index(k);
2303 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2304 long narrowKlass = (long)Klass::encode_klass(k);
2306 relocate(rspec, Assembler::narrow_oop_operand);
2307 patchable_set48(dst, narrowKlass);
2308 }
2311 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2312 assert(UseCompressedOops, "should only be used for compressed header");
2313 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2315 int oop_index = oop_recorder()->find_index(obj);
2316 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2318 relocate(rspec, Assembler::narrow_oop_operand);
2319 patchable_set48(dst, oop_index);
2320 }
2322 void MacroAssembler::li64(Register rd, long imm) {
2323 assert_not_delayed();
2324 lui(rd, imm >> 48);
2325 ori(rd, rd, split_low(imm >> 32));
2326 dsll(rd, rd, 16);
2327 ori(rd, rd, split_low(imm >> 16));
2328 dsll(rd, rd, 16);
2329 ori(rd, rd, split_low(imm));
2330 }
2332 void MacroAssembler::li48(Register rd, long imm) {
2333 assert_not_delayed();
2334 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2335 lui(rd, imm >> 32);
2336 ori(rd, rd, split_low(imm >> 16));
2337 dsll(rd, rd, 16);
2338 ori(rd, rd, split_low(imm));
2339 }
2340 #endif
2341 // NOTE: i dont push eax as i486.
2342 // the x86 save eax for it use eax as the jump register
2343 void MacroAssembler::verify_oop(Register reg, const char* s) {
2344 /*
2345 if (!VerifyOops) return;
2347 // Pass register number to verify_oop_subroutine
2348 char* b = new char[strlen(s) + 50];
2349 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
2350 push(rax); // save rax,
2351 push(reg); // pass register argument
2352 ExternalAddress buffer((address) b);
2353 // avoid using pushptr, as it modifies scratch registers
2354 // and our contract is not to modify anything
2355 movptr(rax, buffer.addr());
2356 push(rax);
2357 // call indirectly to solve generation ordering problem
2358 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2359 call(rax);
2360 */
2361 if (!VerifyOops) return;
2362 const char * b = NULL;
2363 stringStream ss;
2364 ss.print("verify_oop: %s: %s", reg->name(), s);
2365 b = code_string(ss.as_string());
2366 #ifdef _LP64
2367 pushad();
2368 move(A1, reg);
2369 li(A0, (long)b);
2370 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2371 ld(T9, AT, 0);
2372 jalr(T9);
2373 delayed()->nop();
2374 popad();
2375 #else
2376 // Pass register number to verify_oop_subroutine
2377 sw(T0, SP, - wordSize);
2378 sw(T1, SP, - 2*wordSize);
2379 sw(RA, SP, - 3*wordSize);
2380 sw(A0, SP ,- 4*wordSize);
2381 sw(A1, SP ,- 5*wordSize);
2382 sw(AT, SP ,- 6*wordSize);
2383 sw(T9, SP ,- 7*wordSize);
2384 addiu(SP, SP, - 7 * wordSize);
2385 move(A1, reg);
2386 li(A0, (long)b);
2387 // call indirectly to solve generation ordering problem
2388 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2389 lw(T9, AT, 0);
2390 jalr(T9);
2391 delayed()->nop();
2392 lw(T0, SP, 6* wordSize);
2393 lw(T1, SP, 5* wordSize);
2394 lw(RA, SP, 4* wordSize);
2395 lw(A0, SP, 3* wordSize);
2396 lw(A1, SP, 2* wordSize);
2397 lw(AT, SP, 1* wordSize);
2398 lw(T9, SP, 0* wordSize);
2399 addiu(SP, SP, 7 * wordSize);
2400 #endif
2401 }
2404 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2405 if (!VerifyOops) {
2406 nop();
2407 return;
2408 }
2409 // Pass register number to verify_oop_subroutine
2410 const char * b = NULL;
2411 stringStream ss;
2412 ss.print("verify_oop_addr: %s", s);
2413 b = code_string(ss.as_string());
2415 st_ptr(T0, SP, - wordSize);
2416 st_ptr(T1, SP, - 2*wordSize);
2417 st_ptr(RA, SP, - 3*wordSize);
2418 st_ptr(A0, SP, - 4*wordSize);
2419 st_ptr(A1, SP, - 5*wordSize);
2420 st_ptr(AT, SP, - 6*wordSize);
2421 st_ptr(T9, SP, - 7*wordSize);
2422 ld_ptr(A1, addr); // addr may use SP, so load from it before change SP
2423 addiu(SP, SP, - 7 * wordSize);
2425 li(A0, (long)b);
2426 // call indirectly to solve generation ordering problem
2427 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2428 ld_ptr(T9, AT, 0);
2429 jalr(T9);
2430 delayed()->nop();
2431 ld_ptr(T0, SP, 6* wordSize);
2432 ld_ptr(T1, SP, 5* wordSize);
2433 ld_ptr(RA, SP, 4* wordSize);
2434 ld_ptr(A0, SP, 3* wordSize);
2435 ld_ptr(A1, SP, 2* wordSize);
2436 ld_ptr(AT, SP, 1* wordSize);
2437 ld_ptr(T9, SP, 0* wordSize);
2438 addiu(SP, SP, 7 * wordSize);
2439 }
2441 // used registers : T0, T1
2442 void MacroAssembler::verify_oop_subroutine() {
2443 // RA: ra
2444 // A0: char* error message
2445 // A1: oop object to verify
2447 Label exit, error;
2448 // increment counter
2449 li(T0, (long)StubRoutines::verify_oop_count_addr());
2450 lw(AT, T0, 0);
2451 #ifdef _LP64
2452 daddi(AT, AT, 1);
2453 #else
2454 addi(AT, AT, 1);
2455 #endif
2456 sw(AT, T0, 0);
2458 // make sure object is 'reasonable'
2459 beq(A1, R0, exit); // if obj is NULL it is ok
2460 delayed()->nop();
2462 // Check if the oop is in the right area of memory
2463 //const int oop_mask = Universe::verify_oop_mask();
2464 //const int oop_bits = Universe::verify_oop_bits();
2465 const uintptr_t oop_mask = Universe::verify_oop_mask();
2466 const uintptr_t oop_bits = Universe::verify_oop_bits();
2467 li(AT, oop_mask);
2468 andr(T0, A1, AT);
2469 li(AT, oop_bits);
2470 bne(T0, AT, error);
2471 delayed()->nop();
2473 // make sure klass is 'reasonable'
2474 //add for compressedoops
2475 reinit_heapbase();
2476 //add for compressedoops
2477 load_klass(T0, A1);
2478 beq(T0, R0, error); // if klass is NULL it is broken
2479 delayed()->nop();
2480 #if 0
2481 //FIXME:wuhui.
2482 // Check if the klass is in the right area of memory
2483 //const int klass_mask = Universe::verify_klass_mask();
2484 //const int klass_bits = Universe::verify_klass_bits();
2485 const uintptr_t klass_mask = Universe::verify_klass_mask();
2486 const uintptr_t klass_bits = Universe::verify_klass_bits();
2488 li(AT, klass_mask);
2489 andr(T1, T0, AT);
2490 li(AT, klass_bits);
2491 bne(T1, AT, error);
2492 delayed()->nop();
2493 // make sure klass' klass is 'reasonable'
2494 //add for compressedoops
2495 load_klass(T0, T0);
2496 beq(T0, R0, error); // if klass' klass is NULL it is broken
2497 delayed()->nop();
2499 li(AT, klass_mask);
2500 andr(T1, T0, AT);
2501 li(AT, klass_bits);
2502 bne(T1, AT, error);
2503 delayed()->nop(); // if klass not in right area of memory it is broken too.
2504 #endif
2505 // return if everything seems ok
2506 bind(exit);
2508 jr(RA);
2509 delayed()->nop();
2511 // handle errors
2512 bind(error);
2513 pushad();
2514 #ifndef _LP64
2515 addi(SP, SP, (-1) * wordSize);
2516 #endif
2517 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2518 delayed()->nop();
2519 #ifndef _LP64
2520 addiu(SP, SP, 1 * wordSize);
2521 #endif
2522 popad();
2523 jr(RA);
2524 delayed()->nop();
2525 }
2527 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2528 #ifdef ASSERT
2529 assert_different_registers(t1, t2, AT);
2530 if (UseTLAB && VerifyOops) {
2531 Label next, ok;
2533 get_thread(t1);
2535 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2536 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2537 sltu(AT, t2, AT);
2538 beq(AT, R0, next);
2539 delayed()->nop();
2541 stop("assert(top >= start)");
2543 bind(next);
2544 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2545 sltu(AT, AT, t2);
2546 beq(AT, R0, ok);
2547 delayed()->nop();
2549 stop("assert(top <= end)");
2551 bind(ok);
2553 }
2554 #endif
2555 }
2556 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2557 Register tmp,
2558 int offset) {
2559 intptr_t value = *delayed_value_addr;
2560 if (value != 0)
2561 return RegisterOrConstant(value + offset);
2562 AddressLiteral a(delayed_value_addr);
2563 // load indirectly to solve generation ordering problem
2564 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2565 //ld(tmp, a);
2566 if (offset != 0)
2567 daddi(tmp,tmp, offset);
2569 return RegisterOrConstant(tmp);
2570 }
2572 void MacroAssembler::hswap(Register reg) {
2573 //short
2574 //andi(reg, reg, 0xffff);
2575 srl(AT, reg, 8);
2576 sll(reg, reg, 24);
2577 sra(reg, reg, 16);
2578 orr(reg, reg, AT);
2579 }
2581 void MacroAssembler::huswap(Register reg) {
2582 #ifdef _LP64
2583 dsrl(AT, reg, 8);
2584 dsll(reg, reg, 24);
2585 dsrl(reg, reg, 16);
2586 orr(reg, reg, AT);
2587 andi(reg, reg, 0xffff);
2588 #else
2589 //andi(reg, reg, 0xffff);
2590 srl(AT, reg, 8);
2591 sll(reg, reg, 24);
2592 srl(reg, reg, 16);
2593 orr(reg, reg, AT);
2594 #endif
2595 }
2597 // something funny to do this will only one more register AT
2598 // 32 bits
2599 void MacroAssembler::swap(Register reg) {
2600 srl(AT, reg, 8);
2601 sll(reg, reg, 24);
2602 orr(reg, reg, AT);
2603 //reg : 4 1 2 3
2604 srl(AT, AT, 16);
2605 xorr(AT, AT, reg);
2606 andi(AT, AT, 0xff);
2607 //AT : 0 0 0 1^3);
2608 xorr(reg, reg, AT);
2609 //reg : 4 1 2 1
2610 sll(AT, AT, 16);
2611 xorr(reg, reg, AT);
2612 //reg : 4 3 2 1
2613 }
2615 #ifdef _LP64
/* do 32-bit CAS using MIPS64 lld/scd

cas_int should only compare 32 bits of the memory value.
However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
plus the high 32 bits of the memory value, are stored together with SCD.
2625 Example:
2627 double d = 3.1415926;
2628 System.err.println("hello" + d);
2630 sun.misc.FloatingDecimal$1.<init>()
2631 |
2632 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2634 38 cas_int [a7a7|J] [a0|I] [a6|I]
2635 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2636 // a6: 0x4ab325aa
2638 again:
2639 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2641 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2642 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2643 0x00000055647f3c68: dsll32 t8, t8, 0
2644 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2645 0x00000055647f3c70: sll zero, zero, 0
2647 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2648 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2649 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2650 0x00000055647f3c80: and v1, a6, v1
2651 0x00000055647f3c84: or at, t8, v1
2652 0x00000055647f3c88: scd at, 0x0(a7)
2653 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2654 0x00000055647f3c90: sll zero, zero, 0
2655 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2656 0x00000055647f3c98: sll zero, zero, 0
2657 nequal:
2658 0x00000055647f45a4: dadd a0, t9, zero
2659 0x00000055647f45a8: dadd at, zero, zero
2660 done:
2661 */
2663 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2664 /* MIPS64 can use ll/sc for 32-bit atomic memory access */
2665 Label done, again, nequal;
2667 bind(again);
2669 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2670 ll(AT, dest);
2671 bne(AT, c_reg, nequal);
2672 delayed()->nop();
2674 move(AT, x_reg);
2675 sc(AT, dest);
2676 beq(AT, R0, again);
2677 delayed()->nop();
2678 b(done);
2679 delayed()->nop();
2681 // not xchged
2682 bind(nequal);
2683 sync();
2684 move(c_reg, AT);
2685 move(AT, R0);
2687 bind(done);
2688 }
2689 #endif // cmpxchg32
2691 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2692 Label done, again, nequal;
2694 bind(again);
2695 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2696 #ifdef _LP64
2697 lld(AT, dest);
2698 #else
2699 ll(AT, dest);
2700 #endif
2701 bne(AT, c_reg, nequal);
2702 delayed()->nop();
2704 move(AT, x_reg);
2705 #ifdef _LP64
2706 scd(AT, dest);
2707 #else
2708 sc(AT, dest);
2709 #endif
2710 beq(AT, R0, again);
2711 delayed()->nop();
2712 b(done);
2713 delayed()->nop();
2715 // not xchged
2716 bind(nequal);
2717 sync();
2718 move(c_reg, AT);
2719 move(AT, R0);
2721 bind(done);
2722 }
2724 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2725 Label done, again, nequal;
2727 Register x_reg = x_regLo;
2728 dsll32(x_regHi, x_regHi, 0);
2729 dsll32(x_regLo, x_regLo, 0);
2730 dsrl32(x_regLo, x_regLo, 0);
2731 orr(x_reg, x_regLo, x_regHi);
2733 Register c_reg = c_regLo;
2734 dsll32(c_regHi, c_regHi, 0);
2735 dsll32(c_regLo, c_regLo, 0);
2736 dsrl32(c_regLo, c_regLo, 0);
2737 orr(c_reg, c_regLo, c_regHi);
2739 bind(again);
2741 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2742 lld(AT, dest);
2743 bne(AT, c_reg, nequal);
2744 delayed()->nop();
2746 //move(AT, x_reg);
2747 dadd(AT, x_reg, R0);
2748 scd(AT, dest);
2749 beq(AT, R0, again);
2750 delayed()->nop();
2751 b(done);
2752 delayed()->nop();
2754 // not xchged
2755 bind(nequal);
2756 sync();
2757 //move(c_reg, AT);
2758 //move(AT, R0);
2759 dadd(c_reg, AT, R0);
2760 dadd(AT, R0, R0);
2761 bind(done);
2762 }
2764 // be sure the three register is different
2765 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2766 assert_different_registers(tmp, fs, ft);
2767 div_s(tmp, fs, ft);
2768 trunc_l_s(tmp, tmp);
2769 cvt_s_l(tmp, tmp);
2770 mul_s(tmp, tmp, ft);
2771 sub_s(fd, fs, tmp);
2772 }
2774 // be sure the three register is different
2775 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2776 assert_different_registers(tmp, fs, ft);
2777 div_d(tmp, fs, ft);
2778 trunc_l_d(tmp, tmp);
2779 cvt_d_l(tmp, tmp);
2780 mul_d(tmp, tmp, ft);
2781 sub_d(fd, fs, tmp);
2782 }
2784 // Fast_Lock and Fast_Unlock used by C2
2786 // Because the transitions from emitted code to the runtime
2787 // monitorenter/exit helper stubs are so slow it's critical that
2788 // we inline both the stack-locking fast-path and the inflated fast path.
2789 //
2790 // See also: cmpFastLock and cmpFastUnlock.
2791 //
2792 // What follows is a specialized inline transliteration of the code
2793 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2794 // another option would be to emit TrySlowEnter and TrySlowExit methods
2795 // at startup-time. These methods would accept arguments as
2796 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2797 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2798 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2799 // In practice, however, the # of lock sites is bounded and is usually small.
2800 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2801 // if the processor uses simple bimodal branch predictors keyed by EIP
2802 // Since the helper routines would be called from multiple synchronization
2803 // sites.
2804 //
2805 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2806 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2807 // to those specialized methods. That'd give us a mostly platform-independent
2808 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
2810 // to park() or unpark() threads. We'd also need a few more unsafe operators
2811 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2812 // (b) explicit barriers or fence operations.
2813 //
2814 // TODO:
2815 //
2816 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2817 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2818 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2819 // the lock operators would typically be faster than reifying Self.
2820 //
2821 // * Ideally I'd define the primitives as:
2822 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2823 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2824 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2825 // Instead, we're stuck with a rather awkward and brittle register assignments below.
2826 // Furthermore the register assignments are overconstrained, possibly resulting in
2827 // sub-optimal code near the synchronization site.
2828 //
2829 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2830 // Alternately, use a better sp-proximity test.
2831 //
2832 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2833 // Either one is sufficient to uniquely identify a thread.
2834 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2835 //
2836 // * Intrinsify notify() and notifyAll() for the common cases where the
2837 // object is locked by the calling thread but the waitlist is empty.
2838 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2839 //
2840 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2841 // But beware of excessive branch density on AMD Opterons.
2842 //
2843 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2844 // or failure of the fast-path. If the fast-path fails then we pass
2845 // control to the slow-path, typically in C. In Fast_Lock and
2846 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2847 // will emit a conditional branch immediately after the node.
2848 // So we have branches to branches and lots of ICC.ZF games.
2849 // Instead, it might be better to have C2 pass a "FailureLabel"
2850 // into Fast_Lock and Fast_Unlock. In the case of success, control
2851 // will drop through the node. ICC.ZF is undefined at exit.
2852 // In the case of failure, the node will branch directly to the
2853 // FailureLabel
2856 // obj: object to lock
2857 // box: on-stack box address (displaced header location) - KILLED
2858 // rax,: tmp -- KILLED
2859 // scr: tmp -- KILLED
2860 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2862 // Ensure the register assignents are disjoint
2863 guarantee (objReg != boxReg, "") ;
2864 guarantee (objReg != tmpReg, "") ;
2865 guarantee (objReg != scrReg, "") ;
2866 guarantee (boxReg != tmpReg, "") ;
2867 guarantee (boxReg != scrReg, "") ;
2870 block_comment("FastLock");
2871 /*
2872 move(AT, 0x0);
2873 return;
2874 */
2875 if (PrintBiasedLockingStatistics) {
2876 push(tmpReg);
2877 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2878 pop(tmpReg);
2879 }
2881 if (EmitSync & 1) {
2882 move(AT, 0x0);
2883 return;
2884 } else
2885 if (EmitSync & 2) {
2886 Label DONE_LABEL ;
2887 if (UseBiasedLocking) {
2888 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2889 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2890 }
2892 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2893 ori(tmpReg, tmpReg, 0x1);
2894 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2896 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2897 bne(AT, R0, DONE_LABEL);
2898 delayed()->nop();
2900 // Recursive locking
2901 dsubu(tmpReg, tmpReg, SP);
2902 li(AT, (7 - os::vm_page_size() ));
2903 andr(tmpReg, tmpReg, AT);
2904 sd(tmpReg, Address(boxReg, 0));
2905 bind(DONE_LABEL) ;
2906 } else {
2907 // Possible cases that we'll encounter in fast_lock
2908 // ------------------------------------------------
2909 // * Inflated
2910 // -- unlocked
2911 // -- Locked
2912 // = by self
2913 // = by other
2914 // * biased
2915 // -- by Self
2916 // -- by other
2917 // * neutral
2918 // * stack-locked
2919 // -- by self
2920 // = sp-proximity test hits
2921 // = sp-proximity test generates false-negative
2922 // -- by other
2923 //
2925 Label IsInflated, DONE_LABEL, PopDone ;
2927 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2928 // order to reduce the number of conditional branches in the most common cases.
2929 // Beware -- there's a subtle invariant that fetch of the markword
2930 // at [FETCH], below, will never observe a biased encoding (*101b).
2931 // If this invariant is not held we risk exclusion (safety) failure.
2932 if (UseBiasedLocking && !UseOptoBiasInlining) {
2933 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2934 }
2936 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2937 andi(AT, tmpReg, markOopDesc::monitor_value);
2938 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2939 delayed()->nop();
2941 // Attempt stack-locking ...
2942 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2943 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2944 //if (os::is_MP()) {
2945 // sync();
2946 //}
2948 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2949 //AT == 1: unlocked
2951 if (PrintBiasedLockingStatistics) {
2952 Label L;
2953 beq(AT, R0, L);
2954 delayed()->nop();
2955 push(T0);
2956 push(T1);
2957 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2958 pop(T1);
2959 pop(T0);
2960 bind(L);
2961 }
2962 bne(AT, R0, DONE_LABEL);
2963 delayed()->nop();
2965 // Recursive locking
2966 // The object is stack-locked: markword contains stack pointer to BasicLock.
2967 // Locked by current thread if difference with current SP is less than one page.
2968 dsubu(tmpReg, tmpReg, SP);
2969 li(AT, 7 - os::vm_page_size() );
2970 andr(tmpReg, tmpReg, AT);
2971 sd(tmpReg, Address(boxReg, 0));
2972 if (PrintBiasedLockingStatistics) {
2973 Label L;
2974 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2975 bne(tmpReg, R0, L);
2976 delayed()->nop();
2977 push(T0);
2978 push(T1);
2979 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2980 pop(T1);
2981 pop(T0);
2982 bind(L);
2983 }
2984 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
2986 b(DONE_LABEL) ;
2987 delayed()->nop();
2989 bind(IsInflated) ;
2990 // The object's monitor m is unlocked iff m->owner == NULL,
2991 // otherwise m->owner may contain a thread or a stack address.
2993 // TODO: someday avoid the ST-before-CAS penalty by
2994 // relocating (deferring) the following ST.
2995 // We should also think about trying a CAS without having
2996 // fetched _owner. If the CAS is successful we may
2997 // avoid an RTO->RTS upgrade on the $line.
2998 // Without cast to int32_t a movptr will destroy r10 which is typically obj
2999 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
3000 sd(AT, Address(boxReg, 0));
3002 move(boxReg, tmpReg) ;
3003 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3004 // if (m->owner != 0) => AT = 0, goto slow path.
3005 move(AT, R0);
3006 bne(tmpReg, R0, DONE_LABEL);
3007 delayed()->nop();
3009 #ifndef OPT_THREAD
3010 get_thread (TREG) ;
3011 #endif
3012 // It's inflated and appears unlocked
3013 //if (os::is_MP()) {
3014 // sync();
3015 //}
3016 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
3017 // Intentional fall-through into DONE_LABEL ...
3020 // DONE_LABEL is a hot target - we'd really like to place it at the
3021 // start of cache line by padding with NOPs.
3022 // See the AMD and Intel software optimization manuals for the
3023 // most efficient "long" NOP encodings.
3024 // Unfortunately none of our alignment mechanisms suffice.
3025 bind(DONE_LABEL);
3027 // At DONE_LABEL the AT is set as follows ...
3028 // Fast_Unlock uses the same protocol.
3029 // AT == 1 -> Success
3030 // AT == 0 -> Failure - force control through the slow-path
3032 // Avoid branch-to-branch on AMD processors
3033 // This appears to be superstition.
3034 if (EmitSync & 32) nop() ;
3036 }
3037 }
3039 // obj: object to unlock
3040 // box: box address (displaced header location), killed. Must be EAX.
3041 // rbx,: killed tmp; cannot be obj nor box.
3042 //
3043 // Some commentary on balanced locking:
3044 //
3045 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3046 // Methods that don't have provably balanced locking are forced to run in the
3047 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3048 // The interpreter provides two properties:
3049 // I1: At return-time the interpreter automatically and quietly unlocks any
3050 // objects acquired the current activation (frame). Recall that the
3051 // interpreter maintains an on-stack list of locks currently held by
3052 // a frame.
3053 // I2: If a method attempts to unlock an object that is not held by the
3054 // the frame the interpreter throws IMSX.
3055 //
3056 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3057 // B() doesn't have provably balanced locking so it runs in the interpreter.
3058 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3059 // is still locked by A().
3060 //
3061 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3062 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3063 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3064 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3066 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
3068 guarantee (objReg != boxReg, "") ;
3069 guarantee (objReg != tmpReg, "") ;
3070 guarantee (boxReg != tmpReg, "") ;
3074 block_comment("FastUnlock");
3077 if (EmitSync & 4) {
3078 // Disable - inhibit all inlining. Force control through the slow-path
3079 move(AT, 0x0);
3080 return;
3081 } else
3082 if (EmitSync & 8) {
3083 Label DONE_LABEL ;
3084 if (UseBiasedLocking) {
3085 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3086 }
3087 // classic stack-locking code ...
3088 ld(tmpReg, Address(boxReg, 0)) ;
3089 beq(tmpReg, R0, DONE_LABEL) ;
3090 move(AT, 0x1); // delay slot
3092 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
3093 bind(DONE_LABEL);
3094 } else {
3095 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3097 // Critically, the biased locking test must have precedence over
3098 // and appear before the (box->dhw == 0) recursive stack-lock test.
3099 if (UseBiasedLocking && !UseOptoBiasInlining) {
3100 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3101 }
3103 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
3104 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
3105 delayed()->daddiu(AT, R0, 0x1);
3107 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3108 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
3109 beq(AT, R0, Stacked) ; // Inflated?
3110 delayed()->nop();
3112 bind(Inflated) ;
3113 // It's inflated.
3114 // Despite our balanced locking property we still check that m->_owner == Self
3115 // as java routines or native JNI code called by this thread might
3116 // have released the lock.
3117 // Refer to the comments in synchronizer.cpp for how we might encode extra
3118 // state in _succ so we can avoid fetching EntryList|cxq.
3119 //
3120 // I'd like to add more cases in fast_lock() and fast_unlock() --
3121 // such as recursive enter and exit -- but we have to be wary of
3122 // I$ bloat, T$ effects and BP$ effects.
3123 //
3124 // If there's no contention try a 1-0 exit. That is, exit without
3125 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3126 // we detect and recover from the race that the 1-0 exit admits.
3127 //
3128 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3129 // before it STs null into _owner, releasing the lock. Updates
3130 // to data protected by the critical section must be visible before
3131 // we drop the lock (and thus before any other thread could acquire
3132 // the lock and observe the fields protected by the lock).
3133 // IA32's memory-model is SPO, so STs are ordered with respect to
3134 // each other and there's no need for an explicit barrier (fence).
3135 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3136 #ifndef OPT_THREAD
3137 get_thread (TREG) ;
3138 #endif
3140 // It's inflated
3141 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3142 xorr(boxReg, boxReg, TREG);
3144 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3145 orr(boxReg, boxReg, AT);
3147 move(AT, R0);
3148 bne(boxReg, R0, DONE_LABEL);
3149 delayed()->nop();
3151 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3152 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3153 orr(boxReg, boxReg, AT);
3155 move(AT, R0);
3156 bne(boxReg, R0, DONE_LABEL);
3157 delayed()->nop();
3159 sync();
3160 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3161 move(AT, 0x1);
3162 b(DONE_LABEL);
3163 delayed()->nop();
3165 bind (Stacked);
3166 ld(tmpReg, Address(boxReg, 0)) ;
3167 //if (os::is_MP()) { sync(); }
3168 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3170 if (EmitSync & 65536) {
3171 bind (CheckSucc);
3172 }
3174 bind(DONE_LABEL);
3176 // Avoid branch to branch on AMD processors
3177 if (EmitSync & 32768) { nop() ; }
3178 }
3179 }
3181 void MacroAssembler::align(int modulus) {
3182 while (offset() % modulus != 0) nop();
3183 }
3186 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3187 //Unimplemented();
3188 }
3190 #ifdef _LP64
3191 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3192 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3194 //In MIPS64, F0~23 are all caller-saved registers
3195 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3196 #else
3197 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3198 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3200 Register caller_saved_fpu_registers[] = {};
3201 #endif
3203 //We preserve all caller-saved register
3204 void MacroAssembler::pushad(){
3205 int i;
3207 /* Fixed-point registers */
3208 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3209 daddi(SP, SP, -1 * len * wordSize);
3210 for (i = 0; i < len; i++)
3211 {
3212 #ifdef _LP64
3213 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3214 #else
3215 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3216 #endif
3217 }
3219 /* Floating-point registers */
3220 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3221 daddi(SP, SP, -1 * len * wordSize);
3222 for (i = 0; i < len; i++)
3223 {
3224 #ifdef _LP64
3225 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3226 #else
3227 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3228 #endif
3229 }
3230 };
3232 void MacroAssembler::popad(){
3233 int i;
3235 /* Floating-point registers */
3236 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3237 for (i = 0; i < len; i++)
3238 {
3239 #ifdef _LP64
3240 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3241 #else
3242 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3243 #endif
3244 }
3245 daddi(SP, SP, len * wordSize);
3247 /* Fixed-point registers */
3248 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3249 for (i = 0; i < len; i++)
3250 {
3251 #ifdef _LP64
3252 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3253 #else
3254 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3255 #endif
3256 }
3257 daddi(SP, SP, len * wordSize);
3258 };
3260 // We preserve all caller-saved register except V0
3261 void MacroAssembler::pushad_except_v0() {
3262 int i;
3264 /* Fixed-point registers */
3265 int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
3266 daddi(SP, SP, -1 * len * wordSize);
3267 for (i = 0; i < len; i++) {
3268 #ifdef _LP64
3269 sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3270 #else
3271 sw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3272 #endif
3273 }
3275 /* Floating-point registers */
3276 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3277 daddi(SP, SP, -1 * len * wordSize);
3278 for (i = 0; i < len; i++) {
3279 #ifdef _LP64
3280 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3281 #else
3282 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3283 #endif
3284 }
3285 }
3287 void MacroAssembler::popad_except_v0() {
3288 int i;
3290 /* Floating-point registers */
3291 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3292 for (i = 0; i < len; i++) {
3293 #ifdef _LP64
3294 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3295 #else
3296 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3297 #endif
3298 }
3299 daddi(SP, SP, len * wordSize);
3301 /* Fixed-point registers */
3302 len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
3303 for (i = 0; i < len; i++) {
3304 #ifdef _LP64
3305 ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3306 #else
3307 lw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3308 #endif
3309 }
3310 daddi(SP, SP, len * wordSize);
3311 }
3313 void MacroAssembler::push2(Register reg1, Register reg2) {
3314 #ifdef _LP64
3315 daddi(SP, SP, -16);
3316 sd(reg2, SP, 0);
3317 sd(reg1, SP, 8);
3318 #else
3319 addi(SP, SP, -8);
3320 sw(reg2, SP, 0);
3321 sw(reg1, SP, 4);
3322 #endif
3323 }
3325 void MacroAssembler::pop2(Register reg1, Register reg2) {
3326 #ifdef _LP64
3327 ld(reg1, SP, 0);
3328 ld(reg2, SP, 8);
3329 daddi(SP, SP, 16);
3330 #else
3331 lw(reg1, SP, 0);
3332 lw(reg2, SP, 4);
3333 addi(SP, SP, 8);
3334 #endif
3335 }
3337 //for UseCompressedOops Option
3338 void MacroAssembler::load_klass(Register dst, Register src) {
3339 #ifdef _LP64
3340 if(UseCompressedClassPointers){
3341 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3342 decode_klass_not_null(dst);
3343 } else
3344 #endif
3345 ld(dst, src, oopDesc::klass_offset_in_bytes());
3346 }
3348 void MacroAssembler::store_klass(Register dst, Register src) {
3349 #ifdef _LP64
3350 if(UseCompressedClassPointers){
3351 encode_klass_not_null(src);
3352 sw(src, dst, oopDesc::klass_offset_in_bytes());
3353 } else {
3354 #endif
3355 sd(src, dst, oopDesc::klass_offset_in_bytes());
3356 }
3357 }
3359 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3360 load_klass(dst, src);
3361 ld(dst, Address(dst, Klass::prototype_header_offset()));
3362 }
3364 #ifdef _LP64
3365 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3366 if (UseCompressedClassPointers) {
3367 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3368 }
3369 }
3371 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3372 if(UseCompressedOops){
3373 lwu(dst, src);
3374 decode_heap_oop(dst);
3375 } else {
3376 ld(dst, src);
3377 }
3378 }
3380 void MacroAssembler::store_heap_oop(Address dst, Register src){
3381 if(UseCompressedOops){
3382 assert(!dst.uses(src), "not enough registers");
3383 encode_heap_oop(src);
3384 sw(src, dst);
3385 } else {
3386 sd(src, dst);
3387 }
3388 }
3390 void MacroAssembler::store_heap_oop_null(Address dst){
3391 if(UseCompressedOops){
3392 sw(R0, dst);
3393 } else {
3394 sd(R0, dst);
3395 }
3396 }
#ifdef ASSERT
// Debug-only sanity check performed at code-generation time. It emits no
// instructions and does not use `msg`; it only asserts that some form of
// pointer compression is enabled and that the Java heap already exists.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert(UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert(Universe::heap() != NULL, "java heap should be initialized");
}
#endif
3406 // Algorithm must match oop.inline.hpp encode_heap_oop.
3407 void MacroAssembler::encode_heap_oop(Register r) {
3408 #ifdef ASSERT
3409 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3410 #endif
3411 verify_oop(r, "broken oop in encode_heap_oop");
3412 if (Universe::narrow_oop_base() == NULL) {
3413 if (Universe::narrow_oop_shift() != 0) {
3414 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3415 shr(r, LogMinObjAlignmentInBytes);
3416 }
3417 return;
3418 }
3420 movz(r, S5_heapbase, r);
3421 dsub(r, r, S5_heapbase);
3422 if (Universe::narrow_oop_shift() != 0) {
3423 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3424 shr(r, LogMinObjAlignmentInBytes);
3425 }
3426 }
3428 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3429 #ifdef ASSERT
3430 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3431 #endif
3432 verify_oop(src, "broken oop in encode_heap_oop");
3433 if (Universe::narrow_oop_base() == NULL) {
3434 if (Universe::narrow_oop_shift() != 0) {
3435 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3436 dsrl(dst, src, LogMinObjAlignmentInBytes);
3437 } else {
3438 if (dst != src) move(dst, src);
3439 }
3440 } else {
3441 if (dst == src) {
3442 movz(dst, S5_heapbase, dst);
3443 dsub(dst, dst, S5_heapbase);
3444 if (Universe::narrow_oop_shift() != 0) {
3445 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3446 shr(dst, LogMinObjAlignmentInBytes);
3447 }
3448 } else {
3449 dsub(dst, src, S5_heapbase);
3450 if (Universe::narrow_oop_shift() != 0) {
3451 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3452 shr(dst, LogMinObjAlignmentInBytes);
3453 }
3454 movz(dst, R0, src);
3455 }
3456 }
3457 }
3459 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3460 assert (UseCompressedOops, "should be compressed");
3461 #ifdef ASSERT
3462 if (CheckCompressedOops) {
3463 Label ok;
3464 bne(r, R0, ok);
3465 delayed()->nop();
3466 stop("null oop passed to encode_heap_oop_not_null");
3467 bind(ok);
3468 }
3469 #endif
3470 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3471 if (Universe::narrow_oop_base() != NULL) {
3472 dsub(r, r, S5_heapbase);
3473 }
3474 if (Universe::narrow_oop_shift() != 0) {
3475 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3476 shr(r, LogMinObjAlignmentInBytes);
3477 }
3479 }
3481 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3482 assert (UseCompressedOops, "should be compressed");
3483 #ifdef ASSERT
3484 if (CheckCompressedOops) {
3485 Label ok;
3486 bne(src, R0, ok);
3487 delayed()->nop();
3488 stop("null oop passed to encode_heap_oop_not_null2");
3489 bind(ok);
3490 }
3491 #endif
3492 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3494 if (Universe::narrow_oop_base() != NULL) {
3495 dsub(dst, src, S5_heapbase);
3496 if (Universe::narrow_oop_shift() != 0) {
3497 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3498 shr(dst, LogMinObjAlignmentInBytes);
3499 }
3500 } else {
3501 if (Universe::narrow_oop_shift() != 0) {
3502 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3503 dsrl(dst, src, LogMinObjAlignmentInBytes);
3504 } else {
3505 if (dst != src) move(dst, src);
3506 }
3507 }
3508 }
3510 void MacroAssembler::decode_heap_oop(Register r) {
3511 #ifdef ASSERT
3512 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3513 #endif
3514 if (Universe::narrow_oop_base() == NULL) {
3515 if (Universe::narrow_oop_shift() != 0) {
3516 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3517 shl(r, LogMinObjAlignmentInBytes);
3518 }
3519 } else {
3520 move(AT, r);
3521 if (Universe::narrow_oop_shift() != 0) {
3522 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3523 shl(r, LogMinObjAlignmentInBytes);
3524 }
3525 dadd(r, r, S5_heapbase);
3526 movz(r, R0, AT);
3527 }
3528 verify_oop(r, "broken oop in decode_heap_oop");
3529 }
3531 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3532 #ifdef ASSERT
3533 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3534 #endif
3535 if (Universe::narrow_oop_base() == NULL) {
3536 if (Universe::narrow_oop_shift() != 0) {
3537 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3538 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3539 dsll(dst, src, LogMinObjAlignmentInBytes);
3540 } else {
3541 if (dst != src) move(dst, src);
3542 }
3543 } else {
3544 if (dst == src) {
3545 move(AT, dst);
3546 if (Universe::narrow_oop_shift() != 0) {
3547 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3548 shl(dst, LogMinObjAlignmentInBytes);
3549 }
3550 dadd(dst, dst, S5_heapbase);
3551 movz(dst, R0, AT);
3552 } else {
3553 if (Universe::narrow_oop_shift() != 0) {
3554 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3555 dsll(dst, src, LogMinObjAlignmentInBytes);
3556 daddu(dst, dst, S5_heapbase);
3557 } else {
3558 daddu(dst, src, S5_heapbase);
3559 }
3560 movz(dst, R0, src);
3561 }
3562 }
3563 verify_oop(dst, "broken oop in decode_heap_oop");
3564 }
3566 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3567 // Note: it will change flags
3568 assert (UseCompressedOops, "should only be used for compressed headers");
3569 assert (Universe::heap() != NULL, "java heap should be initialized");
3570 // Cannot assert, unverified entry point counts instructions (see .ad file)
3571 // vtableStubs also counts instructions in pd_code_size_limit.
3572 // Also do not verify_oop as this is called by verify_oop.
3573 if (Universe::narrow_oop_shift() != 0) {
3574 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3575 shl(r, LogMinObjAlignmentInBytes);
3576 if (Universe::narrow_oop_base() != NULL) {
3577 daddu(r, r, S5_heapbase);
3578 }
3579 } else {
3580 assert (Universe::narrow_oop_base() == NULL, "sanity");
3581 }
3582 }
3584 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3585 assert (UseCompressedOops, "should only be used for compressed headers");
3586 assert (Universe::heap() != NULL, "java heap should be initialized");
3588 // Cannot assert, unverified entry point counts instructions (see .ad file)
3589 // vtableStubs also counts instructions in pd_code_size_limit.
3590 // Also do not verify_oop as this is called by verify_oop.
3591 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3592 if (Universe::narrow_oop_shift() != 0) {
3593 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3594 if (LogMinObjAlignmentInBytes == Address::times_8) {
3595 dsll(dst, src, LogMinObjAlignmentInBytes);
3596 daddu(dst, dst, S5_heapbase);
3597 } else {
3598 dsll(dst, src, LogMinObjAlignmentInBytes);
3599 if (Universe::narrow_oop_base() != NULL) {
3600 daddu(dst, dst, S5_heapbase);
3601 }
3602 }
3603 } else {
3604 assert (Universe::narrow_oop_base() == NULL, "sanity");
3605 if (dst != src) {
3606 move(dst, src);
3607 }
3608 }
3609 }
3611 void MacroAssembler::encode_klass_not_null(Register r) {
3612 if (Universe::narrow_klass_base() != NULL) {
3613 assert(r != AT, "Encoding a klass in AT");
3614 set64(AT, (int64_t)Universe::narrow_klass_base());
3615 dsub(r, r, AT);
3616 }
3617 if (Universe::narrow_klass_shift() != 0) {
3618 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3619 shr(r, LogKlassAlignmentInBytes);
3620 }
3621 }
3623 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3624 if (dst == src) {
3625 encode_klass_not_null(src);
3626 } else {
3627 if (Universe::narrow_klass_base() != NULL) {
3628 set64(dst, (int64_t)Universe::narrow_klass_base());
3629 dsub(dst, src, dst);
3630 if (Universe::narrow_klass_shift() != 0) {
3631 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3632 shr(dst, LogKlassAlignmentInBytes);
3633 }
3634 } else {
3635 if (Universe::narrow_klass_shift() != 0) {
3636 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3637 dsrl(dst, src, LogKlassAlignmentInBytes);
3638 } else {
3639 move(dst, src);
3640 }
3641 }
3642 }
3643 }
3645 // Function instr_size_for_decode_klass_not_null() counts the instructions
3646 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3647 // when (Universe::heap() != NULL). Hence, if the instructions they
3648 // generate change, then this method needs to be updated.
3649 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3650 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3651 if (Universe::narrow_klass_base() != NULL) {
3652 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
3653 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3654 } else {
3655 // longest load decode klass function, mov64, leaq
3656 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3657 }
3658 }
3660 void MacroAssembler::decode_klass_not_null(Register r) {
3661 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3662 assert(r != AT, "Decoding a klass in AT");
3663 // Cannot assert, unverified entry point counts instructions (see .ad file)
3664 // vtableStubs also counts instructions in pd_code_size_limit.
3665 // Also do not verify_oop as this is called by verify_oop.
3666 if (Universe::narrow_klass_shift() != 0) {
3667 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3668 shl(r, LogKlassAlignmentInBytes);
3669 }
3670 if (Universe::narrow_klass_base() != NULL) {
3671 set64(AT, (int64_t)Universe::narrow_klass_base());
3672 daddu(r, r, AT);
3673 //Not neccessary for MIPS at all.
3674 //reinit_heapbase();
3675 }
3676 }
3678 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3679 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3681 if (dst == src) {
3682 decode_klass_not_null(dst);
3683 } else {
3684 // Cannot assert, unverified entry point counts instructions (see .ad file)
3685 // vtableStubs also counts instructions in pd_code_size_limit.
3686 // Also do not verify_oop as this is called by verify_oop.
3687 set64(dst, (int64_t)Universe::narrow_klass_base());
3688 if (Universe::narrow_klass_shift() != 0) {
3689 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3690 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3691 dsll(AT, src, Address::times_8);
3692 daddu(dst, dst, AT);
3693 } else {
3694 daddu(dst, src, dst);
3695 }
3696 }
3697 }
3699 void MacroAssembler::incrementl(Register reg, int value) {
3700 if (value == min_jint) {
3701 move(AT, value);
3702 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3703 return;
3704 }
3705 if (value < 0) { decrementl(reg, -value); return; }
3706 if (value == 0) { ; return; }
3708 if(Assembler::is_simm16(value)) {
3709 NOT_LP64(addiu(reg, reg, value));
3710 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
3711 } else {
3712 move(AT, value);
3713 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3714 }
3715 }
3717 void MacroAssembler::decrementl(Register reg, int value) {
3718 if (value == min_jint) {
3719 move(AT, value);
3720 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3721 return;
3722 }
3723 if (value < 0) { incrementl(reg, -value); return; }
3724 if (value == 0) { ; return; }
3726 if (Assembler::is_simm16(value)) {
3727 NOT_LP64(addiu(reg, reg, -value));
3728 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
3729 } else {
3730 move(AT, value);
3731 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3732 }
3733 }
3735 void MacroAssembler::reinit_heapbase() {
3736 if (UseCompressedOops || UseCompressedClassPointers) {
3737 if (Universe::heap() != NULL) {
3738 if (Universe::narrow_oop_base() == NULL) {
3739 move(S5_heapbase, R0);
3740 } else {
3741 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3742 }
3743 } else {
3744 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3745 ld(S5_heapbase, S5_heapbase, 0);
3746 }
3747 }
3748 }
3749 #endif // _LP64
3751 void MacroAssembler::check_klass_subtype(Register sub_klass,
3752 Register super_klass,
3753 Register temp_reg,
3754 Label& L_success) {
3755 //implement ind gen_subtype_check
3756 Label L_failure;
3757 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3758 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3759 bind(L_failure);
3760 }
3762 SkipIfEqual::SkipIfEqual(
3763 MacroAssembler* masm, const bool* flag_addr, bool value) {
3764 _masm = masm;
3765 _masm->li(AT, (address)flag_addr);
3766 _masm->lb(AT,AT,0);
3767 _masm->addi(AT,AT,-value);
3768 _masm->beq(AT,R0,_label);
3769 _masm->delayed()->nop();
3770 }
3771 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3772 Register super_klass,
3773 Register temp_reg,
3774 Label* L_success,
3775 Label* L_failure,
3776 Label* L_slow_path,
3777 RegisterOrConstant super_check_offset) {
3778 assert_different_registers(sub_klass, super_klass, temp_reg);
3779 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3780 if (super_check_offset.is_register()) {
3781 assert_different_registers(sub_klass, super_klass,
3782 super_check_offset.as_register());
3783 } else if (must_load_sco) {
3784 assert(temp_reg != noreg, "supply either a temp or a register offset");
3785 }
3787 Label L_fallthrough;
3788 int label_nulls = 0;
3789 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3790 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3791 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3792 assert(label_nulls <= 1, "at most one NULL in the batch");
3794 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3795 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3796 // If the pointers are equal, we are done (e.g., String[] elements).
3797 // This self-check enables sharing of secondary supertype arrays among
3798 // non-primary types such as array-of-interface. Otherwise, each such
3799 // type would need its own customized SSA.
3800 // We move this check to the front of the fast path because many
3801 // type checks are in fact trivially successful in this manner,
3802 // so we get a nicely predicted branch right at the start of the check.
3803 beq(sub_klass, super_klass, *L_success);
3804 delayed()->nop();
3805 // Check the supertype display:
3806 if (must_load_sco) {
3807 // Positive movl does right thing on LP64.
3808 lwu(temp_reg, super_klass, sco_offset);
3809 super_check_offset = RegisterOrConstant(temp_reg);
3810 }
3811 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
3812 daddu(AT, sub_klass, AT);
3813 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
3815 // This check has worked decisively for primary supers.
3816 // Secondary supers are sought in the super_cache ('super_cache_addr').
3817 // (Secondary supers are interfaces and very deeply nested subtypes.)
3818 // This works in the same check above because of a tricky aliasing
3819 // between the super_cache and the primary super display elements.
3820 // (The 'super_check_addr' can address either, as the case requires.)
3821 // Note that the cache is updated below if it does not help us find
3822 // what we need immediately.
3823 // So if it was a primary super, we can just fail immediately.
3824 // Otherwise, it's the slow path for us (no success at this point).
3826 if (super_check_offset.is_register()) {
3827 beq(super_klass, AT, *L_success);
3828 delayed()->nop();
3829 addi(AT, super_check_offset.as_register(), -sc_offset);
3830 if (L_failure == &L_fallthrough) {
3831 beq(AT, R0, *L_slow_path);
3832 delayed()->nop();
3833 } else {
3834 bne_far(AT, R0, *L_failure);
3835 delayed()->nop();
3836 b(*L_slow_path);
3837 delayed()->nop();
3838 }
3839 } else if (super_check_offset.as_constant() == sc_offset) {
3840 // Need a slow path; fast failure is impossible.
3841 if (L_slow_path == &L_fallthrough) {
3842 beq(super_klass, AT, *L_success);
3843 delayed()->nop();
3844 } else {
3845 bne(super_klass, AT, *L_slow_path);
3846 delayed()->nop();
3847 b(*L_success);
3848 delayed()->nop();
3849 }
3850 } else {
3851 // No slow path; it's a fast decision.
3852 if (L_failure == &L_fallthrough) {
3853 beq(super_klass, AT, *L_success);
3854 delayed()->nop();
3855 } else {
3856 bne_far(super_klass, AT, *L_failure);
3857 delayed()->nop();
3858 b(*L_success);
3859 delayed()->nop();
3860 }
3861 }
3863 bind(L_fallthrough);
3865 }
3868 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3869 Register super_klass,
3870 Register temp_reg,
3871 Register temp2_reg,
3872 Label* L_success,
3873 Label* L_failure,
3874 bool set_cond_codes) {
3875 assert_different_registers(sub_klass, super_klass, temp_reg);
3876 if (temp2_reg != noreg)
3877 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3878 else
3879 temp2_reg = T9;
3880 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3882 Label L_fallthrough;
3883 int label_nulls = 0;
3884 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3885 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3886 assert(label_nulls <= 1, "at most one NULL in the batch");
3888 // a couple of useful fields in sub_klass:
3889 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3890 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3891 Address secondary_supers_addr(sub_klass, ss_offset);
3892 Address super_cache_addr( sub_klass, sc_offset);
3894 // Do a linear scan of the secondary super-klass chain.
3895 // This code is rarely used, so simplicity is a virtue here.
3896 // The repne_scan instruction uses fixed registers, which we must spill.
3897 // Don't worry too much about pre-existing connections with the input regs.
3899 // Get super_klass value into rax (even if it was in rdi or rcx).
3900 #ifndef PRODUCT
3901 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3902 ExternalAddress pst_counter_addr((address) pst_counter);
3903 NOT_LP64( incrementl(pst_counter_addr) );
3904 #endif //PRODUCT
3906 // We will consult the secondary-super array.
3907 ld(temp_reg, secondary_supers_addr);
3908 // Load the array length. (Positive movl does right thing on LP64.)
3909 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3910 // Skip to start of data.
3911 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
3913 // Scan RCX words at [RDI] for an occurrence of RAX.
3914 // Set NZ/Z based on last compare.
3915 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
3916 // not change flags (only scas instruction which is repeated sets flags).
3917 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
3919 // OpenJDK8 never compresses klass pointers in secondary-super array.
3920 Label Loop, subtype;
3921 bind(Loop);
3922 beq(temp2_reg, R0, *L_failure);
3923 delayed()->nop();
3924 ld(AT, temp_reg, 0);
3925 beq(AT, super_klass, subtype);
3926 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3927 b(Loop);
3928 delayed()->daddi(temp2_reg, temp2_reg, -1);
3930 bind(subtype);
3931 sd(super_klass, super_cache_addr);
3932 if (L_success != &L_fallthrough) {
3933 b(*L_success);
3934 delayed()->nop();
3935 }
3937 // Success. Cache the super we found and proceed in triumph.
3938 #undef IS_A_TEMP
3940 bind(L_fallthrough);
3941 }
3943 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3944 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3945 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3946 verify_oop(oop_result, "broken oop in call_VM_base");
3947 }
3949 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3950 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3951 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3952 }
3954 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3955 int extra_slot_offset) {
3956 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3957 int stackElementSize = Interpreter::stackElementSize;
3958 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3959 #ifdef ASSERT
3960 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3961 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3962 #endif
3963 Register scale_reg = NOREG;
3964 Address::ScaleFactor scale_factor = Address::no_scale;
3965 if (arg_slot.is_constant()) {
3966 offset += arg_slot.as_constant() * stackElementSize;
3967 } else {
3968 scale_reg = arg_slot.as_register();
3969 scale_factor = Address::times_8;
3970 }
3971 // We don't push RA on stack in prepare_invoke.
3972 // offset += wordSize; // return PC is on stack
3973 if(scale_reg==NOREG) return Address(SP, offset);
3974 else {
3975 dsll(scale_reg, scale_reg, scale_factor);
3976 daddu(scale_reg, SP, scale_reg);
3977 return Address(scale_reg, offset);
3978 }
3979 }
3981 SkipIfEqual::~SkipIfEqual() {
3982 _masm->bind(_label);
3983 }
3985 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3986 switch (size_in_bytes) {
3987 #ifndef _LP64
3988 case 8:
3989 assert(dst2 != noreg, "second dest register required");
3990 lw(dst, src);
3991 lw(dst2, src.plus_disp(BytesPerInt));
3992 break;
3993 #else
3994 case 8: ld(dst, src); break;
3995 #endif
3996 case 4: lw(dst, src); break;
3997 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
3998 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
3999 default: ShouldNotReachHere();
4000 }
4001 }
4003 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
4004 switch (size_in_bytes) {
4005 #ifndef _LP64
4006 case 8:
4007 assert(src2 != noreg, "second source register required");
4008 sw(src, dst);
4009 sw(src2, dst.plus_disp(BytesPerInt));
4010 break;
4011 #else
4012 case 8: sd(src, dst); break;
4013 #endif
4014 case 4: sw(src, dst); break;
4015 case 2: sh(src, dst); break;
4016 case 1: sb(src, dst); break;
4017 default: ShouldNotReachHere();
4018 }
4019 }
4021 // Look up the method for a megamorphic invokeinterface call.
4022 // The target method is determined by <intf_klass, itable_index>.
4023 // The receiver klass is in recv_klass.
4024 // On success, the result will be in method_result, and execution falls through.
4025 // On failure, execution transfers to the given label.
4026 void MacroAssembler::lookup_interface_method(Register recv_klass,
4027 Register intf_klass,
4028 RegisterOrConstant itable_index,
4029 Register method_result,
4030 Register scan_temp,
4031 Label& L_no_such_interface,
4032 bool return_method) {
4033 assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
4034 assert_different_registers(method_result, intf_klass, scan_temp, AT);
4035 assert(recv_klass != method_result || !return_method,
4036 "recv_klass can be destroyed when method isn't needed");
4038 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
4039 "caller must use same register for non-constant itable index as for method");
4041 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
4042 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
4043 int itentry_off = itableMethodEntry::method_offset_in_bytes();
4044 int scan_step = itableOffsetEntry::size() * wordSize;
4045 int vte_size = vtableEntry::size() * wordSize;
4046 Address::ScaleFactor times_vte_scale = Address::times_ptr;
4047 assert(vte_size == wordSize, "else adjust times_vte_scale");
4049 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
4051 // %%% Could store the aligned, prescaled offset in the klassoop.
4052 dsll(scan_temp, scan_temp, times_vte_scale);
4053 daddu(scan_temp, recv_klass, scan_temp);
4054 daddiu(scan_temp, scan_temp, vtable_base);
4055 if (HeapWordsPerLong > 1) {
4056 // Round up to align_object_offset boundary
4057 // see code for InstanceKlass::start_of_itable!
4058 round_to(scan_temp, BytesPerLong);
4059 }
4061 if (return_method) {
4062 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
4063 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
4064 if (itable_index.is_constant()) {
4065 set64(AT, (int)itable_index.is_constant());
4066 dsll(AT, AT, (int)Address::times_ptr);
4067 } else {
4068 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
4069 }
4070 daddu(AT, AT, recv_klass);
4071 daddiu(recv_klass, AT, itentry_off);
4072 }
4074 Label search, found_method;
4076 for (int peel = 1; peel >= 0; peel--) {
4077 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
4079 if (peel) {
4080 beq(intf_klass, method_result, found_method);
4081 delayed()->nop();
4082 } else {
4083 bne(intf_klass, method_result, search);
4084 delayed()->nop();
4085 // (invert the test to fall through to found_method...)
4086 }
4088 if (!peel) break;
4090 bind(search);
4092 // Check that the previous entry is non-null. A null entry means that
4093 // the receiver class doesn't implement the interface, and wasn't the
4094 // same as when the caller was compiled.
4095 beq(method_result, R0, L_no_such_interface);
4096 delayed()->nop();
4097 daddiu(scan_temp, scan_temp, scan_step);
4098 }
4100 bind(found_method);
4102 if (return_method) {
4103 // Got a hit.
4104 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
4105 if(UseLoongsonISA) {
4106 gsldx(method_result, recv_klass, scan_temp, 0);
4107 } else {
4108 daddu(AT, recv_klass, scan_temp);
4109 ld(method_result, AT);
4110 }
4111 }
4112 }
4114 // virtual method calling
4115 void MacroAssembler::lookup_virtual_method(Register recv_klass,
4116 RegisterOrConstant vtable_index,
4117 Register method_result) {
4118 Register tmp = GP;
4119 push(tmp);
4121 if (vtable_index.is_constant()) {
4122 assert_different_registers(recv_klass, method_result, tmp);
4123 } else {
4124 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
4125 }
4126 const int base = InstanceKlass::vtable_start_offset() * wordSize;
4127 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
4128 /*
4129 Address vtable_entry_addr(recv_klass,
4130 vtable_index, Address::times_ptr,
4131 base + vtableEntry::method_offset_in_bytes());
4132 */
4133 if (vtable_index.is_constant()) {
4134 set64(AT, vtable_index.as_constant());
4135 dsll(AT, AT, (int)Address::times_ptr);
4136 } else {
4137 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
4138 }
4139 set64(tmp, base + vtableEntry::method_offset_in_bytes());
4140 daddu(tmp, tmp, AT);
4141 daddu(tmp, tmp, recv_klass);
4142 ld(method_result, tmp, 0);
4144 pop(tmp);
4145 }
4147 void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
4148 switch (type) {
4149 case T_LONG:
4150 st_ptr(src_reg, tmp_reg, disp);
4151 break;
4152 case T_ARRAY:
4153 case T_OBJECT:
4154 if (UseCompressedOops && !wide) {
4155 sw(src_reg, tmp_reg, disp);
4156 } else {
4157 st_ptr(src_reg, tmp_reg, disp);
4158 }
4159 break;
4160 case T_ADDRESS:
4161 st_ptr(src_reg, tmp_reg, disp);
4162 break;
4163 case T_INT:
4164 sw(src_reg, tmp_reg, disp);
4165 break;
4166 case T_CHAR:
4167 case T_SHORT:
4168 sh(src_reg, tmp_reg, disp);
4169 break;
4170 case T_BYTE:
4171 case T_BOOLEAN:
4172 sb(src_reg, tmp_reg, disp);
4173 break;
4174 default:
4175 ShouldNotReachHere();
4176 }
4177 }
4179 void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) {
4180 Register tmp_reg = T9;
4181 Register index_reg = addr.index();
4182 if (index_reg == NOREG) {
4183 tmp_reg = NOREG;
4184 }
4186 int scale = addr.scale();
4187 if (tmp_reg != NOREG && scale >= 0) {
4188 dsll(tmp_reg, index_reg, scale);
4189 }
4191 int disp = addr.disp();
4192 bool disp_is_simm16 = true;
4193 if (!Assembler::is_simm16(disp)) {
4194 disp_is_simm16 = false;
4195 }
4197 Register base_reg = addr.base();
4198 if (tmp_reg != NOREG) {
4199 assert_different_registers(tmp_reg, base_reg, index_reg);
4200 }
4202 if (tmp_reg != NOREG) {
4203 daddu(tmp_reg, base_reg, tmp_reg);
4204 if (!disp_is_simm16) {
4205 move(tmp_reg, disp);
4206 daddu(tmp_reg, base_reg, tmp_reg);
4207 }
4208 store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4209 } else {
4210 if (!disp_is_simm16) {
4211 tmp_reg = T9;
4212 assert_different_registers(tmp_reg, base_reg);
4213 move(tmp_reg, disp);
4214 daddu(tmp_reg, base_reg, tmp_reg);
4215 }
4216 store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4217 }
4218 }
4220 void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) {
4221 switch (type) {
4222 case T_DOUBLE:
4223 sdc1(src_reg, tmp_reg, disp);
4224 break;
4225 case T_FLOAT:
4226 swc1(src_reg, tmp_reg, disp);
4227 break;
4228 default:
4229 ShouldNotReachHere();
4230 }
4231 }
4233 void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) {
4234 Register tmp_reg = T9;
4235 Register index_reg = addr.index();
4236 if (index_reg == NOREG) {
4237 tmp_reg = NOREG;
4238 }
4240 int scale = addr.scale();
4241 if (tmp_reg != NOREG && scale >= 0) {
4242 dsll(tmp_reg, index_reg, scale);
4243 }
4245 int disp = addr.disp();
4246 bool disp_is_simm16 = true;
4247 if (!Assembler::is_simm16(disp)) {
4248 disp_is_simm16 = false;
4249 }
4251 Register base_reg = addr.base();
4252 if (tmp_reg != NOREG) {
4253 assert_different_registers(tmp_reg, base_reg, index_reg);
4254 }
4256 if (tmp_reg != NOREG) {
4257 daddu(tmp_reg, base_reg, tmp_reg);
4258 if (!disp_is_simm16) {
4259 move(tmp_reg, disp);
4260 daddu(tmp_reg, base_reg, tmp_reg);
4261 }
4262 store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
4263 } else {
4264 if (!disp_is_simm16) {
4265 tmp_reg = T9;
4266 assert_different_registers(tmp_reg, base_reg);
4267 move(tmp_reg, disp);
4268 daddu(tmp_reg, base_reg, tmp_reg);
4269 }
4270 store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
4271 }
4272 }
4274 void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
4275 switch (type) {
4276 case T_LONG:
4277 ld_ptr(dst_reg, tmp_reg, disp);
4278 break;
4279 case T_ARRAY:
4280 case T_OBJECT:
4281 if (UseCompressedOops && !wide) {
4282 lwu(dst_reg, tmp_reg, disp);
4283 } else {
4284 ld_ptr(dst_reg, tmp_reg, disp);
4285 }
4286 break;
4287 case T_ADDRESS:
4288 if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) {
4289 lwu(dst_reg, tmp_reg, disp);
4290 } else {
4291 ld_ptr(dst_reg, tmp_reg, disp);
4292 }
4293 break;
4294 case T_INT:
4295 lw(dst_reg, tmp_reg, disp);
4296 break;
4297 case T_CHAR:
4298 lhu(dst_reg, tmp_reg, disp);
4299 break;
4300 case T_SHORT:
4301 lh(dst_reg, tmp_reg, disp);
4302 break;
4303 case T_BYTE:
4304 case T_BOOLEAN:
4305 lb(dst_reg, tmp_reg, disp);
4306 break;
4307 default:
4308 ShouldNotReachHere();
4309 }
4310 }
4312 int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) {
4313 int code_offset = 0;
4314 Register tmp_reg = T9;
4315 Register index_reg = addr.index();
4316 if (index_reg == NOREG) {
4317 tmp_reg = NOREG;
4318 }
4320 int scale = addr.scale();
4321 if (tmp_reg != NOREG && scale >= 0) {
4322 dsll(tmp_reg, index_reg, scale);
4323 }
4325 int disp = addr.disp();
4326 bool disp_is_simm16 = true;
4327 if (!Assembler::is_simm16(disp)) {
4328 disp_is_simm16 = false;
4329 }
4331 Register base_reg = addr.base();
4332 if (tmp_reg != NOREG) {
4333 assert_different_registers(tmp_reg, base_reg, index_reg);
4334 }
4336 if (tmp_reg != NOREG) {
4337 daddu(tmp_reg, base_reg, tmp_reg);
4338 if (!disp_is_simm16) {
4339 move(tmp_reg, disp);
4340 daddu(tmp_reg, base_reg, tmp_reg);
4341 }
4342 code_offset = offset();
4343 load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4344 } else {
4345 if (!disp_is_simm16) {
4346 tmp_reg = T9;
4347 assert_different_registers(tmp_reg, base_reg);
4348 move(tmp_reg, disp);
4349 daddu(tmp_reg, base_reg, tmp_reg);
4350 }
4351 code_offset = offset();
4352 load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4353 }
4355 return code_offset;
4356 }
4358 void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) {
4359 switch (type) {
4360 case T_DOUBLE:
4361 ldc1(dst_reg, tmp_reg, disp);
4362 break;
4363 case T_FLOAT:
4364 lwc1(dst_reg, tmp_reg, disp);
4365 break;
4366 default:
4367 ShouldNotReachHere();
4368 }
4369 }
4371 int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) {
4372 int code_offset = 0;
4373 Register tmp_reg = T9;
4374 Register index_reg = addr.index();
4375 if (index_reg == NOREG) {
4376 tmp_reg = NOREG;
4377 }
4379 int scale = addr.scale();
4380 if (tmp_reg != NOREG && scale >= 0) {
4381 dsll(tmp_reg, index_reg, scale);
4382 }
4384 int disp = addr.disp();
4385 bool disp_is_simm16 = true;
4386 if (!Assembler::is_simm16(disp)) {
4387 disp_is_simm16 = false;
4388 }
4390 Register base_reg = addr.base();
4391 if (tmp_reg != NOREG) {
4392 assert_different_registers(tmp_reg, base_reg, index_reg);
4393 }
4395 if (tmp_reg != NOREG) {
4396 daddu(tmp_reg, base_reg, tmp_reg);
4397 if (!disp_is_simm16) {
4398 move(tmp_reg, disp);
4399 daddu(tmp_reg, base_reg, tmp_reg);
4400 }
4401 code_offset = offset();
4402 load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
4403 } else {
4404 if (!disp_is_simm16) {
4405 tmp_reg = T9;
4406 assert_different_registers(tmp_reg, base_reg);
4407 move(tmp_reg, disp);
4408 daddu(tmp_reg, base_reg, tmp_reg);
4409 }
4410 code_offset = offset();
4411 load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
4412 }
4414 return code_offset;
4415 }