Thu, 13 Feb 2020 19:16:02 +0800
#11867 Backport of #11497 assert(false) failed: Should Not Reach Here, what is the cpu type?
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, 2020, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
// Implementation of MacroAssembler
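
// The static arrays i[] and f[] are scratch save areas used for debugging:
// save_registers()/restore_registers() spill and reload all 32 integer and
// float registers through them, and print() formats their contents.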
intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k = 0; k < sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k = 0; k < sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}
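
// i_offset()/f_offset() compute the byte offset of slot k inside the save
// areas via the classic offsetof-on-a-null-pointer idiom; save_registers()
// and restore_registers() then address each slot as A0 + offset.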
int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }

void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;
  jint *pc = (jint *)branch;

  if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
    //b_far:
    //  move(AT, RA); // dadd
    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    //  nop();
    //  lui(T9, 0); // to be patched
    //  ori(T9, 0);
    //  daddu(T9, T9, RA);
    //  move(RA, AT);
    //  jr(T9);

    assert(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      // revert to "beq + nop"
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ delayed()->nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  } else if (special(pc[4]) == jr_op
      && opcode(pc[4]) == special_op
      && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
    //jmp_far:
    //  patchable_set48(T9, target);
    //  jr(T9);
    //  nop();

    CodeBuffer cb(branch, 4 * 4);
    MacroAssembler masm(&cb);
    masm.patchable_set48(T9, (long)(target));
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: branch = 0x%lx, target = 0x%lx", branch, target);
    tty->print_cr("======= Start decoding at branch = 0x%lx =======", branch);
    Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty);
    tty->print_cr("======= End of decoding =======");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch);
}
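
// (MIPS j/jal encode a 26-bit word offset within the current 256 MB region;
// the fit_in_jal(cl, ch) check above appears to verify that the whole code
// cache falls inside one such region before a direct jump/call is used.)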
void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}
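
// Note: both arms of patchable_jump occupy exactly six instructions (the
// near form is padded with nops), so a patched site can later be rewritten
// in place with either form; patchable_call below follows the same layout,
// which is why insts_for_patchable_jump/call return a constant 6.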
int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
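
// The *_long variants below take the opposite approach to *_far: they emit
// the inverted branch around an unconditional jmp_far, so the taken path can
// reach any label in the code cache at the cost of a fixed longer sequence.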
void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  bne(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  beq(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1t_long(Label& L) {
  Label not_taken;

  bc1f(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1f_long(Label& L) {
  Label not_taken;

  bc1t(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}
void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
    //
    // MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
    //   0x00000055651ed514: dadd at, ra, zero
    //   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
    //
    //   0x00000055651ed51c: sll zero, zero, 0
    //   0x00000055651ed520: lui t9, 0x0
    //   0x00000055651ed524: ori t9, t9, 0x21b8
    //   0x00000055651ed528: daddu t9, t9, ra
    //   0x00000055651ed52c: dadd ra, at, zero
    //   0x00000055651ed530: jr t9
    //   0x00000055651ed534: sll zero, zero, 0
    //
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    // address must be bounded
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}
void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
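// atomic_inc32 is an LL/SC retry loop: ll loads the counter linked, sc
// attempts the conditional store and leaves tmp_reg2 == 0 if another CPU
// intervened, in which case we branch back to 'again' and retry.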
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
// This method handles the stack alignment itself, so the caller need not
// reserve stack space for the arguments beforehand.
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

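  // The MIPS N64 ABI requires 16-byte stack alignment at calls: test the low
  // four bits of SP, and if misaligned adjust by one 8-byte slot around the
  // call (an odd number of 8-byte slots must have been pushed).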
  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::runtime_call_type:
  case relocInfo::none:
    jmp(entry);
    break;
  default:
    {
      InstructionMark im(this);
      relocate(rtype);
      patchable_set48(T9, (long)entry);
      jr(T9);
    }
    break;
  }
}

void MacroAssembler::jmp_far(Label& L) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)entry);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)pc());
  }

  jr(T9);
  delayed()->nop();
}
void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(AT, (long)obj);
  sd(AT, dst);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(dst, (long)obj);
}
void MacroAssembler::call(address entry) {
  // C/C++ code assumes T9 is the entry point, so we always move entry into T9.
  // Maybe there is a more graceful way to handle this. FIXME
  // For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}
void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::runtime_call_type:
  case relocInfo::none:
    call(entry);
    break;
  default:
    {
      InstructionMark im(this);
      relocate(rtype);
      call(entry);
    }
    break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
  case relocInfo::runtime_call_type:
  case relocInfo::none:
    call(entry);
    break;
  default:
    {
      InstructionMark im(this);
      relocate(rh);
      call(entry);
    }
    break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}
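
// c2bool normalizes a C/C++ boolean: any nonzero value in r becomes 1 and
// zero stays 0 (the branch below skips the move when r is already zero).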
void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}
#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to make locks work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // release the space reserved for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
void MacroAssembler::print_reg(Register reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); // A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
  NOT_LP64(pop(FP);)
  popad();

}
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0  , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4  , "cannot have more than 4 arguments");
  assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: a plain load is enough here to provoke a SEGV
    lw(AT, reg, 0);
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}
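
// leave() undoes enter(): rewind SP to FP + 2 words (the slots that
// push2(RA, FP) consumed) and reload RA and FP from those slots.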
void MacroAssembler::leave() {
#ifndef _LP64
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if(clear_fp) {
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
}
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
// Calls to C land
//
// When entering C land, the fp, & sp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_word_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_word_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}
//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
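  //
  // SATB pre-barrier: while concurrent marking is active, the value about to
  // be overwritten (pre_val) is recorded in the thread's SATB queue, so the
  // marker still sees the snapshot-at-the-beginning view of the object graph.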

#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != V0, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lw(AT, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lb(AT, in_progress);
  }
  beq(AT, R0, done);
  delayed()->nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  beq(pre_val, R0, done);
  delayed()->nop();

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ld(tmp, index);
  beq(tmp, R0, runtime);
  delayed()->nop();

  daddiu(tmp, tmp, -1 * wordSize);
  sd(tmp, index);
  ld(AT, buffer);
  daddu(tmp, tmp, AT);

  // Record the previous value
  sd(pre_val, tmp, 0);
  beq(R0, R0, done);
  delayed()->nop();

  bind(runtime);
  // save the live input values
  if (tosca_live) push(V0);

  if (obj != noreg && obj != V0) push(obj);

  if (pre_val != V0) push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then fp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
    if (thread != A1) move(A1, thread);
    if (pre_val != A0) move(A0, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != V0)
    pop(pre_val);

  if (obj != noreg && obj != V0)
    pop(obj);

  if(tosca_live) pop(V0);

  bind(done);
}
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
  assert(tmp  != AT, "must be");
  assert(tmp2 != AT, "must be");
#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64
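
  // G1 post-barrier: if the store created a region-crossing reference to a
  // non-NULL value, dirty the card covering store_addr and enqueue the card
  // on the thread's dirty card queue for concurrent refinement.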
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?
  xorr(AT, store_addr, new_val);
  dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  beq(AT, R0, done);
  delayed()->nop();

  // crosses regions, storing NULL?
  beq(new_val, R0, done);
  delayed()->nop();

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  move(card_addr, store_addr);
  dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  set64(cardtable, (intptr_t)ct->byte_map_base);
  daddu(card_addr, card_addr, cardtable);

  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  beq(AT, R0, done);
  delayed()->nop();

  sync();
  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  beq(AT, R0, done);
  delayed()->nop();

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
  move(AT, (int)CardTableModRefBS::dirty_card_val());
  sb(AT, card_addr, 0);

  lw(AT, queue_index);
  beq(AT, R0, runtime);
  delayed()->nop();
  daddiu(AT, AT, -1 * wordSize);
  sw(AT, queue_index);
  ld(tmp2, buffer);
#ifdef _LP64
  ld(AT, queue_index);
  daddu(tmp2, tmp2, AT);
  sd(card_addr, tmp2, 0);
#else
  lw(AT, queue_index);
  addu32(tmp2, tmp2, AT);
  sw(card_addr, tmp2, 0);
#endif
  beq(R0, R0, done);
  delayed()->nop();

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}
#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////

void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}

// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}

void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  set64(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  if (UseConcMarkSweepGC) sync();
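  // CardTableModRefBS::dirty_card_val() is 0, so storing R0 (the zero
  // register) into the card byte below marks the card dirty.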
  sb(R0, AT, 0);
}
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2); // blows t1&t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  if (var_size_in_bytes == NOREG) {
    set64(AT, con_size_in_bytes);
    add(end, obj, AT);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();

  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  verify_tlab(t1, t2);
}
// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, AT);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    b_far(slow_case);
    delayed()->nop();
  } else {

#ifndef _LP64
    Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
    lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
#else
    Address heap_top(t1);
    li(t1, (long)Universe::heap()->top_addr());
#endif
    ld_ptr(obj, heap_top);

    Register end = t2;
    Label retry;

    bind(retry);
    if (var_size_in_bytes == NOREG) {
      set64(AT, con_size_in_bytes);
      add(end, obj, AT);
    } else {
      add(end, obj, var_size_in_bytes);
    }
    // if end < obj then we wrapped around => object too long => slow case
    sltu(AT, end, obj);
    bne_far(AT, R0, slow_case);
    delayed()->nop();

    li(AT, (long)Universe::heap()->end_addr());
    ld_ptr(AT, AT, 0);
    sltu(AT, AT, end);
    bne_far(AT, R0, slow_case);
    delayed()->nop();
    // Compare obj with the current heap top; if they are still equal, store
    // end as the new top. On this port cmpxchg leaves a success flag in AT
    // (nonzero on success, 0 on failure) rather than an x86-style ZF.
    //if (os::is_MP()) {
    //  sync();
    //}

    // if someone beat us on the allocation, try again, otherwise continue
    cmpxchg(end, heap_top, obj);
    beq_far(AT, R0, retry);
    delayed()->nop();
  }
}
1783 // C2 doesn't invoke this one.
1784 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1785 Register top = T0;
1786 Register t1 = T1;
1787 Register t2 = T9;
1788 Register t3 = T3;
1789 Register thread_reg = T8;
1790 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4);
1791 Label do_refill, discard_tlab;
1793 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
1794 // No allocation in the shared eden.
1795 b(slow_case);
1796 delayed()->nop();
1797 }
1799 get_thread(thread_reg);
1801 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1802 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1804 // calculate amount of free space
1805 sub(t1, t1, top);
1806 shr(t1, LogHeapWordSize);
1808 // Retain tlab and allocate object in shared space if
1809 // the amount free in the tlab is too large to discard.
1810 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1811 slt(AT, t2, t1);
1812 beq(AT, R0, discard_tlab);
1813 delayed()->nop();
1815 // Retain
1816 #ifndef _LP64
1817 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1818 #else
1819 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1820 #endif
1821 add(t2, t2, AT);
1822 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1824 if (TLABStats) {
1825 // increment number of slow_allocations
1826 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1827 addiu(AT, AT, 1);
1828 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1829 }
1830 b(try_eden);
1831 delayed()->nop();
1833 bind(discard_tlab);
1834 if (TLABStats) {
1835 // increment number of refills
1836 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1837 addi(AT, AT, 1);
1838 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1839 // accumulate wastage -- t1 is amount free in tlab
1840 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1841 add(AT, AT, t1);
1842 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1843 }
1845 // if tlab is currently allocated (top or end != null) then
1846 // fill [top, end + alignment_reserve) with array object
1847 beq(top, R0, do_refill);
1848 delayed()->nop();
1850 // set up the mark word
1851 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1852 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1854 // set the length to the remaining space
1855 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1856 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1857 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1858 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1860 // set klass to intArrayKlass
1861 #ifndef _LP64
1862 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
1863 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
1864 #else
1865 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
1866 ld_ptr(t1, AT, 0);
1867 #endif
1868 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
1869 store_klass(top, t1);
1871 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
1872 subu(t1, top, t1);
1873 incr_allocated_bytes(thread_reg, t1, 0);
1875 // refill the tlab with an eden allocation
1876 bind(do_refill);
1877 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1878 shl(t1, LogHeapWordSize);
1879 // TODO: should the object size be added here?
1880 eden_allocate(top, t1, 0, t2, t3, slow_case);
1882 // Check that t1 was preserved in eden_allocate.
1883 #ifdef ASSERT
1884 if (UseTLAB) {
1885 Label ok;
1886 assert_different_registers(thread_reg, t1);
1887 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1888 shl(AT, LogHeapWordSize);
1889 beq(AT, t1, ok);
1890 delayed()->nop();
1891 stop("assert(t1 != tlab size)");
1892 should_not_reach_here();
1894 bind(ok);
1895 }
1896 #endif
1897 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
1898 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1899 add(top, top, t1);
1900 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
1901 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1902 verify_tlab(t1, t2);
1903 b(retry);
1904 delayed()->nop();
1905 }
1907 void MacroAssembler::incr_allocated_bytes(Register thread,
1908 Register var_size_in_bytes,
1909 int con_size_in_bytes,
1910 Register t1) {
1911 if (!thread->is_valid()) {
1912 #ifndef OPT_THREAD
1913 assert(t1->is_valid(), "need temp reg");
1914 thread = t1;
1915 get_thread(thread);
1916 #else
1917 thread = TREG;
1918 #endif
1919 }
1921 ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
1922 if (var_size_in_bytes->is_valid()) {
1923 addu(AT, AT, var_size_in_bytes);
1924 } else {
1925 addiu(AT, AT, con_size_in_bytes);
1926 }
1927 st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
1928 }
1930 static const double pi_4 = 0.7853981633974483;
1932 // The argument (a double) must be passed in F12/F13.
1933 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
1934 // We need to preserve any registers that may be modified during the call.
1935 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
1936 // Save all potentially modified registers here.
1937 // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA, so saving V0, V1 and T9 would suffice.
1938 pushad();
1939 // we should preserve the stack space before we call
1940 addi(SP, SP, -wordSize * 2);
1941 switch (trig){
1942 case 's' :
1943 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
1944 delayed()->nop();
1945 break;
1946 case 'c':
1947 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
1948 delayed()->nop();
1949 break;
1950 case 't':
1951 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
1952 delayed()->nop();
1953 break;
1954 default:assert (false, "bad intrinsic");
1955 break;
1957 }
1959 addi(SP, SP, wordSize * 2);
1960 popad();
1961 }
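// Note: pushad()/popad() spill and reload every caller-saved register around
// the runtime call, and the two words reserved on SP serve as the callee's
// outgoing-argument area. Under the standard MIPS calling convention the
// double argument travels in F12 and the result comes back in F0 (an ABI
// assumption stated here for orientation, not something this code asserts).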
1963 #ifdef _LP64
1964 void MacroAssembler::li(Register rd, long imm) {
1965 if (imm <= max_jint && imm >= min_jint) {
1966 li32(rd, (int)imm);
1967 } else if (julong(imm) <= 0xFFFFFFFF) {
1968 assert_not_delayed();
1969 // lui sign-extends, so we can't use that.
1970 ori(rd, R0, julong(imm) >> 16);
1971 dsll(rd, rd, 16);
1972 ori(rd, rd, split_low(imm));
1973 } else if ((imm > 0) && is_simm16(imm >> 32)) {
1974 // A 48-bit address
1975 li48(rd, imm);
1976 } else {
1977 li64(rd, imm);
1978 }
1979 }
1980 #else
1981 void MacroAssembler::li(Register rd, long imm) {
1982 li32(rd, (int)imm);
1983 }
1984 #endif
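// Illustrative examples of the sequences li() selects (LP64), derived from
// the branches above:
//   li(rd, 0x1234)              -> li32: 1 instruction  (addiu)
//   li(rd, 0x12345678)          -> li32: 2 instructions (lui + ori)
//   li(rd, 0xFFFFFFFFL)         -> ori + dsll + ori (zero-extended 32-bit)
//   li(rd, 0x123456789ABCL)     -> li48: 4 instructions (48-bit address)
//   li(rd, 0x8000000000000000L) -> li64: 6 instructions (general 64-bit)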
1986 void MacroAssembler::li32(Register reg, int imm) {
1987 if (is_simm16(imm)) {
1988 // for imm < 0, we should use addi instead of addiu.
1989 //
1990 // java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
1991 //
1992 // 78 move [int:-1|I] [a0|I]
1993 // : daddi a0, zero, 0xffffffff (correct)
1994 // : daddiu a0, zero, 0xffffffff (incorrect)
1995 //
1996 if (imm >= 0)
1997 addiu(reg, R0, imm);
1998 else
1999 addi(reg, R0, imm);
2000 } else {
2001 lui(reg, split_low(imm >> 16));
2002 if (split_low(imm))
2003 ori(reg, reg, split_low(imm));
2004 }
2005 }
2007 #ifdef _LP64
2008 void MacroAssembler::set64(Register d, jlong value) {
2009 assert_not_delayed();
2011 int hi = (int)(value >> 32);
2012 int lo = (int)(value & ~0);
2014 if (value == lo) { // 32-bit integer
2015 if (is_simm16(value)) {
2016 daddiu(d, R0, value);
2017 } else {
2018 lui(d, split_low(value >> 16));
2019 if (split_low(value)) {
2020 ori(d, d, split_low(value));
2021 }
2022 }
2023 } else if (hi == 0) { // hardware zero-extends to upper 32
2024 ori(d, R0, julong(value) >> 16);
2025 dsll(d, d, 16);
2026 if (split_low(value)) {
2027 ori(d, d, split_low(value));
2028 }
2029 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2030 // 4 insts
2031 li48(d, value);
2032 } else { // li64
2033 // 6 insts
2034 li64(d, value);
2035 }
2036 }
2039 int MacroAssembler::insts_for_set64(jlong value) {
2040 int hi = (int)(value >> 32);
2041 int lo = (int)(value & ~0);
2043 int count = 0;
2045 if (value == lo) { // 32-bit integer
2046 if (is_simm16(value)) {
2047 //daddiu(d, R0, value);
2048 count++;
2049 } else {
2050 //lui(d, split_low(value >> 16));
2051 count++;
2052 if (split_low(value)) {
2053 //ori(d, d, split_low(value));
2054 count++;
2055 }
2056 }
2057 } else if (hi == 0) { // hardware zero-extends to upper 32
2058 //ori(d, R0, julong(value) >> 16);
2059 //dsll(d, d, 16);
2060 count += 2;
2061 if (split_low(value)) {
2062 //ori(d, d, split_low(value));
2063 count++;
2064 }
2065 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2066 // 4 insts
2067 //li48(d, value);
2068 count += 4;
2069 } else { // li64
2070 // 6 insts
2071 //li64(d, value);
2072 count += 6;
2073 }
2075 return count;
2076 }
2078 void MacroAssembler::patchable_set48(Register d, jlong value) {
2079 assert_not_delayed();
2081 int hi = (int)(value >> 32);
2082 int lo = (int)(value & ~0);
2084 int count = 0;
2086 if (value == lo) { // 32-bit integer
2087 if (is_simm16(value)) {
2088 daddiu(d, R0, value);
2089 count += 1;
2090 } else {
2091 lui(d, split_low(value >> 16));
2092 count += 1;
2093 if (split_low(value)) {
2094 ori(d, d, split_low(value));
2095 count += 1;
2096 }
2097 }
2098 } else if (hi == 0) { // hardware zero-extends to upper 32
2099 ori(d, R0, julong(value) >> 16);
2100 dsll(d, d, 16);
2101 count += 2;
2102 if (split_low(value)) {
2103 ori(d, d, split_low(value));
2104 count += 1;
2105 }
2106 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2107 // 4 insts
2108 li48(d, value);
2109 count += 4;
2110 } else { // li64
2111 tty->print_cr("value = 0x%lx", value);
2112 guarantee(false, "Not supported yet !");
2113 }
2115 while (count < 4) {
2116 nop();
2117 count++;
2118 }
2119 }
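// patchable_set48 always occupies exactly 4 instruction slots (trailing nops
// pad any shorter encoding), so code that later patches the constant in place
// can rely on a fixed-size sequence. patchable_set32 and patchable_call32
// below give the same guarantee for 3 and 2 slots respectively.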
2121 void MacroAssembler::patchable_set32(Register d, jlong value) {
2122 assert_not_delayed();
2124 int hi = (int)(value >> 32);
2125 int lo = (int)(value & ~0);
2127 int count = 0;
2129 if (value == lo) { // 32-bit integer
2130 if (is_simm16(value)) {
2131 daddiu(d, R0, value);
2132 count += 1;
2133 } else {
2134 lui(d, split_low(value >> 16));
2135 count += 1;
2136 if (split_low(value)) {
2137 ori(d, d, split_low(value));
2138 count += 1;
2139 }
2140 }
2141 } else if (hi == 0) { // hardware zero-extends to upper 32
2142 ori(d, R0, julong(value) >> 16);
2143 dsll(d, d, 16);
2144 count += 2;
2145 if (split_low(value)) {
2146 ori(d, d, split_low(value));
2147 count += 1;
2148 }
2149 } else {
2150 tty->print_cr("value = 0x%lx", value);
2151 guarantee(false, "Not supported yet !");
2152 }
2154 while (count < 3) {
2155 nop();
2156 count++;
2157 }
2158 }
2160 void MacroAssembler::patchable_call32(Register d, jlong value) {
2161 assert_not_delayed();
2163 int hi = (int)(value >> 32);
2164 int lo = (int)(value & ~0);
2166 int count = 0;
2168 if (value == lo) { // 32-bit integer
2169 if (is_simm16(value)) {
2170 daddiu(d, R0, value);
2171 count += 1;
2172 } else {
2173 lui(d, split_low(value >> 16));
2174 count += 1;
2175 if (split_low(value)) {
2176 ori(d, d, split_low(value));
2177 count += 1;
2178 }
2179 }
2180 } else {
2181 tty->print_cr("value = 0x%lx", value);
2182 guarantee(false, "Not supported yet !");
2183 }
2185 while (count < 2) {
2186 nop();
2187 count++;
2188 }
2189 }
2191 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2192 assert(UseCompressedClassPointers, "should only be used for compressed header");
2193 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2195 int klass_index = oop_recorder()->find_index(k);
2196 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2197 long narrowKlass = (long)Klass::encode_klass(k);
2199 relocate(rspec, Assembler::narrow_oop_operand);
2200 patchable_set48(dst, narrowKlass);
2201 }
2204 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2205 assert(UseCompressedOops, "should only be used for compressed header");
2206 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2208 int oop_index = oop_recorder()->find_index(obj);
2209 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2211 relocate(rspec, Assembler::narrow_oop_operand);
2212 patchable_set48(dst, oop_index);
2213 }
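// Both setters pair a relocation record with the fixed-size patchable_set48
// sequence above. Note the asymmetry: set_narrow_klass materializes the
// encoded klass value itself, while set_narrow_oop materializes the oop table
// index and depends on later relocation processing to patch in the real
// compressed oop.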
2215 void MacroAssembler::li64(Register rd, long imm) {
2216 assert_not_delayed();
2217 lui(rd, split_low(imm >> 48));
2218 ori(rd, rd, split_low(imm >> 32));
2219 dsll(rd, rd, 16);
2220 ori(rd, rd, split_low(imm >> 16));
2221 dsll(rd, rd, 16);
2222 ori(rd, rd, split_low(imm));
2223 }
2225 void MacroAssembler::li48(Register rd, long imm) {
2226 assert_not_delayed();
2227 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2228 lui(rd, imm >> 32);
2229 ori(rd, rd, split_low(imm >> 16));
2230 dsll(rd, rd, 16);
2231 ori(rd, rd, split_low(imm));
2232 }
2233 #endif
2235 void MacroAssembler::verify_oop(Register reg, const char* s) {
2236 if (!VerifyOops) return;
2237 const char * b = NULL;
2238 stringStream ss;
2239 ss.print("verify_oop: %s: %s", reg->name(), s);
2240 b = code_string(ss.as_string());
2241 #ifdef _LP64
2242 pushad();
2243 move(A1, reg);
2244 li(A0, (long)b);
2245 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2246 ld(T9, AT, 0);
2247 jalr(T9);
2248 delayed()->nop();
2249 popad();
2250 #else
2251 // Pass register number to verify_oop_subroutine
2252 sw(T0, SP, - wordSize);
2253 sw(T1, SP, - 2*wordSize);
2254 sw(RA, SP, - 3*wordSize);
2255 sw(A0, SP ,- 4*wordSize);
2256 sw(A1, SP ,- 5*wordSize);
2257 sw(AT, SP ,- 6*wordSize);
2258 sw(T9, SP ,- 7*wordSize);
2259 addiu(SP, SP, - 7 * wordSize);
2260 move(A1, reg);
2261 li(A0, (long)b);
2262 // call indirectly to solve generation ordering problem
2263 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2264 lw(T9, AT, 0);
2265 jalr(T9);
2266 delayed()->nop();
2267 lw(T0, SP, 6* wordSize);
2268 lw(T1, SP, 5* wordSize);
2269 lw(RA, SP, 4* wordSize);
2270 lw(A0, SP, 3* wordSize);
2271 lw(A1, SP, 2* wordSize);
2272 lw(AT, SP, 1* wordSize);
2273 lw(T9, SP, 0* wordSize);
2274 addiu(SP, SP, 7 * wordSize);
2275 #endif
2276 }
2279 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2280 if (!VerifyOops) {
2281 nop();
2282 return;
2283 }
2284 // Pass register number to verify_oop_subroutine
2285 const char * b = NULL;
2286 stringStream ss;
2287 ss.print("verify_oop_addr: %s", s);
2288 b = code_string(ss.as_string());
2290 st_ptr(T0, SP, - wordSize);
2291 st_ptr(T1, SP, - 2*wordSize);
2292 st_ptr(RA, SP, - 3*wordSize);
2293 st_ptr(A0, SP, - 4*wordSize);
2294 st_ptr(A1, SP, - 5*wordSize);
2295 st_ptr(AT, SP, - 6*wordSize);
2296 st_ptr(T9, SP, - 7*wordSize);
2297 ld_ptr(A1, addr); // addr may use SP, so load from it before changing SP
2298 addiu(SP, SP, - 7 * wordSize);
2300 li(A0, (long)b);
2301 // call indirectly to solve generation ordering problem
2302 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2303 ld_ptr(T9, AT, 0);
2304 jalr(T9);
2305 delayed()->nop();
2306 ld_ptr(T0, SP, 6* wordSize);
2307 ld_ptr(T1, SP, 5* wordSize);
2308 ld_ptr(RA, SP, 4* wordSize);
2309 ld_ptr(A0, SP, 3* wordSize);
2310 ld_ptr(A1, SP, 2* wordSize);
2311 ld_ptr(AT, SP, 1* wordSize);
2312 ld_ptr(T9, SP, 0* wordSize);
2313 addiu(SP, SP, 7 * wordSize);
2314 }
2316 // used registers : T0, T1
2317 void MacroAssembler::verify_oop_subroutine() {
2318 // RA: ra
2319 // A0: char* error message
2320 // A1: oop object to verify
2322 Label exit, error;
2323 // increment counter
2324 li(T0, (long)StubRoutines::verify_oop_count_addr());
2325 lw(AT, T0, 0);
2326 #ifdef _LP64
2327 daddi(AT, AT, 1);
2328 #else
2329 addi(AT, AT, 1);
2330 #endif
2331 sw(AT, T0, 0);
2333 // make sure object is 'reasonable'
2334 beq(A1, R0, exit); // if obj is NULL it is ok
2335 delayed()->nop();
2337 // Check if the oop is in the right area of memory
2338 // const int oop_mask = Universe::verify_oop_mask();
2339 // const int oop_bits = Universe::verify_oop_bits();
2340 const uintptr_t oop_mask = Universe::verify_oop_mask();
2341 const uintptr_t oop_bits = Universe::verify_oop_bits();
2342 li(AT, oop_mask);
2343 andr(T0, A1, AT);
2344 li(AT, oop_bits);
2345 bne(T0, AT, error);
2346 delayed()->nop();
2348 // make sure klass is 'reasonable'
2349 // Added for compressed oops: reload the heap base register,
2350 reinit_heapbase();
2351 // then load the klass through the compressed-class-pointer-aware helper.
2352 load_klass(T0, A1);
2353 beq(T0, R0, error); // if klass is NULL it is broken
2354 delayed()->nop();
2355 // return if everything seems ok
2356 bind(exit);
2358 jr(RA);
2359 delayed()->nop();
2361 // handle errors
2362 bind(error);
2363 pushad();
2364 #ifndef _LP64
2365 addi(SP, SP, (-1) * wordSize);
2366 #endif
2367 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2368 delayed()->nop();
2369 #ifndef _LP64
2370 addiu(SP, SP, 1 * wordSize);
2371 #endif
2372 popad();
2373 jr(RA);
2374 delayed()->nop();
2375 }
2377 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2378 #ifdef ASSERT
2379 assert_different_registers(t1, t2, AT);
2380 if (UseTLAB && VerifyOops) {
2381 Label next, ok;
2383 get_thread(t1);
2385 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2386 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2387 sltu(AT, t2, AT);
2388 beq(AT, R0, next);
2389 delayed()->nop();
2391 stop("assert(top >= start)");
2393 bind(next);
2394 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2395 sltu(AT, AT, t2);
2396 beq(AT, R0, ok);
2397 delayed()->nop();
2399 stop("assert(top <= end)");
2401 bind(ok);
2403 }
2404 #endif
2405 }
2407 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2408 Register tmp,
2409 int offset) {
2410 intptr_t value = *delayed_value_addr;
2411 if (value != 0)
2412 return RegisterOrConstant(value + offset);
2413 AddressLiteral a(delayed_value_addr);
2414 // load indirectly to solve generation ordering problem
2415 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2416 //ld(tmp, a);
2417 if (offset != 0)
2418 daddi(tmp,tmp, offset);
2420 return RegisterOrConstant(tmp);
2421 }
2423 void MacroAssembler::hswap(Register reg) {
2424 //short
2425 //andi(reg, reg, 0xffff);
2426 srl(AT, reg, 8);
2427 sll(reg, reg, 24);
2428 sra(reg, reg, 16);
2429 orr(reg, reg, AT);
2430 }
2432 void MacroAssembler::huswap(Register reg) {
2433 #ifdef _LP64
2434 dsrl(AT, reg, 8);
2435 dsll(reg, reg, 24);
2436 dsrl(reg, reg, 16);
2437 orr(reg, reg, AT);
2438 andi(reg, reg, 0xffff);
2439 #else
2440 //andi(reg, reg, 0xffff);
2441 srl(AT, reg, 8);
2442 sll(reg, reg, 24);
2443 srl(reg, reg, 16);
2444 orr(reg, reg, AT);
2445 #endif
2446 }
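// Worked example for hswap (sign-extending 16-bit byte swap), tracing
// reg = 0x000012F4 through the 32-bit sequence:
//   srl  AT, reg, 8   -> AT  = 0x00000012
//   sll  reg, reg, 24 -> reg = 0xF4000000
//   sra  reg, reg, 16 -> reg = 0xFFFFF400  (arithmetic shift keeps the sign)
//   orr  reg, reg, AT -> reg = 0xFFFFF412  == (short)0xF412, sign-extended
// huswap performs the same swap but yields an unsigned 16-bit result.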
2448 // A slightly tricky byte swap that needs only one extra register (AT).
2449 // Operates on 32-bit values.
2450 void MacroAssembler::swap(Register reg) {
2451 srl(AT, reg, 8);
2452 sll(reg, reg, 24);
2453 orr(reg, reg, AT);
2454 //reg : 4 1 2 3
2455 srl(AT, AT, 16);
2456 xorr(AT, AT, reg);
2457 andi(AT, AT, 0xff);
2458 //AT : 0 0 0 1^3);
2459 xorr(reg, reg, AT);
2460 //reg : 4 1 2 1
2461 sll(AT, AT, 16);
2462 xorr(reg, reg, AT);
2463 //reg : 4 3 2 1
2464 }
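// Worked example, tracing reg = 0x11223344 through the sequence above:
//   srl  AT, reg, 8    -> AT  = 0x00112233
//   sll  reg, reg, 24  -> reg = 0x44000000
//   orr  reg, reg, AT  -> reg = 0x44112233   (bytes: 4 1 2 3)
//   srl  AT, AT, 16    -> AT  = 0x00000011
//   xorr/andi          -> AT  = 0x00000022   (0x11 ^ 0x33)
//   xorr reg, reg, AT  -> reg = 0x44112211   (bytes: 4 1 2 1)
//   sll  AT, AT, 16    -> AT  = 0x00220000
//   xorr reg, reg, AT  -> reg = 0x44332211   (bytes: 4 3 2 1)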
2466 #ifdef _LP64
2468 // do 32-bit CAS using MIPS64 lld/scd
2469 //
2470 // cas_int should only compare the low 32 bits of the memory value.
2471 // However, lld/scd performs a 64-bit operation, which violates that intention.
2472 // To simulate a 32-bit atomic operation, the value loaded with LLD is split into
2473 // two halves, and only the low 32 bits are compared. If they are equal, the low 32
2474 // bits of newval, combined with the high 32 bits of the memory value, are stored back with SCD.
2475 //
2476 //Example:
2477 //
2478 // double d = 3.1415926;
2479 // System.err.println("hello" + d);
2480 //
2481 // sun.misc.FloatingDecimal$1.<init>()
2482 // |
2483 // `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2484 //
2485 // 38 cas_int [a7a7|J] [a0|I] [a6|I]
2486 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2487 // a6: 0x4ab325aa
2488 //
2489 //again:
2490 // 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2491 //
2492 // 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2493 // 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2494 // 0x00000055647f3c68: dsll32 t8, t8, 0
2495 // 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2496 // 0x00000055647f3c70: sll zero, zero, 0
2497 //
2498 // 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2499 // 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2500 // 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2501 // 0x00000055647f3c80: and v1, a6, v1
2502 // 0x00000055647f3c84: or at, t8, v1
2503 // 0x00000055647f3c88: scd at, 0x0(a7)
2504 // 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2505 // 0x00000055647f3c90: sll zero, zero, 0
2506 // 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2507 // 0x00000055647f3c98: sll zero, zero, 0
2508 //nequal:
2509 // 0x00000055647f45a4: dadd a0, t9, zero
2510 // 0x00000055647f45a8: dadd at, zero, zero
2511 //done:
2512 //
2514 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2515 // MIPS64 can use ll/sc for 32-bit atomic memory access
2516 Label done, again, nequal;
2518 bind(again);
2520 if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
2521 ll(AT, dest);
2522 bne(AT, c_reg, nequal);
2523 delayed()->nop();
2525 move(AT, x_reg);
2526 sc(AT, dest);
2527 beq(AT, R0, again);
2528 delayed()->nop();
2529 b(done);
2530 delayed()->nop();
2532 // not xchged
2533 bind(nequal);
2534 sync();
2535 move(c_reg, AT);
2536 move(AT, R0);
2538 bind(done);
2539 }
2540 #endif // cmpxchg32
2542 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2543 Label done, again, nequal;
2545 bind(again);
2546 if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
2547 #ifdef _LP64
2548 lld(AT, dest);
2549 #else
2550 ll(AT, dest);
2551 #endif
2552 bne(AT, c_reg, nequal);
2553 delayed()->nop();
2555 move(AT, x_reg);
2556 #ifdef _LP64
2557 scd(AT, dest);
2558 #else
2559 sc(AT, dest);
2560 #endif
2561 beq(AT, R0, again);
2562 delayed()->nop();
2563 b(done);
2564 delayed()->nop();
2566 // not xchged
2567 bind(nequal);
2568 sync();
2569 move(c_reg, AT);
2570 move(AT, R0);
2572 bind(done);
2573 }
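// Typical call pattern (illustrative; new_val/expected/addr_reg/success are
// placeholder names). On return AT == 1 means the exchange succeeded; AT == 0
// means the memory word differed from c_reg, which then holds the value
// observed in memory:
//
//   cmpxchg(new_val, Address(addr_reg, 0), expected);
//   bne(AT, R0, success);   // AT != 0 -> CAS succeeded
//   delayed()->nop();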
2575 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2576 Label done, again, nequal;
2578 Register x_reg = x_regLo;
2579 dsll32(x_regHi, x_regHi, 0);
2580 dsll32(x_regLo, x_regLo, 0);
2581 dsrl32(x_regLo, x_regLo, 0);
2582 orr(x_reg, x_regLo, x_regHi);
2584 Register c_reg = c_regLo;
2585 dsll32(c_regHi, c_regHi, 0);
2586 dsll32(c_regLo, c_regLo, 0);
2587 dsrl32(c_regLo, c_regLo, 0);
2588 orr(c_reg, c_regLo, c_regHi);
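// At this point x_regLo/c_regLo each hold a packed 64-bit value: the Hi
// register's low word in bits 63..32 and the original Lo word, zero-extended,
// in bits 31..0 -- exactly the layout lld/scd operate on below.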
2590 bind(again);
2592 if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
2593 lld(AT, dest);
2594 bne(AT, c_reg, nequal);
2595 delayed()->nop();
2597 //move(AT, x_reg);
2598 dadd(AT, x_reg, R0);
2599 scd(AT, dest);
2600 beq(AT, R0, again);
2601 delayed()->nop();
2602 b(done);
2603 delayed()->nop();
2605 // not xchged
2606 bind(nequal);
2607 sync();
2608 //move(c_reg, AT);
2609 //move(AT, R0);
2610 dadd(c_reg, AT, R0);
2611 dadd(AT, R0, R0);
2612 bind(done);
2613 }
2615 // Be sure the three registers (tmp, fs, ft) are different.
2616 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2617 assert_different_registers(tmp, fs, ft);
2618 div_s(tmp, fs, ft);
2619 trunc_l_s(tmp, tmp);
2620 cvt_s_l(tmp, tmp);
2621 mul_s(tmp, tmp, ft);
2622 sub_s(fd, fs, tmp);
2623 }
2625 // Be sure the three registers (tmp, fs, ft) are different.
2626 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2627 assert_different_registers(tmp, fs, ft);
2628 div_d(tmp, fs, ft);
2629 trunc_l_d(tmp, tmp);
2630 cvt_d_l(tmp, tmp);
2631 mul_d(tmp, tmp, ft);
2632 sub_d(fd, fs, tmp);
2633 }
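// Both helpers compute fd = fs - trunc(fs / ft) * ft, i.e. a remainder whose
// quotient is rounded toward zero, which matches the behaviour of Java's %
// operator on floating-point operands (up to the range limits of the
// truncating 64-bit integer conversion).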
2635 // Fast_Lock and Fast_Unlock used by C2
2637 // Because the transitions from emitted code to the runtime
2638 // monitorenter/exit helper stubs are so slow it's critical that
2639 // we inline both the stack-locking fast-path and the inflated fast path.
2640 //
2641 // See also: cmpFastLock and cmpFastUnlock.
2642 //
2643 // What follows is a specialized inline transliteration of the code
2644 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2645 // another option would be to emit TrySlowEnter and TrySlowExit methods
2646 // at startup-time. These methods would accept arguments as
2647 // (Obj, Self, box, Scratch) and return success-failure
2648 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2649 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2650 // In practice, however, the # of lock sites is bounded and is usually small.
2651 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2652 // if the processor uses simple bimodal branch predictors keyed by EIP,
2653 // since the helper routines would be called from multiple synchronization
2654 // sites.
2655 //
2656 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2657 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2658 // to those specialized methods. That'd give us a mostly platform-independent
2659 // implementation that the JITs could optimize and inline at their pleasure.
2660 // Done correctly, the only time we'd need to cross to native code would be
2661 // to park() or unpark() threads. We'd also need a few more unsafe operators
2662 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2663 // (b) explicit barriers or fence operations.
2664 //
2665 // TODO:
2666 //
2667 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2668 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2669 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2670 // the lock operators would typically be faster than reifying Self.
2671 //
2672 // * Ideally I'd define the primitives as:
2673 // fast_lock (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED.
2674 // fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED
2675 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2676 // Instead, we're stuck with rather awkward and brittle register assignments below.
2677 // Furthermore the register assignments are overconstrained, possibly resulting in
2678 // sub-optimal code near the synchronization site.
2679 //
2680 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2681 // Alternately, use a better sp-proximity test.
2682 //
2683 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2684 // Either one is sufficient to uniquely identify a thread.
2685 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2686 //
2687 // * Intrinsify notify() and notifyAll() for the common cases where the
2688 // object is locked by the calling thread but the waitlist is empty.
2689 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2690 //
2691 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2692 // But beware of excessive branch density on AMD Opterons.
2693 //
2694 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2695 // or failure of the fast-path. If the fast-path fails then we pass
2696 // control to the slow-path, typically in C. In Fast_Lock and
2697 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2698 // will emit a conditional branch immediately after the node.
2699 // So we have branches to branches and lots of ICC.ZF games.
2700 // Instead, it might be better to have C2 pass a "FailureLabel"
2701 // into Fast_Lock and Fast_Unlock. In the case of success, control
2702 // will drop through the node. ICC.ZF is undefined at exit.
2703 // In the case of failure, the node will branch directly to the
2704 // FailureLabel
2707 // obj: object to lock
2708 // box: on-stack box address (displaced header location) - KILLED
2709 // tmp: tmp -- KILLED
2710 // scr: tmp -- KILLED
2711 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2713 // Ensure the register assignments are disjoint
2714 guarantee (objReg != boxReg, "") ;
2715 guarantee (objReg != tmpReg, "") ;
2716 guarantee (objReg != scrReg, "") ;
2717 guarantee (boxReg != tmpReg, "") ;
2718 guarantee (boxReg != scrReg, "") ;
2721 block_comment("FastLock");
2722 if (PrintBiasedLockingStatistics) {
2723 push(tmpReg);
2724 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2725 pop(tmpReg);
2726 }
2728 if (EmitSync & 1) {
2729 move(AT, 0x0);
2730 return;
2731 } else
2732 if (EmitSync & 2) {
2733 Label DONE_LABEL ;
2734 if (UseBiasedLocking) {
2735 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2736 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2737 }
2739 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2740 ori(tmpReg, tmpReg, 0x1);
2741 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2743 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2744 bne(AT, R0, DONE_LABEL);
2745 delayed()->nop();
2747 // Recursive locking
2748 dsubu(tmpReg, tmpReg, SP);
2749 li(AT, (7 - os::vm_page_size() ));
2750 andr(tmpReg, tmpReg, AT);
2751 sd(tmpReg, Address(boxReg, 0));
2752 bind(DONE_LABEL) ;
2753 } else {
2754 // Possible cases that we'll encounter in fast_lock
2755 // ------------------------------------------------
2756 // * Inflated
2757 // -- unlocked
2758 // -- Locked
2759 // = by self
2760 // = by other
2761 // * biased
2762 // -- by Self
2763 // -- by other
2764 // * neutral
2765 // * stack-locked
2766 // -- by self
2767 // = sp-proximity test hits
2768 // = sp-proximity test generates false-negative
2769 // -- by other
2770 //
2772 Label IsInflated, DONE_LABEL, PopDone ;
2774 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2775 // order to reduce the number of conditional branches in the most common cases.
2776 // Beware -- there's a subtle invariant that fetch of the markword
2777 // at [FETCH], below, will never observe a biased encoding (*101b).
2778 // If this invariant is not held we risk exclusion (safety) failure.
2779 if (UseBiasedLocking && !UseOptoBiasInlining) {
2780 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2781 }
2783 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2784 andi(AT, tmpReg, markOopDesc::monitor_value);
2785 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2786 delayed()->nop();
2788 // Attempt stack-locking ...
2789 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2790 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2791 //if (os::is_MP()) {
2792 // sync();
2793 //}
2795 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2796 //AT == 1: unlocked
2798 if (PrintBiasedLockingStatistics) {
2799 Label L;
2800 beq(AT, R0, L);
2801 delayed()->nop();
2802 push(T0);
2803 push(T1);
2804 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2805 pop(T1);
2806 pop(T0);
2807 bind(L);
2808 }
2809 bne(AT, R0, DONE_LABEL);
2810 delayed()->nop();
2812 // Recursive locking
2813 // The object is stack-locked: markword contains stack pointer to BasicLock.
2814 // Locked by current thread if difference with current SP is less than one page.
2815 dsubu(tmpReg, tmpReg, SP);
2816 li(AT, 7 - os::vm_page_size() );
2817 andr(tmpReg, tmpReg, AT);
2818 sd(tmpReg, Address(boxReg, 0));
2819 if (PrintBiasedLockingStatistics) {
2820 Label L;
2821 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2822 bne(tmpReg, R0, L);
2823 delayed()->nop();
2824 push(T0);
2825 push(T1);
2826 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2827 pop(T1);
2828 pop(T0);
2829 bind(L);
2830 }
2831 sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0
2833 b(DONE_LABEL) ;
2834 delayed()->nop();
2836 bind(IsInflated) ;
2837 // The object's monitor m is unlocked iff m->owner == NULL,
2838 // otherwise m->owner may contain a thread or a stack address.
2840 // TODO: someday avoid the ST-before-CAS penalty by
2841 // relocating (deferring) the following ST.
2842 // We should also think about trying a CAS without having
2843 // fetched _owner. If the CAS is successful we may
2844 // avoid an RTO->RTS upgrade on the $line.
2845 // Without cast to int32_t a movptr will destroy r10 which is typically obj
2846 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
2847 sd(AT, Address(boxReg, 0));
2849 move(boxReg, tmpReg) ;
2850 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2851 // if (m->owner != 0) => AT = 0, goto slow path.
2852 move(AT, R0);
2853 bne(tmpReg, R0, DONE_LABEL);
2854 delayed()->nop();
2856 #ifndef OPT_THREAD
2857 get_thread (TREG) ;
2858 #endif
2859 // It's inflated and appears unlocked
2860 //if (os::is_MP()) {
2861 // sync();
2862 //}
2863 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
2864 // Intentional fall-through into DONE_LABEL ...
2867 // DONE_LABEL is a hot target - we'd really like to place it at the
2868 // start of cache line by padding with NOPs.
2869 // See the AMD and Intel software optimization manuals for the
2870 // most efficient "long" NOP encodings.
2871 // Unfortunately none of our alignment mechanisms suffice.
2872 bind(DONE_LABEL);
2874 // At DONE_LABEL the AT is set as follows ...
2875 // Fast_Unlock uses the same protocol.
2876 // AT == 1 -> Success
2877 // AT == 0 -> Failure - force control through the slow-path
2879 // Avoid branch-to-branch on AMD processors
2880 // This appears to be superstition.
2881 if (EmitSync & 32) nop() ;
2883 }
2884 }
2886 // obj: object to unlock
2887 // box: box address (displaced header location), killed.
2888 // tmp: killed tmp; cannot be obj nor box.
2889 //
2890 // Some commentary on balanced locking:
2891 //
2892 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
2893 // Methods that don't have provably balanced locking are forced to run in the
2894 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
2895 // The interpreter provides two properties:
2896 // I1: At return-time the interpreter automatically and quietly unlocks any
2897 // objects acquired by the current activation (frame). Recall that the
2898 // interpreter maintains an on-stack list of locks currently held by
2899 // a frame.
2900 // I2: If a method attempts to unlock an object that is not held by the
2901 // frame, the interpreter throws IMSX.
2902 //
2903 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
2904 // B() doesn't have provably balanced locking so it runs in the interpreter.
2905 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
2906 // is still locked by A().
2907 //
2908 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
2909 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
2910 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
2911 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
2913 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
2915 guarantee (objReg != boxReg, "") ;
2916 guarantee (objReg != tmpReg, "") ;
2917 guarantee (boxReg != tmpReg, "") ;
2919 block_comment("FastUnlock");
2922 if (EmitSync & 4) {
2923 // Disable - inhibit all inlining. Force control through the slow-path
2924 move(AT, 0x0);
2925 return;
2926 } else
2927 if (EmitSync & 8) {
2928 Label DONE_LABEL ;
2929 if (UseBiasedLocking) {
2930 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2931 }
2932 // classic stack-locking code ...
2933 ld(tmpReg, Address(boxReg, 0)) ;
2934 beq(tmpReg, R0, DONE_LABEL) ;
2935 move(AT, 0x1); // delay slot
2937 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
2938 bind(DONE_LABEL);
2939 } else {
2940 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
2942 // Critically, the biased locking test must have precedence over
2943 // and appear before the (box->dhw == 0) recursive stack-lock test.
2944 if (UseBiasedLocking && !UseOptoBiasInlining) {
2945 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2946 }
2948 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
2949 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
2950 delayed()->daddiu(AT, R0, 0x1);
2952 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
2953 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
2954 beq(AT, R0, Stacked) ; // Inflated?
2955 delayed()->nop();
2957 bind(Inflated) ;
2958 // It's inflated.
2959 // Despite our balanced locking property we still check that m->_owner == Self
2960 // as java routines or native JNI code called by this thread might
2961 // have released the lock.
2962 // Refer to the comments in synchronizer.cpp for how we might encode extra
2963 // state in _succ so we can avoid fetching EntryList|cxq.
2964 //
2965 // I'd like to add more cases in fast_lock() and fast_unlock() --
2966 // such as recursive enter and exit -- but we have to be wary of
2967 // I$ bloat, T$ effects and BP$ effects.
2968 //
2969 // If there's no contention try a 1-0 exit. That is, exit without
2970 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
2971 // we detect and recover from the race that the 1-0 exit admits.
2972 //
2973 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
2974 // before it STs null into _owner, releasing the lock. Updates
2975 // to data protected by the critical section must be visible before
2976 // we drop the lock (and thus before any other thread could acquire
2977 // the lock and observe the fields protected by the lock).
2978 #ifndef OPT_THREAD
2979 get_thread (TREG) ;
2980 #endif
2982 // It's inflated
2983 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2984 xorr(boxReg, boxReg, TREG);
2986 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
2987 orr(boxReg, boxReg, AT);
2989 move(AT, R0);
2990 bne(boxReg, R0, DONE_LABEL);
2991 delayed()->nop();
2993 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
2994 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
2995 orr(boxReg, boxReg, AT);
2997 move(AT, R0);
2998 bne(boxReg, R0, DONE_LABEL);
2999 delayed()->nop();
3001 sync();
3002 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3003 move(AT, 0x1);
3004 b(DONE_LABEL);
3005 delayed()->nop();
3007 bind (Stacked);
3008 ld(tmpReg, Address(boxReg, 0)) ;
3009 //if (os::is_MP()) { sync(); }
3010 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3012 if (EmitSync & 65536) {
3013 bind (CheckSucc);
3014 }
3016 bind(DONE_LABEL);
3018 // Avoid branch to branch on AMD processors
3019 if (EmitSync & 32768) { nop() ; }
3020 }
3021 }
3023 void MacroAssembler::align(int modulus) {
3024 while (offset() % modulus != 0) nop();
3025 }
3028 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3029 //Unimplemented();
3030 }
3032 #ifdef _LP64
3033 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3034 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3036 // In MIPS64, F0~F23 are all caller-saved registers
3037 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3038 #else
3039 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3040 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3042 Register caller_saved_fpu_registers[] = {};
3043 #endif
3045 // We preserve all caller-saved registers
3046 void MacroAssembler::pushad(){
3047 int i;
3049 // Fixed-point registers
3050 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3051 daddi(SP, SP, -1 * len * wordSize);
3052 for (i = 0; i < len; i++)
3053 {
3054 #ifdef _LP64
3055 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3056 #else
3057 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3058 #endif
3059 }
3061 // Floating-point registers
3062 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3063 daddi(SP, SP, -1 * len * wordSize);
3064 for (i = 0; i < len; i++)
3065 {
3066 #ifdef _LP64
3067 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3068 #else
3069 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3070 #endif
3071 }
3072 }
3074 void MacroAssembler::popad(){
3075 int i;
3077 // Floating-point registers
3078 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3079 for (i = 0; i < len; i++)
3080 {
3081 #ifdef _LP64
3082 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3083 #else
3084 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3085 #endif
3086 }
3087 daddi(SP, SP, len * wordSize);
3089 // Fixed-point registers
3090 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3091 for (i = 0; i < len; i++)
3092 {
3093 #ifdef _LP64
3094 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3095 #else
3096 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3097 #endif
3098 }
3099 daddi(SP, SP, len * wordSize);
3100 }
3102 // We preserve all caller-saved registers except V0
3103 void MacroAssembler::pushad_except_v0() {
3104 int i;
3106 // Fixed-point registers
3107 int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
3108 daddi(SP, SP, -1 * len * wordSize);
3109 for (i = 0; i < len; i++) {
3110 #ifdef _LP64
3111 sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3112 #else
3113 sw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3114 #endif
3115 }
3117 // Floating-point registers
3118 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3119 daddi(SP, SP, -1 * len * wordSize);
3120 for (i = 0; i < len; i++) {
3121 #ifdef _LP64
3122 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3123 #else
3124 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3125 #endif
3126 }
3127 }
3129 void MacroAssembler::popad_except_v0() {
3130 int i;
3132 // Floating-point registers
3133 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3134 for (i = 0; i < len; i++) {
3135 #ifdef _LP64
3136 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3137 #else
3138 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3139 #endif
3140 }
3141 daddi(SP, SP, len * wordSize);
3143 // Fixed-point registers
3144 len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
3145 for (i = 0; i < len; i++) {
3146 #ifdef _LP64
3147 ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3148 #else
3149 lw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3150 #endif
3151 }
3152 daddi(SP, SP, len * wordSize);
3153 }
3155 void MacroAssembler::push2(Register reg1, Register reg2) {
3156 #ifdef _LP64
3157 daddi(SP, SP, -16);
3158 sd(reg2, SP, 0);
3159 sd(reg1, SP, 8);
3160 #else
3161 addi(SP, SP, -8);
3162 sw(reg2, SP, 0);
3163 sw(reg1, SP, 4);
3164 #endif
3165 }
3167 void MacroAssembler::pop2(Register reg1, Register reg2) {
3168 #ifdef _LP64
3169 ld(reg1, SP, 0);
3170 ld(reg2, SP, 8);
3171 daddi(SP, SP, 16);
3172 #else
3173 lw(reg1, SP, 0);
3174 lw(reg2, SP, 4);
3175 addi(SP, SP, 8);
3176 #endif
3177 }
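// Stack layout after push2(reg1, reg2): reg2 sits at SP+0 and reg1 at
// SP+wordSize. pop2(reg1, reg2) loads reg1 from SP+0 and reg2 from
// SP+wordSize, so a push2/pop2 pair invoked with the same argument order
// swaps the two registers' contents.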
3179 // for UseCompressedOops Option
3180 void MacroAssembler::load_klass(Register dst, Register src) {
3181 #ifdef _LP64
3182 if(UseCompressedClassPointers){
3183 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3184 decode_klass_not_null(dst);
3185 } else
3186 #endif
3187 ld(dst, src, oopDesc::klass_offset_in_bytes());
3188 }
3190 void MacroAssembler::store_klass(Register dst, Register src) {
3191 #ifdef _LP64
3192 if(UseCompressedClassPointers){
3193 encode_klass_not_null(src);
3194 sw(src, dst, oopDesc::klass_offset_in_bytes());
3195 } else
3196 #endif
3197 sd(src, dst, oopDesc::klass_offset_in_bytes());
3198 }
3201 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3202 load_klass(dst, src);
3203 ld(dst, Address(dst, Klass::prototype_header_offset()));
3204 }
3206 #ifdef _LP64
3207 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3208 if (UseCompressedClassPointers) {
3209 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3210 }
3211 }
3213 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3214 if(UseCompressedOops){
3215 lwu(dst, src);
3216 decode_heap_oop(dst);
3217 } else {
3218 ld(dst, src);
3219 }
3220 }
3222 void MacroAssembler::store_heap_oop(Address dst, Register src){
3223 if(UseCompressedOops){
3224 assert(!dst.uses(src), "not enough registers");
3225 encode_heap_oop(src);
3226 sw(src, dst);
3227 } else {
3228 sd(src, dst);
3229 }
3230 }
3232 void MacroAssembler::store_heap_oop_null(Address dst){
3233 if(UseCompressedOops){
3234 sw(R0, dst);
3235 } else {
3236 sd(R0, dst);
3237 }
3238 }
3240 #ifdef ASSERT
3241 void MacroAssembler::verify_heapbase(const char* msg) {
3242 assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
3243 assert (Universe::heap() != NULL, "java heap should be initialized");
3244 }
3245 #endif
3248 // Algorithm must match oop.inline.hpp encode_heap_oop.
3249 void MacroAssembler::encode_heap_oop(Register r) {
3250 #ifdef ASSERT
3251 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3252 #endif
3253 verify_oop(r, "broken oop in encode_heap_oop");
3254 if (Universe::narrow_oop_base() == NULL) {
3255 if (Universe::narrow_oop_shift() != 0) {
3256 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3257 shr(r, LogMinObjAlignmentInBytes);
3258 }
3259 return;
3260 }
3262 movz(r, S5_heapbase, r);
3263 dsub(r, r, S5_heapbase);
3264 if (Universe::narrow_oop_shift() != 0) {
3265 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3266 shr(r, LogMinObjAlignmentInBytes);
3267 }
3268 }
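// Illustrative encoding with a non-NULL base: for base B and shift 3,
//   r != NULL  ->  r = (r - B) >> 3
//   r == NULL  ->  movz replaces r with S5_heapbase first, so the subtract
//                  yields 0 and the NULL oop stays encoded as 0.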
3270 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3271 #ifdef ASSERT
3272 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3273 #endif
3274 verify_oop(src, "broken oop in encode_heap_oop");
3275 if (Universe::narrow_oop_base() == NULL) {
3276 if (Universe::narrow_oop_shift() != 0) {
3277 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3278 dsrl(dst, src, LogMinObjAlignmentInBytes);
3279 } else {
3280 if (dst != src) move(dst, src);
3281 }
3282 } else {
3283 if (dst == src) {
3284 movz(dst, S5_heapbase, dst);
3285 dsub(dst, dst, S5_heapbase);
3286 if (Universe::narrow_oop_shift() != 0) {
3287 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3288 shr(dst, LogMinObjAlignmentInBytes);
3289 }
3290 } else {
3291 dsub(dst, src, S5_heapbase);
3292 if (Universe::narrow_oop_shift() != 0) {
3293 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3294 shr(dst, LogMinObjAlignmentInBytes);
3295 }
3296 movz(dst, R0, src);
3297 }
3298 }
3299 }
3301 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3302 assert (UseCompressedOops, "should be compressed");
3303 #ifdef ASSERT
3304 if (CheckCompressedOops) {
3305 Label ok;
3306 bne(r, R0, ok);
3307 delayed()->nop();
3308 stop("null oop passed to encode_heap_oop_not_null");
3309 bind(ok);
3310 }
3311 #endif
3312 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3313 if (Universe::narrow_oop_base() != NULL) {
3314 dsub(r, r, S5_heapbase);
3315 }
3316 if (Universe::narrow_oop_shift() != 0) {
3317 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3318 shr(r, LogMinObjAlignmentInBytes);
3319 }
3321 }
3323 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3324 assert (UseCompressedOops, "should be compressed");
3325 #ifdef ASSERT
3326 if (CheckCompressedOops) {
3327 Label ok;
3328 bne(src, R0, ok);
3329 delayed()->nop();
3330 stop("null oop passed to encode_heap_oop_not_null2");
3331 bind(ok);
3332 }
3333 #endif
3334 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3336 if (Universe::narrow_oop_base() != NULL) {
3337 dsub(dst, src, S5_heapbase);
3338 if (Universe::narrow_oop_shift() != 0) {
3339 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3340 shr(dst, LogMinObjAlignmentInBytes);
3341 }
3342 } else {
3343 if (Universe::narrow_oop_shift() != 0) {
3344 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3345 dsrl(dst, src, LogMinObjAlignmentInBytes);
3346 } else {
3347 if (dst != src) move(dst, src);
3348 }
3349 }
3350 }
3352 void MacroAssembler::decode_heap_oop(Register r) {
3353 #ifdef ASSERT
3354 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3355 #endif
3356 if (Universe::narrow_oop_base() == NULL) {
3357 if (Universe::narrow_oop_shift() != 0) {
3358 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3359 shl(r, LogMinObjAlignmentInBytes);
3360 }
3361 } else {
3362 move(AT, r);
3363 if (Universe::narrow_oop_shift() != 0) {
3364 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3365 shl(r, LogMinObjAlignmentInBytes);
3366 }
3367 dadd(r, r, S5_heapbase);
3368 movz(r, R0, AT);
3369 }
3370 verify_oop(r, "broken oop in decode_heap_oop");
3371 }
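// The mirror of encode_heap_oop above: AT preserves the original narrow oop
// so the final movz(r, R0, AT) forces the result back to 0 when the narrow
// oop was NULL; otherwise r = (narrow << shift) + S5_heapbase.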
3373 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3374 #ifdef ASSERT
3375 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3376 #endif
3377 if (Universe::narrow_oop_base() == NULL) {
3378 if (Universe::narrow_oop_shift() != 0) {
3379 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3380 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3381 dsll(dst, src, LogMinObjAlignmentInBytes);
3382 } else {
3383 if (dst != src) move(dst, src);
3384 }
3385 } else {
3386 if (dst == src) {
3387 move(AT, dst);
3388 if (Universe::narrow_oop_shift() != 0) {
3389 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3390 shl(dst, LogMinObjAlignmentInBytes);
3391 }
3392 dadd(dst, dst, S5_heapbase);
3393 movz(dst, R0, AT);
3394 } else {
3395 if (Universe::narrow_oop_shift() != 0) {
3396 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3397 dsll(dst, src, LogMinObjAlignmentInBytes);
3398 daddu(dst, dst, S5_heapbase);
3399 } else {
3400 daddu(dst, src, S5_heapbase);
3401 }
3402 movz(dst, R0, src);
3403 }
3404 }
3405 verify_oop(dst, "broken oop in decode_heap_oop");
3406 }
3408 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3409 // Note: it will change flags
3410 assert (UseCompressedOops, "should only be used for compressed headers");
3411 assert (Universe::heap() != NULL, "java heap should be initialized");
3412 // Cannot assert, unverified entry point counts instructions (see .ad file)
3413 // vtableStubs also counts instructions in pd_code_size_limit.
3414 // Also do not verify_oop as this is called by verify_oop.
3415 if (Universe::narrow_oop_shift() != 0) {
3416 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3417 shl(r, LogMinObjAlignmentInBytes);
3418 if (Universe::narrow_oop_base() != NULL) {
3419 daddu(r, r, S5_heapbase);
3420 }
3421 } else {
3422 assert (Universe::narrow_oop_base() == NULL, "sanity");
3423 }
3424 }
3426 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3427 assert (UseCompressedOops, "should only be used for compressed headers");
3428 assert (Universe::heap() != NULL, "java heap should be initialized");
3430 // Cannot assert, unverified entry point counts instructions (see .ad file)
3431 // vtableStubs also counts instructions in pd_code_size_limit.
3432 // Also do not verify_oop as this is called by verify_oop.
3433 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3434 if (Universe::narrow_oop_shift() != 0) {
3435 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3436 if (LogMinObjAlignmentInBytes == Address::times_8) {
3437 dsll(dst, src, LogMinObjAlignmentInBytes);
3438 daddu(dst, dst, S5_heapbase);
3439 } else {
3440 dsll(dst, src, LogMinObjAlignmentInBytes);
3441 if (Universe::narrow_oop_base() != NULL) {
3442 daddu(dst, dst, S5_heapbase);
3443 }
3444 }
3445 } else {
3446 assert (Universe::narrow_oop_base() == NULL, "sanity");
3447 if (dst != src) {
3448 move(dst, src);
3449 }
3450 }
3451 }
3453 void MacroAssembler::encode_klass_not_null(Register r) {
3454 if (Universe::narrow_klass_base() != NULL) {
3455 assert(r != AT, "Encoding a klass in AT");
3456 set64(AT, (int64_t)Universe::narrow_klass_base());
3457 dsub(r, r, AT);
3458 }
3459 if (Universe::narrow_klass_shift() != 0) {
3460 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3461 shr(r, LogKlassAlignmentInBytes);
3462 }
3463 }
3465 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3466 if (dst == src) {
3467 encode_klass_not_null(src);
3468 } else {
3469 if (Universe::narrow_klass_base() != NULL) {
3470 set64(dst, (int64_t)Universe::narrow_klass_base());
3471 dsub(dst, src, dst);
3472 if (Universe::narrow_klass_shift() != 0) {
3473 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3474 shr(dst, LogKlassAlignmentInBytes);
3475 }
3476 } else {
3477 if (Universe::narrow_klass_shift() != 0) {
3478 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3479 dsrl(dst, src, LogKlassAlignmentInBytes);
3480 } else {
3481 move(dst, src);
3482 }
3483 }
3484 }
3485 }
3487 // Function instr_size_for_decode_klass_not_null() counts the instructions
3488 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3489 // when (Universe::heap() != NULL). Hence, if the instructions they
3490 // generate change, then this method needs to be updated.
3491 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3492 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3493 if (Universe::narrow_klass_base() != NULL) {
3494 // set64 (up to 6 instructions) + daddu, plus an optional shift; the estimate is conservative.
3495 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3496 } else {
3497 // With a NULL base, only the optional shift instruction remains.
3498 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3499 }
3500 }
3502 void MacroAssembler::decode_klass_not_null(Register r) {
3503 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3504 assert(r != AT, "Decoding a klass in AT");
3505 // Cannot assert, unverified entry point counts instructions (see .ad file)
3506 // vtableStubs also counts instructions in pd_code_size_limit.
3507 // Also do not verify_oop as this is called by verify_oop.
3508 if (Universe::narrow_klass_shift() != 0) {
3509 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3510 shl(r, LogKlassAlignmentInBytes);
3511 }
3512 if (Universe::narrow_klass_base() != NULL) {
3513 set64(AT, (int64_t)Universe::narrow_klass_base());
3514 daddu(r, r, AT);
3515 // Not necessary on MIPS at all.
3516 //reinit_heapbase();
3517 }
3518 }
3520 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3521 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3523 if (dst == src) {
3524 decode_klass_not_null(dst);
3525 } else {
3526 // Cannot assert, unverified entry point counts instructions (see .ad file)
3527 // vtableStubs also counts instructions in pd_code_size_limit.
3528 // Also do not verify_oop as this is called by verify_oop.
3529 set64(dst, (int64_t)Universe::narrow_klass_base());
3530 if (Universe::narrow_klass_shift() != 0) {
3531 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3532 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3533 dsll(AT, src, Address::times_8);
3534 daddu(dst, dst, AT);
3535 } else {
3536 daddu(dst, src, dst);
3537 }
3538 }
3539 }
3541 void MacroAssembler::incrementl(Register reg, int value) {
3542 if (value == min_jint) {
3543 move(AT, value);
3544 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3545 return;
3546 }
3547 if (value < 0) { decrementl(reg, -value); return; }
3548 if (value == 0) { return; }
3550 if(Assembler::is_simm16(value)) {
3551 NOT_LP64(addiu(reg, reg, value));
3552 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
3553 } else {
3554 move(AT, value);
3555 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3556 }
3557 }
3559 void MacroAssembler::decrementl(Register reg, int value) {
3560 if (value == min_jint) {
3561 move(AT, value);
3562 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3563 return;
3564 }
3565 if (value < 0) { incrementl(reg, -value); return; }
3566 if (value == 0) { return; }
3568 if (Assembler::is_simm16(value)) {
3569 NOT_LP64(addiu(reg, reg, -value));
3570 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
3571 } else {
3572 move(AT, value);
3573 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3574 }
3575 }
3577 void MacroAssembler::reinit_heapbase() {
3578 if (UseCompressedOops || UseCompressedClassPointers) {
3579 if (Universe::heap() != NULL) {
3580 if (Universe::narrow_oop_base() == NULL) {
3581 move(S5_heapbase, R0);
3582 } else {
3583 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3584 }
3585 } else {
3586 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3587 ld(S5_heapbase, S5_heapbase, 0);
3588 }
3589 }
3590 }
3591 #endif // _LP64
3593 void MacroAssembler::check_klass_subtype(Register sub_klass,
3594 Register super_klass,
3595 Register temp_reg,
3596 Label& L_success) {
3597 // Implements the same logic as gen_subtype_check.
3598 Label L_failure;
3599 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3600 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3601 bind(L_failure);
3602 }
3604 SkipIfEqual::SkipIfEqual(
3605 MacroAssembler* masm, const bool* flag_addr, bool value) {
3606 _masm = masm;
3607 _masm->li(AT, (address)flag_addr);
3608 _masm->lb(AT, AT, 0);
3609 _masm->addi(AT, AT, -value);
3610 _masm->beq(AT, R0, _label);
3611 _masm->delayed()->nop();
3612 }
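// Usage sketch (illustrative). Assumes the destructor, declared in the header
// (not shown here), binds _label:
//
//   { SkipIfEqual skip(masm, &SomeBoolFlag, true);
//     // ... code emitted here is branched over when *flag_addr == true ...
//   } // ~SkipIfEqual binds _label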
3613 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3614 Register super_klass,
3615 Register temp_reg,
3616 Label* L_success,
3617 Label* L_failure,
3618 Label* L_slow_path,
3619 RegisterOrConstant super_check_offset) {
3620 assert_different_registers(sub_klass, super_klass, temp_reg);
3621 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3622 if (super_check_offset.is_register()) {
3623 assert_different_registers(sub_klass, super_klass,
3624 super_check_offset.as_register());
3625 } else if (must_load_sco) {
3626 assert(temp_reg != noreg, "supply either a temp or a register offset");
3627 }
3629 Label L_fallthrough;
3630 int label_nulls = 0;
3631 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3632 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3633 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3634 assert(label_nulls <= 1, "at most one NULL in the batch");
3636 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3637 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3638 // If the pointers are equal, we are done (e.g., String[] elements).
3639 // This self-check enables sharing of secondary supertype arrays among
3640 // non-primary types such as array-of-interface. Otherwise, each such
3641 // type would need its own customized SSA.
3642 // We move this check to the front of the fast path because many
3643 // type checks are in fact trivially successful in this manner,
3644 // so we get a nicely predicted branch right at the start of the check.
3645 beq(sub_klass, super_klass, *L_success);
3646 delayed()->nop();
3647 // Check the supertype display:
3648 if (must_load_sco) {
3649 // The super-check offset is a non-negative 32-bit field; lwu zero-extends it on LP64.
3650 lwu(temp_reg, super_klass, sco_offset);
3651 super_check_offset = RegisterOrConstant(temp_reg);
3652 }
3653 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
3654 daddu(AT, sub_klass, AT);
3655 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
3657 // This check has worked decisively for primary supers.
3658 // Secondary supers are sought in the super_cache ('super_cache_addr').
3659 // (Secondary supers are interfaces and very deeply nested subtypes.)
3660 // This works in the same check above because of a tricky aliasing
3661 // between the super_cache and the primary super display elements.
3662 // (The 'super_check_addr' can address either, as the case requires.)
3663 // Note that the cache is updated below if it does not help us find
3664 // what we need immediately.
3665 // So if it was a primary super, we can just fail immediately.
3666 // Otherwise, it's the slow path for us (no success at this point).
3668 if (super_check_offset.is_register()) {
3669 beq(super_klass, AT, *L_success);
3670 delayed()->nop();
3671 addi(AT, super_check_offset.as_register(), -sc_offset);
3672 if (L_failure == &L_fallthrough) {
3673 beq(AT, R0, *L_slow_path);
3674 delayed()->nop();
3675 } else {
3676 bne_far(AT, R0, *L_failure);
3677 delayed()->nop();
3678 b(*L_slow_path);
3679 delayed()->nop();
3680 }
3681 } else if (super_check_offset.as_constant() == sc_offset) {
3682 // Need a slow path; fast failure is impossible.
3683 if (L_slow_path == &L_fallthrough) {
3684 beq(super_klass, AT, *L_success);
3685 delayed()->nop();
3686 } else {
3687 bne(super_klass, AT, *L_slow_path);
3688 delayed()->nop();
3689 b(*L_success);
3690 delayed()->nop();
3691 }
3692 } else {
3693 // No slow path; it's a fast decision.
3694 if (L_failure == &L_fallthrough) {
3695 beq(super_klass, AT, *L_success);
3696 delayed()->nop();
3697 } else {
3698 bne_far(super_klass, AT, *L_failure);
3699 delayed()->nop();
3700 b(*L_success);
3701 delayed()->nop();
3702 }
3703 }
3705 bind(L_fallthrough);
3707 }
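// Slow path of the subtype check: a linear scan of the secondary-supers
// array. Expected to be emitted directly after the fast path, which
// branches here through its L_slow_path label.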
3710 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3711 Register super_klass,
3712 Register temp_reg,
3713 Register temp2_reg,
3714 Label* L_success,
3715 Label* L_failure,
3716 bool set_cond_codes) {
3717 if (temp2_reg == noreg)
3718 temp2_reg = TSR;
3719 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3720 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3722 Label L_fallthrough;
3723 int label_nulls = 0;
3724 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3725 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3726 assert(label_nulls <= 1, "at most one NULL in the batch");
3728 // a couple of useful fields in sub_klass:
3729 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3730 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3731 Address secondary_supers_addr(sub_klass, ss_offset);
3732 Address super_cache_addr( sub_klass, sc_offset);
3734 // Do a linear scan of the secondary super-klass chain.
3735 // This code is rarely used, so simplicity is a virtue here.
3736 // (x86 does this with repne_scan, whose fixed registers must be spilled;
3736 // on MIPS we emit an explicit scan loop instead.)
3737 // Don't worry too much about pre-existing connections with the input regs.
3739 #ifndef PRODUCT
3740 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3741 ExternalAddress pst_counter_addr((address) pst_counter);
3742 NOT_LP64( incrementl(pst_counter_addr) );
3743 #endif //PRODUCT
3745 // We will consult the secondary-super array.
3746 ld(temp_reg, secondary_supers_addr);
3747 // Load the array length. (The length is non-negative, so a sign-extending lw is safe on LP64.)
3748 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3749 // Skip to start of data.
3750 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
3752 // OpenJDK8 never compresses klass pointers in secondary-super array.
3753 Label Loop, subtype;
3754 bind(Loop);
3755 beq(temp2_reg, R0, *L_failure);
3756 delayed()->nop();
3757 ld(AT, temp_reg, 0);
3758 beq(AT, super_klass, subtype);
3759 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3760 b(Loop);
3761 delayed()->daddi(temp2_reg, temp2_reg, -1);
3763 bind(subtype);
3764 sd(super_klass, super_cache_addr);
3765 if (L_success != &L_fallthrough) {
3766 b(*L_success);
3767 delayed()->nop();
3768 }
3770 // Success. Cache the super we found and proceed in triumph.
3771 #undef IS_A_TEMP
3773 bind(L_fallthrough);
3774 }
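// Fetch the oop result deposited by a VM call from the thread-local
// vm_result slot and clear the slot, so the stale root does not outlive
// this use.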
3776 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3777 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3778 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3779 verify_oop(oop_result, "broken oop in call_VM_base");
3780 }
3782 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3783 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3784 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3785 }
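// Compute the address of an interpreter argument slot relative to SP. A
// constant slot index is folded into the displacement; a register index is
// scaled by stackElementSize (times_8) at runtime.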
3787 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3788 int extra_slot_offset) {
3789 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3790 int stackElementSize = Interpreter::stackElementSize;
3791 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3792 #ifdef ASSERT
3793 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3794 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3795 #endif
3796 Register scale_reg = NOREG;
3797 Address::ScaleFactor scale_factor = Address::no_scale;
3798 if (arg_slot.is_constant()) {
3799 offset += arg_slot.as_constant() * stackElementSize;
3800 } else {
3801 scale_reg = arg_slot.as_register();
3802 scale_factor = Address::times_8;
3803 }
3804 // We don't push RA on stack in prepare_invoke.
3805 // offset += wordSize; // return PC is on stack
3806 if (scale_reg == NOREG) return Address(SP, offset);
3807 else {
3808 dsll(scale_reg, scale_reg, scale_factor);
3809 daddu(scale_reg, SP, scale_reg);
3810 return Address(scale_reg, offset);
3811 }
3812 }
3814 SkipIfEqual::~SkipIfEqual() {
3815 _masm->bind(_label);
3816 }
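// Load an integral value of the given size, sign- or zero-extending as
// requested. On 32-bit builds an 8-byte value needs dst2 for the high word.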
3818 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3819 switch (size_in_bytes) {
3820 #ifndef _LP64
3821 case 8:
3822 assert(dst2 != noreg, "second dest register required");
3823 lw(dst, src);
3824 lw(dst2, src.plus_disp(BytesPerInt));
3825 break;
3826 #else
3827 case 8: ld(dst, src); break;
3828 #endif
3829 case 4: lw(dst, src); break;
3830 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
3831 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
3832 default: ShouldNotReachHere();
3833 }
3834 }
3836 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
3837 switch (size_in_bytes) {
3838 #ifndef _LP64
3839 case 8:
3840 assert(src2 != noreg, "second source register required");
3841 sw(src, dst);
3842 sw(src2, dst.plus_disp(BytesPerInt));
3843 break;
3844 #else
3845 case 8: sd(src, dst); break;
3846 #endif
3847 case 4: sw(src, dst); break;
3848 case 2: sh(src, dst); break;
3849 case 1: sb(src, dst); break;
3850 default: ShouldNotReachHere();
3851 }
3852 }
3854 // Look up the method for a megamorphic invokeinterface call.
3855 // The target method is determined by <intf_klass, itable_index>.
3856 // The receiver klass is in recv_klass.
3857 // On success, the result will be in method_result, and execution falls through.
3858 // On failure, execution transfers to the given label.
3859 void MacroAssembler::lookup_interface_method(Register recv_klass,
3860 Register intf_klass,
3861 RegisterOrConstant itable_index,
3862 Register method_result,
3863 Register scan_temp,
3864 Label& L_no_such_interface,
3865 bool return_method) {
3866 assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
3867 assert_different_registers(method_result, intf_klass, scan_temp, AT);
3868 assert(recv_klass != method_result || !return_method,
3869 "recv_klass can be destroyed when method isn't needed");
3871 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3872 "caller must use same register for non-constant itable index as for method");
3874 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3875 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
3876 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3877 int scan_step = itableOffsetEntry::size() * wordSize;
3878 int vte_size = vtableEntry::size() * wordSize;
3879 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3880 assert(vte_size == wordSize, "else adjust times_vte_scale");
3882 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
3884 // %%% Could store the aligned, prescaled offset in the klassoop.
3885 dsll(scan_temp, scan_temp, times_vte_scale);
3886 daddu(scan_temp, recv_klass, scan_temp);
3887 daddiu(scan_temp, scan_temp, vtable_base);
3888 if (HeapWordsPerLong > 1) {
3889 // Round up to align_object_offset boundary
3890 // see code for InstanceKlass::start_of_itable!
3891 round_to(scan_temp, BytesPerLong);
3892 }
3894 if (return_method) {
3895 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
3896 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
3897 if (itable_index.is_constant()) {
3898 set64(AT, (int)itable_index.as_constant());
3899 dsll(AT, AT, (int)Address::times_ptr);
3900 } else {
3901 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
3902 }
3903 daddu(AT, AT, recv_klass);
3904 daddiu(recv_klass, AT, itentry_off);
3905 }
3907 Label search, found_method;
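// The scan loop is peeled once: the first (peel == 1) copy branches
// straight to found_method on the common first-entry hit; the second copy
// inverts the test so that a hit falls through to found_method.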
3909 for (int peel = 1; peel >= 0; peel--) {
3910 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
3912 if (peel) {
3913 beq(intf_klass, method_result, found_method);
3914 delayed()->nop();
3915 } else {
3916 bne(intf_klass, method_result, search);
3917 delayed()->nop();
3918 // (invert the test to fall through to found_method...)
3919 }
3921 if (!peel) break;
3923 bind(search);
3925 // Check that the previous entry is non-null. A null entry means that
3926 // the receiver class doesn't implement the interface, and wasn't the
3927 // same as when the caller was compiled.
3928 beq(method_result, R0, L_no_such_interface);
3929 delayed()->nop();
3930 daddiu(scan_temp, scan_temp, scan_step);
3931 }
3933 bind(found_method);
3935 if (return_method) {
3936 // Got a hit.
3937 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
3938 if (UseLEXT1) {
3939 gsldx(method_result, recv_klass, scan_temp, 0);
3940 } else {
3941 daddu(AT, recv_klass, scan_temp);
3942 ld(method_result, AT, 0);
3943 }
3944 }
3945 }
3947 // virtual method calling
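// method_result = recv_klass->vtable[vtable_index], i.e. a load from
// recv_klass + vtable_start_offset + vtable_index * wordSize
//             + vtableEntry::method_offset_in_bytes().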
3948 void MacroAssembler::lookup_virtual_method(Register recv_klass,
3949 RegisterOrConstant vtable_index,
3950 Register method_result) {
3951 Register tmp = GP;
3952 push(tmp);
3954 if (vtable_index.is_constant()) {
3955 assert_different_registers(recv_klass, method_result, tmp);
3956 } else {
3957 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
3958 }
3959 const int base = InstanceKlass::vtable_start_offset() * wordSize;
3960 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
3961 if (vtable_index.is_constant()) {
3962 set64(AT, vtable_index.as_constant());
3963 dsll(AT, AT, (int)Address::times_ptr);
3964 } else {
3965 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
3966 }
3967 set64(tmp, base + vtableEntry::method_offset_in_bytes());
3968 daddu(tmp, tmp, AT);
3969 daddu(tmp, tmp, recv_klass);
3970 ld(method_result, tmp, 0);
3972 pop(tmp);
3973 }
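// Store src_reg to tmp_reg + disp with the store matching the BasicType;
// T_OBJECT/T_ARRAY use a 32-bit store of the narrow oop when compressed
// oops are in use (unless wide).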
3975 void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
3976 switch (type) {
3977 case T_LONG:
3978 st_ptr(src_reg, tmp_reg, disp);
3979 break;
3980 case T_ARRAY:
3981 case T_OBJECT:
3982 if (UseCompressedOops && !wide) {
3983 sw(src_reg, tmp_reg, disp);
3984 } else {
3985 st_ptr(src_reg, tmp_reg, disp);
3986 }
3987 break;
3988 case T_ADDRESS:
3989 st_ptr(src_reg, tmp_reg, disp);
3990 break;
3991 case T_INT:
3992 sw(src_reg, tmp_reg, disp);
3993 break;
3994 case T_CHAR:
3995 case T_SHORT:
3996 sh(src_reg, tmp_reg, disp);
3997 break;
3998 case T_BYTE:
3999 case T_BOOLEAN:
4000 sb(src_reg, tmp_reg, disp);
4001 break;
4002 default:
4003 ShouldNotReachHere();
4004 }
4005 }
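// Materialize addr as base + (index << scale) + disp and store. A disp
// that fits a signed 16-bit immediate is folded into the store itself;
// otherwise it is built in a scratch register (T9/AT) first.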
4007 void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) {
4008 Register tmp_reg = T9;
4009 Register index_reg = addr.index();
4010 if (index_reg == NOREG) {
4011 tmp_reg = NOREG;
4012 }
4014 int scale = addr.scale();
4015 if (tmp_reg != NOREG && scale >= 0) {
4016 dsll(tmp_reg, index_reg, scale);
4017 }
4019 int disp = addr.disp();
4020 bool disp_is_simm16 = true;
4021 if (!Assembler::is_simm16(disp)) {
4022 disp_is_simm16 = false;
4023 }
4025 Register base_reg = addr.base();
4026 if (tmp_reg != NOREG) {
4027 assert_different_registers(tmp_reg, base_reg, index_reg);
4028 }
4030 if (tmp_reg != NOREG) {
4031 daddu(tmp_reg, base_reg, tmp_reg);
4032 if (!disp_is_simm16) {
4033 move(AT, disp);              // keep base + scaled index in tmp_reg;
4034 daddu(tmp_reg, tmp_reg, AT); // add the large displacement via AT
4035 }
4036 store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4037 } else {
4038 if (!disp_is_simm16) {
4039 tmp_reg = T9;
4040 assert_different_registers(tmp_reg, base_reg);
4041 move(tmp_reg, disp);
4042 daddu(tmp_reg, base_reg, tmp_reg);
4043 }
4044 store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4045 }
4046 }
4048 void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) {
4049 switch (type) {
4050 case T_DOUBLE:
4051 sdc1(src_reg, tmp_reg, disp);
4052 break;
4053 case T_FLOAT:
4054 swc1(src_reg, tmp_reg, disp);
4055 break;
4056 default:
4057 ShouldNotReachHere();
4058 }
4059 }
4061 void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) {
4062 Register tmp_reg = T9;
4063 Register index_reg = addr.index();
4064 if (index_reg == NOREG) {
4065 tmp_reg = NOREG;
4066 }
4068 int scale = addr.scale();
4069 if (tmp_reg != NOREG && scale >= 0) {
4070 dsll(tmp_reg, index_reg, scale);
4071 }
4073 int disp = addr.disp();
4074 bool disp_is_simm16 = true;
4075 if (!Assembler::is_simm16(disp)) {
4076 disp_is_simm16 = false;
4077 }
4079 Register base_reg = addr.base();
4080 if (tmp_reg != NOREG) {
4081 assert_different_registers(tmp_reg, base_reg, index_reg);
4082 }
4084 if (tmp_reg != NOREG) {
4085 daddu(tmp_reg, base_reg, tmp_reg);
4086 if (!disp_is_simm16) {
4087 move(AT, disp);              // keep base + scaled index in tmp_reg;
4088 daddu(tmp_reg, tmp_reg, AT); // add the large displacement via AT
4089 }
4090 store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
4091 } else {
4092 if (!disp_is_simm16) {
4093 tmp_reg = T9;
4094 assert_different_registers(tmp_reg, base_reg);
4095 move(tmp_reg, disp);
4096 daddu(tmp_reg, base_reg, tmp_reg);
4097 }
4098 store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
4099 }
4100 }
4102 void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
4103 switch (type) {
4104 case T_LONG:
4105 ld_ptr(dst_reg, tmp_reg, disp);
4106 break;
4107 case T_ARRAY:
4108 case T_OBJECT:
4109 if (UseCompressedOops && !wide) {
4110 lwu(dst_reg, tmp_reg, disp);
4111 } else {
4112 ld_ptr(dst_reg, tmp_reg, disp);
4113 }
4114 break;
4115 case T_ADDRESS:
4116 if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) {
4117 lwu(dst_reg, tmp_reg, disp);
4118 } else {
4119 ld_ptr(dst_reg, tmp_reg, disp);
4120 }
4121 break;
4122 case T_INT:
4123 lw(dst_reg, tmp_reg, disp);
4124 break;
4125 case T_CHAR:
4126 lhu(dst_reg, tmp_reg, disp);
4127 break;
4128 case T_SHORT:
4129 lh(dst_reg, tmp_reg, disp);
4130 break;
4131 case T_BYTE:
4132 case T_BOOLEAN:
4133 lb(dst_reg, tmp_reg, disp);
4134 break;
4135 default:
4136 ShouldNotReachHere();
4137 }
4138 }
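// As above, but also returns the code offset of the load instruction
// itself (captured via offset() just before emitting), presumably so
// callers can record the faulting PC, e.g. for implicit null checks.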
4140 int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) {
4141 int code_offset = 0;
4142 Register tmp_reg = T9;
4143 Register index_reg = addr.index();
4144 if (index_reg == NOREG) {
4145 tmp_reg = NOREG;
4146 }
4148 int scale = addr.scale();
4149 if (tmp_reg != NOREG && scale >= 0) {
4150 dsll(tmp_reg, index_reg, scale);
4151 }
4153 int disp = addr.disp();
4154 bool disp_is_simm16 = true;
4155 if (!Assembler::is_simm16(disp)) {
4156 disp_is_simm16 = false;
4157 }
4159 Register base_reg = addr.base();
4160 if (tmp_reg != NOREG) {
4161 assert_different_registers(tmp_reg, base_reg, index_reg);
4162 }
4164 if (tmp_reg != NOREG) {
4165 daddu(tmp_reg, base_reg, tmp_reg);
4166 if (!disp_is_simm16) {
4167 move(AT, disp);              // keep base + scaled index in tmp_reg;
4168 daddu(tmp_reg, tmp_reg, AT); // add the large displacement via AT
4169 }
4170 code_offset = offset();
4171 load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4172 } else {
4173 if (!disp_is_simm16) {
4174 tmp_reg = T9;
4175 assert_different_registers(tmp_reg, base_reg);
4176 move(tmp_reg, disp);
4177 daddu(tmp_reg, base_reg, tmp_reg);
4178 }
4179 code_offset = offset();
4180 load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4181 }
4183 return code_offset;
4184 }
4186 void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) {
4187 switch (type) {
4188 case T_DOUBLE:
4189 ldc1(dst_reg, tmp_reg, disp);
4190 break;
4191 case T_FLOAT:
4192 lwc1(dst_reg, tmp_reg, disp);
4193 break;
4194 default:
4195 ShouldNotReachHere();
4196 }
4197 }
4199 int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) {
4200 int code_offset = 0;
4201 Register tmp_reg = T9;
4202 Register index_reg = addr.index();
4203 if (index_reg == NOREG) {
4204 tmp_reg = NOREG;
4205 }
4207 int scale = addr.scale();
4208 if (tmp_reg != NOREG && scale >= 0) {
4209 dsll(tmp_reg, index_reg, scale);
4210 }
4212 int disp = addr.disp();
4213 bool disp_is_simm16 = true;
4214 if (!Assembler::is_simm16(disp)) {
4215 disp_is_simm16 = false;
4216 }
4218 Register base_reg = addr.base();
4219 if (tmp_reg != NOREG) {
4220 assert_different_registers(tmp_reg, base_reg, index_reg);
4221 }
4223 if (tmp_reg != NOREG) {
4224 daddu(tmp_reg, base_reg, tmp_reg);
4225 if (!disp_is_simm16) {
4226 move(AT, disp);              // keep base + scaled index in tmp_reg;
4227 daddu(tmp_reg, tmp_reg, AT); // add the large displacement via AT
4228 }
4229 code_offset = offset();
4230 load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
4231 } else {
4232 if (!disp_is_simm16) {
4233 tmp_reg = T9;
4234 assert_different_registers(tmp_reg, base_reg);
4235 move(tmp_reg, disp);
4236 daddu(tmp_reg, base_reg, tmp_reg);
4237 }
4238 code_offset = offset();
4239 load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
4240 }
4242 return code_offset;
4243 }
4245 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
4246 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
4247 STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
4248 // The inverted mask is sign-extended
4249 move(AT, inverted_jweak_mask);
4250 andr(possibly_jweak, AT, possibly_jweak);
4251 }
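// A JNI handle may be a jweak, tagged in its low bit. When a weak referent
// is resolved under G1 it is reported to the SATB pre-barrier so that
// concurrent marking does not lose it (hence the pre-barrier call below).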
4253 void MacroAssembler::resolve_jobject(Register value,
4254 Register thread,
4255 Register tmp) {
4256 assert_different_registers(value, thread, tmp);
4257 Label done, not_weak;
4258 beq(value, R0, done); // Use NULL as-is.
4259 delayed()->nop();
4260 move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag.
4261 andr(AT, value, AT);
4262 beq(AT, R0, not_weak);
4263 delayed()->nop();
4264 // Resolve jweak.
4265 ld(value, value, -JNIHandles::weak_tag_value);
4266 verify_oop(value);
4267 #if INCLUDE_ALL_GCS
4268 if (UseG1GC) {
4269 g1_write_barrier_pre(noreg /* obj */,
4270 value /* pre_val */,
4271 thread /* thread */,
4272 tmp /* tmp */,
4273 true /* tosca_live */,
4274 true /* expand_call */);
4275 }
4276 #endif // INCLUDE_ALL_GCS
4277 b(done);
4278 delayed()->nop();
4279 bind(not_weak);
4280 // Resolve (untagged) jobject.
4281 ld(value, value, 0);
4282 verify_oop(value);
4283 bind(done);
4284 }