/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, 2020, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#define A0 RA0
#define A1 RA1
#define A2 RA2
#define A3 RA3
#define A4 RA4
#define A5 RA5
#define A6 RA6
#define A7 RA7
#define T0 RT0
#define T1 RT1
#define T2 RT2
#define T3 RT3
#define T8 RT8
#define T9 RT9

// Implementation of MacroAssembler

intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k = 0; k < sizeof(i) / sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k = 0; k < sizeof(f) / sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
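
// save_registers() and restore_registers() below emit code that spills/reloads
// all 32 integer and all 32 floating-point registers to/from the static debug
// buffers above, addressed relative to A0. Illustrative shape of the emitted
// store sequence (a sketch, not the literal disassembly):
//   sw   $k,   A0 + i_offset(k)   // k = 0..31
//   swc1 $f_k, A0 + f_offset(k)   // k = 0..31
// with the matching lw/lwc1 sequence for the restore.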
void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}


void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;
  jint *pc = (jint *)branch;

  if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
    //b_far:
    //  move(AT, RA); // dadd
    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    //  nop();
    //  lui(T9, 0); // to be patched
    //  ori(T9, 0);
    //  daddu(T9, T9, RA);
    //  move(RA, AT);
    //  jr(T9);

    assert(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      // revert to "b + nop"
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ delayed()->nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  } else if (special(pc[4]) == jr_op
             && opcode(pc[4]) == special_op
             && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
    //jmp_far:
    //  patchable_set48(T9, target);
    //  jr(T9);
    //  nop();

    CodeBuffer cb(branch, 4 * 4);
    MacroAssembler masm(&cb);
    masm.patchable_set48(T9, (long)(target));
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: branch = 0x%lx, target = 0x%lx", branch, target);
    tty->print_cr("======= Start decoding at branch = 0x%lx =======", branch);
    Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty);
    tty->print_cr("======= End of decoding =======");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}

static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch);
}

void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}
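
// patchable_jump() always occupies the same 6-instruction window (see
// insts_for_patchable_jump) so the site can later be repatched in place:
// the reachable case pads with nops before "j target; nop", while the far
// case emits "patchable_set48(T9, target); jr T9; nop".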
void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    delayed()->nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    delayed()->nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    delayed()->nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}
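
// beq_far/bne_far below handle the limited reach of a conditional branch: its
// 16-bit signed offset field covers roughly +/-32K instructions (+/-128KB).
// When the target is in range the plain branch is emitted; otherwise the
// condition is inverted to skip over a b_far() to the distant target,
// e.g. for beq_far (illustrative):
//   bne  rs, rt, not_jump
//   nop
//   <b_far(entry)>
//   nop
// not_jump: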
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  bne(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  beq(rs, rt, not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1t_long(Label& L) {
  Label not_taken;

  bc1f(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1f_long(Label& L) {
  Label not_taken;

  bc1t(not_taken);
  delayed()->nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
    //
    // MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
    //   0x00000055651ed514: dadd at, ra, zero
    //   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
    //
    //   0x00000055651ed51c: sll zero, zero, 0
    //   0x00000055651ed520: lui t9, 0x0
    //   0x00000055651ed524: ori t9, t9, 0x21b8
    //   0x00000055651ed528: daddu t9, t9, ra
    //   0x00000055651ed52c: dadd ra, at, zero
    //   0x00000055651ed530: jr t9
    //   0x00000055651ed534: sll zero, zero, 0
    //
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  // Near/Far jump
  if(is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    // address must be bounded
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
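
// atomic_inc32() below uses the classic MIPS LL/SC retry loop: ll loads the
// counter and opens a link to its cache line, sc stores the incremented value
// only if the line was untouched in between, and writes 0 into its source
// register on failure, in which case we branch back and retry.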
// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();


  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}

// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
// This method handles the stack alignment itself, so the caller need not
// reserve stack space for the arguments.
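// Alignment sketch of what the code below does: the ABI requires SP to be
// 16-byte aligned at a call, so we test the low four bits of SP
// (andi AT, SP, 0xf) and, when misaligned, temporarily drop SP by 8 around
// the runtime call.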
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}


void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::jmp_far(Label& L) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)entry);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)pc());
  }

  jr(T9);
  delayed()->nop();
}
void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(AT, (long)obj);
  sd(AT, dst);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(dst, (long)obj);
}

void MacroAssembler::call(address entry) {
  // C/C++ code assumes T9 holds the entry point, so we always move the entry into T9.
  // Maybe there is a more graceful way to handle this. FIXME
  // For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to make locks work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}


void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore the space reserved for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}

void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}

void MacroAssembler::print_reg(Register reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); // A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
  NOT_LP64(pop(FP);)
  popad();

}

void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}


void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
  assert(java_thread != oop_result, "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}
void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}


void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}


void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}
void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke an OS NULL exception if reg == NULL by
    // accessing M[reg] w/o changing any registers
    // NOTE: a plain load is enough here to provoke a SEGV
    lw(AT, reg, 0);
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg == NULL
  }
}

void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}

void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if (clear_fp) {
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
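// The store offset is derived from the thread pointer, roughly:
//   offset = (thread >> serialize_page_shift) & (page_size - sizeof(int))
// so distinct threads tend to touch distinct words of the serialization page.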
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}

// Calls to C land
//
// When entering C land, the fp, & sp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_word_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_word_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}

//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
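  //
  // The SATB pre-barrier records the value about to be overwritten (pre_val)
  // while concurrent marking is active, preserving G1's snapshot-at-the-beginning
  // invariant: first try to enqueue pre_val into the thread-local SATB buffer,
  // and fall back to the runtime (g1_wb_pre) when the buffer is full.
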
#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != V0, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lw(AT, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lb(AT, in_progress);
  }
  beq(AT, R0, done);
  delayed()->nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  beq(pre_val, R0, done);
  delayed()->nop();

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ld(tmp, index);
  beq(tmp, R0, runtime);
  delayed()->nop();

  daddiu(tmp, tmp, -1 * wordSize);
  sd(tmp, index);
  ld(AT, buffer);
  daddu(tmp, tmp, AT);

  // Record the previous value
  sd(pre_val, tmp, 0);
  beq(R0, R0, done);
  delayed()->nop();

  bind(runtime);
  // save the live input values
  if (tosca_live) push(V0);

  if (obj != noreg && obj != V0) push(obj);

  if (pre_val != V0) push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then fp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
    if (thread != A1) move(A1, thread);
    if (pre_val != A0) move(A0, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != V0)
    pop(pre_val);

  if (obj != noreg && obj != V0)
    pop(obj);

  if (tosca_live) pop(V0);

  bind(done);
}

void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
  assert(tmp != AT, "must be");
  assert(tmp2 != AT, "must be");
#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?
  xorr(AT, store_addr, new_val);
  dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  beq(AT, R0, done);
  delayed()->nop();


  // crosses regions, storing NULL?
  beq(new_val, R0, done);
  delayed()->nop();

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp;
  const Register cardtable = tmp2;
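
  // Card lookup sketch: the card for an address is the byte at
  //   byte_map_base + (store_addr >> card_shift)
  // computed below into card_addr.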
  move(card_addr, store_addr);
  dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  set64(cardtable, (intptr_t)ct->byte_map_base);
  daddu(card_addr, card_addr, cardtable);

  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  beq(AT, R0, done);
  delayed()->nop();

  sync();
  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  beq(AT, R0, done);
  delayed()->nop();


  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
  move(AT, (int)CardTableModRefBS::dirty_card_val());
  sb(AT, card_addr, 0);

  lw(AT, queue_index);
  beq(AT, R0, runtime);
  delayed()->nop();
  daddiu(AT, AT, -1 * wordSize);
  sw(AT, queue_index);
  ld(tmp2, buffer);
#ifdef _LP64
  ld(AT, queue_index);
  daddu(tmp2, tmp2, AT);
  sd(card_addr, tmp2, 0);
#else
  lw(AT, queue_index);
  addu32(tmp2, tmp2, AT);
  sw(card_addr, tmp2, 0);
#endif
  beq(R0, R0, done);
  delayed()->nop();

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
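
// Net effect of store_check_part_1 followed by store_check_part_2 below
// (a sketch): mark the card covering obj as dirty,
//   byte_map_base[obj >> card_shift] = 0;   // 0 == dirty_card_val()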
// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}

void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  set64(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  if (UseConcMarkSweepGC) sync();
  sb(R0, AT, 0);
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2); // blows t1 & t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  if (var_size_in_bytes == NOREG) {
    set64(AT, con_size_in_bytes);
    add(end, obj, AT);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();


  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  verify_tlab(t1, t2);
}

// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, AT);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    b_far(slow_case);
    delayed()->nop();
  } else {

#ifndef _LP64
    Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
    lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
#else
    Address heap_top(t1);
    li(t1, (long)Universe::heap()->top_addr());
#endif
    ld_ptr(obj, heap_top);

    Register end = t2;
    Label retry;

    bind(retry);
    if (var_size_in_bytes == NOREG) {
      set64(AT, con_size_in_bytes);
      add(end, obj, AT);
    } else {
      add(end, obj, var_size_in_bytes);
    }
    // if end < obj then we wrapped around => object too long => slow case
    sltu(AT, end, obj);
    bne_far(AT, R0, slow_case);
    delayed()->nop();

    li(AT, (long)Universe::heap()->end_addr());
    ld_ptr(AT, AT, 0);
    sltu(AT, AT, end);
    bne_far(AT, R0, slow_case);
    delayed()->nop();
    // Compare obj with the top addr, and if still equal, store the new top addr
    // (end) at the address of the top addr pointer. On MIPS this is done with
    // cmpxchg, which sets AT on success and clears it otherwise.
    //if (os::is_MP()) {
    //  sync();
    //}

    // if someone beat us on the allocation, try again, otherwise continue
    cmpxchg(end, heap_top, obj);
    beq_far(AT, R0, retry);
    delayed()->nop();
  }
}
1798 // C2 doesn't invoke this one.
1799 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1800 Register top = T0;
1801 Register t1 = T1;
1802 Register t2 = T9;
1803 Register t3 = T3;
1804 Register thread_reg = T8;
1805 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4);
1806 Label do_refill, discard_tlab;
1808 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
1809 // No allocation in the shared eden.
1810 b(slow_case);
1811 delayed()->nop();
1812 }
1814 get_thread(thread_reg);
1816 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1817 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1819 // calculate amount of free space
1820 sub(t1, t1, top);
1821 shr(t1, LogHeapWordSize);
1823 // Retain tlab and allocate object in shared space if
1824 // the amount free in the tlab is too large to discard.
1825 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1826 slt(AT, t2, t1);
1827 beq(AT, R0, discard_tlab);
1828 delayed()->nop();
1830 // Retain
1831 #ifndef _LP64
1832 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1833 #else
1834 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1835 #endif
1836 add(t2, t2, AT);
1837 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1839 if (TLABStats) {
1840 // increment number of slow_allocations
1841 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1842 addiu(AT, AT, 1);
1843 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1844 }
1845 b(try_eden);
1846 delayed()->nop();
1848 bind(discard_tlab);
1849 if (TLABStats) {
1850 // increment number of refills
1851 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1852 addi(AT, AT, 1);
1853 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1854 // accumulate wastage -- t1 is amount free in tlab
1855 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1856 add(AT, AT, t1);
1857 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1858 }
1860 // if tlab is currently allocated (top or end != null) then
1861 // fill [top, end + alignment_reserve) with array object
1862 beq(top, R0, do_refill);
1863 delayed()->nop();
1865 // set up the mark word
1866 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1867 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1869 // set the length to the remaining space
1870 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1871 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1872 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1873 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1875 // set klass to intArrayKlass
1876 #ifndef _LP64
1877 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
1878 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
1879 #else
1880 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
1881 ld_ptr(t1, AT, 0);
1882 #endif
1883 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
1884 store_klass(top, t1);
1886 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
1887 subu(t1, top, t1);
1888 incr_allocated_bytes(thread_reg, t1, 0);
1890 // refill the tlab with an eden allocation
1891 bind(do_refill);
1892 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1893 shl(t1, LogHeapWordSize);
1894 // add object_size ??
1895 eden_allocate(top, t1, 0, t2, t3, slow_case);
1897 // Check that t1 was preserved in eden_allocate.
1898 #ifdef ASSERT
1899 if (UseTLAB) {
1900 Label ok;
1901 assert_different_registers(thread_reg, t1);
1902 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1903 shl(AT, LogHeapWordSize);
1904 beq(AT, t1, ok);
1905 delayed()->nop();
1906 stop("assert(t1 != tlab size)");
1907 should_not_reach_here();
1909 bind(ok);
1910 }
1911 #endif
1912 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
1913 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1914 add(top, top, t1);
1915 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
1916 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1917 verify_tlab(t1, t2);
1918 b(retry);
1919 delayed()->nop();
1920 }
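// The refill policy above, summarized in pseudo-C (a sketch of the control
// flow, not generated code; sizes are in heap words):
//
//   free = (tlab_end - tlab_top) >> LogHeapWordSize;
//   if (free > refill_waste_limit) {
//     refill_waste_limit += increment;   // retain the TLAB ...
//     goto try_eden;                     // ... and allocate this one in eden
//   }
//   // discard: plug [top, end + alignment_reserve) with a dummy int[] so
//   // the heap stays parseable, then carve a fresh TLAB out of eden and retry.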
1922 void MacroAssembler::incr_allocated_bytes(Register thread,
1923 Register var_size_in_bytes,
1924 int con_size_in_bytes,
1925 Register t1) {
1926 if (!thread->is_valid()) {
1927 #ifndef OPT_THREAD
1928 assert(t1->is_valid(), "need temp reg");
1929 thread = t1;
1930 get_thread(thread);
1931 #else
1932 thread = TREG;
1933 #endif
1934 }
1936 ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
1937 if (var_size_in_bytes->is_valid()) {
1938 addu(AT, AT, var_size_in_bytes);
1939 } else {
1940 addiu(AT, AT, con_size_in_bytes);
1941 }
1942 st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
1943 }
1945 static const double pi_4 = 0.7853981633974483;
1947 // The argument (a double) must be passed in F12/F13.
1948 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
1949 // We need to preserve the registers which may be modified during the call.
1950 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
1951   // save all modified registers here
1952   // FIXME: in the disassembly of trigfunc, only V0, V1, T9, SP and RA are used, so we only need to save V0, V1 and T9
1953 pushad();
1954   // reserve stack space for the callee before the call
1955 addi(SP, SP, -wordSize * 2);
1956 switch (trig){
1957 case 's' :
1958 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
1959 delayed()->nop();
1960 break;
1961 case 'c':
1962 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
1963 delayed()->nop();
1964 break;
1965 case 't':
1966 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
1967 delayed()->nop();
1968 break;
1969 default:assert (false, "bad intrinsic");
1970 break;
1972 }
1974 addi(SP, SP, wordSize * 2);
1975 popad();
1976 }
1978 #ifdef _LP64
1979 void MacroAssembler::li(Register rd, long imm) {
1980 if (imm <= max_jint && imm >= min_jint) {
1981 li32(rd, (int)imm);
1982 } else if (julong(imm) <= 0xFFFFFFFF) {
1983 assert_not_delayed();
1984 // lui sign-extends, so we can't use that.
1985 ori(rd, R0, julong(imm) >> 16);
1986 dsll(rd, rd, 16);
1987 ori(rd, rd, split_low(imm));
1988 } else if ((imm > 0) && is_simm16(imm >> 32)) {
1989 // A 48-bit address
1990 li48(rd, imm);
1991 } else {
1992 li64(rd, imm);
1993 }
1994 }
1995 #else
1996 void MacroAssembler::li(Register rd, long imm) {
1997 li32(rd, (int)imm);
1998 }
1999 #endif
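// The immediate-materialization tiers chosen by li()/set64() above, with
// their instruction counts (a summary of the code, not an ABI guarantee):
//
//   simm16                 1 insn   daddiu rd, zero, imm
//   other 32-bit signed    2 insns  lui + ori                           (li32)
//   32-bit zero-extended   3 insns  ori + dsll + ori
//   48-bit, positive       4 insns  lui + ori + dsll + ori              (li48)
//   anything else          6 insns  lui + ori + dsll + ori + dsll + ori (li64)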
2001 void MacroAssembler::li32(Register reg, int imm) {
2002 if (is_simm16(imm)) {
2003 // for imm < 0, we should use addi instead of addiu.
2004 //
2005 // java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2006 //
2007 // 78 move [int:-1|I] [a0|I]
2008 // : daddi a0, zero, 0xffffffff (correct)
2009 // : daddiu a0, zero, 0xffffffff (incorrect)
2010 //
2011 if (imm >= 0)
2012 addiu(reg, R0, imm);
2013 else
2014 addi(reg, R0, imm);
2015 } else {
2016 lui(reg, split_low(imm >> 16));
2017 if (split_low(imm))
2018 ori(reg, reg, split_low(imm));
2019 }
2020 }
2022 #ifdef _LP64
2023 void MacroAssembler::set64(Register d, jlong value) {
2024 assert_not_delayed();
2026 int hi = (int)(value >> 32);
2027 int lo = (int)(value & ~0);
2029 if (value == lo) { // 32-bit integer
2030 if (is_simm16(value)) {
2031 daddiu(d, R0, value);
2032 } else {
2033 lui(d, split_low(value >> 16));
2034 if (split_low(value)) {
2035 ori(d, d, split_low(value));
2036 }
2037 }
2038 } else if (hi == 0) { // hardware zero-extends to upper 32
2039 ori(d, R0, julong(value) >> 16);
2040 dsll(d, d, 16);
2041 if (split_low(value)) {
2042 ori(d, d, split_low(value));
2043 }
2044   } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2045 // 4 insts
2046 li48(d, value);
2047 } else { // li64
2048 // 6 insts
2049 li64(d, value);
2050 }
2051 }
2054 int MacroAssembler::insts_for_set64(jlong value) {
2055 int hi = (int)(value >> 32);
2056 int lo = (int)(value & ~0);
2058 int count = 0;
2060 if (value == lo) { // 32-bit integer
2061 if (is_simm16(value)) {
2062 //daddiu(d, R0, value);
2063 count++;
2064 } else {
2065 //lui(d, split_low(value >> 16));
2066 count++;
2067 if (split_low(value)) {
2068 //ori(d, d, split_low(value));
2069 count++;
2070 }
2071 }
2072 } else if (hi == 0) { // hardware zero-extends to upper 32
2073 //ori(d, R0, julong(value) >> 16);
2074 //dsll(d, d, 16);
2075 count += 2;
2076 if (split_low(value)) {
2077 //ori(d, d, split_low(value));
2078 count++;
2079 }
2080   } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2081 // 4 insts
2082 //li48(d, value);
2083 count += 4;
2084 } else { // li64
2085 // 6 insts
2086 //li64(d, value);
2087 count += 6;
2088 }
2090 return count;
2091 }
2093 void MacroAssembler::patchable_set48(Register d, jlong value) {
2094 assert_not_delayed();
2096 int hi = (int)(value >> 32);
2097 int lo = (int)(value & ~0);
2099 int count = 0;
2101 if (value == lo) { // 32-bit integer
2102 if (is_simm16(value)) {
2103 daddiu(d, R0, value);
2104 count += 1;
2105 } else {
2106 lui(d, split_low(value >> 16));
2107 count += 1;
2108 if (split_low(value)) {
2109 ori(d, d, split_low(value));
2110 count += 1;
2111 }
2112 }
2113 } else if (hi == 0) { // hardware zero-extends to upper 32
2114 ori(d, R0, julong(value) >> 16);
2115 dsll(d, d, 16);
2116 count += 2;
2117 if (split_low(value)) {
2118 ori(d, d, split_low(value));
2119 count += 1;
2120 }
2121   } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2122 // 4 insts
2123 li48(d, value);
2124 count += 4;
2125 } else { // li64
2126     tty->print_cr("value = 0x%lx", value); // value is a jlong; %x would truncate it
2127 guarantee(false, "Not supported yet !");
2128 }
2130 while (count < 4) {
2131 nop();
2132 count++;
2133 }
2134 }
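// Note on patchability: patchable_set48 always occupies exactly 4 instruction
// slots (nop-padded), so a later patch can rewrite the constant in place
// without moving any following code. In sketch form (emit_shortest_sequence
// is just a name for the cases above, not a real helper):
//
//   count = emit_shortest_sequence(d, value);  // 1..4 insns, see above
//   while (count < 4) { nop(); count++; }      // keep the site fixed-size
//
// patchable_set32 and patchable_call32 below do the same with 3 and 2 slots.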
2136 void MacroAssembler::patchable_set32(Register d, jlong value) {
2137 assert_not_delayed();
2139 int hi = (int)(value >> 32);
2140 int lo = (int)(value & ~0);
2142 int count = 0;
2144 if (value == lo) { // 32-bit integer
2145 if (is_simm16(value)) {
2146 daddiu(d, R0, value);
2147 count += 1;
2148 } else {
2149 lui(d, split_low(value >> 16));
2150 count += 1;
2151 if (split_low(value)) {
2152 ori(d, d, split_low(value));
2153 count += 1;
2154 }
2155 }
2156 } else if (hi == 0) { // hardware zero-extends to upper 32
2157 ori(d, R0, julong(value) >> 16);
2158 dsll(d, d, 16);
2159 count += 2;
2160 if (split_low(value)) {
2161 ori(d, d, split_low(value));
2162 count += 1;
2163 }
2164 } else {
2165     tty->print_cr("value = 0x%lx", value); // value is a jlong; %x would truncate it
2166 guarantee(false, "Not supported yet !");
2167 }
2169 while (count < 3) {
2170 nop();
2171 count++;
2172 }
2173 }
2175 void MacroAssembler::patchable_call32(Register d, jlong value) {
2176 assert_not_delayed();
2178 int hi = (int)(value >> 32);
2179 int lo = (int)(value & ~0);
2181 int count = 0;
2183 if (value == lo) { // 32-bit integer
2184 if (is_simm16(value)) {
2185 daddiu(d, R0, value);
2186 count += 1;
2187 } else {
2188 lui(d, split_low(value >> 16));
2189 count += 1;
2190 if (split_low(value)) {
2191 ori(d, d, split_low(value));
2192 count += 1;
2193 }
2194 }
2195 } else {
2196     tty->print_cr("value = 0x%lx", value); // value is a jlong; %x would truncate it
2197 guarantee(false, "Not supported yet !");
2198 }
2200 while (count < 2) {
2201 nop();
2202 count++;
2203 }
2204 }
2206 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2207 assert(UseCompressedClassPointers, "should only be used for compressed header");
2208 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2210 int klass_index = oop_recorder()->find_index(k);
2211 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2212 long narrowKlass = (long)Klass::encode_klass(k);
2214 relocate(rspec, Assembler::narrow_oop_operand);
2215 patchable_set48(dst, narrowKlass);
2216 }
2219 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2220 assert(UseCompressedOops, "should only be used for compressed header");
2221 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2223 int oop_index = oop_recorder()->find_index(obj);
2224 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2226 relocate(rspec, Assembler::narrow_oop_operand);
2227 patchable_set48(dst, oop_index);
2228 }
2230 void MacroAssembler::li64(Register rd, long imm) {
2231 assert_not_delayed();
2232 lui(rd, split_low(imm >> 48));
2233 ori(rd, rd, split_low(imm >> 32));
2234 dsll(rd, rd, 16);
2235 ori(rd, rd, split_low(imm >> 16));
2236 dsll(rd, rd, 16);
2237 ori(rd, rd, split_low(imm));
2238 }
2240 void MacroAssembler::li48(Register rd, long imm) {
2241 assert_not_delayed();
2242 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2243 lui(rd, imm >> 32);
2244 ori(rd, rd, split_low(imm >> 16));
2245 dsll(rd, rd, 16);
2246 ori(rd, rd, split_low(imm));
2247 }
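// Worked example of li48 (illustrative): imm = 0x123456789abc
//   lui  rd, 0x1234      ; rd = 0x0000_0000_1234_0000 (sign-extended, positive)
//   ori  rd, rd, 0x5678  ; rd = 0x0000_0000_1234_5678
//   dsll rd, rd, 16      ; rd = 0x0000_1234_5678_0000
//   ori  rd, rd, 0x9abc  ; rd = 0x0000_1234_5678_9abc
// li64 covers the top 16 bits as well and needs one extra dsll/ori pair.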
2248 #endif
2250 void MacroAssembler::verify_oop(Register reg, const char* s) {
2251 if (!VerifyOops) return;
2252 const char * b = NULL;
2253 stringStream ss;
2254 ss.print("verify_oop: %s: %s", reg->name(), s);
2255 b = code_string(ss.as_string());
2256 #ifdef _LP64
2257 pushad();
2258 move(A1, reg);
2259 li(A0, (long)b);
2260 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2261 ld(T9, AT, 0);
2262 jalr(T9);
2263 delayed()->nop();
2264 popad();
2265 #else
2266 // Pass register number to verify_oop_subroutine
2267 sw(T0, SP, - wordSize);
2268 sw(T1, SP, - 2*wordSize);
2269 sw(RA, SP, - 3*wordSize);
2270 sw(A0, SP ,- 4*wordSize);
2271 sw(A1, SP ,- 5*wordSize);
2272 sw(AT, SP ,- 6*wordSize);
2273 sw(T9, SP ,- 7*wordSize);
2274 addiu(SP, SP, - 7 * wordSize);
2275 move(A1, reg);
2276 li(A0, (long)b);
2277 // call indirectly to solve generation ordering problem
2278 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2279 lw(T9, AT, 0);
2280 jalr(T9);
2281 delayed()->nop();
2282 lw(T0, SP, 6* wordSize);
2283 lw(T1, SP, 5* wordSize);
2284 lw(RA, SP, 4* wordSize);
2285 lw(A0, SP, 3* wordSize);
2286 lw(A1, SP, 2* wordSize);
2287 lw(AT, SP, 1* wordSize);
2288 lw(T9, SP, 0* wordSize);
2289 addiu(SP, SP, 7 * wordSize);
2290 #endif
2291 }
2294 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2295 if (!VerifyOops) {
2296 nop();
2297 return;
2298 }
2299 // Pass register number to verify_oop_subroutine
2300 const char * b = NULL;
2301 stringStream ss;
2302 ss.print("verify_oop_addr: %s", s);
2303 b = code_string(ss.as_string());
2305 st_ptr(T0, SP, - wordSize);
2306 st_ptr(T1, SP, - 2*wordSize);
2307 st_ptr(RA, SP, - 3*wordSize);
2308 st_ptr(A0, SP, - 4*wordSize);
2309 st_ptr(A1, SP, - 5*wordSize);
2310 st_ptr(AT, SP, - 6*wordSize);
2311 st_ptr(T9, SP, - 7*wordSize);
2312   ld_ptr(A1, addr); // addr may use SP, so load from it before changing SP
2313 addiu(SP, SP, - 7 * wordSize);
2315 li(A0, (long)b);
2316 // call indirectly to solve generation ordering problem
2317 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2318 ld_ptr(T9, AT, 0);
2319 jalr(T9);
2320 delayed()->nop();
2321 ld_ptr(T0, SP, 6* wordSize);
2322 ld_ptr(T1, SP, 5* wordSize);
2323 ld_ptr(RA, SP, 4* wordSize);
2324 ld_ptr(A0, SP, 3* wordSize);
2325 ld_ptr(A1, SP, 2* wordSize);
2326 ld_ptr(AT, SP, 1* wordSize);
2327 ld_ptr(T9, SP, 0* wordSize);
2328 addiu(SP, SP, 7 * wordSize);
2329 }
2331 // used registers : T0, T1
2332 void MacroAssembler::verify_oop_subroutine() {
2333 // RA: ra
2334 // A0: char* error message
2335 // A1: oop object to verify
2337 Label exit, error;
2338 // increment counter
2339 li(T0, (long)StubRoutines::verify_oop_count_addr());
2340 lw(AT, T0, 0);
2341 #ifdef _LP64
2342 daddi(AT, AT, 1);
2343 #else
2344 addi(AT, AT, 1);
2345 #endif
2346 sw(AT, T0, 0);
2348 // make sure object is 'reasonable'
2349 beq(A1, R0, exit); // if obj is NULL it is ok
2350 delayed()->nop();
2352 // Check if the oop is in the right area of memory
2353 // const int oop_mask = Universe::verify_oop_mask();
2354 // const int oop_bits = Universe::verify_oop_bits();
2355 const uintptr_t oop_mask = Universe::verify_oop_mask();
2356 const uintptr_t oop_bits = Universe::verify_oop_bits();
2357 li(AT, oop_mask);
2358 andr(T0, A1, AT);
2359 li(AT, oop_bits);
2360 bne(T0, AT, error);
2361 delayed()->nop();
2363 // make sure klass is 'reasonable'
2364 // add for compressedoops
2365 reinit_heapbase();
2366 // add for compressedoops
2367 load_klass(T0, A1);
2368 beq(T0, R0, error); // if klass is NULL it is broken
2369 delayed()->nop();
2370 // return if everything seems ok
2371 bind(exit);
2373 jr(RA);
2374 delayed()->nop();
2376 // handle errors
2377 bind(error);
2378 pushad();
2379 #ifndef _LP64
2380 addi(SP, SP, (-1) * wordSize);
2381 #endif
2382 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2383 delayed()->nop();
2384 #ifndef _LP64
2385 addiu(SP, SP, 1 * wordSize);
2386 #endif
2387 popad();
2388 jr(RA);
2389 delayed()->nop();
2390 }
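// The "reasonable oop" test above, in C terms (a sketch, not generated code):
//
//   if (obj == NULL) return;                              // NULL is ok
//   if ((obj & verify_oop_mask()) != verify_oop_bits())   goto error;
//   if (obj->klass() == NULL)                             goto error;
//   return;                                               // looks sane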
2392 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2393 #ifdef ASSERT
2394 assert_different_registers(t1, t2, AT);
2395 if (UseTLAB && VerifyOops) {
2396 Label next, ok;
2398 get_thread(t1);
2400 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2401 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2402 sltu(AT, t2, AT);
2403 beq(AT, R0, next);
2404 delayed()->nop();
2406 stop("assert(top >= start)");
2408 bind(next);
2409 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2410 sltu(AT, AT, t2);
2411 beq(AT, R0, ok);
2412 delayed()->nop();
2414 stop("assert(top <= end)");
2416 bind(ok);
2418 }
2419 #endif
2420 }
2422 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2423 Register tmp,
2424 int offset) {
2425 intptr_t value = *delayed_value_addr;
2426 if (value != 0)
2427 return RegisterOrConstant(value + offset);
2428 AddressLiteral a(delayed_value_addr);
2429 // load indirectly to solve generation ordering problem
2430 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2431 //ld(tmp, a);
2432 if (offset != 0)
2433 daddi(tmp,tmp, offset);
2435 return RegisterOrConstant(tmp);
2436 }
2438 void MacroAssembler::hswap(Register reg) {
2439 //short
2440 //andi(reg, reg, 0xffff);
2441 srl(AT, reg, 8);
2442 sll(reg, reg, 24);
2443 sra(reg, reg, 16);
2444 orr(reg, reg, AT);
2445 }
2447 void MacroAssembler::huswap(Register reg) {
2448 #ifdef _LP64
2449 dsrl(AT, reg, 8);
2450 dsll(reg, reg, 24);
2451 dsrl(reg, reg, 16);
2452 orr(reg, reg, AT);
2453 andi(reg, reg, 0xffff);
2454 #else
2455 //andi(reg, reg, 0xffff);
2456 srl(AT, reg, 8);
2457 sll(reg, reg, 24);
2458 srl(reg, reg, 16);
2459 orr(reg, reg, AT);
2460 #endif
2461 }
2463 // A slightly tricky 32-bit byte-swap that needs only one extra register (AT).
2465 void MacroAssembler::swap(Register reg) {
2466 srl(AT, reg, 8);
2467 sll(reg, reg, 24);
2468 orr(reg, reg, AT);
2469 //reg : 4 1 2 3
2470 srl(AT, AT, 16);
2471 xorr(AT, AT, reg);
2472 andi(AT, AT, 0xff);
2473 //AT : 0 0 0 1^3);
2474 xorr(reg, reg, AT);
2475 //reg : 4 1 2 1
2476 sll(AT, AT, 16);
2477 xorr(reg, reg, AT);
2478 //reg : 4 3 2 1
2479 }
2481 #ifdef _LP64
2483 // do 32-bit CAS using MIPS64 lld/scd
2484 //
2485 // cas_int should only compare 32-bits of the memory value.
2486 // However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
2487 // To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
2488 // two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits
2489 // of newval, together with the high 32 bits of the memory value, are stored back with SCD.
2490 //
2491 //Example:
2492 //
2493 // double d = 3.1415926;
2494 // System.err.println("hello" + d);
2495 //
2496 // sun.misc.FloatingDecimal$1.<init>()
2497 // |
2498 // `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2499 //
2500 // 38 cas_int [a7a7|J] [a0|I] [a6|I]
2501 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2502 // a6: 0x4ab325aa
2503 //
2504 //again:
2505 // 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2506 //
2507 // 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2508 // 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2509 // 0x00000055647f3c68: dsll32 t8, t8, 0
2510 // 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2511 // 0x00000055647f3c70: sll zero, zero, 0
2512 //
2513 // 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2514 // 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2515 // 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2516 // 0x00000055647f3c80: and v1, a6, v1
2517 // 0x00000055647f3c84: or at, t8, v1
2518 // 0x00000055647f3c88: scd at, 0x0(a7)
2519 // 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2520 // 0x00000055647f3c90: sll zero, zero, 0
2521 // 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2522 // 0x00000055647f3c98: sll zero, zero, 0
2523 //nequal:
2524 // 0x00000055647f45a4: dadd a0, t9, zero
2525 // 0x00000055647f45a8: dadd at, zero, zero
2526 //done:
2527 //
2529 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2530 // MIPS64 can use ll/sc for 32-bit atomic memory access
2531 Label done, again, nequal;
2533 bind(again);
2535 if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
2536 ll(AT, dest);
2537 bne(AT, c_reg, nequal);
2538 delayed()->nop();
2540 move(AT, x_reg);
2541 sc(AT, dest);
2542 beq(AT, R0, again);
2543 delayed()->nop();
2544 b(done);
2545 delayed()->nop();
2547 // not xchged
2548 bind(nequal);
2549 sync();
2550 move(c_reg, AT);
2551 move(AT, R0);
2553 bind(done);
2554 }
2555 #endif // cmpxchg32
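// Contract shared by cmpxchg32/cmpxchg/cmpxchg8, as a C-level sketch
// (illustrative only; T stands for the 32- or 64-bit integer type, and the
// real loop is the LL/SC sequence above):
//
//   bool cmpxchg(T x, T* dest, T* c) {
//     T old = *dest;                 // ll/lld
//     if (old != *c) {               // compare failed:
//       *c = old;                    //   expose the value we saw
//       return false;                //   AT == 0 => caller takes slow path
//     }
//     return try_store(dest, x);     // sc/scd; the loop retries if it fails
//   }                                // AT == 1 => success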
2557 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2558 Label done, again, nequal;
2560 bind(again);
2561 if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
2562 #ifdef _LP64
2563 lld(AT, dest);
2564 #else
2565 ll(AT, dest);
2566 #endif
2567 bne(AT, c_reg, nequal);
2568 delayed()->nop();
2570 move(AT, x_reg);
2571 #ifdef _LP64
2572 scd(AT, dest);
2573 #else
2574 sc(AT, dest);
2575 #endif
2576 beq(AT, R0, again);
2577 delayed()->nop();
2578 b(done);
2579 delayed()->nop();
2581 // not xchged
2582 bind(nequal);
2583 sync();
2584 move(c_reg, AT);
2585 move(AT, R0);
2587 bind(done);
2588 }
2590 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2591 Label done, again, nequal;
2593 Register x_reg = x_regLo;
2594 dsll32(x_regHi, x_regHi, 0);
2595 dsll32(x_regLo, x_regLo, 0);
2596 dsrl32(x_regLo, x_regLo, 0);
2597 orr(x_reg, x_regLo, x_regHi);
2599 Register c_reg = c_regLo;
2600 dsll32(c_regHi, c_regHi, 0);
2601 dsll32(c_regLo, c_regLo, 0);
2602 dsrl32(c_regLo, c_regLo, 0);
2603 orr(c_reg, c_regLo, c_regHi);
2605 bind(again);
2607 if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync();
2608 lld(AT, dest);
2609 bne(AT, c_reg, nequal);
2610 delayed()->nop();
2612 //move(AT, x_reg);
2613 dadd(AT, x_reg, R0);
2614 scd(AT, dest);
2615 beq(AT, R0, again);
2616 delayed()->nop();
2617 b(done);
2618 delayed()->nop();
2620 // not xchged
2621 bind(nequal);
2622 sync();
2623 //move(c_reg, AT);
2624 //move(AT, R0);
2625 dadd(c_reg, AT, R0);
2626 dadd(AT, R0, R0);
2627 bind(done);
2628 }
2630 // Be sure the three registers are different.
2631 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2632 assert_different_registers(tmp, fs, ft);
2633 div_s(tmp, fs, ft);
2634 trunc_l_s(tmp, tmp);
2635 cvt_s_l(tmp, tmp);
2636 mul_s(tmp, tmp, ft);
2637 sub_s(fd, fs, tmp);
2638 }
2640 // Be sure the three registers are different.
2641 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2642 assert_different_registers(tmp, fs, ft);
2643 div_d(tmp, fs, ft);
2644 trunc_l_d(tmp, tmp);
2645 cvt_d_l(tmp, tmp);
2646 mul_d(tmp, tmp, ft);
2647 sub_d(fd, fs, tmp);
2648 }
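// Both helpers compute a truncated-division remainder, roughly the C
// expression below (a sketch; the quotient is truncated through a 64-bit
// integer, so it matches fmod() only while the quotient fits in a jlong):
//
//   fd = fs - (double)(jlong)(fs / ft) * ft;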
2650 // Fast_Lock and Fast_Unlock used by C2
2652 // Because the transitions from emitted code to the runtime
2653 // monitorenter/exit helper stubs are so slow it's critical that
2654 // we inline both the stack-locking fast-path and the inflated fast path.
2655 //
2656 // See also: cmpFastLock and cmpFastUnlock.
2657 //
2658 // What follows is a specialized inline transliteration of the code
2659 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2660 // another option would be to emit TrySlowEnter and TrySlowExit methods
2661 // at startup-time. These methods would accept arguments as
2662 // (Obj, Self, box, Scratch) and return success-failure
2663 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2664 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2665 // In practice, however, the # of lock sites is bounded and is usually small.
2666 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2667 // if the processor uses simple bimodal branch predictors keyed by EIP
2668 // Since the helper routines would be called from multiple synchronization
2669 // sites.
2670 //
2671 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2672 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2673 // to those specialized methods. That'd give us a mostly platform-independent
2674 // implementation that the JITs could optimize and inline at their pleasure.
2675 // Done correctly, the only time we'd need to cross to native code would be
2676 // to park() or unpark() threads. We'd also need a few more unsafe operators
2677 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2678 // (b) explicit barriers or fence operations.
2679 //
2680 // TODO:
2681 //
2682 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2683 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2684 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2685 // the lock operators would typically be faster than reifying Self.
2686 //
2687 // * Ideally I'd define the primitives as:
2688 // fast_lock (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED.
2689 // fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED
2690 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2691 //   Instead, we're stuck with the rather awkward and brittle register assignments below.
2692 // Furthermore the register assignments are overconstrained, possibly resulting in
2693 // sub-optimal code near the synchronization site.
2694 //
2695 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2696 // Alternately, use a better sp-proximity test.
2697 //
2698 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2699 // Either one is sufficient to uniquely identify a thread.
2700 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2701 //
2702 // * Intrinsify notify() and notifyAll() for the common cases where the
2703 // object is locked by the calling thread but the waitlist is empty.
2704 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2705 //
2706 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2707 // But beware of excessive branch density on AMD Opterons.
2708 //
2709 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2710 // or failure of the fast-path. If the fast-path fails then we pass
2711 // control to the slow-path, typically in C. In Fast_Lock and
2712 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2713 // will emit a conditional branch immediately after the node.
2714 // So we have branches to branches and lots of ICC.ZF games.
2715 // Instead, it might be better to have C2 pass a "FailureLabel"
2716 // into Fast_Lock and Fast_Unlock. In the case of success, control
2717 // will drop through the node. ICC.ZF is undefined at exit.
2718 // In the case of failure, the node will branch directly to the
2719 // FailureLabel
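// The stack-locking fast path below, in pseudo-C (a sketch of the protocol,
// not generated code; AT carries the 1 == success / 0 == failure result):
//
//   mark = obj->mark();
//   if (mark & monitor_value) goto inflated;        // already a monitor
//   box->dhw = mark | unlocked_value;               // anticipate CAS success
//   if (CAS(&obj->mark, mark | unlocked_value, box)) return 1;
//   box->dhw = (mark - SP) & (7 - page_size);       // recursive stack-lock?
//   return box->dhw == 0;                           // 0 => already ours
//
//  inflated:
//   // try to CAS ourselves into ObjectMonitor::_owner if it is NULL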
2722 // obj: object to lock
2723 // box: on-stack box address (displaced header location) - KILLED
2724 // tmp: tmp -- KILLED
2725 // scr: tmp -- KILLED
2726 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2728   // Ensure the register assignments are disjoint
2729 guarantee (objReg != boxReg, "") ;
2730 guarantee (objReg != tmpReg, "") ;
2731 guarantee (objReg != scrReg, "") ;
2732 guarantee (boxReg != tmpReg, "") ;
2733 guarantee (boxReg != scrReg, "") ;
2736 block_comment("FastLock");
2737 if (PrintBiasedLockingStatistics) {
2738 push(tmpReg);
2739 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2740 pop(tmpReg);
2741 }
2743 if (EmitSync & 1) {
2744 move(AT, 0x0);
2745 return;
2746 } else
2747 if (EmitSync & 2) {
2748 Label DONE_LABEL ;
2749 if (UseBiasedLocking) {
2750 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2751 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2752 }
2754 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2755 ori(tmpReg, tmpReg, 0x1);
2756 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2758 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2759 bne(AT, R0, DONE_LABEL);
2760 delayed()->nop();
2762 // Recursive locking
2763 dsubu(tmpReg, tmpReg, SP);
2764 li(AT, (7 - os::vm_page_size() ));
2765 andr(tmpReg, tmpReg, AT);
2766 sd(tmpReg, Address(boxReg, 0));
2767 bind(DONE_LABEL) ;
2768 } else {
2769 // Possible cases that we'll encounter in fast_lock
2770 // ------------------------------------------------
2771 // * Inflated
2772 // -- unlocked
2773 // -- Locked
2774 // = by self
2775 // = by other
2776 // * biased
2777 // -- by Self
2778 // -- by other
2779 // * neutral
2780 // * stack-locked
2781 // -- by self
2782 // = sp-proximity test hits
2783 // = sp-proximity test generates false-negative
2784 // -- by other
2785 //
2787 Label IsInflated, DONE_LABEL, PopDone ;
2789 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2790 // order to reduce the number of conditional branches in the most common cases.
2791 // Beware -- there's a subtle invariant that fetch of the markword
2792 // at [FETCH], below, will never observe a biased encoding (*101b).
2793 // If this invariant is not held we risk exclusion (safety) failure.
2794 if (UseBiasedLocking && !UseOptoBiasInlining) {
2795 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2796 }
2798 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2799 andi(AT, tmpReg, markOopDesc::monitor_value);
2800 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2801 delayed()->nop();
2803 // Attempt stack-locking ...
2804 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2805 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2806 //if (os::is_MP()) {
2807 // sync();
2808 //}
2810 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2811 //AT == 1: unlocked
2813 if (PrintBiasedLockingStatistics) {
2814 Label L;
2815 beq(AT, R0, L);
2816 delayed()->nop();
2817 push(T0);
2818 push(T1);
2819 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2820 pop(T1);
2821 pop(T0);
2822 bind(L);
2823 }
2824 bne(AT, R0, DONE_LABEL);
2825 delayed()->nop();
2827 // Recursive locking
2828 // The object is stack-locked: markword contains stack pointer to BasicLock.
2829 // Locked by current thread if difference with current SP is less than one page.
2830 dsubu(tmpReg, tmpReg, SP);
2831 li(AT, 7 - os::vm_page_size() );
2832 andr(tmpReg, tmpReg, AT);
2833 sd(tmpReg, Address(boxReg, 0));
2834 if (PrintBiasedLockingStatistics) {
2835 Label L;
2836 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2837 bne(tmpReg, R0, L);
2838 delayed()->nop();
2839 push(T0);
2840 push(T1);
2841 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2842 pop(T1);
2843 pop(T0);
2844 bind(L);
2845 }
2846 sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0
2848 b(DONE_LABEL) ;
2849 delayed()->nop();
2851 bind(IsInflated) ;
2852 // The object's monitor m is unlocked iff m->owner == NULL,
2853 // otherwise m->owner may contain a thread or a stack address.
2855 // TODO: someday avoid the ST-before-CAS penalty by
2856 // relocating (deferring) the following ST.
2857 // We should also think about trying a CAS without having
2858 // fetched _owner. If the CAS is successful we may
2859 // avoid an RTO->RTS upgrade on the $line.
2860 // Without cast to int32_t a movptr will destroy r10 which is typically obj
2861 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
2862 sd(AT, Address(boxReg, 0));
2864 move(boxReg, tmpReg) ;
2865 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2866 // if (m->owner != 0) => AT = 0, goto slow path.
2867 move(AT, R0);
2868 bne(tmpReg, R0, DONE_LABEL);
2869 delayed()->nop();
2871 #ifndef OPT_THREAD
2872 get_thread (TREG) ;
2873 #endif
2874 // It's inflated and appears unlocked
2875 //if (os::is_MP()) {
2876 // sync();
2877 //}
2878 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
2879 // Intentional fall-through into DONE_LABEL ...
2882 // DONE_LABEL is a hot target - we'd really like to place it at the
2883 // start of cache line by padding with NOPs.
2884 // See the AMD and Intel software optimization manuals for the
2885 // most efficient "long" NOP encodings.
2886 // Unfortunately none of our alignment mechanisms suffice.
2887 bind(DONE_LABEL);
2889 // At DONE_LABEL the AT is set as follows ...
2890 // Fast_Unlock uses the same protocol.
2891 // AT == 1 -> Success
2892 // AT == 0 -> Failure - force control through the slow-path
2894 // Avoid branch-to-branch on AMD processors
2895 // This appears to be superstition.
2896 if (EmitSync & 32) nop() ;
2898 }
2899 }
2901 // obj: object to unlock
2902 // box: box address (displaced header location), killed.
2903 // tmp: killed tmp; cannot be obj nor box.
2904 //
2905 // Some commentary on balanced locking:
2906 //
2907 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
2908 // Methods that don't have provably balanced locking are forced to run in the
2909 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
2910 // The interpreter provides two properties:
2911 // I1: At return-time the interpreter automatically and quietly unlocks any
2912 //      objects acquired in the current activation (frame). Recall that the
2913 // interpreter maintains an on-stack list of locks currently held by
2914 // a frame.
2915 // I2:  If a method attempts to unlock an object that is not held by
2916 //      the frame, the interpreter throws IMSX.
2917 //
2918 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
2919 // B() doesn't have provably balanced locking so it runs in the interpreter.
2920 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
2921 // is still locked by A().
2922 //
2923 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
2924 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
2925 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
2926 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
2928 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
2930 guarantee (objReg != boxReg, "") ;
2931 guarantee (objReg != tmpReg, "") ;
2932 guarantee (boxReg != tmpReg, "") ;
2934 block_comment("FastUnlock");
2937 if (EmitSync & 4) {
2938 // Disable - inhibit all inlining. Force control through the slow-path
2939 move(AT, 0x0);
2940 return;
2941 } else
2942 if (EmitSync & 8) {
2943 Label DONE_LABEL ;
2944 if (UseBiasedLocking) {
2945 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2946 }
2947 // classic stack-locking code ...
2948 ld(tmpReg, Address(boxReg, 0)) ;
2949 beq(tmpReg, R0, DONE_LABEL) ;
2950 move(AT, 0x1); // delay slot
2952 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
2953 bind(DONE_LABEL);
2954 } else {
2955 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
2957 // Critically, the biased locking test must have precedence over
2958 // and appear before the (box->dhw == 0) recursive stack-lock test.
2959 if (UseBiasedLocking && !UseOptoBiasInlining) {
2960 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2961 }
2963 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
2964 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
2965 delayed()->daddiu(AT, R0, 0x1);
2967 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
2968 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
2969 beq(AT, R0, Stacked) ; // Inflated?
2970 delayed()->nop();
2972 bind(Inflated) ;
2973 // It's inflated.
2974 // Despite our balanced locking property we still check that m->_owner == Self
2975 // as java routines or native JNI code called by this thread might
2976 // have released the lock.
2977 // Refer to the comments in synchronizer.cpp for how we might encode extra
2978 // state in _succ so we can avoid fetching EntryList|cxq.
2979 //
2980 // I'd like to add more cases in fast_lock() and fast_unlock() --
2981 // such as recursive enter and exit -- but we have to be wary of
2982 // I$ bloat, T$ effects and BP$ effects.
2983 //
2984 // If there's no contention try a 1-0 exit. That is, exit without
2985 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
2986 // we detect and recover from the race that the 1-0 exit admits.
2987 //
2988 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
2989 // before it STs null into _owner, releasing the lock. Updates
2990 // to data protected by the critical section must be visible before
2991 // we drop the lock (and thus before any other thread could acquire
2992 // the lock and observe the fields protected by the lock).
2993 #ifndef OPT_THREAD
2994 get_thread (TREG) ;
2995 #endif
2997 // It's inflated
2998 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2999 xorr(boxReg, boxReg, TREG);
3001 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3002 orr(boxReg, boxReg, AT);
3004 move(AT, R0);
3005 bne(boxReg, R0, DONE_LABEL);
3006 delayed()->nop();
3008 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3009 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3010 orr(boxReg, boxReg, AT);
3012 move(AT, R0);
3013 bne(boxReg, R0, DONE_LABEL);
3014 delayed()->nop();
3016 sync();
3017 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3018 move(AT, 0x1);
3019 b(DONE_LABEL);
3020 delayed()->nop();
3022 bind (Stacked);
3023 ld(tmpReg, Address(boxReg, 0)) ;
3024 //if (os::is_MP()) { sync(); }
3025 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3027 if (EmitSync & 65536) {
3028 bind (CheckSucc);
3029 }
3031 bind(DONE_LABEL);
3033 // Avoid branch to branch on AMD processors
3034 if (EmitSync & 32768) { nop() ; }
3035 }
3036 }
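// fast_unlock in pseudo-C (a sketch; AT == 1 reports success to C2):
//
//   if (box->dhw == 0) return 1;                   // recursive stack-lock
//   mark = obj->mark();
//   if (!(mark & monitor_value))                   // plain stack-lock:
//     return CAS(&obj->mark, box, box->dhw);       //   restore the header
//   m = (ObjectMonitor*)(mark - monitor_value);    // inflated:
//   if (m->owner != self || m->recursions != 0) return 0;
//   if (m->cxq != NULL || m->EntryList != NULL)  return 0;
//   sync(); m->owner = NULL;                       // 1-0 exit, release store
//   return 1;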
3038 void MacroAssembler::align(int modulus) {
3039 while (offset() % modulus != 0) nop();
3040 }
3043 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3044 //Unimplemented();
3045 }
3047 #ifdef _LP64
3048 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3049 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3051 // On MIPS64, F0~F23 are all caller-saved registers
3052 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3053 #else
3054 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3055 Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3057 Register caller_saved_fpu_registers[] = {};
3058 #endif
3060 // We preserve all caller-saved registers
3061 void MacroAssembler::pushad(){
3062 int i;
3064 // Fixed-point registers
3065 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3066 daddi(SP, SP, -1 * len * wordSize);
3067 for (i = 0; i < len; i++)
3068 {
3069 #ifdef _LP64
3070 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3071 #else
3072 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3073 #endif
3074 }
3076 // Floating-point registers
3077 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3078 daddi(SP, SP, -1 * len * wordSize);
3079 for (i = 0; i < len; i++)
3080 {
3081 #ifdef _LP64
3082 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3083 #else
3084 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3085 #endif
3086 }
3087 };
3089 void MacroAssembler::popad(){
3090 int i;
3092 // Floating-point registers
3093 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3094 for (i = 0; i < len; i++)
3095 {
3096 #ifdef _LP64
3097 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3098 #else
3099 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3100 #endif
3101 }
3102 daddi(SP, SP, len * wordSize);
3104 // Fixed-point registers
3105 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3106 for (i = 0; i < len; i++)
3107 {
3108 #ifdef _LP64
3109 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3110 #else
3111 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3112 #endif
3113 }
3114 daddi(SP, SP, len * wordSize);
3115 };
3117 // We preserve all caller-saved registers except V0
3118 void MacroAssembler::pushad_except_v0() {
3119 int i;
3121 // Fixed-point registers
3122 int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
3123 daddi(SP, SP, -1 * len * wordSize);
3124 for (i = 0; i < len; i++) {
3125 #ifdef _LP64
3126 sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3127 #else
3128 sw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3129 #endif
3130 }
3132 // Floating-point registers
3133 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3134 daddi(SP, SP, -1 * len * wordSize);
3135 for (i = 0; i < len; i++) {
3136 #ifdef _LP64
3137 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3138 #else
3139 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3140 #endif
3141 }
3142 }
3144 void MacroAssembler::popad_except_v0() {
3145 int i;
3147 // Floating-point registers
3148 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3149 for (i = 0; i < len; i++) {
3150 #ifdef _LP64
3151 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3152 #else
3153 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3154 #endif
3155 }
3156 daddi(SP, SP, len * wordSize);
3158 // Fixed-point registers
3159 len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
3160 for (i = 0; i < len; i++) {
3161 #ifdef _LP64
3162 ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3163 #else
3164 lw(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize);
3165 #endif
3166 }
3167 daddi(SP, SP, len * wordSize);
3168 }
3170 void MacroAssembler::push2(Register reg1, Register reg2) {
3171 #ifdef _LP64
3172 daddi(SP, SP, -16);
3173 sd(reg2, SP, 0);
3174 sd(reg1, SP, 8);
3175 #else
3176 addi(SP, SP, -8);
3177 sw(reg2, SP, 0);
3178 sw(reg1, SP, 4);
3179 #endif
3180 }
3182 void MacroAssembler::pop2(Register reg1, Register reg2) {
3183 #ifdef _LP64
3184 ld(reg1, SP, 0);
3185 ld(reg2, SP, 8);
3186 daddi(SP, SP, 16);
3187 #else
3188 lw(reg1, SP, 0);
3189 lw(reg2, SP, 4);
3190 addi(SP, SP, 8);
3191 #endif
3192 }
3194 // for UseCompressedOops Option
3195 void MacroAssembler::load_klass(Register dst, Register src) {
3196 #ifdef _LP64
3197 if(UseCompressedClassPointers){
3198 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3199 decode_klass_not_null(dst);
3200 } else
3201 #endif
3202 ld(dst, src, oopDesc::klass_offset_in_bytes());
3203 }
3205 void MacroAssembler::store_klass(Register dst, Register src) {
3206 #ifdef _LP64
3207 if(UseCompressedClassPointers){
3208 encode_klass_not_null(src);
3209 sw(src, dst, oopDesc::klass_offset_in_bytes());
3210   } else
3211 #endif
3212     sd(src, dst, oopDesc::klass_offset_in_bytes()); // brace-free else keeps the non-LP64 build balanced
3214 }
3216 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3217 load_klass(dst, src);
3218 ld(dst, Address(dst, Klass::prototype_header_offset()));
3219 }
3221 #ifdef _LP64
3222 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3223 if (UseCompressedClassPointers) {
3224 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3225 }
3226 }
3228 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3229 if(UseCompressedOops){
3230 lwu(dst, src);
3231 decode_heap_oop(dst);
3232 } else {
3233 ld(dst, src);
3234 }
3235 }
3237 void MacroAssembler::store_heap_oop(Address dst, Register src){
3238 if(UseCompressedOops){
3239 assert(!dst.uses(src), "not enough registers");
3240 encode_heap_oop(src);
3241 sw(src, dst);
3242 } else {
3243 sd(src, dst);
3244 }
3245 }
3247 void MacroAssembler::store_heap_oop_null(Address dst){
3248 if(UseCompressedOops){
3249 sw(R0, dst);
3250 } else {
3251 sd(R0, dst);
3252 }
3253 }
3255 #ifdef ASSERT
3256 void MacroAssembler::verify_heapbase(const char* msg) {
3257 assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
3258 assert (Universe::heap() != NULL, "java heap should be initialized");
3259 }
3260 #endif
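// For reference, the encode/decode scheme the routines below implement
// (a sketch matching oop.inline.hpp; base and shift come from Universe):
//
//   narrow = (oop == NULL) ? 0 : (oop - narrow_oop_base) >> narrow_oop_shift;
//   oop    = (narrow == 0) ? NULL
//                          : narrow_oop_base + ((uint64_t)narrow << narrow_oop_shift);
//
// With a NULL base (zero-based compressed oops) only the shift remains and
// the NULL case falls out for free; otherwise the movz instructions below
// implement the NULL checks branchlessly.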
3263 // Algorithm must match oop.inline.hpp encode_heap_oop.
3264 void MacroAssembler::encode_heap_oop(Register r) {
3265 #ifdef ASSERT
3266 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3267 #endif
3268 verify_oop(r, "broken oop in encode_heap_oop");
3269 if (Universe::narrow_oop_base() == NULL) {
3270 if (Universe::narrow_oop_shift() != 0) {
3271 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3272 shr(r, LogMinObjAlignmentInBytes);
3273 }
3274 return;
3275 }
3277 movz(r, S5_heapbase, r);
3278 dsub(r, r, S5_heapbase);
3279 if (Universe::narrow_oop_shift() != 0) {
3280 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3281 shr(r, LogMinObjAlignmentInBytes);
3282 }
3283 }
3285 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3286 #ifdef ASSERT
3287 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3288 #endif
3289 verify_oop(src, "broken oop in encode_heap_oop");
3290 if (Universe::narrow_oop_base() == NULL) {
3291 if (Universe::narrow_oop_shift() != 0) {
3292 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3293 dsrl(dst, src, LogMinObjAlignmentInBytes);
3294 } else {
3295 if (dst != src) move(dst, src);
3296 }
3297 } else {
3298 if (dst == src) {
3299 movz(dst, S5_heapbase, dst);
3300 dsub(dst, dst, S5_heapbase);
3301 if (Universe::narrow_oop_shift() != 0) {
3302 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3303 shr(dst, LogMinObjAlignmentInBytes);
3304 }
3305 } else {
3306 dsub(dst, src, S5_heapbase);
3307 if (Universe::narrow_oop_shift() != 0) {
3308 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3309 shr(dst, LogMinObjAlignmentInBytes);
3310 }
3311 movz(dst, R0, src);
3312 }
3313 }
3314 }
3316 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3317 assert (UseCompressedOops, "should be compressed");
3318 #ifdef ASSERT
3319 if (CheckCompressedOops) {
3320 Label ok;
3321 bne(r, R0, ok);
3322 delayed()->nop();
3323 stop("null oop passed to encode_heap_oop_not_null");
3324 bind(ok);
3325 }
3326 #endif
3327 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3328 if (Universe::narrow_oop_base() != NULL) {
3329 dsub(r, r, S5_heapbase);
3330 }
3331 if (Universe::narrow_oop_shift() != 0) {
3332 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3333 shr(r, LogMinObjAlignmentInBytes);
3334 }
3336 }
3338 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3339 assert (UseCompressedOops, "should be compressed");
3340 #ifdef ASSERT
3341 if (CheckCompressedOops) {
3342 Label ok;
3343 bne(src, R0, ok);
3344 delayed()->nop();
3345 stop("null oop passed to encode_heap_oop_not_null2");
3346 bind(ok);
3347 }
3348 #endif
3349 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3351 if (Universe::narrow_oop_base() != NULL) {
3352 dsub(dst, src, S5_heapbase);
3353 if (Universe::narrow_oop_shift() != 0) {
3354 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3355 shr(dst, LogMinObjAlignmentInBytes);
3356 }
3357 } else {
3358 if (Universe::narrow_oop_shift() != 0) {
3359 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3360 dsrl(dst, src, LogMinObjAlignmentInBytes);
3361 } else {
3362 if (dst != src) move(dst, src);
3363 }
3364 }
3365 }
3367 void MacroAssembler::decode_heap_oop(Register r) {
3368 #ifdef ASSERT
3369 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3370 #endif
3371 if (Universe::narrow_oop_base() == NULL) {
3372 if (Universe::narrow_oop_shift() != 0) {
3373 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3374 shl(r, LogMinObjAlignmentInBytes);
3375 }
3376 } else {
3377 move(AT, r);
3378 if (Universe::narrow_oop_shift() != 0) {
3379 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3380 shl(r, LogMinObjAlignmentInBytes);
3381 }
3382 dadd(r, r, S5_heapbase);
3383 movz(r, R0, AT);
3384 }
3385 verify_oop(r, "broken oop in decode_heap_oop");
3386 }
3388 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3389 #ifdef ASSERT
3390 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3391 #endif
3392 if (Universe::narrow_oop_base() == NULL) {
3393 if (Universe::narrow_oop_shift() != 0) {
3394 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3395 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3396 dsll(dst, src, LogMinObjAlignmentInBytes);
3397 } else {
3398 if (dst != src) move(dst, src);
3399 }
3400 } else {
3401 if (dst == src) {
3402 move(AT, dst);
3403 if (Universe::narrow_oop_shift() != 0) {
3404 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3405 shl(dst, LogMinObjAlignmentInBytes);
3406 }
3407 dadd(dst, dst, S5_heapbase);
3408 movz(dst, R0, AT);
3409 } else {
3410 if (Universe::narrow_oop_shift() != 0) {
3411 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3412 dsll(dst, src, LogMinObjAlignmentInBytes);
3413 daddu(dst, dst, S5_heapbase);
3414 } else {
3415 daddu(dst, src, S5_heapbase);
3416 }
3417 movz(dst, R0, src);
3418 }
3419 }
3420 verify_oop(dst, "broken oop in decode_heap_oop");
3421 }
3423 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3424 // Note: it will change flags
3425 assert (UseCompressedOops, "should only be used for compressed headers");
3426 assert (Universe::heap() != NULL, "java heap should be initialized");
3427 // Cannot assert, unverified entry point counts instructions (see .ad file)
3428 // vtableStubs also counts instructions in pd_code_size_limit.
3429 // Also do not verify_oop as this is called by verify_oop.
3430 if (Universe::narrow_oop_shift() != 0) {
3431 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3432 shl(r, LogMinObjAlignmentInBytes);
3433 if (Universe::narrow_oop_base() != NULL) {
3434 daddu(r, r, S5_heapbase);
3435 }
3436 } else {
3437 assert (Universe::narrow_oop_base() == NULL, "sanity");
3438 }
3439 }
3441 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3442 assert (UseCompressedOops, "should only be used for compressed headers");
3443 assert (Universe::heap() != NULL, "java heap should be initialized");
3445 // Cannot assert, unverified entry point counts instructions (see .ad file)
3446 // vtableStubs also counts instructions in pd_code_size_limit.
3447 // Also do not verify_oop as this is called by verify_oop.
3448 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3449 if (Universe::narrow_oop_shift() != 0) {
3450 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3451 if (LogMinObjAlignmentInBytes == Address::times_8) {
3452 dsll(dst, src, LogMinObjAlignmentInBytes);
3453 daddu(dst, dst, S5_heapbase);
3454 } else {
3455 dsll(dst, src, LogMinObjAlignmentInBytes);
3456 if (Universe::narrow_oop_base() != NULL) {
3457 daddu(dst, dst, S5_heapbase);
3458 }
3459 }
3460 } else {
3461 assert (Universe::narrow_oop_base() == NULL, "sanity");
3462 if (dst != src) {
3463 move(dst, src);
3464 }
3465 }
3466 }
3468 void MacroAssembler::encode_klass_not_null(Register r) {
3469 if (Universe::narrow_klass_base() != NULL) {
3470 assert(r != AT, "Encoding a klass in AT");
3471 set64(AT, (int64_t)Universe::narrow_klass_base());
3472 dsub(r, r, AT);
3473 }
3474 if (Universe::narrow_klass_shift() != 0) {
3475 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3476 shr(r, LogKlassAlignmentInBytes);
3477 }
3478 }
3480 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3481 if (dst == src) {
3482 encode_klass_not_null(src);
3483 } else {
3484 if (Universe::narrow_klass_base() != NULL) {
3485 set64(dst, (int64_t)Universe::narrow_klass_base());
3486 dsub(dst, src, dst);
3487 if (Universe::narrow_klass_shift() != 0) {
3488 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3489 shr(dst, LogKlassAlignmentInBytes);
3490 }
3491 } else {
3492 if (Universe::narrow_klass_shift() != 0) {
3493 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3494 dsrl(dst, src, LogKlassAlignmentInBytes);
3495 } else {
3496 move(dst, src);
3497 }
3498 }
3499 }
3500 }
3502 // Function instr_size_for_decode_klass_not_null() counts the instructions
3503 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3504 // when (Universe::heap() != NULL). Hence, if the instructions they
3505 // generate change, then this method needs to be updated.
3506 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3507 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3508 if (Universe::narrow_klass_base() != NULL) {
3509 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
3510 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3511 } else {
3512 // longest load decode klass function, mov64, leaq
3513 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3514 }
3515 }
3517 void MacroAssembler::decode_klass_not_null(Register r) {
3518 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3519 assert(r != AT, "Decoding a klass in AT");
3520 // Cannot assert, unverified entry point counts instructions (see .ad file)
3521 // vtableStubs also counts instructions in pd_code_size_limit.
3522 // Also do not verify_oop as this is called by verify_oop.
3523 if (Universe::narrow_klass_shift() != 0) {
3524 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3525 shl(r, LogKlassAlignmentInBytes);
3526 }
3527 if (Universe::narrow_klass_base() != NULL) {
3528 set64(AT, (int64_t)Universe::narrow_klass_base());
3529 daddu(r, r, AT);
3530     // Not necessary for MIPS at all.
3531 //reinit_heapbase();
3532 }
3533 }
3535 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3536 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3538 if (dst == src) {
3539 decode_klass_not_null(dst);
3540 } else {
3541 // Cannot assert, unverified entry point counts instructions (see .ad file)
3542 // vtableStubs also counts instructions in pd_code_size_limit.
3543 // Also do not verify_oop as this is called by verify_oop.
3544 set64(dst, (int64_t)Universe::narrow_klass_base());
3545 if (Universe::narrow_klass_shift() != 0) {
3546 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3547 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3548 dsll(AT, src, Address::times_8);
3549 daddu(dst, dst, AT);
3550 } else {
3551 daddu(dst, src, dst);
3552 }
3553 }
3554 }
3556 void MacroAssembler::incrementl(Register reg, int value) {
3557 if (value == min_jint) {
3558 move(AT, value);
3559 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3560 return;
3561 }
3562 if (value < 0) { decrementl(reg, -value); return; }
3563 if (value == 0) { ; return; }
3565 if(Assembler::is_simm16(value)) {
3566 NOT_LP64(addiu(reg, reg, value));
3567 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
3568 } else {
3569 move(AT, value);
3570 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3571 }
3572 }
3574 void MacroAssembler::decrementl(Register reg, int value) {
3575 if (value == min_jint) {
3576 move(AT, value);
3577 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3578 return;
3579 }
3580 if (value < 0) { incrementl(reg, -value); return; }
3581 if (value == 0) { ; return; }
3583 if (Assembler::is_simm16(value)) {
3584 NOT_LP64(addiu(reg, reg, -value));
3585 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
3586 } else {
3587 move(AT, value);
3588 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3589 }
3590 }
3592 void MacroAssembler::reinit_heapbase() {
3593 if (UseCompressedOops || UseCompressedClassPointers) {
3594 if (Universe::heap() != NULL) {
3595 if (Universe::narrow_oop_base() == NULL) {
3596 move(S5_heapbase, R0);
3597 } else {
3598 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3599 }
3600 } else {
3601 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3602 ld(S5_heapbase, S5_heapbase, 0);
3603 }
3604 }
3605 }
3606 #endif // _LP64
3608 void MacroAssembler::check_klass_subtype(Register sub_klass,
3609 Register super_klass,
3610 Register temp_reg,
3611 Label& L_success) {
3612   // implements gen_subtype_check
3613 Label L_failure;
3614 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3615 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3616 bind(L_failure);
3617 }
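// check_klass_subtype in pseudo-C (a sketch of the two-phase protocol; the
// details live in the fast-path and slow-path helpers below):
//
//   if (sub == super)                               return success;
//   off = super->super_check_offset;
//   if (*(Klass**)((address)sub + off) == super)    return success;
//   if (off != secondary_super_cache_offset)        return failure;
//   // slow path: linear scan of sub's secondary_supers, caching any hit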
3619 SkipIfEqual::SkipIfEqual(
3620 MacroAssembler* masm, const bool* flag_addr, bool value) {
3621 _masm = masm;
3622 _masm->li(AT, (address)flag_addr);
3623 _masm->lb(AT, AT, 0);
3624 _masm->addi(AT, AT, -value);
3625 _masm->beq(AT, R0, _label);
3626 _masm->delayed()->nop();
3627 }
3628 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3629 Register super_klass,
3630 Register temp_reg,
3631 Label* L_success,
3632 Label* L_failure,
3633 Label* L_slow_path,
3634 RegisterOrConstant super_check_offset) {
3635 assert_different_registers(sub_klass, super_klass, temp_reg);
3636 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3637 if (super_check_offset.is_register()) {
3638 assert_different_registers(sub_klass, super_klass,
3639 super_check_offset.as_register());
3640 } else if (must_load_sco) {
3641 assert(temp_reg != noreg, "supply either a temp or a register offset");
3642 }
3644 Label L_fallthrough;
3645 int label_nulls = 0;
3646 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3647 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3648 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3649 assert(label_nulls <= 1, "at most one NULL in the batch");
3651 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3652 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3653 // If the pointers are equal, we are done (e.g., String[] elements).
3654 // This self-check enables sharing of secondary supertype arrays among
3655 // non-primary types such as array-of-interface. Otherwise, each such
3656 // type would need its own customized SSA.
3657 // We move this check to the front of the fast path because many
3658 // type checks are in fact trivially successful in this manner,
3659 // so we get a nicely predicted branch right at the start of the check.
3660 beq(sub_klass, super_klass, *L_success);
3661 delayed()->nop();
3662 // Check the supertype display:
3663 if (must_load_sco) {
3664 // The offset is a positive int; lwu zero-extends it, which is correct on LP64.
3665 lwu(temp_reg, super_klass, sco_offset);
3666 super_check_offset = RegisterOrConstant(temp_reg);
3667 }
3668 if (super_check_offset.is_register()) {
3669 daddu(AT, sub_klass, super_check_offset.as_register()); // byte offset in a register
3670 ld(AT, AT, 0);
3671 } else { ld(AT, sub_klass, super_check_offset.as_constant()); } // constant folds into disp
3672 // This check has worked decisively for primary supers.
3673 // Secondary supers are sought in the super_cache ('super_cache_addr').
3674 // (Secondary supers are interfaces and very deeply nested subtypes.)
3675 // This works in the same check above because of a tricky aliasing
3676 // between the super_cache and the primary super display elements.
3677 // (The 'super_check_addr' can address either, as the case requires.)
3678 // Note that the cache is updated below if it does not help us find
3679 // what we need immediately.
3680 // So if it was a primary super, we can just fail immediately.
3681 // Otherwise, it's the slow path for us (no success at this point).
3683 if (super_check_offset.is_register()) {
3684 beq(super_klass, AT, *L_success);
3685 delayed()->nop();
3686 addi(AT, super_check_offset.as_register(), -sc_offset);
3687 if (L_failure == &L_fallthrough) {
3688 beq(AT, R0, *L_slow_path);
3689 delayed()->nop();
3690 } else {
3691 bne_far(AT, R0, *L_failure);
3692 delayed()->nop();
3693 b(*L_slow_path);
3694 delayed()->nop();
3695 }
3696 } else if (super_check_offset.as_constant() == sc_offset) {
3697 // Need a slow path; fast failure is impossible.
3698 if (L_slow_path == &L_fallthrough) {
3699 beq(super_klass, AT, *L_success);
3700 delayed()->nop();
3701 } else {
3702 bne(super_klass, AT, *L_slow_path);
3703 delayed()->nop();
3704 b(*L_success);
3705 delayed()->nop();
3706 }
3707 } else {
3708 // No slow path; it's a fast decision.
3709 if (L_failure == &L_fallthrough) {
3710 beq(super_klass, AT, *L_success);
3711 delayed()->nop();
3712 } else {
3713 bne_far(super_klass, AT, *L_failure);
3714 delayed()->nop();
3715 b(*L_success);
3716 delayed()->nop();
3717 }
3718 }
3720 bind(L_fallthrough);
3722 }
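// Label protocol recap for the fast path: at most one of L_success, L_failure
// and L_slow_path may be NULL, and a NULL label stands for the fall-through.
// For example (hypothetical, assuming the declaration defaults
// super_check_offset to -1 as in the shared x86 code):
//   check_klass_subtype_fast_path(sub, super, tmp, &ok, &fail, NULL);
// decides the trivial cases itself and falls through when the
// secondary-super scan in the slow path is still required.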
3725 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3726 Register super_klass,
3727 Register temp_reg,
3728 Register temp2_reg,
3729 Label* L_success,
3730 Label* L_failure,
3731 bool set_cond_codes) {
3732 if (temp2_reg == noreg)
3733 temp2_reg = TSR;
3734 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3735 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3737 Label L_fallthrough;
3738 int label_nulls = 0;
3739 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3740 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3741 assert(label_nulls <= 1, "at most one NULL in the batch");
3743 // a couple of useful fields in sub_klass:
3744 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3745 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3746 Address secondary_supers_addr(sub_klass, ss_offset);
3747 Address super_cache_addr( sub_klass, sc_offset);
3749 // Do a linear scan of the secondary super-klass chain.
3750 // This code is rarely used, so simplicity is a virtue here.
3751 // (The x86 version uses repne_scan here; on MIPS it is an explicit compare loop.)
3752 // Don't worry too much about pre-existing connections with the input regs.
3754 #ifndef PRODUCT
3755 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3756 ExternalAddress pst_counter_addr((address) pst_counter);
3757 NOT_LP64( incrementl(pst_counter_addr) );
3758 #endif //PRODUCT
3760 // We will consult the secondary-super array.
3761 ld(temp_reg, secondary_supers_addr);
3762 // Load the array length (an int; it is non-negative, so lw's sign-extension is harmless).
3763 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3764 // Skip to start of data.
3765 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
3767 // OpenJDK8 never compresses klass pointers in secondary-super array.
3768 Label Loop, subtype;
3769 bind(Loop);
3770 beq(temp2_reg, R0, *L_failure);
3771 delayed()->nop();
3772 ld(AT, temp_reg, 0);
3773 beq(AT, super_klass, subtype);
3774 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3775 b(Loop);
3776 delayed()->daddi(temp2_reg, temp2_reg, -1);
3778 bind(subtype);
3779 // Success. Cache the super we found and proceed in triumph.
3780 sd(super_klass, super_cache_addr);
3781 if (L_success != &L_fallthrough) {
3782 b(*L_success);
3783 delayed()->nop();
3784 }
3786 #undef IS_A_TEMP
3788 bind(L_fallthrough);
3789 }
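// The scan loop above, as a C-style sketch (illustrative only):
//   Klass** p = secondary_supers->data();    // temp_reg
//   int     n = secondary_supers->length();  // temp2_reg
//   for (; n != 0; n--, p++) {
//     if (*p == super_klass) { /* cache super_klass, branch to success */ }
//   }
//   /* branch to failure */
// Note the delay slots do useful work: the pointer advance and the count
// decrement are issued in the slots of the compare and back branches.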
3791 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3792 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3793 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3794 verify_oop(oop_result, "broken oop in call_VM_base");
3795 }
3797 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3798 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3799 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3800 }
3802 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3803 int extra_slot_offset) {
3804 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3805 int stackElementSize = Interpreter::stackElementSize;
3806 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3807 #ifdef ASSERT
3808 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3809 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3810 #endif
3811 Register scale_reg = NOREG;
3812 Address::ScaleFactor scale_factor = Address::no_scale;
3813 if (arg_slot.is_constant()) {
3814 offset += arg_slot.as_constant() * stackElementSize;
3815 } else {
3816 scale_reg = arg_slot.as_register();
3817 scale_factor = Address::times_8;
3818 }
3819 // We don't push RA on stack in prepare_invoke.
3820 // offset += wordSize; // return PC is on stack
3821 if (scale_reg == NOREG) return Address(SP, offset);
3822 else {
3823 dsll(scale_reg, scale_reg, scale_factor);
3824 daddu(scale_reg, SP, scale_reg);
3825 return Address(scale_reg, offset);
3826 }
3827 }
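// Address computed above, as a formula (illustrative):
//   addr = SP + expr_offset_in_bytes(extra_slot_offset)
//             + arg_slot * Interpreter::stackElementSize
// A constant slot is folded into the displacement; a register slot is shifted
// left by 3 (times_8 == stackElementSize on LP64) and added to SP, reusing
// (and clobbering) the slot register itself.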
3829 SkipIfEqual::~SkipIfEqual() {
3830 _masm->bind(_label);
3831 }
3833 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3834 switch (size_in_bytes) {
3835 #ifndef _LP64
3836 case 8:
3837 assert(dst2 != noreg, "second dest register required");
3838 lw(dst, src);
3839 lw(dst2, src.plus_disp(BytesPerInt));
3840 break;
3841 #else
3842 case 8: ld(dst, src); break;
3843 #endif
3844 case 4: lw(dst, src); break;
3845 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
3846 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
3847 default: ShouldNotReachHere();
3848 }
3849 }
3851 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
3852 switch (size_in_bytes) {
3853 #ifndef _LP64
3854 case 8:
3855 assert(src2 != noreg, "second source register required");
3856 sw(src, dst);
3857 sw(src2, dst.plus_disp(BytesPerInt));
3858 break;
3859 #else
3860 case 8: sd(src, dst); break;
3861 #endif
3862 case 4: sw(src, dst); break;
3863 case 2: sh(src, dst); break;
3864 case 1: sb(src, dst); break;
3865 default: ShouldNotReachHere();
3866 }
3867 }
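// Hypothetical use of the sized accessors above: loading a Java short
// sign-extended into T0 would be
//   __ load_sized_value(T0, Address(A0, 0), sizeof(jshort), /*is_signed*/ true);
// which dispatches to lh; the unsigned variant dispatches to lhu. Only the
// 32-bit (NOT _LP64) 8-byte case needs the second register for the high word.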
3869 // Look up the method for a megamorphic invokeinterface call.
3870 // The target method is determined by <intf_klass, itable_index>.
3871 // The receiver klass is in recv_klass.
3872 // On success, the result will be in method_result, and execution falls through.
3873 // On failure, execution transfers to the given label.
3874 void MacroAssembler::lookup_interface_method(Register recv_klass,
3875 Register intf_klass,
3876 RegisterOrConstant itable_index,
3877 Register method_result,
3878 Register scan_temp,
3879 Label& L_no_such_interface,
3880 bool return_method) {
3881 assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
3882 assert_different_registers(method_result, intf_klass, scan_temp, AT);
3883 assert(recv_klass != method_result || !return_method,
3884 "recv_klass can be destroyed when method isn't needed");
3886 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3887 "caller must use same register for non-constant itable index as for method");
3889 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3890 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
3891 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3892 int scan_step = itableOffsetEntry::size() * wordSize;
3893 int vte_size = vtableEntry::size() * wordSize;
3894 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3895 assert(vte_size == wordSize, "else adjust times_vte_scale");
3897 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
3899 // %%% Could store the aligned, prescaled offset in the klassoop.
3900 dsll(scan_temp, scan_temp, times_vte_scale);
3901 daddu(scan_temp, recv_klass, scan_temp);
3902 daddiu(scan_temp, scan_temp, vtable_base);
3903 if (HeapWordsPerLong > 1) {
3904 // Round up to align_object_offset boundary
3905 // see code for InstanceKlass::start_of_itable!
3906 round_to(scan_temp, BytesPerLong);
3907 }
3909 if (return_method) {
3910 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
3911 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
3912 if (itable_index.is_constant()) {
3913 set64(AT, (int)itable_index.as_constant());
3914 dsll(AT, AT, (int)Address::times_ptr);
3915 } else {
3916 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
3917 }
3918 daddu(AT, AT, recv_klass);
3919 daddiu(recv_klass, AT, itentry_off);
3920 }
3922 Label search, found_method;
3924 for (int peel = 1; peel >= 0; peel--) {
3925 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
3927 if (peel) {
3928 beq(intf_klass, method_result, found_method);
3929 delayed()->nop();
3930 } else {
3931 bne(intf_klass, method_result, search);
3932 delayed()->nop();
3933 // (invert the test to fall through to found_method...)
3934 }
3936 if (!peel) break;
3938 bind(search);
3940 // Check that the previous entry is non-null. A null entry means that
3941 // the receiver class doesn't implement the interface, and wasn't the
3942 // same as when the caller was compiled.
3943 beq(method_result, R0, L_no_such_interface);
3944 delayed()->nop();
3945 daddiu(scan_temp, scan_temp, scan_step);
3946 }
3948 bind(found_method);
3950 if (return_method) {
3951 // Got a hit.
3952 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
3953 if (UseLEXT1) {
3954 gsldx(method_result, recv_klass, scan_temp, 0);
3955 } else {
3956 daddu(AT, recv_klass, scan_temp);
3957 ld(method_result, AT, 0);
3958 }
3959 }
3960 }
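// The itable scan above, as a C-style sketch (offsets abbreviated,
// illustrative only; the emitted loop is peeled once):
//   itableOffsetEntry* e = (itableOffsetEntry*)((address)recv_klass
//                            + vtable_base + vtable_length * vte_size);
//   while (e->interface() != intf_klass) {
//     if (e->interface() == NULL) goto L_no_such_interface;
//     e = (itableOffsetEntry*)((address)e + scan_step);
//   }
//   if (return_method)
//     method_result = *(Method**)((address)recv_klass + e->offset()
//                                 + itable_index * wordSize + itentry_off);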
3962 // virtual method calling
3963 void MacroAssembler::lookup_virtual_method(Register recv_klass,
3964 RegisterOrConstant vtable_index,
3965 Register method_result) {
3966 Register tmp = GP;
3967 push(tmp);
3969 if (vtable_index.is_constant()) {
3970 assert_different_registers(recv_klass, method_result, tmp);
3971 } else {
3972 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
3973 }
3974 const int base = InstanceKlass::vtable_start_offset() * wordSize;
3975 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
3976 if (vtable_index.is_constant()) {
3977 set64(AT, vtable_index.as_constant());
3978 dsll(AT, AT, (int)Address::times_ptr);
3979 } else {
3980 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
3981 }
3982 set64(tmp, base + vtableEntry::method_offset_in_bytes());
3983 daddu(tmp, tmp, AT);
3984 daddu(tmp, tmp, recv_klass);
3985 ld(method_result, tmp, 0);
3987 pop(tmp);
3988 }
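// vtable dispatch formula implemented above (illustrative):
//   method_result = *(Method**)((address)recv_klass
//                     + InstanceKlass::vtable_start_offset() * wordSize
//                     + vtable_index * wordSize
//                     + vtableEntry::method_offset_in_bytes());
// GP is pressed into service as a scratch register and saved/restored around
// the sequence, so callers need not reserve an extra temp.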
3990 void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
3991 switch (type) {
3992 case T_LONG:
3993 st_ptr(src_reg, tmp_reg, disp);
3994 break;
3995 case T_ARRAY:
3996 case T_OBJECT:
3997 if (UseCompressedOops && !wide) {
3998 sw(src_reg, tmp_reg, disp);
3999 } else {
4000 st_ptr(src_reg, tmp_reg, disp);
4001 }
4002 break;
4003 case T_ADDRESS:
4004 st_ptr(src_reg, tmp_reg, disp);
4005 break;
4006 case T_INT:
4007 sw(src_reg, tmp_reg, disp);
4008 break;
4009 case T_CHAR:
4010 case T_SHORT:
4011 sh(src_reg, tmp_reg, disp);
4012 break;
4013 case T_BYTE:
4014 case T_BOOLEAN:
4015 sb(src_reg, tmp_reg, disp);
4016 break;
4017 default:
4018 ShouldNotReachHere();
4019 }
4020 }
4022 void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) {
4023 Register tmp_reg = T9;
4024 Register index_reg = addr.index();
4025 if (index_reg == NOREG) {
4026 tmp_reg = NOREG;
4027 }
4029 int scale = addr.scale();
4030 if (tmp_reg != NOREG && scale >= 0) {
4031 dsll(tmp_reg, index_reg, scale);
4032 }
4034 int disp = addr.disp();
4035 bool disp_is_simm16 = true;
4036 if (!Assembler::is_simm16(disp)) {
4037 disp_is_simm16 = false;
4038 }
4040 Register base_reg = addr.base();
4041 if (tmp_reg != NOREG) {
4042 assert_different_registers(tmp_reg, base_reg, index_reg);
4043 }
4045 if (tmp_reg != NOREG) {
4046 daddu(tmp_reg, base_reg, tmp_reg);
4047 if (!disp_is_simm16) {
4048 move(AT, disp); // use AT so the scaled index in tmp_reg survives
4049 daddu(tmp_reg, tmp_reg, AT);
4050 }
4051 store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4052 } else {
4053 if (!disp_is_simm16) {
4054 tmp_reg = T9;
4055 assert_different_registers(tmp_reg, base_reg);
4056 move(tmp_reg, disp);
4057 daddu(tmp_reg, base_reg, tmp_reg);
4058 }
4059 store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4060 }
4061 }
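// Addressing-mode lowering shared by the load/store_for_type helpers: a MIPS
// memory operand is only base+simm16, so base + (index << scale) + disp is
// materialized roughly as (sketch for the large-disp, indexed case):
//   dsll  T9, index, scale
//   daddu T9, base, T9
//   move  AT, disp      // disp does not fit in 16 bits
//   daddu T9, T9, AT
//   sw    src, T9, 0    // or the matching load/store for the type
// while a simm16 disp is folded into the final instruction's displacement.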
4063 void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) {
4064 switch (type) {
4065 case T_DOUBLE:
4066 sdc1(src_reg, tmp_reg, disp);
4067 break;
4068 case T_FLOAT:
4069 swc1(src_reg, tmp_reg, disp);
4070 break;
4071 default:
4072 ShouldNotReachHere();
4073 }
4074 }
4076 void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) {
4077 Register tmp_reg = T9;
4078 Register index_reg = addr.index();
4079 if (index_reg == NOREG) {
4080 tmp_reg = NOREG;
4081 }
4083 int scale = addr.scale();
4084 if (tmp_reg != NOREG && scale >= 0) {
4085 dsll(tmp_reg, index_reg, scale);
4086 }
4088 int disp = addr.disp();
4089 bool disp_is_simm16 = true;
4090 if (!Assembler::is_simm16(disp)) {
4091 disp_is_simm16 = false;
4092 }
4094 Register base_reg = addr.base();
4095 if (tmp_reg != NOREG) {
4096 assert_different_registers(tmp_reg, base_reg, index_reg);
4097 }
4099 if (tmp_reg != NOREG) {
4100 daddu(tmp_reg, base_reg, tmp_reg);
4101 if (!disp_is_simm16) {
4102 move(AT, disp); // use AT so the scaled index in tmp_reg survives
4103 daddu(tmp_reg, tmp_reg, AT);
4104 }
4105 store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
4106 } else {
4107 if (!disp_is_simm16) {
4108 tmp_reg = T9;
4109 assert_different_registers(tmp_reg, base_reg);
4110 move(tmp_reg, disp);
4111 daddu(tmp_reg, base_reg, tmp_reg);
4112 }
4113 store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
4114 }
4115 }
4117 void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) {
4118 switch (type) {
4119 case T_LONG:
4120 ld_ptr(dst_reg, tmp_reg, disp);
4121 break;
4122 case T_ARRAY:
4123 case T_OBJECT:
4124 if (UseCompressedOops && !wide) {
4125 lwu(dst_reg, tmp_reg, disp);
4126 } else {
4127 ld_ptr(dst_reg, tmp_reg, disp);
4128 }
4129 break;
4130 case T_ADDRESS:
4131 if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) {
4132 lwu(dst_reg, tmp_reg, disp);
4133 } else {
4134 ld_ptr(dst_reg, tmp_reg, disp);
4135 }
4136 break;
4137 case T_INT:
4138 lw(dst_reg, tmp_reg, disp);
4139 break;
4140 case T_CHAR:
4141 lhu(dst_reg, tmp_reg, disp);
4142 break;
4143 case T_SHORT:
4144 lh(dst_reg, tmp_reg, disp);
4145 break;
4146 case T_BYTE:
4147 case T_BOOLEAN:
4148 lb(dst_reg, tmp_reg, disp);
4149 break;
4150 default:
4151 ShouldNotReachHere();
4152 }
4153 }
4155 int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) {
4156 int code_offset = 0;
4157 Register tmp_reg = T9;
4158 Register index_reg = addr.index();
4159 if (index_reg == NOREG) {
4160 tmp_reg = NOREG;
4161 }
4163 int scale = addr.scale();
4164 if (tmp_reg != NOREG && scale >= 0) {
4165 dsll(tmp_reg, index_reg, scale);
4166 }
4168 int disp = addr.disp();
4169 bool disp_is_simm16 = true;
4170 if (!Assembler::is_simm16(disp)) {
4171 disp_is_simm16 = false;
4172 }
4174 Register base_reg = addr.base();
4175 if (tmp_reg != NOREG) {
4176 assert_different_registers(tmp_reg, base_reg, index_reg);
4177 }
4179 if (tmp_reg != NOREG) {
4180 daddu(tmp_reg, base_reg, tmp_reg);
4181 if (!disp_is_simm16) {
4182 move(AT, disp); // use AT so the scaled index in tmp_reg survives
4183 daddu(tmp_reg, tmp_reg, AT);
4184 }
4185 code_offset = offset();
4186 load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4187 } else {
4188 if (!disp_is_simm16) {
4189 tmp_reg = T9;
4190 assert_different_registers(tmp_reg, base_reg);
4191 move(tmp_reg, disp);
4192 daddu(tmp_reg, base_reg, tmp_reg);
4193 }
4194 code_offset = offset();
4195 load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide);
4196 }
4198 return code_offset;
4199 }
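// The returned code_offset marks the first byte of the actual memory access
// (recorded via offset() just before it), so a caller can, for example,
// associate that instruction with an implicit null check. This is a reading
// of the code above, not a documented contract.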
4201 void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) {
4202 switch (type) {
4203 case T_DOUBLE:
4204 ldc1(dst_reg, tmp_reg, disp);
4205 break;
4206 case T_FLOAT:
4207 lwc1(dst_reg, tmp_reg, disp);
4208 break;
4209 default:
4210 ShouldNotReachHere();
4211 }
4212 }
4214 int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) {
4215 int code_offset = 0;
4216 Register tmp_reg = T9;
4217 Register index_reg = addr.index();
4218 if (index_reg == NOREG) {
4219 tmp_reg = NOREG;
4220 }
4222 int scale = addr.scale();
4223 if (tmp_reg != NOREG && scale >= 0) {
4224 dsll(tmp_reg, index_reg, scale);
4225 }
4227 int disp = addr.disp();
4228 bool disp_is_simm16 = true;
4229 if (!Assembler::is_simm16(disp)) {
4230 disp_is_simm16 = false;
4231 }
4233 Register base_reg = addr.base();
4234 if (tmp_reg != NOREG) {
4235 assert_different_registers(tmp_reg, base_reg, index_reg);
4236 }
4238 if (tmp_reg != NOREG) {
4239 daddu(tmp_reg, base_reg, tmp_reg);
4240 if (!disp_is_simm16) {
4241 move(AT, disp); // use AT so the scaled index in tmp_reg survives
4242 daddu(tmp_reg, tmp_reg, AT);
4243 }
4244 code_offset = offset();
4245 load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type);
4246 } else {
4247 if (!disp_is_simm16) {
4248 tmp_reg = T9;
4249 assert_different_registers(tmp_reg, base_reg);
4250 move(tmp_reg, disp);
4251 daddu(tmp_reg, base_reg, tmp_reg);
4252 }
4253 code_offset = offset();
4254 load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type);
4255 }
4257 return code_offset;
4258 }
4260 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
4261 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
4262 STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
4263 // The inverted mask is sign-extended
4264 move(AT, inverted_jweak_mask);
4265 andr(possibly_jweak, AT, possibly_jweak);
4266 }
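// JNI weak-handle tagging, in short: a jweak is an ordinary handle with bit 0
// set (weak_tag_mask == 1), so clearing the tag is just
//   possibly_jweak &= ~(intptr_t)1;   // what the move/andr pair above computes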
4268 void MacroAssembler::resolve_jobject(Register value,
4269 Register thread,
4270 Register tmp) {
4271 assert_different_registers(value, thread, tmp);
4272 Label done, not_weak;
4273 beq(value, R0, done); // Use NULL as-is.
4274 delayed()->nop();
4275 move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag.
4276 andr(AT, value, AT);
4277 beq(AT, R0, not_weak);
4278 delayed()->nop();
4279 // Resolve jweak.
4280 ld(value, value, -JNIHandles::weak_tag_value);
4281 verify_oop(value);
4282 #if INCLUDE_ALL_GCS
4283 if (UseG1GC) {
4284 g1_write_barrier_pre(noreg /* obj */,
4285 value /* pre_val */,
4286 thread /* thread */,
4287 tmp /* tmp */,
4288 true /* tosca_live */,
4289 true /* expand_call */);
4290 }
4291 #endif // INCLUDE_ALL_GCS
4292 b(done);
4293 delayed()->nop();
4294 bind(not_weak);
4295 // Resolve (untagged) jobject.
4296 ld(value, value, 0);
4297 verify_oop(value);
4298 bind(done);
4299 }
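// resolve_jobject, as a C-style sketch (illustrative):
//   if (handle == NULL) return NULL;
//   if ((uintptr_t)handle & weak_tag_mask) {              // jweak
//     oop o = *(oop*)((address)handle - weak_tag_value);
//     g1_pre_barrier(o);  // keep o alive for concurrent marking (G1 only)
//     return o;
//   }
//   return *(oop*)handle;                                 // ordinary (strong) jobject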