Fri, 25 May 2018 14:48:32 +0800
#7046 added some notes
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2017, 2018, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/assembler.hpp"
28 #include "asm/assembler.inline.hpp"
29 #include "asm/macroAssembler.inline.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "gc_interface/collectedHeap.inline.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "memory/cardTableModRefBS.hpp"
34 #include "memory/resourceArea.hpp"
35 #include "memory/universe.hpp"
36 #include "prims/methodHandles.hpp"
37 #include "runtime/biasedLocking.hpp"
38 #include "runtime/interfaceSupport.hpp"
39 #include "runtime/objectMonitor.hpp"
40 #include "runtime/os.hpp"
41 #include "runtime/sharedRuntime.hpp"
42 #include "runtime/stubRoutines.hpp"
43 #include "utilities/macros.hpp"
44 #if INCLUDE_ALL_GCS
45 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
46 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
47 #include "gc_implementation/g1/heapRegion.hpp"
48 #endif // INCLUDE_ALL_GCS
50 // Implementation of MacroAssembler
52 intptr_t MacroAssembler::i[32] = {0};
53 float MacroAssembler::f[32] = {0.0};
55 void MacroAssembler::print(outputStream *s) {
56 unsigned int k;
57 for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
58 s->print_cr("i%d = 0x%.16lx", k, i[k]);
59 }
60 s->cr();
62 for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
63 s->print_cr("f%d = %f", k, f[k]);
64 }
65 s->cr();
66 }
68 int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
69 int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
71 void MacroAssembler::save_registers(MacroAssembler *masm) {
72 #define __ masm->
73 for(int k=0; k<32; k++) {
74 __ sw (as_Register(k), A0, i_offset(k));
75 }
77 for(int k=0; k<32; k++) {
78 __ swc1 (as_FloatRegister(k), A0, f_offset(k));
79 }
80 #undef __
81 }
83 void MacroAssembler::restore_registers(MacroAssembler *masm) {
84 #define __ masm->
85 for(int k=0; k<32; k++) {
86 __ lw (as_Register(k), A0, i_offset(k));
87 }
89 for(int k=0; k<32; k++) {
90 __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
91 }
92 #undef __
93 }
96 void MacroAssembler::pd_patch_instruction(address branch, address target) {
97 jint& stub_inst = *(jint*) branch;
98 jint *pc = (jint *)branch;
100 if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
101 //b_far:
102 // move(AT, RA); // dadd
103 // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
104 // nop();
105 // lui(T9, 0); // to be patched
106 // ori(T9, 0);
107 // daddu(T9, T9, RA);
108 // move(RA, AT);
109 // jr(T9);
111 assert(opcode(pc[3]) == lui_op
112 && opcode(pc[4]) == ori_op
113 && special(pc[5]) == daddu_op, "Not a branch label patch");
114 if(!(opcode(pc[3]) == lui_op
115 && opcode(pc[4]) == ori_op
116 && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
118 int offset = target - branch;
119 if (!is_simm16(offset)) {
120 pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
121 pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
122 } else {
123 /* revert to "beq + nop" */
124 CodeBuffer cb(branch, 4 * 10);
125 MacroAssembler masm(&cb);
126 #define __ masm.
127 __ b(target);
128 __ nop();
129 __ nop();
130 __ nop();
131 __ nop();
132 __ nop();
133 __ nop();
134 __ nop();
135 }
136 return;
137 } else if (special(pc[4]) == jr_op
138 && opcode(pc[4]) == special_op
139 && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
140 //jmp_far:
141 // patchable_set48(T9, target);
142 // jr(T9);
143 // nop();
145 CodeBuffer cb(branch, 4 * 4);
146 MacroAssembler masm(&cb);
147 masm.patchable_set48(T9, (long)(target));
148 return;
149 }
151 #ifndef PRODUCT
152 if (!is_simm16((target - branch - 4) >> 2)) {
153 tty->print_cr("Illegal patching: target=0x%lx", target);
154 int *p = (int *)branch;
155 for (int i = -10; i < 10; i++) {
156 tty->print("0x%lx, ", p[i]);
157 }
158 tty->print_cr("");
159 }
160 #endif
162 stub_inst = patched_branch(target - branch, stub_inst, 0);
163 }
165 static inline address first_cache_address() {
166 return CodeCache::low_bound() + sizeof(HeapBlock::Header);
167 }
169 static inline address last_cache_address() {
170 return CodeCache::high_bound() - Assembler::InstructionSize;
171 }
173 int MacroAssembler::call_size(address target, bool far, bool patchable) {
174 if (patchable) return 6 << Assembler::LogInstructionSize;
175 if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
176 return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
177 }
179 // Can we reach target using jal/j from anywhere
180 // in the code cache (because code can be relocated)?
181 bool MacroAssembler::reachable_from_cache(address target) {
182 address cl = first_cache_address();
183 address ch = last_cache_address();
185 return fit_in_jal(target, cl) && fit_in_jal(target, ch);
186 }
188 void MacroAssembler::general_jump(address target) {
189 if (reachable_from_cache(target)) {
190 j(target);
191 nop();
192 } else {
193 set64(T9, (long)target);
194 jr(T9);
195 nop();
196 }
197 }
199 int MacroAssembler::insts_for_general_jump(address target) {
200 if (reachable_from_cache(target)) {
201 //j(target);
202 //nop();
203 return 2;
204 } else {
205 //set64(T9, (long)target);
206 //jr(T9);
207 //nop();
208 return insts_for_set64((jlong)target) + 2;
209 }
210 }
212 void MacroAssembler::patchable_jump(address target) {
213 if (reachable_from_cache(target)) {
214 nop();
215 nop();
216 nop();
217 nop();
218 j(target);
219 nop();
220 } else {
221 patchable_set48(T9, (long)target);
222 jr(T9);
223 nop();
224 }
225 }
227 int MacroAssembler::insts_for_patchable_jump(address target) {
228 return 6;
229 }
231 void MacroAssembler::general_call(address target) {
232 if (reachable_from_cache(target)) {
233 jal(target);
234 nop();
235 } else {
236 set64(T9, (long)target);
237 jalr(T9);
238 nop();
239 }
240 }
242 int MacroAssembler::insts_for_general_call(address target) {
243 if (reachable_from_cache(target)) {
244 //jal(target);
245 //nop();
246 return 2;
247 } else {
248 //set64(T9, (long)target);
249 //jalr(T9);
250 //nop();
251 return insts_for_set64((jlong)target) + 2;
252 }
253 }
255 void MacroAssembler::patchable_call(address target) {
256 if (reachable_from_cache(target)) {
257 nop();
258 nop();
259 nop();
260 nop();
261 jal(target);
262 nop();
263 } else {
264 patchable_set48(T9, (long)target);
265 jalr(T9);
266 nop();
267 }
268 }
270 int MacroAssembler::insts_for_patchable_call(address target) {
271 return 6;
272 }
274 void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
275 u_char * cur_pc = pc();
277 /* Jin: Near/Far jump */
278 if(is_simm16((entry - pc() - 4) / 4)) {
279 Assembler::beq(rs, rt, offset(entry));
280 } else {
281 Label not_jump;
282 bne(rs, rt, not_jump);
283 delayed()->nop();
285 b_far(entry);
286 delayed()->nop();
288 bind(not_jump);
289 has_delay_slot();
290 }
291 }
293 void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
294 if (L.is_bound()) {
295 beq_far(rs, rt, target(L));
296 } else {
297 u_char * cur_pc = pc();
298 Label not_jump;
299 bne(rs, rt, not_jump);
300 delayed()->nop();
302 b_far(L);
303 delayed()->nop();
305 bind(not_jump);
306 has_delay_slot();
307 }
308 }
310 void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
311 u_char * cur_pc = pc();
313 /* Jin: Near/Far jump */
314 if(is_simm16((entry - pc() - 4) / 4)) {
315 Assembler::bne(rs, rt, offset(entry));
316 } else {
317 Label not_jump;
318 beq(rs, rt, not_jump);
319 delayed()->nop();
321 b_far(entry);
322 delayed()->nop();
324 bind(not_jump);
325 has_delay_slot();
326 }
327 }
329 void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
330 if (L.is_bound()) {
331 bne_far(rs, rt, target(L));
332 } else {
333 u_char * cur_pc = pc();
334 Label not_jump;
335 beq(rs, rt, not_jump);
336 delayed()->nop();
338 b_far(L);
339 delayed()->nop();
341 bind(not_jump);
342 has_delay_slot();
343 }
344 }
346 void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
347 Label not_taken;
349 bne(rs, rt, not_taken);
350 nop();
352 jmp_far(L);
354 bind(not_taken);
355 }
357 void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
358 Label not_taken;
360 beq(rs, rt, not_taken);
361 nop();
363 jmp_far(L);
365 bind(not_taken);
366 }
368 void MacroAssembler::bc1t_long(Label& L) {
369 Label not_taken;
371 bc1f(not_taken);
372 nop();
374 jmp_far(L);
376 bind(not_taken);
377 }
379 void MacroAssembler::bc1f_long(Label& L) {
380 Label not_taken;
382 bc1t(not_taken);
383 nop();
385 jmp_far(L);
387 bind(not_taken);
388 }
390 void MacroAssembler::b_far(Label& L) {
391 if (L.is_bound()) {
392 b_far(target(L));
393 } else {
394 volatile address dest = target(L);
395 /*
396 MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
397 0x00000055651ed514: dadd at, ra, zero
398 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
400 0x00000055651ed51c: sll zero, zero, 0
401 0x00000055651ed520: lui t9, 0x0
402 0x00000055651ed524: ori t9, t9, 0x21b8
403 0x00000055651ed528: daddu t9, t9, ra
404 0x00000055651ed52c: dadd ra, at, zero
405 0x00000055651ed530: jr t9
406 0x00000055651ed534: sll zero, zero, 0
407 */
408 move(AT, RA);
409 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
410 nop();
411 lui(T9, 0); // to be patched
412 ori(T9, T9, 0);
413 daddu(T9, T9, RA);
414 move(RA, AT);
415 jr(T9);
416 }
417 }
419 void MacroAssembler::b_far(address entry) {
420 u_char * cur_pc = pc();
422 /* Jin: Near/Far jump */
423 if(is_simm16((entry - pc() - 4) / 4)) {
424 b(offset(entry));
425 } else {
426 /* address must be bounded */
427 move(AT, RA);
428 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
429 nop();
430 li32(T9, entry - pc());
431 daddu(T9, T9, RA);
432 move(RA, AT);
433 jr(T9);
434 }
435 }
437 void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
438 addu_long(AT, base, offset);
439 ld_ptr(rt, 0, AT);
440 }
442 void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
443 addu_long(AT, base, offset);
444 st_ptr(rt, 0, AT);
445 }
447 void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
448 addu_long(AT, base, offset);
449 ld_long(rt, 0, AT);
450 }
452 void MacroAssembler::st_long(Register rt, Register offset, Register base) {
453 addu_long(AT, base, offset);
454 st_long(rt, 0, AT);
455 }
457 Address MacroAssembler::as_Address(AddressLiteral adr) {
458 return Address(adr.target(), adr.rspec());
459 }
461 Address MacroAssembler::as_Address(ArrayAddress adr) {
462 return Address::make_array(adr);
463 }
465 // tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
466 void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
467 Label again;
469 li(tmp_reg1, counter_addr);
470 bind(again);
471 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
472 ll(tmp_reg2, tmp_reg1, 0);
473 addi(tmp_reg2, tmp_reg2, inc);
474 sc(tmp_reg2, tmp_reg1, 0);
475 beq(tmp_reg2, R0, again);
476 delayed()->nop();
477 }
// Emits the biased-locking fast path for acquiring the monitor of the object
// in obj_reg.
//   lock_reg  - its word at offset 0 is used to stash the mark word that was read
//   obj_reg   - the object being locked
//   swap_reg  - holds (or receives) the object's mark word; clobbered
//   tmp_reg   - scratch register; when noreg is passed, T9 is used instead and
//               is saved/restored with push/pop around each use
//   swap_reg_contains_mark - true when the caller has already loaded the mark
//               word into swap_reg
//   done      - branched to when the lock has been obtained through biasing
//   slow_case - if non-NULL, branched to when a bias-acquiring CAS leaves AT == 0
//   counters  - not referenced in this body
// Returns the code offset recorded as null_check_offset: the offset of the
// mark-word load when this method emits it, otherwise the offset just before
// load_prototype_header.
// Execution falls through to cas_label at the end when the object does not
// carry the bias pattern or after the bias has been revoked.
479 int MacroAssembler::biased_locking_enter(Register lock_reg,
480 Register obj_reg,
481 Register swap_reg,
482 Register tmp_reg,
483 bool swap_reg_contains_mark,
484 Label& done,
485 Label* slow_case,
486 BiasedLockingCounters* counters) {
487 assert(UseBiasedLocking, "why call this otherwise?");
488 bool need_tmp_reg = false;
489 if (tmp_reg == noreg) {
490 need_tmp_reg = true;
491 tmp_reg = T9;
492 }
493 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
494 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
495 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
496 Address saved_mark_addr(lock_reg, 0);
498 // Biased locking
499 // See whether the lock is currently biased toward our thread and
500 // whether the epoch is still valid
501 // Note that the runtime guarantees sufficient alignment of JavaThread
502 // pointers to allow age to be placed into low bits
503 // First check to see whether biasing is even enabled for this object
504 Label cas_label;
505 int null_check_offset = -1;
506 if (!swap_reg_contains_mark) {
507 null_check_offset = offset();
508 ld_ptr(swap_reg, mark_addr);
509 }
511 if (need_tmp_reg) {
512 push(tmp_reg);
513 }
// AT = biased_lock_pattern - (mark & biased_lock_mask); zero means the bias pattern is present.
514 move(tmp_reg, swap_reg);
515 andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
516 #ifdef _LP64
517 daddi(AT, R0, markOopDesc::biased_lock_pattern);
518 dsub(AT, AT, tmp_reg);
519 #else
520 addi(AT, R0, markOopDesc::biased_lock_pattern);
521 sub(AT, AT, tmp_reg);
522 #endif
523 if (need_tmp_reg) {
524 pop(tmp_reg);
525 }
527 bne(AT, R0, cas_label);
528 delayed()->nop();
531 // The bias pattern is present in the object's header. Need to check
532 // whether the bias owner and the epoch are both still current.
533 // Note that because there is no current thread register on MIPS we
534 // need to store off the mark word we read out of the object to
535 // avoid reloading it and needing to recheck invariants below. This
536 // store is unfortunate but it makes the overall code shorter and
537 // simpler.
538 st_ptr(swap_reg, saved_mark_addr);
539 if (need_tmp_reg) {
540 push(tmp_reg);
541 }
542 if (swap_reg_contains_mark) {
543 null_check_offset = offset();
544 }
// swap_reg = (prototype header ^ mark ^ current thread) with the age bits masked off.
545 load_prototype_header(tmp_reg, obj_reg);
546 xorr(tmp_reg, tmp_reg, swap_reg);
547 get_thread(swap_reg);
548 xorr(swap_reg, swap_reg, tmp_reg);
550 move(AT, ~((int) markOopDesc::age_mask_in_place));
551 andr(swap_reg, swap_reg, AT);
553 if (PrintBiasedLockingStatistics) {
554 Label L;
555 bne(swap_reg, R0, L);
556 delayed()->nop();
557 push(tmp_reg);
558 push(A0);
559 atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
560 pop(A0);
561 pop(tmp_reg);
562 bind(L);
563 }
564 if (need_tmp_reg) {
565 pop(tmp_reg);
566 }
567 beq(swap_reg, R0, done);
568 delayed()->nop();
569 Label try_revoke_bias;
570 Label try_rebias;
572 // At this point we know that the header has the bias pattern and
573 // that we are not the bias owner in the current epoch. We need to
574 // figure out more details about the state of the header in order to
575 // know what operations can be legally performed on the object's
576 // header.
578 // If the low three bits in the xor result aren't clear, that means
579 // the prototype header is no longer biased and we have to revoke
580 // the bias on this object.
582 move(AT, markOopDesc::biased_lock_mask_in_place);
583 andr(AT, swap_reg, AT);
584 bne(AT, R0, try_revoke_bias);
585 delayed()->nop();
586 // Biasing is still enabled for this data type. See whether the
587 // epoch of the current bias is still valid, meaning that the epoch
588 // bits of the mark word are equal to the epoch bits of the
589 // prototype header. (Note that the prototype header's epoch bits
590 // only change at a safepoint.) If not, attempt to rebias the object
591 // toward the current thread. Note that we must be absolutely sure
592 // that the current epoch is invalid in order to do this because
593 // otherwise the manipulations it performs on the mark word are
594 // illegal.
596 move(AT, markOopDesc::epoch_mask_in_place);
597 andr(AT,swap_reg, AT);
598 bne(AT, R0, try_rebias);
599 delayed()->nop();
600 // The epoch of the current bias is still valid but we know nothing
601 // about the owner; it might be set or it might be clear. Try to
602 // acquire the bias of the object using an atomic operation. If this
603 // fails we will go in to the runtime to revoke the object's bias.
604 // Note that we first construct the presumed unbiased header so we
605 // don't accidentally blow away another thread's valid bias.
607 ld_ptr(swap_reg, saved_mark_addr);
609 move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
610 andr(swap_reg, swap_reg, AT);
612 if (need_tmp_reg) {
613 push(tmp_reg);
614 }
615 get_thread(tmp_reg);
616 orr(tmp_reg, tmp_reg, swap_reg);
617 //if (os::is_MP()) {
618 // sync();
619 //}
620 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
621 if (need_tmp_reg) {
622 pop(tmp_reg);
623 }
624 // If the biasing toward our thread failed, this means that
625 // another thread succeeded in biasing it toward itself and we
626 // need to revoke that bias. The revocation will occur in the
627 // interpreter runtime in the slow case.
// NOTE(review): the counter below is bumped when AT == 0, which is the same
// condition that sends control to slow_case just after -- i.e. it appears to
// count failed CAS attempts. Confirm the AT polarity produced by cmpxchg.
// The same pattern recurs in the rebias and revoke statistics blocks.
628 if (PrintBiasedLockingStatistics) {
629 Label L;
630 bne(AT, R0, L);
631 delayed()->nop();
632 push(tmp_reg);
633 push(A0);
634 atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
635 pop(A0);
636 pop(tmp_reg);
637 bind(L);
638 }
639 if (slow_case != NULL) {
640 beq_far(AT, R0, *slow_case);
641 delayed()->nop();
642 }
643 b(done);
644 delayed()->nop();
646 bind(try_rebias);
647 // At this point we know the epoch has expired, meaning that the
648 // current "bias owner", if any, is actually invalid. Under these
649 // circumstances _only_, we are allowed to use the current header's
650 // value as the comparison value when doing the cas to acquire the
651 // bias in the current epoch. In other words, we allow transfer of
652 // the bias from one thread to another directly in this situation.
653 //
654 // FIXME: due to a lack of registers we currently blow away the age
655 // bits in this situation. Should attempt to preserve them.
656 if (need_tmp_reg) {
657 push(tmp_reg);
658 }
659 load_prototype_header(tmp_reg, obj_reg);
660 get_thread(swap_reg);
661 orr(tmp_reg, tmp_reg, swap_reg);
662 ld_ptr(swap_reg, saved_mark_addr);
664 //if (os::is_MP()) {
665 // sync();
666 //}
667 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
668 if (need_tmp_reg) {
669 pop(tmp_reg);
670 }
671 // If the biasing toward our thread failed, then another thread
672 // succeeded in biasing it toward itself and we need to revoke that
673 // bias. The revocation will occur in the runtime in the slow case.
674 if (PrintBiasedLockingStatistics) {
675 Label L;
676 bne(AT, R0, L);
677 delayed()->nop();
678 push(AT);
679 push(tmp_reg);
680 atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
681 pop(tmp_reg);
682 pop(AT);
683 bind(L);
684 }
685 if (slow_case != NULL) {
686 beq_far(AT, R0, *slow_case);
687 delayed()->nop();
688 }
690 b(done);
691 delayed()->nop();
692 bind(try_revoke_bias);
693 // The prototype mark in the klass doesn't have the bias bit set any
694 // more, indicating that objects of this data type are not supposed
695 // to be biased any more. We are going to try to reset the mark of
696 // this object to the prototype value and fall through to the
697 // CAS-based locking scheme. Note that if our CAS fails, it means
698 // that another thread raced us for the privilege of revoking the
699 // bias of this particular object, so it's okay to continue in the
700 // normal locking code.
701 //
702 // FIXME: due to a lack of registers we currently blow away the age
703 // bits in this situation. Should attempt to preserve them.
704 ld_ptr(swap_reg, saved_mark_addr);
706 if (need_tmp_reg) {
707 push(tmp_reg);
708 }
709 load_prototype_header(tmp_reg, obj_reg);
710 //if (os::is_MP()) {
711 // lock();
712 //}
713 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
714 if (need_tmp_reg) {
715 pop(tmp_reg);
716 }
717 // Fall through to the normal CAS-based lock, because no matter what
718 // the result of the above CAS, some thread must have succeeded in
719 // removing the bias bit from the object's header.
720 if (PrintBiasedLockingStatistics) {
721 Label L;
722 bne(AT, R0, L);
723 delayed()->nop();
724 push(AT);
725 push(tmp_reg);
726 atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
727 pop(tmp_reg);
728 pop(AT);
729 bind(L);
730 }
732 bind(cas_label);
733 return null_check_offset;
734 }
736 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
737 assert(UseBiasedLocking, "why call this otherwise?");
739 // Check for biased locking unlock case, which is a no-op
740 // Note: we do not have to check the thread ID for two reasons.
741 // First, the interpreter checks for IllegalMonitorStateException at
742 // a higher level. Second, if the bias was revoked while we held the
743 // lock, the object could not be rebiased toward another thread, so
744 // the bias bit would be clear.
745 #ifdef _LP64
746 ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
747 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
748 daddi(AT, R0, markOopDesc::biased_lock_pattern);
749 #else
750 lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
751 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
752 addi(AT, R0, markOopDesc::biased_lock_pattern);
753 #endif
755 beq(AT, temp_reg, done);
756 delayed()->nop();
757 }
759 // the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf
760 // this method will handle the stack problem, you need not to preserve the stack space for the argument now
761 void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
762 Label L, E;
764 assert(number_of_arguments <= 4, "just check");
766 andi(AT, SP, 0xf);
767 beq(AT, R0, L);
768 delayed()->nop();
769 daddi(SP, SP, -8);
770 call(entry_point, relocInfo::runtime_call_type);
771 delayed()->nop();
772 daddi(SP, SP, 8);
773 b(E);
774 delayed()->nop();
776 bind(L);
777 call(entry_point, relocInfo::runtime_call_type);
778 delayed()->nop();
779 bind(E);
780 }
783 void MacroAssembler::jmp(address entry) {
784 patchable_set48(T9, (long)entry);
785 jr(T9);
786 }
788 void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
789 switch (rtype) {
790 case relocInfo::runtime_call_type:
791 case relocInfo::none:
792 jmp(entry);
793 break;
794 default:
795 {
796 InstructionMark im(this);
797 relocate(rtype);
798 patchable_set48(T9, (long)entry);
799 jr(T9);
800 }
801 break;
802 }
803 }
805 void MacroAssembler::jmp_far(Label& L) {
806 if (L.is_bound()) {
807 address entry = target(L);
808 assert(entry != NULL, "jmp most probably wrong");
809 InstructionMark im(this);
811 relocate(relocInfo::internal_word_type);
812 patchable_set48(T9, (long)entry);
813 } else {
814 InstructionMark im(this);
815 L.add_patch_at(code(), locator());
817 relocate(relocInfo::internal_word_type);
818 patchable_set48(T9, (long)pc());
819 }
821 jr(T9);
822 nop();
823 }
824 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
825 int oop_index;
826 if (obj) {
827 oop_index = oop_recorder()->find_index(obj);
828 } else {
829 oop_index = oop_recorder()->allocate_metadata_index(obj);
830 }
831 relocate(metadata_Relocation::spec(oop_index));
832 patchable_set48(AT, (long)obj);
833 sd(AT, dst);
834 }
836 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
837 int oop_index;
838 if (obj) {
839 oop_index = oop_recorder()->find_index(obj);
840 } else {
841 oop_index = oop_recorder()->allocate_metadata_index(obj);
842 }
843 relocate(metadata_Relocation::spec(oop_index));
844 patchable_set48(dst, (long)obj);
845 }
847 void MacroAssembler::call(address entry) {
848 // c/c++ code assume T9 is entry point, so we just always move entry to t9
849 // maybe there is some more graceful method to handle this. FIXME
850 // For more info, see class NativeCall.
851 #ifndef _LP64
852 move(T9, (int)entry);
853 #else
854 patchable_set48(T9, (long)entry);
855 #endif
856 jalr(T9);
857 }
859 void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
860 switch (rtype) {
861 case relocInfo::runtime_call_type:
862 case relocInfo::none:
863 call(entry);
864 break;
865 default:
866 {
867 InstructionMark im(this);
868 relocate(rtype);
869 call(entry);
870 }
871 break;
872 }
873 }
875 void MacroAssembler::call(address entry, RelocationHolder& rh)
876 {
877 switch (rh.type()) {
878 case relocInfo::runtime_call_type:
879 case relocInfo::none:
880 call(entry);
881 break;
882 default:
883 {
884 InstructionMark im(this);
885 relocate(rh);
886 call(entry);
887 }
888 break;
889 }
890 }
892 void MacroAssembler::ic_call(address entry) {
893 RelocationHolder rh = virtual_call_Relocation::spec(pc());
894 patchable_set48(IC_Klass, (long)Universe::non_oop_word());
895 assert(entry != NULL, "call most probably wrong");
896 InstructionMark im(this);
897 relocate(rh);
898 patchable_call(entry);
899 }
901 void MacroAssembler::c2bool(Register r) {
902 Label L;
903 Assembler::beq(r, R0, L);
904 delayed()->nop();
905 move(r, 1);
906 bind(L);
907 }
909 #ifndef PRODUCT
910 extern "C" void findpc(intptr_t x);
911 #endif
913 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
914 // In order to get locks to work, we need to fake a in_VM state
915 JavaThread* thread = JavaThread::current();
916 JavaThreadState saved_state = thread->thread_state();
917 thread->set_thread_state(_thread_in_vm);
918 if (ShowMessageBoxOnError) {
919 JavaThread* thread = JavaThread::current();
920 JavaThreadState saved_state = thread->thread_state();
921 thread->set_thread_state(_thread_in_vm);
922 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
923 ttyLocker ttyl;
924 BytecodeCounter::print();
925 }
926 // To see where a verify_oop failed, get $ebx+40/X for this frame.
927 // This is the value of eip which points to where verify_oop will return.
928 if (os::message_box(msg, "Execution stopped, print registers?")) {
929 ttyLocker ttyl;
930 tty->print_cr("eip = 0x%08x", eip);
931 #ifndef PRODUCT
932 tty->cr();
933 findpc(eip);
934 tty->cr();
935 #endif
936 tty->print_cr("rax, = 0x%08x", rax);
937 tty->print_cr("rbx, = 0x%08x", rbx);
938 tty->print_cr("rcx = 0x%08x", rcx);
939 tty->print_cr("rdx = 0x%08x", rdx);
940 tty->print_cr("rdi = 0x%08x", rdi);
941 tty->print_cr("rsi = 0x%08x", rsi);
942 tty->print_cr("rbp, = 0x%08x", rbp);
943 tty->print_cr("rsp = 0x%08x", rsp);
944 BREAKPOINT;
945 }
946 } else {
947 ttyLocker ttyl;
948 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
949 assert(false, "DEBUG MESSAGE");
950 }
951 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
952 }
954 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
955 if ( ShowMessageBoxOnError ) {
956 JavaThreadState saved_state = JavaThread::current()->thread_state();
957 JavaThread::current()->set_thread_state(_thread_in_vm);
958 {
959 // In order to get locks work, we need to fake a in_VM state
960 ttyLocker ttyl;
961 ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
962 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
963 BytecodeCounter::print();
964 }
966 // if (os::message_box(msg, "Execution stopped, print registers?"))
967 // regs->print(::tty);
968 }
969 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
970 }
971 else
972 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
973 }
976 void MacroAssembler::stop(const char* msg) {
977 li(A0, (long)msg);
978 #ifndef _LP64
979 //reserver space for argument. added by yjl 7/10/2005
980 addiu(SP, SP, - 1 * wordSize);
981 #endif
982 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
983 delayed()->nop();
984 #ifndef _LP64
985 //restore space for argument
986 addiu(SP, SP, 1 * wordSize);
987 #endif
988 brk(17);
989 }
991 void MacroAssembler::warn(const char* msg) {
992 #ifdef _LP64
993 pushad();
994 li(A0, (long)msg);
995 push(S2);
996 move(AT, -(StackAlignmentInBytes));
997 move(S2, SP); // use S2 as a sender SP holder
998 andr(SP, SP, AT); // align stack as required by ABI
999 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
1000 delayed()->nop();
1001 move(SP, S2); // use S2 as a sender SP holder
1002 pop(S2);
1003 popad();
1004 #else
1005 pushad();
1006 addi(SP, SP, -4);
1007 sw(A0, SP, -1 * wordSize);
1008 li(A0, (long)msg);
1009 addi(SP, SP, -1 * wordSize);
1010 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
1011 delayed()->nop();
1012 addi(SP, SP, 1 * wordSize);
1013 lw(A0, SP, -1 * wordSize);
1014 addi(SP, SP, 4);
1015 popad();
1016 #endif
1017 }
1019 void MacroAssembler::print_reg(Register reg) {
1020 /*
1021 char *s = getenv("PRINT_REG");
1022 if (s == NULL)
1023 return;
1024 if (strcmp(s, "1") != 0)
1025 return;
1026 */
1027 void * cur_pc = pc();
1028 pushad();
1029 NOT_LP64(push(FP);)
1031 li(A0, (long)reg->name());
1032 if (reg == SP)
1033 addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
1034 else if (reg == A0)
1035 ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
1036 else
1037 move(A1, reg);
1038 li(A2, (long)cur_pc);
1039 push(S2);
1040 move(AT, -(StackAlignmentInBytes));
1041 move(S2, SP); // use S2 as a sender SP holder
1042 andr(SP, SP, AT); // align stack as required by ABI
1043 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
1044 delayed()->nop();
1045 move(SP, S2); // use S2 as a sender SP holder
1046 pop(S2);
1047 NOT_LP64(pop(FP);)
1048 popad();
1050 /*
1051 pushad();
1052 #ifdef _LP64
1053 if (reg == SP)
1054 addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
1055 else
1056 move(A0, reg);
1057 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
1058 delayed()->nop();
1059 #else
1060 push(FP);
1061 move(A0, reg);
1062 dsrl32(A1, reg, 0);
1063 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
1064 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
1065 delayed()->nop();
1066 pop(FP);
1067 #endif
1068 popad();
1069 pushad();
1070 NOT_LP64(push(FP);)
1071 char b[50];
1072 sprintf((char *)b, " pc: %p\n",cur_pc);
1073 li(A0, (long)(char *)b);
1074 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1075 delayed()->nop();
1076 NOT_LP64(pop(FP);)
1077 popad();
1078 */
1079 }
// Debugging aid: emits code that prints the name of floating-point register
// `reg` (via SharedRuntime::print_str) followed by its value (via
// SharedRuntime::print_double). Each runtime call is bracketed by
// pushad()/popad(), and SP is aligned to StackAlignmentInBytes around the call.
// AT is used as scratch for the alignment mask.
1081 void MacroAssembler::print_reg(FloatRegister reg) {
// cur_pc is only consumed by the "#if 0" block at the end of this function.
1082 void * cur_pc = pc();
// --- first call: print the register name ---
1083 pushad();
1084 NOT_LP64(push(FP);)
1085 li(A0, (long)reg->name());
1086 push(S2);
1087 move(AT, -(StackAlignmentInBytes));
1088 move(S2, SP); // remember the unaligned SP in S2
1089 andr(SP, SP, AT); // align stack as required by ABI
1090 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1091 delayed()->nop();
1092 move(SP, S2); // restore the SP that was saved in S2
1093 pop(S2);
1094 NOT_LP64(pop(FP);)
1095 popad();
// --- second call: print the register value ---
1097 pushad();
1098 NOT_LP64(push(FP);)
1099 #if 1
// Here FP (not S2) holds the unaligned SP across the call.
1100 move(FP, SP);
1101 move(AT, -(StackAlignmentInBytes));
1102 andr(SP , SP , AT);
// The value is passed to print_double in F12.
1103 mov_d(F12, reg);
1104 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
1105 delayed()->nop();
1106 move(SP, FP);
1107 #else
// Disabled single-precision variant; the call itself is commented out.
1108 mov_s(F12, reg);
1109 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
1110 //delayed()->nop();
1111 #endif
1112 NOT_LP64(pop(FP);)
1113 popad();
// Disabled: would additionally print the code address captured in cur_pc.
1115 #if 0
1116 pushad();
1117 NOT_LP64(push(FP);)
1118 char* b = new char[50];
1119 sprintf(b, " pc: %p\n", cur_pc);
1120 li(A0, (long)b);
1121 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1122 delayed()->nop();
1123 NOT_LP64(pop(FP);)
1124 popad();
1125 #endif
1126 }
1128 void MacroAssembler::increment(Register reg, int imm) {
1129 if (!imm) return;
1130 if (is_simm16(imm)) {
1131 #ifdef _LP64
1132 daddiu(reg, reg, imm);
1133 #else
1134 addiu(reg, reg, imm);
1135 #endif
1136 } else {
1137 move(AT, imm);
1138 #ifdef _LP64
1139 daddu(reg, reg, AT);
1140 #else
1141 addu(reg, reg, AT);
1142 #endif
1143 }
1144 }
1146 void MacroAssembler::decrement(Register reg, int imm) {
1147 increment(reg, -imm);
1148 }
1151 void MacroAssembler::call_VM(Register oop_result,
1152 address entry_point,
1153 bool check_exceptions) {
1154 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
1155 }
1157 void MacroAssembler::call_VM(Register oop_result,
1158 address entry_point,
1159 Register arg_1,
1160 bool check_exceptions) {
1161 if (arg_1!=A1) move(A1, arg_1);
1162 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
1163 }
1165 void MacroAssembler::call_VM(Register oop_result,
1166 address entry_point,
1167 Register arg_1,
1168 Register arg_2,
1169 bool check_exceptions) {
1170 if (arg_1!=A1) move(A1, arg_1);
1171 if (arg_2!=A2) move(A2, arg_2);
1172 assert(arg_2 != A1, "smashed argument");
1173 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
1174 }
1176 void MacroAssembler::call_VM(Register oop_result,
1177 address entry_point,
1178 Register arg_1,
1179 Register arg_2,
1180 Register arg_3,
1181 bool check_exceptions) {
1182 if (arg_1!=A1) move(A1, arg_1);
1183 if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1184 if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
1185 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
1186 }
1188 void MacroAssembler::call_VM(Register oop_result,
1189 Register last_java_sp,
1190 address entry_point,
1191 int number_of_arguments,
1192 bool check_exceptions) {
1193 call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
1194 }
1196 void MacroAssembler::call_VM(Register oop_result,
1197 Register last_java_sp,
1198 address entry_point,
1199 Register arg_1,
1200 bool check_exceptions) {
1201 if (arg_1 != A1) move(A1, arg_1);
1202 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
1203 }
1205 void MacroAssembler::call_VM(Register oop_result,
1206 Register last_java_sp,
1207 address entry_point,
1208 Register arg_1,
1209 Register arg_2,
1210 bool check_exceptions) {
1211 if (arg_1 != A1) move(A1, arg_1);
1212 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1213 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
1214 }
1216 void MacroAssembler::call_VM(Register oop_result,
1217 Register last_java_sp,
1218 address entry_point,
1219 Register arg_1,
1220 Register arg_2,
1221 Register arg_3,
1222 bool check_exceptions) {
1223 if (arg_1 != A1) move(A1, arg_1);
1224 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
1225 if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
1226 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
1227 }
1229 void MacroAssembler::call_VM_base(Register oop_result,
1230 Register java_thread,
1231 Register last_java_sp,
1232 address entry_point,
1233 int number_of_arguments,
1234 bool check_exceptions) {
1236 address before_call_pc;
1237 // determine java_thread register
1238 if (!java_thread->is_valid()) {
1239 #ifndef OPT_THREAD
1240 java_thread = T2;
1241 get_thread(java_thread);
1242 #else
1243 java_thread = TREG;
1244 #endif
1245 }
1246 // determine last_java_sp register
1247 if (!last_java_sp->is_valid()) {
1248 last_java_sp = SP;
1249 }
1250 // debugging support
1251 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
1252 assert(number_of_arguments <= 4 , "cannot have negative number of arguments");
1253 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
1254 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
1256 assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
1258 // set last Java frame before call
1259 before_call_pc = (address)pc();
1260 set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
1262 // do the call
1263 move(A0, java_thread);
1264 call(entry_point, relocInfo::runtime_call_type);
1265 delayed()->nop();
1267 // restore the thread (cannot use the pushed argument since arguments
1268 // may be overwritten by C code generated by an optimizing compiler);
1269 // however can use the register value directly if it is callee saved.
1270 #ifndef OPT_THREAD
1271 get_thread(java_thread);
1272 #else
1273 #ifdef ASSERT
1274 {
1275 Label L;
1276 get_thread(AT);
1277 beq(java_thread, AT, L);
1278 delayed()->nop();
1279 stop("MacroAssembler::call_VM_base: TREG not callee saved?");
1280 bind(L);
1281 }
1282 #endif
1283 #endif
1285 // discard thread and arguments
1286 ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1287 // reset last Java frame
1288 reset_last_Java_frame(java_thread, false, true);
1290 check_and_handle_popframe(java_thread);
1291 check_and_handle_earlyret(java_thread);
1292 if (check_exceptions) {
1293 // check for pending exceptions (java_thread is set upon return)
1294 Label L;
1295 #ifdef _LP64
1296 ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1297 #else
1298 lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1299 #endif
1300 beq(AT, R0, L);
1301 delayed()->nop();
1302 li(AT, before_call_pc);
1303 push(AT);
1304 jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1305 delayed()->nop();
1306 bind(L);
1307 }
1309 // get oop result if there is one and reset the value in the thread
1310 if (oop_result->is_valid()) {
1311 #ifdef _LP64
1312 ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1313 sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1314 #else
1315 lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1316 sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1317 #endif
1318 verify_oop(oop_result);
1319 }
1320 }
1322 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1324 move(V0, SP);
1325 //we also reserve space for java_thread here
1326 #ifndef _LP64
1327 daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
1328 #endif
1329 move(AT, -(StackAlignmentInBytes));
1330 andr(SP, SP, AT);
1331 call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
1333 }
1335 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
1336 call_VM_leaf_base(entry_point, number_of_arguments);
1337 }
1339 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
1340 if (arg_0 != A0) move(A0, arg_0);
1341 call_VM_leaf(entry_point, 1);
1342 }
1344 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1345 if (arg_0 != A0) move(A0, arg_0);
1346 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
1347 call_VM_leaf(entry_point, 2);
1348 }
1350 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1351 if (arg_0 != A0) move(A0, arg_0);
1352 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
1353 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
1354 call_VM_leaf(entry_point, 3);
1355 }
1356 void MacroAssembler::super_call_VM_leaf(address entry_point) {
1357 MacroAssembler::call_VM_leaf_base(entry_point, 0);
1358 }
1361 void MacroAssembler::super_call_VM_leaf(address entry_point,
1362 Register arg_1) {
1363 if (arg_1 != A0) move(A0, arg_1);
1364 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1365 }
1368 void MacroAssembler::super_call_VM_leaf(address entry_point,
1369 Register arg_1,
1370 Register arg_2) {
1371 if (arg_1 != A0) move(A0, arg_1);
1372 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
1373 MacroAssembler::call_VM_leaf_base(entry_point, 2);
1374 }
1375 void MacroAssembler::super_call_VM_leaf(address entry_point,
1376 Register arg_1,
1377 Register arg_2,
1378 Register arg_3) {
1379 if (arg_1 != A0) move(A0, arg_1);
1380 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
1381 if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
1382 MacroAssembler::call_VM_leaf_base(entry_point, 3);
1383 }
1385 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
1386 }
1388 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
1389 }
1391 void MacroAssembler::null_check(Register reg, int offset) {
1392 if (needs_explicit_null_check(offset)) {
1393 // provoke OS NULL exception if reg = NULL by
1394 // accessing M[reg] w/o changing any (non-CC) registers
1395 // NOTE: cmpl is plenty here to provoke a segv
1396 lw(AT, reg, 0);
1397 // Note: should probably use testl(rax, Address(reg, 0));
1398 // may be shorter code (however, this version of
1399 // testl needs to be implemented first)
1400 } else {
1401 // nothing to do, (later) access of M[reg + offset]
1402 // will provoke OS NULL exception if reg = NULL
1403 }
1404 }
1406 void MacroAssembler::enter() {
1407 push2(RA, FP);
1408 move(FP, SP);
1409 }
1411 void MacroAssembler::leave() {
1412 #ifndef _LP64
1413 //move(SP, FP);
1414 //pop2(FP, RA);
1415 addi(SP, FP, 2 * wordSize);
1416 lw(RA, SP, - 1 * wordSize);
1417 lw(FP, SP, - 2 * wordSize);
1418 #else
1419 daddi(SP, FP, 2 * wordSize);
1420 ld(RA, SP, - 1 * wordSize);
1421 ld(FP, SP, - 2 * wordSize);
1422 #endif
1423 }
1424 /*
1425 void MacroAssembler::os_breakpoint() {
1426 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
1427 // (e.g., MSVC can't call ps() otherwise)
1428 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
1429 }
1430 */
1431 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
1432 // determine java_thread register
1433 if (!java_thread->is_valid()) {
1434 #ifndef OPT_THREAD
1435 java_thread = T1;
1436 get_thread(java_thread);
1437 #else
1438 java_thread = TREG;
1439 #endif
1440 }
1441 // we must set sp to zero to clear frame
1442 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1443 // must clear fp, so that compiled frames are not confused; it is possible
1444 // that we need it only for debugging
1445 if(clear_fp)
1446 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
1448 if (clear_pc)
1449 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
1450 }
1452 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
1453 bool clear_pc) {
1454 Register thread = TREG;
1455 #ifndef OPT_THREAD
1456 get_thread(thread);
1457 #endif
1458 // we must set sp to zero to clear frame
1459 sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
1460 // must clear fp, so that compiled frames are not confused; it is
1461 // possible that we need it only for debugging
1462 if (clear_fp) {
1463 sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
1464 }
1466 if (clear_pc) {
1467 sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
1468 }
1469 }
1471 // Write serialization page so VM thread can do a pseudo remote membar.
1472 // We use the current thread pointer to calculate a thread specific
1473 // offset to write to within the page. This minimizes bus traffic
1474 // due to cache line collision.
1475 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
1476 move(tmp, thread);
1477 srl(tmp, tmp,os::get_serialize_page_shift_count());
1478 move(AT, (os::vm_page_size() - sizeof(int)));
1479 andr(tmp, tmp,AT);
1480 sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page()));
1481 }
1483 // Calls to C land
1484 //
1485 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
1486 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
1487 // has to be reset to 0. This is required to allow proper stack traversal.
1488 void MacroAssembler::set_last_Java_frame(Register java_thread,
1489 Register last_java_sp,
1490 Register last_java_fp,
1491 address last_java_pc) {
1492 // determine java_thread register
1493 if (!java_thread->is_valid()) {
1494 #ifndef OPT_THREAD
1495 java_thread = T2;
1496 get_thread(java_thread);
1497 #else
1498 java_thread = TREG;
1499 #endif
1500 }
1501 // determine last_java_sp register
1502 if (!last_java_sp->is_valid()) {
1503 last_java_sp = SP;
1504 }
1506 // last_java_fp is optional
1508 if (last_java_fp->is_valid()) {
1509 st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
1510 }
1512 // last_java_pc is optional
1514 if (last_java_pc != NULL) {
1515 relocate(relocInfo::internal_pc_type);
1516 patchable_set48(AT, (long)last_java_pc);
1517 st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
1518 }
1519 st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1520 }
1522 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
1523 Register last_java_fp,
1524 address last_java_pc) {
1525 // determine last_java_sp register
1526 if (!last_java_sp->is_valid()) {
1527 last_java_sp = SP;
1528 }
1530 Register thread = TREG;
1531 #ifndef OPT_THREAD
1532 get_thread(thread);
1533 #endif
1534 // last_java_fp is optional
1535 if (last_java_fp->is_valid()) {
1536 sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
1537 }
1539 // last_java_pc is optional
1540 if (last_java_pc != NULL) {
1541 Address java_pc(thread,
1542 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
1543 li(AT, (intptr_t)(last_java_pc));
1544 sd(AT, java_pc);
1545 }
1547 sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
1548 }
1550 //////////////////////////////////////////////////////////////////////////////////
1551 #if INCLUDE_ALL_GCS
// G1 pre-write barrier: records the value about to be overwritten in the
// thread's SATB mark queue.
//
// Emitted logic, in order:
//   - load the queue's `active` flag; if zero, skip everything;
//   - if obj is a real register, load the previous value from [obj + 0]
//     into pre_val (otherwise pre_val must already hold it);
//   - if pre_val is null, skip;
//   - if the queue index is non-zero: decrement it by wordSize, store it
//     back and store pre_val at buffer + index;
//   - otherwise call SharedRuntime::g1_wb_pre(pre_val, thread), saving and
//     restoring V0 (if tosca_live), obj and pre_val around the call.
//
// obj         - address register of the field, or noreg if pre_val is preloaded
// pre_val     - register for the previous value (must not be noreg)
// thread      - thread register (must be TREG on LP64)
// tmp         - scratch register
// tosca_live  - whether V0 must be preserved across the runtime call
// expand_call - call call_VM_leaf_base directly instead of call_VM_leaf
//
// AT and tmp are clobbered.
1553 void MacroAssembler::g1_write_barrier_pre(Register obj,
1554 Register pre_val,
1555 Register thread,
1556 Register tmp,
1557 bool tosca_live,
1558 bool expand_call) {
1560 // If expand_call is true then we expand the call_VM_leaf macro
1561 // directly to skip generating the check by
1562 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
1564 #ifdef _LP64
1565 assert(thread == TREG, "must be");
1566 #endif // _LP64
1568 Label done;
1569 Label runtime;
1571 assert(pre_val != noreg, "check this code");
1573 if (obj != noreg) {
1574 assert_different_registers(obj, pre_val, tmp);
1575 assert(pre_val != V0, "check this code");
1576 }
// Addresses of the SATB queue fields inside the thread object.
1578 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1579 PtrQueue::byte_offset_of_active()));
1580 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1581 PtrQueue::byte_offset_of_index()));
1582 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1583 PtrQueue::byte_offset_of_buf()));
1586 // Is marking active?
1587 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
1588 lw(AT, in_progress);
1589 } else {
1590 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
1591 lb(AT, in_progress);
1592 }
1593 beq(AT, R0, done);
1594 nop();
1596 // Do we need to load the previous value?
1597 if (obj != noreg) {
1598 load_heap_oop(pre_val, Address(obj, 0));
1599 }
1601 // Is the previous value null?
1602 beq(pre_val, R0, done);
1603 nop();
1605 // Can we store original value in the thread's buffer?
1606 // Is index == 0?
1607 // (The index field is typed as size_t.)
1609 ld(tmp, index);
1610 beq(tmp, R0, runtime);
1611 nop();
// index -= wordSize; tmp = buffer + index
1613 daddiu(tmp, tmp, -1 * wordSize);
1614 sd(tmp, index);
1615 ld(AT, buffer);
1616 daddu(tmp, tmp, AT);
1618 // Record the previous value
1619 sd(pre_val, tmp, 0);
1620 beq(R0, R0, done);
1621 nop();
1623 bind(runtime);
1624 // save the live input values
1625 if (tosca_live) push(V0);
1627 if (obj != noreg && obj != V0) push(obj);
1629 if (pre_val != V0) push(pre_val);
1631 // Calling the runtime using the regular call_VM_leaf mechanism generates
1632 // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
1633 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
1634 //
1635 // If we are generating the pre-barrier without a frame (e.g. in the
1636 // intrinsified Reference.get() routine) then ebp might be pointing to
1637 // the caller frame and so this check will most likely fail at runtime.
1638 //
1639 // Expanding the call directly bypasses the generation of the check.
1640 // So when we do not have a full interpreter frame on the stack
1641 // expand_call should be passed true.
1643 NOT_LP64( push(thread); )
1645 if (expand_call) {
1646 LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
1647 if (thread != A1) move(A1, thread);
1648 if (pre_val != A0) move(A0, pre_val);
1649 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
1650 } else {
1651 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
1652 }
1654 NOT_LP64( pop(thread); )
1656 // restore the live input values (reverse order of the pushes above)
1657 if (pre_val != V0)
1658 pop(pre_val);
1660 if (obj != noreg && obj != V0)
1661 pop(obj);
1663 if(tosca_live) pop(V0);
1665 bind(done);
1666 }
// G1 post-write barrier: dirties and logs the card covering store_addr when
// a non-null reference crossing heap regions has been stored.
//
// Emitted logic, in order:
//   - skip if (store_addr ^ new_val) >> HeapRegion::LogOfHRGrainBytes is zero;
//   - skip if new_val is null;
//   - card_addr = (store_addr >> card_shift) + byte_map_base;
//   - skip if the card byte equals g1_young_card_val();
//   - sync(), reload the card byte, skip if it equals dirty_card_val();
//   - store dirty_card_val() into the card;
//   - if the dirty-card queue index is non-zero: decrement it by wordSize and
//     store card_addr at buffer + index; otherwise call
//     SharedRuntime::g1_wb_post(card_addr, thread) with store_addr and
//     new_val saved around the call.
//
// store_addr - register holding the address that was written
// new_val    - register holding the value that was written
// thread     - thread register (must be TREG on LP64)
// tmp, tmp2  - scratch registers (must not be AT); used as card_addr and
//              cardtable / buffer pointer
//
// AT, tmp and tmp2 are clobbered.
1668 void MacroAssembler::g1_write_barrier_post(Register store_addr,
1669 Register new_val,
1670 Register thread,
1671 Register tmp,
1672 Register tmp2) {
1673 assert(tmp != AT, "must be");
1674 assert(tmp2 != AT, "must be");
1675 #ifdef _LP64
1676 assert(thread == TREG, "must be");
1677 #endif // _LP64
// Addresses of the dirty-card queue fields inside the thread object.
1679 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1680 PtrQueue::byte_offset_of_index()));
1681 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1682 PtrQueue::byte_offset_of_buf()));
1684 BarrierSet* bs = Universe::heap()->barrier_set();
1685 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1686 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1688 Label done;
1689 Label runtime;
1691 // Does store cross heap regions?
1692 xorr(AT, store_addr, new_val);
1693 dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
1694 beq(AT, R0, done);
1695 nop();
1698 // crosses regions, storing NULL?
1699 beq(new_val, R0, done);
1700 nop();
1702 // storing region crossing non-NULL, is card already dirty?
1703 const Register card_addr = tmp;
1704 const Register cardtable = tmp2;
1706 move(card_addr, store_addr);
1707 dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
1708 // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
1709 // a valid address and therefore is not properly handled by the relocation code.
1710 set64(cardtable, (intptr_t)ct->byte_map_base);
1711 daddu(card_addr, card_addr, cardtable);
// AT = card - young_card_val; zero means the card is a young card
1713 lb(AT, card_addr, 0);
1714 daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
1715 beq(AT, R0, done);
1716 nop();
// Re-read the card after a sync; AT = card - dirty_card_val
1718 sync();
1719 lb(AT, card_addr, 0);
1720 daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val());
1721 beq(AT, R0, done);
1722 nop();
1725 // storing a region crossing, non-NULL oop, card is clean.
1726 // dirty card and log.
1727 move(AT, (int)CardTableModRefBS::dirty_card_val());
1728 sb(AT, card_addr, 0);
// NOTE(review): the index is read and written here with 32-bit lw/sw and
// then re-read with 64-bit ld on LP64, while g1_write_barrier_pre treats the
// corresponding SATB index as size_t and uses ld/sd - confirm the widths.
1730 lw(AT, queue_index);
1731 beq(AT, R0, runtime);
1732 nop();
1733 daddiu(AT, AT, -1 * wordSize);
1734 sw(AT, queue_index);
// tmp2 is reused here as the buffer base (cardtable is no longer needed)
1735 ld(tmp2, buffer);
1736 #ifdef _LP64
1737 ld(AT, queue_index);
1738 daddu(tmp2, tmp2, AT);
1739 sd(card_addr, tmp2, 0);
1740 #else
1741 lw(AT, queue_index);
1742 addu32(tmp2, tmp2, AT);
1743 sw(card_addr, tmp2, 0);
1744 #endif
1745 beq(R0, R0, done);
1746 nop();
1748 bind(runtime);
1749 // save the live input values
1750 push(store_addr);
1751 push(new_val);
1752 #ifdef _LP64
1753 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
1754 #else
1755 push(thread);
1756 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
1757 pop(thread);
1758 #endif
// restore the live input values (reverse order of the pushes above)
1759 pop(new_val);
1760 pop(store_addr);
1762 bind(done);
1763 }
1765 #endif // INCLUDE_ALL_GCS
1766 //////////////////////////////////////////////////////////////////////////////////
1769 void MacroAssembler::store_check(Register obj) {
1770 // Does a store check for the oop in register obj. The content of
1771 // register obj is destroyed afterwards.
1772 store_check_part_1(obj);
1773 store_check_part_2(obj);
1774 }
1776 void MacroAssembler::store_check(Register obj, Address dst) {
1777 store_check(obj);
1778 }
1781 // split the store check operation so that other instructions can be scheduled inbetween
1782 void MacroAssembler::store_check_part_1(Register obj) {
1783 BarrierSet* bs = Universe::heap()->barrier_set();
1784 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
1785 #ifdef _LP64
1786 dsrl(obj, obj, CardTableModRefBS::card_shift);
1787 #else
1788 shr(obj, CardTableModRefBS::card_shift);
1789 #endif
1790 }
1792 void MacroAssembler::store_check_part_2(Register obj) {
1793 BarrierSet* bs = Universe::heap()->barrier_set();
1794 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
1795 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1796 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1798 set64(AT, (long)ct->byte_map_base);
1799 #ifdef _LP64
1800 dadd(AT, AT, obj);
1801 #else
1802 add(AT, AT, obj);
1803 #endif
1804 if (UseConcMarkSweepGC) sync();
1805 sb(R0, AT, 0);
1806 }
1808 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
1809 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1810 Register t1, Register t2, Label& slow_case) {
1811 assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
1813 Register end = t2;
1814 #ifndef OPT_THREAD
1815 Register thread = t1;
1816 get_thread(thread);
1817 #else
1818 Register thread = TREG;
1819 #endif
1820 verify_tlab(t1, t2);//blows t1&t2
1822 ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
1824 if (var_size_in_bytes == NOREG) {
1825 // i dont think we need move con_size_in_bytes to a register first.
1826 // by yjl 8/17/2005
1827 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1828 addi(end, obj, con_size_in_bytes);
1829 } else {
1830 add(end, obj, var_size_in_bytes);
1831 }
1833 ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
1834 sltu(AT, AT, end);
1835 bne_far(AT, R0, slow_case);
1836 delayed()->nop();
1839 // update the tlab top pointer
1840 st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
1842 // recover var_size_in_bytes if necessary
1843 /*if (var_size_in_bytes == end) {
1844 sub(var_size_in_bytes, end, obj);
1845 }*/
1847 verify_tlab(t1, t2);
1848 }
1850 // Defines obj, preserves var_size_in_bytes
1851 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1852 Register t1, Register t2, Label& slow_case) {
1853 assert_different_registers(obj, var_size_in_bytes, t1, AT);
1854 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1855 // No allocation in the shared eden.
1856 b_far(slow_case);
1857 delayed()->nop();
1858 } else {
1860 #ifndef _LP64
1861 Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
1862 lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
1863 #else
1864 Address heap_top(t1);
1865 li(t1, (long)Universe::heap()->top_addr());
1866 #endif
1867 ld_ptr(obj, heap_top);
1869 Register end = t2;
1870 Label retry;
1872 bind(retry);
1873 if (var_size_in_bytes == NOREG) {
1874 // i dont think we need move con_size_in_bytes to a register first.
1875 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1876 addi(end, obj, con_size_in_bytes);
1877 } else {
1878 add(end, obj, var_size_in_bytes);
1879 }
1880 // if end < obj then we wrapped around => object too long => slow case
1881 sltu(AT, end, obj);
1882 bne_far(AT, R0, slow_case);
1883 delayed()->nop();
1885 li(AT, (long)Universe::heap()->end_addr());
1886 sltu(AT, AT, end);
1887 bne_far(AT, R0, slow_case);
1888 delayed()->nop();
1889 // Compare obj with the top addr, and if still equal, store the new top addr in
1890 // end at the address of the top addr pointer. Sets ZF if was equal, and clears
1891 // it otherwise. Use lock prefix for atomicity on MPs.
1892 //if (os::is_MP()) {
1893 // sync();
1894 //}
1896 // if someone beat us on the allocation, try again, otherwise continue
1897 cmpxchg(end, heap_top, obj);
1898 beq_far(AT, R0, retry); //by yyq
1899 delayed()->nop();
1901 }
1902 }
1904 // C2 doesn't invoke this one.
//
// Emits the TLAB refill slow path. Uses T0 (top), T1, T9, T3 and T8 (thread)
// as fixed temporaries, plus AT.
//
// Emitted logic, in order:
//   - if inline contiguous allocation is not available, branch to slow_case;
//   - compute the free space left in the current TLAB (in heap words);
//   - if it exceeds the thread's refill waste limit: bump the limit by
//     refill_waste_limit_increment() and branch to try_eden (TLAB retained);
//   - otherwise discard the TLAB: if it exists, overwrite the remaining space
//     with an int-array header (mark word, length, klass);
//   - allocate a new TLAB of tlab_size words with eden_allocate(), install
//     start/top/end in the thread and branch to retry.
//
// retry     - target after a successful refill
// try_eden  - target when the current TLAB is kept
// slow_case - target when the refill cannot be done inline
1905 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1906 Register top = T0;
1907 Register t1 = T1;
1908 /* Jin: tlab_refill() is called in
1910 [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
1912 In generate_code_for(), T2 has been assigned as a register (length), which is used
1913 after calling tlab_refill();
1914 Therefore, tlab_refill() should not use T2.
1916 Symptom observed when T2 was clobbered:
1918 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
1919 at java.lang.System.arraycopy(Native Method)
1920 at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array
1921 at sun.misc.Resource.getBytes(Resource.java:117)
1922 at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
1923 at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
1924 at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
1925 */
1926 Register t2 = T9;
1927 Register t3 = T3;
1928 Register thread_reg = T8;
1929 Label do_refill, discard_tlab;
1930 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1931 // No allocation in the shared eden.
1932 b(slow_case);
1933 delayed()->nop();
1934 }
1936 get_thread(thread_reg);
1938 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1939 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1941 // calculate amount of free space (t1 = (end - top) in heap words)
1942 sub(t1, t1, top);
1943 shr(t1, LogHeapWordSize);
1945 // Retain tlab and allocate object in shared space if
1946 // the amount free in the tlab is too large to discard.
1947 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1948 slt(AT, t2, t1);
1949 beq(AT, R0, discard_tlab);
1950 delayed()->nop();
1952 // Retain: raise the waste limit and fall back to eden allocation
1954 #ifndef _LP64
1955 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1956 #else
1957 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1958 #endif
1959 add(t2, t2, AT);
1960 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1962 if (TLABStats) {
1963 // increment number of slow_allocations
1964 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1965 addiu(AT, AT, 1);
1966 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1967 }
1968 b(try_eden);
1969 delayed()->nop();
1971 bind(discard_tlab);
1972 if (TLABStats) {
1973 // increment number of refills
1974 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1975 addi(AT, AT, 1);
1976 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1977 // accumulate wastage -- t1 is amount free in tlab
1978 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1979 add(AT, AT, t1);
1980 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1981 }
1983 // if tlab is currently allocated (top or end != null) then
1984 // fill [top, end + alignment_reserve) with array object
1985 beq(top, R0, do_refill);
1986 delayed()->nop();
1988 // set up the mark word
1989 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1990 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1992 // set the length to the remaining space
1993 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1994 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1995 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1996 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1998 // set klass to intArrayKlass
1999 #ifndef _LP64
2000 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
2001 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
2002 #else
2003 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
2004 ld_ptr(t1, AT, 0);
2005 #endif
2006 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
2007 store_klass(top, t1);
2009 // refill the tlab with an eden allocation
2010 bind(do_refill);
// t1 = tlab size in bytes
2011 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
2012 shl(t1, LogHeapWordSize);
2013 // add object_size ??
2014 eden_allocate(top, t1, 0, t2, t3, slow_case);
2016 // Check that t1 was preserved in eden_allocate.
2017 #ifdef ASSERT
2018 if (UseTLAB) {
2019 Label ok;
2020 assert_different_registers(thread_reg, t1);
2021 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
2022 shl(AT, LogHeapWordSize);
2023 beq(AT, t1, ok);
2024 delayed()->nop();
2025 stop("assert(t1 != tlab size)");
2026 should_not_reach_here();
2028 bind(ok);
2029 }
2030 #endif
// install the new tlab: start = top = new block, end = start + size - reserve
2031 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
2032 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
2033 add(top, top, t1);
2034 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
2035 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
2036 verify_tlab(t1, t2);
2037 b(retry);
2038 delayed()->nop();
2039 }
2041 static const double pi_4 = 0.7853981633974483;
2043 // the x86 version is to clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME
2044 // must get argument(a double) in F12/F13
2045 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
2046 //We need to preseve the register which maybe modified during the Call @Jerome
2047 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
2048 //save all modified register here
2049 // if (preserve_cpu_regs) {
2050 // }
2051 //FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9
2052 pushad();
2053 //we should preserve the stack space before we call
2054 addi(SP, SP, -wordSize * 2);
2055 switch (trig){
2056 case 's' :
2057 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
2058 delayed()->nop();
2059 break;
2060 case 'c':
2061 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
2062 delayed()->nop();
2063 break;
2064 case 't':
2065 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
2066 delayed()->nop();
2067 break;
2068 default:assert (false, "bad intrinsic");
2069 break;
2071 }
2073 addi(SP, SP, wordSize * 2);
2074 popad();
2075 // if (preserve_cpu_regs) {
2076 // }
2077 }
2079 #ifdef _LP64
2080 void MacroAssembler::li(Register rd, long imm) {
2081 if (imm <= max_jint && imm >= min_jint) {
2082 li32(rd, (int)imm);
2083 } else if (julong(imm) <= 0xFFFFFFFF) {
2084 assert_not_delayed();
2085 // lui sign-extends, so we can't use that.
2086 ori(rd, R0, julong(imm) >> 16);
2087 dsll(rd, rd, 16);
2088 ori(rd, rd, split_low(imm));
2089 //aoqi_test
2090 //} else if ((imm > 0) && ((imm >> 48) == 0)) {
2091 } else if ((imm > 0) && is_simm16(imm >> 32)) {
2092 /* A 48-bit address */
2093 li48(rd, imm);
2094 } else {
2095 li64(rd, imm);
2096 }
2097 }
2098 #else
2099 void MacroAssembler::li(Register rd, long imm) {
2100 li32(rd, (int)imm);
2101 }
2102 #endif
2104 void MacroAssembler::li32(Register reg, int imm) {
2105 if (is_simm16(imm)) {
2106 /* Jin: for imm < 0, we should use addi instead of addiu.
2107 *
2108 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2109 *
2110 * 78 move [int:-1|I] [a0|I]
2111 * : daddi a0, zero, 0xffffffff (correct)
2112 * : daddiu a0, zero, 0xffffffff (incorrect)
2113 */
2114 if (imm >= 0)
2115 addiu(reg, R0, imm);
2116 else
2117 addi(reg, R0, imm);
2118 } else {
2119 lui(reg, split_low(imm >> 16));
2120 if (split_low(imm))
2121 ori(reg, reg, split_low(imm));
2122 }
2123 }
2125 #ifdef _LP64
2126 void MacroAssembler::set64(Register d, jlong value) {
2127 assert_not_delayed();
2129 int hi = (int)(value >> 32);
2130 int lo = (int)(value & ~0);
2132 if (value == lo) { // 32-bit integer
2133 if (is_simm16(value)) {
2134 daddiu(d, R0, value);
2135 } else {
2136 lui(d, split_low(value >> 16));
2137 if (split_low(value)) {
2138 ori(d, d, split_low(value));
2139 }
2140 }
2141 } else if (hi == 0) { // hardware zero-extends to upper 32
2142 ori(d, R0, julong(value) >> 16);
2143 dsll(d, d, 16);
2144 if (split_low(value)) {
2145 ori(d, d, split_low(value));
2146 }
2147 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2148 // 4 insts
2149 li48(d, value);
2150 } else { // li64
2151 // 6 insts
2152 li64(d, value);
2153 }
2154 }
2157 int MacroAssembler::insts_for_set64(jlong value) {
2158 int hi = (int)(value >> 32);
2159 int lo = (int)(value & ~0);
2161 int count = 0;
2163 if (value == lo) { // 32-bit integer
2164 if (is_simm16(value)) {
2165 //daddiu(d, R0, value);
2166 count++;
2167 } else {
2168 //lui(d, split_low(value >> 16));
2169 count++;
2170 if (split_low(value)) {
2171 //ori(d, d, split_low(value));
2172 count++;
2173 }
2174 }
2175 } else if (hi == 0) { // hardware zero-extends to upper 32
2176 //ori(d, R0, julong(value) >> 16);
2177 //dsll(d, d, 16);
2178 count += 2;
2179 if (split_low(value)) {
2180 //ori(d, d, split_low(value));
2181 count++;
2182 }
2183 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2184 // 4 insts
2185 //li48(d, value);
2186 count += 4;
2187 } else { // li64
2188 // 6 insts
2189 //li64(d, value);
2190 count += 6;
2191 }
2193 return count;
2194 }
2196 void MacroAssembler::patchable_set48(Register d, jlong value) {
2197 assert_not_delayed();
2199 int hi = (int)(value >> 32);
2200 int lo = (int)(value & ~0);
2202 int count = 0;
2204 if (value == lo) { // 32-bit integer
2205 if (is_simm16(value)) {
2206 daddiu(d, R0, value);
2207 count += 1;
2208 } else {
2209 lui(d, split_low(value >> 16));
2210 count += 1;
2211 if (split_low(value)) {
2212 ori(d, d, split_low(value));
2213 count += 1;
2214 }
2215 }
2216 } else if (hi == 0) { // hardware zero-extends to upper 32
2217 ori(d, R0, julong(value) >> 16);
2218 dsll(d, d, 16);
2219 count += 2;
2220 if (split_low(value)) {
2221 ori(d, d, split_low(value));
2222 count += 1;
2223 }
2224 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2225 // 4 insts
2226 li48(d, value);
2227 count += 4;
2228 } else { // li64
2229 tty->print_cr("value = 0x%x", value);
2230 guarantee(false, "Not supported yet !");
2231 }
2233 for (count; count < 4; count++) {
2234 nop();
2235 }
2236 }
2238 void MacroAssembler::patchable_set32(Register d, jlong value) {
2239 assert_not_delayed();
2241 int hi = (int)(value >> 32);
2242 int lo = (int)(value & ~0);
2244 int count = 0;
2246 if (value == lo) { // 32-bit integer
2247 if (is_simm16(value)) {
2248 daddiu(d, R0, value);
2249 count += 1;
2250 } else {
2251 lui(d, split_low(value >> 16));
2252 count += 1;
2253 if (split_low(value)) {
2254 ori(d, d, split_low(value));
2255 count += 1;
2256 }
2257 }
2258 } else if (hi == 0) { // hardware zero-extends to upper 32
2259 ori(d, R0, julong(value) >> 16);
2260 dsll(d, d, 16);
2261 count += 2;
2262 if (split_low(value)) {
2263 ori(d, d, split_low(value));
2264 count += 1;
2265 }
2266 } else {
2267 tty->print_cr("value = 0x%x", value);
2268 guarantee(false, "Not supported yet !");
2269 }
2271 for (count; count < 3; count++) {
2272 nop();
2273 }
2274 }
2276 void MacroAssembler::patchable_call32(Register d, jlong value) {
2277 assert_not_delayed();
2279 int hi = (int)(value >> 32);
2280 int lo = (int)(value & ~0);
2282 int count = 0;
2284 if (value == lo) { // 32-bit integer
2285 if (is_simm16(value)) {
2286 daddiu(d, R0, value);
2287 count += 1;
2288 } else {
2289 lui(d, split_low(value >> 16));
2290 count += 1;
2291 if (split_low(value)) {
2292 ori(d, d, split_low(value));
2293 count += 1;
2294 }
2295 }
2296 } else {
2297 tty->print_cr("value = 0x%x", value);
2298 guarantee(false, "Not supported yet !");
2299 }
2301 for (count; count < 2; count++) {
2302 nop();
2303 }
2304 }
2306 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2307 assert(UseCompressedClassPointers, "should only be used for compressed header");
2308 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2310 int klass_index = oop_recorder()->find_index(k);
2311 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2312 long narrowKlass = (long)Klass::encode_klass(k);
2314 relocate(rspec, Assembler::narrow_oop_operand);
2315 patchable_set48(dst, narrowKlass);
2316 }
2319 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2320 assert(UseCompressedOops, "should only be used for compressed header");
2321 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2323 int oop_index = oop_recorder()->find_index(obj);
2324 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2326 relocate(rspec, Assembler::narrow_oop_operand);
2327 patchable_set48(dst, oop_index);
2328 }
2330 void MacroAssembler::li64(Register rd, long imm) {
2331 assert_not_delayed();
2332 lui(rd, imm >> 48);
2333 ori(rd, rd, split_low(imm >> 32));
2334 dsll(rd, rd, 16);
2335 ori(rd, rd, split_low(imm >> 16));
2336 dsll(rd, rd, 16);
2337 ori(rd, rd, split_low(imm));
2338 }
2340 void MacroAssembler::li48(Register rd, long imm) {
2341 assert_not_delayed();
2342 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2343 lui(rd, imm >> 32);
2344 ori(rd, rd, split_low(imm >> 16));
2345 dsll(rd, rd, 16);
2346 ori(rd, rd, split_low(imm));
2347 }
2348 #endif
2349 // NOTE: i dont push eax as i486.
2350 // the x86 save eax for it use eax as the jump register
2351 void MacroAssembler::verify_oop(Register reg, const char* s) {
2352 /*
2353 if (!VerifyOops) return;
2355 // Pass register number to verify_oop_subroutine
2356 char* b = new char[strlen(s) + 50];
2357 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
2358 push(rax); // save rax,
2359 push(reg); // pass register argument
2360 ExternalAddress buffer((address) b);
2361 // avoid using pushptr, as it modifies scratch registers
2362 // and our contract is not to modify anything
2363 movptr(rax, buffer.addr());
2364 push(rax);
2365 // call indirectly to solve generation ordering problem
2366 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2367 call(rax);
2368 */
2369 if (!VerifyOops) return;
2370 const char * b = NULL;
2371 stringStream ss;
2372 ss.print("verify_oop: %s: %s", reg->name(), s);
2373 b = code_string(ss.as_string());
2374 #ifdef _LP64
2375 pushad();
2376 move(A1, reg);
2377 li(A0, (long)b);
2378 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2379 ld(T9, AT, 0);
2380 jalr(T9);
2381 delayed()->nop();
2382 popad();
2383 #else
2384 // Pass register number to verify_oop_subroutine
2385 sw(T0, SP, - wordSize);
2386 sw(T1, SP, - 2*wordSize);
2387 sw(RA, SP, - 3*wordSize);
2388 sw(A0, SP ,- 4*wordSize);
2389 sw(A1, SP ,- 5*wordSize);
2390 sw(AT, SP ,- 6*wordSize);
2391 sw(T9, SP ,- 7*wordSize);
2392 addiu(SP, SP, - 7 * wordSize);
2393 move(A1, reg);
2394 li(A0, (long)b);
2395 // call indirectly to solve generation ordering problem
2396 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2397 lw(T9, AT, 0);
2398 jalr(T9);
2399 delayed()->nop();
2400 lw(T0, SP, 6* wordSize);
2401 lw(T1, SP, 5* wordSize);
2402 lw(RA, SP, 4* wordSize);
2403 lw(A0, SP, 3* wordSize);
2404 lw(A1, SP, 2* wordSize);
2405 lw(AT, SP, 1* wordSize);
2406 lw(T9, SP, 0* wordSize);
2407 addiu(SP, SP, 7 * wordSize);
2408 #endif
2409 }
2412 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2413 if (!VerifyOops) {
2414 nop();
2415 return;
2416 }
2417 // Pass register number to verify_oop_subroutine
2418 const char * b = NULL;
2419 stringStream ss;
2420 ss.print("verify_oop_addr: %s", s);
2421 b = code_string(ss.as_string());
2423 st_ptr(T0, SP, - wordSize);
2424 st_ptr(T1, SP, - 2*wordSize);
2425 st_ptr(RA, SP, - 3*wordSize);
2426 st_ptr(A0, SP, - 4*wordSize);
2427 st_ptr(A1, SP, - 5*wordSize);
2428 st_ptr(AT, SP, - 6*wordSize);
2429 st_ptr(T9, SP, - 7*wordSize);
2430 ld_ptr(A1, addr); // addr may use SP, so load from it before change SP
2431 addiu(SP, SP, - 7 * wordSize);
2433 li(A0, (long)b);
2434 // call indirectly to solve generation ordering problem
2435 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2436 ld_ptr(T9, AT, 0);
2437 jalr(T9);
2438 delayed()->nop();
2439 ld_ptr(T0, SP, 6* wordSize);
2440 ld_ptr(T1, SP, 5* wordSize);
2441 ld_ptr(RA, SP, 4* wordSize);
2442 ld_ptr(A0, SP, 3* wordSize);
2443 ld_ptr(A1, SP, 2* wordSize);
2444 ld_ptr(AT, SP, 1* wordSize);
2445 ld_ptr(T9, SP, 0* wordSize);
2446 addiu(SP, SP, 7 * wordSize);
2447 }
2449 // used registers : T0, T1
2450 void MacroAssembler::verify_oop_subroutine() {
2451 // RA: ra
2452 // A0: char* error message
2453 // A1: oop object to verify
2455 Label exit, error;
2456 // increment counter
2457 li(T0, (long)StubRoutines::verify_oop_count_addr());
2458 lw(AT, T0, 0);
2459 #ifdef _LP64
2460 daddi(AT, AT, 1);
2461 #else
2462 addi(AT, AT, 1);
2463 #endif
2464 sw(AT, T0, 0);
2466 // make sure object is 'reasonable'
2467 beq(A1, R0, exit); // if obj is NULL it is ok
2468 delayed()->nop();
2470 // Check if the oop is in the right area of memory
2471 //const int oop_mask = Universe::verify_oop_mask();
2472 //const int oop_bits = Universe::verify_oop_bits();
2473 const uintptr_t oop_mask = Universe::verify_oop_mask();
2474 const uintptr_t oop_bits = Universe::verify_oop_bits();
2475 li(AT, oop_mask);
2476 andr(T0, A1, AT);
2477 li(AT, oop_bits);
2478 bne(T0, AT, error);
2479 delayed()->nop();
2481 // make sure klass is 'reasonable'
2482 //add for compressedoops
2483 reinit_heapbase();
2484 //add for compressedoops
2485 load_klass(T0, A1);
2486 beq(T0, R0, error); // if klass is NULL it is broken
2487 delayed()->nop();
2488 #if 0
2489 //FIXME:wuhui.
2490 // Check if the klass is in the right area of memory
2491 //const int klass_mask = Universe::verify_klass_mask();
2492 //const int klass_bits = Universe::verify_klass_bits();
2493 const uintptr_t klass_mask = Universe::verify_klass_mask();
2494 const uintptr_t klass_bits = Universe::verify_klass_bits();
2496 li(AT, klass_mask);
2497 andr(T1, T0, AT);
2498 li(AT, klass_bits);
2499 bne(T1, AT, error);
2500 delayed()->nop();
2501 // make sure klass' klass is 'reasonable'
2502 //add for compressedoops
2503 load_klass(T0, T0);
2504 beq(T0, R0, error); // if klass' klass is NULL it is broken
2505 delayed()->nop();
2507 li(AT, klass_mask);
2508 andr(T1, T0, AT);
2509 li(AT, klass_bits);
2510 bne(T1, AT, error);
2511 delayed()->nop(); // if klass not in right area of memory it is broken too.
2512 #endif
2513 // return if everything seems ok
2514 bind(exit);
2516 jr(RA);
2517 delayed()->nop();
2519 // handle errors
2520 bind(error);
2521 pushad();
2522 #ifndef _LP64
2523 addi(SP, SP, (-1) * wordSize);
2524 #endif
2525 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2526 delayed()->nop();
2527 #ifndef _LP64
2528 addiu(SP, SP, 1 * wordSize);
2529 #endif
2530 popad();
2531 jr(RA);
2532 delayed()->nop();
2533 }
2535 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2536 #ifdef ASSERT
2537 assert_different_registers(t1, t2, AT);
2538 if (UseTLAB && VerifyOops) {
2539 Label next, ok;
2541 get_thread(t1);
2543 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2544 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2545 sltu(AT, t2, AT);
2546 beq(AT, R0, next);
2547 delayed()->nop();
2549 stop("assert(top >= start)");
2551 bind(next);
2552 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2553 sltu(AT, AT, t2);
2554 beq(AT, R0, ok);
2555 delayed()->nop();
2557 stop("assert(top <= end)");
2559 bind(ok);
2561 }
2562 #endif
2563 }
2564 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2565 Register tmp,
2566 int offset) {
2567 intptr_t value = *delayed_value_addr;
2568 if (value != 0)
2569 return RegisterOrConstant(value + offset);
2570 AddressLiteral a(delayed_value_addr);
2571 // load indirectly to solve generation ordering problem
2572 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2573 //ld(tmp, a);
2574 if (offset != 0)
2575 daddi(tmp,tmp, offset);
2577 return RegisterOrConstant(tmp);
2578 }
2580 void MacroAssembler::hswap(Register reg) {
2581 //short
2582 //andi(reg, reg, 0xffff);
2583 srl(AT, reg, 8);
2584 sll(reg, reg, 24);
2585 sra(reg, reg, 16);
2586 orr(reg, reg, AT);
2587 }
2589 void MacroAssembler::huswap(Register reg) {
2590 #ifdef _LP64
2591 dsrl(AT, reg, 8);
2592 dsll(reg, reg, 24);
2593 dsrl(reg, reg, 16);
2594 orr(reg, reg, AT);
2595 andi(reg, reg, 0xffff);
2596 #else
2597 //andi(reg, reg, 0xffff);
2598 srl(AT, reg, 8);
2599 sll(reg, reg, 24);
2600 srl(reg, reg, 16);
2601 orr(reg, reg, AT);
2602 #endif
2603 }
2605 // something funny to do this will only one more register AT
2606 // 32 bits
2607 void MacroAssembler::swap(Register reg) {
2608 srl(AT, reg, 8);
2609 sll(reg, reg, 24);
2610 orr(reg, reg, AT);
2611 //reg : 4 1 2 3
2612 srl(AT, AT, 16);
2613 xorr(AT, AT, reg);
2614 andi(AT, AT, 0xff);
2615 //AT : 0 0 0 1^3);
2616 xorr(reg, reg, AT);
2617 //reg : 4 1 2 1
2618 sll(AT, AT, 16);
2619 xorr(reg, reg, AT);
2620 //reg : 4 3 2 1
2621 }
2623 #ifdef _LP64
2625 /* do 32-bit CAS using MIPS64 lld/scd
Jin: cas_int should only compare 32 bits of the memory value.
However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
plus the high 32 bits of the memory value, are stored together with SCD.
2633 Example:
2635 double d = 3.1415926;
2636 System.err.println("hello" + d);
2638 sun.misc.FloatingDecimal$1.<init>()
2639 |
2640 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2642 38 cas_int [a7a7|J] [a0|I] [a6|I]
2643 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2644 // a6: 0x4ab325aa
2646 again:
2647 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2649 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2650 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2651 0x00000055647f3c68: dsll32 t8, t8, 0
2652 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2653 0x00000055647f3c70: sll zero, zero, 0
2655 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2656 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2657 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2658 0x00000055647f3c80: and v1, a6, v1
2659 0x00000055647f3c84: or at, t8, v1
2660 0x00000055647f3c88: scd at, 0x0(a7)
2661 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2662 0x00000055647f3c90: sll zero, zero, 0
2663 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2664 0x00000055647f3c98: sll zero, zero, 0
2665 nequal:
2666 0x00000055647f45a4: dadd a0, t9, zero
2667 0x00000055647f45a8: dadd at, zero, zero
2668 done:
2669 */
2671 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2672 /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
2673 Label done, again, nequal;
2675 bind(again);
2677 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2678 ll(AT, dest);
2679 bne(AT, c_reg, nequal);
2680 delayed()->nop();
2682 move(AT, x_reg);
2683 sc(AT, dest);
2684 beq(AT, R0, again);
2685 delayed()->nop();
2686 b(done);
2687 delayed()->nop();
2689 // not xchged
2690 bind(nequal);
2691 sync();
2692 move(c_reg, AT);
2693 move(AT, R0);
2695 bind(done);
2696 }
2697 #endif // cmpxchg32
2699 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2700 Label done, again, nequal;
2702 bind(again);
2703 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2704 #ifdef _LP64
2705 lld(AT, dest);
2706 #else
2707 ll(AT, dest);
2708 #endif
2709 bne(AT, c_reg, nequal);
2710 delayed()->nop();
2712 move(AT, x_reg);
2713 #ifdef _LP64
2714 scd(AT, dest);
2715 #else
2716 sc(AT, dest);
2717 #endif
2718 beq(AT, R0, again);
2719 delayed()->nop();
2720 b(done);
2721 delayed()->nop();
2723 // not xchged
2724 bind(nequal);
2725 sync();
2726 move(c_reg, AT);
2727 move(AT, R0);
2729 bind(done);
2730 }
2732 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2733 Label done, again, nequal;
2735 Register x_reg = x_regLo;
2736 dsll32(x_regHi, x_regHi, 0);
2737 dsll32(x_regLo, x_regLo, 0);
2738 dsrl32(x_regLo, x_regLo, 0);
2739 orr(x_reg, x_regLo, x_regHi);
2741 Register c_reg = c_regLo;
2742 dsll32(c_regHi, c_regHi, 0);
2743 dsll32(c_regLo, c_regLo, 0);
2744 dsrl32(c_regLo, c_regLo, 0);
2745 orr(c_reg, c_regLo, c_regHi);
2747 bind(again);
2749 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2750 lld(AT, dest);
2751 bne(AT, c_reg, nequal);
2752 delayed()->nop();
2754 //move(AT, x_reg);
2755 dadd(AT, x_reg, R0);
2756 scd(AT, dest);
2757 beq(AT, R0, again);
2758 delayed()->nop();
2759 b(done);
2760 delayed()->nop();
2762 // not xchged
2763 bind(nequal);
2764 sync();
2765 //move(c_reg, AT);
2766 //move(AT, R0);
2767 dadd(c_reg, AT, R0);
2768 dadd(AT, R0, R0);
2769 bind(done);
2770 }
2772 // be sure the three register is different
2773 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2774 assert_different_registers(tmp, fs, ft);
2775 div_s(tmp, fs, ft);
2776 trunc_l_s(tmp, tmp);
2777 cvt_s_l(tmp, tmp);
2778 mul_s(tmp, tmp, ft);
2779 sub_s(fd, fs, tmp);
2780 }
2782 // be sure the three register is different
2783 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2784 assert_different_registers(tmp, fs, ft);
2785 div_d(tmp, fs, ft);
2786 trunc_l_d(tmp, tmp);
2787 cvt_d_l(tmp, tmp);
2788 mul_d(tmp, tmp, ft);
2789 sub_d(fd, fs, tmp);
2790 }
2792 // Fast_Lock and Fast_Unlock used by C2
2794 // Because the transitions from emitted code to the runtime
2795 // monitorenter/exit helper stubs are so slow it's critical that
2796 // we inline both the stack-locking fast-path and the inflated fast path.
2797 //
2798 // See also: cmpFastLock and cmpFastUnlock.
2799 //
2800 // What follows is a specialized inline transliteration of the code
2801 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2802 // another option would be to emit TrySlowEnter and TrySlowExit methods
2803 // at startup-time. These methods would accept arguments as
2804 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2805 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2806 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2807 // In practice, however, the # of lock sites is bounded and is usually small.
2808 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2809 // if the processor uses simple bimodal branch predictors keyed by EIP
2810 // Since the helper routines would be called from multiple synchronization
2811 // sites.
2812 //
2813 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2814 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2815 // to those specialized methods. That'd give us a mostly platform-independent
2816 // implementation that the JITs could optimize and inline at their pleasure.
2817 // Done correctly, the only time we'd need to cross to native could would be
2818 // to park() or unpark() threads. We'd also need a few more unsafe operators
2819 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2820 // (b) explicit barriers or fence operations.
2821 //
2822 // TODO:
2823 //
2824 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2825 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2826 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2827 // the lock operators would typically be faster than reifying Self.
2828 //
2829 // * Ideally I'd define the primitives as:
2830 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2831 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2832 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2833 // Instead, we're stuck with a rather awkward and brittle register assignments below.
2834 // Furthermore the register assignments are overconstrained, possibly resulting in
2835 // sub-optimal code near the synchronization site.
2836 //
2837 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2838 // Alternately, use a better sp-proximity test.
2839 //
2840 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2841 // Either one is sufficient to uniquely identify a thread.
2842 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2843 //
2844 // * Intrinsify notify() and notifyAll() for the common cases where the
2845 // object is locked by the calling thread but the waitlist is empty.
2846 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2847 //
2848 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2849 // But beware of excessive branch density on AMD Opterons.
2850 //
2851 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2852 // or failure of the fast-path. If the fast-path fails then we pass
2853 // control to the slow-path, typically in C. In Fast_Lock and
2854 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2855 // will emit a conditional branch immediately after the node.
2856 // So we have branches to branches and lots of ICC.ZF games.
2857 // Instead, it might be better to have C2 pass a "FailureLabel"
2858 // into Fast_Lock and Fast_Unlock. In the case of success, control
2859 // will drop through the node. ICC.ZF is undefined at exit.
2860 // In the case of failure, the node will branch directly to the
2861 // FailureLabel
2864 // obj: object to lock
2865 // box: on-stack box address (displaced header location) - KILLED
2866 // rax,: tmp -- KILLED
2867 // scr: tmp -- KILLED
2868 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2870 // Ensure the register assignents are disjoint
2871 guarantee (objReg != boxReg, "") ;
2872 guarantee (objReg != tmpReg, "") ;
2873 guarantee (objReg != scrReg, "") ;
2874 guarantee (boxReg != tmpReg, "") ;
2875 guarantee (boxReg != scrReg, "") ;
2878 block_comment("FastLock");
2879 /*
2880 move(AT, 0x0);
2881 return;
2882 */
2883 if (PrintBiasedLockingStatistics) {
2884 push(tmpReg);
2885 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2886 pop(tmpReg);
2887 }
2889 if (EmitSync & 1) {
2890 move(AT, 0x0);
2891 return;
2892 } else
2893 if (EmitSync & 2) {
2894 Label DONE_LABEL ;
2895 if (UseBiasedLocking) {
2896 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2897 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2898 }
2900 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2901 ori(tmpReg, tmpReg, 0x1);
2902 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2904 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2905 bne(AT, R0, DONE_LABEL);
2906 delayed()->nop();
2908 // Recursive locking
2909 dsubu(tmpReg, tmpReg, SP);
2910 li(AT, (7 - os::vm_page_size() ));
2911 andr(tmpReg, tmpReg, AT);
2912 sd(tmpReg, Address(boxReg, 0));
2913 bind(DONE_LABEL) ;
2914 } else {
2915 // Possible cases that we'll encounter in fast_lock
2916 // ------------------------------------------------
2917 // * Inflated
2918 // -- unlocked
2919 // -- Locked
2920 // = by self
2921 // = by other
2922 // * biased
2923 // -- by Self
2924 // -- by other
2925 // * neutral
2926 // * stack-locked
2927 // -- by self
2928 // = sp-proximity test hits
2929 // = sp-proximity test generates false-negative
2930 // -- by other
2931 //
2933 Label IsInflated, DONE_LABEL, PopDone ;
2935 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2936 // order to reduce the number of conditional branches in the most common cases.
2937 // Beware -- there's a subtle invariant that fetch of the markword
2938 // at [FETCH], below, will never observe a biased encoding (*101b).
2939 // If this invariant is not held we risk exclusion (safety) failure.
2940 if (UseBiasedLocking && !UseOptoBiasInlining) {
2941 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2942 }
2944 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2945 andi(AT, tmpReg, markOopDesc::monitor_value);
2946 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2947 delayed()->nop();
2949 // Attempt stack-locking ...
2950 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2951 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2952 //if (os::is_MP()) {
2953 // sync();
2954 //}
2956 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2957 //AT == 1: unlocked
2959 if (PrintBiasedLockingStatistics) {
2960 Label L;
2961 beq(AT, R0, L);
2962 delayed()->nop();
2963 push(T0);
2964 push(T1);
2965 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2966 pop(T1);
2967 pop(T0);
2968 bind(L);
2969 }
2970 bne(AT, R0, DONE_LABEL);
2971 delayed()->nop();
2973 // Recursive locking
2974 // The object is stack-locked: markword contains stack pointer to BasicLock.
2975 // Locked by current thread if difference with current SP is less than one page.
2976 dsubu(tmpReg, tmpReg, SP);
2977 li(AT, 7 - os::vm_page_size() );
2978 andr(tmpReg, tmpReg, AT);
2979 sd(tmpReg, Address(boxReg, 0));
2980 if (PrintBiasedLockingStatistics) {
2981 Label L;
2982 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2983 bne(tmpReg, R0, L);
2984 delayed()->nop();
2985 push(T0);
2986 push(T1);
2987 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2988 pop(T1);
2989 pop(T0);
2990 bind(L);
2991 }
2992 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
2994 b(DONE_LABEL) ;
2995 delayed()->nop();
2997 bind(IsInflated) ;
2998 // The object's monitor m is unlocked iff m->owner == NULL,
2999 // otherwise m->owner may contain a thread or a stack address.
3001 // TODO: someday avoid the ST-before-CAS penalty by
3002 // relocating (deferring) the following ST.
3003 // We should also think about trying a CAS without having
3004 // fetched _owner. If the CAS is successful we may
3005 // avoid an RTO->RTS upgrade on the $line.
3006 // Without cast to int32_t a movptr will destroy r10 which is typically obj
3007 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
3008 sd(AT, Address(boxReg, 0));
3010 move(boxReg, tmpReg) ;
3011 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3012 // if (m->owner != 0) => AT = 0, goto slow path.
3013 move(AT, R0);
3014 bne(tmpReg, R0, DONE_LABEL);
3015 delayed()->nop();
3017 #ifndef OPT_THREAD
3018 get_thread (TREG) ;
3019 #endif
3020 // It's inflated and appears unlocked
3021 //if (os::is_MP()) {
3022 // sync();
3023 //}
3024 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
3025 // Intentional fall-through into DONE_LABEL ...
3028 // DONE_LABEL is a hot target - we'd really like to place it at the
3029 // start of cache line by padding with NOPs.
3030 // See the AMD and Intel software optimization manuals for the
3031 // most efficient "long" NOP encodings.
3032 // Unfortunately none of our alignment mechanisms suffice.
3033 bind(DONE_LABEL);
3035 // At DONE_LABEL the AT is set as follows ...
3036 // Fast_Unlock uses the same protocol.
3037 // AT == 1 -> Success
3038 // AT == 0 -> Failure - force control through the slow-path
3040 // Avoid branch-to-branch on AMD processors
3041 // This appears to be superstition.
3042 if (EmitSync & 32) nop() ;
3044 }
3045 }
3047 // obj: object to unlock
3048 // box: box address (displaced header location), killed. Must be EAX.
3049 // rbx,: killed tmp; cannot be obj nor box.
3050 //
3051 // Some commentary on balanced locking:
3052 //
3053 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3054 // Methods that don't have provably balanced locking are forced to run in the
3055 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3056 // The interpreter provides two properties:
3057 // I1: At return-time the interpreter automatically and quietly unlocks any
3058 // objects acquired the current activation (frame). Recall that the
3059 // interpreter maintains an on-stack list of locks currently held by
3060 // a frame.
3061 // I2: If a method attempts to unlock an object that is not held by the
3062 // the frame the interpreter throws IMSX.
3063 //
3064 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3065 // B() doesn't have provably balanced locking so it runs in the interpreter.
3066 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3067 // is still locked by A().
3068 //
3069 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3070 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3071 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3072 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3074 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
3076 guarantee (objReg != boxReg, "") ;
3077 guarantee (objReg != tmpReg, "") ;
3078 guarantee (boxReg != tmpReg, "") ;
3082 block_comment("FastUnlock");
3085 if (EmitSync & 4) {
3086 // Disable - inhibit all inlining. Force control through the slow-path
3087 move(AT, 0x0);
3088 return;
3089 } else
3090 if (EmitSync & 8) {
3091 Label DONE_LABEL ;
3092 if (UseBiasedLocking) {
3093 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3094 }
3095 // classic stack-locking code ...
3096 ld(tmpReg, Address(boxReg, 0)) ;
3097 beq(tmpReg, R0, DONE_LABEL) ;
3098 move(AT, 0x1); // delay slot
3100 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
3101 bind(DONE_LABEL);
3102 } else {
3103 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3105 // Critically, the biased locking test must have precedence over
3106 // and appear before the (box->dhw == 0) recursive stack-lock test.
3107 if (UseBiasedLocking && !UseOptoBiasInlining) {
3108 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3109 }
3111 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
3112 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
3113 delayed()->daddiu(AT, R0, 0x1);
3115 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3116 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
3117 beq(AT, R0, Stacked) ; // Inflated?
3118 delayed()->nop();
3120 bind(Inflated) ;
3121 // It's inflated.
3122 // Despite our balanced locking property we still check that m->_owner == Self
3123 // as java routines or native JNI code called by this thread might
3124 // have released the lock.
3125 // Refer to the comments in synchronizer.cpp for how we might encode extra
3126 // state in _succ so we can avoid fetching EntryList|cxq.
3127 //
3128 // I'd like to add more cases in fast_lock() and fast_unlock() --
3129 // such as recursive enter and exit -- but we have to be wary of
3130 // I$ bloat, T$ effects and BP$ effects.
3131 //
3132 // If there's no contention try a 1-0 exit. That is, exit without
3133 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3134 // we detect and recover from the race that the 1-0 exit admits.
3135 //
3136 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3137 // before it STs null into _owner, releasing the lock. Updates
3138 // to data protected by the critical section must be visible before
3139 // we drop the lock (and thus before any other thread could acquire
3140 // the lock and observe the fields protected by the lock).
3141 // IA32's memory-model is SPO, so STs are ordered with respect to
3142 // each other and there's no need for an explicit barrier (fence).
3143 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3144 #ifndef OPT_THREAD
3145 get_thread (TREG) ;
3146 #endif
3148 // It's inflated
3149 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3150 xorr(boxReg, boxReg, TREG);
3152 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3153 orr(boxReg, boxReg, AT);
3155 move(AT, R0);
3156 bne(boxReg, R0, DONE_LABEL);
3157 delayed()->nop();
3159 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3160 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3161 orr(boxReg, boxReg, AT);
3163 move(AT, R0);
3164 bne(boxReg, R0, DONE_LABEL);
3165 delayed()->nop();
3167 sync();
3168 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3169 move(AT, 0x1);
3170 b(DONE_LABEL);
3171 delayed()->nop();
3173 bind (Stacked);
3174 ld(tmpReg, Address(boxReg, 0)) ;
3175 //if (os::is_MP()) { sync(); }
3176 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3178 if (EmitSync & 65536) {
3179 bind (CheckSucc);
3180 }
3182 bind(DONE_LABEL);
3184 // Avoid branch to branch on AMD processors
3185 if (EmitSync & 32768) { nop() ; }
3186 }
3187 }
3189 void MacroAssembler::align(int modulus) {
3190 while (offset() % modulus != 0) nop();
3191 }
3194 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3195 //Unimplemented();
3196 }
3198 #ifdef _LP64
3199 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3201 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
3202 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3203 #else
3204 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3206 Register caller_saved_fpu_registers[] = {};
3207 #endif
3209 //We preserve all caller-saved register
3210 void MacroAssembler::pushad(){
3211 int i;
3213 /* Fixed-point registers */
3214 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3215 daddi(SP, SP, -1 * len * wordSize);
3216 for (i = 0; i < len; i++)
3217 {
3218 #ifdef _LP64
3219 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3220 #else
3221 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3222 #endif
3223 }
3225 /* Floating-point registers */
3226 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3227 daddi(SP, SP, -1 * len * wordSize);
3228 for (i = 0; i < len; i++)
3229 {
3230 #ifdef _LP64
3231 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3232 #else
3233 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3234 #endif
3235 }
3236 };
3238 void MacroAssembler::popad(){
3239 int i;
3241 /* Floating-point registers */
3242 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3243 for (i = 0; i < len; i++)
3244 {
3245 #ifdef _LP64
3246 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3247 #else
3248 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3249 #endif
3250 }
3251 daddi(SP, SP, len * wordSize);
3253 /* Fixed-point registers */
3254 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3255 for (i = 0; i < len; i++)
3256 {
3257 #ifdef _LP64
3258 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3259 #else
3260 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3261 #endif
3262 }
3263 daddi(SP, SP, len * wordSize);
3264 };
3266 void MacroAssembler::push2(Register reg1, Register reg2) {
3267 #ifdef _LP64
3268 daddi(SP, SP, -16);
3269 sd(reg2, SP, 0);
3270 sd(reg1, SP, 8);
3271 #else
3272 addi(SP, SP, -8);
3273 sw(reg2, SP, 0);
3274 sw(reg1, SP, 4);
3275 #endif
3276 }
3278 void MacroAssembler::pop2(Register reg1, Register reg2) {
3279 #ifdef _LP64
3280 ld(reg1, SP, 0);
3281 ld(reg2, SP, 8);
3282 daddi(SP, SP, 16);
3283 #else
3284 lw(reg1, SP, 0);
3285 lw(reg2, SP, 4);
3286 addi(SP, SP, 8);
3287 #endif
3288 }
3290 //for UseCompressedOops Option
3291 void MacroAssembler::load_klass(Register dst, Register src) {
3292 #ifdef _LP64
3293 if(UseCompressedClassPointers){
3294 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3295 decode_klass_not_null(dst);
3296 } else
3297 #endif
3298 ld(dst, src, oopDesc::klass_offset_in_bytes());
3299 }
3301 void MacroAssembler::store_klass(Register dst, Register src) {
3302 #ifdef _LP64
3303 if(UseCompressedClassPointers){
3304 encode_klass_not_null(src);
3305 sw(src, dst, oopDesc::klass_offset_in_bytes());
3306 } else {
3307 #endif
3308 sd(src, dst, oopDesc::klass_offset_in_bytes());
3309 }
3310 }
3312 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3313 load_klass(dst, src);
3314 ld(dst, Address(dst, Klass::prototype_header_offset()));
3315 }
3317 #ifdef _LP64
3318 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3319 if (UseCompressedClassPointers) {
3320 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3321 }
3322 }
3324 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3325 if(UseCompressedOops){
3326 lwu(dst, src);
3327 decode_heap_oop(dst);
3328 } else {
3329 ld(dst, src);
3330 }
3331 }
3333 void MacroAssembler::store_heap_oop(Address dst, Register src){
3334 if(UseCompressedOops){
3335 assert(!dst.uses(src), "not enough registers");
3336 encode_heap_oop(src);
3337 sw(src, dst);
3338 } else {
3339 sd(src, dst);
3340 }
3341 }
3343 void MacroAssembler::store_heap_oop_null(Address dst){
3344 if(UseCompressedOops){
3345 sw(R0, dst);
3346 } else {
3347 sd(R0, dst);
3348 }
3349 }
#ifdef ASSERT
// Debug-only sanity check used by the oop encode/decode helpers.  It only
// performs assembly-time assertions and emits no code; msg is not used.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
}
#endif
3359 // Algorithm must match oop.inline.hpp encode_heap_oop.
3360 void MacroAssembler::encode_heap_oop(Register r) {
3361 #ifdef ASSERT
3362 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3363 #endif
3364 verify_oop(r, "broken oop in encode_heap_oop");
3365 if (Universe::narrow_oop_base() == NULL) {
3366 if (Universe::narrow_oop_shift() != 0) {
3367 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3368 shr(r, LogMinObjAlignmentInBytes);
3369 }
3370 return;
3371 }
3373 movz(r, S5_heapbase, r);
3374 dsub(r, r, S5_heapbase);
3375 if (Universe::narrow_oop_shift() != 0) {
3376 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3377 shr(r, LogMinObjAlignmentInBytes);
3378 }
3379 }
3381 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3382 #ifdef ASSERT
3383 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3384 #endif
3385 verify_oop(src, "broken oop in encode_heap_oop");
3386 if (Universe::narrow_oop_base() == NULL) {
3387 if (Universe::narrow_oop_shift() != 0) {
3388 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3389 dsrl(dst, src, LogMinObjAlignmentInBytes);
3390 } else {
3391 if (dst != src) move(dst, src);
3392 }
3393 } else {
3394 if (dst == src) {
3395 movz(dst, S5_heapbase, dst);
3396 dsub(dst, dst, S5_heapbase);
3397 if (Universe::narrow_oop_shift() != 0) {
3398 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3399 shr(dst, LogMinObjAlignmentInBytes);
3400 }
3401 } else {
3402 dsub(dst, src, S5_heapbase);
3403 if (Universe::narrow_oop_shift() != 0) {
3404 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3405 shr(dst, LogMinObjAlignmentInBytes);
3406 }
3407 movz(dst, R0, src);
3408 }
3409 }
3410 }
3412 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3413 assert (UseCompressedOops, "should be compressed");
3414 #ifdef ASSERT
3415 if (CheckCompressedOops) {
3416 Label ok;
3417 bne(r, R0, ok);
3418 delayed()->nop();
3419 stop("null oop passed to encode_heap_oop_not_null");
3420 bind(ok);
3421 }
3422 #endif
3423 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3424 if (Universe::narrow_oop_base() != NULL) {
3425 dsub(r, r, S5_heapbase);
3426 }
3427 if (Universe::narrow_oop_shift() != 0) {
3428 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3429 shr(r, LogMinObjAlignmentInBytes);
3430 }
3432 }
3434 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3435 assert (UseCompressedOops, "should be compressed");
3436 #ifdef ASSERT
3437 if (CheckCompressedOops) {
3438 Label ok;
3439 bne(src, R0, ok);
3440 delayed()->nop();
3441 stop("null oop passed to encode_heap_oop_not_null2");
3442 bind(ok);
3443 }
3444 #endif
3445 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3447 if (Universe::narrow_oop_base() != NULL) {
3448 dsub(dst, src, S5_heapbase);
3449 if (Universe::narrow_oop_shift() != 0) {
3450 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3451 shr(dst, LogMinObjAlignmentInBytes);
3452 }
3453 } else {
3454 if (Universe::narrow_oop_shift() != 0) {
3455 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3456 dsrl(dst, src, LogMinObjAlignmentInBytes);
3457 } else {
3458 if (dst != src) move(dst, src);
3459 }
3460 }
3461 }
3463 void MacroAssembler::decode_heap_oop(Register r) {
3464 #ifdef ASSERT
3465 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3466 #endif
3467 if (Universe::narrow_oop_base() == NULL) {
3468 if (Universe::narrow_oop_shift() != 0) {
3469 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3470 shl(r, LogMinObjAlignmentInBytes);
3471 }
3472 } else {
3473 move(AT, r);
3474 if (Universe::narrow_oop_shift() != 0) {
3475 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3476 shl(r, LogMinObjAlignmentInBytes);
3477 }
3478 dadd(r, r, S5_heapbase);
3479 movz(r, R0, AT);
3480 }
3481 verify_oop(r, "broken oop in decode_heap_oop");
3482 }
3484 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3485 #ifdef ASSERT
3486 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3487 #endif
3488 if (Universe::narrow_oop_base() == NULL) {
3489 if (Universe::narrow_oop_shift() != 0) {
3490 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3491 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3492 dsll(dst, src, LogMinObjAlignmentInBytes);
3493 } else {
3494 if (dst != src) move(dst, src);
3495 }
3496 } else {
3497 if (dst == src) {
3498 move(AT, dst);
3499 if (Universe::narrow_oop_shift() != 0) {
3500 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3501 shl(dst, LogMinObjAlignmentInBytes);
3502 }
3503 dadd(dst, dst, S5_heapbase);
3504 movz(dst, R0, AT);
3505 } else {
3506 if (Universe::narrow_oop_shift() != 0) {
3507 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3508 dsll(dst, src, LogMinObjAlignmentInBytes);
3509 daddu(dst, dst, S5_heapbase);
3510 } else {
3511 daddu(dst, src, S5_heapbase);
3512 }
3513 movz(dst, R0, src);
3514 }
3515 }
3516 verify_oop(dst, "broken oop in decode_heap_oop");
3517 }
3519 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3520 // Note: it will change flags
3521 assert (UseCompressedOops, "should only be used for compressed headers");
3522 assert (Universe::heap() != NULL, "java heap should be initialized");
3523 // Cannot assert, unverified entry point counts instructions (see .ad file)
3524 // vtableStubs also counts instructions in pd_code_size_limit.
3525 // Also do not verify_oop as this is called by verify_oop.
3526 if (Universe::narrow_oop_shift() != 0) {
3527 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3528 shl(r, LogMinObjAlignmentInBytes);
3529 if (Universe::narrow_oop_base() != NULL) {
3530 daddu(r, r, S5_heapbase);
3531 }
3532 } else {
3533 assert (Universe::narrow_oop_base() == NULL, "sanity");
3534 }
3535 }
3537 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3538 assert (UseCompressedOops, "should only be used for compressed headers");
3539 assert (Universe::heap() != NULL, "java heap should be initialized");
3541 // Cannot assert, unverified entry point counts instructions (see .ad file)
3542 // vtableStubs also counts instructions in pd_code_size_limit.
3543 // Also do not verify_oop as this is called by verify_oop.
3544 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3545 if (Universe::narrow_oop_shift() != 0) {
3546 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3547 if (LogMinObjAlignmentInBytes == Address::times_8) {
3548 dsll(dst, src, LogMinObjAlignmentInBytes);
3549 daddu(dst, dst, S5_heapbase);
3550 } else {
3551 dsll(dst, src, LogMinObjAlignmentInBytes);
3552 if (Universe::narrow_oop_base() != NULL) {
3553 daddu(dst, dst, S5_heapbase);
3554 }
3555 }
3556 } else {
3557 assert (Universe::narrow_oop_base() == NULL, "sanity");
3558 if (dst != src) {
3559 move(dst, src);
3560 }
3561 }
3562 }
3564 void MacroAssembler::encode_klass_not_null(Register r) {
3565 if (Universe::narrow_klass_base() != NULL) {
3566 assert(r != AT, "Encoding a klass in AT");
3567 set64(AT, (int64_t)Universe::narrow_klass_base());
3568 dsub(r, r, AT);
3569 }
3570 if (Universe::narrow_klass_shift() != 0) {
3571 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3572 shr(r, LogKlassAlignmentInBytes);
3573 }
3574 }
3576 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3577 if (dst == src) {
3578 encode_klass_not_null(src);
3579 } else {
3580 if (Universe::narrow_klass_base() != NULL) {
3581 set64(dst, (int64_t)Universe::narrow_klass_base());
3582 dsub(dst, src, dst);
3583 if (Universe::narrow_klass_shift() != 0) {
3584 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3585 shr(dst, LogKlassAlignmentInBytes);
3586 }
3587 } else {
3588 if (Universe::narrow_klass_shift() != 0) {
3589 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3590 dsrl(dst, src, LogKlassAlignmentInBytes);
3591 } else {
3592 move(dst, src);
3593 }
3594 }
3595 }
3596 }
3598 // Function instr_size_for_decode_klass_not_null() counts the instructions
3599 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3600 // when (Universe::heap() != NULL). Hence, if the instructions they
3601 // generate change, then this method needs to be updated.
3602 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3603 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3604 if (Universe::narrow_klass_base() != NULL) {
3605 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
3606 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3607 } else {
3608 // longest load decode klass function, mov64, leaq
3609 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3610 }
3611 }
3613 void MacroAssembler::decode_klass_not_null(Register r) {
3614 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3615 assert(r != AT, "Decoding a klass in AT");
3616 // Cannot assert, unverified entry point counts instructions (see .ad file)
3617 // vtableStubs also counts instructions in pd_code_size_limit.
3618 // Also do not verify_oop as this is called by verify_oop.
3619 if (Universe::narrow_klass_shift() != 0) {
3620 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3621 shl(r, LogKlassAlignmentInBytes);
3622 }
3623 if (Universe::narrow_klass_base() != NULL) {
3624 set64(AT, (int64_t)Universe::narrow_klass_base());
3625 daddu(r, r, AT);
3626 //Not neccessary for MIPS at all.
3627 //reinit_heapbase();
3628 }
3629 }
3631 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3632 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3634 if (dst == src) {
3635 decode_klass_not_null(dst);
3636 } else {
3637 // Cannot assert, unverified entry point counts instructions (see .ad file)
3638 // vtableStubs also counts instructions in pd_code_size_limit.
3639 // Also do not verify_oop as this is called by verify_oop.
3640 set64(dst, (int64_t)Universe::narrow_klass_base());
3641 if (Universe::narrow_klass_shift() != 0) {
3642 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3643 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3644 dsll(AT, src, Address::times_8);
3645 daddu(dst, dst, AT);
3646 } else {
3647 daddu(dst, src, dst);
3648 }
3649 }
3650 }
3652 void MacroAssembler::incrementl(Register reg, int value) {
3653 if (value == min_jint) {
3654 move(AT, value);
3655 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3656 return;
3657 }
3658 if (value < 0) { decrementl(reg, -value); return; }
3659 if (value == 0) { ; return; }
3661 if(Assembler::is_simm16(value)) {
3662 NOT_LP64(addiu(reg, reg, value));
3663 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
3664 } else {
3665 move(AT, value);
3666 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3667 }
3668 }
3670 void MacroAssembler::decrementl(Register reg, int value) {
3671 if (value == min_jint) {
3672 move(AT, value);
3673 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3674 return;
3675 }
3676 if (value < 0) { incrementl(reg, -value); return; }
3677 if (value == 0) { ; return; }
3679 if (Assembler::is_simm16(value)) {
3680 NOT_LP64(addiu(reg, reg, -value));
3681 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
3682 } else {
3683 move(AT, value);
3684 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3685 }
3686 }
3688 void MacroAssembler::reinit_heapbase() {
3689 if (UseCompressedOops || UseCompressedClassPointers) {
3690 if (Universe::heap() != NULL) {
3691 if (Universe::narrow_oop_base() == NULL) {
3692 move(S5_heapbase, R0);
3693 } else {
3694 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3695 }
3696 } else {
3697 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3698 ld(S5_heapbase, S5_heapbase, 0);
3699 }
3700 }
3701 }
3702 #endif // _LP64
3704 void MacroAssembler::check_klass_subtype(Register sub_klass,
3705 Register super_klass,
3706 Register temp_reg,
3707 Label& L_success) {
3708 //implement ind gen_subtype_check
3709 Label L_failure;
3710 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3711 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3712 bind(L_failure);
3713 }
3715 SkipIfEqual::SkipIfEqual(
3716 MacroAssembler* masm, const bool* flag_addr, bool value) {
3717 _masm = masm;
3718 _masm->li(AT, (address)flag_addr);
3719 _masm->lb(AT,AT,0);
3720 _masm->addi(AT,AT,-value);
3721 _masm->beq(AT,R0,_label);
3722 _masm->delayed()->nop();
3723 }
3724 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3725 Register super_klass,
3726 Register temp_reg,
3727 Label* L_success,
3728 Label* L_failure,
3729 Label* L_slow_path,
3730 RegisterOrConstant super_check_offset) {
3731 assert_different_registers(sub_klass, super_klass, temp_reg);
3732 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3733 if (super_check_offset.is_register()) {
3734 assert_different_registers(sub_klass, super_klass,
3735 super_check_offset.as_register());
3736 } else if (must_load_sco) {
3737 assert(temp_reg != noreg, "supply either a temp or a register offset");
3738 }
3740 Label L_fallthrough;
3741 int label_nulls = 0;
3742 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3743 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3744 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3745 assert(label_nulls <= 1, "at most one NULL in the batch");
3747 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3748 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3749 // If the pointers are equal, we are done (e.g., String[] elements).
3750 // This self-check enables sharing of secondary supertype arrays among
3751 // non-primary types such as array-of-interface. Otherwise, each such
3752 // type would need its own customized SSA.
3753 // We move this check to the front of the fast path because many
3754 // type checks are in fact trivially successful in this manner,
3755 // so we get a nicely predicted branch right at the start of the check.
3756 beq(sub_klass, super_klass, *L_success);
3757 delayed()->nop();
3758 // Check the supertype display:
3759 if (must_load_sco) {
3760 // Positive movl does right thing on LP64.
3761 lwu(temp_reg, super_klass, sco_offset);
3762 super_check_offset = RegisterOrConstant(temp_reg);
3763 }
3764 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
3765 daddu(AT, sub_klass, AT);
3766 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
3768 // This check has worked decisively for primary supers.
3769 // Secondary supers are sought in the super_cache ('super_cache_addr').
3770 // (Secondary supers are interfaces and very deeply nested subtypes.)
3771 // This works in the same check above because of a tricky aliasing
3772 // between the super_cache and the primary super display elements.
3773 // (The 'super_check_addr' can address either, as the case requires.)
3774 // Note that the cache is updated below if it does not help us find
3775 // what we need immediately.
3776 // So if it was a primary super, we can just fail immediately.
3777 // Otherwise, it's the slow path for us (no success at this point).
3779 if (super_check_offset.is_register()) {
3780 beq(super_klass, AT, *L_success);
3781 delayed()->nop();
3782 addi(AT, super_check_offset.as_register(), -sc_offset);
3783 if (L_failure == &L_fallthrough) {
3784 beq(AT, R0, *L_slow_path);
3785 delayed()->nop();
3786 } else {
3787 bne(AT, R0, *L_failure);
3788 delayed()->nop();
3789 b(*L_slow_path);
3790 delayed()->nop();
3791 }
3792 } else if (super_check_offset.as_constant() == sc_offset) {
3793 // Need a slow path; fast failure is impossible.
3794 if (L_slow_path == &L_fallthrough) {
3795 beq(super_klass, AT, *L_success);
3796 delayed()->nop();
3797 } else {
3798 bne(super_klass, AT, *L_slow_path);
3799 delayed()->nop();
3800 b(*L_success);
3801 delayed()->nop();
3802 }
3803 } else {
3804 // No slow path; it's a fast decision.
3805 if (L_failure == &L_fallthrough) {
3806 beq(super_klass, AT, *L_success);
3807 delayed()->nop();
3808 } else {
3809 bne(super_klass, AT, *L_failure);
3810 delayed()->nop();
3811 b(*L_success);
3812 delayed()->nop();
3813 }
3814 }
3816 bind(L_fallthrough);
3818 }
3821 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3822 Register super_klass,
3823 Register temp_reg,
3824 Register temp2_reg,
3825 Label* L_success,
3826 Label* L_failure,
3827 bool set_cond_codes) {
3828 assert_different_registers(sub_klass, super_klass, temp_reg);
3829 if (temp2_reg != noreg)
3830 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3831 else
3832 temp2_reg = T9;
3833 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3835 Label L_fallthrough;
3836 int label_nulls = 0;
3837 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3838 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3839 assert(label_nulls <= 1, "at most one NULL in the batch");
3841 // a couple of useful fields in sub_klass:
3842 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3843 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3844 Address secondary_supers_addr(sub_klass, ss_offset);
3845 Address super_cache_addr( sub_klass, sc_offset);
3847 // Do a linear scan of the secondary super-klass chain.
3848 // This code is rarely used, so simplicity is a virtue here.
3849 // The repne_scan instruction uses fixed registers, which we must spill.
3850 // Don't worry too much about pre-existing connections with the input regs.
3852 // Get super_klass value into rax (even if it was in rdi or rcx).
3853 #ifndef PRODUCT
3854 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3855 ExternalAddress pst_counter_addr((address) pst_counter);
3856 NOT_LP64( incrementl(pst_counter_addr) );
3857 #endif //PRODUCT
3859 // We will consult the secondary-super array.
3860 ld(temp_reg, secondary_supers_addr);
3861 // Load the array length. (Positive movl does right thing on LP64.)
3862 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3863 // Skip to start of data.
3864 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
3866 // Scan RCX words at [RDI] for an occurrence of RAX.
3867 // Set NZ/Z based on last compare.
3868 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
3869 // not change flags (only scas instruction which is repeated sets flags).
3870 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
3872 /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
3873 Label Loop, subtype;
3874 bind(Loop);
3875 beq(temp2_reg, R0, *L_failure);
3876 delayed()->nop();
3877 ld(AT, temp_reg, 0);
3878 beq(AT, super_klass, subtype);
3879 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3880 b(Loop);
3881 delayed()->daddi(temp2_reg, temp2_reg, -1);
3883 bind(subtype);
3884 sd(super_klass, super_cache_addr);
3885 if (L_success != &L_fallthrough) {
3886 b(*L_success);
3887 delayed()->nop();
3888 }
3890 // Success. Cache the super we found and proceed in triumph.
3891 #undef IS_A_TEMP
3893 bind(L_fallthrough);
3894 }
3896 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3897 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3898 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3899 verify_oop(oop_result, "broken oop in call_VM_base");
3900 }
3902 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3903 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3904 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3905 }
3907 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3908 int extra_slot_offset) {
3909 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3910 int stackElementSize = Interpreter::stackElementSize;
3911 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3912 #ifdef ASSERT
3913 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3914 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3915 #endif
3916 Register scale_reg = NOREG;
3917 Address::ScaleFactor scale_factor = Address::no_scale;
3918 if (arg_slot.is_constant()) {
3919 offset += arg_slot.as_constant() * stackElementSize;
3920 } else {
3921 scale_reg = arg_slot.as_register();
3922 scale_factor = Address::times_8;
3923 }
3924 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
3925 // offset += wordSize; // return PC is on stack
3926 if(scale_reg==NOREG) return Address(SP, offset);
3927 else {
3928 dsll(scale_reg, scale_reg, scale_factor);
3929 daddu(scale_reg, SP, scale_reg);
3930 return Address(scale_reg, offset);
3931 }
3932 }
3934 SkipIfEqual::~SkipIfEqual() {
3935 _masm->bind(_label);
3936 }
3938 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3939 switch (size_in_bytes) {
3940 #ifndef _LP64
3941 case 8:
3942 assert(dst2 != noreg, "second dest register required");
3943 lw(dst, src);
3944 lw(dst2, src.plus_disp(BytesPerInt));
3945 break;
3946 #else
3947 case 8: ld(dst, src); break;
3948 #endif
3949 case 4: lw(dst, src); break;
3950 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
3951 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
3952 default: ShouldNotReachHere();
3953 }
3954 }
3956 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
3957 switch (size_in_bytes) {
3958 #ifndef _LP64
3959 case 8:
3960 assert(src2 != noreg, "second source register required");
3961 sw(src, dst);
3962 sw(src2, dst.plus_disp(BytesPerInt));
3963 break;
3964 #else
3965 case 8: sd(src, dst); break;
3966 #endif
3967 case 4: sw(src, dst); break;
3968 case 2: sh(src, dst); break;
3969 case 1: sb(src, dst); break;
3970 default: ShouldNotReachHere();
3971 }
3972 }
3974 // Look up the method for a megamorphic invokeinterface call.
3975 // The target method is determined by <intf_klass, itable_index>.
3976 // The receiver klass is in recv_klass.
3977 // On success, the result will be in method_result, and execution falls through.
3978 // On failure, execution transfers to the given label.
3979 void MacroAssembler::lookup_interface_method(Register recv_klass,
3980 Register intf_klass,
3981 RegisterOrConstant itable_index,
3982 Register method_result,
3983 Register scan_temp,
3984 Label& L_no_such_interface,
3985 bool return_method) {
3986 assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
3987 assert_different_registers(method_result, intf_klass, scan_temp, AT);
3988 assert(recv_klass != method_result || !return_method,
3989 "recv_klass can be destroyed when method isn't needed");
3991 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3992 "caller must use same register for non-constant itable index as for method");
3994 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3995 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
3996 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3997 int scan_step = itableOffsetEntry::size() * wordSize;
3998 int vte_size = vtableEntry::size() * wordSize;
3999 Address::ScaleFactor times_vte_scale = Address::times_ptr;
4000 assert(vte_size == wordSize, "else adjust times_vte_scale");
4002 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
4004 // %%% Could store the aligned, prescaled offset in the klassoop.
4005 dsll(scan_temp, scan_temp, times_vte_scale);
4006 daddu(scan_temp, recv_klass, scan_temp);
4007 daddiu(scan_temp, scan_temp, vtable_base);
4008 if (HeapWordsPerLong > 1) {
4009 // Round up to align_object_offset boundary
4010 // see code for InstanceKlass::start_of_itable!
4011 round_to(scan_temp, BytesPerLong);
4012 }
4014 if (return_method) {
4015 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
4016 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
4017 if (itable_index.is_constant()) {
4018 set64(AT, (int)itable_index.is_constant());
4019 dsll(AT, AT, (int)Address::times_ptr);
4020 } else {
4021 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
4022 }
4023 daddu(AT, AT, recv_klass);
4024 daddiu(recv_klass, AT, itentry_off);
4025 }
4027 Label search, found_method;
4029 for (int peel = 1; peel >= 0; peel--) {
4030 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
4032 if (peel) {
4033 beq(intf_klass, method_result, found_method);
4034 nop();
4035 } else {
4036 bne(intf_klass, method_result, search);
4037 nop();
4038 // (invert the test to fall through to found_method...)
4039 }
4041 if (!peel) break;
4043 bind(search);
4045 // Check that the previous entry is non-null. A null entry means that
4046 // the receiver class doesn't implement the interface, and wasn't the
4047 // same as when the caller was compiled.
4048 beq(method_result, R0, L_no_such_interface);
4049 nop();
4050 daddiu(scan_temp, scan_temp, scan_step);
4051 }
4053 bind(found_method);
4055 if (return_method) {
4056 // Got a hit.
4057 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
4058 if(UseLoongsonISA) {
4059 gsldx(method_result, recv_klass, scan_temp, 0);
4060 } else {
4061 daddu(AT, recv_klass, scan_temp);
4062 ld(method_result, AT);
4063 }
4064 }
4065 }
4067 // virtual method calling
4068 void MacroAssembler::lookup_virtual_method(Register recv_klass,
4069 RegisterOrConstant vtable_index,
4070 Register method_result) {
4071 Register tmp = GP;
4072 push(tmp);
4074 if (vtable_index.is_constant()) {
4075 assert_different_registers(recv_klass, method_result, tmp);
4076 } else {
4077 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
4078 }
4079 const int base = InstanceKlass::vtable_start_offset() * wordSize;
4080 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
4081 /*
4082 Address vtable_entry_addr(recv_klass,
4083 vtable_index, Address::times_ptr,
4084 base + vtableEntry::method_offset_in_bytes());
4085 */
4086 if (vtable_index.is_constant()) {
4087 set64(AT, vtable_index.as_constant());
4088 dsll(AT, AT, (int)Address::times_ptr);
4089 } else {
4090 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
4091 }
4092 set64(tmp, base + vtableEntry::method_offset_in_bytes());
4093 daddu(tmp, tmp, AT);
4094 daddu(tmp, tmp, recv_klass);
4095 ld(method_result, tmp, 0);
4097 pop(tmp);
4098 }