Thu, 24 May 2018 19:26:50 +0800
#7046 C2 supports long branch
Contributed-by: fujie
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

// Implementation of MacroAssembler

intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k = 0; k < sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k = 0; k < sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }

void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;
  jint *pc = (jint *)branch;

/*
  move(AT, RA); // dadd
  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
  nop();
  lui(T9, 0); // to be patched
  ori(T9, T9, 0);
  daddu(T9, T9, RA);
  move(RA, AT);
  jr(T9);
 */
  if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {

    assert(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      /* revert to "beq + nop" */
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
#undef __
    }
    return;
  } else if (special(pc[4]) == jr_op
             && opcode(pc[4]) == special_op
             && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {

    CodeBuffer cb(branch, 4 * 4);
    MacroAssembler masm(&cb);
    masm.patchable_set48(T9, (long)(target));
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
      tty->print("0x%x, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}

static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}
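
// For illustration, with 4-byte MIPS instructions (LogInstructionSize == 2)
// the sizes above work out to: patchable call = 6 * 4 = 24 bytes, near call
// (jal + nop) = 2 * 4 = 8 bytes, and a far call = (insts_for_set64(target) + 2) * 4
// bytes, depending on how many instructions the 64-bit constant needs.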

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}
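
// Background: a MIPS j/jal encodes a 26-bit instruction index, so it can only
// reach targets inside the same 256MB-aligned window as the delay-slot PC.
// Checking both ends of the code cache is therefore a conservative way to
// prove the target is reachable from any possible (relocated) call site.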

void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}
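
// Note that both arms of patchable_jump() are six instructions: four nops +
// j + nop in the near case, and (assuming patchable_set48 always emits
// exactly four instructions) set48 + jr + nop in the far case. The fixed
// length is what insts_for_patchable_jump() reports and what makes the site
// safe to patch in place later.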

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}

void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
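
// A sketch of what the far case above emits -- the condition is inverted so
// a short branch skips over the unconditional far jump:
//
//     bne   rs, rt, not_jump
//     nop                      // delay slot
//     <b_far(entry)>           // multi-instruction far jump, see b_far()
//     nop
//   not_jump: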

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  bne(rs, rt, not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  beq(rs, rt, not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1t_long(Label& L) {
  Label not_taken;

  bc1f(not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1f_long(Label& L) {
  Label not_taken;

  bc1t(not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}
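
// These *_long variants are the basis of the C2 long-branch support (#7046):
// each inverts its condition to hop over a jmp_far(), so the taken path
// reaches L no matter how far away it is eventually bound, at the cost of a
// fixed, longer instruction sequence.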

void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
/*
MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   0x00000055651ed514: dadd at, ra, zero
   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

   0x00000055651ed51c: sll zero, zero, 0
   0x00000055651ed520: lui t9, 0x0
   0x00000055651ed524: ori t9, t9, 0x21b8
   0x00000055651ed528: daddu t9, t9, ra
   0x00000055651ed52c: dadd ra, at, zero
   0x00000055651ed530: jr t9
   0x00000055651ed534: sll zero, zero, 0
*/
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}
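
// A note on the raw bgezal above: "bgezal zero, 1" always branches (zero >= 0)
// to the instruction after its delay slot and, as a side effect, writes the
// return address -- the address of the lui -- into RA. That provides a
// PC-relative anchor for the patched lui/ori offset; RA itself is saved in
// AT and restored afterwards, so the trick is transparent to the caller.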

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* address must be bounded */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
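
// The ll/sc pair above is a standard load-linked/store-conditional loop: sc
// succeeds (writing 1 into tmp_reg2) only if the location was not modified by
// another CPU since the ll; on failure it leaves 0 in tmp_reg2, and we branch
// back and retry, reloading the counter on the next iteration.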

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
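
// For reference, a rough sketch of the 64-bit biased mark word layout this
// code depends on (exact bit widths are defined by markOopDesc):
//   [JavaThread* | epoch:2 | age:4 | biased_lock:1 | lock:2]
// so (mark ^ (prototype_header | thread)), with the age bits masked off, is
// zero exactly when the object is still biased to the current thread in the
// current epoch.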

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}

// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
// This method handles the stack alignment itself, so callers no longer need to
// reserve stack space for the arguments.
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}
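
// A note on the alignment dance above: andi(AT, SP, 0xf) tests the low four
// bits of SP. If SP is not 16-byte aligned at the call (as the MIPS n64 ABI
// expects), we temporarily drop 8 bytes to restore alignment, make the call,
// and undo the adjustment afterwards; otherwise we call directly.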

void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::jmp_far(Label& L) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)entry);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)pc());
  }

  jr(T9);
  nop();
}

void MacroAssembler::call(address entry) {
  // C/C++ code assumes T9 is the entry point, so we always move entry into T9.
  // Maybe there is a more graceful way to handle this. FIXME
  // For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh) {
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}
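
// c2bool() normalizes a word to a C-style boolean: r is left at 0 if it was
// zero, and collapsed to 1 otherwise.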

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

      //if (os::message_box(msg, "Execution stopped, print registers?"))
      //  regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}

void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument. added by yjl 7/10/2005
  addiu(SP, SP, -1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore the space reserved for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}

void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}

void MacroAssembler::print_reg(Register reg) {
/*
  char *s = getenv("PRINT_REG");
  if (s == NULL)
    return;
  if (strcmp(s, "1") != 0)
    return;
*/
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19);    // A0 has been modified by li(A0, (long)reg->name()). Ugly code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

/*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); // 23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int), relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();
  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
*/
}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float), relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}

void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  //MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  addi(SP, SP, (1 + number_of_arguments) * (-wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, -1 * wordSize);
  lw(FP, SP, -2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, -1 * wordSize);
  ld(FP, SP, -2 * wordSize);
#endif
}
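
// enter()/leave() maintain the usual frame layout:
//   [FP + wordSize] : saved RA
//   [FP + 0]        : saved (old) FP
// leave() recomputes SP from FP rather than popping, so it also discards any
// locals the frame allocated below the saved registers.
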
/*
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
*/
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if(clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
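
// For illustration: assuming a 4096-byte page, a thread pointer P is mapped
// to the int-aligned page offset
//   (P >> os::get_serialize_page_shift_count()) & (4096 - sizeof(int))
// so distinct threads tend to write to distinct cache lines of the
// serialization page.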

// Calls to C land
//
// When entering C land, the fp & sp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}

//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != V0, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lw(AT, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lb(AT, in_progress);
  }
  beq(AT, R0, done);
  nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  beq(pre_val, R0, done);
  nop();

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ld(tmp, index);
  beq(tmp, R0, runtime);
  nop();

  daddiu(tmp, tmp, -1 * wordSize);
  sd(tmp, index);
  ld(AT, buffer);
  daddu(tmp, tmp, AT);

  // Record the previous value
  sd(pre_val, tmp, 0);
  beq(R0, R0, done);
  nop();

  bind(runtime);
  // save the live input values
  if (tosca_live) push(V0);

  if (obj != noreg && obj != V0) push(obj);

  if (pre_val != V0) push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
    if (thread != A1) move(A1, thread);
    if (pre_val != A0) move(A0, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != V0)
    pop(pre_val);

  if (obj != noreg && obj != V0)
    pop(obj);

  if (tosca_live) pop(V0);

  bind(done);
}
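
// The fast path above enqueues pre_val into the thread-local SATB buffer:
// 'index' counts down in bytes toward the start of the buffer, so index == 0
// means the buffer is full and the runtime (g1_wb_pre) must be called to
// process it.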

void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
  assert(tmp  != AT, "must be");
  assert(tmp2 != AT, "must be");
#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?
  xorr(AT, store_addr, new_val);
  dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  beq(AT, R0, done);
  nop();

  // crosses regions, storing NULL?
  beq(new_val, R0, done);
  nop();

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  move(card_addr, store_addr);
  dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  set64(cardtable, (intptr_t)ct->byte_map_base);
  daddu(card_addr, card_addr, cardtable);

  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  beq(AT, R0, done);
  nop();

  sync();
  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  beq(AT, R0, done);
  nop();

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
  move(AT, (int)CardTableModRefBS::dirty_card_val());
  sb(AT, card_addr, 0);

  lw(AT, queue_index);
  beq(AT, R0, runtime);
  nop();
  daddiu(AT, AT, -1 * wordSize);
  sw(AT, queue_index);
  ld(tmp2, buffer);
#ifdef _LP64
  ld(AT, queue_index);
  daddu(tmp2, tmp2, AT);
  sd(card_addr, tmp2, 0);
#else
  lw(AT, queue_index);
  addu32(tmp2, tmp2, AT);
  sw(card_addr, tmp2, 0);
#endif
  beq(R0, R0, done);
  nop();

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////
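
// Card table arithmetic, for illustration: assuming the usual 512-byte cards
// (card_shift == 9), a store to address p marks the byte at
// byte_map_base + (p >> 9). byte_map_base is pre-biased so this holds for
// heap addresses directly, which is also why it need not itself be a valid
// address (see the comment above set64(cardtable, ...)).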

void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}

// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}

void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  set64(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  if (UseConcMarkSweepGC) sync();
  sb(R0, AT, 0);
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2); // blows t1&t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  if (var_size_in_bytes == NOREG) {
    // I don't think we need to move con_size_in_bytes to a register first.
    // by yjl 8/17/2005
    assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
    addi(end, obj, con_size_in_bytes);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();

  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  // recover var_size_in_bytes if necessary
  //if (var_size_in_bytes == end) {
  //  sub(var_size_in_bytes, end, obj);
  //}

  verify_tlab(t1, t2);
}
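
// The TLAB fast path above is simply: obj = tlab_top; end = obj + size;
// if (end > tlab_end) goto slow_case; tlab_top = end. No atomics are needed
// because a TLAB is private to its thread.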
1826 // Defines obj, preserves var_size_in_bytes
1827 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1828 Register t1, Register t2, Label& slow_case) {
1829 assert_different_registers(obj, var_size_in_bytes, t1, AT);
1830 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1831 // No allocation in the shared eden.
1832 b_far(slow_case);
1833 delayed()->nop();
1834 } else {
1836 #ifndef _LP64
1837 Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
1838 lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
1839 #else
1840 Address heap_top(t1);
1841 li(t1, (long)Universe::heap()->top_addr());
1842 #endif
1843 ld_ptr(obj, heap_top);
1845 Register end = t2;
1846 Label retry;
1848 bind(retry);
1849 if (var_size_in_bytes == NOREG) {
1850 // i dont think we need move con_size_in_bytes to a register first.
1851 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1852 addi(end, obj, con_size_in_bytes);
1853 } else {
1854 add(end, obj, var_size_in_bytes);
1855 }
1856 // if end < obj then we wrapped around => object too long => slow case
1857 sltu(AT, end, obj);
1858 bne_far(AT, R0, slow_case);
1859 delayed()->nop();
1861 li(AT, (long)Universe::heap()->end_addr());
1862 sltu(AT, AT, end);
1863 bne_far(AT, R0, slow_case);
1864 delayed()->nop();
1865 // Compare obj with the top addr, and if still equal, store the new top addr in
1866 // end at the address of the top addr pointer. Sets ZF if was equal, and clears
1867 // it otherwise. Use lock prefix for atomicity on MPs.
1868 //if (os::is_MP()) {
1869 // sync();
1870 //}
1872 // if someone beat us on the allocation, try again, otherwise continue
1873 cmpxchg(end, heap_top, obj);
1874 beq_far(AT, R0, retry); //by yyq
1875 delayed()->nop();
1877 }
1878 }
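// Illustrative sketch of the CAS retry loop emitted by eden_allocate() above:
//   obj = *heap_top;
//   retry:
//     end = obj + size;
//     if (end < obj) goto slow_case;            // wrapped around: object too long
//     if (heap end exceeded) goto slow_case;
//     if (!CAS(heap_top, obj, end)) goto retry; // cmpxchg refreshes obj on failure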
1880 // C2 doesn't invoke this one.
1881 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1882 Register top = T0;
1883 Register t1 = T1;
1884 /* Jin: tlab_refill() is called in
1886 [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
1888 In generate_code_for(), T2 has been assigned as the length register, and it is
1889 used after the call to tlab_refill();
1890 therefore, tlab_refill() must not use T2.
1892 Source:
1894 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
1895 at java.lang.System.arraycopy(Native Method)
1896 at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array
1897 at sun.misc.Resource.getBytes(Resource.java:117)
1898 at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
1899 at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
1900 at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
1901 */
1902 Register t2 = T9;
1903 Register t3 = T3;
1904 Register thread_reg = T8;
1905 Label do_refill, discard_tlab;
1906 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1907 // No allocation in the shared eden.
1908 b(slow_case);
1909 delayed()->nop();
1910 }
1912 get_thread(thread_reg);
1914 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1915 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1917 // calculate amount of free space
1918 sub(t1, t1, top);
1919 shr(t1, LogHeapWordSize);
1921 // Retain tlab and allocate object in shared space if
1922 // the amount free in the tlab is too large to discard.
1923 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1924 slt(AT, t2, t1);
1925 beq(AT, R0, discard_tlab);
1926 delayed()->nop();
1928 // Retain
1930 #ifndef _LP64
1931 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1932 #else
1933 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1934 #endif
1935 add(t2, t2, AT);
1936 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1938 if (TLABStats) {
1939 // increment number of slow_allocations
1940 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1941 addiu(AT, AT, 1);
1942 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1943 }
1944 b(try_eden);
1945 delayed()->nop();
1947 bind(discard_tlab);
1948 if (TLABStats) {
1949 // increment number of refills
1950 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1951 addi(AT, AT, 1);
1952 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1953 // accumulate wastage -- t1 is amount free in tlab
1954 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1955 add(AT, AT, t1);
1956 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1957 }
1959 // if tlab is currently allocated (top or end != null) then
1960 // fill [top, end + alignment_reserve) with array object
1961 beq(top, R0, do_refill);
1962 delayed()->nop();
1964 // set up the mark word
1965 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1966 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1968 // set the length to the remaining space
1969 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1970 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1971 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1972 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1974 // set klass to intArrayKlass
1975 #ifndef _LP64
1976 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
1977 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
1978 #else
1979 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
1980 ld_ptr(t1, AT, 0);
1981 #endif
1982 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
1983 store_klass(top, t1);
1985 // refill the tlab with an eden allocation
1986 bind(do_refill);
1987 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1988 shl(t1, LogHeapWordSize);
1989 // add object_size ??
1990 eden_allocate(top, t1, 0, t2, t3, slow_case);
1992 // Check that t1 was preserved in eden_allocate.
1993 #ifdef ASSERT
1994 if (UseTLAB) {
1995 Label ok;
1996 assert_different_registers(thread_reg, t1);
1997 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1998 shl(AT, LogHeapWordSize);
1999 beq(AT, t1, ok);
2000 delayed()->nop();
2001 stop("assert(t1 != tlab size)");
2002 should_not_reach_here();
2004 bind(ok);
2005 }
2006 #endif
2007 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
2008 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
2009 add(top, top, t1);
2010 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
2011 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
2012 verify_tlab(t1, t2);
2013 b(retry);
2014 delayed()->nop();
2015 }
2017 static const double pi_4 = 0.7853981633974483;
2019 // the x86 version is too clumsy; we do not think we need that fuss here. maybe wrong, FIXME
2020 // must get the argument (a double) in F12/F13
2021 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
2022 // We need to preserve the registers which may be modified during the call. @Jerome
2023 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
2024 // save all modified registers here
2025 // if (preserve_cpu_regs) {
2026 // }
2027 // FIXME: in the disassembly of trigfunc, only V0, V1, T9, SP and RA are used, so we only save V0, V1 and T9
2028 pushad();
2029 // reserve stack space for the callee before the call
2030 addi(SP, SP, -wordSize * 2);
2031 switch (trig){
2032 case 's' :
2033 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
2034 delayed()->nop();
2035 break;
2036 case 'c':
2037 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
2038 delayed()->nop();
2039 break;
2040 case 't':
2041 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
2042 delayed()->nop();
2043 break;
2044 default: assert(false, "bad intrinsic");
2045 break;
2047 }
2049 addi(SP, SP, wordSize * 2);
2050 popad();
2051 // if (preserve_cpu_regs) {
2052 // }
2053 }
2055 #ifdef _LP64
2056 void MacroAssembler::li(Register rd, long imm) {
2057 if (imm <= max_jint && imm >= min_jint) {
2058 li32(rd, (int)imm);
2059 } else if (julong(imm) <= 0xFFFFFFFF) {
2060 assert_not_delayed();
2061 // lui sign-extends, so we can't use that.
2062 ori(rd, R0, julong(imm) >> 16);
2063 dsll(rd, rd, 16);
2064 ori(rd, rd, split_low(imm));
2065 //aoqi_test
2066 //} else if ((imm > 0) && ((imm >> 48) == 0)) {
2067 } else if ((imm > 0) && is_simm16(imm >> 32)) {
2068 /* A 48-bit address */
2069 li48(rd, imm);
2070 } else {
2071 li64(rd, imm);
2072 }
2073 }
2074 #else
2075 void MacroAssembler::li(Register rd, long imm) {
2076 li32(rd, (int)imm);
2077 }
2078 #endif
2080 void MacroAssembler::li32(Register reg, int imm) {
2081 if (is_simm16(imm)) {
2082 /* Jin: for imm < 0, we should use addi instead of addiu.
2083 *
2084 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2085 *
2086 * 78 move [int:-1|I] [a0|I]
2087 * : daddi a0, zero, 0xffffffff (correct)
2088 * : daddiu a0, zero, 0xffffffff (incorrect)
2089 */
2090 if (imm >= 0)
2091 addiu(reg, R0, imm);
2092 else
2093 addi(reg, R0, imm);
2094 } else {
2095 lui(reg, split_low(imm >> 16));
2096 if (split_low(imm))
2097 ori(reg, reg, split_low(imm));
2098 }
2099 }
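// For example (illustrative): li32(T0, 0x12345678) emits
//   lui t0, 0x1234 ; ori t0, t0, 0x5678
// while li32(T0, 100) emits a single addiu t0, zero, 100.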
2101 #ifdef _LP64
2102 void MacroAssembler::set64(Register d, jlong value) {
2103 assert_not_delayed();
2105 int hi = (int)(value >> 32);
2106 int lo = (int)(value & ~0);
2108 if (value == lo) { // 32-bit integer
2109 if (is_simm16(value)) {
2110 daddiu(d, R0, value);
2111 } else {
2112 lui(d, split_low(value >> 16));
2113 if (split_low(value)) {
2114 ori(d, d, split_low(value));
2115 }
2116 }
2117 } else if (hi == 0) { // hardware zero-extends to upper 32
2118 ori(d, R0, julong(value) >> 16);
2119 dsll(d, d, 16);
2120 if (split_low(value)) {
2121 ori(d, d, split_low(value));
2122 }
2123 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2124 // 4 insts
2125 li48(d, value);
2126 } else { // li64
2127 // 6 insts
2128 li64(d, value);
2129 }
2130 }
2133 int MacroAssembler::insts_for_set64(jlong value) {
2134 int hi = (int)(value >> 32);
2135 int lo = (int)(value & ~0);
2137 int count = 0;
2139 if (value == lo) { // 32-bit integer
2140 if (is_simm16(value)) {
2141 //daddiu(d, R0, value);
2142 count++;
2143 } else {
2144 //lui(d, split_low(value >> 16));
2145 count++;
2146 if (split_low(value)) {
2147 //ori(d, d, split_low(value));
2148 count++;
2149 }
2150 }
2151 } else if (hi == 0) { // hardware zero-extends to upper 32
2152 //ori(d, R0, julong(value) >> 16);
2153 //dsll(d, d, 16);
2154 count += 2;
2155 if (split_low(value)) {
2156 //ori(d, d, split_low(value));
2157 count++;
2158 }
2159 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2160 // 4 insts
2161 //li48(d, value);
2162 count += 4;
2163 } else { // li64
2164 // 6 insts
2165 //li64(d, value);
2166 count += 6;
2167 }
2169 return count;
2170 }
2172 void MacroAssembler::patchable_set48(Register d, jlong value) {
2173 assert_not_delayed();
2175 int hi = (int)(value >> 32);
2176 int lo = (int)(value & ~0);
2178 int count = 0;
2180 if (value == lo) { // 32-bit integer
2181 if (is_simm16(value)) {
2182 daddiu(d, R0, value);
2183 count += 1;
2184 } else {
2185 lui(d, split_low(value >> 16));
2186 count += 1;
2187 if (split_low(value)) {
2188 ori(d, d, split_low(value));
2189 count += 1;
2190 }
2191 }
2192 } else if (hi == 0) { // hardware zero-extends to upper 32
2193 ori(d, R0, julong(value) >> 16);
2194 dsll(d, d, 16);
2195 count += 2;
2196 if (split_low(value)) {
2197 ori(d, d, split_low(value));
2198 count += 1;
2199 }
2200 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2201 // 4 insts
2202 li48(d, value);
2203 count += 4;
2204 } else { // li64
2205 tty->print_cr("value = 0x%lx", value);
2206 guarantee(false, "Not supported yet !");
2207 }
2209 for (; count < 4; count++) {
2210 nop();
2211 }
2212 }
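// The trailing nops pad patchable_set48 to a fixed 4-instruction window, so the
// constant can later be patched in place no matter which form was emitted.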
2214 void MacroAssembler::patchable_set32(Register d, jlong value) {
2215 assert_not_delayed();
2217 int hi = (int)(value >> 32);
2218 int lo = (int)(value & ~0);
2220 int count = 0;
2222 if (value == lo) { // 32-bit integer
2223 if (is_simm16(value)) {
2224 daddiu(d, R0, value);
2225 count += 1;
2226 } else {
2227 lui(d, split_low(value >> 16));
2228 count += 1;
2229 if (split_low(value)) {
2230 ori(d, d, split_low(value));
2231 count += 1;
2232 }
2233 }
2234 } else if (hi == 0) { // hardware zero-extends to upper 32
2235 ori(d, R0, julong(value) >> 16);
2236 dsll(d, d, 16);
2237 count += 2;
2238 if (split_low(value)) {
2239 ori(d, d, split_low(value));
2240 count += 1;
2241 }
2242 } else {
2243 tty->print_cr("value = 0x%lx", value);
2244 guarantee(false, "Not supported yet !");
2245 }
2247 for (; count < 3; count++) {
2248 nop();
2249 }
2250 }
2252 void MacroAssembler::patchable_call32(Register d, jlong value) {
2253 assert_not_delayed();
2255 int hi = (int)(value >> 32);
2256 int lo = (int)(value & ~0);
2258 int count = 0;
2260 if (value == lo) { // 32-bit integer
2261 if (is_simm16(value)) {
2262 daddiu(d, R0, value);
2263 count += 1;
2264 } else {
2265 lui(d, split_low(value >> 16));
2266 count += 1;
2267 if (split_low(value)) {
2268 ori(d, d, split_low(value));
2269 count += 1;
2270 }
2271 }
2272 } else {
2273 tty->print_cr("value = 0x%lx", value);
2274 guarantee(false, "Not supported yet !");
2275 }
2277 for (; count < 2; count++) {
2278 nop();
2279 }
2280 }
2282 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2283 assert(UseCompressedClassPointers, "should only be used for compressed header");
2284 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2286 int klass_index = oop_recorder()->find_index(k);
2287 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2288 long narrowKlass = (long)Klass::encode_klass(k);
2290 relocate(rspec, Assembler::narrow_oop_operand);
2291 patchable_set48(dst, narrowKlass);
2292 }
2295 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2296 assert(UseCompressedOops, "should only be used for compressed header");
2297 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2299 int oop_index = oop_recorder()->find_index(obj);
2300 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2302 relocate(rspec, Assembler::narrow_oop_operand);
2303 patchable_set48(dst, oop_index);
2304 }
2306 void MacroAssembler::li64(Register rd, long imm) {
2307 assert_not_delayed();
2308 lui(rd, imm >> 48);
2309 ori(rd, rd, split_low(imm >> 32));
2310 dsll(rd, rd, 16);
2311 ori(rd, rd, split_low(imm >> 16));
2312 dsll(rd, rd, 16);
2313 ori(rd, rd, split_low(imm));
2314 }
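// For example (illustrative): li64(rd, 0x1122334455667788) emits
//   lui rd, 0x1122 ; ori rd, rd, 0x3344 ; dsll rd, rd, 16 ;
//   ori rd, rd, 0x5566 ; dsll rd, rd, 16 ; ori rd, rd, 0x7788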
2316 void MacroAssembler::li48(Register rd, long imm) {
2317 assert_not_delayed();
2318 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2319 lui(rd, imm >> 32);
2320 ori(rd, rd, split_low(imm >> 16));
2321 dsll(rd, rd, 16);
2322 ori(rd, rd, split_low(imm));
2323 }
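// For example (illustrative): li48(rd, 0x123456789abc) emits
//   lui rd, 0x1234 ; ori rd, rd, 0x5678 ; dsll rd, rd, 16 ; ori rd, rd, 0x9abc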
2324 #endif
2325 // NOTE: unlike i486, we do not push eax here;
2326 // x86 saves eax because it uses eax as the jump register.
2327 void MacroAssembler::verify_oop(Register reg, const char* s) {
2328 /*
2329 if (!VerifyOops) return;
2331 // Pass register number to verify_oop_subroutine
2332 char* b = new char[strlen(s) + 50];
2333 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
2334 push(rax); // save rax,
2335 push(reg); // pass register argument
2336 ExternalAddress buffer((address) b);
2337 // avoid using pushptr, as it modifies scratch registers
2338 // and our contract is not to modify anything
2339 movptr(rax, buffer.addr());
2340 push(rax);
2341 // call indirectly to solve generation ordering problem
2342 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2343 call(rax);
2344 */
2345 if (!VerifyOops) return;
2346 const char * b = NULL;
2347 stringStream ss;
2348 ss.print("verify_oop: %s: %s", reg->name(), s);
2349 b = code_string(ss.as_string());
2350 #ifdef _LP64
2351 pushad();
2352 move(A1, reg);
2353 li(A0, (long)b);
2354 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2355 ld(T9, AT, 0);
2356 jalr(T9);
2357 delayed()->nop();
2358 popad();
2359 #else
2360 // Pass register number to verify_oop_subroutine
2361 sw(T0, SP, - wordSize);
2362 sw(T1, SP, - 2*wordSize);
2363 sw(RA, SP, - 3*wordSize);
2364 sw(A0, SP, - 4*wordSize);
2365 sw(A1, SP, - 5*wordSize);
2366 sw(AT, SP, - 6*wordSize);
2367 sw(T9, SP, - 7*wordSize);
2368 addiu(SP, SP, - 7 * wordSize);
2369 move(A1, reg);
2370 li(A0, (long)b);
2371 // call indirectly to solve generation ordering problem
2372 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2373 lw(T9, AT, 0);
2374 jalr(T9);
2375 delayed()->nop();
2376 lw(T0, SP, 6* wordSize);
2377 lw(T1, SP, 5* wordSize);
2378 lw(RA, SP, 4* wordSize);
2379 lw(A0, SP, 3* wordSize);
2380 lw(A1, SP, 2* wordSize);
2381 lw(AT, SP, 1* wordSize);
2382 lw(T9, SP, 0* wordSize);
2383 addiu(SP, SP, 7 * wordSize);
2384 #endif
2385 }
2388 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2389 if (!VerifyOops) {
2390 nop();
2391 return;
2392 }
2393 // Pass register number to verify_oop_subroutine
2394 const char * b = NULL;
2395 stringStream ss;
2396 ss.print("verify_oop_addr: %s", s);
2397 b = code_string(ss.as_string());
2399 st_ptr(T0, SP, - wordSize);
2400 st_ptr(T1, SP, - 2*wordSize);
2401 st_ptr(RA, SP, - 3*wordSize);
2402 st_ptr(A0, SP, - 4*wordSize);
2403 st_ptr(A1, SP, - 5*wordSize);
2404 st_ptr(AT, SP, - 6*wordSize);
2405 st_ptr(T9, SP, - 7*wordSize);
2406 ld_ptr(A1, addr); // addr may use SP, so load from it before change SP
2407 addiu(SP, SP, - 7 * wordSize);
2409 li(A0, (long)b);
2410 // call indirectly to solve generation ordering problem
2411 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2412 ld_ptr(T9, AT, 0);
2413 jalr(T9);
2414 delayed()->nop();
2415 ld_ptr(T0, SP, 6* wordSize);
2416 ld_ptr(T1, SP, 5* wordSize);
2417 ld_ptr(RA, SP, 4* wordSize);
2418 ld_ptr(A0, SP, 3* wordSize);
2419 ld_ptr(A1, SP, 2* wordSize);
2420 ld_ptr(AT, SP, 1* wordSize);
2421 ld_ptr(T9, SP, 0* wordSize);
2422 addiu(SP, SP, 7 * wordSize);
2423 }
2425 // used registers : T0, T1
2426 void MacroAssembler::verify_oop_subroutine() {
2427 // RA: ra
2428 // A0: char* error message
2429 // A1: oop object to verify
2431 Label exit, error;
2432 // increment counter
2433 li(T0, (long)StubRoutines::verify_oop_count_addr());
2434 lw(AT, T0, 0);
2435 #ifdef _LP64
2436 daddi(AT, AT, 1);
2437 #else
2438 addi(AT, AT, 1);
2439 #endif
2440 sw(AT, T0, 0);
2442 // make sure object is 'reasonable'
2443 beq(A1, R0, exit); // if obj is NULL it is ok
2444 delayed()->nop();
2446 // Check if the oop is in the right area of memory
2447 //const int oop_mask = Universe::verify_oop_mask();
2448 //const int oop_bits = Universe::verify_oop_bits();
2449 const uintptr_t oop_mask = Universe::verify_oop_mask();
2450 const uintptr_t oop_bits = Universe::verify_oop_bits();
2451 li(AT, oop_mask);
2452 andr(T0, A1, AT);
2453 li(AT, oop_bits);
2454 bne(T0, AT, error);
2455 delayed()->nop();
2457 // make sure klass is 'reasonable'
2458 //add for compressedoops
2459 reinit_heapbase();
2460 //add for compressedoops
2461 load_klass(T0, A1);
2462 beq(T0, R0, error); // if klass is NULL it is broken
2463 delayed()->nop();
2464 #if 0
2465 //FIXME:wuhui.
2466 // Check if the klass is in the right area of memory
2467 //const int klass_mask = Universe::verify_klass_mask();
2468 //const int klass_bits = Universe::verify_klass_bits();
2469 const uintptr_t klass_mask = Universe::verify_klass_mask();
2470 const uintptr_t klass_bits = Universe::verify_klass_bits();
2472 li(AT, klass_mask);
2473 andr(T1, T0, AT);
2474 li(AT, klass_bits);
2475 bne(T1, AT, error);
2476 delayed()->nop();
2477 // make sure klass' klass is 'reasonable'
2478 //add for compressedoops
2479 load_klass(T0, T0);
2480 beq(T0, R0, error); // if klass' klass is NULL it is broken
2481 delayed()->nop();
2483 li(AT, klass_mask);
2484 andr(T1, T0, AT);
2485 li(AT, klass_bits);
2486 bne(T1, AT, error);
2487 delayed()->nop(); // if klass not in right area of memory it is broken too.
2488 #endif
2489 // return if everything seems ok
2490 bind(exit);
2492 jr(RA);
2493 delayed()->nop();
2495 // handle errors
2496 bind(error);
2497 pushad();
2498 #ifndef _LP64
2499 addi(SP, SP, (-1) * wordSize);
2500 #endif
2501 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2502 delayed()->nop();
2503 #ifndef _LP64
2504 addiu(SP, SP, 1 * wordSize);
2505 #endif
2506 popad();
2507 jr(RA);
2508 delayed()->nop();
2509 }
2511 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2512 #ifdef ASSERT
2513 assert_different_registers(t1, t2, AT);
2514 if (UseTLAB && VerifyOops) {
2515 Label next, ok;
2517 get_thread(t1);
2519 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2520 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2521 sltu(AT, t2, AT);
2522 beq(AT, R0, next);
2523 delayed()->nop();
2525 stop("assert(top >= start)");
2527 bind(next);
2528 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2529 sltu(AT, AT, t2);
2530 beq(AT, R0, ok);
2531 delayed()->nop();
2533 stop("assert(top <= end)");
2535 bind(ok);
2537 }
2538 #endif
2539 }
2540 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2541 Register tmp,
2542 int offset) {
2543 intptr_t value = *delayed_value_addr;
2544 if (value != 0)
2545 return RegisterOrConstant(value + offset);
2546 AddressLiteral a(delayed_value_addr);
2547 // load indirectly to solve generation ordering problem
2548 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2549 //ld(tmp, a);
2550 if (offset != 0)
2551 daddi(tmp,tmp, offset);
2553 return RegisterOrConstant(tmp);
2554 }
2556 void MacroAssembler::hswap(Register reg) {
2557 //short
2558 //andi(reg, reg, 0xffff);
2559 srl(AT, reg, 8);
2560 sll(reg, reg, 24);
2561 sra(reg, reg, 16);
2562 orr(reg, reg, AT);
2563 }
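// hswap byte-swaps a halfword and sign-extends the result, e.g. (illustrative)
// 0x0000a1b2 becomes 0xffffb2a1 (0xb2a1 sign-extended).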
2565 void MacroAssembler::huswap(Register reg) {
2566 #ifdef _LP64
2567 dsrl(AT, reg, 8);
2568 dsll(reg, reg, 24);
2569 dsrl(reg, reg, 16);
2570 orr(reg, reg, AT);
2571 andi(reg, reg, 0xffff);
2572 #else
2573 //andi(reg, reg, 0xffff);
2574 srl(AT, reg, 8);
2575 sll(reg, reg, 24);
2576 srl(reg, reg, 16);
2577 orr(reg, reg, AT);
2578 #endif
2579 }
2581 // A trick to byte-swap a 32-bit value
2582 // using only one extra register (AT).
2583 void MacroAssembler::swap(Register reg) {
2584 srl(AT, reg, 8);
2585 sll(reg, reg, 24);
2586 orr(reg, reg, AT);
2587 //reg : 4 1 2 3
2588 srl(AT, AT, 16);
2589 xorr(AT, AT, reg);
2590 andi(AT, AT, 0xff);
2591 //AT : 0 0 0 1^3);
2592 xorr(reg, reg, AT);
2593 //reg : 4 1 2 1
2594 sll(AT, AT, 16);
2595 xorr(reg, reg, AT);
2596 //reg : 4 3 2 1
2597 }
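// Net effect: a full 32-bit byte reversal, e.g. 0x01020304 becomes 0x04030201,
// using only AT as scratch (the step-by-step trace is in the comments above).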
2599 #ifdef _LP64
2601 /* do 32-bit CAS using MIPS64 lld/scd
2603 Jin: cas_int should only compare the low 32 bits of the memory value.
2604 However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
2605 To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
2606 two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits
2607 of newval, plus the high 32 bits of the memory value, are stored together with SCD.
2609 Example:
2611 double d = 3.1415926;
2612 System.err.println("hello" + d);
2614 sun.misc.FloatingDecimal$1.<init>()
2615 |
2616 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2618 38 cas_int [a7a7|J] [a0|I] [a6|I]
2619 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2620 // a6: 0x4ab325aa
2622 again:
2623 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2625 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2626 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2627 0x00000055647f3c68: dsll32 t8, t8, 0
2628 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2629 0x00000055647f3c70: sll zero, zero, 0
2631 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2632 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2633 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2634 0x00000055647f3c80: and v1, a6, v1
2635 0x00000055647f3c84: or at, t8, v1
2636 0x00000055647f3c88: scd at, 0x0(a7)
2637 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2638 0x00000055647f3c90: sll zero, zero, 0
2639 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2640 0x00000055647f3c98: sll zero, zero, 0
2641 nequal:
2642 0x00000055647f45a4: dadd a0, t9, zero
2643 0x00000055647f45a8: dadd at, zero, zero
2644 done:
2645 */
2647 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2648 /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
2649 Label done, again, nequal;
2651 bind(again);
2653 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2654 ll(AT, dest);
2655 bne(AT, c_reg, nequal);
2656 delayed()->nop();
2658 move(AT, x_reg);
2659 sc(AT, dest);
2660 beq(AT, R0, again);
2661 delayed()->nop();
2662 b(done);
2663 delayed()->nop();
2665 // not xchged
2666 bind(nequal);
2667 sync();
2668 move(c_reg, AT);
2669 move(AT, R0);
2671 bind(done);
2672 }
2673 #endif // cmpxchg32
2675 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2676 Label done, again, nequal;
2678 bind(again);
2679 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2680 #ifdef _LP64
2681 lld(AT, dest);
2682 #else
2683 ll(AT, dest);
2684 #endif
2685 bne(AT, c_reg, nequal);
2686 delayed()->nop();
2688 move(AT, x_reg);
2689 #ifdef _LP64
2690 scd(AT, dest);
2691 #else
2692 sc(AT, dest);
2693 #endif
2694 beq(AT, R0, again);
2695 delayed()->nop();
2696 b(done);
2697 delayed()->nop();
2699 // not xchged
2700 bind(nequal);
2701 sync();
2702 move(c_reg, AT);
2703 move(AT, R0);
2705 bind(done);
2706 }
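// Semantics sketch of cmpxchg (a reader annotation): atomically,
//   if (*dest == c_reg) { *dest = x_reg; AT = 1; }   // exchanged
//   else                { c_reg = *dest; AT = 0; }   // not exchanged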
2708 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2709 Label done, again, nequal;
2711 Register x_reg = x_regLo;
2712 dsll32(x_regHi, x_regHi, 0);
2713 dsll32(x_regLo, x_regLo, 0);
2714 dsrl32(x_regLo, x_regLo, 0);
2715 orr(x_reg, x_regLo, x_regHi);
2717 Register c_reg = c_regLo;
2718 dsll32(c_regHi, c_regHi, 0);
2719 dsll32(c_regLo, c_regLo, 0);
2720 dsrl32(c_regLo, c_regLo, 0);
2721 orr(c_reg, c_regLo, c_regHi);
2723 bind(again);
2725 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2726 lld(AT, dest);
2727 bne(AT, c_reg, nequal);
2728 delayed()->nop();
2730 //move(AT, x_reg);
2731 dadd(AT, x_reg, R0);
2732 scd(AT, dest);
2733 beq(AT, R0, again);
2734 delayed()->nop();
2735 b(done);
2736 delayed()->nop();
2738 // not xchged
2739 bind(nequal);
2740 sync();
2741 //move(c_reg, AT);
2742 //move(AT, R0);
2743 dadd(c_reg, AT, R0);
2744 dadd(AT, R0, R0);
2745 bind(done);
2746 }
2748 // the three registers must be different
2749 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2750 assert_different_registers(tmp, fs, ft);
2751 div_s(tmp, fs, ft);
2752 trunc_l_s(tmp, tmp);
2753 cvt_s_l(tmp, tmp);
2754 mul_s(tmp, tmp, ft);
2755 sub_s(fd, fs, tmp);
2756 }
2758 // the three registers must be different
2759 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2760 assert_different_registers(tmp, fs, ft);
2761 div_d(tmp, fs, ft);
2762 trunc_l_d(tmp, tmp);
2763 cvt_d_l(tmp, tmp);
2764 mul_d(tmp, tmp, ft);
2765 sub_d(fd, fs, tmp);
2766 }
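// Both rem_s and rem_d compute fd = fs - trunc(fs / ft) * ft, i.e. the
// floating-point remainder with truncation toward zero (illustrative formula).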
2768 // Fast_Lock and Fast_Unlock used by C2
2770 // Because the transitions from emitted code to the runtime
2771 // monitorenter/exit helper stubs are so slow it's critical that
2772 // we inline both the stack-locking fast-path and the inflated fast path.
2773 //
2774 // See also: cmpFastLock and cmpFastUnlock.
2775 //
2776 // What follows is a specialized inline transliteration of the code
2777 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2778 // another option would be to emit TrySlowEnter and TrySlowExit methods
2779 // at startup-time. These methods would accept arguments as
2780 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2781 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2782 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2783 // In practice, however, the # of lock sites is bounded and is usually small.
2784 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2785 // if the processor uses simple bimodal branch predictors keyed by EIP,
2786 // since the helper routines would be called from multiple synchronization
2787 // sites.
2788 //
2789 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2790 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2791 // to those specialized methods. That'd give us a mostly platform-independent
2792 // implementation that the JITs could optimize and inline at their pleasure.
2793 // Done correctly, the only time we'd need to cross to native code would be
2794 // to park() or unpark() threads. We'd also need a few more unsafe operators
2795 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2796 // (b) explicit barriers or fence operations.
2797 //
2798 // TODO:
2799 //
2800 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2801 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2802 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2803 // the lock operators would typically be faster than reifying Self.
2804 //
2805 // * Ideally I'd define the primitives as:
2806 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2807 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2808 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2809 // Instead, we're stuck with a rather awkward and brittle register assignments below.
2810 // Furthermore the register assignments are overconstrained, possibly resulting in
2811 // sub-optimal code near the synchronization site.
2812 //
2813 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2814 // Alternately, use a better sp-proximity test.
2815 //
2816 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2817 // Either one is sufficient to uniquely identify a thread.
2818 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2819 //
2820 // * Intrinsify notify() and notifyAll() for the common cases where the
2821 // object is locked by the calling thread but the waitlist is empty.
2822 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2823 //
2824 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2825 // But beware of excessive branch density on AMD Opterons.
2826 //
2827 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2828 // or failure of the fast-path. If the fast-path fails then we pass
2829 // control to the slow-path, typically in C. In Fast_Lock and
2830 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2831 // will emit a conditional branch immediately after the node.
2832 // So we have branches to branches and lots of ICC.ZF games.
2833 // Instead, it might be better to have C2 pass a "FailureLabel"
2834 // into Fast_Lock and Fast_Unlock. In the case of success, control
2835 // will drop through the node. ICC.ZF is undefined at exit.
2836 // In the case of failure, the node will branch directly to the
2837 // FailureLabel
2840 // obj: object to lock
2841 // box: on-stack box address (displaced header location) - KILLED
2842 // rax,: tmp -- KILLED
2843 // scr: tmp -- KILLED
2844 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2846 // Ensure the register assignments are disjoint
2847 guarantee (objReg != boxReg, "") ;
2848 guarantee (objReg != tmpReg, "") ;
2849 guarantee (objReg != scrReg, "") ;
2850 guarantee (boxReg != tmpReg, "") ;
2851 guarantee (boxReg != scrReg, "") ;
2854 block_comment("FastLock");
2855 /*
2856 move(AT, 0x0);
2857 return;
2858 */
2859 if (PrintBiasedLockingStatistics) {
2860 push(tmpReg);
2861 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2862 pop(tmpReg);
2863 }
2865 if (EmitSync & 1) {
2866 move(AT, 0x0);
2867 return;
2868 } else
2869 if (EmitSync & 2) {
2870 Label DONE_LABEL ;
2871 if (UseBiasedLocking) {
2872 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2873 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2874 }
2876 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2877 ori(tmpReg, tmpReg, 0x1);
2878 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2880 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2881 bne(AT, R0, DONE_LABEL);
2882 delayed()->nop();
2884 // Recursive locking
2885 dsubu(tmpReg, tmpReg, SP);
2886 li(AT, (7 - os::vm_page_size() ));
2887 andr(tmpReg, tmpReg, AT);
2888 sd(tmpReg, Address(boxReg, 0));
2889 bind(DONE_LABEL) ;
2890 } else {
2891 // Possible cases that we'll encounter in fast_lock
2892 // ------------------------------------------------
2893 // * Inflated
2894 // -- unlocked
2895 // -- Locked
2896 // = by self
2897 // = by other
2898 // * biased
2899 // -- by Self
2900 // -- by other
2901 // * neutral
2902 // * stack-locked
2903 // -- by self
2904 // = sp-proximity test hits
2905 // = sp-proximity test generates false-negative
2906 // -- by other
2907 //
2909 Label IsInflated, DONE_LABEL, PopDone ;
2911 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2912 // order to reduce the number of conditional branches in the most common cases.
2913 // Beware -- there's a subtle invariant that fetch of the markword
2914 // at [FETCH], below, will never observe a biased encoding (*101b).
2915 // If this invariant is not held we risk exclusion (safety) failure.
2916 if (UseBiasedLocking && !UseOptoBiasInlining) {
2917 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2918 }
2920 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2921 andi(AT, tmpReg, markOopDesc::monitor_value);
2922 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2923 delayed()->nop();
2925 // Attempt stack-locking ...
2926 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2927 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2928 //if (os::is_MP()) {
2929 // sync();
2930 //}
2932 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2933 //AT == 1: unlocked
2935 if (PrintBiasedLockingStatistics) {
2936 Label L;
2937 beq(AT, R0, L);
2938 delayed()->nop();
2939 push(T0);
2940 push(T1);
2941 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2942 pop(T1);
2943 pop(T0);
2944 bind(L);
2945 }
2946 bne(AT, R0, DONE_LABEL);
2947 delayed()->nop();
2949 // Recursive locking
2950 // The object is stack-locked: markword contains stack pointer to BasicLock.
2951 // Locked by current thread if difference with current SP is less than one page.
2952 dsubu(tmpReg, tmpReg, SP);
2953 li(AT, 7 - os::vm_page_size() );
2954 andr(tmpReg, tmpReg, AT);
2955 sd(tmpReg, Address(boxReg, 0));
2956 if (PrintBiasedLockingStatistics) {
2957 Label L;
2958 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2959 bne(tmpReg, R0, L);
2960 delayed()->nop();
2961 push(T0);
2962 push(T1);
2963 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2964 pop(T1);
2965 pop(T0);
2966 bind(L);
2967 }
2968 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
2970 b(DONE_LABEL) ;
2971 delayed()->nop();
2973 bind(IsInflated) ;
2974 // The object's monitor m is unlocked iff m->owner == NULL,
2975 // otherwise m->owner may contain a thread or a stack address.
2977 // TODO: someday avoid the ST-before-CAS penalty by
2978 // relocating (deferring) the following ST.
2979 // We should also think about trying a CAS without having
2980 // fetched _owner. If the CAS is successful we may
2981 // avoid an RTO->RTS upgrade on the $line.
2982 // Without cast to int32_t a movptr will destroy r10 which is typically obj
2983 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
2984 sd(AT, Address(boxReg, 0));
2986 move(boxReg, tmpReg) ;
2987 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2988 // if (m->owner != 0) => AT = 0, goto slow path.
2989 move(AT, R0);
2990 bne(tmpReg, R0, DONE_LABEL);
2991 delayed()->nop();
2993 #ifndef OPT_THREAD
2994 get_thread (TREG) ;
2995 #endif
2996 // It's inflated and appears unlocked
2997 //if (os::is_MP()) {
2998 // sync();
2999 //}
3000 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
3001 // Intentional fall-through into DONE_LABEL ...
3004 // DONE_LABEL is a hot target - we'd really like to place it at the
3005 // start of cache line by padding with NOPs.
3006 // See the AMD and Intel software optimization manuals for the
3007 // most efficient "long" NOP encodings.
3008 // Unfortunately none of our alignment mechanisms suffice.
3009 bind(DONE_LABEL);
3011 // At DONE_LABEL the AT is set as follows ...
3012 // Fast_Unlock uses the same protocol.
3013 // AT == 1 -> Success
3014 // AT == 0 -> Failure - force control through the slow-path
3016 // Avoid branch-to-branch on AMD processors
3017 // This appears to be superstition.
3018 if (EmitSync & 32) nop() ;
3020 }
3021 }
3023 // obj: object to unlock
3024 // box: box address (displaced header location), killed. Must be EAX.
3025 // rbx,: killed tmp; cannot be obj nor box.
3026 //
3027 // Some commentary on balanced locking:
3028 //
3029 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3030 // Methods that don't have provably balanced locking are forced to run in the
3031 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3032 // The interpreter provides two properties:
3033 // I1: At return-time the interpreter automatically and quietly unlocks any
3034 // objects acquired the current activation (frame). Recall that the
3035 // interpreter maintains an on-stack list of locks currently held by
3036 // a frame.
3037 // I2: If a method attempts to unlock an object that is not held by
3038 // the frame, the interpreter throws IMSX.
3039 //
3040 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3041 // B() doesn't have provably balanced locking so it runs in the interpreter.
3042 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3043 // is still locked by A().
3044 //
3045 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3046 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3047 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3048 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3050 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
3052 guarantee (objReg != boxReg, "") ;
3053 guarantee (objReg != tmpReg, "") ;
3054 guarantee (boxReg != tmpReg, "") ;
3058 block_comment("FastUnlock");
3061 if (EmitSync & 4) {
3062 // Disable - inhibit all inlining. Force control through the slow-path
3063 move(AT, 0x0);
3064 return;
3065 } else
3066 if (EmitSync & 8) {
3067 Label DONE_LABEL ;
3068 if (UseBiasedLocking) {
3069 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3070 }
3071 // classic stack-locking code ...
3072 ld(tmpReg, Address(boxReg, 0)) ;
3073 beq(tmpReg, R0, DONE_LABEL) ;
3074 move(AT, 0x1); // delay slot
3076 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
3077 bind(DONE_LABEL);
3078 } else {
3079 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3081 // Critically, the biased locking test must have precedence over
3082 // and appear before the (box->dhw == 0) recursive stack-lock test.
3083 if (UseBiasedLocking && !UseOptoBiasInlining) {
3084 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3085 }
3087 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
3088 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
3089 delayed()->daddiu(AT, R0, 0x1);
3091 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3092 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
3093 beq(AT, R0, Stacked) ; // Inflated?
3094 delayed()->nop();
3096 bind(Inflated) ;
3097 // It's inflated.
3098 // Despite our balanced locking property we still check that m->_owner == Self
3099 // as java routines or native JNI code called by this thread might
3100 // have released the lock.
3101 // Refer to the comments in synchronizer.cpp for how we might encode extra
3102 // state in _succ so we can avoid fetching EntryList|cxq.
3103 //
3104 // I'd like to add more cases in fast_lock() and fast_unlock() --
3105 // such as recursive enter and exit -- but we have to be wary of
3106 // I$ bloat, T$ effects and BP$ effects.
3107 //
3108 // If there's no contention try a 1-0 exit. That is, exit without
3109 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3110 // we detect and recover from the race that the 1-0 exit admits.
3111 //
3112 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3113 // before it STs null into _owner, releasing the lock. Updates
3114 // to data protected by the critical section must be visible before
3115 // we drop the lock (and thus before any other thread could acquire
3116 // the lock and observe the fields protected by the lock).
3117 // IA32's memory-model is SPO, so STs are ordered with respect to
3118 // each other and there's no need for an explicit barrier (fence).
3119 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3120 #ifndef OPT_THREAD
3121 get_thread (TREG) ;
3122 #endif
3124 // It's inflated
3125 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3126 xorr(boxReg, boxReg, TREG);
3128 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3129 orr(boxReg, boxReg, AT);
3131 move(AT, R0);
3132 bne(boxReg, R0, DONE_LABEL);
3133 delayed()->nop();
3135 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3136 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3137 orr(boxReg, boxReg, AT);
3139 move(AT, R0);
3140 bne(boxReg, R0, DONE_LABEL);
3141 delayed()->nop();
3143 sync();
3144 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3145 move(AT, 0x1);
3146 b(DONE_LABEL);
3147 delayed()->nop();
3149 bind (Stacked);
3150 ld(tmpReg, Address(boxReg, 0)) ;
3151 //if (os::is_MP()) { sync(); }
3152 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3154 if (EmitSync & 65536) {
3155 bind (CheckSucc);
3156 }
3158 bind(DONE_LABEL);
3160 // Avoid branch to branch on AMD processors
3161 if (EmitSync & 32768) { nop() ; }
3162 }
3163 }
3165 void MacroAssembler::align(int modulus) {
3166 while (offset() % modulus != 0) nop();
3167 }
3170 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3171 //Unimplemented();
3172 }
3174 #ifdef _LP64
3175 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3177 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
3178 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3179 #else
3180 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3182 FloatRegister caller_saved_fpu_registers[] = {};
3183 #endif
3185 // We preserve all caller-saved registers
3186 void MacroAssembler::pushad(){
3187 int i;
3189 /* Fixed-point registers */
3190 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3191 daddi(SP, SP, -1 * len * wordSize);
3192 for (i = 0; i < len; i++)
3193 {
3194 #ifdef _LP64
3195 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3196 #else
3197 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3198 #endif
3199 }
3201 /* Floating-point registers */
3202 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3203 daddi(SP, SP, -1 * len * wordSize);
3204 for (i = 0; i < len; i++)
3205 {
3206 #ifdef _LP64
3207 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3208 #else
3209 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3210 #endif
3211 }
3212 }
3214 void MacroAssembler::popad(){
3215 int i;
3217 /* Floating-point registers */
3218 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3219 for (i = 0; i < len; i++)
3220 {
3221 #ifdef _LP64
3222 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3223 #else
3224 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3225 #endif
3226 }
3227 daddi(SP, SP, len * wordSize);
3229 /* Fixed-point registers */
3230 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3231 for (i = 0; i < len; i++)
3232 {
3233 #ifdef _LP64
3234 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3235 #else
3236 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3237 #endif
3238 }
3239 daddi(SP, SP, len * wordSize);
3240 }
3242 void MacroAssembler::push2(Register reg1, Register reg2) {
3243 #ifdef _LP64
3244 daddi(SP, SP, -16);
3245 sd(reg2, SP, 0);
3246 sd(reg1, SP, 8);
3247 #else
3248 addi(SP, SP, -8);
3249 sw(reg2, SP, 0);
3250 sw(reg1, SP, 4);
3251 #endif
3252 }
3254 void MacroAssembler::pop2(Register reg1, Register reg2) {
3255 #ifdef _LP64
3256 ld(reg1, SP, 0);
3257 ld(reg2, SP, 8);
3258 daddi(SP, SP, 16);
3259 #else
3260 lw(reg1, SP, 0);
3261 lw(reg2, SP, 4);
3262 addi(SP, SP, 8);
3263 #endif
3264 }
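// Reader note: push2(a, b) stores b at SP+0 and a at SP+8, while pop2(a, b)
// loads a from SP+0 and b from SP+8, so push2(a, b) pairs with pop2(b, a).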
3266 // for the UseCompressedOops option
3267 void MacroAssembler::load_klass(Register dst, Register src) {
3268 #ifdef _LP64
3269 if(UseCompressedClassPointers){
3270 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3271 decode_klass_not_null(dst);
3272 } else
3273 #endif
3274 ld(dst, src, oopDesc::klass_offset_in_bytes());
3275 }
3277 void MacroAssembler::store_klass(Register dst, Register src) {
3278 #ifdef _LP64
3279 if(UseCompressedClassPointers){
3280 encode_klass_not_null(src);
3281 sw(src, dst, oopDesc::klass_offset_in_bytes());
3282 } else {
3283 #endif
3284 sd(src, dst, oopDesc::klass_offset_in_bytes());
3285 }
3286 }
3288 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3289 load_klass(dst, src);
3290 ld(dst, Address(dst, Klass::prototype_header_offset()));
3291 }
3293 #ifdef _LP64
3294 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3295 if (UseCompressedClassPointers) {
3296 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3297 }
3298 }
3300 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3301 if(UseCompressedOops){
3302 lwu(dst, src);
3303 decode_heap_oop(dst);
3304 } else {
3305 ld(dst, src);
3306 }
3307 }
3309 void MacroAssembler::store_heap_oop(Address dst, Register src){
3310 if(UseCompressedOops){
3311 assert(!dst.uses(src), "not enough registers");
3312 encode_heap_oop(src);
3313 sw(src, dst);
3314 } else {
3315 sd(src, dst);
3316 }
3317 }
3319 void MacroAssembler::store_heap_oop_null(Address dst){
3320 if(UseCompressedOops){
3321 sw(R0, dst);
3322 } else {
3323 sd(R0, dst);
3324 }
3325 }
3327 #ifdef ASSERT
3328 void MacroAssembler::verify_heapbase(const char* msg) {
3329 assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
3330 assert (Universe::heap() != NULL, "java heap should be initialized");
3331 }
3332 #endif
3335 // Algorithm must match oop.inline.hpp encode_heap_oop.
3336 void MacroAssembler::encode_heap_oop(Register r) {
3337 #ifdef ASSERT
3338 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3339 #endif
3340 verify_oop(r, "broken oop in encode_heap_oop");
3341 if (Universe::narrow_oop_base() == NULL) {
3342 if (Universe::narrow_oop_shift() != 0) {
3343 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3344 shr(r, LogMinObjAlignmentInBytes);
3345 }
3346 return;
3347 }
3349 movz(r, S5_heapbase, r);
3350 dsub(r, r, S5_heapbase);
3351 if (Universe::narrow_oop_shift() != 0) {
3352 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3353 shr(r, LogMinObjAlignmentInBytes);
3354 }
3355 }
3357 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3358 #ifdef ASSERT
3359 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3360 #endif
3361 verify_oop(src, "broken oop in encode_heap_oop");
3362 if (Universe::narrow_oop_base() == NULL) {
3363 if (Universe::narrow_oop_shift() != 0) {
3364 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3365 dsrl(dst, src, LogMinObjAlignmentInBytes);
3366 } else {
3367 if (dst != src) move(dst, src);
3368 }
3369 } else {
3370 if (dst == src) {
3371 movz(dst, S5_heapbase, dst);
3372 dsub(dst, dst, S5_heapbase);
3373 if (Universe::narrow_oop_shift() != 0) {
3374 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3375 shr(dst, LogMinObjAlignmentInBytes);
3376 }
3377 } else {
3378 dsub(dst, src, S5_heapbase);
3379 if (Universe::narrow_oop_shift() != 0) {
3380 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3381 shr(dst, LogMinObjAlignmentInBytes);
3382 }
3383 movz(dst, R0, src);
3384 }
3385 }
3386 }
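// Encoding sketch (illustrative):
//   narrow = (oop == NULL) ? 0 : (oop - narrow_oop_base) >> narrow_oop_shift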
3388 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3389 assert (UseCompressedOops, "should be compressed");
3390 #ifdef ASSERT
3391 if (CheckCompressedOops) {
3392 Label ok;
3393 bne(r, R0, ok);
3394 delayed()->nop();
3395 stop("null oop passed to encode_heap_oop_not_null");
3396 bind(ok);
3397 }
3398 #endif
3399 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3400 if (Universe::narrow_oop_base() != NULL) {
3401 dsub(r, r, S5_heapbase);
3402 }
3403 if (Universe::narrow_oop_shift() != 0) {
3404 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3405 shr(r, LogMinObjAlignmentInBytes);
3406 }
3408 }
3410 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3411 assert (UseCompressedOops, "should be compressed");
3412 #ifdef ASSERT
3413 if (CheckCompressedOops) {
3414 Label ok;
3415 bne(src, R0, ok);
3416 delayed()->nop();
3417 stop("null oop passed to encode_heap_oop_not_null2");
3418 bind(ok);
3419 }
3420 #endif
3421 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3423 if (Universe::narrow_oop_base() != NULL) {
3424 dsub(dst, src, S5_heapbase);
3425 if (Universe::narrow_oop_shift() != 0) {
3426 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3427 shr(dst, LogMinObjAlignmentInBytes);
3428 }
3429 } else {
3430 if (Universe::narrow_oop_shift() != 0) {
3431 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3432 dsrl(dst, src, LogMinObjAlignmentInBytes);
3433 } else {
3434 if (dst != src) move(dst, src);
3435 }
3436 }
3437 }
3439 void MacroAssembler::decode_heap_oop(Register r) {
3440 #ifdef ASSERT
3441 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3442 #endif
3443 if (Universe::narrow_oop_base() == NULL) {
3444 if (Universe::narrow_oop_shift() != 0) {
3445 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3446 shl(r, LogMinObjAlignmentInBytes);
3447 }
3448 } else {
3449 move(AT, r);
3450 if (Universe::narrow_oop_shift() != 0) {
3451 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3452 shl(r, LogMinObjAlignmentInBytes);
3453 }
3454 dadd(r, r, S5_heapbase);
3455 movz(r, R0, AT);
3456 }
3457 verify_oop(r, "broken oop in decode_heap_oop");
3458 }
3460 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3461 #ifdef ASSERT
3462 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3463 #endif
3464 if (Universe::narrow_oop_base() == NULL) {
3465 if (Universe::narrow_oop_shift() != 0) {
3466 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3467 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3468 dsll(dst, src, LogMinObjAlignmentInBytes);
3469 } else {
3470 if (dst != src) move(dst, src);
3471 }
3472 } else {
3473 if (dst == src) {
3474 move(AT, dst);
3475 if (Universe::narrow_oop_shift() != 0) {
3476 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3477 shl(dst, LogMinObjAlignmentInBytes);
3478 }
3479 dadd(dst, dst, S5_heapbase);
3480 movz(dst, R0, AT);
3481 } else {
3482 if (Universe::narrow_oop_shift() != 0) {
3483 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3484 dsll(dst, src, LogMinObjAlignmentInBytes);
3485 daddu(dst, dst, S5_heapbase);
3486 } else {
3487 daddu(dst, src, S5_heapbase);
3488 }
3489 movz(dst, R0, src);
3490 }
3491 }
3492 verify_oop(dst, "broken oop in decode_heap_oop");
3493 }
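// Decoding sketch (illustrative):
//   oop = (narrow == 0) ? NULL : narrow_oop_base + ((uintptr_t)narrow << narrow_oop_shift)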
3495 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3496 // Note: it will change flags
3497 assert (UseCompressedOops, "should only be used for compressed headers");
3498 assert (Universe::heap() != NULL, "java heap should be initialized");
3499 // Cannot assert, unverified entry point counts instructions (see .ad file)
3500 // vtableStubs also counts instructions in pd_code_size_limit.
3501 // Also do not verify_oop as this is called by verify_oop.
3502 if (Universe::narrow_oop_shift() != 0) {
3503 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3504 shl(r, LogMinObjAlignmentInBytes);
3505 if (Universe::narrow_oop_base() != NULL) {
3506 daddu(r, r, S5_heapbase);
3507 }
3508 } else {
3509 assert (Universe::narrow_oop_base() == NULL, "sanity");
3510 }
3511 }
3513 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3514 assert (UseCompressedOops, "should only be used for compressed headers");
3515 assert (Universe::heap() != NULL, "java heap should be initialized");
3517 // Cannot assert, unverified entry point counts instructions (see .ad file)
3518 // vtableStubs also counts instructions in pd_code_size_limit.
3519 // Also do not verify_oop as this is called by verify_oop.
3520 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3521 if (Universe::narrow_oop_shift() != 0) {
3522 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3523 if (LogMinObjAlignmentInBytes == Address::times_8) {
3524 dsll(dst, src, LogMinObjAlignmentInBytes);
3525 daddu(dst, dst, S5_heapbase);
3526 } else {
3527 dsll(dst, src, LogMinObjAlignmentInBytes);
3528 if (Universe::narrow_oop_base() != NULL) {
3529 daddu(dst, dst, S5_heapbase);
3530 }
3531 }
3532 } else {
3533 assert (Universe::narrow_oop_base() == NULL, "sanity");
3534 if (dst != src) {
3535 move(dst, src);
3536 }
3537 }
3538 }
3540 void MacroAssembler::encode_klass_not_null(Register r) {
3541 if (Universe::narrow_klass_base() != NULL) {
3542 assert(r != AT, "Encoding a klass in AT");
3543 set64(AT, (int64_t)Universe::narrow_klass_base());
3544 dsub(r, r, AT);
3545 }
3546 if (Universe::narrow_klass_shift() != 0) {
3547 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3548 shr(r, LogKlassAlignmentInBytes);
3549 }
3550 }
3552 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3553 if (dst == src) {
3554 encode_klass_not_null(src);
3555 } else {
3556 if (Universe::narrow_klass_base() != NULL) {
3557 set64(dst, (int64_t)Universe::narrow_klass_base());
3558 dsub(dst, src, dst);
3559 if (Universe::narrow_klass_shift() != 0) {
3560 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3561 shr(dst, LogKlassAlignmentInBytes);
3562 }
3563 } else {
3564 if (Universe::narrow_klass_shift() != 0) {
3565 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3566 dsrl(dst, src, LogKlassAlignmentInBytes);
3567 } else {
3568 move(dst, src);
3569 }
3570 }
3571 }
3572 }
3574 // Function instr_size_for_decode_klass_not_null() counts the instructions
3575 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3576 // when (Universe::heap() != NULL). Hence, if the instructions they
3577 // generate change, then this method needs to be updated.
3578 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3579 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3580 if (Universe::narrow_klass_base() != NULL) {
3581 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
3582 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3583 } else {
3584 // longest load decode klass function, mov64, leaq
3585 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3586 }
3587 }
3589 void MacroAssembler::decode_klass_not_null(Register r) {
3590 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3591 assert(r != AT, "Decoding a klass in AT");
3592 // Cannot assert, unverified entry point counts instructions (see .ad file)
3593 // vtableStubs also counts instructions in pd_code_size_limit.
3594 // Also do not verify_oop as this is called by verify_oop.
3595 if (Universe::narrow_klass_shift() != 0) {
3596 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3597 shl(r, LogKlassAlignmentInBytes);
3598 }
3599 if (Universe::narrow_klass_base() != NULL) {
3600 set64(AT, (int64_t)Universe::narrow_klass_base());
3601 daddu(r, r, AT);
3602 // Not necessary for MIPS at all.
3603 //reinit_heapbase();
3604 }
3605 }
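// Klass decoding follows the same shape (illustrative):
//   klass = narrow_klass_base + ((uintptr_t)narrow << narrow_klass_shift)
// No NULL check is emitted: the _not_null variants assume a non-null input.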
3607 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3608 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3610 if (dst == src) {
3611 decode_klass_not_null(dst);
3612 } else {
3613 // Cannot assert, unverified entry point counts instructions (see .ad file)
3614 // vtableStubs also counts instructions in pd_code_size_limit.
3615 // Also do not verify_oop as this is called by verify_oop.
3616 set64(dst, (int64_t)Universe::narrow_klass_base());
3617 if (Universe::narrow_klass_shift() != 0) {
3618 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3619 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3620 dsll(AT, src, Address::times_8);
3621 daddu(dst, dst, AT);
3622 } else {
3623 daddu(dst, src, dst);
3624 }
3625 }
3626 }
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
    return;
  }
  if (value < 0) { decrementl(reg, -value); return; }
  if (value == 0) { return; }

  if (Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, value));
    LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  }
}
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
    return;
  }
  if (value < 0) { incrementl(reg, -value); return; }
  if (value == 0) { return; }

  if (Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, -value));
    LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  }
}
3664 void MacroAssembler::reinit_heapbase() {
3665 if (UseCompressedOops || UseCompressedClassPointers) {
3666 if (Universe::heap() != NULL) {
3667 if (Universe::narrow_oop_base() == NULL) {
3668 move(S5_heapbase, R0);
3669 } else {
3670 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3671 }
3672 } else {
3673 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3674 ld(S5_heapbase, S5_heapbase, 0);
3675 }
3676 }
3677 }
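
// A sketch of the logic above:
//   S5_heapbase = (Universe::heap() != NULL)
//                   ? Universe::narrow_ptrs_base()
//                   : *Universe::narrow_ptrs_base_addr();
// Before the heap exists, the base is not yet a compile-time constant, so
// it must be reloaded indirectly through narrow_ptrs_base_addr().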
3678 #endif // _LP64
3680 void MacroAssembler::check_klass_subtype(Register sub_klass,
3681 Register super_klass,
3682 Register temp_reg,
3683 Label& L_success) {
  // Implemented via the fast and slow subtype-check paths below (cf. gen_subtype_check).
3685 Label L_failure;
3686 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3687 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3688 bind(L_failure);
3689 }
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->li(AT, (address)flag_addr);
  _masm->lb(AT, AT, 0);
  _masm->addi(AT, AT, -value);
  _masm->beq(AT, R0, _label);
  _masm->delayed()->nop();
}
3700 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3701 Register super_klass,
3702 Register temp_reg,
3703 Label* L_success,
3704 Label* L_failure,
3705 Label* L_slow_path,
3706 RegisterOrConstant super_check_offset) {
3707 assert_different_registers(sub_klass, super_klass, temp_reg);
3708 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3709 if (super_check_offset.is_register()) {
3710 assert_different_registers(sub_klass, super_klass,
3711 super_check_offset.as_register());
3712 } else if (must_load_sco) {
3713 assert(temp_reg != noreg, "supply either a temp or a register offset");
3714 }
3716 Label L_fallthrough;
3717 int label_nulls = 0;
3718 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3719 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3720 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3721 assert(label_nulls <= 1, "at most one NULL in the batch");
3723 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3724 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3725 // If the pointers are equal, we are done (e.g., String[] elements).
3726 // This self-check enables sharing of secondary supertype arrays among
3727 // non-primary types such as array-of-interface. Otherwise, each such
3728 // type would need its own customized SSA.
3729 // We move this check to the front of the fast path because many
3730 // type checks are in fact trivially successful in this manner,
3731 // so we get a nicely predicted branch right at the start of the check.
3732 beq(sub_klass, super_klass, *L_success);
3733 delayed()->nop();
3734 // Check the supertype display:
3735 if (must_load_sco) {
    // The super-check offset is a positive 32-bit value; lwu zero-extends it.
3737 lwu(temp_reg, super_klass, sco_offset);
3738 super_check_offset = RegisterOrConstant(temp_reg);
3739 }
  // AT = *(sub_klass + super_check_offset)
  if (super_check_offset.is_register()) {
    daddu(AT, sub_klass, super_check_offset.as_register());
    ld(AT, AT, 0);
  } else {
    ld(AT, sub_klass, super_check_offset.as_constant());
  }
3744 // This check has worked decisively for primary supers.
3745 // Secondary supers are sought in the super_cache ('super_cache_addr').
3746 // (Secondary supers are interfaces and very deeply nested subtypes.)
3747 // This works in the same check above because of a tricky aliasing
3748 // between the super_cache and the primary super display elements.
3749 // (The 'super_check_addr' can address either, as the case requires.)
3750 // Note that the cache is updated below if it does not help us find
3751 // what we need immediately.
3752 // So if it was a primary super, we can just fail immediately.
3753 // Otherwise, it's the slow path for us (no success at this point).
3755 if (super_check_offset.is_register()) {
3756 beq(super_klass, AT, *L_success);
3757 delayed()->nop();
3758 addi(AT, super_check_offset.as_register(), -sc_offset);
3759 if (L_failure == &L_fallthrough) {
3760 beq(AT, R0, *L_slow_path);
3761 delayed()->nop();
3762 } else {
3763 bne(AT, R0, *L_failure);
3764 delayed()->nop();
3765 b(*L_slow_path);
3766 delayed()->nop();
3767 }
3768 } else if (super_check_offset.as_constant() == sc_offset) {
3769 // Need a slow path; fast failure is impossible.
3770 if (L_slow_path == &L_fallthrough) {
3771 beq(super_klass, AT, *L_success);
3772 delayed()->nop();
3773 } else {
3774 bne(super_klass, AT, *L_slow_path);
3775 delayed()->nop();
3776 b(*L_success);
3777 delayed()->nop();
3778 }
3779 } else {
3780 // No slow path; it's a fast decision.
3781 if (L_failure == &L_fallthrough) {
3782 beq(super_klass, AT, *L_success);
3783 delayed()->nop();
3784 } else {
3785 bne(super_klass, AT, *L_failure);
3786 delayed()->nop();
3787 b(*L_success);
3788 delayed()->nop();
3789 }
3790 }
3792 bind(L_fallthrough);
3794 }
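
// The fast path above boils down to the following pseudo-C, where 'sco'
// stands for the super_check_offset (register or constant):
//   if (sub_klass == super_klass)            goto *L_success;
//   if (*(sub_klass + sco) == super_klass)   goto *L_success;
//   if (sco == secondary_super_cache_offset) goto *L_slow_path;
//   else                                     goto *L_failure;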
3797 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3798 Register super_klass,
3799 Register temp_reg,
3800 Register temp2_reg,
3801 Label* L_success,
3802 Label* L_failure,
3803 bool set_cond_codes) {
3804 assert_different_registers(sub_klass, super_klass, temp_reg);
3805 if (temp2_reg != noreg)
3806 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3807 else
3808 temp2_reg = T9;
3809 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3811 Label L_fallthrough;
3812 int label_nulls = 0;
3813 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3814 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3815 assert(label_nulls <= 1, "at most one NULL in the batch");
3817 // a couple of useful fields in sub_klass:
3818 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3819 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3820 Address secondary_supers_addr(sub_klass, ss_offset);
3821 Address super_cache_addr( sub_klass, sc_offset);
  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // (The x86 version scans with repne scas and fixed registers; here we
  // use an explicit loop, with temp_reg as the scan pointer and
  // temp2_reg as the remaining-element counter.)
3829 #ifndef PRODUCT
3830 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3831 ExternalAddress pst_counter_addr((address) pst_counter);
3832 NOT_LP64( incrementl(pst_counter_addr) );
3833 #endif //PRODUCT
3835 // We will consult the secondary-super array.
3836 ld(temp_reg, secondary_supers_addr);
  // Load the array length (a non-negative int, so lw is fine).
3838 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3839 // Skip to start of data.
3840 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
  // Scan temp2_reg words at [temp_reg] for an occurrence of super_klass.
  // The loop below branches to *L_failure once the counter reaches zero,
  // and to 'subtype' as soon as a match is found.
3848 /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
3849 Label Loop, subtype;
3850 bind(Loop);
3851 beq(temp2_reg, R0, *L_failure);
3852 delayed()->nop();
3853 ld(AT, temp_reg, 0);
3854 beq(AT, super_klass, subtype);
3855 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3856 b(Loop);
3857 delayed()->daddi(temp2_reg, temp2_reg, -1);
  // Success.  Cache the super we found and proceed in triumph.
  bind(subtype);
  sd(super_klass, super_cache_addr);
  if (L_success != &L_fallthrough) {
    b(*L_success);
    delayed()->nop();
  }

#undef IS_A_TEMP
3869 bind(L_fallthrough);
3870 }
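
// The scan loop above is equivalent to this pseudo-C ('data' and 'length'
// are shorthand for the Array<Klass*> offsets loaded above; temp_reg is
// the element pointer, temp2_reg the remaining count):
//   for (scan = supers->data, n = supers->length; n != 0; n--, scan++) {
//     if (*scan == super_klass) {
//       sub_klass->secondary_super_cache = super_klass;  // cache the hit
//       goto *L_success;
//     }
//   }
//   goto *L_failure;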
3872 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3873 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3874 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3875 verify_oop(oop_result, "broken oop in call_VM_base");
3876 }
3878 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3879 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3880 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3881 }
3883 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3884 int extra_slot_offset) {
3885 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3886 int stackElementSize = Interpreter::stackElementSize;
3887 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3888 #ifdef ASSERT
3889 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3890 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3891 #endif
3892 Register scale_reg = NOREG;
3893 Address::ScaleFactor scale_factor = Address::no_scale;
3894 if (arg_slot.is_constant()) {
3895 offset += arg_slot.as_constant() * stackElementSize;
3896 } else {
3897 scale_reg = arg_slot.as_register();
3898 scale_factor = Address::times_8;
3899 }
3900 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
3901 // offset += wordSize; // return PC is on stack
  if (scale_reg == NOREG) {
    return Address(SP, offset);
  } else {
    dsll(scale_reg, scale_reg, scale_factor);
    daddu(scale_reg, SP, scale_reg);
    return Address(scale_reg, offset);
  }
3908 }
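
// Worked example (assuming the 64-bit interpreter layout, where
// Interpreter::stackElementSize == wordSize == 8): for a constant
// arg_slot of 2 and extra_slot_offset of 0 the result is simply
// Address(SP, Interpreter::expr_offset_in_bytes(0) + 2 * 8), while a
// register arg_slot is scaled by 8 in place and added to SP.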
3910 SkipIfEqual::~SkipIfEqual() {
3911 _masm->bind(_label);
3912 }
3914 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3915 switch (size_in_bytes) {
3916 #ifndef _LP64
3917 case 8:
3918 assert(dst2 != noreg, "second dest register required");
3919 lw(dst, src);
3920 lw(dst2, src.plus_disp(BytesPerInt));
3921 break;
3922 #else
3923 case 8: ld(dst, src); break;
3924 #endif
3925 case 4: lw(dst, src); break;
3926 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
    case 1: is_signed ? lb(dst, src) : lbu(dst, src); break;
3928 default: ShouldNotReachHere();
3929 }
3930 }
3932 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
3933 switch (size_in_bytes) {
3934 #ifndef _LP64
3935 case 8:
3936 assert(src2 != noreg, "second source register required");
3937 sw(src, dst);
3938 sw(src2, dst.plus_disp(BytesPerInt));
3939 break;
3940 #else
3941 case 8: sd(src, dst); break;
3942 #endif
3943 case 4: sw(src, dst); break;
3944 case 2: sh(src, dst); break;
3945 case 1: sb(src, dst); break;
3946 default: ShouldNotReachHere();
3947 }
3948 }
3950 // Look up the method for a megamorphic invokeinterface call.
3951 // The target method is determined by <intf_klass, itable_index>.
3952 // The receiver klass is in recv_klass.
3953 // On success, the result will be in method_result, and execution falls through.
3954 // On failure, execution transfers to the given label.
3955 void MacroAssembler::lookup_interface_method(Register recv_klass,
3956 Register intf_klass,
3957 RegisterOrConstant itable_index,
3958 Register method_result,
3959 Register scan_temp,
3960 Label& L_no_such_interface) {
3961 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
3962 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3963 "caller must use same register for non-constant itable index as for method");
3965 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3966 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
3967 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3968 int scan_step = itableOffsetEntry::size() * wordSize;
3969 int vte_size = vtableEntry::size() * wordSize;
3970 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3971 assert(vte_size == wordSize, "else adjust times_vte_scale");
3973 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
3975 // %%% Could store the aligned, prescaled offset in the klassoop.
3976 dsll(scan_temp, scan_temp, times_vte_scale);
3977 daddu(scan_temp, recv_klass, scan_temp);
3978 daddiu(scan_temp, scan_temp, vtable_base);
3979 if (HeapWordsPerLong > 1) {
3980 // Round up to align_object_offset boundary
3981 // see code for InstanceKlass::start_of_itable!
3982 round_to(scan_temp, BytesPerLong);
3983 }
3985 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
3986 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
3987 if (itable_index.is_constant()) {
    set64(AT, (int)itable_index.as_constant());
3989 dsll(AT, AT, (int)Address::times_ptr);
3990 } else {
3991 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
3992 }
3993 daddu(AT, AT, recv_klass);
3994 daddiu(recv_klass, AT, itentry_off);
3996 Label search, found_method;
3998 for (int peel = 1; peel >= 0; peel--) {
3999 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
4001 if (peel) {
4002 beq(intf_klass, method_result, found_method);
4003 nop();
4004 } else {
4005 bne(intf_klass, method_result, search);
4006 nop();
4007 // (invert the test to fall through to found_method...)
4008 }
4010 if (!peel) break;
4012 bind(search);
4014 // Check that the previous entry is non-null. A null entry means that
4015 // the receiver class doesn't implement the interface, and wasn't the
4016 // same as when the caller was compiled.
4017 beq(method_result, R0, L_no_such_interface);
4018 nop();
4019 daddiu(scan_temp, scan_temp, scan_step);
4020 }
4022 bind(found_method);
4024 // Got a hit.
4025 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  if (UseLoongsonISA) {
4027 gsldx(method_result, recv_klass, scan_temp, 0);
4028 } else {
4029 daddu(AT, recv_klass, scan_temp);
4030 ld(method_result, AT);
4031 }
4032 }
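
// The peeled scan above amounts to this pseudo-C over the
// itableOffsetEntry table ('interface' and 'offset' are shorthand for
// the *_offset_in_bytes() loads; recv_klass means its value on entry):
//   for (oe = itable_start; ; oe += scan_step) {
//     Klass* k = oe->interface;
//     if (k == intf_klass) break;               // found_method
//     if (k == NULL) goto L_no_such_interface;  // end of table
//   }
//   method_result = *(recv_klass + oe->offset
//                     + itable_index * wordSize + itentry_off);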
4034 // virtual method calling
4035 void MacroAssembler::lookup_virtual_method(Register recv_klass,
4036 RegisterOrConstant vtable_index,
4037 Register method_result) {
4038 Register tmp = GP;
4039 push(tmp);
4041 if (vtable_index.is_constant()) {
4042 assert_different_registers(recv_klass, method_result, tmp);
4043 } else {
4044 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
4045 }
4046 const int base = InstanceKlass::vtable_start_offset() * wordSize;
4047 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
4048 /*
4049 Address vtable_entry_addr(recv_klass,
4050 vtable_index, Address::times_ptr,
4051 base + vtableEntry::method_offset_in_bytes());
4052 */
4053 if (vtable_index.is_constant()) {
4054 set64(AT, vtable_index.as_constant());
4055 dsll(AT, AT, (int)Address::times_ptr);
4056 } else {
4057 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
4058 }
4059 set64(tmp, base + vtableEntry::method_offset_in_bytes());
4060 daddu(tmp, tmp, AT);
4061 daddu(tmp, tmp, recv_klass);
4062 ld(method_result, tmp, 0);
4064 pop(tmp);
4065 }
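
// In effect the lookup above computes:
//   method_result = *(recv_klass
//                     + InstanceKlass::vtable_start_offset() * wordSize
//                     + vtable_index * wordSize
//                     + vtableEntry::method_offset_in_bytes());
// GP is pressed into service (and saved/restored) as a second scratch
// register because AT is already consumed by the index scaling.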