Thu, 24 May 2018 19:57:44 +0800
[Code Reorganization] delete Trailing whitespace
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
// Implementation of MacroAssembler

intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k = 0; k < sizeof(i) / sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k = 0; k < sizeof(f) / sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}
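// i_offset()/f_offset() locate slot k of the arrays above using the
// offsetof-style null-pointer idiom: only an address is formed, nothing is
// dereferenced. save_registers()/restore_registers() below dump and reload
// all 32 GPRs/FPRs at these positions relative to the base register A0,
// feeding the debugging print() routine.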
int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k = 0; k < 32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k = 0; k < 32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;
  jint *pc = (jint *)branch;

  /*
    move(AT, RA); // dadd
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  */
  if((opcode(stub_inst) == special_op) && (special(stub_inst) == dadd_op)) {
    assert(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      /* revert to "beq + nop" */
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  } else if (special(pc[4]) == jr_op
          && opcode(pc[4]) == special_op
          && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) {
    CodeBuffer cb(branch, 4 * 4);
    MacroAssembler masm(&cb);
    masm.patchable_set48(T9, (long)(target));
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", (long)target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
      tty->print("0x%x, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}
// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
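// Note: a MIPS j/jal encodes a 26-bit instruction index and can only reach
// targets within the same 256MB-aligned region as the instruction itself.
// Checking both ends of the code cache therefore suffices: if the lowest and
// the highest possible code addresses can both reach the target, any address
// in between can as well.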
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}
void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}
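// A patchable jump always occupies a fixed six-instruction footprint (see
// insts_for_patchable_jump below), so the near form is padded with leading
// nops. This keeps the site large enough to be re-patched in place with the
// far form (patchable_set48 + jr) should relocation require it.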
void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}
void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}
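// The *_far branch helpers below work around the signed 16-bit word offset
// of MIPS conditional branches (about +/-128KB of reach): when the target is
// out of range, the condition is inverted to branch around an unconditional
// b_far to the real destination.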
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  bne(rs, rt, not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bne_long(Register rs, Register rt, Label& L) {
  Label not_taken;

  beq(rs, rt, not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1t_long(Label& L) {
  Label not_taken;

  bc1f(not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}

void MacroAssembler::bc1f_long(Label& L) {
  Label not_taken;

  bc1t(not_taken);
  nop();

  jmp_far(L);

  bind(not_taken);
}
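// b_far materializes the current PC by abusing "bgezal zero, +1": since
// 0 >= 0 the branch is always taken and RA receives the address of the
// bgezal plus 8, i.e. the address of the following lui. The lui/ori pair
// (patched later, see pd_patch_instruction) then supplies the displacement
// that daddu adds to RA to form the absolute target, while AT preserves the
// caller's RA across the sequence.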
void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
    /*
      MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
      0x00000055651ed514: dadd at, ra, zero
      0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

      0x00000055651ed51c: sll zero, zero, 0
      0x00000055651ed520: lui t9, 0x0
      0x00000055651ed524: ori t9, t9, 0x21b8
      0x00000055651ed528: daddu t9, t9, ra
      0x00000055651ed52c: dadd ra, at, zero
      0x00000055651ed530: jr t9
      0x00000055651ed534: sll zero, zero, 0
    */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* address must be bounded */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}
void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
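// The update is a classic LL/SC retry loop: ll loads the counter linked,
// sc stores it back conditionally and leaves 0 in tmp_reg2 if another CPU
// intervened, in which case we branch back to 'again' and retry.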
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
// This method handles the stack alignment itself, so the caller need not reserve
// stack space for the arguments.
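// Note: the N64 ABI expects a 16-byte aligned SP at C call sites. The code
// below tests the low bits of SP and, when misaligned, temporarily drops SP
// by 8 around the call so the callee sees a properly aligned stack.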
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::jmp_far(Label& L) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)entry);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());

    relocate(relocInfo::internal_word_type);
    patchable_set48(T9, (long)pc());
  }

  jr(T9);
  nop();
}
void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(AT, (long)obj);
  sd(AT, dst);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  int oop_index;
  if (obj) {
    oop_index = oop_recorder()->find_index(obj);
  } else {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  }
  relocate(metadata_Relocation::spec(oop_index));
  patchable_set48(dst, (long)obj);
}
void MacroAssembler::call(address entry) {
  // C/C++ code assumes T9 holds the entry point, so we always move entry into T9.
  // Maybe there is a more graceful way to handle this. FIXME
  // For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}
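// c2bool normalizes a C-style truth value into a Java boolean:
// zero stays zero, any non-zero value becomes 1.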
void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}
#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax, = 0x%08x", rax);
      tty->print_cr("rbx, = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp, = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}
void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

      //if (os::message_box(msg, "Execution stopped, print registers?"))
      //  regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument. added by yjl 7/10/2005
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore space for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
void MacroAssembler::print_reg(Register reg) {
  /*
  char *s = getenv("PRINT_REG");
  if (s == NULL)
    return;
  if (strcmp(s, "1") != 0)
    return;
  */
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); // A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  /*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); // 23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int), relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();

  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
  */
}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float), relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
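// The call_VM* family below marshals register arguments into the MIPS
// argument registers: A1..A3 for call_VM (A0 is loaded with the JavaThread
// in call_VM_base), and A0..A2 for the leaf variants. The "smashed argument"
// asserts catch the case where a later move would clobber an argument
// register that still holds an earlier incoming value.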
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4 , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}

/*
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
*/
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if (clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
// Calls to C land
//
// When entering C land, the FP & SP of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}
//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS
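// g1_write_barrier_pre implements G1's SATB pre-write barrier: while
// concurrent marking is active, the previous (about to be overwritten) oop
// is recorded in the thread-local SATB queue; when the queue buffer is full
// the slow path calls into the runtime (SharedRuntime::g1_wb_pre).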
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != V0, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lw(AT, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lb(AT, in_progress);
  }
  beq(AT, R0, done);
  nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  beq(pre_val, R0, done);
  nop();

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ld(tmp, index);
  beq(tmp, R0, runtime);
  nop();

  daddiu(tmp, tmp, -1 * wordSize);
  sd(tmp, index);
  ld(AT, buffer);
  daddu(tmp, tmp, AT);

  // Record the previous value
  sd(pre_val, tmp, 0);
  beq(R0, R0, done);
  nop();

  bind(runtime);
  // save the live input values
  if (tosca_live) push(V0);

  if (obj != noreg && obj != V0) push(obj);

  if (pre_val != V0) push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
    if (thread != A1) move(A1, thread);
    if (pre_val != A0) move(A0, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != V0)
    pop(pre_val);

  if (obj != noreg && obj != V0)
    pop(obj);

  if (tosca_live) pop(V0);

  bind(done);
}
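// g1_write_barrier_post implements G1's card-marking post-write barrier:
// only stores that cross heap regions and store a non-NULL oop need to dirty
// the card for the store address and enqueue it in the thread's dirty-card
// queue, falling back to the runtime (SharedRuntime::g1_wb_post) when the
// queue is full.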
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
  assert(tmp  != AT, "must be");
  assert(tmp2 != AT, "must be");
#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?
  xorr(AT, store_addr, new_val);
  dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  beq(AT, R0, done);
  nop();

  // crosses regions, storing NULL?
  beq(new_val, R0, done);
  nop();

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  move(card_addr, store_addr);
  dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  set64(cardtable, (intptr_t)ct->byte_map_base);
  daddu(card_addr, card_addr, cardtable);

  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  beq(AT, R0, done);
  nop();

  sync();
  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  beq(AT, R0, done);
  nop();

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
  move(AT, (int)CardTableModRefBS::dirty_card_val());
  sb(AT, card_addr, 0);

  lw(AT, queue_index);
  beq(AT, R0, runtime);
  nop();
  daddiu(AT, AT, -1 * wordSize);
  sw(AT, queue_index);
  ld(tmp2, buffer);
#ifdef _LP64
  ld(AT, queue_index);
  daddu(tmp2, tmp2, AT);
  sd(card_addr, tmp2, 0);
#else
  lw(AT, queue_index);
  addu32(tmp2, tmp2, AT);
  sw(card_addr, tmp2, 0);
#endif
  beq(R0, R0, done);
  nop();

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}
#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  set64(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  if (UseConcMarkSweepGC) sync();
  sb(R0, AT, 0);
}
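// tlab_allocate is the fast-path bump-pointer allocation in the thread-local
// allocation buffer: load tlab_top, add the object size, branch to slow_case
// if the new top would pass tlab_end, otherwise store the new top back.
// No atomics are needed since the TLAB is private to the thread.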
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2); // blows t1&t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  if (var_size_in_bytes == NOREG) {
    // No need to move con_size_in_bytes into a register first. (yjl 8/17/2005)
    assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
    addi(end, obj, con_size_in_bytes);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();

  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  // recover var_size_in_bytes if necessary
  /*if (var_size_in_bytes == end) {
    sub(var_size_in_bytes, end, obj);
  }*/

  verify_tlab(t1, t2);
}
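// eden_allocate allocates straight out of the shared eden with a CAS retry
// loop: compute the new top, take the slow path on address wrap-around or on
// running past the heap end, then cmpxchg the new top against the shared top
// pointer and retry if another thread won the race.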
1847 // Defines obj, preserves var_size_in_bytes
1848 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
1849 Register t1, Register t2, Label& slow_case) {
1850 assert_different_registers(obj, var_size_in_bytes, t1, AT);
1851 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1852 // No allocation in the shared eden.
1853 b_far(slow_case);
1854 delayed()->nop();
1855 } else {
1857 #ifndef _LP64
1858 Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
1859 lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
1860 #else
1861 Address heap_top(t1);
1862 li(t1, (long)Universe::heap()->top_addr());
1863 #endif
1864 ld_ptr(obj, heap_top);
1866 Register end = t2;
1867 Label retry;
1869 bind(retry);
1870 if (var_size_in_bytes == NOREG) {
1871 // i dont think we need move con_size_in_bytes to a register first.
1872 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
1873 addi(end, obj, con_size_in_bytes);
1874 } else {
1875 add(end, obj, var_size_in_bytes);
1876 }
1877 // if end < obj then we wrapped around => object too long => slow case
1878 sltu(AT, end, obj);
1879 bne_far(AT, R0, slow_case);
1880 delayed()->nop();
1882 li(AT, (long)Universe::heap()->end_addr());
1883 sltu(AT, AT, end);
1884 bne_far(AT, R0, slow_case);
1885 delayed()->nop();
1886 // Compare obj with the current top addr, and if still equal, store the new
1887 // top addr (end) through the top addr pointer. On MIPS, cmpxchg leaves AT == 1
1888 // on success and AT == 0 on failure (the x86 original sets ZF and uses a lock prefix).
1889 //if (os::is_MP()) {
1890 // sync();
1891 //}
1893 // if someone beat us on the allocation, try again, otherwise continue
1894 cmpxchg(end, heap_top, obj);
1895 beq_far(AT, R0, retry); //by yyq
1896 delayed()->nop();
1898 }
1899 }
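// Shared-eden allocation is the same bump-pointer idea, but the top pointer
// is global, so a CAS retry loop is needed. Sketch (names illustrative):
//
//   retry:
//     obj = *heap_top;
//     end = obj + size;
//     if (end < obj) goto slow_case;              // wrapped around: object too long
//     if (end > *heap_end) goto slow_case;        // heap exhausted
//     if (!CAS(heap_top, obj, end)) goto retry;   // someone beat us; try again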
1901 // C2 doesn't invoke this one.
1902 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1903 Register top = T0;
1904 Register t1 = T1;
1905 /* Jin: tlab_refill() is called from
1907 [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
1909 In generate_code_for(), T2 has already been assigned to hold the array length,
1910 which is still needed after the call to tlab_refill();
1911 therefore tlab_refill() must not clobber T2.
1913 Source:
1915 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
1916 at java.lang.System.arraycopy(Native Method)
1917 at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array
1918 at sun.misc.Resource.getBytes(Resource.java:117)
1919 at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
1920 at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
1921 at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
1922 */
1923 Register t2 = T9;
1924 Register t3 = T3;
1925 Register thread_reg = T8;
1926 Label do_refill, discard_tlab;
1927 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1928 // No allocation in the shared eden.
1929 b(slow_case);
1930 delayed()->nop();
1931 }
1933 get_thread(thread_reg);
1935 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1936 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1938 // calculate amount of free space
1939 sub(t1, t1, top);
1940 shr(t1, LogHeapWordSize);
1942 // Retain tlab and allocate object in shared space if
1943 // the amount free in the tlab is too large to discard.
1944 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1945 slt(AT, t2, t1);
1946 beq(AT, R0, discard_tlab);
1947 delayed()->nop();
1949 // Retain
1951 #ifndef _LP64
1952 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1953 #else
1954 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1955 #endif
1956 add(t2, t2, AT);
1957 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1959 if (TLABStats) {
1960 // increment number of slow_allocations
1961 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1962 addiu(AT, AT, 1);
1963 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1964 }
1965 b(try_eden);
1966 delayed()->nop();
1968 bind(discard_tlab);
1969 if (TLABStats) {
1970 // increment number of refills
1971 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1972 addi(AT, AT, 1);
1973 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1974 // accumulate wastage -- t1 is amount free in tlab
1975 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1976 add(AT, AT, t1);
1977 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1978 }
1980 // if tlab is currently allocated (top or end != null) then
1981 // fill [top, end + alignment_reserve) with array object
1982 beq(top, R0, do_refill);
1983 delayed()->nop();
1985 // set up the mark word
1986 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1987 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1989 // set the length to the remaining space
1990 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1991 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1992 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1993 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1995 // set klass to intArrayKlass
1996 #ifndef _LP64
1997 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
1998 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
1999 #else
2000 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
2001 ld_ptr(t1, AT, 0);
2002 #endif
2003 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
2004 store_klass(top, t1);
2006 // refill the tlab with an eden allocation
2007 bind(do_refill);
2008 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
2009 shl(t1, LogHeapWordSize);
2010 // add object_size ??
2011 eden_allocate(top, t1, 0, t2, t3, slow_case);
2013 // Check that t1 was preserved in eden_allocate.
2014 #ifdef ASSERT
2015 if (UseTLAB) {
2016 Label ok;
2017 assert_different_registers(thread_reg, t1);
2018 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
2019 shl(AT, LogHeapWordSize);
2020 beq(AT, t1, ok);
2021 delayed()->nop();
2022 stop("assert(t1 == tlab size)");
2023 should_not_reach_here();
2025 bind(ok);
2026 }
2027 #endif
2028 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
2029 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
2030 add(top, top, t1);
2031 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
2032 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
2033 verify_tlab(t1, t2);
2034 b(retry);
2035 delayed()->nop();
2036 }
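// tlab_refill() in pseudo-C (a sketch; names illustrative):
//
//   free = tlab_end - tlab_top;
//   if (free > refill_waste_limit) {           // too much left to throw away:
//     refill_waste_limit += increment;         //   keep the TLAB and
//     goto try_eden;                           //   allocate this object in eden
//   }
//   fill_with_int_array(tlab_top, free);       // make the dead space parseable
//   tlab = eden_allocate(tlab_size);           // grab a fresh TLAB
//   goto retry;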
2038 static const double pi_4 = 0.7853981633974483;
2040 // the x86 version is too clumsy; I don't think we need that fuss. Maybe I'm wrong. FIXME
2041 // must get argument(a double) in F12/F13
2042 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
2043 // We need to preserve any registers that may be modified during the call. @Jerome
2044 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
2045 //save all modified register here
2046 // if (preserve_cpu_regs) {
2047 // }
2048 // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA, so we only need to save V0, V1 and T9
2049 pushad();
2050 // reserve stack space before the call
2051 addi(SP, SP, -wordSize * 2);
2052 switch (trig){
2053 case 's' :
2054 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
2055 delayed()->nop();
2056 break;
2057 case 'c':
2058 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
2059 delayed()->nop();
2060 break;
2061 case 't':
2062 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
2063 delayed()->nop();
2064 break;
2065 default: assert(false, "bad intrinsic");
2066 break;
2068 }
2070 addi(SP, SP, wordSize * 2);
2071 popad();
2072 // if (preserve_cpu_regs) {
2073 // }
2074 }
2076 #ifdef _LP64
2077 void MacroAssembler::li(Register rd, long imm) {
2078 if (imm <= max_jint && imm >= min_jint) {
2079 li32(rd, (int)imm);
2080 } else if (julong(imm) <= 0xFFFFFFFF) {
2081 assert_not_delayed();
2082 // lui sign-extends, so we can't use that.
2083 ori(rd, R0, julong(imm) >> 16);
2084 dsll(rd, rd, 16);
2085 ori(rd, rd, split_low(imm));
2088 } else if ((imm > 0) && is_simm16(imm >> 32)) {
2089 /* A 48-bit address */
2090 li48(rd, imm);
2091 } else {
2092 li64(rd, imm);
2093 }
2094 }
2095 #else
2096 void MacroAssembler::li(Register rd, long imm) {
2097 li32(rd, (int)imm);
2098 }
2099 #endif
2101 void MacroAssembler::li32(Register reg, int imm) {
2102 if (is_simm16(imm)) {
2103 /* Jin: for imm < 0, we should use addi instead of addiu.
2104 *
2105 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2106 *
2107 * 78 move [int:-1|I] [a0|I]
2108 * : daddi a0, zero, 0xffffffff (correct)
2109 * : daddiu a0, zero, 0xffffffff (incorrect)
2110 */
2111 if (imm >= 0)
2112 addiu(reg, R0, imm);
2113 else
2114 addi(reg, R0, imm);
2115 } else {
2116 lui(reg, split_low(imm >> 16));
2117 if (split_low(imm))
2118 ori(reg, reg, split_low(imm));
2119 }
2120 }
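// Example expansions of li32 (illustrative):
//   li32(T0, 0x1234)     => addiu t0, zero, 0x1234             (simm16, imm >= 0)
//   li32(T0, -1)         => addi  t0, zero, -1                 (simm16, imm < 0)
//   li32(T0, 0x12345678) => lui t0, 0x1234; ori t0, t0, 0x5678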
2122 #ifdef _LP64
2123 void MacroAssembler::set64(Register d, jlong value) {
2124 assert_not_delayed();
2126 int hi = (int)(value >> 32);
2127 int lo = (int)(value & ~0);
2129 if (value == lo) { // 32-bit integer
2130 if (is_simm16(value)) {
2131 daddiu(d, R0, value);
2132 } else {
2133 lui(d, split_low(value >> 16));
2134 if (split_low(value)) {
2135 ori(d, d, split_low(value));
2136 }
2137 }
2138 } else if (hi == 0) { // hardware zero-extends to upper 32
2139 ori(d, R0, julong(value) >> 16);
2140 dsll(d, d, 16);
2141 if (split_low(value)) {
2142 ori(d, d, split_low(value));
2143 }
2144 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2145 // 4 insts
2146 li48(d, value);
2147 } else { // li64
2148 // 6 insts
2149 li64(d, value);
2150 }
2151 }
2154 int MacroAssembler::insts_for_set64(jlong value) {
2155 int hi = (int)(value >> 32);
2156 int lo = (int)(value & ~0);
2158 int count = 0;
2160 if (value == lo) { // 32-bit integer
2161 if (is_simm16(value)) {
2162 //daddiu(d, R0, value);
2163 count++;
2164 } else {
2165 //lui(d, split_low(value >> 16));
2166 count++;
2167 if (split_low(value)) {
2168 //ori(d, d, split_low(value));
2169 count++;
2170 }
2171 }
2172 } else if (hi == 0) { // hardware zero-extends to upper 32
2173 //ori(d, R0, julong(value) >> 16);
2174 //dsll(d, d, 16);
2175 count += 2;
2176 if (split_low(value)) {
2177 //ori(d, d, split_low(value));
2178 count++;
2179 }
2180 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2181 // 4 insts
2182 //li48(d, value);
2183 count += 4;
2184 } else { // li64
2185 // 6 insts
2186 //li64(d, value);
2187 count += 6;
2188 }
2190 return count;
2191 }
2193 void MacroAssembler::patchable_set48(Register d, jlong value) {
2194 assert_not_delayed();
2196 int hi = (int)(value >> 32);
2197 int lo = (int)(value & ~0);
2199 int count = 0;
2201 if (value == lo) { // 32-bit integer
2202 if (is_simm16(value)) {
2203 daddiu(d, R0, value);
2204 count += 1;
2205 } else {
2206 lui(d, split_low(value >> 16));
2207 count += 1;
2208 if (split_low(value)) {
2209 ori(d, d, split_low(value));
2210 count += 1;
2211 }
2212 }
2213 } else if (hi == 0) { // hardware zero-extends to upper 32
2214 ori(d, R0, julong(value) >> 16);
2215 dsll(d, d, 16);
2216 count += 2;
2217 if (split_low(value)) {
2218 ori(d, d, split_low(value));
2219 count += 1;
2220 }
2221 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2222 // 4 insts
2223 li48(d, value);
2224 count += 4;
2225 } else { // li64
2226 tty->print_cr("value = 0x%lx", value);
2227 guarantee(false, "Not supported yet !");
2228 }
2230 for (; count < 4; count++) {
2231 nop();
2232 }
2233 }
2235 void MacroAssembler::patchable_set32(Register d, jlong value) {
2236 assert_not_delayed();
2238 int hi = (int)(value >> 32);
2239 int lo = (int)(value & ~0);
2241 int count = 0;
2243 if (value == lo) { // 32-bit integer
2244 if (is_simm16(value)) {
2245 daddiu(d, R0, value);
2246 count += 1;
2247 } else {
2248 lui(d, split_low(value >> 16));
2249 count += 1;
2250 if (split_low(value)) {
2251 ori(d, d, split_low(value));
2252 count += 1;
2253 }
2254 }
2255 } else if (hi == 0) { // hardware zero-extends to upper 32
2256 ori(d, R0, julong(value) >> 16);
2257 dsll(d, d, 16);
2258 count += 2;
2259 if (split_low(value)) {
2260 ori(d, d, split_low(value));
2261 count += 1;
2262 }
2263 } else {
2264 tty->print_cr("value = 0x%lx", value);
2265 guarantee(false, "Not supported yet !");
2266 }
2268 for (; count < 3; count++) {
2269 nop();
2270 }
2271 }
2273 void MacroAssembler::patchable_call32(Register d, jlong value) {
2274 assert_not_delayed();
2276 int hi = (int)(value >> 32);
2277 int lo = (int)(value & ~0);
2279 int count = 0;
2281 if (value == lo) { // 32-bit integer
2282 if (is_simm16(value)) {
2283 daddiu(d, R0, value);
2284 count += 1;
2285 } else {
2286 lui(d, split_low(value >> 16));
2287 count += 1;
2288 if (split_low(value)) {
2289 ori(d, d, split_low(value));
2290 count += 1;
2291 }
2292 }
2293 } else {
2294 tty->print_cr("value = 0x%lx", value);
2295 guarantee(false, "Not supported yet !");
2296 }
2298 for (; count < 2; count++) {
2299 nop();
2300 }
2301 }
2303 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2304 assert(UseCompressedClassPointers, "should only be used for compressed header");
2305 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2307 int klass_index = oop_recorder()->find_index(k);
2308 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2309 long narrowKlass = (long)Klass::encode_klass(k);
2311 relocate(rspec, Assembler::narrow_oop_operand);
2312 patchable_set48(dst, narrowKlass);
2313 }
2316 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2317 assert(UseCompressedOops, "should only be used for compressed header");
2318 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2320 int oop_index = oop_recorder()->find_index(obj);
2321 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2323 relocate(rspec, Assembler::narrow_oop_operand);
2324 patchable_set48(dst, oop_index);
2325 }
2327 void MacroAssembler::li64(Register rd, long imm) {
2328 assert_not_delayed();
2329 lui(rd, imm >> 48);
2330 ori(rd, rd, split_low(imm >> 32));
2331 dsll(rd, rd, 16);
2332 ori(rd, rd, split_low(imm >> 16));
2333 dsll(rd, rd, 16);
2334 ori(rd, rd, split_low(imm));
2335 }
2337 void MacroAssembler::li48(Register rd, long imm) {
2338 assert_not_delayed();
2339 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2340 lui(rd, imm >> 32);
2341 ori(rd, rd, split_low(imm >> 16));
2342 dsll(rd, rd, 16);
2343 ori(rd, rd, split_low(imm));
2344 }
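// Example expansion (illustrative) for imm = 0x0000123456789abc:
//   li48: lui rd, 0x1234; ori rd, rd, 0x5678; dsll rd, rd, 16; ori rd, rd, 0x9abc
// li64 starts one 16-bit chunk higher (imm[63:48]) and needs 6 instructions
// instead of 4.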
2345 #endif
2346 // NOTE: unlike i486, we do not push eax here;
2347 // the x86 version saves eax because it uses eax as the jump register
2348 void MacroAssembler::verify_oop(Register reg, const char* s) {
2349 /*
2350 if (!VerifyOops) return;
2352 // Pass register number to verify_oop_subroutine
2353 char* b = new char[strlen(s) + 50];
2354 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
2355 push(rax); // save rax,
2356 push(reg); // pass register argument
2357 ExternalAddress buffer((address) b);
2358 // avoid using pushptr, as it modifies scratch registers
2359 // and our contract is not to modify anything
2360 movptr(rax, buffer.addr());
2361 push(rax);
2362 // call indirectly to solve generation ordering problem
2363 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2364 call(rax);
2365 */
2366 if (!VerifyOops) return;
2367 const char * b = NULL;
2368 stringStream ss;
2369 ss.print("verify_oop: %s: %s", reg->name(), s);
2370 b = code_string(ss.as_string());
2371 #ifdef _LP64
2372 pushad();
2373 move(A1, reg);
2374 li(A0, (long)b);
2375 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2376 ld(T9, AT, 0);
2377 jalr(T9);
2378 delayed()->nop();
2379 popad();
2380 #else
2381 // Pass the error message and the oop to verify_oop_subroutine
2382 sw(T0, SP, - wordSize);
2383 sw(T1, SP, - 2*wordSize);
2384 sw(RA, SP, - 3*wordSize);
2385 sw(A0, SP ,- 4*wordSize);
2386 sw(A1, SP ,- 5*wordSize);
2387 sw(AT, SP ,- 6*wordSize);
2388 sw(T9, SP ,- 7*wordSize);
2389 addiu(SP, SP, - 7 * wordSize);
2390 move(A1, reg);
2391 li(A0, (long)b);
2392 // call indirectly to solve generation ordering problem
2393 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2394 lw(T9, AT, 0);
2395 jalr(T9);
2396 delayed()->nop();
2397 lw(T0, SP, 6* wordSize);
2398 lw(T1, SP, 5* wordSize);
2399 lw(RA, SP, 4* wordSize);
2400 lw(A0, SP, 3* wordSize);
2401 lw(A1, SP, 2* wordSize);
2402 lw(AT, SP, 1* wordSize);
2403 lw(T9, SP, 0* wordSize);
2404 addiu(SP, SP, 7 * wordSize);
2405 #endif
2406 }
2409 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2410 if (!VerifyOops) {
2411 nop();
2412 return;
2413 }
2414 // Pass the error message and the oop to verify_oop_subroutine
2415 const char * b = NULL;
2416 stringStream ss;
2417 ss.print("verify_oop_addr: %s", s);
2418 b = code_string(ss.as_string());
2420 st_ptr(T0, SP, - wordSize);
2421 st_ptr(T1, SP, - 2*wordSize);
2422 st_ptr(RA, SP, - 3*wordSize);
2423 st_ptr(A0, SP, - 4*wordSize);
2424 st_ptr(A1, SP, - 5*wordSize);
2425 st_ptr(AT, SP, - 6*wordSize);
2426 st_ptr(T9, SP, - 7*wordSize);
2427 ld_ptr(A1, addr); // addr may use SP, so load from it before change SP
2428 addiu(SP, SP, - 7 * wordSize);
2430 li(A0, (long)b);
2431 // call indirectly to solve generation ordering problem
2432 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2433 ld_ptr(T9, AT, 0);
2434 jalr(T9);
2435 delayed()->nop();
2436 ld_ptr(T0, SP, 6* wordSize);
2437 ld_ptr(T1, SP, 5* wordSize);
2438 ld_ptr(RA, SP, 4* wordSize);
2439 ld_ptr(A0, SP, 3* wordSize);
2440 ld_ptr(A1, SP, 2* wordSize);
2441 ld_ptr(AT, SP, 1* wordSize);
2442 ld_ptr(T9, SP, 0* wordSize);
2443 addiu(SP, SP, 7 * wordSize);
2444 }
2446 // used registers : T0, T1
2447 void MacroAssembler::verify_oop_subroutine() {
2448 // RA: ra
2449 // A0: char* error message
2450 // A1: oop object to verify
2452 Label exit, error;
2453 // increment counter
2454 li(T0, (long)StubRoutines::verify_oop_count_addr());
2455 lw(AT, T0, 0);
2456 #ifdef _LP64
2457 daddi(AT, AT, 1);
2458 #else
2459 addi(AT, AT, 1);
2460 #endif
2461 sw(AT, T0, 0);
2463 // make sure object is 'reasonable'
2464 beq(A1, R0, exit); // if obj is NULL it is ok
2465 delayed()->nop();
2467 // Check if the oop is in the right area of memory
2468 //const int oop_mask = Universe::verify_oop_mask();
2469 //const int oop_bits = Universe::verify_oop_bits();
2470 const uintptr_t oop_mask = Universe::verify_oop_mask();
2471 const uintptr_t oop_bits = Universe::verify_oop_bits();
2472 li(AT, oop_mask);
2473 andr(T0, A1, AT);
2474 li(AT, oop_bits);
2475 bne(T0, AT, error);
2476 delayed()->nop();
2478 // make sure klass is 'reasonable'
2479 //add for compressedoops
2480 reinit_heapbase();
2481 //add for compressedoops
2482 load_klass(T0, A1);
2483 beq(T0, R0, error); // if klass is NULL it is broken
2484 delayed()->nop();
2485 #if 0
2486 //FIXME:wuhui.
2487 // Check if the klass is in the right area of memory
2488 //const int klass_mask = Universe::verify_klass_mask();
2489 //const int klass_bits = Universe::verify_klass_bits();
2490 const uintptr_t klass_mask = Universe::verify_klass_mask();
2491 const uintptr_t klass_bits = Universe::verify_klass_bits();
2493 li(AT, klass_mask);
2494 andr(T1, T0, AT);
2495 li(AT, klass_bits);
2496 bne(T1, AT, error);
2497 delayed()->nop();
2498 // make sure klass' klass is 'reasonable'
2499 //add for compressedoops
2500 load_klass(T0, T0);
2501 beq(T0, R0, error); // if klass' klass is NULL it is broken
2502 delayed()->nop();
2504 li(AT, klass_mask);
2505 andr(T1, T0, AT);
2506 li(AT, klass_bits);
2507 bne(T1, AT, error);
2508 delayed()->nop(); // if klass not in right area of memory it is broken too.
2509 #endif
2510 // return if everything seems ok
2511 bind(exit);
2513 jr(RA);
2514 delayed()->nop();
2516 // handle errors
2517 bind(error);
2518 pushad();
2519 #ifndef _LP64
2520 addi(SP, SP, (-1) * wordSize);
2521 #endif
2522 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2523 delayed()->nop();
2524 #ifndef _LP64
2525 addiu(SP, SP, 1 * wordSize);
2526 #endif
2527 popad();
2528 jr(RA);
2529 delayed()->nop();
2530 }
2532 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2533 #ifdef ASSERT
2534 assert_different_registers(t1, t2, AT);
2535 if (UseTLAB && VerifyOops) {
2536 Label next, ok;
2538 get_thread(t1);
2540 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2541 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2542 sltu(AT, t2, AT);
2543 beq(AT, R0, next);
2544 delayed()->nop();
2546 stop("assert(top >= start)");
2548 bind(next);
2549 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2550 sltu(AT, AT, t2);
2551 beq(AT, R0, ok);
2552 delayed()->nop();
2554 stop("assert(top <= end)");
2556 bind(ok);
2558 }
2559 #endif
2560 }
2561 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2562 Register tmp,
2563 int offset) {
2564 intptr_t value = *delayed_value_addr;
2565 if (value != 0)
2566 return RegisterOrConstant(value + offset);
2567 AddressLiteral a(delayed_value_addr);
2568 // load indirectly to solve generation ordering problem
2569 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2570 //ld(tmp, a);
2571 if (offset != 0)
2572 daddi(tmp,tmp, offset);
2574 return RegisterOrConstant(tmp);
2575 }
2577 void MacroAssembler::hswap(Register reg) {
2578 //short
2579 //andi(reg, reg, 0xffff);
2580 srl(AT, reg, 8);
2581 sll(reg, reg, 24);
2582 sra(reg, reg, 16);
2583 orr(reg, reg, AT);
2584 }
2586 void MacroAssembler::huswap(Register reg) {
2587 #ifdef _LP64
2588 dsrl(AT, reg, 8);
2589 dsll(reg, reg, 24);
2590 dsrl(reg, reg, 16);
2591 orr(reg, reg, AT);
2592 andi(reg, reg, 0xffff);
2593 #else
2594 //andi(reg, reg, 0xffff);
2595 srl(AT, reg, 8);
2596 sll(reg, reg, 24);
2597 srl(reg, reg, 16);
2598 orr(reg, reg, AT);
2599 #endif
2600 }
2602 // a slightly tricky byte reversal that needs only one extra register (AT)
2603 // 32 bits
2604 void MacroAssembler::swap(Register reg) {
2605 srl(AT, reg, 8);
2606 sll(reg, reg, 24);
2607 orr(reg, reg, AT);
2608 //reg : 4 1 2 3
2609 srl(AT, AT, 16);
2610 xorr(AT, AT, reg);
2611 andi(AT, AT, 0xff);
2612 //AT : 0 0 0 1^3);
2613 xorr(reg, reg, AT);
2614 //reg : 4 1 2 1
2615 sll(AT, AT, 16);
2616 xorr(reg, reg, AT);
2617 //reg : 4 3 2 1
2618 }
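// Worked example (illustrative) for reg = 0x01020304:
//   srl  AT, reg, 8     AT  = 0x00010203
//   sll  reg, reg, 24   reg = 0x04000000
//   or   reg, AT        reg = 0x04010203   // bytes: 4 1 2 3
//   srl  AT, AT, 16     AT  = 0x00000001
//   xor  AT, reg        AT  = 0x04010202
//   andi AT, 0xff       AT  = 0x00000002   // 1 ^ 3
//   xor  reg, AT        reg = 0x04010201   // bytes: 4 1 2 1
//   sll  AT, 16         AT  = 0x00020000
//   xor  reg, AT        reg = 0x04030201   // bytes: 4 3 2 1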
2620 #ifdef _LP64
2622 /* do 32-bit CAS using MIPS64 lld/scd
2624 Jin: cas_int should compare only the low 32 bits of the memory value.
2625 However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
2626 To simulate a 32-bit atomic operation, the value loaded with LLD is split into
2627 two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits
2628 of newval, combined with the high 32 bits of the memory value, are stored back with SCD.
2630 Example:
2632 double d = 3.1415926;
2633 System.err.println("hello" + d);
2635 sun.misc.FloatingDecimal$1.<init>()
2636 |
2637 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2639 38 cas_int [a7a7|J] [a0|I] [a6|I]
2640 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2641 // a6: 0x4ab325aa
2643 again:
2644 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2646 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2647 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2648 0x00000055647f3c68: dsll32 t8, t8, 0
2649 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2650 0x00000055647f3c70: sll zero, zero, 0
2652 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2653 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2654 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2655 0x00000055647f3c80: and v1, a6, v1
2656 0x00000055647f3c84: or at, t8, v1
2657 0x00000055647f3c88: scd at, 0x0(a7)
2658 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2659 0x00000055647f3c90: sll zero, zero, 0
2660 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2661 0x00000055647f3c98: sll zero, zero, 0
2662 nequal:
2663 0x00000055647f45a4: dadd a0, t9, zero
2664 0x00000055647f45a8: dadd at, zero, zero
2665 done:
2666 */
2668 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2669 /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
2670 Label done, again, nequal;
2672 bind(again);
2674 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2675 ll(AT, dest);
2676 bne(AT, c_reg, nequal);
2677 delayed()->nop();
2679 move(AT, x_reg);
2680 sc(AT, dest);
2681 beq(AT, R0, again);
2682 delayed()->nop();
2683 b(done);
2684 delayed()->nop();
2686 // not xchged
2687 bind(nequal);
2688 sync();
2689 move(c_reg, AT);
2690 move(AT, R0);
2692 bind(done);
2693 }
2694 #endif // cmpxchg32
2696 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2697 Label done, again, nequal;
2699 bind(again);
2700 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2701 #ifdef _LP64
2702 lld(AT, dest);
2703 #else
2704 ll(AT, dest);
2705 #endif
2706 bne(AT, c_reg, nequal);
2707 delayed()->nop();
2709 move(AT, x_reg);
2710 #ifdef _LP64
2711 scd(AT, dest);
2712 #else
2713 sc(AT, dest);
2714 #endif
2715 beq(AT, R0, again);
2716 delayed()->nop();
2717 b(done);
2718 delayed()->nop();
2720 // not xchged
2721 bind(nequal);
2722 sync();
2723 move(c_reg, AT);
2724 move(AT, R0);
2726 bind(done);
2727 }
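// Contract shared by the cmpxchg flavours above: on success AT == 1 and the
// destination now holds x_reg; on failure AT == 0 and c_reg is updated with
// the value actually observed. Callers branch on AT, e.g.:
//
//   cmpxchg(end, heap_top, obj);   // try to publish the new top
//   beq_far(AT, R0, retry);        // AT == 0: lost the race, try again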
2729 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2730 Label done, again, nequal;
2732 Register x_reg = x_regLo;
2733 dsll32(x_regHi, x_regHi, 0);
2734 dsll32(x_regLo, x_regLo, 0);
2735 dsrl32(x_regLo, x_regLo, 0);
2736 orr(x_reg, x_regLo, x_regHi);
2738 Register c_reg = c_regLo;
2739 dsll32(c_regHi, c_regHi, 0);
2740 dsll32(c_regLo, c_regLo, 0);
2741 dsrl32(c_regLo, c_regLo, 0);
2742 orr(c_reg, c_regLo, c_regHi);
2744 bind(again);
2746 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2747 lld(AT, dest);
2748 bne(AT, c_reg, nequal);
2749 delayed()->nop();
2751 //move(AT, x_reg);
2752 dadd(AT, x_reg, R0);
2753 scd(AT, dest);
2754 beq(AT, R0, again);
2755 delayed()->nop();
2756 b(done);
2757 delayed()->nop();
2759 // not xchged
2760 bind(nequal);
2761 sync();
2762 //move(c_reg, AT);
2763 //move(AT, R0);
2764 dadd(c_reg, AT, R0);
2765 dadd(AT, R0, R0);
2766 bind(done);
2767 }
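// cmpxchg8 above first packs each {hi, lo} register pair into a single
// 64-bit value so that one lld/scd pair can emulate the 8-byte CAS:
//
//   x = ((jlong)x_hi << 32) | ((jlong)x_lo & 0xFFFFFFFF);   // dsll32/dsrl32/or above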
2769 // the three registers must all be different
2770 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2771 assert_different_registers(tmp, fs, ft);
2772 div_s(tmp, fs, ft);
2773 trunc_l_s(tmp, tmp);
2774 cvt_s_l(tmp, tmp);
2775 mul_s(tmp, tmp, ft);
2776 sub_s(fd, fs, tmp);
2777 }
2779 // the three registers must all be different
2780 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2781 assert_different_registers(tmp, fs, ft);
2782 div_d(tmp, fs, ft);
2783 trunc_l_d(tmp, tmp);
2784 cvt_d_l(tmp, tmp);
2785 mul_d(tmp, tmp, ft);
2786 sub_d(fd, fs, tmp);
2787 }
2789 // Fast_Lock and Fast_Unlock used by C2
2791 // Because the transitions from emitted code to the runtime
2792 // monitorenter/exit helper stubs are so slow it's critical that
2793 // we inline both the stack-locking fast-path and the inflated fast path.
2794 //
2795 // See also: cmpFastLock and cmpFastUnlock.
2796 //
2797 // What follows is a specialized inline transliteration of the code
2798 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2799 // another option would be to emit TrySlowEnter and TrySlowExit methods
2800 // at startup-time. These methods would accept arguments as
2801 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2802 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2803 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2804 // In practice, however, the # of lock sites is bounded and is usually small.
2805 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2806 // if the processor uses simple bimodal branch predictors keyed by EIP
2807 // Since the helper routines would be called from multiple synchronization
2808 // sites.
2809 //
2810 // An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
2811 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2812 // to those specialized methods. That'd give us a mostly platform-independent
2813 // implementation that the JITs could optimize and inline at their pleasure.
2814 // Done correctly, the only time we'd need to cross to native code would be
2815 // to park() or unpark() threads. We'd also need a few more unsafe operators
2816 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2817 // (b) explicit barriers or fence operations.
2818 //
2819 // TODO:
2820 //
2821 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2822 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2823 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2824 // the lock operators would typically be faster than reifying Self.
2825 //
2826 // * Ideally I'd define the primitives as:
2827 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2828 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2829 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2830 // Instead, we're stuck with rather awkward and brittle register assignments below.
2831 // Furthermore the register assignments are overconstrained, possibly resulting in
2832 // sub-optimal code near the synchronization site.
2833 //
2834 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2835 // Alternately, use a better sp-proximity test.
2836 //
2837 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2838 // Either one is sufficient to uniquely identify a thread.
2839 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2840 //
2841 // * Intrinsify notify() and notifyAll() for the common cases where the
2842 // object is locked by the calling thread but the waitlist is empty.
2843 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2844 //
2845 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2846 // But beware of excessive branch density on AMD Opterons.
2847 //
2848 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2849 // or failure of the fast-path. If the fast-path fails then we pass
2850 // control to the slow-path, typically in C. In Fast_Lock and
2851 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2852 // will emit a conditional branch immediately after the node.
2853 // So we have branches to branches and lots of ICC.ZF games.
2854 // Instead, it might be better to have C2 pass a "FailureLabel"
2855 // into Fast_Lock and Fast_Unlock. In the case of success, control
2856 // will drop through the node. ICC.ZF is undefined at exit.
2857 // In the case of failure, the node will branch directly to the
2858 // FailureLabel
2861 // obj: object to lock
2862 // box: on-stack box address (displaced header location) - KILLED
2863 // rax,: tmp -- KILLED
2864 // scr: tmp -- KILLED
2865 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2867 // Ensure the register assignments are disjoint
2868 guarantee (objReg != boxReg, "") ;
2869 guarantee (objReg != tmpReg, "") ;
2870 guarantee (objReg != scrReg, "") ;
2871 guarantee (boxReg != tmpReg, "") ;
2872 guarantee (boxReg != scrReg, "") ;
2875 block_comment("FastLock");
2880 if (PrintBiasedLockingStatistics) {
2881 push(tmpReg);
2882 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2883 pop(tmpReg);
2884 }
2886 if (EmitSync & 1) {
2887 move(AT, 0x0);
2888 return;
2889 } else
2890 if (EmitSync & 2) {
2891 Label DONE_LABEL ;
2892 if (UseBiasedLocking) {
2893 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2894 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2895 }
2897 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2898 ori(tmpReg, tmpReg, 0x1);
2899 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2901 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2902 bne(AT, R0, DONE_LABEL);
2903 delayed()->nop();
2905 // Recursive locking
2906 dsubu(tmpReg, tmpReg, SP);
2907 li(AT, (7 - os::vm_page_size() ));
2908 andr(tmpReg, tmpReg, AT);
2909 sd(tmpReg, Address(boxReg, 0));
2910 bind(DONE_LABEL) ;
2911 } else {
2912 // Possible cases that we'll encounter in fast_lock
2913 // ------------------------------------------------
2914 // * Inflated
2915 // -- unlocked
2916 // -- Locked
2917 // = by self
2918 // = by other
2919 // * biased
2920 // -- by Self
2921 // -- by other
2922 // * neutral
2923 // * stack-locked
2924 // -- by self
2925 // = sp-proximity test hits
2926 // = sp-proximity test generates false-negative
2927 // -- by other
2928 //
2930 Label IsInflated, DONE_LABEL, PopDone ;
2932 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2933 // order to reduce the number of conditional branches in the most common cases.
2934 // Beware -- there's a subtle invariant that fetch of the markword
2935 // at [FETCH], below, will never observe a biased encoding (*101b).
2936 // If this invariant is not held we risk exclusion (safety) failure.
2937 if (UseBiasedLocking && !UseOptoBiasInlining) {
2938 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2939 }
2941 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2942 andi(AT, tmpReg, markOopDesc::monitor_value);
2943 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2944 delayed()->nop();
2946 // Attempt stack-locking ...
2947 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2948 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2949 //if (os::is_MP()) {
2950 // sync();
2951 //}
2953 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2954 //AT == 1: unlocked
2956 if (PrintBiasedLockingStatistics) {
2957 Label L;
2958 beq(AT, R0, L);
2959 delayed()->nop();
2960 push(T0);
2961 push(T1);
2962 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2963 pop(T1);
2964 pop(T0);
2965 bind(L);
2966 }
2967 bne(AT, R0, DONE_LABEL);
2968 delayed()->nop();
2970 // Recursive locking
2971 // The object is stack-locked: markword contains stack pointer to BasicLock.
2972 // Locked by current thread if difference with current SP is less than one page.
2973 dsubu(tmpReg, tmpReg, SP);
2974 li(AT, 7 - os::vm_page_size() );
2975 andr(tmpReg, tmpReg, AT);
2976 sd(tmpReg, Address(boxReg, 0));
2977 if (PrintBiasedLockingStatistics) {
2978 Label L;
2979 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2980 bne(tmpReg, R0, L);
2981 delayed()->nop();
2982 push(T0);
2983 push(T1);
2984 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2985 pop(T1);
2986 pop(T0);
2987 bind(L);
2988 }
2989 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
2991 b(DONE_LABEL) ;
2992 delayed()->nop();
2994 bind(IsInflated) ;
2995 // The object's monitor m is unlocked iff m->owner == NULL,
2996 // otherwise m->owner may contain a thread or a stack address.
2998 // TODO: someday avoid the ST-before-CAS penalty by
2999 // relocating (deferring) the following ST.
3000 // We should also think about trying a CAS without having
3001 // fetched _owner. If the CAS is successful we may
3002 // avoid an RTO->RTS upgrade on the $line.
3003 // Without cast to int32_t a movptr will destroy r10 which is typically obj
3004 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
3005 sd(AT, Address(boxReg, 0));
3007 move(boxReg, tmpReg) ;
3008 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3009 // if (m->owner != 0) => AT = 0, goto slow path.
3010 move(AT, R0);
3011 bne(tmpReg, R0, DONE_LABEL);
3012 delayed()->nop();
3014 #ifndef OPT_THREAD
3015 get_thread (TREG) ;
3016 #endif
3017 // It's inflated and appears unlocked
3018 //if (os::is_MP()) {
3019 // sync();
3020 //}
3021 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
3022 // Intentional fall-through into DONE_LABEL ...
3025 // DONE_LABEL is a hot target - we'd really like to place it at the
3026 // start of cache line by padding with NOPs.
3027 // See the AMD and Intel software optimization manuals for the
3028 // most efficient "long" NOP encodings.
3029 // Unfortunately none of our alignment mechanisms suffice.
3030 bind(DONE_LABEL);
3032 // At DONE_LABEL the AT is set as follows ...
3033 // Fast_Unlock uses the same protocol.
3034 // AT == 1 -> Success
3035 // AT == 0 -> Failure - force control through the slow-path
3037 // Avoid branch-to-branch on AMD processors
3038 // This appears to be superstition.
3039 if (EmitSync & 32) nop() ;
3041 }
3042 }
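// The stack-locking fast path above in pseudo-C (a sketch; helper names are
// illustrative, not real HotSpot API):
//
//   mark = obj->mark() | unlocked_value;
//   box->displaced_header = mark;                 // anticipate a successful CAS
//   if (CAS(&obj->mark(), mark, box)) return 1;   // stack-locked by us
//   d = (mark - SP) & (7 - os::vm_page_size());   // mark observed by the failed CAS
//   box->displaced_header = d;                    // d == 0 marks a recursive enter
//   return (d == 0);                              // else fall into the monitor path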
3044 // obj: object to unlock
3045 // box: box address (displaced header location), killed. Must be EAX.
3046 // rbx,: killed tmp; cannot be obj nor box.
3047 //
3048 // Some commentary on balanced locking:
3049 //
3050 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3051 // Methods that don't have provably balanced locking are forced to run in the
3052 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3053 // The interpreter provides two properties:
3054 // I1: At return-time the interpreter automatically and quietly unlocks any
3055 // objects acquired by the current activation (frame). Recall that the
3056 // interpreter maintains an on-stack list of locks currently held by
3057 // a frame.
3058 // I2: If a method attempts to unlock an object that is not held by
3059 // the frame, the interpreter throws IMSX.
3060 //
3061 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
3062 // B() doesn't have provably balanced locking so it runs in the interpreter.
3063 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3064 // is still locked by A().
3065 //
3066 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3067 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3068 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3069 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3071 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
3073 guarantee (objReg != boxReg, "") ;
3074 guarantee (objReg != tmpReg, "") ;
3075 guarantee (boxReg != tmpReg, "") ;
3079 block_comment("FastUnlock");
3082 if (EmitSync & 4) {
3083 // Disable - inhibit all inlining. Force control through the slow-path
3084 move(AT, 0x0);
3085 return;
3086 } else
3087 if (EmitSync & 8) {
3088 Label DONE_LABEL ;
3089 if (UseBiasedLocking) {
3090 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3091 }
3092 // classic stack-locking code ...
3093 ld(tmpReg, Address(boxReg, 0)) ;
3094 beq(tmpReg, R0, DONE_LABEL) ;
3095 move(AT, 0x1); // delay slot
3097 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
3098 bind(DONE_LABEL);
3099 } else {
3100 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3102 // Critically, the biased locking test must have precedence over
3103 // and appear before the (box->dhw == 0) recursive stack-lock test.
3104 if (UseBiasedLocking && !UseOptoBiasInlining) {
3105 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3106 }
3108 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
3109 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
3110 delayed()->daddiu(AT, R0, 0x1);
3112 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3113 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
3114 beq(AT, R0, Stacked) ; // Inflated?
3115 delayed()->nop();
3117 bind(Inflated) ;
3118 // It's inflated.
3119 // Despite our balanced locking property we still check that m->_owner == Self
3120 // as java routines or native JNI code called by this thread might
3121 // have released the lock.
3122 // Refer to the comments in synchronizer.cpp for how we might encode extra
3123 // state in _succ so we can avoid fetching EntryList|cxq.
3124 //
3125 // I'd like to add more cases in fast_lock() and fast_unlock() --
3126 // such as recursive enter and exit -- but we have to be wary of
3127 // I$ bloat, T$ effects and BP$ effects.
3128 //
3129 // If there's no contention try a 1-0 exit. That is, exit without
3130 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3131 // we detect and recover from the race that the 1-0 exit admits.
3132 //
3133 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3134 // before it STs null into _owner, releasing the lock. Updates
3135 // to data protected by the critical section must be visible before
3136 // we drop the lock (and thus before any other thread could acquire
3137 // the lock and observe the fields protected by the lock).
3138 // IA32's memory-model is SPO, so STs are ordered with respect to
3139 // each other and there's no need for an explicit barrier (fence).
3140 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3141 #ifndef OPT_THREAD
3142 get_thread (TREG) ;
3143 #endif
3145 // It's inflated
3146 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3147 xorr(boxReg, boxReg, TREG);
3149 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3150 orr(boxReg, boxReg, AT);
3152 move(AT, R0);
3153 bne(boxReg, R0, DONE_LABEL);
3154 delayed()->nop();
3156 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3157 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3158 orr(boxReg, boxReg, AT);
3160 move(AT, R0);
3161 bne(boxReg, R0, DONE_LABEL);
3162 delayed()->nop();
3164 sync();
3165 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3166 move(AT, 0x1);
3167 b(DONE_LABEL);
3168 delayed()->nop();
3170 bind (Stacked);
3171 ld(tmpReg, Address(boxReg, 0)) ;
3172 //if (os::is_MP()) { sync(); }
3173 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3175 if (EmitSync & 65536) {
3176 bind (CheckSucc);
3177 }
3179 bind(DONE_LABEL);
3181 // Avoid branch to branch on AMD processors
3182 if (EmitSync & 32768) { nop() ; }
3183 }
3184 }
3186 void MacroAssembler::align(int modulus) {
3187 while (offset() % modulus != 0) nop();
3188 }
3191 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3192 //Unimplemented();
3193 }
3195 #ifdef _LP64
3196 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3198 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
3199 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3200 #else
3201 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3203 FloatRegister caller_saved_fpu_registers[] = {};
3204 #endif
3206 // We preserve all caller-saved registers
3207 void MacroAssembler::pushad(){
3208 int i;
3210 /* Fixed-point registers */
3211 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3212 daddi(SP, SP, -1 * len * wordSize);
3213 for (i = 0; i < len; i++)
3214 {
3215 #ifdef _LP64
3216 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3217 #else
3218 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3219 #endif
3220 }
3222 /* Floating-point registers */
3223 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3224 daddi(SP, SP, -1 * len * wordSize);
3225 for (i = 0; i < len; i++)
3226 {
3227 #ifdef _LP64
3228 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3229 #else
3230 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3231 #endif
3232 }
3233 };
3235 void MacroAssembler::popad(){
3236 int i;
3238 /* Floating-point registers */
3239 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3240 for (i = 0; i < len; i++)
3241 {
3242 #ifdef _LP64
3243 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3244 #else
3245 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3246 #endif
3247 }
3248 daddi(SP, SP, len * wordSize);
3250 /* Fixed-point registers */
3251 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3252 for (i = 0; i < len; i++)
3253 {
3254 #ifdef _LP64
3255 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3256 #else
3257 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3258 #endif
3259 }
3260 daddi(SP, SP, len * wordSize);
3261 };
3263 void MacroAssembler::push2(Register reg1, Register reg2) {
3264 #ifdef _LP64
3265 daddi(SP, SP, -16);
3266 sd(reg2, SP, 0);
3267 sd(reg1, SP, 8);
3268 #else
3269 addi(SP, SP, -8);
3270 sw(reg2, SP, 0);
3271 sw(reg1, SP, 4);
3272 #endif
3273 }
3275 void MacroAssembler::pop2(Register reg1, Register reg2) {
3276 #ifdef _LP64
3277 ld(reg1, SP, 0);
3278 ld(reg2, SP, 8);
3279 daddi(SP, SP, 16);
3280 #else
3281 lw(reg1, SP, 0);
3282 lw(reg2, SP, 4);
3283 addi(SP, SP, 8);
3284 #endif
3285 }
3287 //for UseCompressedOops Option
3288 void MacroAssembler::load_klass(Register dst, Register src) {
3289 #ifdef _LP64
3290 if(UseCompressedClassPointers){
3291 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3292 decode_klass_not_null(dst);
3293 } else
3294 #endif
3295 ld(dst, src, oopDesc::klass_offset_in_bytes());
3296 }
3298 void MacroAssembler::store_klass(Register dst, Register src) {
3299 #ifdef _LP64
3300 if(UseCompressedClassPointers){
3301 encode_klass_not_null(src);
3302 sw(src, dst, oopDesc::klass_offset_in_bytes());
3303 } else {
3304 #endif
3305 sd(src, dst, oopDesc::klass_offset_in_bytes());
3306 }
3307 }
3309 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3310 load_klass(dst, src);
3311 ld(dst, Address(dst, Klass::prototype_header_offset()));
3312 }
3314 #ifdef _LP64
3315 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3316 if (UseCompressedClassPointers) {
3317 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3318 }
3319 }
3321 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3322 if(UseCompressedOops){
3323 lwu(dst, src);
3324 decode_heap_oop(dst);
3325 } else {
3326 ld(dst, src);
3327 }
3328 }
3330 void MacroAssembler::store_heap_oop(Address dst, Register src){
3331 if(UseCompressedOops){
3332 assert(!dst.uses(src), "not enough registers");
3333 encode_heap_oop(src);
3334 sw(src, dst);
3335 } else {
3336 sd(src, dst);
3337 }
3338 }
3340 void MacroAssembler::store_heap_oop_null(Address dst){
3341 if(UseCompressedOops){
3342 sw(R0, dst);
3343 } else {
3344 sd(R0, dst);
3345 }
3346 }
3348 #ifdef ASSERT
3349 void MacroAssembler::verify_heapbase(const char* msg) {
3350 assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
3351 assert (Universe::heap() != NULL, "java heap should be initialized");
3352 }
3353 #endif
3356 // Algorithm must match oop.inline.hpp encode_heap_oop.
3357 void MacroAssembler::encode_heap_oop(Register r) {
3358 #ifdef ASSERT
3359 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3360 #endif
3361 verify_oop(r, "broken oop in encode_heap_oop");
3362 if (Universe::narrow_oop_base() == NULL) {
3363 if (Universe::narrow_oop_shift() != 0) {
3364 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3365 shr(r, LogMinObjAlignmentInBytes);
3366 }
3367 return;
3368 }
3370 movz(r, S5_heapbase, r);
3371 dsub(r, r, S5_heapbase);
3372 if (Universe::narrow_oop_shift() != 0) {
3373 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3374 shr(r, LogMinObjAlignmentInBytes);
3375 }
3376 }
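// Compressed-oop mapping in one line (sketch), with base B = narrow_oop_base()
// and shift S = narrow_oop_shift():
//
//   narrow = (oop == NULL) ? 0 : (uint32_t)((oop - B) >> S);
//   oop    = (narrow == 0) ? NULL : B + ((uintptr_t)narrow << S);
//
// The movz above handles the NULL case without a branch: when r == 0 it
// replaces r with the heap base, so the subsequent subtract yields 0.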
3378 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3379 #ifdef ASSERT
3380 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3381 #endif
3382 verify_oop(src, "broken oop in encode_heap_oop");
3383 if (Universe::narrow_oop_base() == NULL) {
3384 if (Universe::narrow_oop_shift() != 0) {
3385 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3386 dsrl(dst, src, LogMinObjAlignmentInBytes);
3387 } else {
3388 if (dst != src) move(dst, src);
3389 }
3390 } else {
3391 if (dst == src) {
3392 movz(dst, S5_heapbase, dst);
3393 dsub(dst, dst, S5_heapbase);
3394 if (Universe::narrow_oop_shift() != 0) {
3395 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3396 shr(dst, LogMinObjAlignmentInBytes);
3397 }
3398 } else {
3399 dsub(dst, src, S5_heapbase);
3400 if (Universe::narrow_oop_shift() != 0) {
3401 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3402 shr(dst, LogMinObjAlignmentInBytes);
3403 }
3404 movz(dst, R0, src);
3405 }
3406 }
3407 }
3409 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3410 assert (UseCompressedOops, "should be compressed");
3411 #ifdef ASSERT
3412 if (CheckCompressedOops) {
3413 Label ok;
3414 bne(r, R0, ok);
3415 delayed()->nop();
3416 stop("null oop passed to encode_heap_oop_not_null");
3417 bind(ok);
3418 }
3419 #endif
3420 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3421 if (Universe::narrow_oop_base() != NULL) {
3422 dsub(r, r, S5_heapbase);
3423 }
3424 if (Universe::narrow_oop_shift() != 0) {
3425 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3426 shr(r, LogMinObjAlignmentInBytes);
3427 }
3429 }
3431 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3432 assert (UseCompressedOops, "should be compressed");
3433 #ifdef ASSERT
3434 if (CheckCompressedOops) {
3435 Label ok;
3436 bne(src, R0, ok);
3437 delayed()->nop();
3438 stop("null oop passed to encode_heap_oop_not_null2");
3439 bind(ok);
3440 }
3441 #endif
3442 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3444 if (Universe::narrow_oop_base() != NULL) {
3445 dsub(dst, src, S5_heapbase);
3446 if (Universe::narrow_oop_shift() != 0) {
3447 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3448 shr(dst, LogMinObjAlignmentInBytes);
3449 }
3450 } else {
3451 if (Universe::narrow_oop_shift() != 0) {
3452 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3453 dsrl(dst, src, LogMinObjAlignmentInBytes);
3454 } else {
3455 if (dst != src) move(dst, src);
3456 }
3457 }
3458 }
3460 void MacroAssembler::decode_heap_oop(Register r) {
3461 #ifdef ASSERT
3462 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3463 #endif
3464 if (Universe::narrow_oop_base() == NULL) {
3465 if (Universe::narrow_oop_shift() != 0) {
3466 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3467 shl(r, LogMinObjAlignmentInBytes);
3468 }
3469 } else {
3470 move(AT, r);
3471 if (Universe::narrow_oop_shift() != 0) {
3472 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3473 shl(r, LogMinObjAlignmentInBytes);
3474 }
3475 dadd(r, r, S5_heapbase);
3476 movz(r, R0, AT);
3477 }
3478 verify_oop(r, "broken oop in decode_heap_oop");
3479 }
3481 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3482 #ifdef ASSERT
3483 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3484 #endif
3485 if (Universe::narrow_oop_base() == NULL) {
3486 if (Universe::narrow_oop_shift() != 0) {
3487 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3488 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3489 dsll(dst, src, LogMinObjAlignmentInBytes);
3490 } else {
3491 if (dst != src) move(dst, src);
3492 }
3493 } else {
3494 if (dst == src) {
3495 move(AT, dst);
3496 if (Universe::narrow_oop_shift() != 0) {
3497 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3498 shl(dst, LogMinObjAlignmentInBytes);
3499 }
3500 dadd(dst, dst, S5_heapbase);
3501 movz(dst, R0, AT);
3502 } else {
3503 if (Universe::narrow_oop_shift() != 0) {
3504 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3505 dsll(dst, src, LogMinObjAlignmentInBytes);
3506 daddu(dst, dst, S5_heapbase);
3507 } else {
3508 daddu(dst, src, S5_heapbase);
3509 }
3510 movz(dst, R0, src);
3511 }
3512 }
3513 verify_oop(dst, "broken oop in decode_heap_oop");
3514 }
3516 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3517 // Note: it will change flags
3518 assert (UseCompressedOops, "should only be used for compressed headers");
3519 assert (Universe::heap() != NULL, "java heap should be initialized");
3520 // Cannot assert, unverified entry point counts instructions (see .ad file)
3521 // vtableStubs also counts instructions in pd_code_size_limit.
3522 // Also do not verify_oop as this is called by verify_oop.
3523 if (Universe::narrow_oop_shift() != 0) {
3524 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3525 shl(r, LogMinObjAlignmentInBytes);
3526 if (Universe::narrow_oop_base() != NULL) {
3527 daddu(r, r, S5_heapbase);
3528 }
3529 } else {
3530 assert (Universe::narrow_oop_base() == NULL, "sanity");
3531 }
3532 }
3534 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3535 assert (UseCompressedOops, "should only be used for compressed headers");
3536 assert (Universe::heap() != NULL, "java heap should be initialized");
3538 // Cannot assert, unverified entry point counts instructions (see .ad file)
3539 // vtableStubs also counts instructions in pd_code_size_limit.
3540 // Also do not verify_oop as this is called by verify_oop.
3541 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3542 if (Universe::narrow_oop_shift() != 0) {
3543 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3544 if (LogMinObjAlignmentInBytes == Address::times_8) {
3545 dsll(dst, src, LogMinObjAlignmentInBytes);
3546 daddu(dst, dst, S5_heapbase);
3547 } else {
3548 dsll(dst, src, LogMinObjAlignmentInBytes);
3549 if (Universe::narrow_oop_base() != NULL) {
3550 daddu(dst, dst, S5_heapbase);
3551 }
3552 }
3553 } else {
3554 assert (Universe::narrow_oop_base() == NULL, "sanity");
3555 if (dst != src) {
3556 move(dst, src);
3557 }
3558 }
3559 }
3561 void MacroAssembler::encode_klass_not_null(Register r) {
3562 if (Universe::narrow_klass_base() != NULL) {
3563 assert(r != AT, "Encoding a klass in AT");
3564 set64(AT, (int64_t)Universe::narrow_klass_base());
3565 dsub(r, r, AT);
3566 }
3567 if (Universe::narrow_klass_shift() != 0) {
3568 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3569 shr(r, LogKlassAlignmentInBytes);
3570 }
3571 }
3573 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3574 if (dst == src) {
3575 encode_klass_not_null(src);
3576 } else {
3577 if (Universe::narrow_klass_base() != NULL) {
3578 set64(dst, (int64_t)Universe::narrow_klass_base());
3579 dsub(dst, src, dst);
3580 if (Universe::narrow_klass_shift() != 0) {
3581 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3582 shr(dst, LogKlassAlignmentInBytes);
3583 }
3584 } else {
3585 if (Universe::narrow_klass_shift() != 0) {
3586 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3587 dsrl(dst, src, LogKlassAlignmentInBytes);
3588 } else {
3589 move(dst, src);
3590 }
3591 }
3592 }
3593 }
3595 // Function instr_size_for_decode_klass_not_null() counts the instructions
3596 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3597 // when (Universe::heap() != NULL). Hence, if the instructions they
3598 // generate change, then this method needs to be updated.
3599 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3600 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3601 if (Universe::narrow_klass_base() != NULL) {
3602 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
3603 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3604 } else {
3605 // longest load decode klass function, mov64, leaq
3606 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3607 }
3608 }
3610 void MacroAssembler::decode_klass_not_null(Register r) {
3611 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3612 assert(r != AT, "Decoding a klass in AT");
3613 // Cannot assert, unverified entry point counts instructions (see .ad file)
3614 // vtableStubs also counts instructions in pd_code_size_limit.
3615 // Also do not verify_oop as this is called by verify_oop.
3616 if (Universe::narrow_klass_shift() != 0) {
3617 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3618 shl(r, LogKlassAlignmentInBytes);
3619 }
3620 if (Universe::narrow_klass_base() != NULL) {
3621 set64(AT, (int64_t)Universe::narrow_klass_base());
3622 daddu(r, r, AT);
3623 // Not necessary for MIPS at all.
3624 //reinit_heapbase();
3625 }
3626 }
3628 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3629 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3631 if (dst == src) {
3632 decode_klass_not_null(dst);
3633 } else {
3634 // Cannot assert, unverified entry point counts instructions (see .ad file)
3635 // vtableStubs also counts instructions in pd_code_size_limit.
3636 // Also do not verify_oop as this is called by verify_oop.
3637 set64(dst, (int64_t)Universe::narrow_klass_base());
3638 if (Universe::narrow_klass_shift() != 0) {
3639 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3640 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3641 dsll(AT, src, Address::times_8);
3642 daddu(dst, dst, AT);
3643 } else {
3644 daddu(dst, src, dst);
3645 }
3646 }
3647 }
3649 void MacroAssembler::incrementl(Register reg, int value) {
3650 if (value == min_jint) {
3651 move(AT, value);
3652 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3653 return;
3654 }
3655 if (value < 0) { decrementl(reg, -value); return; }
3656 if (value == 0) return;
3658 if (Assembler::is_simm16(value)) {
3659 NOT_LP64(addiu(reg, reg, value));
3660 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
3661 } else {
3662 move(AT, value);
3663 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3664 }
3665 }
3667 void MacroAssembler::decrementl(Register reg, int value) {
3668 if (value == min_jint) {
3669 move(AT, value);
3670 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3671 return;
3672 }
3673 if (value < 0) { incrementl(reg, -value); return; }
3674 if (value == 0) return;
3676 if (Assembler::is_simm16(value)) {
3677 NOT_LP64(addiu(reg, reg, -value));
3678 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
3679 } else {
3680 move(AT, value);
3681 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3682 }
3683 }
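// Note: Assembler::is_simm16() tests for the signed 16-bit immediate range
// (-32768..32767) of the MIPS addiu/daddiu encoding; larger constants must
// first be materialized into AT, as both helpers above do.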
3685 void MacroAssembler::reinit_heapbase() {
3686 if (UseCompressedOops || UseCompressedClassPointers) {
3687 if (Universe::heap() != NULL) {
3688 if (Universe::narrow_oop_base() == NULL) {
3689 move(S5_heapbase, R0);
3690 } else {
3691 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3692 }
3693 } else {
3694 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3695 ld(S5_heapbase, S5_heapbase, 0);
3696 }
3697 }
3698 }
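// reinit_heapbase() re-materializes the cached compressed-oop base in
// S5_heapbase, e.g. in VM entry stubs where S5 may not yet hold it.
// Before the heap exists, the base is loaded indirectly through
// narrow_ptrs_base_addr() so the code picks up the value once it is set.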
3699 #endif // _LP64
3701 void MacroAssembler::check_klass_subtype(Register sub_klass,
3702 Register super_klass,
3703 Register temp_reg,
3704 Label& L_success) {
3705 // Implements the full subtype check (cf. gen_subtype_check in the interpreter).
3706 Label L_failure;
3707 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3708 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3709 bind(L_failure);
3710 }
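// Label plumbing above: the fast path is given a NULL slow-path label, so
// undecided cases fall through into the slow path; the slow path is given
// a NULL failure label, so a failed scan falls through to bind(L_failure).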
3712 SkipIfEqual::SkipIfEqual(
3713 MacroAssembler* masm, const bool* flag_addr, bool value) {
3714 _masm = masm;
3715 _masm->li(AT, (address)flag_addr);
3716 _masm->lb(AT, AT, 0);
3717 _masm->addi(AT, AT, -value);
3718 _masm->beq(AT, R0, _label);
3719 _masm->delayed()->nop();
3720 }
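// Typical (hypothetical) use of SkipIfEqual -- the guarded code is emitted
// but skipped at runtime whenever *flag_addr == value; the destructor binds
// the skip label:
//   { SkipIfEqual skip(masm, &SomeBoolFlag, false);
//     // ... this code executes only while SomeBoolFlag is true ...
//   } // ~SkipIfEqual() binds _label here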
3721 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3722 Register super_klass,
3723 Register temp_reg,
3724 Label* L_success,
3725 Label* L_failure,
3726 Label* L_slow_path,
3727 RegisterOrConstant super_check_offset) {
3728 assert_different_registers(sub_klass, super_klass, temp_reg);
3729 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3730 if (super_check_offset.is_register()) {
3731 assert_different_registers(sub_klass, super_klass,
3732 super_check_offset.as_register());
3733 } else if (must_load_sco) {
3734 assert(temp_reg != noreg, "supply either a temp or a register offset");
3735 }
3737 Label L_fallthrough;
3738 int label_nulls = 0;
3739 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3740 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3741 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3742 assert(label_nulls <= 1, "at most one NULL in the batch");
3744 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3745 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3746 // If the pointers are equal, we are done (e.g., String[] elements).
3747 // This self-check enables sharing of secondary supertype arrays among
3748 // non-primary types such as array-of-interface. Otherwise, each such
3749 // type would need its own customized SSA.
3750 // We move this check to the front of the fast path because many
3751 // type checks are in fact trivially successful in this manner,
3752 // so we get a nicely predicted branch right at the start of the check.
3753 beq(sub_klass, super_klass, *L_success);
3754 delayed()->nop();
3755 // Check the supertype display:
3756 if (must_load_sco) {
3757 // The offset is a positive 32-bit value; lwu zero-extends it on LP64.
3758 lwu(temp_reg, super_klass, sco_offset);
3759 super_check_offset = RegisterOrConstant(temp_reg);
3760 }
3761 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
3762 daddu(AT, sub_klass, AT);
3763 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
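// AT now holds *(sub_klass + super_check_offset): either a primary-super
// display element or the secondary_super_cache slot, as the offset selects.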
3765 // This check has worked decisively for primary supers.
3766 // Secondary supers are sought in the super_cache ('super_cache_addr').
3767 // (Secondary supers are interfaces and very deeply nested subtypes.)
3768 // This works in the same check above because of a tricky aliasing
3769 // between the super_cache and the primary super display elements.
3770 // (The 'super_check_addr' can address either, as the case requires.)
3771 // Note that the cache is updated below if it does not help us find
3772 // what we need immediately.
3773 // So if it was a primary super, we can just fail immediately.
3774 // Otherwise, it's the slow path for us (no success at this point).
3776 if (super_check_offset.is_register()) {
3777 beq(super_klass, AT, *L_success);
3778 delayed()->nop();
3779 addi(AT, super_check_offset.as_register(), -sc_offset);
3780 if (L_failure == &L_fallthrough) {
3781 beq(AT, R0, *L_slow_path);
3782 delayed()->nop();
3783 } else {
3784 bne(AT, R0, *L_failure);
3785 delayed()->nop();
3786 b(*L_slow_path);
3787 delayed()->nop();
3788 }
3789 } else if (super_check_offset.as_constant() == sc_offset) {
3790 // Need a slow path; fast failure is impossible.
3791 if (L_slow_path == &L_fallthrough) {
3792 beq(super_klass, AT, *L_success);
3793 delayed()->nop();
3794 } else {
3795 bne(super_klass, AT, *L_slow_path);
3796 delayed()->nop();
3797 b(*L_success);
3798 delayed()->nop();
3799 }
3800 } else {
3801 // No slow path; it's a fast decision.
3802 if (L_failure == &L_fallthrough) {
3803 beq(super_klass, AT, *L_success);
3804 delayed()->nop();
3805 } else {
3806 bne(super_klass, AT, *L_failure);
3807 delayed()->nop();
3808 b(*L_success);
3809 delayed()->nop();
3810 }
3811 }
3813 bind(L_fallthrough);
3815 }
3818 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3819 Register super_klass,
3820 Register temp_reg,
3821 Register temp2_reg,
3822 Label* L_success,
3823 Label* L_failure,
3824 bool set_cond_codes) {
3825 assert_different_registers(sub_klass, super_klass, temp_reg);
3826 if (temp2_reg != noreg)
3827 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3828 else
3829 temp2_reg = T9;
3830 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3832 Label L_fallthrough;
3833 int label_nulls = 0;
3834 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3835 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3836 assert(label_nulls <= 1, "at most one NULL in the batch");
3838 // a couple of useful fields in sub_klass:
3839 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3840 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3841 Address secondary_supers_addr(sub_klass, ss_offset);
3842 Address super_cache_addr( sub_klass, sc_offset);
3844 // Do a linear scan of the secondary super-klass chain.
3845 // This code is rarely used, so simplicity is a virtue here.
3846 // (The x86 port uses repne_scan with fixed registers here; on MIPS we
3847 // emit an explicit load/compare loop, so no register shuffling is needed.)
3849 // Don't worry too much about pre-existing connections with the input regs.
3850 #ifndef PRODUCT
3851 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3852 ExternalAddress pst_counter_addr((address) pst_counter);
3853 NOT_LP64( incrementl(pst_counter_addr) );
3854 #endif //PRODUCT
3856 // We will consult the secondary-super array.
3857 ld(temp_reg, secondary_supers_addr);
3858 // Load the array length. (Positive movl does right thing on LP64.)
3859 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3860 // Skip to start of data.
3861 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
3863 // Scan temp2_reg words starting at [temp_reg] for an occurrence of
3864 // super_klass, stopping on a match or when the count reaches zero
3865 // (the MIPS equivalent of the x86 repne/scas idiom).
3869 // 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in the secondary-supers array.
3870 Label Loop, subtype;
3871 bind(Loop);
3872 beq(temp2_reg, R0, *L_failure);
3873 delayed()->nop();
3874 ld(AT, temp_reg, 0);
3875 beq(AT, super_klass, subtype);
3876 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3877 b(Loop);
3878 delayed()->daddi(temp2_reg, temp2_reg, -1);
3880 bind(subtype);
3881 sd(super_klass, super_cache_addr); // success: cache the super we found and proceed in triumph
3882 if (L_success != &L_fallthrough) {
3883 b(*L_success);
3884 delayed()->nop();
3885 }
3888 #undef IS_A_TEMP
3890 bind(L_fallthrough);
3891 }
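// In pseudo-C, the scan above is (names are descriptive only):
//   for (n = secondary_supers->length, p = secondary_supers->data;
//        n != 0; n--, p++) {
//     if (*p == super_klass) goto subtype;   // found: cache and succeed
//   }
//   goto *L_failure;                         // exhausted: not a subtype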
3893 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3894 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3895 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3896 verify_oop(oop_result, "broken oop in call_VM_base");
3897 }
3899 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3900 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3901 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3902 }
3904 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3905 int extra_slot_offset) {
3906 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3907 int stackElementSize = Interpreter::stackElementSize;
3908 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3909 #ifdef ASSERT
3910 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3911 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3912 #endif
3913 Register scale_reg = NOREG;
3914 Address::ScaleFactor scale_factor = Address::no_scale;
3915 if (arg_slot.is_constant()) {
3916 offset += arg_slot.as_constant() * stackElementSize;
3917 } else {
3918 scale_reg = arg_slot.as_register();
3919 scale_factor = Address::times_8;
3920 }
3921 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
3922 // offset += wordSize; // return PC is on stack
3923 if (scale_reg == NOREG) return Address(SP, offset);
3924 else {
3925 dsll(scale_reg, scale_reg, scale_factor);
3926 daddu(scale_reg, SP, scale_reg);
3927 return Address(scale_reg, offset);
3928 }
3929 }
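// Illustrative example (assuming Interpreter::stackElementSize == 8): a
// constant arg_slot of 2 yields Address(SP, base_offset + 16), where
// base_offset stands for expr_offset_in_bytes(extra_slot_offset); a
// register arg_slot is shifted left by 3 and added to SP in emitted code.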
3931 SkipIfEqual::~SkipIfEqual() {
3932 _masm->bind(_label);
3933 }
3935 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3936 switch (size_in_bytes) {
3937 #ifndef _LP64
3938 case 8:
3939 assert(dst2 != noreg, "second dest register required");
3940 lw(dst, src);
3941 lw(dst2, src.plus_disp(BytesPerInt));
3942 break;
3943 #else
3944 case 8: ld(dst, src); break;
3945 #endif
3946 case 4: lw(dst, src); break;
3947 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
3948 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
3949 default: ShouldNotReachHere();
3950 }
3951 }
3953 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
3954 switch (size_in_bytes) {
3955 #ifndef _LP64
3956 case 8:
3957 assert(src2 != noreg, "second source register required");
3958 sw(src, dst);
3959 sw(src2, dst.plus_disp(BytesPerInt));
3960 break;
3961 #else
3962 case 8: sd(src, dst); break;
3963 #endif
3964 case 4: sw(src, dst); break;
3965 case 2: sh(src, dst); break;
3966 case 1: sb(src, dst); break;
3967 default: ShouldNotReachHere();
3968 }
3969 }
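// load_sized_value()/store_sized_value() pick the natural access width for
// 1/2/4/8-byte Java values; only loads distinguish signedness. On 32-bit
// builds an 8-byte value travels in a register pair (dst/dst2, src/src2),
// the two halves BytesPerInt apart in memory.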
3971 // Look up the method for a megamorphic invokeinterface call.
3972 // The target method is determined by <intf_klass, itable_index>.
3973 // The receiver klass is in recv_klass.
3974 // On success, the result will be in method_result, and execution falls through.
3975 // On failure, execution transfers to the given label.
3976 void MacroAssembler::lookup_interface_method(Register recv_klass,
3977 Register intf_klass,
3978 RegisterOrConstant itable_index,
3979 Register method_result,
3980 Register scan_temp,
3981 Label& L_no_such_interface) {
3982 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
3983 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3984 "caller must use same register for non-constant itable index as for method");
3986 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3987 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
3988 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3989 int scan_step = itableOffsetEntry::size() * wordSize;
3990 int vte_size = vtableEntry::size() * wordSize;
3991 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3992 assert(vte_size == wordSize, "else adjust times_vte_scale");
3994 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
3996 // %%% Could store the aligned, prescaled offset in the klassoop.
3997 dsll(scan_temp, scan_temp, times_vte_scale);
3998 daddu(scan_temp, recv_klass, scan_temp);
3999 daddiu(scan_temp, scan_temp, vtable_base);
4000 if (HeapWordsPerLong > 1) {
4001 // Round up to align_object_offset boundary
4002 // see code for InstanceKlass::start_of_itable!
4003 round_to(scan_temp, BytesPerLong);
4004 }
4006 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
4007 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
4008 if (itable_index.is_constant()) {
4009 set64(AT, (int)itable_index.as_constant());
4010 dsll(AT, AT, (int)Address::times_ptr);
4011 } else {
4012 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
4013 }
4014 daddu(AT, AT, recv_klass);
4015 daddiu(recv_klass, AT, itentry_off);
4017 Label search, found_method;
4019 for (int peel = 1; peel >= 0; peel--) {
4020 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
4022 if (peel) {
4023 beq(intf_klass, method_result, found_method);
4024 nop();
4025 } else {
4026 bne(intf_klass, method_result, search);
4027 nop();
4028 // (invert the test to fall through to found_method...)
4029 }
4031 if (!peel) break;
4033 bind(search);
4035 // Check that the previous entry is non-null. A null entry means that
4036 // the receiver class doesn't implement the interface, and wasn't the
4037 // same as when the caller was compiled.
4038 beq(method_result, R0, L_no_such_interface);
4039 nop();
4040 daddiu(scan_temp, scan_temp, scan_step);
4041 }
4043 bind(found_method);
4045 // Got a hit.
4046 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
4047 if (UseLoongsonISA) {
4048 gsldx(method_result, recv_klass, scan_temp, 0);
4049 } else {
4050 daddu(AT, recv_klass, scan_temp);
4051 ld(method_result, AT);
4052 }
4053 }
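// Sketch of the lookup above (pseudo-C; names are descriptive only):
//   scan = recv_klass + vtable_start + vtable_length * wordSize; // first itableOffsetEntry
//   while (true) {
//     if (scan->interface == intf_klass) break;              // hit
//     if (scan->interface == NULL) goto L_no_such_interface; // end of itable
//     scan += itableOffsetEntry::size() * wordSize;
//   }
//   method_result = *(recv_klass + scan->offset
//                     + itable_index * wordSize + itentry_off);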
4055 // virtual method calling
4056 void MacroAssembler::lookup_virtual_method(Register recv_klass,
4057 RegisterOrConstant vtable_index,
4058 Register method_result) {
4059 Register tmp = GP;
4060 push(tmp);
4062 if (vtable_index.is_constant()) {
4063 assert_different_registers(recv_klass, method_result, tmp);
4064 } else {
4065 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
4066 }
4067 const int base = InstanceKlass::vtable_start_offset() * wordSize;
4068 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
4069 /*
4070 Address vtable_entry_addr(recv_klass,
4071 vtable_index, Address::times_ptr,
4072 base + vtableEntry::method_offset_in_bytes());
4073 */
4074 if (vtable_index.is_constant()) {
4075 set64(AT, vtable_index.as_constant());
4076 dsll(AT, AT, (int)Address::times_ptr);
4077 } else {
4078 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
4079 }
4080 set64(tmp, base + vtableEntry::method_offset_in_bytes());
4081 daddu(tmp, tmp, AT);
4082 daddu(tmp, tmp, recv_klass);
4083 ld(method_result, tmp, 0);
4085 pop(tmp);
4086 }
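// Net effect (pseudo-C):
//   method_result = *(recv_klass + base + vtable_index * wordSize
//                     + vtableEntry::method_offset_in_bytes());
// GP is borrowed as a scratch register and saved/restored around the
// computation, with AT holding the scaled vtable index.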