Tue, 12 Dec 2017 10:30:27 +0800
#6345 sync is controlled by UseSyncLevel instead of Use3A2000
Reviewed-by: fujie
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

// Implementation of MacroAssembler

intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for (k = 0; k < sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for (k = 0; k < sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
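
// A note on the pair below: save_registers()/restore_registers() are
// debugging helpers. They assume A0 holds the base of a save area laid out
// like the static i[]/f[] buffers above (i_offset()/f_offset() compute the
// slot offsets) and store or reload the low 32 bits of all 32 GPRs and all
// 32 FPRs through it.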

void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;

  /*
    move(AT, RA); // dadd
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  */
  if (special(stub_inst) == dadd_op) {
    jint *pc = (jint *)branch;

    assert(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op, "Not a branch label patch");
    if (!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      /* revert to "beq + nop" */
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
      tty->print("0x%x, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
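
// A note on the patching above: RA, captured by the bgezal in the b_far stub,
// points at branch + 12 (the lui), so rewriting the lui/ori pair with the
// high/low halves of (target - branch - 12) lets the daddu reconstruct the
// absolute target. A plain short branch takes the other path and only has
// its 16-bit word offset re-encoded by patched_branch().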

static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}
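
// The patchable size is a constant 6 instructions: patchable_call() and
// patchable_jump() below pad the short jal/j form with nops so the near and
// far variants occupy the same number of words and can be repatched in place.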

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}
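
// A MIPS j/jal encodes only a 26-bit instruction index; the upper bits of the
// target come from the address of the delay slot, so a direct jump can only
// reach targets in the same 256MB-aligned region (presumably what
// fit_in_jal() checks). Testing both code-cache bounds covers every address
// the code could be relocated to.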

void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}
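
// The far-branch helpers below use the cheap encoding when the displacement
// fits in the 16-bit branch offset; otherwise they invert the condition and
// branch around an unconditional b_far() to the distant target.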

void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
    /*
      MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
      0x00000055651ed514: dadd at, ra, zero
      0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

      0x00000055651ed51c: sll zero, zero, 0
      0x00000055651ed520: lui t9, 0x0
      0x00000055651ed524: ori t9, t9, 0x21b8
      0x00000055651ed528: daddu t9, t9, ra
      0x00000055651ed52c: dadd ra, at, zero
      0x00000055651ed530: jr t9
      0x00000055651ed534: sll zero, zero, 0
    */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* address must be bounded */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if (UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
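
// The loop above is a standard MIPS ll/sc read-modify-write: sc leaves 1 in
// tmp_reg2 when the store-conditional succeeds and 0 when the link was
// broken, so the beq retries until the increment lands atomically. With this
// change the leading barrier is gated by UseSyncLevel (emitted unless the
// level falls in the [2000, 3000) band) rather than by the old Use3A2000
// flag.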

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();


  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}

// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
// This method handles the stack alignment itself, so the caller need not reserve stack space for the arguments.
void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}
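
// The andi/daddi dance above keeps SP 16-byte aligned across the call, as the
// MIPS N64 ABI expects: SP is always at least 8-byte aligned here, so when
// its low four bits are nonzero, dropping it by another 8 restores alignment
// for the callee, and the adjustment is undone on return.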


void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::call(address entry) {
  // C/C++ code assumes T9 holds the entry point, so we always move entry into T9.
  // Maybe there is a more graceful way to handle this. FIXME
  // For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}
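
// c2bool() coerces a C truth value into a Java boolean: zero stays zero and
// any nonzero value is rewritten to exactly 1.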

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax, = 0x%08x", rax);
      tty->print_cr("rbx, = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp, = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

      //if (os::message_box(msg, "Execution stopped, print registers?"))
      //  regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}


void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for argument. added by yjl 7/10/2005
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore space for argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}

void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}

void MacroAssembler::print_reg(Register reg) {
  /*
  char *s = getenv("PRINT_REG");
  if (s == NULL)
    return;
  if (strcmp(s, "1") != 0)
    return;
  */
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19);    // A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  /*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); // 23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int), relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();
  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
  */
}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float), relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}

void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}


void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  //MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  get_thread(java_thread);
#else
#ifdef ASSERT
  {
    Label L;
    get_thread(AT);
    beq(java_thread, AT, L);
    delayed()->nop();
    stop("MacroAssembler::call_VM_base: TREG not callee saved?");
    bind(L);
  }
#endif
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}


void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}


void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}

/*
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
*/

void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if (clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
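
// The idea, as I understand it: each thread stores to its own slot of one
// shared serialization page, so the VM thread can write-protect that page and
// trap any thread with such a store in flight, getting the effect of a remote
// memory barrier without every thread executing sync itself.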

// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}

//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != V0, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lw(AT, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lb(AT, in_progress);
  }
  beq(AT, R0, done);
  nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  beq(pre_val, R0, done);
  nop();

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ld(tmp, index);
  beq(tmp, R0, runtime);
  nop();

  daddiu(tmp, tmp, -1 * wordSize);
  sd(tmp, index);
  ld(AT, buffer);
  daddu(tmp, tmp, AT);

  // Record the previous value
  sd(pre_val, tmp, 0);
  beq(R0, R0, done);
  nop();

  bind(runtime);
  // save the live input values
  if (tosca_live) push(V0);

  if (obj != noreg && obj != V0) push(obj);

  if (pre_val != V0) push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
    if (thread != A1) move(A1, thread);
    if (pre_val != A0) move(A0, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != V0)
    pop(pre_val);

  if (obj != noreg && obj != V0)
    pop(obj);

  if (tosca_live) pop(V0);

  bind(done);
}

void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
  assert(tmp  != AT, "must be");
  assert(tmp2 != AT, "must be");
#ifdef _LP64
  assert(thread == TREG, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?
  xorr(AT, store_addr, new_val);
  dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
  beq(AT, R0, done);
  nop();


  // crosses regions, storing NULL?
  beq(new_val, R0, done);
  nop();

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  move(card_addr, store_addr);
  dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  set64(cardtable, (intptr_t)ct->byte_map_base);
  daddu(card_addr, card_addr, cardtable);

  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
  beq(AT, R0, done);
  nop();

  sync();
  lb(AT, card_addr, 0);
  daddiu(AT, AT, -1 * (int)CardTableModRefBS::dirty_card_val());
  beq(AT, R0, done);
  nop();


  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
  move(AT, (int)CardTableModRefBS::dirty_card_val());
  sb(AT, card_addr, 0);

  lw(AT, queue_index);
  beq(AT, R0, runtime);
  nop();
  daddiu(AT, AT, -1 * wordSize);
  sw(AT, queue_index);
  ld(tmp2, buffer);
#ifdef _LP64
  ld(AT, queue_index);
  daddu(tmp2, tmp2, AT);
  sd(card_addr, tmp2, 0);
#else
  lw(AT, queue_index);
  addu32(tmp2, tmp2, AT);
  sw(card_addr, tmp2, 0);
#endif
  beq(R0, R0, done);
  nop();

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}

void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  set64(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  if (UseConcMarkSweepGC) sync();
  sb(R0, AT, 0);
}
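
// Card-table write barrier in two halves: part 1 turns the oop into a card
// index (the address shifted right by card_shift; cards are 512 bytes by
// default), and part 2 adds byte_map_base and stores 0, the value of
// CardTableModRefBS::dirty_card_val(), to dirty the card. The sync() under
// CMS orders the card write after the field store.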

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2); // blows t1&t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  if (var_size_in_bytes == NOREG) {
    // I don't think we need to move con_size_in_bytes to a register first.
    // by yjl 8/17/2005
    assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
    addi(end, obj, con_size_in_bytes);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();


  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  // recover var_size_in_bytes if necessary
  /*if (var_size_in_bytes == end) {
    sub(var_size_in_bytes, end, obj);
  }*/

  verify_tlab(t1, t2);
}
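
// TLAB allocation above is plain bump-pointer code: load tlab_top, add the
// size, bail to the slow path if the new top passes tlab_end, otherwise store
// the new top back. No atomics are needed because the TLAB is private to the
// allocating thread.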

// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, AT);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { // by yyq
    // No allocation in the shared eden.
    b_far(slow_case);
    delayed()->nop();
  } else {

#ifndef _LP64
    Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
    lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
#else
    Address heap_top(t1);
    li(t1, (long)Universe::heap()->top_addr());
#endif
    ld_ptr(obj, heap_top);

    Register end = t2;
    Label retry;

    bind(retry);
    if (var_size_in_bytes == NOREG) {
      // I don't think we need to move con_size_in_bytes to a register first.
      assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
      addi(end, obj, con_size_in_bytes);
    } else {
      add(end, obj, var_size_in_bytes);
    }
    // if end < obj then we wrapped around => object too long => slow case
    sltu(AT, end, obj);
    bne_far(AT, R0, slow_case);
    delayed()->nop();

    li(AT, (long)Universe::heap()->end_addr());
    sltu(AT, AT, end);
    bne_far(AT, R0, slow_case);
    delayed()->nop();
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    //if (os::is_MP()) {
    //  sync();
    //}

    // if someone beat us on the allocation, try again, otherwise continue
    cmpxchg(end, heap_top, obj);
    beq_far(AT, R0, retry); // by yyq
    delayed()->nop();

  }
}
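
// Unlike the TLAB case, eden allocation races with other threads, so the bump
// of the shared heap top goes through cmpxchg: if another thread moved top
// between the load and the CAS, AT comes back 0 and the loop recomputes end
// and retries.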
1808 // C2 doesn't invoke this one.
1809 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
1810 Register top = T0;
1811 Register t1 = T1;
1812 /* Jin: tlab_refill() is called in
1814 [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
1816 In generate_code_for(), T2 has been assigned as a register(length), which is used
1817 after calling tlab_refill();
1818 Therefore, tlab_refill() should not use T2.
1820 Source:
1822 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
1823 at java.lang.System.arraycopy(Native Method)
1824 at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array
1825 at sun.misc.Resource.getBytes(Resource.java:117)
1826 at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
1827 at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
1828 at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
1829 */
1830 Register t2 = T9;
1831 Register t3 = T3;
1832 Register thread_reg = T8;
1833 Label do_refill, discard_tlab;
1834 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
1835 // No allocation in the shared eden.
1836 b(slow_case);
1837 delayed()->nop();
1838 }
1840 get_thread(thread_reg);
1842 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1843 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1845 // calculate amount of free space
1846 sub(t1, t1, top);
1847 shr(t1, LogHeapWordSize);
1849 // Retain tlab and allocate object in shared space if
1850 // the amount free in the tlab is too large to discard.
1851 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1852 slt(AT, t2, t1);
1853 beq(AT, R0, discard_tlab);
1854 delayed()->nop();
1856 // Retain
1858 #ifndef _LP64
1859 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1860 #else
1861 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1862 #endif
1863 add(t2, t2, AT);
1864 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
1866 if (TLABStats) {
1867 // increment number of slow_allocations
1868 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1869 addiu(AT, AT, 1);
1870 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
1871 }
1872 b(try_eden);
1873 delayed()->nop();
1875 bind(discard_tlab);
1876 if (TLABStats) {
1877 // increment number of refills
1878 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1879 addi(AT, AT, 1);
1880 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
1881 // accumulate wastage -- t1 is amount free in tlab
1882 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1883 add(AT, AT, t1);
1884 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
1885 }
1887 // if tlab is currently allocated (top or end != null) then
1888 // fill [top, end + alignment_reserve) with array object
1889 beq(top, R0, do_refill);
1890 delayed()->nop();
1892 // set up the mark word
1893 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
1894 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
1896 // set the length to the remaining space
1897 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
1898 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
1899 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
1900 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
1902 // set klass to intArrayKlass
1903 #ifndef _LP64
1904 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
1905 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
1906 #else
1907 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
1908 ld_ptr(t1, AT, 0);
1909 #endif
1910 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
1911 store_klass(top, t1);
1913 // refill the tlab with an eden allocation
1914 bind(do_refill);
1915 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1916 shl(t1, LogHeapWordSize);
1917 // add object_size ??
1918 eden_allocate(top, t1, 0, t2, t3, slow_case);
1920 // Check that t1 was preserved in eden_allocate.
1921 #ifdef ASSERT
1922 if (UseTLAB) {
1923 Label ok;
1924 assert_different_registers(thread_reg, t1);
1925 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
1926 shl(AT, LogHeapWordSize);
1927 beq(AT, t1, ok);
1928 delayed()->nop();
1929 stop("assert(t1 != tlab size)");
1930 should_not_reach_here();
1932 bind(ok);
1933 }
1934 #endif
1935 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
1936 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
1937 add(top, top, t1);
1938 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
1939 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
1940 verify_tlab(t1, t2);
1941 b(retry);
1942 delayed()->nop();
1943 }
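/*
 * When the old TLAB is discarded above, its unused tail is plugged with a
 * dummy int[] so the heap stays walkable. Roughly (a sketch; free is the
 * word count held in t1):
 *
 *   filler = (arrayOop)top;
 *   filler->set_mark(markOopDesc::prototype()->copy_set_hash(0x2));
 *   filler->set_length((free - header_size(T_INT) + alignment_reserve())
 *                      * (HeapWordSize / sizeof(jint)));
 *   filler->set_klass(Universe::intArrayKlassObj());
 */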
1945 static const double pi_4 = 0.7853981633974483;
1947 // The x86 version is too clumsy; I don't think we need that fuss. Maybe I'm wrong, FIXME
1948 // must get the argument (a double) in F12/F13
1949 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
1950 // We need to preserve the registers that may be modified during the call @Jerome
1951 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
1952 // save all modified registers here
1953 // if (preserve_cpu_regs) {
1954 // }
1955 // FIXME: in the disassembly of trigfunc, only V0, V1, T9, SP and RA are used, so we only save V0, V1 and T9
1956 pushad();
1957 // we should reserve stack space before the call
1958 addi(SP, SP, -wordSize * 2);
1959 switch (trig) {
1960 case 's' :
1961 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
1962 delayed()->nop();
1963 break;
1964 case 'c':
1965 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
1966 delayed()->nop();
1967 break;
1968 case 't':
1969 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
1970 delayed()->nop();
1971 break;
1972 default: assert(false, "bad intrinsic");
1973 break;
1975 }
1977 addi(SP, SP, wordSize * 2);
1978 popad();
1979 // if (preserve_cpu_regs) {
1980 // }
1981 }
1983 #ifdef _LP64
1984 void MacroAssembler::li(Register rd, long imm) {
1985 if (imm <= max_jint && imm >= min_jint) {
1986 li32(rd, (int)imm);
1987 } else if (julong(imm) <= 0xFFFFFFFF) {
1988 assert_not_delayed();
1989 // lui sign-extends, so we can't use that.
1990 ori(rd, R0, julong(imm) >> 16);
1991 dsll(rd, rd, 16);
1992 ori(rd, rd, split_low(imm));
1993 //aoqi_test
1994 //} else if ((imm > 0) && ((imm >> 48) == 0)) {
1995 } else if ((imm > 0) && is_simm16(imm >> 32)) {
1996 /* A 48-bit address */
1997 li48(rd, imm);
1998 } else {
1999 li64(rd, imm);
2000 }
2001 }
2002 #else
2003 void MacroAssembler::li(Register rd, long imm) {
2004 li32(rd, (int)imm);
2005 }
2006 #endif
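/*
 * How li() picks a sequence, by example (a sketch of the LP64 path; the
 * values are arbitrary):
 *
 *   li(rd, 0x1234);               // 1 inst:  addiu            (simm16)
 *   li(rd, 0x12345678);           // 2 insts: lui + ori        (32-bit)
 *   li(rd, 0x80000000L);          // 3 insts: ori + dsll + ori (zero-extended)
 *   li(rd, 0x123456789abcL);      // 4 insts: li48             (48-bit address)
 *   li(rd, 0x7eadbeefcafebabeL);  // 6 insts: li64
 */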
2008 void MacroAssembler::li32(Register reg, int imm) {
2009 if (is_simm16(imm)) {
2010 /* Jin: for imm < 0, we should use addi instead of addiu.
2011 *
2012 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2013 *
2014 * 78 move [int:-1|I] [a0|I]
2015 * : daddi a0, zero, 0xffffffff (correct)
2016 * : daddiu a0, zero, 0xffffffff (incorrect)
2017 */
2018 if (imm >= 0)
2019 addiu(reg, R0, imm);
2020 else
2021 addi(reg, R0, imm);
2022 } else {
2023 lui(reg, split_low(imm >> 16));
2024 if (split_low(imm))
2025 ori(reg, reg, split_low(imm));
2026 }
2027 }
2029 #ifdef _LP64
2030 void MacroAssembler::set64(Register d, jlong value) {
2031 assert_not_delayed();
2033 int hi = (int)(value >> 32);
2034 int lo = (int)value; // low 32 bits
2036 if (value == lo) { // 32-bit integer
2037 if (is_simm16(value)) {
2038 daddiu(d, R0, value);
2039 } else {
2040 lui(d, split_low(value >> 16));
2041 if (split_low(value)) {
2042 ori(d, d, split_low(value));
2043 }
2044 }
2045 } else if (hi == 0) { // hardware zero-extends to upper 32
2046 ori(d, R0, julong(value) >> 16);
2047 dsll(d, d, 16);
2048 if (split_low(value)) {
2049 ori(d, d, split_low(value));
2050 }
2051 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2052 // 4 insts
2053 li48(d, value);
2054 } else { // li64
2055 // 6 insts
2056 li64(d, value);
2057 }
2058 }
2061 int MacroAssembler::insts_for_set64(jlong value) {
2062 int hi = (int)(value >> 32);
2063 int lo = (int)value; // low 32 bits
2065 int count = 0;
2067 if (value == lo) { // 32-bit integer
2068 if (is_simm16(value)) {
2069 //daddiu(d, R0, value);
2070 count++;
2071 } else {
2072 //lui(d, split_low(value >> 16));
2073 count++;
2074 if (split_low(value)) {
2075 //ori(d, d, split_low(value));
2076 count++;
2077 }
2078 }
2079 } else if (hi == 0) { // hardware zero-extends to upper 32
2080 //ori(d, R0, julong(value) >> 16);
2081 //dsll(d, d, 16);
2082 count += 2;
2083 if (split_low(value)) {
2084 //ori(d, d, split_low(value));
2085 count++;
2086 }
2087 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2088 // 4 insts
2089 //li48(d, value);
2090 count += 4;
2091 } else { // li64
2092 // 6 insts
2093 //li64(d, value);
2094 count += 6;
2095 }
2097 return count;
2098 }
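/*
 * insts_for_set64() must stay in lockstep with set64(): callers size and
 * patch code buffers from this count. For example, insts_for_set64(0x12345678)
 * returns 2 because set64() would emit lui + ori for that value; if set64()
 * ever changes, this function has to change with it.
 */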
2100 void MacroAssembler::patchable_set48(Register d, jlong value) {
2101 assert_not_delayed();
2103 int hi = (int)(value >> 32);
2104 int lo = (int)value; // low 32 bits
2106 int count = 0;
2108 if (value == lo) { // 32-bit integer
2109 if (is_simm16(value)) {
2110 daddiu(d, R0, value);
2111 count += 1;
2112 } else {
2113 lui(d, split_low(value >> 16));
2114 count += 1;
2115 if (split_low(value)) {
2116 ori(d, d, split_low(value));
2117 count += 1;
2118 }
2119 }
2120 } else if (hi == 0) { // hardware zero-extends to upper 32
2121 ori(d, R0, julong(value) >> 16);
2122 dsll(d, d, 16);
2123 count += 2;
2124 if (split_low(value)) {
2125 ori(d, d, split_low(value));
2126 count += 1;
2127 }
2128 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2129 // 4 insts
2130 li48(d, value);
2131 count += 4;
2132 } else { // li64
2133 tty->print_cr("value = 0x%lx", value);
2134 guarantee(false, "Not supported yet !");
2135 }
2137 for (; count < 4; count++) {
2138 nop();
2139 }
2140 }
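/*
 * patchable_set48() always occupies exactly 4 instruction slots, padding the
 * short encodings with nops, so the constant can later be patched in place
 * without moving any code. For example (a sketch):
 *
 *   patchable_set48(T9, 0x1234);          // daddiu + 3 nops
 *   patchable_set48(T9, 0x123456789abcL); // li48 (lui + ori + dsll + ori), 0 nops
 */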
2142 void MacroAssembler::patchable_set32(Register d, jlong value) {
2143 assert_not_delayed();
2145 int hi = (int)(value >> 32);
2146 int lo = (int)value; // low 32 bits
2148 int count = 0;
2150 if (value == lo) { // 32-bit integer
2151 if (is_simm16(value)) {
2152 daddiu(d, R0, value);
2153 count += 1;
2154 } else {
2155 lui(d, split_low(value >> 16));
2156 count += 1;
2157 if (split_low(value)) {
2158 ori(d, d, split_low(value));
2159 count += 1;
2160 }
2161 }
2162 } else if (hi == 0) { // hardware zero-extends to upper 32
2163 ori(d, R0, julong(value) >> 16);
2164 dsll(d, d, 16);
2165 count += 2;
2166 if (split_low(value)) {
2167 ori(d, d, split_low(value));
2168 count += 1;
2169 }
2170 } else {
2171 tty->print_cr("value = 0x%lx", value);
2172 guarantee(false, "Not supported yet !");
2173 }
2175 for (; count < 3; count++) {
2176 nop();
2177 }
2178 }
2180 void MacroAssembler::patchable_call32(Register d, jlong value) {
2181 assert_not_delayed();
2183 int hi = (int)(value >> 32);
2184 int lo = (int)value; // low 32 bits
2186 int count = 0;
2188 if (value == lo) { // 32-bit integer
2189 if (is_simm16(value)) {
2190 daddiu(d, R0, value);
2191 count += 1;
2192 } else {
2193 lui(d, split_low(value >> 16));
2194 count += 1;
2195 if (split_low(value)) {
2196 ori(d, d, split_low(value));
2197 count += 1;
2198 }
2199 }
2200 } else {
2201 tty->print_cr("value = 0x%lx", value);
2202 guarantee(false, "Not supported yet !");
2203 }
2205 for (; count < 2; count++) {
2206 nop();
2207 }
2208 }
2210 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2211 assert(UseCompressedClassPointers, "should only be used for compressed header");
2212 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2214 int klass_index = oop_recorder()->find_index(k);
2215 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2216 long narrowKlass = (long)Klass::encode_klass(k);
2218 relocate(rspec, Assembler::narrow_oop_operand);
2219 patchable_set48(dst, narrowKlass);
2220 }
2223 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2224 assert(UseCompressedOops, "should only be used for compressed header");
2225 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2227 int oop_index = oop_recorder()->find_index(obj);
2228 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2230 relocate(rspec, Assembler::narrow_oop_operand);
2231 patchable_set48(dst, oop_index);
2232 }
2234 void MacroAssembler::li64(Register rd, long imm) {
2235 assert_not_delayed();
2236 lui(rd, imm >> 48);
2237 ori(rd, rd, split_low(imm >> 32));
2238 dsll(rd, rd, 16);
2239 ori(rd, rd, split_low(imm >> 16));
2240 dsll(rd, rd, 16);
2241 ori(rd, rd, split_low(imm));
2242 }
2244 void MacroAssembler::li48(Register rd, long imm) {
2245 assert_not_delayed();
2246 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2247 lui(rd, imm >> 32);
2248 ori(rd, rd, split_low(imm >> 16));
2249 dsll(rd, rd, 16);
2250 ori(rd, rd, split_low(imm));
2251 }
2252 #endif
2253 // NOTE: unlike i486, I don't push eax here.
2254 // x86 saves eax because it uses eax as the jump register.
2255 void MacroAssembler::verify_oop(Register reg, const char* s) {
2256 /*
2257 if (!VerifyOops) return;
2259 // Pass register number to verify_oop_subroutine
2260 char* b = new char[strlen(s) + 50];
2261 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
2262 push(rax); // save rax,
2263 push(reg); // pass register argument
2264 ExternalAddress buffer((address) b);
2265 // avoid using pushptr, as it modifies scratch registers
2266 // and our contract is not to modify anything
2267 movptr(rax, buffer.addr());
2268 push(rax);
2269 // call indirectly to solve generation ordering problem
2270 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2271 call(rax);
2272 */
2273 if (!VerifyOops) return;
2274 const char * b = NULL;
2275 stringStream ss;
2276 ss.print("verify_oop: %s: %s", reg->name(), s);
2277 b = code_string(ss.as_string());
2278 #ifdef _LP64
2279 pushad();
2280 move(A1, reg);
2281 li(A0, (long)b);
2282 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2283 ld(T9, AT, 0);
2284 jalr(T9);
2285 delayed()->nop();
2286 popad();
2287 #else
2288 // Pass register number to verify_oop_subroutine
2289 sw(T0, SP, -wordSize);
2290 sw(T1, SP, -2 * wordSize);
2291 sw(RA, SP, -3 * wordSize);
2292 sw(A0, SP, -4 * wordSize);
2293 sw(A1, SP, -5 * wordSize);
2294 sw(AT, SP, -6 * wordSize);
2295 sw(T9, SP, -7 * wordSize);
2296 addiu(SP, SP, -7 * wordSize);
2297 move(A1, reg);
2298 li(A0, (long)b);
2299 // call indirectly to solve generation ordering problem
2300 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2301 lw(T9, AT, 0);
2302 jalr(T9);
2303 delayed()->nop();
2304 lw(T0, SP, 6 * wordSize);
2305 lw(T1, SP, 5 * wordSize);
2306 lw(RA, SP, 4 * wordSize);
2307 lw(A0, SP, 3 * wordSize);
2308 lw(A1, SP, 2 * wordSize);
2309 lw(AT, SP, 1 * wordSize);
2310 lw(T9, SP, 0 * wordSize);
2311 addiu(SP, SP, 7 * wordSize);
2312 #endif
2313 }
2316 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2317 if (!VerifyOops) {
2318 nop();
2319 return;
2320 }
2321 // Pass register number to verify_oop_subroutine
2322 const char * b = NULL;
2323 stringStream ss;
2324 ss.print("verify_oop_addr: %s", s);
2325 b = code_string(ss.as_string());
2327 st_ptr(T0, SP, - wordSize);
2328 st_ptr(T1, SP, - 2*wordSize);
2329 st_ptr(RA, SP, - 3*wordSize);
2330 st_ptr(A0, SP, - 4*wordSize);
2331 st_ptr(A1, SP, - 5*wordSize);
2332 st_ptr(AT, SP, - 6*wordSize);
2333 st_ptr(T9, SP, - 7*wordSize);
2334 ld_ptr(A1, addr); // addr may use SP, so load from it before changing SP
2335 addiu(SP, SP, - 7 * wordSize);
2337 li(A0, (long)b);
2338 // call indirectly to solve generation ordering problem
2339 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2340 ld_ptr(T9, AT, 0);
2341 jalr(T9);
2342 delayed()->nop();
2343 ld_ptr(T0, SP, 6* wordSize);
2344 ld_ptr(T1, SP, 5* wordSize);
2345 ld_ptr(RA, SP, 4* wordSize);
2346 ld_ptr(A0, SP, 3* wordSize);
2347 ld_ptr(A1, SP, 2* wordSize);
2348 ld_ptr(AT, SP, 1* wordSize);
2349 ld_ptr(T9, SP, 0* wordSize);
2350 addiu(SP, SP, 7 * wordSize);
2351 }
2353 // used registers : T0, T1
2354 void MacroAssembler::verify_oop_subroutine() {
2355 // RA: ra
2356 // A0: char* error message
2357 // A1: oop object to verify
2359 Label exit, error;
2360 // increment counter
2361 li(T0, (long)StubRoutines::verify_oop_count_addr());
2362 lw(AT, T0, 0);
2363 #ifdef _LP64
2364 daddi(AT, AT, 1);
2365 #else
2366 addi(AT, AT, 1);
2367 #endif
2368 sw(AT, T0, 0);
2370 // make sure object is 'reasonable'
2371 beq(A1, R0, exit); // if obj is NULL it is ok
2372 delayed()->nop();
2374 // Check if the oop is in the right area of memory
2375 //const int oop_mask = Universe::verify_oop_mask();
2376 //const int oop_bits = Universe::verify_oop_bits();
2377 const uintptr_t oop_mask = Universe::verify_oop_mask();
2378 const uintptr_t oop_bits = Universe::verify_oop_bits();
2379 li(AT, oop_mask);
2380 andr(T0, A1, AT);
2381 li(AT, oop_bits);
2382 bne(T0, AT, error);
2383 delayed()->nop();
2385 // make sure klass is 'reasonable'
2386 // added for compressed oops
2387 reinit_heapbase();
2389 load_klass(T0, A1);
2390 beq(T0, R0, error); // if klass is NULL it is broken
2391 delayed()->nop();
2392 #if 0
2393 //FIXME:wuhui.
2394 // Check if the klass is in the right area of memory
2395 //const int klass_mask = Universe::verify_klass_mask();
2396 //const int klass_bits = Universe::verify_klass_bits();
2397 const uintptr_t klass_mask = Universe::verify_klass_mask();
2398 const uintptr_t klass_bits = Universe::verify_klass_bits();
2400 li(AT, klass_mask);
2401 andr(T1, T0, AT);
2402 li(AT, klass_bits);
2403 bne(T1, AT, error);
2404 delayed()->nop();
2405 // make sure klass' klass is 'reasonable'
2406 //add for compressedoops
2407 load_klass(T0, T0);
2408 beq(T0, R0, error); // if klass' klass is NULL it is broken
2409 delayed()->nop();
2411 li(AT, klass_mask);
2412 andr(T1, T0, AT);
2413 li(AT, klass_bits);
2414 bne(T1, AT, error);
2415 delayed()->nop(); // if klass not in right area of memory it is broken too.
2416 #endif
2417 // return if everything seems ok
2418 bind(exit);
2420 jr(RA);
2421 delayed()->nop();
2423 // handle errors
2424 bind(error);
2425 pushad();
2426 #ifndef _LP64
2427 addi(SP, SP, (-1) * wordSize);
2428 #endif
2429 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2430 delayed()->nop();
2431 #ifndef _LP64
2432 addiu(SP, SP, 1 * wordSize);
2433 #endif
2434 popad();
2435 jr(RA);
2436 delayed()->nop();
2437 }
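/*
 * The "reasonable oop" test above is just a mask-and-compare, roughly:
 *
 *   if ((obj & Universe::verify_oop_mask()) != Universe::verify_oop_bits())
 *     goto error;   // pointer bits outside the expected heap range
 *
 * and the (disabled) klass check would mask with verify_klass_mask() the same way.
 */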
2439 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2440 #ifdef ASSERT
2441 assert_different_registers(t1, t2, AT);
2442 if (UseTLAB && VerifyOops) {
2443 Label next, ok;
2445 get_thread(t1);
2447 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2448 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2449 sltu(AT, t2, AT);
2450 beq(AT, R0, next);
2451 delayed()->nop();
2453 stop("assert(top >= start)");
2455 bind(next);
2456 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2457 sltu(AT, AT, t2);
2458 beq(AT, R0, ok);
2459 delayed()->nop();
2461 stop("assert(top <= end)");
2463 bind(ok);
2465 }
2466 #endif
2467 }
2468 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2469 Register tmp,
2470 int offset) {
2471 intptr_t value = *delayed_value_addr;
2472 if (value != 0)
2473 return RegisterOrConstant(value + offset);
2474 // load indirectly to solve generation ordering problem
2476 li(AT, (long)delayed_value_addr);
2477 ld_ptr(tmp, AT, 0);
2478 if (offset != 0)
2479 daddi(tmp, tmp, offset);
2481 return RegisterOrConstant(tmp);
2482 }
2484 void MacroAssembler::hswap(Register reg) {
2486 // swap the two bytes of a halfword; the result is sign-extended
2486 //andi(reg, reg, 0xffff);
2487 srl(AT, reg, 8);
2488 sll(reg, reg, 24);
2489 sra(reg, reg, 16);
2490 orr(reg, reg, AT);
2491 }
2493 void MacroAssembler::huswap(Register reg) {
2494 #ifdef _LP64
2495 dsrl(AT, reg, 8);
2496 dsll(reg, reg, 24);
2497 dsrl(reg, reg, 16);
2498 orr(reg, reg, AT);
2499 andi(reg, reg, 0xffff);
2500 #else
2501 //andi(reg, reg, 0xffff);
2502 srl(AT, reg, 8);
2503 sll(reg, reg, 24);
2504 srl(reg, reg, 16);
2505 orr(reg, reg, AT);
2506 #endif
2507 }
2509 // something funny: do this with only one extra register (AT)
2510 // 32-bit byte swap
2511 void MacroAssembler::swap(Register reg) {
2512 srl(AT, reg, 8);
2513 sll(reg, reg, 24);
2514 orr(reg, reg, AT);
2515 //reg : 4 1 2 3
2516 srl(AT, AT, 16);
2517 xorr(AT, AT, reg);
2518 andi(AT, AT, 0xff);
2519 //AT : 0 0 0 1^3
2520 xorr(reg, reg, AT);
2521 //reg : 4 1 2 1
2522 sll(AT, AT, 16);
2523 xorr(reg, reg, AT);
2524 //reg : 4 3 2 1
2525 }
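/*
 * Worked example of swap() with reg = 0x11223344 (AT is the only scratch):
 *
 *   srl  AT, reg, 8    // AT  = 0x00112233
 *   sll  reg, reg, 24  // reg = 0x44000000
 *   orr  reg, reg, AT  // reg = 0x44112233
 *   srl  AT, AT, 16    // AT  = 0x00000011
 *   xorr AT, AT, reg   // AT  = 0x44112222
 *   andi AT, AT, 0xff  // AT  = 0x00000022  (= 0x11 ^ 0x33)
 *   xorr reg, reg, AT  // reg = 0x44112211
 *   sll  AT, AT, 16    // AT  = 0x00220000
 *   xorr reg, reg, AT  // reg = 0x44332211  -- bytes reversed
 */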
2527 #ifdef _LP64
2529 /* do 32-bit CAS using MIPS64 lld/scd
2531 Jin: cas_int should only compare 32-bits of the memory value.
2532 However, lld/scd will do 64-bit operation, which violates the intention of cas_int.
2533 To simulate a 32-bit atomic operation, the value loaded with LLD is split into
2534 two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits
2535 of newval, combined with the high 32 bits of the memory value, are stored together with SCD.
2537 Example:
2539 double d = 3.1415926;
2540 System.err.println("hello" + d);
2542 sun.misc.FloatingDecimal$1.<init>()
2543 |
2544 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2546 38 cas_int [a7a7|J] [a0|I] [a6|I]
2547 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2548 // a6: 0x4ab325aa
2550 again:
2551 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2553 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2554 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2555 0x00000055647f3c68: dsll32 t8, t8, 0
2556 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2557 0x00000055647f3c70: sll zero, zero, 0
2559 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2560 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2561 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2562 0x00000055647f3c80: and v1, a6, v1
2563 0x00000055647f3c84: or at, t8, v1
2564 0x00000055647f3c88: scd at, 0x0(a7)
2565 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2566 0x00000055647f3c90: sll zero, zero, 0
2567 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2568 0x00000055647f3c98: sll zero, zero, 0
2569 nequal:
2570 0x00000055647f45a4: dadd a0, t9, zero
2571 0x00000055647f45a8: dadd at, zero, zero
2572 done:
2573 */
2575 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2576 /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
2577 Label done, again, nequal;
2579 bind(again);
2581 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2582 ll(AT, dest);
2583 bne(AT, c_reg, nequal);
2584 delayed()->nop();
2586 move(AT, x_reg);
2587 sc(AT, dest);
2588 beq(AT, R0, again);
2589 delayed()->nop();
2590 b(done);
2591 delayed()->nop();
2593 // not xchged
2594 bind(nequal);
2595 sync();
2596 move(c_reg, AT);
2597 move(AT, R0);
2599 bind(done);
2600 }
2601 #endif // cmpxchg32
2603 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
2604 Label done, again, nequal;
2606 bind(again);
2607 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2608 #ifdef _LP64
2609 lld(AT, dest);
2610 #else
2611 ll(AT, dest);
2612 #endif
2613 bne(AT, c_reg, nequal);
2614 delayed()->nop();
2616 move(AT, x_reg);
2617 #ifdef _LP64
2618 scd(AT, dest);
2619 #else
2620 sc(AT, dest);
2621 #endif
2622 beq(AT, R0, again);
2623 delayed()->nop();
2624 b(done);
2625 delayed()->nop();
2627 // not xchged
2628 bind(nequal);
2629 sync();
2630 move(c_reg, AT);
2631 move(AT, R0);
2633 bind(done);
2634 }
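/*
 * The cmpxchg() contract, in rough C terms (AT doubles as the result flag):
 *
 *   if (*dest == c_reg) { *dest = x_reg; AT = 1; }   // exchanged
 *   else                { c_reg = *dest; AT = 0; }   // not exchanged
 *
 * which is why callers test AT, e.g. beq_far(AT, R0, retry) in eden_allocate().
 */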
2636 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
2637 Label done, again, nequal;
2639 Register x_reg = x_regLo;
2640 dsll32(x_regHi, x_regHi, 0);
2641 dsll32(x_regLo, x_regLo, 0);
2642 dsrl32(x_regLo, x_regLo, 0);
2643 orr(x_reg, x_regLo, x_regHi);
2645 Register c_reg = c_regLo;
2646 dsll32(c_regHi, c_regHi, 0);
2647 dsll32(c_regLo, c_regLo, 0);
2648 dsrl32(c_regLo, c_regLo, 0);
2649 orr(c_reg, c_regLo, c_regHi);
2651 bind(again);
2653 if(UseSyncLevel >= 3000 || UseSyncLevel < 2000) sync();
2654 lld(AT, dest);
2655 bne(AT, c_reg, nequal);
2656 delayed()->nop();
2658 //move(AT, x_reg);
2659 dadd(AT, x_reg, R0);
2660 scd(AT, dest);
2661 beq(AT, R0, again);
2662 delayed()->nop();
2663 b(done);
2664 delayed()->nop();
2666 // not xchged
2667 bind(nequal);
2668 sync();
2669 //move(c_reg, AT);
2670 //move(AT, R0);
2671 dadd(c_reg, AT, R0);
2672 dadd(AT, R0, R0);
2673 bind(done);
2674 }
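/*
 * cmpxchg8() first glues two 32-bit halves into single 64-bit values,
 * roughly (a sketch; both input register pairs are destroyed):
 *
 *   x_reg = (x_regHi << 32) | (uint32_t)x_regLo;   // dsll32 / dsrl32 / orr
 *   c_reg = (c_regHi << 32) | (uint32_t)c_regLo;
 *
 * and then runs the same lld/scd loop as the 64-bit cmpxchg() above.
 */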
2676 // be sure the three registers are different
2677 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2678 assert_different_registers(tmp, fs, ft);
2679 div_s(tmp, fs, ft);
2680 trunc_l_s(tmp, tmp);
2681 cvt_s_l(tmp, tmp);
2682 mul_s(tmp, tmp, ft);
2683 sub_s(fd, fs, tmp);
2684 }
2686 // be sure the three registers are different
2687 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
2688 assert_different_registers(tmp, fs, ft);
2689 div_d(tmp, fs, ft);
2690 trunc_l_d(tmp, tmp);
2691 cvt_d_l(tmp, tmp);
2692 mul_d(tmp, tmp, ft);
2693 sub_d(fd, fs, tmp);
2694 }
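/*
 * rem_s/rem_d compute the remainder of truncated division,
 * fd = fs - trunc(fs / ft) * ft, matching C's fmod() sign convention:
 * fs = 7.5, ft = 2.0 gives fd = 1.5; fs = -7.5, ft = 2.0 gives fd = -1.5.
 */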
2696 // Fast_Lock and Fast_Unlock used by C2
2698 // Because the transitions from emitted code to the runtime
2699 // monitorenter/exit helper stubs are so slow it's critical that
2700 // we inline both the stack-locking fast-path and the inflated fast path.
2701 //
2702 // See also: cmpFastLock and cmpFastUnlock.
2703 //
2704 // What follows is a specialized inline transliteration of the code
2705 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2706 // another option would be to emit TrySlowEnter and TrySlowExit methods
2707 // at startup-time. These methods would accept arguments as
2708 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2709 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2710 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2711 // In practice, however, the # of lock sites is bounded and is usually small.
2712 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2713 // if the processor uses simple bimodal branch predictors keyed by EIP,
2714 // since the helper routines would be called from multiple synchronization
2715 // sites.
2716 //
2717 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2718 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2719 // to those specialized methods. That'd give us a mostly platform-independent
2720 // implementation that the JITs could optimize and inline at their pleasure.
2721 // Done correctly, the only time we'd need to cross to native code would be
2722 // to park() or unpark() threads. We'd also need a few more unsafe operators
2723 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2724 // (b) explicit barriers or fence operations.
2725 //
2726 // TODO:
2727 //
2728 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2729 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2730 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2731 // the lock operators would typically be faster than reifying Self.
2732 //
2733 // * Ideally I'd define the primitives as:
2734 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2735 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2736 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2737 // Instead, we're stuck with a rather awkward and brittle register assignments below.
2738 // Furthermore the register assignments are overconstrained, possibly resulting in
2739 // sub-optimal code near the synchronization site.
2740 //
2741 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2742 // Alternately, use a better sp-proximity test.
2743 //
2744 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2745 // Either one is sufficient to uniquely identify a thread.
2746 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2747 //
2748 // * Intrinsify notify() and notifyAll() for the common cases where the
2749 // object is locked by the calling thread but the waitlist is empty.
2750 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2751 //
2752 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2753 // But beware of excessive branch density on AMD Opterons.
2754 //
2755 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2756 // or failure of the fast-path. If the fast-path fails then we pass
2757 // control to the slow-path, typically in C. In Fast_Lock and
2758 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2759 // will emit a conditional branch immediately after the node.
2760 // So we have branches to branches and lots of ICC.ZF games.
2761 // Instead, it might be better to have C2 pass a "FailureLabel"
2762 // into Fast_Lock and Fast_Unlock. In the case of success, control
2763 // will drop through the node. ICC.ZF is undefined at exit.
2764 // In the case of failure, the node will branch directly to the
2765 // FailureLabel
2768 // obj: object to lock
2769 // box: on-stack box address (displaced header location) - KILLED
2770 // rax,: tmp -- KILLED
2771 // scr: tmp -- KILLED
2772 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
2774 // Ensure the register assignments are disjoint
2775 guarantee (objReg != boxReg, "") ;
2776 guarantee (objReg != tmpReg, "") ;
2777 guarantee (objReg != scrReg, "") ;
2778 guarantee (boxReg != tmpReg, "") ;
2779 guarantee (boxReg != scrReg, "") ;
2782 block_comment("FastLock");
2783 /*
2784 move(AT, 0x0);
2785 return;
2786 */
2787 if (PrintBiasedLockingStatistics) {
2788 push(tmpReg);
2789 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
2790 pop(tmpReg);
2791 }
2793 if (EmitSync & 1) {
2794 move(AT, 0x0);
2795 return;
2796 } else
2797 if (EmitSync & 2) {
2798 Label DONE_LABEL ;
2799 if (UseBiasedLocking) {
2800 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2801 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2802 }
2804 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
2805 ori(tmpReg, tmpReg, 0x1);
2806 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2808 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2809 bne(AT, R0, DONE_LABEL);
2810 delayed()->nop();
2812 // Recursive locking
2813 dsubu(tmpReg, tmpReg, SP);
2814 li(AT, (7 - os::vm_page_size()));
2815 andr(tmpReg, tmpReg, AT);
2816 sd(tmpReg, Address(boxReg, 0));
2817 bind(DONE_LABEL) ;
2818 } else {
2819 // Possible cases that we'll encounter in fast_lock
2820 // ------------------------------------------------
2821 // * Inflated
2822 // -- unlocked
2823 // -- Locked
2824 // = by self
2825 // = by other
2826 // * biased
2827 // -- by Self
2828 // -- by other
2829 // * neutral
2830 // * stack-locked
2831 // -- by self
2832 // = sp-proximity test hits
2833 // = sp-proximity test generates false-negative
2834 // -- by other
2835 //
2837 Label IsInflated, DONE_LABEL, PopDone ;
2839 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
2840 // order to reduce the number of conditional branches in the most common cases.
2841 // Beware -- there's a subtle invariant that fetch of the markword
2842 // at [FETCH], below, will never observe a biased encoding (*101b).
2843 // If this invariant is not held we risk exclusion (safety) failure.
2844 if (UseBiasedLocking && !UseOptoBiasInlining) {
2845 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
2846 }
2848 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
2849 andi(AT, tmpReg, markOopDesc::monitor_value);
2850 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
2851 delayed()->nop();
2853 // Attempt stack-locking ...
2854 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
2855 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
2856 //if (os::is_MP()) {
2857 // sync();
2858 //}
2860 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
2861 //AT == 1: unlocked
2863 if (PrintBiasedLockingStatistics) {
2864 Label L;
2865 beq(AT, R0, L);
2866 delayed()->nop();
2867 push(T0);
2868 push(T1);
2869 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2870 pop(T1);
2871 pop(T0);
2872 bind(L);
2873 }
2874 bne(AT, R0, DONE_LABEL);
2875 delayed()->nop();
2877 // Recursive locking
2878 // The object is stack-locked: markword contains stack pointer to BasicLock.
2879 // Locked by current thread if difference with current SP is less than one page.
2880 dsubu(tmpReg, tmpReg, SP);
2881 li(AT, 7 - os::vm_page_size());
2882 andr(tmpReg, tmpReg, AT);
2883 sd(tmpReg, Address(boxReg, 0));
2884 if (PrintBiasedLockingStatistics) {
2885 Label L;
2886 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
2887 bne(tmpReg, R0, L);
2888 delayed()->nop();
2889 push(T0);
2890 push(T1);
2891 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
2892 pop(T1);
2893 pop(T0);
2894 bind(L);
2895 }
2896 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
2898 b(DONE_LABEL) ;
2899 delayed()->nop();
2901 bind(IsInflated) ;
2902 // The object's monitor m is unlocked iff m->owner == NULL,
2903 // otherwise m->owner may contain a thread or a stack address.
2905 // TODO: someday avoid the ST-before-CAS penalty by
2906 // relocating (deferring) the following ST.
2907 // We should also think about trying a CAS without having
2908 // fetched _owner. If the CAS is successful we may
2909 // avoid an RTO->RTS upgrade on the $line.
2910 // Without cast to int32_t a movptr will destroy r10 which is typically obj
2911 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
2912 sd(AT, Address(boxReg, 0));
2914 move(boxReg, tmpReg) ;
2915 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2916 // if (m->owner != 0) => AT = 0, goto slow path.
2917 move(AT, R0);
2918 bne(tmpReg, R0, DONE_LABEL);
2919 delayed()->nop();
2921 #ifndef OPT_THREAD
2922 get_thread (TREG) ;
2923 #endif
2924 // It's inflated and appears unlocked
2925 //if (os::is_MP()) {
2926 // sync();
2927 //}
2928 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
2929 // Intentional fall-through into DONE_LABEL ...
2932 // DONE_LABEL is a hot target - we'd really like to place it at the
2933 // start of cache line by padding with NOPs.
2934 // See the AMD and Intel software optimization manuals for the
2935 // most efficient "long" NOP encodings.
2936 // Unfortunately none of our alignment mechanisms suffice.
2937 bind(DONE_LABEL);
2939 // At DONE_LABEL the AT is set as follows ...
2940 // Fast_Unlock uses the same protocol.
2941 // AT == 1 -> Success
2942 // AT == 0 -> Failure - force control through the slow-path
2944 // Avoid branch-to-branch on AMD processors
2945 // This appears to be superstition.
2946 if (EmitSync & 32) nop() ;
2948 }
2949 }
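/*
 * Condensed shape of the fast_lock() fast path above, as pseudo-code (a
 * sketch; the real code also handles biased locking and statistics):
 *
 *   mark = obj->mark();
 *   if (mark & monitor_value) {                    // inflated
 *     if (m->_owner != NULL) AT = 0;               // contended -> slow path
 *     else AT = CAS(&m->_owner, NULL, Self);
 *   } else {                                       // try stack-locking
 *     box->dhw = mark | unlocked_value;
 *     if (CAS(&obj->mark, mark | unlocked_value, box)) AT = 1;
 *     else AT = ((mark - SP) & (7 - page_size)) == 0;  // recursive lock?
 *   }
 *   // AT == 1 -> success, AT == 0 -> fall into the slow path
 */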
2951 // obj: object to unlock
2952 // box: box address (displaced header location), killed
2953 // tmp: killed; cannot be obj nor box
2954 //
2955 // Some commentary on balanced locking:
2956 //
2957 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
2958 // Methods that don't have provably balanced locking are forced to run in the
2959 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
2960 // The interpreter provides two properties:
2961 // I1: At return-time the interpreter automatically and quietly unlocks any
2962 // objects acquired by the current activation (frame). Recall that the
2963 // interpreter maintains an on-stack list of locks currently held by
2964 // a frame.
2965 // I2: If a method attempts to unlock an object that is not held by the
2966 // frame, the interpreter throws IMSX.
2967 //
2968 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
2969 // B() doesn't have provably balanced locking so it runs in the interpreter.
2970 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
2971 // is still locked by A().
2972 //
2973 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
2974 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
2975 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
2976 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
2978 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
2980 guarantee (objReg != boxReg, "") ;
2981 guarantee (objReg != tmpReg, "") ;
2982 guarantee (boxReg != tmpReg, "") ;
2986 block_comment("FastUnlock");
2989 if (EmitSync & 4) {
2990 // Disable - inhibit all inlining. Force control through the slow-path
2991 move(AT, 0x0);
2992 return;
2993 } else
2994 if (EmitSync & 8) {
2995 Label DONE_LABEL ;
2996 if (UseBiasedLocking) {
2997 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2998 }
2999 // classic stack-locking code ...
3000 ld(tmpReg, Address(boxReg, 0)) ;
3001 beq(tmpReg, R0, DONE_LABEL) ;
3002 move(AT, 0x1); // delay slot
3004 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
3005 bind(DONE_LABEL);
3006 } else {
3007 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3009 // Critically, the biased locking test must have precedence over
3010 // and appear before the (box->dhw == 0) recursive stack-lock test.
3011 if (UseBiasedLocking && !UseOptoBiasInlining) {
3012 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3013 }
3015 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
3016 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
3017 delayed()->daddiu(AT, R0, 0x1);
3019 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3020 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
3021 beq(AT, R0, Stacked) ; // Inflated?
3022 delayed()->nop();
3024 bind(Inflated) ;
3025 // It's inflated.
3026 // Despite our balanced locking property we still check that m->_owner == Self
3027 // as java routines or native JNI code called by this thread might
3028 // have released the lock.
3029 // Refer to the comments in synchronizer.cpp for how we might encode extra
3030 // state in _succ so we can avoid fetching EntryList|cxq.
3031 //
3032 // I'd like to add more cases in fast_lock() and fast_unlock() --
3033 // such as recursive enter and exit -- but we have to be wary of
3034 // I$ bloat, T$ effects and BP$ effects.
3035 //
3036 // If there's no contention try a 1-0 exit. That is, exit without
3037 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3038 // we detect and recover from the race that the 1-0 exit admits.
3039 //
3040 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3041 // before it STs null into _owner, releasing the lock. Updates
3042 // to data protected by the critical section must be visible before
3043 // we drop the lock (and thus before any other thread could acquire
3044 // the lock and observe the fields protected by the lock).
3045 // IA32's memory-model is SPO, so STs are ordered with respect to
3046 // each other and there's no need for an explicit barrier (fence).
3047 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3048 #ifndef OPT_THREAD
3049 get_thread (TREG) ;
3050 #endif
3052 // It's inflated
3053 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3054 xorr(boxReg, boxReg, TREG);
3056 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3057 orr(boxReg, boxReg, AT);
3059 move(AT, R0);
3060 bne(boxReg, R0, DONE_LABEL);
3061 delayed()->nop();
3063 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3064 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3065 orr(boxReg, boxReg, AT);
3067 move(AT, R0);
3068 bne(boxReg, R0, DONE_LABEL);
3069 delayed()->nop();
3071 sync();
3072 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3073 move(AT, 0x1);
3074 b(DONE_LABEL);
3075 delayed()->nop();
3077 bind (Stacked);
3078 ld(tmpReg, Address(boxReg, 0)) ;
3079 //if (os::is_MP()) { sync(); }
3080 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3082 if (EmitSync & 65536) {
3083 bind (CheckSucc);
3084 }
3086 bind(DONE_LABEL);
3088 // Avoid branch to branch on AMD processors
3089 if (EmitSync & 32768) { nop() ; }
3090 }
3091 }
3093 void MacroAssembler::align(int modulus) {
3094 while (offset() % modulus != 0) nop();
3095 }
3098 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3099 //Unimplemented();
3100 }
3102 #ifdef _LP64
3103 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3105 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
3106 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3107 #else
3108 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3110 FloatRegister caller_saved_fpu_registers[] = {};
3111 #endif
3113 // We preserve all caller-saved registers
3114 void MacroAssembler::pushad(){
3115 int i;
3117 /* Fixed-point registers */
3118 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3119 daddi(SP, SP, -1 * len * wordSize);
3120 for (i = 0; i < len; i++)
3121 {
3122 #ifdef _LP64
3123 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3124 #else
3125 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3126 #endif
3127 }
3129 /* Floating-point registers */
3130 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3131 daddi(SP, SP, -1 * len * wordSize);
3132 for (i = 0; i < len; i++)
3133 {
3134 #ifdef _LP64
3135 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3136 #else
3137 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3138 #endif
3139 }
3140 }
3142 void MacroAssembler::popad(){
3143 int i;
3145 /* Floating-point registers */
3146 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3147 for (i = 0; i < len; i++)
3148 {
3149 #ifdef _LP64
3150 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3151 #else
3152 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3153 #endif
3154 }
3155 daddi(SP, SP, len * wordSize);
3157 /* Fixed-point registers */
3158 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3159 for (i = 0; i < len; i++)
3160 {
3161 #ifdef _LP64
3162 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3163 #else
3164 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3165 #endif
3166 }
3167 daddi(SP, SP, len * wordSize);
3168 }
3170 void MacroAssembler::push2(Register reg1, Register reg2) {
3171 #ifdef _LP64
3172 daddi(SP, SP, -16);
3173 sd(reg2, SP, 0);
3174 sd(reg1, SP, 8);
3175 #else
3176 addi(SP, SP, -8);
3177 sw(reg2, SP, 0);
3178 sw(reg1, SP, 4);
3179 #endif
3180 }
3182 void MacroAssembler::pop2(Register reg1, Register reg2) {
3183 #ifdef _LP64
3184 ld(reg1, SP, 0);
3185 ld(reg2, SP, 8);
3186 daddi(SP, SP, 16);
3187 #else
3188 lw(reg1, SP, 0);
3189 lw(reg2, SP, 4);
3190 addi(SP, SP, 8);
3191 #endif
3192 }
3194 // for the UseCompressedOops option
3195 void MacroAssembler::load_klass(Register dst, Register src) {
3196 #ifdef _LP64
3197 if(UseCompressedClassPointers){
3198 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3199 decode_klass_not_null(dst);
3200 } else
3201 #endif
3202 ld(dst, src, oopDesc::klass_offset_in_bytes());
3203 }
3205 void MacroAssembler::store_klass(Register dst, Register src) {
3206 #ifdef _LP64
3207 if(UseCompressedClassPointers){
3208 encode_klass_not_null(src);
3209 sw(src, dst, oopDesc::klass_offset_in_bytes());
3210 } else
3211 #endif
3212 sd(src, dst, oopDesc::klass_offset_in_bytes());
3213 }
3216 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3217 load_klass(dst, src);
3218 ld(dst, Address(dst, Klass::prototype_header_offset()));
3219 }
3221 #ifdef _LP64
3222 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3223 if (UseCompressedClassPointers) {
3224 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3225 }
3226 }
3228 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3229 if(UseCompressedOops){
3230 lwu(dst, src);
3231 decode_heap_oop(dst);
3232 } else {
3233 ld(dst, src);
3234 }
3235 }
3237 void MacroAssembler::store_heap_oop(Address dst, Register src){
3238 if(UseCompressedOops){
3239 assert(!dst.uses(src), "not enough registers");
3240 encode_heap_oop(src);
3241 sw(src, dst);
3242 } else {
3243 sd(src, dst);
3244 }
3245 }
3247 void MacroAssembler::store_heap_oop_null(Address dst){
3248 if(UseCompressedOops){
3249 sw(R0, dst);
3250 } else {
3251 sd(R0, dst);
3252 }
3253 }
3255 #ifdef ASSERT
3256 void MacroAssembler::verify_heapbase(const char* msg) {
3257 assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
3258 assert (Universe::heap() != NULL, "java heap should be initialized");
3259 }
3260 #endif
3263 // Algorithm must match oop.inline.hpp encode_heap_oop.
3264 void MacroAssembler::encode_heap_oop(Register r) {
3265 #ifdef ASSERT
3266 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3267 #endif
3268 verify_oop(r, "broken oop in encode_heap_oop");
3269 if (Universe::narrow_oop_base() == NULL) {
3270 if (Universe::narrow_oop_shift() != 0) {
3271 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3272 shr(r, LogMinObjAlignmentInBytes);
3273 }
3274 return;
3275 }
3277 movz(r, S5_heapbase, r);
3278 dsub(r, r, S5_heapbase);
3279 if (Universe::narrow_oop_shift() != 0) {
3280 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3281 shr(r, LogMinObjAlignmentInBytes);
3282 }
3283 }
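/*
 * Compressed-oop encoding above follows oop.inline.hpp, roughly:
 *
 *   narrow = (oop == NULL) ? 0 : (oop - narrow_oop_base()) >> shift;
 *
 * movz(r, S5_heapbase, r) first turns a NULL oop into the heap base, so the
 * subtraction yields 0 without a branch; decode_heap_oop() is the inverse,
 * oop = (narrow == 0) ? NULL : (narrow << shift) + base.
 */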
3285 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
3286 #ifdef ASSERT
3287 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
3288 #endif
3289 verify_oop(src, "broken oop in encode_heap_oop");
3290 if (Universe::narrow_oop_base() == NULL) {
3291 if (Universe::narrow_oop_shift() != 0) {
3292 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3293 dsrl(dst, src, LogMinObjAlignmentInBytes);
3294 } else {
3295 if (dst != src) move(dst, src);
3296 }
3297 } else {
3298 if (dst == src) {
3299 movz(dst, S5_heapbase, dst);
3300 dsub(dst, dst, S5_heapbase);
3301 if (Universe::narrow_oop_shift() != 0) {
3302 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3303 shr(dst, LogMinObjAlignmentInBytes);
3304 }
3305 } else {
3306 dsub(dst, src, S5_heapbase);
3307 if (Universe::narrow_oop_shift() != 0) {
3308 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3309 shr(dst, LogMinObjAlignmentInBytes);
3310 }
3311 movz(dst, R0, src);
3312 }
3313 }
3314 }
3316 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3317 assert (UseCompressedOops, "should be compressed");
3318 #ifdef ASSERT
3319 if (CheckCompressedOops) {
3320 Label ok;
3321 bne(r, R0, ok);
3322 delayed()->nop();
3323 stop("null oop passed to encode_heap_oop_not_null");
3324 bind(ok);
3325 }
3326 #endif
3327 verify_oop(r, "broken oop in encode_heap_oop_not_null");
3328 if (Universe::narrow_oop_base() != NULL) {
3329 dsub(r, r, S5_heapbase);
3330 }
3331 if (Universe::narrow_oop_shift() != 0) {
3332 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3333 shr(r, LogMinObjAlignmentInBytes);
3334 }
3336 }
3338 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3339 assert (UseCompressedOops, "should be compressed");
3340 #ifdef ASSERT
3341 if (CheckCompressedOops) {
3342 Label ok;
3343 bne(src, R0, ok);
3344 delayed()->nop();
3345 stop("null oop passed to encode_heap_oop_not_null2");
3346 bind(ok);
3347 }
3348 #endif
3349 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
3351 if (Universe::narrow_oop_base() != NULL) {
3352 dsub(dst, src, S5_heapbase);
3353 if (Universe::narrow_oop_shift() != 0) {
3354 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3355 shr(dst, LogMinObjAlignmentInBytes);
3356 }
3357 } else {
3358 if (Universe::narrow_oop_shift() != 0) {
3359 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3360 dsrl(dst, src, LogMinObjAlignmentInBytes);
3361 } else {
3362 if (dst != src) move(dst, src);
3363 }
3364 }
3365 }
3367 void MacroAssembler::decode_heap_oop(Register r) {
3368 #ifdef ASSERT
3369 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3370 #endif
3371 if (Universe::narrow_oop_base() == NULL) {
3372 if (Universe::narrow_oop_shift() != 0) {
3373 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3374 shl(r, LogMinObjAlignmentInBytes);
3375 }
3376 } else {
3377 move(AT, r);
3378 if (Universe::narrow_oop_shift() != 0) {
3379 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3380 shl(r, LogMinObjAlignmentInBytes);
3381 }
3382 dadd(r, r, S5_heapbase);
3383 movz(r, R0, AT);
3384 }
3385 verify_oop(r, "broken oop in decode_heap_oop");
3386 }
3388 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3389 #ifdef ASSERT
3390 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
3391 #endif
3392 if (Universe::narrow_oop_base() == NULL) {
3393 if (Universe::narrow_oop_shift() != 0) {
3394 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3395 if (dst != src) nop(); // DON'T DELETE THIS GUY.
3396 dsll(dst, src, LogMinObjAlignmentInBytes);
3397 } else {
3398 if (dst != src) move(dst, src);
3399 }
3400 } else {
3401 if (dst == src) {
3402 move(AT, dst);
3403 if (Universe::narrow_oop_shift() != 0) {
3404 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3405 shl(dst, LogMinObjAlignmentInBytes);
3406 }
3407 dadd(dst, dst, S5_heapbase);
3408 movz(dst, R0, AT);
3409 } else {
3410 if (Universe::narrow_oop_shift() != 0) {
3411 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3412 dsll(dst, src, LogMinObjAlignmentInBytes);
3413 daddu(dst, dst, S5_heapbase);
3414 } else {
3415 daddu(dst, src, S5_heapbase);
3416 }
3417 movz(dst, R0, src);
3418 }
3419 }
3420 verify_oop(dst, "broken oop in decode_heap_oop");
3421 }
3423 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3424 // Note: it will change flags
3425 assert (UseCompressedOops, "should only be used for compressed headers");
3426 assert (Universe::heap() != NULL, "java heap should be initialized");
3427 // Cannot assert, unverified entry point counts instructions (see .ad file)
3428 // vtableStubs also counts instructions in pd_code_size_limit.
3429 // Also do not verify_oop as this is called by verify_oop.
3430 if (Universe::narrow_oop_shift() != 0) {
3431 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3432 shl(r, LogMinObjAlignmentInBytes);
3433 if (Universe::narrow_oop_base() != NULL) {
3434 daddu(r, r, S5_heapbase);
3435 }
3436 } else {
3437 assert (Universe::narrow_oop_base() == NULL, "sanity");
3438 }
3439 }
3441 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3442 assert (UseCompressedOops, "should only be used for compressed headers");
3443 assert (Universe::heap() != NULL, "java heap should be initialized");
3445 // Cannot assert, unverified entry point counts instructions (see .ad file)
3446 // vtableStubs also counts instructions in pd_code_size_limit.
3447 // Also do not verify_oop as this is called by verify_oop.
3448 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
3449 if (Universe::narrow_oop_shift() != 0) {
3450 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3451 if (LogMinObjAlignmentInBytes == Address::times_8) {
3452 dsll(dst, src, LogMinObjAlignmentInBytes);
3453 daddu(dst, dst, S5_heapbase);
3454 } else {
3455 dsll(dst, src, LogMinObjAlignmentInBytes);
3456 if (Universe::narrow_oop_base() != NULL) {
3457 daddu(dst, dst, S5_heapbase);
3458 }
3459 }
3460 } else {
3461 assert (Universe::narrow_oop_base() == NULL, "sanity");
3462 if (dst != src) {
3463 move(dst, src);
3464 }
3465 }
3466 }
3468 void MacroAssembler::encode_klass_not_null(Register r) {
3469 if (Universe::narrow_klass_base() != NULL) {
3470 assert(r != AT, "Encoding a klass in AT");
3471 set64(AT, (int64_t)Universe::narrow_klass_base());
3472 dsub(r, r, AT);
3473 }
3474 if (Universe::narrow_klass_shift() != 0) {
3475 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3476 shr(r, LogKlassAlignmentInBytes);
3477 }
3478 }
3480 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3481 if (dst == src) {
3482 encode_klass_not_null(src);
3483 } else {
3484 if (Universe::narrow_klass_base() != NULL) {
3485 set64(dst, (int64_t)Universe::narrow_klass_base());
3486 dsub(dst, src, dst);
3487 if (Universe::narrow_klass_shift() != 0) {
3488 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3489 shr(dst, LogKlassAlignmentInBytes);
3490 }
3491 } else {
3492 if (Universe::narrow_klass_shift() != 0) {
3493 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3494 dsrl(dst, src, LogKlassAlignmentInBytes);
3495 } else {
3496 move(dst, src);
3497 }
3498 }
3499 }
3500 }
3502 // Function instr_size_for_decode_klass_not_null() counts the instructions
3503 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3504 // when (Universe::heap() != NULL). Hence, if the instructions they
3505 // generate change, then this method needs to be updated.
3506 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3507 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
3508 if (Universe::narrow_klass_base() != NULL) {
3509 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
3510 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
3511 } else {
3512 // longest load decode klass function, mov64, leaq
3513 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
3514 }
3515 }
3517 void MacroAssembler::decode_klass_not_null(Register r) {
3518 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3519 assert(r != AT, "Decoding a klass in AT");
3520 // Cannot assert, unverified entry point counts instructions (see .ad file)
3521 // vtableStubs also counts instructions in pd_code_size_limit.
3522 // Also do not verify_oop as this is called by verify_oop.
3523 if (Universe::narrow_klass_shift() != 0) {
3524 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3525 shl(r, LogKlassAlignmentInBytes);
3526 }
3527 if (Universe::narrow_klass_base() != NULL) {
3528 set64(AT, (int64_t)Universe::narrow_klass_base());
3529 daddu(r, r, AT);
3530 // Not necessary for MIPS at all.
3531 //reinit_heapbase();
3532 }
3533 }
3535 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3536 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3538 if (dst == src) {
3539 decode_klass_not_null(dst);
3540 } else {
3541 // Cannot assert, unverified entry point counts instructions (see .ad file)
3542 // vtableStubs also counts instructions in pd_code_size_limit.
3543 // Also do not verify_oop as this is called by verify_oop.
3544 set64(dst, (int64_t)Universe::narrow_klass_base());
3545 if (Universe::narrow_klass_shift() != 0) {
3546 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3547 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
3548 dsll(AT, src, Address::times_8);
3549 daddu(dst, dst, AT);
3550 } else {
3551 daddu(dst, src, dst);
3552 }
3553 }
3554 }
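/*
 * Narrow-klass decode, in rough C (the inverse of encode_klass_not_null):
 *
 *   klass = narrow_klass_base() + ((uintptr_t)narrow << narrow_klass_shift());
 *
 * unlike oops there is no NULL check here: callers guarantee a non-null klass.
 */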
3556 void MacroAssembler::incrementl(Register reg, int value) {
3557 if (value == min_jint) {
3558 move(AT, value);
3559 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3560 return;
3561 }
3562 if (value < 0) { decrementl(reg, -value); return; }
3563 if (value == 0) { return; }
3565 if(Assembler::is_simm16(value)) {
3566 NOT_LP64(addiu(reg, reg, value));
3567 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
3568 } else {
3569 move(AT, value);
3570 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
3571 }
3572 }
3574 void MacroAssembler::decrementl(Register reg, int value) {
3575 if (value == min_jint) {
3576 move(AT, value);
3577 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3578 return;
3579 }
3580 if (value < 0) { incrementl(reg, -value); return; }
3581 if (value == 0) { return; }
3583 if (Assembler::is_simm16(value)) {
3584 NOT_LP64(addiu(reg, reg, -value));
3585 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
3586 } else {
3587 move(AT, value);
3588 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
3589 }
3590 }
3592 void MacroAssembler::reinit_heapbase() {
3593 if (UseCompressedOops || UseCompressedClassPointers) {
3594 if (Universe::heap() != NULL) {
3595 if (Universe::narrow_oop_base() == NULL) {
3596 move(S5_heapbase, R0);
3597 } else {
3598 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
3599 }
3600 } else {
3601 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
3602 ld(S5_heapbase, S5_heapbase, 0);
3603 }
3604 }
3605 }
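// In short: once the heap exists, S5_heapbase caches narrow_ptrs_base()
// (or 0 when there is no base); before that, the value is reloaded
// indirectly through narrow_ptrs_base_addr().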
3606 #endif // _LP64
3608 void MacroAssembler::check_klass_subtype(Register sub_klass,
3609 Register super_klass,
3610 Register temp_reg,
3611 Label& L_success) {
3612 // Implements the full subtype check; cf. the interpreter's gen_subtype_check.
3613 Label L_failure;
3614 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3615 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3616 bind(L_failure);
3617 }
3619 SkipIfEqual::SkipIfEqual(
3620 MacroAssembler* masm, const bool* flag_addr, bool value) {
3621 _masm = masm;
3622 _masm->li(AT, (address)flag_addr);
3623 _masm->lb(AT, AT, 0);
3624 _masm->addi(AT, AT, -value);
3625 _masm->beq(AT,R0,_label);
3626 _masm->delayed()->nop();
3627 }
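// Typical use (a sketch; DTraceAllocProbes is one real guard flag): the code
// emitted inside the scope is skipped at runtime whenever *flag_addr == value.
//
//   {
//     SkipIfEqual skip(masm, &DTraceAllocProbes, false);
//     // ...probe code, emitted but skipped while the flag is false...
//   } // destructor binds the skip-target label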
3628 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3629 Register super_klass,
3630 Register temp_reg,
3631 Label* L_success,
3632 Label* L_failure,
3633 Label* L_slow_path,
3634 RegisterOrConstant super_check_offset) {
3635 assert_different_registers(sub_klass, super_klass, temp_reg);
3636 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3637 if (super_check_offset.is_register()) {
3638 assert_different_registers(sub_klass, super_klass,
3639 super_check_offset.as_register());
3640 } else if (must_load_sco) {
3641 assert(temp_reg != noreg, "supply either a temp or a register offset");
3642 }
3644 Label L_fallthrough;
3645 int label_nulls = 0;
3646 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3647 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3648 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3649 assert(label_nulls <= 1, "at most one NULL in the batch");
3651 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3652 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3653 // If the pointers are equal, we are done (e.g., String[] elements).
3654 // This self-check enables sharing of secondary supertype arrays among
3655 // non-primary types such as array-of-interface. Otherwise, each such
3656 // type would need its own customized SSA.
3657 // We move this check to the front of the fast path because many
3658 // type checks are in fact trivially successful in this manner,
3659 // so we get a nicely predicted branch right at the start of the check.
3660 beq(sub_klass, super_klass, *L_success);
3661 delayed()->nop();
3662 // Check the supertype display:
3663 if (must_load_sco) {
3664 // The super-check offset is a positive 32-bit value; lwu zero-extends it.
3665 lwu(temp_reg, super_klass, sco_offset);
3666 super_check_offset = RegisterOrConstant(temp_reg);
3667 }
3668 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
3669 daddu(AT, sub_klass, AT);
3670 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
3672 // This check has worked decisively for primary supers.
3673 // Secondary supers are sought in the super_cache ('super_cache_addr').
3674 // (Secondary supers are interfaces and very deeply nested subtypes.)
3675 // This works in the same check above because of a tricky aliasing
3676 // between the super_cache and the primary super display elements.
3677 // (The 'super_check_addr' can address either, as the case requires.)
3678 // Note that the cache is updated below if it does not help us find
3679 // what we need immediately.
3680 // So if it was a primary super, we can just fail immediately.
3681 // Otherwise, it's the slow path for us (no success at this point).
3683 if (super_check_offset.is_register()) {
3684 beq(super_klass, AT, *L_success);
3685 delayed()->nop();
3686 addi(AT, super_check_offset.as_register(), -sc_offset);
3687 if (L_failure == &L_fallthrough) {
3688 beq(AT, R0, *L_slow_path);
3689 delayed()->nop();
3690 } else {
3691 bne(AT, R0, *L_failure);
3692 delayed()->nop();
3693 b(*L_slow_path);
3694 delayed()->nop();
3695 }
3696 } else if (super_check_offset.as_constant() == sc_offset) {
3697 // Need a slow path; fast failure is impossible.
3698 if (L_slow_path == &L_fallthrough) {
3699 beq(super_klass, AT, *L_success);
3700 delayed()->nop();
3701 } else {
3702 bne(super_klass, AT, *L_slow_path);
3703 delayed()->nop();
3704 b(*L_success);
3705 delayed()->nop();
3706 }
3707 } else {
3708 // No slow path; it's a fast decision.
3709 if (L_failure == &L_fallthrough) {
3710 beq(super_klass, AT, *L_success);
3711 delayed()->nop();
3712 } else {
3713 bne(super_klass, AT, *L_failure);
3714 delayed()->nop();
3715 b(*L_success);
3716 delayed()->nop();
3717 }
3718 }
3720 bind(L_fallthrough);
3722 }
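// The dispatch above reduces to roughly this logic (a sketch):
//
//   if (sub_klass == super_klass)                            goto L_success;
//   if (*(Klass**)((address)sub_klass + sco) == super_klass) goto L_success;
//   else if (sco == sc_offset)                               goto L_slow_path;
//   else                                                     goto L_failure;
//
// where sco is the super_check_offset (loaded from super_klass if needed).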
3725 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3726 Register super_klass,
3727 Register temp_reg,
3728 Register temp2_reg,
3729 Label* L_success,
3730 Label* L_failure,
3731 bool set_cond_codes) {
3732 assert_different_registers(sub_klass, super_klass, temp_reg);
3733 if (temp2_reg != noreg)
3734 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3735 else
3736 temp2_reg = T9;
3737 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3739 Label L_fallthrough;
3740 int label_nulls = 0;
3741 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3742 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3743 assert(label_nulls <= 1, "at most one NULL in the batch");
3745 // a couple of useful fields in sub_klass:
3746 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3747 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3748 Address secondary_supers_addr(sub_klass, ss_offset);
3749 Address super_cache_addr( sub_klass, sc_offset);
3751 // Do a linear scan of the secondary super-klass chain.
3752 // This code is rarely used, so simplicity is a virtue here.
3753 // (Unlike the x86 version, which spills fixed registers for repne_scan,
3754 // the MIPS code below scans with an explicit loop and needs no spills.)
3757 #ifndef PRODUCT
3758 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3759 ExternalAddress pst_counter_addr((address) pst_counter);
3760 NOT_LP64( incrementl(pst_counter_addr) );
3761 #endif //PRODUCT
3763 // We will consult the secondary-super array.
3764 ld(temp_reg, secondary_supers_addr);
3765 // Load the array length. (Positive movl does right thing on LP64.)
3766 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
3767 // Skip to start of data.
3768 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
3770 // Scan temp2_reg words at [temp_reg] for an occurrence of super_klass;
3771 // a zero count falls through to L_failure without a match.
3776 /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
3777 Label Loop, subtype;
3778 bind(Loop);
3779 beq(temp2_reg, R0, *L_failure);
3780 delayed()->nop();
3781 ld(AT, temp_reg, 0);
3782 beq(AT, super_klass, subtype);
3783 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
3784 b(Loop);
3785 delayed()->daddi(temp2_reg, temp2_reg, -1);
3787 bind(subtype);
// Success. Cache the super we found and proceed in triumph.
3788 sd(super_klass, super_cache_addr);
3789 if (L_success != &L_fallthrough) {
3790 b(*L_success);
3791 delayed()->nop();
3792 }
3795 #undef IS_A_TEMP
3797 bind(L_fallthrough);
3798 }
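// Equivalent logic, sketched in C++ (accessor names follow Klass):
//
//   Array<Klass*>* ss = sub_klass->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super_klass) {
//       sub_klass->set_secondary_super_cache(super_klass);
//       goto success;
//     }
//   }
//   goto failure;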
3800 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
3801 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
3802 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
3803 verify_oop(oop_result, "broken oop in call_VM_base");
3804 }
3806 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
3807 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
3808 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
3809 }
3811 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3812 int extra_slot_offset) {
3813 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3814 int stackElementSize = Interpreter::stackElementSize;
3815 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
3816 #ifdef ASSERT
3817 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
3818 assert(offset1 - offset == stackElementSize, "correct arithmetic");
3819 #endif
3820 Register scale_reg = NOREG;
3821 Address::ScaleFactor scale_factor = Address::no_scale;
3822 if (arg_slot.is_constant()) {
3823 offset += arg_slot.as_constant() * stackElementSize;
3824 } else {
3825 scale_reg = arg_slot.as_register();
3826 scale_factor = Address::times_8;
3827 }
3828 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
3829 // offset += wordSize; // return PC is on stack
3830 if (scale_reg == NOREG) return Address(SP, offset);
3831 else {
3832 dsll(scale_reg, scale_reg, scale_factor);
3833 daddu(scale_reg, SP, scale_reg);
3834 return Address(scale_reg, offset);
3835 }
3836 }
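// The returned address is SP + arg_slot * Interpreter::stackElementSize
// + expr_offset_in_bytes(extra_slot_offset). When arg_slot is in a register,
// the scaled slot is folded into that register, which this method clobbers.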
3838 SkipIfEqual::~SkipIfEqual() {
3839 _masm->bind(_label);
3840 }
3842 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
3843 switch (size_in_bytes) {
3844 #ifndef _LP64
3845 case 8:
3846 assert(dst2 != noreg, "second dest register required");
3847 lw(dst, src);
3848 lw(dst2, src.plus_disp(BytesPerInt));
3849 break;
3850 #else
3851 case 8: ld(dst, src); break;
3852 #endif
3853 case 4: lw(dst, src); break;
3854 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
3855 case 1: is_signed ? lb(dst, src) : lbu(dst, src); break;
3856 default: ShouldNotReachHere();
3857 }
3858 }
3860 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
3861 switch (size_in_bytes) {
3862 #ifndef _LP64
3863 case 8:
3864 assert(src2 != noreg, "second source register required");
3865 sw(src, dst);
3866 sw(src2, dst.plus_disp(BytesPerInt));
3867 break;
3868 #else
3869 case 8: sd(src, dst); break;
3870 #endif
3871 case 4: sw(src, dst); break;
3872 case 2: sh(src, dst); break;
3873 case 1: sb(src, dst); break;
3874 default: ShouldNotReachHere();
3875 }
3876 }
3878 // Look up the method for a megamorphic invokeinterface call.
3879 // The target method is determined by <intf_klass, itable_index>.
3880 // The receiver klass is in recv_klass.
3881 // On success, the result will be in method_result, and execution falls through.
3882 // On failure, execution transfers to the given label.
3883 void MacroAssembler::lookup_interface_method(Register recv_klass,
3884 Register intf_klass,
3885 RegisterOrConstant itable_index,
3886 Register method_result,
3887 Register scan_temp,
3888 Label& L_no_such_interface) {
3889 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
3890 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3891 "caller must use same register for non-constant itable index as for method");
3893 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3894 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
3895 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3896 int scan_step = itableOffsetEntry::size() * wordSize;
3897 int vte_size = vtableEntry::size() * wordSize;
3898 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3899 assert(vte_size == wordSize, "else adjust times_vte_scale");
3901 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
3903 // %%% Could store the aligned, prescaled offset in the klassoop.
3904 dsll(scan_temp, scan_temp, times_vte_scale);
3905 daddu(scan_temp, recv_klass, scan_temp);
3906 daddiu(scan_temp, scan_temp, vtable_base);
3907 if (HeapWordsPerLong > 1) {
3908 // Round up to align_object_offset boundary
3909 // see code for InstanceKlass::start_of_itable!
3910 round_to(scan_temp, BytesPerLong);
3911 }
3913 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
3914 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
3915 if (itable_index.is_constant()) {
3916 set64(AT, (int)itable_index.as_constant());
3917 dsll(AT, AT, (int)Address::times_ptr);
3918 } else {
3919 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
3920 }
3921 daddu(AT, AT, recv_klass);
3922 daddiu(recv_klass, AT, itentry_off);
3924 Label search, found_method;
3926 for (int peel = 1; peel >= 0; peel--) {
3927 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
3929 if (peel) {
3930 beq(intf_klass, method_result, found_method);
3931 nop();
3932 } else {
3933 bne(intf_klass, method_result, search);
3934 nop();
3935 // (invert the test to fall through to found_method...)
3936 }
3938 if (!peel) break;
3940 bind(search);
3942 // Check that the previous entry is non-null. A null entry means that
3943 // the receiver class doesn't implement the interface, and wasn't the
3944 // same as when the caller was compiled.
3945 beq(method_result, R0, L_no_such_interface);
3946 nop();
3947 daddiu(scan_temp, scan_temp, scan_step);
3948 }
3950 bind(found_method);
3952 // Got a hit.
3953 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
3954 if (UseLoongsonISA) {
3955 gsldx(method_result, recv_klass, scan_temp, 0);
3956 } else {
3957 daddu(AT, recv_klass, scan_temp);
3958 ld(method_result, AT);
3959 }
3960 }
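// In outline, the scan above performs (a sketch; names approximate the
// itable accessors):
//
//   itableOffsetEntry* ioe = /* start of itable in recv_klass */;
//   while (ioe->interface_klass() != intf_klass) {
//     if (ioe->interface_klass() == NULL) goto L_no_such_interface;
//     ioe++;
//   }
//   method_result = *(Method**)((address)recv_klass + ioe->offset()
//                               + itable_index * wordSize + itentry_off);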
3962 // virtual method calling
3963 void MacroAssembler::lookup_virtual_method(Register recv_klass,
3964 RegisterOrConstant vtable_index,
3965 Register method_result) {
3966 Register tmp = GP;
3967 push(tmp);
3969 if (vtable_index.is_constant()) {
3970 assert_different_registers(recv_klass, method_result, tmp);
3971 } else {
3972 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
3973 }
3974 const int base = InstanceKlass::vtable_start_offset() * wordSize;
3975 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
3976 /*
3977 Address vtable_entry_addr(recv_klass,
3978 vtable_index, Address::times_ptr,
3979 base + vtableEntry::method_offset_in_bytes());
3980 */
3981 if (vtable_index.is_constant()) {
3982 set64(AT, vtable_index.as_constant());
3983 dsll(AT, AT, (int)Address::times_ptr);
3984 } else {
3985 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
3986 }
3987 set64(tmp, base + vtableEntry::method_offset_in_bytes());
3988 daddu(tmp, tmp, AT);
3989 daddu(tmp, tmp, recv_klass);
3990 ld(method_result, tmp, 0);
3992 pop(tmp);
3993 }
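// The loaded method is, in effect (a sketch):
//
//   method_result = *(Method**)((address)recv_klass
//                     + InstanceKlass::vtable_start_offset() * wordSize
//                     + vtable_index * wordSize
//                     + vtableEntry::method_offset_in_bytes());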