Tue, 26 Jul 2016 11:15:09 +0800
Instruction decoding support: add movn and movz in MIPS disassembler.
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/assembler.hpp"
28 #include "asm/assembler.inline.hpp"
29 #include "gc_interface/collectedHeap.inline.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "memory/cardTableModRefBS.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "prims/methodHandles.hpp"
34 #include "runtime/biasedLocking.hpp"
35 #include "runtime/interfaceSupport.hpp"
36 #include "runtime/objectMonitor.hpp"
37 #include "runtime/os.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubRoutines.hpp"
40 #ifndef SERIALGC
41 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
42 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
43 #include "gc_implementation/g1/heapRegion.hpp"
44 #endif
// Code-annotation helpers: in PRODUCT builds BLOCK_COMMENT expands to
// nothing and STOP just stops; in debug builds both also record a textual
// comment in the code stream so disassembly listings are readable.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

// Bind a label and (in debug builds) annotate the bind point with its name.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Scratch areas used by save_registers()/restore_registers() to spill the
// 32 integer and 32 float registers; dumped by print() below.
intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};
58 void MacroAssembler::print(outputStream *s) {
59 unsigned int k;
60 for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
61 s->print_cr("i%d = 0x%.16lx", k, i[k]);
62 }
63 s->cr();
65 for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
66 s->print_cr("f%d = %f", k, f[k]);
67 }
68 s->cr();
69 }
// Byte offset of i[k]/f[k] inside a MacroAssembler object, computed with the
// classic null-object idiom. NOTE(review): formally UB in C++ (offsetof is
// the portable form), but this matches surrounding HotSpot port style.
int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
75 void MacroAssembler::save_registers(MacroAssembler *masm) {
76 #define __ masm->
77 for(int k=0; k<32; k++) {
78 __ sw (as_Register(k), A0, i_offset(k));
79 }
81 for(int k=0; k<32; k++) {
82 __ swc1 (as_FloatRegister(k), A0, f_offset(k));
83 }
84 #undef __
85 }
87 void MacroAssembler::restore_registers(MacroAssembler *masm) {
88 #define __ masm->
89 for(int k=0; k<32; k++) {
90 __ lw (as_Register(k), A0, i_offset(k));
91 }
93 for(int k=0; k<32; k++) {
94 __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
95 }
96 #undef __
97 }
100 // Implementation of AddressLiteral
102 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
103 _is_lval = false;
104 _target = target;
105 _rspec = rspec_from_rtype(rtype, target);
106 }
// Implementation of Address

//FIXME aoqi
//#ifdef _LP64
#if 0
// Dead branch (was "#ifdef _LP64", disabled above with "#if 0").
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // live branch (the "#if 0" above disables the variant it replaced)

// Turn an ArrayAddress into a base+index+scale Address whose displacement
// is the array base; the relocation spec is carried over from the base.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor: treats a raw code address as an
// absolute displacement with an explicit relocation holder.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // 0 (was _LP64)
171 /*
172 // Convert the raw encoding form into the form expected by the constructor for
173 // Address. An index of 4 (rsp) corresponds to having no index, so convert
174 // that to noreg for the Address constructor.
175 Address Address::make_raw(int base, int index, int scale, int disp) {
176 bool valid_index = index != rsp->encoding();
177 if (valid_index) {
178 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
179 return madr;
180 } else {
181 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
182 return madr;
183 }
184 }
185 */
187 // Implementation of Assembler
188 const char *Assembler::ops_name[] = {
189 "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz",
190 "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui",
191 "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl",
192 "daddi", "daddiu", "ldl", "ldr", "", "", "", "",
193 "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu",
194 "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache",
195 "ll", "lwc1", "", "", "lld", "ldc1", "", "ld",
196 "sc", "swc1", "", "", "scd", "sdc1", "", "sd"
197 };
199 const char* Assembler::special_name[] = {
200 "sll", "", "srl", "sra", "sllv", "", "srlv", "srav",
201 "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync",
202 "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra",
203 "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu",
204 "add", "addu", "sub", "subu", "and", "or", "xor", "nor",
205 "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu",
206 "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "",
207 "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32"
208 };
210 const char* Assembler::regimm_name[] = {
211 "bltz", "bgez", "bltzl", "bgezl", "", "", "", "",
212 "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "",
213 "bltzal", "bgezal", "bltzall", "bgezall"
214 };
// Names of floating-point operations (COP1 fmt-op function codes 0..15:
// arithmetic, then the round/trunc/ceil/floor conversion family).
const char* Assembler::float_name[] = {
  "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg",
  "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w"
};
221 //misleading name, print only branch/jump instruction
222 void Assembler::print_instruction(int inst) {
223 const char *s;
224 switch( opcode(inst) ) {
225 default:
226 s = ops_name[opcode(inst)];
227 break;
228 case special_op:
229 s = special_name[special(inst)];
230 break;
231 case regimm_op:
232 s = special_name[rt(inst)];
233 break;
234 }
236 ::tty->print("%s", s);
237 }
// Patch a previously-emitted branch at `branch` so it reaches `target`.
// Two shapes are handled:
//  1) the 8-instruction far-branch stub emitted by b_far(Label&) —
//     recognized by its leading dadd (move AT, RA). Its lui/ori pair is
//     patched with the 32-bit offset, or, when the offset fits in simm16,
//     the whole stub is rewritten as a short "b + nops" sequence;
//  2) an ordinary 16-bit-offset branch, whose offset field is re-encoded
//     via patched_branch().
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;

/* *
  move(AT, RA); // dadd
  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
  nop();
  lui(T9, 0); // to be patched
  ori(T9, 0);
  daddu(T9, T9, RA);
  move(RA, AT);
  jr(T9);
 */
  if(special(stub_inst) == dadd_op) {
    jint *pc = (jint *)branch;

    assert(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset))
    {
      // Far target: patch the lui/ori immediates. NOTE(review): the -12
      // presumably compensates for the distance between the stub start and
      // the point where RA is captured (see the stub layout above) —
      // confirm against b_far(Label&).
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    }
    else
    {
      /* revert to "beq + nop" */
      // Target is near after all: overwrite the stub with a plain branch
      // followed by nops filling the remaining stub slots.
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  }

#ifndef PRODUCT
  // Debug aid: dump surrounding instruction words when the target cannot
  // be encoded in a 16-bit branch offset.
  if (!is_simm16((target - branch - 4) >> 2))
  {
    tty->print_cr("Illegal patching: target=0x%lx", target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++)
    {
      tty->print("0x%lx, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
//without check, maybe fixed
// Re-encode the 16-bit offset field of branch instruction `inst` (located
// at position `inst_pos`) so that it branches to `dest_pos`. j/jal have a
// different encoding and are rejected.
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
  int v = (dest_pos - inst_pos - 4)>>2;  // word offset, relative to the delay slot
  switch(opcode(inst)) {
  case j_op:
  case jal_op:
    assert(false, "should not use j/jal here");
    break;
  default:
    assert(is_simm16(v), "must be simm16");
#ifndef PRODUCT
    if(!is_simm16(v))
    {
      tty->print_cr("must be simm16");
      tty->print_cr("Inst: %lx", inst);
    }
#endif

    v = low16(v);          // keep only the 16-bit offset field
    inst &= 0xffff0000;    // clear the old offset
    break;
  }

  return inst | v;
}
328 int Assembler::branch_destination(int inst, int pos) {
329 int off;
331 switch(opcode(inst)) {
332 case j_op:
333 case jal_op:
334 assert(false, "should not use j/jal here");
335 break;
336 default:
337 off = expand(low16(inst), 15);
338 break;
339 }
341 return off ? pos + 4 + (off<<2) : 0;
342 }
// Byte used to pad unused code-buffer space.
// NOTE(review): 0x00000000 decodes as "sll zero, zero, 0" (a nop) on MIPS
// rather than an illegal instruction — the comment below may be inaccurate.
int AbstractAssembler::code_fill_byte() {
  return 0x00; // illegal instruction 0x00000000
}
348 // Now the Assembler instruction (identical for 32/64 bits)
// Address-form convenience wrappers for the load instructions: each
// forwards to the (register base, 16-bit displacement) primitive.
void Assembler::lb(Register rt, Address src) {
  lb(rt, src.base(), src.disp());
}

void Assembler::lbu(Register rt, Address src) {
  lbu(rt, src.base(), src.disp());
}

void Assembler::ld(Register rt, Address src){
  ld(rt, src.base(), src.disp());
}

void Assembler::ldl(Register rt, Address src){
  ldl(rt, src.base(), src.disp());
}

void Assembler::ldr(Register rt, Address src){
  ldr(rt, src.base(), src.disp());
}

void Assembler::lh(Register rt, Address src){
  lh(rt, src.base(), src.disp());
}

void Assembler::lhu(Register rt, Address src){
  lhu(rt, src.base(), src.disp());
}

void Assembler::ll(Register rt, Address src){
  ll(rt, src.base(), src.disp());
}

void Assembler::lld(Register rt, Address src){
  lld(rt, src.base(), src.disp());
}

void Assembler::lw(Register rt, Address src){
  lw(rt, src.base(), src.disp());
}
// Load effective address: rt = src.base() + src.disp().
// NOTE(review): daddi/addi take a signed 16-bit immediate, so callers must
// guarantee the displacement fits in simm16 — confirm at call sites.
void Assembler::lea(Register rt, Address src) {
#ifdef _LP64
  daddi(rt, src.base(), src.disp());
#else
  addi(rt, src.base(), src.disp());
#endif
}
// Address-form convenience wrappers for the remaining loads, the stores,
// and the FP loads/stores: each forwards to the (base, disp) primitive.
void Assembler::lwl(Register rt, Address src){
  lwl(rt, src.base(), src.disp());
}

void Assembler::lwr(Register rt, Address src){
  lwr(rt, src.base(), src.disp());
}

void Assembler::lwu(Register rt, Address src){
  lwu(rt, src.base(), src.disp());
}

void Assembler::sb(Register rt, Address dst) {
  sb(rt, dst.base(), dst.disp());
}

void Assembler::sc(Register rt, Address dst) {
  sc(rt, dst.base(), dst.disp());
}

void Assembler::scd(Register rt, Address dst) {
  scd(rt, dst.base(), dst.disp());
}

void Assembler::sd(Register rt, Address dst) {
  sd(rt, dst.base(), dst.disp());
}

void Assembler::sdl(Register rt, Address dst) {
  sdl(rt, dst.base(), dst.disp());
}

void Assembler::sdr(Register rt, Address dst) {
  sdr(rt, dst.base(), dst.disp());
}

void Assembler::sh(Register rt, Address dst) {
  sh(rt, dst.base(), dst.disp());
}

void Assembler::sw(Register rt, Address dst) {
  sw(rt, dst.base(), dst.disp());
}

void Assembler::swl(Register rt, Address dst) {
  swl(rt, dst.base(), dst.disp());
}

void Assembler::swr(Register rt, Address dst) {
  swr(rt, dst.base(), dst.disp());
}

void Assembler::lwc1(FloatRegister rt, Address src) {
  lwc1(rt, src.base(), src.disp());
}

void Assembler::ldc1(FloatRegister rt, Address src) {
  ldc1(rt, src.base(), src.disp());
}

void Assembler::swc1(FloatRegister rt, Address dst) {
  swc1(rt, dst.base(), dst.disp());
}

void Assembler::sdc1(FloatRegister rt, Address dst) {
  sdc1(rt, dst.base(), dst.disp());
}
// Unconditional jump (j / jal) within the current 256MB region: the 26-bit
// instr_index field is the word address of the target; the upper address
// bits are taken from the delay-slot PC. Both have a branch delay slot.
void Assembler::j(address entry) {
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000))>>2;
  emit_long((j_op<<26) | dest);
  has_delay_slot();
}

void Assembler::jal(address entry) {
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000))>>2;
  emit_long((jal_op<<26) | dest);
  has_delay_slot();
}
477 void MacroAssembler::beq_far(Register rs, Register rt, address entry)
478 {
479 u_char * cur_pc = pc();
481 /* Jin: Near/Far jump */
482 if(is_simm16((entry - pc() - 4) / 4))
483 {
484 Assembler::beq(rs, rt, offset(entry));
485 }
486 else
487 {
488 Label not_jump;
489 bne(rs, rt, not_jump);
490 delayed()->nop();
492 b_far(entry);
493 delayed()->nop();
495 bind(not_jump);
496 has_delay_slot();
497 }
498 }
500 void MacroAssembler::beq_far(Register rs, Register rt, Label& L)
501 {
502 if (L.is_bound()) {
503 beq_far(rs, rt, target(L));
504 } else {
505 u_char * cur_pc = pc();
506 Label not_jump;
507 bne(rs, rt, not_jump);
508 delayed()->nop();
510 b_far(L);
511 delayed()->nop();
513 bind(not_jump);
514 has_delay_slot();
515 }
516 }
518 void MacroAssembler::bne_far(Register rs, Register rt, address entry)
519 {
520 u_char * cur_pc = pc();
522 /* Jin: Near/Far jump */
523 if(is_simm16((entry - pc() - 4) / 4))
524 {
525 Assembler::bne(rs, rt, offset(entry));
526 }
527 else
528 {
529 Label not_jump;
530 beq(rs, rt, not_jump);
531 delayed()->nop();
533 b_far(entry);
534 delayed()->nop();
536 bind(not_jump);
537 has_delay_slot();
538 }
539 }
541 void MacroAssembler::bne_far(Register rs, Register rt, Label& L)
542 {
543 if (L.is_bound()) {
544 bne_far(rs, rt, target(L));
545 } else {
546 u_char * cur_pc = pc();
547 Label not_jump;
548 beq(rs, rt, not_jump);
549 delayed()->nop();
551 b_far(L);
552 delayed()->nop();
554 bind(not_jump);
555 has_delay_slot();
556 }
557 }
// Unconditional far branch to a label. Bound labels dispatch to the
// address form; unbound labels emit the 8-instruction patchable stub shown
// below (recognized and fixed up by pd_patch_instruction).
void MacroAssembler::b_far(Label& L)
{
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    // NOTE(review): `dest` is unused; presumably declared volatile so the
    // target() call is still evaluated — confirm before removing.
    volatile address dest = target(L);
/*
MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   0x00000055651ed514: dadd at, ra, zero
   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

   0x00000055651ed51c: sll zero, zero, 0
   0x00000055651ed520: lui t9, 0x0
   0x00000055651ed524: ori t9, t9, 0x21b8
   0x00000055651ed528: daddu t9, t9, ra
   0x00000055651ed52c: dadd ra, at, zero
   0x00000055651ed530: jr t9
   0x00000055651ed534: sll zero, zero, 0
*/
    move(AT, RA);                                      // save RA
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));  // bgezal zero, +1: captures PC in RA
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);                                 // T9 = RA + patched offset
    move(RA, AT);                                      // restore RA
    jr(T9);
  }
}
// Unconditional branch to a known address: a single b when the target is
// within simm16 words, otherwise a PC-relative far-jump sequence that
// captures the current PC in RA (via bgezal zero) and adds the offset.
void MacroAssembler::b_far(address entry)
{
  u_char * cur_pc = pc();  // NOTE(review): unused — candidate for removal

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4))
  {
    b(offset(entry));
  }
  else
  {
    /* address must be bounded */
    move(AT, RA);                                      // save RA
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));  // bgezal zero, +1: captures PC in RA
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);                                      // restore RA
    jr(T9);
  }
}
611 // Implementation of MacroAssembler
613 // First all the versions that have distinct versions depending on 32/64 bit
614 // Unless the difference is trivial (1 line or so).
616 //#ifndef _LP64
618 // 32bit versions
// Register-offset forms of the pointer/long load/store helpers: compute
// base + offset into AT, then use the (reg, disp) form. All clobber AT.
void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}
// Convert an AddressLiteral / ArrayAddress into a plain Address.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
// Atomically add `inc` to the 32-bit counter at counter_addr using an
// LL/SC retry loop. `inc` must fit in a signed 16-bit immediate (addi).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  bind(again);
  sync();
  li(tmp_reg1, counter_addr);
  ll(tmp_reg2, tmp_reg1, 0);        // load-linked the current counter value
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);        // store-conditional: 1 on success, 0 on failure
  beq(tmp_reg2, R0, again);         // retry if the SC failed
  delayed()->nop();
}
// Emit the biased-locking fast path for monitorenter.
// On success (lock already biased to this thread, or bias acquired/
// transferred by CAS) control flows to `done`; when the CAS loses and
// `slow_case` is given, control goes there; otherwise execution falls
// through to `cas_label` (bound at the end) for the normal CAS-based lock.
// Returns the code offset of the implicit null check performed when the
// mark word is loaded (-1 is never returned: one of the two branches sets
// it). Clobbers AT; uses T9 as a scratch register when tmp_reg == noreg.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  // Test the low bits of the mark word for the biased-lock pattern;
  // if they don't match, fall through to the CAS path.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();


  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  // swap_reg := (prototype header ^ mark) ^ thread, then mask out the age
  // bits — zero means "biased to us, epoch current".
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, AT, tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT,swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  // if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  // lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
// Emit the biased-locking fast path for monitorexit: if the mark word
// carries the biased-lock pattern, unlocking is a no-op — branch to `done`.
// Clobbers temp_reg and AT.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// NOTE: we dont increment the SP after call like the x86 version, maybe this is a problem, FIXME.
// by yjl 6/27/2005
// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf
// by yjl 7/11/2005
// this method will handle the stack problem, you need not to preserve the stack space for the argument now
// by yjl 8/1/2005
// Call a leaf runtime routine, keeping SP 16-byte aligned across the call:
// if SP is currently misaligned, carve 8 bytes first and restore afterwards.
// At most 4 arguments (all passed in registers) are supported.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  //call(RuntimeAddress(entry_point));
  //increment(rsp, number_of_arguments * wordSize);
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);        // AT = SP & 0xf; nonzero => SP not 16-byte aligned
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);        // realign for the duration of the call
  {
    call(entry_point, relocInfo::runtime_call_type);
    delayed()->nop();
  }
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);                  // already aligned: call directly
  {
    call(entry_point, relocInfo::runtime_call_type);
    delayed()->nop();
  }
  bind(E);
}
// Tail-jump to `entry` via T9 (the MIPS ABI expects T9 to hold the callee
// address). NOTE(review): no delay-slot nop is emitted after jr — callers
// are expected to fill the slot.
void MacroAssembler::jmp(address entry) {
  li48(T9, (long)entry);
  jr(T9);
}

// Same, additionally recording relocation info for relocatable targets.
void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        li48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}
// Call `entry` via T9. NOTE(review): no delay-slot nop after jalr —
// callers fill the slot (usually delayed()->nop()).
void MacroAssembler::call(address entry) {
// c/c++ code assume T9 is entry point, so we just always move entry to t9
// maybe there is some more graceful method to handle this. FIXME
// by yjl 6/27/2005
// For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  li48(T9, (long)entry);
#endif
  jalr(T9);
}

// Call with a relocation type; plain calls skip the relocation record.
void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

// Call with a pre-built relocation holder (e.g. from a Relocation::spec).
void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}
// Emit an inline-cache call: load IC_Klass with the non-oop sentinel so the
// callee's IC check initially misses, then call `entry` via T9 under a
// virtual-call relocation.
void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  li64(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  li48(T9, (long)entry);
  jalr(T9);
  delayed()->nop();
}
// Normalize a C truth value in place: r = (r != 0) ? 1 : 0.
void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);  // zero stays zero
  delayed()->nop();
  move(r, 1);                // any nonzero value becomes 1
  bind(L);
}
1059 #ifndef PRODUCT
1060 extern "C" void findpc(intptr_t x);
1061 #endif
// Runtime target of generated debug traps: prints the captured register
// values (names follow the x86 original this port was derived from) and
// either pops a message box or asserts, depending on ShowMessageBoxOnError.
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    // NOTE(review): these inner declarations shadow the outer thread/
    // saved_state above — harmless but redundant.
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax, = 0x%08x", rax);
      tty->print_cr("rbx, = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp, = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}
// Runtime target of stop()/warn(): prints the message (optionally via a
// message box) without touching register state.
void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks work, we need to fake a in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

      // if (os::message_box(msg, "Execution stopped, print registers?"))
      //  regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}
// Emit code that aborts execution: pass msg to MacroAssembler::debug via A0,
// then trap with brk(17) so control never continues past this point.
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  //reserver space for argument. added by yjl 7/10/2005
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  //restore space for argument
  addiu(SP, SP, 1 * wordSize);
#endif
  // Hard breakpoint: execution must not fall through a stop().
  brk(17);
}
// Emit code that prints msg via MacroAssembler::debug and then continues
// (unlike stop(), no trap). All registers are saved/restored around the call
// with pushad()/popad(); on 32-bit, A0 is additionally spilled to the stack
// because li(A0, ...) clobbers it.
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  // Reserve outgoing-argument space for the C call.
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  // Restore A0 from the same slot it was spilled to above.
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
// Debug helper: emit code that prints the given general-purpose register's
// name, value and the current code position via SharedRuntime::print_reg_with_pc.
// All registers are preserved with pushad()/popad(). SP and A0 need special
// handling because pushad()/li() have already disturbed them.
void MacroAssembler::print_reg(Register reg) {
/*
char *s = getenv("PRINT_REG");
if (s == NULL)
  return;
if (strcmp(s, "1") != 0)
  return;
*/
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    // Reconstruct the pre-pushad() SP value.
    addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
  else if (reg == A0)
    // Reload the saved A0 from the pushad() area.
    ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();

/*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();
  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n",cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
*/
}
// Debug helper: emit code that prints the name and (double) value of a
// floating-point register. Registers are preserved with pushad()/popad();
// SP is aligned around the print_double call since it takes an FP argument.
void MacroAssembler::print_reg(FloatRegister reg) {
  // cur_pc is only referenced by the disabled (#if 0) trace code at the end.
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  // Align SP for the C ABI before calling print_double; FP keeps the old SP.
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP , SP , AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}
// Add imm to reg in place. Emits nothing for imm == 0; uses an add-immediate
// when imm fits in a signed 16-bit field, otherwise materializes imm in AT.
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}
// Subtract imm from reg in place; implemented as increment by -imm.
void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
// Call a VM entry point with no Java arguments.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}
// Call a VM entry point with one argument, marshalled into A1
// (A0 is reserved for the thread, set up in call_VM_base).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}
// Call a VM entry point with two arguments in A1/A2. The assert guards
// against arg_2 living in A1, which the first move would have clobbered.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  if (arg_2!=A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}
// Call a VM entry point with three arguments in A1/A2/A3; the asserts catch
// incoming argument registers that earlier moves would have clobbered.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}
// Call a VM entry point with an explicit last_java_sp and no Java arguments.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}
// Call a VM entry point with an explicit last_java_sp and one argument in A1.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
// Call a VM entry point with an explicit last_java_sp and two arguments
// in A1/A2; asserts catch argument registers clobbered by earlier moves.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
// Call a VM entry point with an explicit last_java_sp and three arguments
// in A1/A2/A3; asserts catch argument registers clobbered by earlier moves.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
1361 void MacroAssembler::call_VM_base(Register oop_result,
1362 Register java_thread,
1363 Register last_java_sp,
1364 address entry_point,
1365 int number_of_arguments,
1366 bool check_exceptions) {
1368 address before_call_pc;
1369 // determine java_thread register
1370 if (!java_thread->is_valid()) {
1371 #ifndef OPT_THREAD
1372 java_thread = T2;
1373 get_thread(java_thread);
1374 #else
1375 java_thread = TREG;
1376 #endif
1377 }
1378 // determine last_java_sp register
1379 if (!last_java_sp->is_valid()) {
1380 last_java_sp = SP;
1381 }
1382 // debugging support
1383 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
1384 assert(number_of_arguments <= 4 , "cannot have negative number of arguments");
1385 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
1386 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
1388 assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
1390 // set last Java frame before call
1391 before_call_pc = (address)pc();
1392 set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
1394 // do the call
1395 move(A0, java_thread);
1396 call(entry_point, relocInfo::runtime_call_type);
1397 delayed()->nop();
1399 // restore the thread (cannot use the pushed argument since arguments
1400 // may be overwritten by C code generated by an optimizing compiler);
1401 // however can use the register value directly if it is callee saved.
1402 #ifndef OPT_THREAD
1403 if (java_thread >=S0 && java_thread <=S7) {
1404 #ifdef ASSERT
1405 { Label L;
1406 get_thread(AT);
1407 beq(java_thread, AT, L);
1408 delayed()->nop();
1409 stop("MacroAssembler::call_VM_base: edi not callee saved?");
1410 bind(L);
1411 }
1412 #endif
1413 } else {
1414 get_thread(java_thread);
1415 }
1416 #endif
1418 // discard thread and arguments
1419 ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1420 // reset last Java frame
1421 reset_last_Java_frame(java_thread, false, true);
1423 check_and_handle_popframe(java_thread);
1424 check_and_handle_earlyret(java_thread);
1425 if (check_exceptions) {
1426 // check for pending exceptions (java_thread is set upon return)
1427 Label L;
1428 #ifdef _LP64
1429 ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1430 #else
1431 lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1432 #endif
1433 beq(AT, R0, L);
1434 delayed()->nop();
1435 li(AT, before_call_pc);
1436 push(AT);
1437 jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1438 delayed()->nop();
1439 bind(L);
1440 }
1442 // get oop result if there is one and reset the value in the thread
1443 if (oop_result->is_valid()) {
1444 #ifdef _LP64
1445 ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1446 sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1447 #else
1448 lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1449 sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1450 #endif
1451 verify_oop(oop_result);
1452 }
1453 }
// Bridge from the simple call_VM overloads to call_VM_base: preserves the
// pre-alignment SP in V0 (passed on as last_java_sp) and aligns the
// outgoing SP for the C ABI.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  //we also reserve space for java_thread here
#ifndef _LP64
  // NOTE(review): daddi is a 64-bit (MIPS64) instruction but this is the
  // 32-bit (#ifndef _LP64) branch -- presumably addiu was intended; confirm
  // before relying on a 32-bit build.
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  // Align SP to StackAlignmentInBytes for the runtime call.
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}
// Call a leaf (no Java frame, no exception check) runtime routine with no
// arguments.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
// Leaf call with one argument marshalled into A0.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}
// Leaf call with two arguments in A0/A1; the assert catches arg_1 arriving
// in A0, which the first move would have clobbered.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}
// Leaf call with three arguments in A0/A1/A2; asserts catch argument
// registers clobbered by earlier moves.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}
// Leaf call routed directly to the MacroAssembler base implementation,
// no arguments.
void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}
// Leaf call (base implementation) with one argument in A0.
void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}
// Leaf call (base implementation) with two arguments in A0/A1; the assert
// catches arg_2 arriving in A0, already clobbered by the first move.
void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}
// Leaf call (base implementation) with three arguments in A0/A1/A2;
// asserts catch argument registers clobbered by earlier moves.
void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
// Early-return (JVMTI ForceEarlyReturn) hook invoked from call_VM_base;
// intentionally empty on this platform.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
// Pop-frame (JVMTI PopFrame) hook invoked from call_VM_base; intentionally
// empty on this platform.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
// Provoke a SEGV now if reg is NULL, but only when the eventual access at
// reg+offset would fall outside the protected page (needs_explicit_null_check);
// otherwise rely on the later access itself to fault.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    // (AT is the only register clobbered by this probe load)
    lw(AT, reg, 0);
/* Jin
    nop();
    nop();
    nop();
*/
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
// Standard prologue: push RA and FP, then establish FP as the frame pointer.
void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}
// Standard epilogue: tear down the frame built by enter() -- restore SP
// past the two saved slots, then reload RA and FP from them.
void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}
1562 /*
1563 void MacroAssembler::os_breakpoint() {
1564 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
1565 // (e.g., MSVC can't call ps() otherwise)
1566 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
1567 }
1568 */
// Clear the last-Java-frame anchor in the given thread: always zero the
// recorded SP, and optionally the FP and PC as well.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if(clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}
// Convenience overload: clear the current thread's last-Java-frame anchor.
// The thread register is materialized here when OPT_THREAD is not set.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // Derive a per-thread, int-aligned offset within the serialize page.
  move(tmp, thread);
  srl(tmp, tmp,os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp,AT);
  // NOTE(review): tmp serves both as the stored value and as the base
  // register, so the store targets serialize_page + per-thread offset;
  // presumably the sw() helper handles the large page-address displacement
  // -- confirm against the Address/sw implementation.
  sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
1621 // Calls to C land
1622 //
1623 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
1624 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
1625 // has to be reset to 0. This is required to allow proper stack traversal.
// Record the last Java frame (sp/fp/pc) in the given thread's anchor before
// a call into the VM; fp and pc are optional. The sp is stored last.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    // Record as internal_pc so the code-relocation machinery can patch it.
    relocate(relocInfo::internal_pc_type);
    li48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}
// Convenience overload: record the last Java frame in the current thread's
// anchor (thread materialized here when OPT_THREAD is not set).
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}
1688 //////////////////////////////////////////////////////////////////////////////////
1689 #ifndef SERIALGC
// G1 SATB pre-barrier. NOTE: currently a no-op on this port -- the entire
// body below is the commented-out x86 implementation kept for reference.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
/* LP64_ONLY(Register thread = r15_thread;)
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
  cmpptr(Address(obj, 0), NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

  LP64_ONLY(movslq(tmp, index);)
  movptr(tmp2, Address(obj, 0));
#ifdef _LP64
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  movq(c_rarg0, Address(obj, 0));
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);
*/
}
// G1 post-barrier (dirty-card enqueue). NOTE: currently a no-op on this
// port -- the entire body below is the commented-out x86 implementation
// kept for reference.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

/*LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
*/
}
1857 #endif // SERIALGC
1858 //////////////////////////////////////////////////////////////////////////////////
// Card-table store check for the oop in obj (destroys obj): computes the
// card index (part 1) and dirties the card (part 2).
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}
// Overload taking the destination address; dst is unused here -- the card
// is derived from the oop in obj, same as the one-argument form.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: convert the oop address in obj into a card-table index in place.
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}
// Part 2: dirty the card -- store a zero byte at byte_map_base + card index
// (obj must already hold the card index produced by part 1).
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  li(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  sb(R0, AT, 0);
}
1898 /*
1899 void MacroAssembler::subptr(Register dst, int32_t imm32) {
1900 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
1901 }
1903 void MacroAssembler::subptr(Register dst, Register src) {
1904 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
1905 }
1907 void MacroAssembler::test32(Register src1, AddressLiteral src2) {
1908 // src2 must be rval
1910 if (reachable(src2)) {
1911 testl(src1, as_Address(src2));
1912 } else {
1913 lea(rscratch1, src2);
1914 testl(src1, Address(rscratch1, 0));
1915 }
1916 }
1918 // C++ bool manipulation
1919 void MacroAssembler::testbool(Register dst) {
1920 if(sizeof(bool) == 1)
1921 testb(dst, 0xff);
1922 else if(sizeof(bool) == 2) {
1923 // testw implementation needed for two byte bools
1924 ShouldNotReachHere();
1925 } else if(sizeof(bool) == 4)
1926 testl(dst, dst);
1927 else
1928 // unsupported
1929 ShouldNotReachHere();
1930 }
1932 void MacroAssembler::testptr(Register dst, Register src) {
1933 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
1934 }
1937 */
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation in the current thread's TLAB: on success obj holds
// the new object's address and tlab_top is advanced; on overflow branches
// (far) to slow_case. Size comes from var_size_in_bytes if it is a valid
// register, otherwise from con_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2);//blows t1&t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  if (var_size_in_bytes == NOREG) {
    // i dont think we need move con_size_in_bytes to a register first.
    // by yjl 8/17/2005
    assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
    addi(end, obj, con_size_in_bytes);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  // Slow path if the new end would run past tlab_end.
  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();


  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  // recover var_size_in_bytes if necessary
  /*if (var_size_in_bytes == end) {
    sub(var_size_in_bytes, end, obj);
  }*/

  verify_tlab(t1, t2);
}
// Defines obj, preserves var_size_in_bytes
// Lock-free allocation directly in the shared eden: CAS-retry loop on the
// heap top pointer. Branches (far) to slow_case when inline contiguous
// allocation is unsupported, on address wrap-around, or when the allocation
// would exceed the heap end.
void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, AT);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
    // No allocation in the shared eden.
    b_far(slow_case);
    delayed()->nop();
  } else {

#ifndef _LP64
    Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
    lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
#else
    Address heap_top(t1);
    li(t1, (long)Universe::heap()->top_addr());
#endif
    ld_ptr(obj, heap_top);

    Register end = t2;
    Label retry;

    bind(retry);
    if (var_size_in_bytes == NOREG) {
    // i dont think we need move con_size_in_bytes to a register first.
    // by yjl 8/17/2005
      assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
      addi(end, obj, con_size_in_bytes);
    } else {
      add(end, obj, var_size_in_bytes);
    }
    // if end < obj then we wrapped around => object too long => slow case
    sltu(AT, end, obj);
    bne_far(AT, R0, slow_case);
    delayed()->nop();

    //lui(AT, split_high((int)Universe::heap()->end_addr()));
    //lw(AT, AT, split_low((int)Universe::heap()->end_addr()));
    li(AT, (long)Universe::heap()->end_addr());
    sltu(AT, AT, end);
    bne_far(AT, R0, slow_case);
    delayed()->nop();
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    if (os::is_MP()) {
      ///lock();
    }

    // if someone beat us on the allocation, try again, otherwise continue
    cmpxchg(end, heap_top, obj);
    beq_far(AT, R0, retry);    //by yyq
    delayed()->nop();

  }
}
// Refill the current thread's TLAB from eden, or decide to allocate outside
// it: if the remaining TLAB space exceeds the refill waste limit, the TLAB
// is retained (waste limit bumped) and control goes to try_eden; otherwise
// the old TLAB is filled with a dummy int array and a fresh TLAB is
// allocated from eden, after which control returns to retry.
void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  Register top = T0;
  Register t1  = T1;
/* Jin: tlab_refill() is called in

     [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);

   In generate_code_for(), T2 has been assigned as a register(length), which is used
  after calling tlab_refill();
  Therefore, tlab_refill() should not use T2.

 Source:

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
        at java.lang.System.arraycopy(Native Method)
        at java.util.Arrays.copyOf(Arrays.java:2799)  <-- alloc_array
        at sun.misc.Resource.getBytes(Resource.java:117)
        at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
 */
  Register t2  = T9;
  Register t3  = T3;
  Register thread_reg = T8;
  Label do_refill, discard_tlab;
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
    // No allocation in the shared eden.
    b(slow_case);
    delayed()->nop();
  }

  get_thread(thread_reg);

  ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));

  // calculate amount of free space
  sub(t1, t1, top);
  shr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  slt(AT, t2, t1);
  beq(AT, R0, discard_tlab);
  delayed()->nop();

  // Retain
  // Bump the waste limit so repeated retains eventually give up the TLAB.

#ifndef _LP64
  move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
#else
  li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
#endif
  add(t2, t2, AT);
  st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));

  if (TLABStats) {
    // increment number of slow_allocations
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
    addiu(AT, AT, 1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  }
  b(try_eden);
  delayed()->nop();

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    addi(AT, AT, 1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    // accumulate wastage -- t1 is amount free in tlab
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
    add(AT, AT, t1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  beq(top, R0, do_refill);
  delayed()->nop();

  // set up the mark word
  li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  st_ptr(AT, top, oopDesc::mark_offset_in_bytes());

  // set the length to the remaining space
  addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  sw(t1, top, arrayOopDesc::length_offset_in_bytes());

  // set klass to intArrayKlass
#ifndef _LP64
  lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
#else
  li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  ld_ptr(t1, AT, 0);
#endif
  //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  shl(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, t3, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    assert_different_registers(thread_reg, t1);
    ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
    shl(AT, LogHeapWordSize);
    beq(AT, t1, ok);
    delayed()->nop();
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
  }
#endif
  // Install the new TLAB bounds (end keeps the alignment reserve free).
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  add(top, top, t1);
  addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  verify_tlab(t1, t2);
  b(retry);
  delayed()->nop();
}
// pi/4 (0.78539816...); presumably an argument-reduction constant for the
// trig intrinsics below -- TODO confirm: it appears unused in this chunk.
static const double pi_4 = 0.7853981633974483;
// The x86 version is too clumsy; I don't think we need that fuss here. Maybe I'm wrong, FIXME
// The argument (a double) must be passed in F12/F13.
//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
// We need to preserve any registers that may be modified during the call. @Jerome
// Emit a call to the shared-runtime sin/cos/tan helper for the trig
// intrinsic selected by 'trig' ('s', 'c' or 't').  The double argument is
// expected in F12/F13 (see the note above).  num_fpu_regs_in_use is
// accepted for signature compatibility but unused here, because
// pushad()/popad() save and restore everything wholesale.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // Save all registers the callee might modify.
  // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA,
  // so saving just V0, V1 and T9 would suffice.
  pushad();
  // Reserve stack space before making the call.
  addi(SP, SP, -wordSize * 2);
  switch (trig){
    case 's' :
      call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
      delayed()->nop();
      break;
    case 'c':
      call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
      delayed()->nop();
      break;
    case 't':
      call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
      delayed()->nop();
      break;
    default:assert (false, "bad intrinsic");
      break;
  }
  // Release the reserved stack space and restore all saved registers.
  addi(SP, SP, wordSize * 2);
  popad();
}
2211 /*
2213 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
2214 ucomisd(dst, as_Address(src));
2215 }
2217 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
2218 ucomiss(dst, as_Address(src));
2219 }
2221 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
2222 if (reachable(src)) {
2223 xorpd(dst, as_Address(src));
2224 } else {
2225 lea(rscratch1, src);
2226 xorpd(dst, Address(rscratch1, 0));
2227 }
2228 }
2230 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
2231 if (reachable(src)) {
2232 xorps(dst, as_Address(src));
2233 } else {
2234 lea(rscratch1, src);
2235 xorps(dst, Address(rscratch1, 0));
2236 }
2237 }
2238 */
#ifdef _LP64
// Load the 64-bit immediate 'imm' into rd, picking the shortest
// instruction sequence the value's shape allows.
void MacroAssembler::li(Register rd, long imm) {
  if (imm <= max_jint && imm >= min_jint) {
    // Fits in a sign-extended 32-bit immediate: 1-2 instructions.
    li32(rd, (int)imm);
  } else if (julong(imm) <= 0xFFFFFFFF) {
    // Unsigned 32-bit value with bit 31 set: must be zero-extended.
    assert_not_delayed();
    // lui sign-extends, so we can't use that.
    ori(rd, R0, julong(imm) >> 16);
    dsll(rd, rd, 16);
    ori(rd, rd, split_low(imm));
    //aoqi_test
    //} else if ((imm > 0) && ((imm >> 48) == 0)) {
  } else if ((imm > 0) && is_simm16(imm >> 32)) {
    /* A 48-bit address */
    li48(rd, imm);
  } else {
    // General case: full 6-instruction sequence.
    li64(rd, imm);
  }
}
#else
// 32-bit build: a long is 32 bits, so li32 always suffices.
void MacroAssembler::li(Register rd, long imm) {
  li32(rd, (int)imm);
}
#endif
// Load the 32-bit immediate 'imm' into reg (1-2 instructions).
void MacroAssembler::li32(Register reg, int imm) {
  if (is_simm16(imm)) {
    /* Jin: for imm < 0, we should use addi instead of addiu.
     *
     *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
     *
     *  78 move [int:-1|I] [a0|I]
     *    : daddi a0, zero, 0xffffffff  (correct)
     *    : daddiu a0, zero, 0xffffffff (incorrect)
     */
    if (imm >= 0)
      addiu(reg, R0, imm);
    else
      addi(reg, R0, imm);
  } else {
    // lui fills the upper halfword; ori the lower (skipped when zero).
    lui(reg, split_low(imm >> 16));
    if (split_low(imm))
      ori(reg, reg, split_low(imm));
  }
}
2286 #ifdef _LP64
// Load an arbitrary 64-bit immediate into rd using the fixed
// 6-instruction lui/ori/dsll sequence (16 bits per step).
void MacroAssembler::li64(Register rd, long imm) {
  assert_not_delayed();
  lui(rd, imm >> 48);                  // bits 63..48
  ori(rd, rd, split_low(imm >> 32));   // bits 47..32
  dsll(rd, rd, 16);
  ori(rd, rd, split_low(imm >> 16));   // bits 31..16
  dsll(rd, rd, 16);
  ori(rd, rd, split_low(imm));         // bits 15..0
}
// Load a 48-bit value (bits 47..32 must fit a signed 16-bit immediate,
// asserted below) into rd in 4 instructions.
void MacroAssembler::li48(Register rd, long imm) {
  assert(is_simm16(imm >> 32), "Not a 48-bit address");
  lui(rd, imm >> 32);                  // bits 47..32
  ori(rd, rd, split_low(imm >> 16));   // bits 31..16
  dsll(rd, rd, 16);
  ori(rd, rd, split_low(imm));         // bits 15..0
}
2304 #endif
// NOTE: unlike i486, we do not push eax here.
// The x86 version saves eax because it uses eax as the jump register.
// Verify that 'reg' holds a plausible oop, calling the verify-oop stub
// (through StubRoutines::verify_oop_subroutine_entry_address) with the
// failure message built from 's'.  All clobbered registers are saved and
// restored around the call, so this is transparent to surrounding code.
// No-op unless -XX:+VerifyOops.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  /*
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  push(rax); // save rax,
  push(reg); // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  */
  if (!VerifyOops) return;
  // Build the failure message once, at code-generation time.
  const char * b = NULL;
  stringStream ss;
  ss.print("verify_oop: %s: %s", reg->name(), s);
  b = code_string(ss.as_string());
#ifdef _LP64
  pushad();
  move(A1, reg);   // A1 = oop to verify
  li(A0, (long)b); // A0 = error message
  // call indirectly to solve generation ordering problem
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  ld(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  popad();
#else
  // Pass register number to verify_oop_subroutine
  sw(T0, SP, - wordSize);
  sw(T1, SP, - 2*wordSize);
  sw(RA, SP, - 3*wordSize);
  sw(A0, SP ,- 4*wordSize);
  sw(A1, SP ,- 5*wordSize);
  sw(AT, SP ,- 6*wordSize);
  sw(T9, SP ,- 7*wordSize);
  addiu(SP, SP, - 7 * wordSize);
  move(A1, reg);
  li(A0, (long)b);
  // call indirectly to solve generation ordering problem
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  lw(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  lw(T0, SP, 6* wordSize);
  lw(T1, SP, 5* wordSize);
  lw(RA, SP, 4* wordSize);
  lw(A0, SP, 3* wordSize);
  lw(A1, SP, 2* wordSize);
  lw(AT, SP, 1* wordSize);
  lw(T9, SP, 0* wordSize);
  addiu(SP, SP, 7 * wordSize);
#endif
}
// Like verify_oop(), but verifies the oop stored at memory location
// 'addr'.  Saves/restores the registers it uses, so it is transparent
// to the surrounding code.  Emits a single nop when VerifyOops is off
// (keeps code size stable at this site).
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) {
    nop();
    return;
  }
  // Pass register number to verify_oop_subroutine
  const char * b = NULL;
  stringStream ss;
  ss.print("verify_oop_addr: %s", s);
  b = code_string(ss.as_string());

  st_ptr(T0, SP, - wordSize);
  st_ptr(T1, SP, - 2*wordSize);
  st_ptr(RA, SP, - 3*wordSize);
  st_ptr(A0, SP, - 4*wordSize);
  st_ptr(A1, SP, - 5*wordSize);
  st_ptr(AT, SP, - 6*wordSize);
  st_ptr(T9, SP, - 7*wordSize);
  ld_ptr(A1, addr);   // addr may use SP, so load from it before change SP
  addiu(SP, SP, - 7 * wordSize);

  li(A0, (long)b);    // A0 = error message
  // call indirectly to solve generation ordering problem
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  ld_ptr(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  ld_ptr(T0, SP, 6* wordSize);
  ld_ptr(T1, SP, 5* wordSize);
  ld_ptr(RA, SP, 4* wordSize);
  ld_ptr(A0, SP, 3* wordSize);
  ld_ptr(A1, SP, 2* wordSize);
  ld_ptr(AT, SP, 1* wordSize);
  ld_ptr(T9, SP, 0* wordSize);
  addiu(SP, SP, 7 * wordSize);
}
2405 // used registers : T0, T1
// Body of the verify-oop stub reached via verify_oop()/verify_oop_addr().
// Checks that the oop in A1 has a plausible address pattern and a
// non-NULL klass; on failure calls MacroAssembler::debug with the
// message in A0.  Uses T0 and T1 as scratch.
void MacroAssembler::verify_oop_subroutine() {
  // RA: ra
  // A0: char* error message
  // A1: oop object to verify

  Label exit, error;
  // increment the global verify-oop counter
  li(T0, (long)StubRoutines::verify_oop_count_addr());
  lw(AT, T0, 0);
#ifdef _LP64
  //FIXME, aoqi: rewrite addi, addu, etc in 64bits mode.
  daddi(AT, AT, 1);
#else
  addi(AT, AT, 1);
#endif
  sw(AT, T0, 0);

  // make sure object is 'reasonable'
  beq(A1, R0, exit);   // if obj is NULL it is ok
  delayed()->nop();

  // Check if the oop is in the right area of memory
  //const int oop_mask = Universe::verify_oop_mask();
  //const int oop_bits = Universe::verify_oop_bits();
  const uintptr_t oop_mask = Universe::verify_oop_mask();
  const uintptr_t oop_bits = Universe::verify_oop_bits();
  li(AT, oop_mask);
  andr(T0, A1, AT);
  li(AT, oop_bits);
  bne(T0, AT, error);  // (oop & mask) must equal bits
  delayed()->nop();

  // make sure klass is 'reasonable'
  //add for compressedoops
  reinit_heapbase();
  //add for compressedoops
  load_klass(T0, A1);
  beq(T0, R0, error);  // if klass is NULL it is broken
  delayed()->nop();
#if 0
  //FIXME:wuhui.
  // Check if the klass is in the right area of memory
  //const int klass_mask = Universe::verify_klass_mask();
  //const int klass_bits = Universe::verify_klass_bits();
  const uintptr_t klass_mask = Universe::verify_klass_mask();
  const uintptr_t klass_bits = Universe::verify_klass_bits();

  li(AT, klass_mask);
  andr(T1, T0, AT);
  li(AT, klass_bits);
  bne(T1, AT, error);
  delayed()->nop();
  // make sure klass' klass is 'reasonable'
  //add for compressedoops
  load_klass(T0, T0);
  beq(T0, R0, error);  // if klass' klass is NULL it is broken
  delayed()->nop();

  li(AT, klass_mask);
  andr(T1, T0, AT);
  li(AT, klass_bits);
  bne(T1, AT, error);
  delayed()->nop();    // if klass not in right area of memory it is broken too.
#endif
  // return if everything seems ok
  bind(exit);

  jr(RA);
  delayed()->nop();

  // handle errors: report via MacroAssembler::debug, then return anyway
  bind(error);
  pushad();
#ifndef _LP64
  addi(SP, SP, (-1) * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  addiu(SP, SP, 1 * wordSize);
#endif
  popad();
  jr(RA);
  delayed()->nop();
}
// Debug-only sanity check of the current thread's TLAB: asserts
// start <= top <= end.  t1 and t2 are scratch (t1 receives the thread).
// Compiled away entirely in product builds.
void MacroAssembler::verify_tlab(Register t1, Register t2) {
#ifdef ASSERT
  assert_different_registers(t1, t2, AT);
  if (UseTLAB && VerifyOops) {
    Label next, ok;

    get_thread(t1);

    ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
    sltu(AT, t2, AT);       // AT = (top < start)
    beq(AT, R0, next);
    delayed()->nop();

    stop("assert(top >= start)");

    bind(next);
    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
    sltu(AT, AT, t2);       // AT = (end < top)
    beq(AT, R0, ok);
    delayed()->nop();

    stop("assert(top <= end)");

    bind(ok);
    /*
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
    */
  }
#endif
}
2547 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2548 Register tmp,
2549 int offset) {
2550 intptr_t value = *delayed_value_addr;
2551 if (value != 0)
2552 return RegisterOrConstant(value + offset);
2553 AddressLiteral a(delayed_value_addr);
2554 // load indirectly to solve generation ordering problem
2555 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2556 //ld(tmp, a);
2557 /* #ifdef ASSERT
2558 { Label L;
2559 testptr(tmp, tmp);
2560 if (WizardMode) {
2561 jcc(Assembler::notZero, L);
2562 char* buf = new char[40];
2563 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
2564 STOP(buf);
2565 } else {
2566 jccb(Assembler::notZero, L);
2567 hlt();
2568 }
2569 bind(L);
2570 }
2571 #endif*/
2572 if (offset != 0)
2573 daddi(tmp,tmp, offset);
2575 return RegisterOrConstant(tmp);
2576 }
// Byte-swap the low halfword of reg; sll 24 / sra 16 places the original
// low byte into bits 8..15 with sign extension.
// NOTE(review): AT = reg >> 8 can carry bits from above the low halfword
// into the result via the final or; callers appear to rely on only the
// low 16 bits being significant (see the commented-out andi) -- confirm.
void MacroAssembler::hswap(Register reg) {
  //andi(reg, reg, 0xffff);
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  sra(reg, reg, 16);
  orr(reg, reg, AT);
}
// Byte-swap the low halfword of reg, unsigned: the LP64 path masks the
// result to 16 bits.
// NOTE(review): the 32-bit path has no final andi mask; presumably
// callers guarantee only the low 16 bits are significant -- confirm.
void MacroAssembler::huswap(Register reg) {
#ifdef _LP64
  dsrl(AT, reg, 8);
  dsll(reg, reg, 24);
  dsrl(reg, reg, 16);
  orr(reg, reg, AT);
  andi(reg, reg, 0xffff);   // zero-extend: keep only the swapped halfword
#else
  //andi(reg, reg, 0xffff);
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  srl(reg, reg, 16);
  orr(reg, reg, AT);
#endif
}
2602 // something funny to do this will only one more register AT
2603 // by yjl 6/29/2005
// Reverse the byte order of the 32-bit value in reg (bswap) using only AT
// as scratch.  First a rotate-right-by-8 (srl/sll/or), then an XOR trick
// swaps the two middle bytes; the running byte layout is traced in the
// comments below (input bytes labeled 1 2 3 4, high to low).
void MacroAssembler::swap(Register reg) {
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  orr(reg, reg, AT);
  //reg : 4 1 2 3
  srl(AT, AT, 16);
  xorr(AT, AT, reg);
  andi(AT, AT, 0xff);
  //AT : 0 0 0 1^3);
  xorr(reg, reg, AT);
  //reg : 4 1 2 1
  sll(AT, AT, 16);
  xorr(reg, reg, AT);
  //reg : 4 3 2 1
}
2620 #ifdef _LP64
2622 /* do 32-bit CAS using MIPS64 lld/scd
2624 Jin: cas_int should only compare 32-bits of the memory value.
2625 However, lld/scd will do 64-bit operation, which violates the intention of cas_int.
To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
plus the high 32 bits of the memory value, are stored together with SCD.
2630 Example:
2632 double d = 3.1415926;
2633 System.err.println("hello" + d);
2635 sun.misc.FloatingDecimal$1.<init>()
2636 |
2637 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2639 38 cas_int [a7a7|J] [a0|I] [a6|I]
2640 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2641 // a6: 0x4ab325aa
2643 again:
2644 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2646 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2647 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2648 0x00000055647f3c68: dsll32 t8, t8, 0
2649 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2650 0x00000055647f3c70: sll zero, zero, 0
2652 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2653 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2654 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2655 0x00000055647f3c80: and v1, a6, v1
2656 0x00000055647f3c84: or at, t8, v1
2657 0x00000055647f3c88: scd at, 0x0(a7)
2658 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2659 0x00000055647f3c90: sll zero, zero, 0
2660 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2661 0x00000055647f3c98: sll zero, zero, 0
2662 nequal:
2663 0x00000055647f45a4: dadd a0, t9, zero
2664 0x00000055647f45a8: dadd at, zero, zero
2665 done:
2666 */
// 32-bit atomic compare-and-exchange: if (*dest == c_reg) *dest = x_reg.
// On success AT == 1 (the SC result); on failure AT == 0 and c_reg is
// updated with the value observed in memory.  Uses the 32-bit ll/sc pair
// directly; the older lld/scd half-splitting emulation is kept under
// #if 0 for reference.
void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
#if 0
  Label done, again, nequal;
  bind(again);

  sync();
  lld(AT, dest);

  /* T9: 32 bits, sign extended
   * V1: low 32 bits, sign unextended
   * T8: high 32 bits (may be another variables's space)
   */
  sll(T9, AT, 0);      // Use 32-bit sll to extend bit 31
  dsrl32(T8, AT, 0);
  dsll32(T8, T8, 0);

  bne(T9, c_reg, nequal);
  delayed()->nop();

  ori(V1, R0, 0xFFFF);
  dsll(V1, V1, 16);
  ori(V1, V1, 0xFFFF);
  andr(V1, x_reg, V1);
  orr(AT, T8, V1);
  scd(AT, dest);
  beq(AT, R0, again);
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  move(c_reg, T9);
  move(AT, R0);

  bind(done);
#else
  /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  Label done, again, nequal;

  bind(again);

  sync();                  // memory barrier before the load-linked
  ll(AT, dest);
  bne(AT, c_reg, nequal);
  delayed()->nop();

  move(AT, x_reg);
  sc(AT, dest);            // AT <- 1 on success, 0 if the reservation was lost
  beq(AT, R0, again);      // SC failed: retry the whole LL/SC sequence
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  move(c_reg, AT);         // hand the observed value back to the caller
  move(AT, R0);            // failure flag

  bind(done);
#endif
}
2732 #endif // cmpxchg32
// Pointer-width atomic compare-and-exchange:
// if (*dest == c_reg) *dest = x_reg.  On success AT == 1 (the SC result);
// on failure AT == 0 and c_reg receives the value observed in memory.
// Uses lld/scd on LP64, ll/sc otherwise.
void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  Label done, again, nequal;

  bind(again);
#ifdef _LP64
  sync();                  // barrier before the load-linked
  lld(AT, dest);
#else
  sync();
  ll(AT, dest);
#endif
  bne(AT, c_reg, nequal);
  delayed()->nop();

  move(AT, x_reg);
#ifdef _LP64
  scd(AT, dest);
#else
  sc(AT, dest);
#endif
  beq(AT, R0, again);      // store-conditional failed: retry
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  move(c_reg, AT);         // hand the observed value back to the caller
  move(AT, R0);            // failure flag

  bind(done);
}
// 64-bit atomic compare-and-exchange built from two 32-bit register
// halves: the compare value is (c_regHi:c_regLo) and the new value is
// (x_regHi:x_regLo).  On success AT == 1; on failure AT == 0 and the
// observed 64-bit value is left in c_regLo.
// NOTE: clobbers all four Lo/Hi input registers while packing.
void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  Label done, again, nequal;

  // Pack (x_regHi:x_regLo) into a single 64-bit value in x_regLo:
  // zero-extend the low half, shift the high half up, then or.
  Register x_reg = x_regLo;
  dsll32(x_regHi, x_regHi, 0);
  dsll32(x_regLo, x_regLo, 0);
  dsrl32(x_regLo, x_regLo, 0);
  orr(x_reg, x_regLo, x_regHi);

  // Pack (c_regHi:c_regLo) likewise into c_regLo.
  Register c_reg = c_regLo;
  dsll32(c_regHi, c_regHi, 0);
  dsll32(c_regLo, c_regLo, 0);
  dsrl32(c_regLo, c_regLo, 0);
  orr(c_reg, c_regLo, c_regHi);

  bind(again);

  sync();                  // barrier before the load-linked
  lld(AT, dest);
  bne(AT, c_reg, nequal);
  delayed()->nop();

  //move(AT, x_reg);
  dadd(AT, x_reg, R0);
  scd(AT, dest);           // AT <- 1 on success, 0 if the reservation was lost
  beq(AT, R0, again);      // SC failed: retry
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  //move(c_reg, AT);
  //move(AT, R0);
  dadd(c_reg, AT, R0);     // observed value back to the caller (in c_regLo)
  dadd(AT, R0, R0);        // failure flag
  bind(done);
}
// Note: tmp, fs and ft must be three different registers.
// Single-precision remainder: fd = fs - trunc(fs / ft) * ft.
// The quotient is truncated toward zero (trunc_l_s) before being
// multiplied back.  tmp must differ from fs and ft (asserted).
void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  assert_different_registers(tmp, fs, ft);
  div_s(tmp, fs, ft);    // tmp = fs / ft
  trunc_l_s(tmp, tmp);   // truncate the quotient toward zero (to long)
  cvt_s_l(tmp, tmp);     // convert back to single
  mul_s(tmp, tmp, ft);   // tmp = trunc(quotient) * ft
  sub_s(fd, fs, tmp);    // remainder
}
// Note: tmp, fs and ft must be three different registers.
// Double-precision remainder: fd = fs - trunc(fs / ft) * ft.
// The quotient is truncated toward zero (trunc_l_d) before being
// multiplied back.  tmp must differ from fs and ft (asserted).
void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  assert_different_registers(tmp, fs, ft);
  div_d(tmp, fs, ft);    // tmp = fs / ft
  trunc_l_d(tmp, tmp);   // truncate the quotient toward zero (to long)
  cvt_d_l(tmp, tmp);     // convert back to double
  mul_d(tmp, tmp, ft);   // tmp = trunc(quotient) * ft
  sub_d(fd, fs, tmp);    // remainder
}
2828 // Fast_Lock and Fast_Unlock used by C2
2830 // Because the transitions from emitted code to the runtime
2831 // monitorenter/exit helper stubs are so slow it's critical that
2832 // we inline both the stack-locking fast-path and the inflated fast path.
2833 //
2834 // See also: cmpFastLock and cmpFastUnlock.
2835 //
2836 // What follows is a specialized inline transliteration of the code
2837 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2838 // another option would be to emit TrySlowEnter and TrySlowExit methods
2839 // at startup-time. These methods would accept arguments as
2840 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2841 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2842 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2843 // In practice, however, the # of lock sites is bounded and is usually small.
2844 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2845 // if the processor uses simple bimodal branch predictors keyed by EIP
2846 // Since the helper routines would be called from multiple synchronization
2847 // sites.
2848 //
2849 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2850 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2851 // to those specialized methods. That'd give us a mostly platform-independent
2852 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
2854 // to park() or unpark() threads. We'd also need a few more unsafe operators
2855 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2856 // (b) explicit barriers or fence operations.
2857 //
2858 // TODO:
2859 //
2860 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2861 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2862 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2863 // the lock operators would typically be faster than reifying Self.
2864 //
2865 // * Ideally I'd define the primitives as:
2866 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2867 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2868 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2869 // Instead, we're stuck with a rather awkward and brittle register assignments below.
2870 // Furthermore the register assignments are overconstrained, possibly resulting in
2871 // sub-optimal code near the synchronization site.
2872 //
2873 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2874 // Alternately, use a better sp-proximity test.
2875 //
2876 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2877 // Either one is sufficient to uniquely identify a thread.
2878 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2879 //
2880 // * Intrinsify notify() and notifyAll() for the common cases where the
2881 // object is locked by the calling thread but the waitlist is empty.
2882 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2883 //
2884 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2885 // But beware of excessive branch density on AMD Opterons.
2886 //
2887 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2888 // or failure of the fast-path. If the fast-path fails then we pass
2889 // control to the slow-path, typically in C. In Fast_Lock and
2890 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2891 // will emit a conditional branch immediately after the node.
2892 // So we have branches to branches and lots of ICC.ZF games.
2893 // Instead, it might be better to have C2 pass a "FailureLabel"
2894 // into Fast_Lock and Fast_Unlock. In the case of success, control
2895 // will drop through the node. ICC.ZF is undefined at exit.
2896 // In the case of failure, the node will branch directly to the
2897 // FailureLabel
2900 // obj: object to lock
2901 // box: on-stack box address (displaced header location) - KILLED
2902 // rax,: tmp -- KILLED
2903 // scr: tmp -- KILLED
// Emit the inline monitorenter fast path (used by C2's cmpFastLock).
//
//   objReg - object to lock
//   boxReg - on-stack box address (displaced header location) - KILLED
//   tmpReg / scrReg - temporaries - KILLED (forcibly remapped to T8 / S7)
//
// On exit AT acts as the success flag (this port's analogue of x86
// ICC.ZF, per the protocol comment at the end): AT != 0 means the lock
// was acquired, AT == 0 means the caller must take the slow path.
void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  // The incoming temporaries are ignored; fixed registers are used instead.
  tmpReg = T8;
  scrReg = S7;

  // Ensure the register assignments are disjoint
  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (objReg != scrReg, "") ;
  guarantee (boxReg != tmpReg, "") ;
  guarantee (boxReg != scrReg, "") ;

  block_comment("FastLock");
  if (PrintBiasedLockingStatistics) {
    push(tmpReg);
    atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
    pop(tmpReg);
  }

  if (EmitSync & 1) {
    // set box->dhw = unused_mark (3)
    // Force all sync thru slow-path: slow_enter() and slow_exit()
    move (AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
    sd(AT, Address(boxReg, 0));
    move (AT, (int32_t)0) ;    // Eflags.ZF = 0
  } else
  if (EmitSync & 2) {
    Label DONE_LABEL ;
    if (UseBiasedLocking) {
      // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
    }

    ld(tmpReg, Address(objReg, 0)) ;       // fetch markword
    ori(tmpReg, tmpReg, 0x1);
    sd(tmpReg, Address(boxReg, 0));        // Anticipate successful CAS

    cmpxchg(boxReg, Address(objReg, 0), tmpReg);  // Updates tmpReg
    bne(AT, R0, DONE_LABEL);
    delayed()->nop();

    // Recursive locking: check that the mark points into our own stack
    dsubu(tmpReg, tmpReg, SP);
    li(AT, (7 - os::vm_page_size() ));
    andr(tmpReg, tmpReg, AT);
    sd(tmpReg, Address(boxReg, 0));
    bind(DONE_LABEL) ;
  } else {
    // Possible cases that we'll encounter in fast_lock
    // ------------------------------------------------
    // * Inflated
    //    -- unlocked
    //    -- Locked
    //       = by self
    //       = by other
    // * biased
    //    -- by Self
    //    -- by other
    // * neutral
    // * stack-locked
    //    -- by self
    //       = sp-proximity test hits
    //       = sp-proximity test generates false-negative
    //    -- by other
    //
    Label IsInflated, DONE_LABEL, PopDone ;

    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
    // order to reduce the number of conditional branches in the most common cases.
    // Beware -- there's a subtle invariant that fetch of the markword
    // at [FETCH], below, will never observe a biased encoding (*101b).
    // If this invariant is not held we risk exclusion (safety) failure.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
    }

    ld(tmpReg, Address(objReg, 0)) ;   // Fetch the markword of the object.
    andi(AT, tmpReg, 0x02);            // monitor bit set ==> inflated; skip the fast-lock path.
    bne(AT, R0, IsInflated);           // Inflated v (Stack-locked or neutral)
    delayed()->nop();

    // Attempt stack-locking ...
    ori (tmpReg, tmpReg, 0x1);
    sd(tmpReg, Address(boxReg, 0));    // Anticipate successful CAS

    cmpxchg(boxReg, Address(objReg, 0), tmpReg);  // Updates tmpReg

    if (PrintBiasedLockingStatistics) {
      Label L;
      beq(AT, R0, L);
      delayed()->nop();
      push(T0);
      push(T1);
      atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
      pop(T1);
      pop(T0);
      bind(L);
    }
    bne(AT, R0, DONE_LABEL);           // CAS succeeded: locked
    delayed()->nop();

    // Recursive locking: mark must point into our own stack frame
    dsubu(tmpReg, tmpReg, SP);
    li(AT, 7 - os::vm_page_size() );
    andr(tmpReg, tmpReg, AT);
    sd(tmpReg, Address(boxReg, 0));
    if (PrintBiasedLockingStatistics) {
      Label L;
      // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
      bne(tmpReg, R0, L);
      delayed()->nop();
      push(T0);
      push(T1);
      atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
      pop(T1);
      pop(T0);
      bind(L);
    }
    sltiu(AT, tmpReg, 1);  /* AT = (tmpReg == 0) ? 1 : 0 */

    b(DONE_LABEL) ;
    delayed()->nop();

    bind(IsInflated) ;
    // TODO: someday avoid the ST-before-CAS penalty by
    // relocating (deferring) the following ST.
    // We should also think about trying a CAS without having
    // fetched _owner.  If the CAS is successful we may
    // avoid an RTO->RTS upgrade on the $line.
    // Without cast to int32_t a movptr will destroy r10 which is typically obj
    li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
    sd(AT, Address(boxReg, 0));

    move(boxReg, tmpReg) ;
    ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    sltiu(AT, tmpReg, 1);  /* Jin: AT = !tmpReg; */
    bne(tmpReg, R0, DONE_LABEL);   // monitor already owned: fail fast
    delayed()->nop();

    // Try to claim the monitor: CAS thread into _owner (updates AT)
    cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
    // Intentional fall-through into DONE_LABEL ...

    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    bind(DONE_LABEL);

    // Avoid branch-to-branch on AMD processors
    // This appears to be superstition.
    if (EmitSync & 32) nop() ;

    // At DONE_LABEL the icc ZFlag is set as follows ...
    // Fast_Unlock uses the same protocol.
    // ZFlag == 1 -> Success
    // ZFlag == 0 -> Failure - force control through the slow-path
  }
}
3073 // obj: object to unlock
3074 // box: box address (displaced header location), killed. Must be EAX.
3075 // rbx,: killed tmp; cannot be obj nor box.
3076 //
3077 // Some commentary on balanced locking:
3078 //
3079 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3080 // Methods that don't have provably balanced locking are forced to run in the
3081 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3082 // The interpreter provides two properties:
3083 // I1: At return-time the interpreter automatically and quietly unlocks any
3084 // objects acquired the current activation (frame). Recall that the
3085 // interpreter maintains an on-stack list of locks currently held by
3086 // a frame.
3087 // I2: If a method attempts to unlock an object that is not held by the
3088 // the frame the interpreter throws IMSX.
3089 //
3090 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3091 // B() doesn't have provably balanced locking so it runs in the interpreter.
3092 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3093 // is still locked by A().
3094 //
3095 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3096 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3097 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3098 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
// Emit the inline (fast-path) monitor-exit sequence.
// Result convention (as used by the emitted branches below): AT == 1 appears
// to signal success, AT == 0 forces the caller down the slow path -- TODO
// confirm against the callers of fast_unlock.
// NOTE: the incoming tmpReg argument is ignored; T8 is used unconditionally.
void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  tmpReg = T8;
  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (boxReg != tmpReg, "") ;

  block_comment("FastUnlock");

  /*
  move(AT, 0x0);
  return;
  */

  if (EmitSync & 4) {
    // Disable - inhibit all inlining. Force control through the slow-path
    move(AT, R0);
  } else
  if (EmitSync & 8) {
    Label DONE_LABEL ;
    if (UseBiasedLocking) {
      biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }
    // classic stack-locking code ...
    ld(tmpReg, Address(boxReg, 0)) ;
    beq(tmpReg, R0, DONE_LABEL) ;
    move(AT, 0x1); // delay slot

    cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
    bind(DONE_LABEL);
  } else {
    Label DONE_LABEL, Stacked, CheckSucc, Inflated ;

    // Critically, the biased locking test must have precedence over
    // and appear before the (box->dhw == 0) recursive stack-lock test.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }

    ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
    ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
    beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
    //move(AT, 0x1);
    //delayed()->nop();
    // Delay slot: AT = 1, i.e. report success on the taken (recursive) path.
    delayed()->daddiu(AT, R0, 0x1);

    andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
    beq(AT, R0, Stacked) ; // Inflated?
    delayed()->nop();

    bind(Inflated) ;
    // It's inflated.
    // Despite our balanced locking property we still check that m->_owner == Self
    // as java routines or native JNI code called by this thread might
    // have released the lock.
    // Refer to the comments in synchronizer.cpp for how we might encode extra
    // state in _succ so we can avoid fetching EntryList|cxq.
    //
    // I'd like to add more cases in fast_lock() and fast_unlock() --
    // such as recursive enter and exit -- but we have to be wary of
    // I$ bloat, T$ effects and BP$ effects.
    //
    // If there's no contention try a 1-0 exit. That is, exit without
    // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock. Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
    // each other and there's no need for an explicit barrier (fence).
    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
    // NOTE(review): the comment above describes IA32 ordering; whether the
    // MIPS memory model needs an explicit barrier here should be confirmed.
#ifdef OPT_THREAD
    move(boxReg, TREG);
#else
    get_thread (boxReg) ;
#endif

#ifndef _LP64

    // Note that we could employ various encoding schemes to reduce
    // the number of loads below (currently 4) to just 2 or 3.
    // Refer to the comments in synchronizer.cpp.
    // In practice the chain of fetches doesn't seem to impact performance, however.
    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
      // Attempt to reduce branch density - AMD's branch predictor.
      ld(AT, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      xorr(boxReg, boxReg, AT);

      ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      ld(AT, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      bne(boxReg, R0, DONE_LABEL);
      move(AT, R0); /* delay slot */

      sw(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      b(DONE_LABEL);
      move(AT, 0x1); /* delay slot */
    } else {
      ld(AT, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      xorr(boxReg, boxReg, AT);

      ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      bne(boxReg, R0, DONE_LABEL);
      move(AT, R0); /* delay slot */

      ld(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
      ld(AT, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      bne(boxReg, R0, CheckSucc);
      move(AT, R0); /* delay slot */

      sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      b(DONE_LABEL);
      move(AT, 0x1); /* delay slot */
    }

    // The Following code fragment (EmitSync & 65536) improves the performance of
    // contended applications and contended synchronization microbenchmarks.
    // Unfortunately the emission of the code - even though not executed - causes regressions
    // in scimark and jetstream, evidently because of $ effects. Replacing the code
    // with an equal number of never-executed NOPs results in the same regression.
    // We leave it off by default.

    if ((EmitSync & 65536) != 0) {
      Label LSuccess, LGoSlowPath ;

      bind(CheckSucc) ;

      // Optional pre-test ... it's safe to elide this
      if ((EmitSync & 16) == 0) {
        ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
        beq(AT, R0, LGoSlowPath);
        delayed()->nop();
      }

      // We have a classic Dekker-style idiom:
      // ST m->_owner = 0 ; MEMBAR; LD m->_succ
      // There are a number of ways to implement the barrier:
      // (1) lock:andl &m->_owner, 0
      // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
      // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
      // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
      // (2) If supported, an explicit MFENCE is appealing.
      // In older IA32 processors MFENCE is slower than lock:add or xchg
      // particularly if the write-buffer is full as might be the case if
      // if stores closely precede the fence or fence-equivalent instruction.
      // In more modern implementations MFENCE appears faster, however.
      // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
      // The $lines underlying the top-of-stack should be in M-state.
      // The locked add instruction is serializing, of course.
      // (4) Use xchg, which is serializing
      // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
      // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
      // The integer condition codes will tell us if succ was 0.
      // Since _succ and _owner should reside in the same $line and
      // we just stored into _owner, it's likely that the $line
      // remains in M-state for the lock:orl.
      //
      // We currently use (3), although it's likely that switching to (2)
      // is correct for the future.

      sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

      // Ratify _succ remains non-null
      ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
      bne(AT, R0, LSuccess);
      delayed()->nop(); /* delay slot */
      /*
      masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
      masm.jccb (Assembler::notZero, LSuccess) ;
      */

      move(boxReg, R0) ; // box is really EAX

      // Try to re-take the lock with SP as a placeholder owner.
      cmpxchg(SP, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
      beq(AT, R0, LSuccess);
      delayed()->nop();

      // Since we're low on registers we installed rsp as a placeholding in _owner.
      // Now install Self over rsp. This is safe as we're transitioning from
      // non-null to non-null
      get_thread (boxReg) ;
      sd(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      // Intentional fall-through into LGoSlowPath ...

      bind(LGoSlowPath) ;
      ori(boxReg, boxReg, 1) ; // set ICC.ZF=0 to indicate failure
      b(DONE_LABEL) ;
      move(AT, R0) ; /* delay slot */

      bind(LSuccess) ;
      move(boxReg, R0) ; // set ICC.ZF=1 to indicate success
      b(DONE_LABEL) ;
      move(AT, 0x1) ; /* delay slot */
    }

    bind (Stacked) ;
    // It's not inflated and it's not recursively stack-locked and it's not biased.
    // It must be stack-locked.
    // Try to reset the header to displaced header.
    // The "box" value on the stack is stable, so we can reload
    // and be assured we observe the same value as above.
    ld(tmpReg, Address(boxReg, 0)) ;

    cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
    // Intention fall-thru into DONE_LABEL

    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    if ((EmitSync & 65536) == 0) {
      bind (CheckSucc) ;
    }
#else // _LP64
    // It's inflated
    ld(AT, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    xorr(boxReg, boxReg, AT);

    ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
    orr(boxReg, boxReg, AT);

    move(AT, R0);
    bne(boxReg, R0, DONE_LABEL);
    delayed()->nop();

    ld(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
    ld(AT, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
    orr(boxReg, boxReg, AT);

    move(AT, R0);
    bne(boxReg, R0, CheckSucc);
    delayed()->nop();

    // 1-0 exit: clear _owner and report success.
    sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    move(AT, 0x1);
    b(DONE_LABEL);
    delayed()->nop();

    if ((EmitSync & 65536) == 0) {
      Label LSuccess, LGoSlowPath ;
      bind (CheckSucc);
      ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
      beq(AT, R0, LGoSlowPath);
      delayed()->nop();

      // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
      // the explicit ST;MEMBAR combination, but masm doesn't currently support
      // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc
      // are all faster when the write buffer is populated.
      sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      if (os::is_MP()) {
        // lock ();
        //addl (Address(rsp, 0), 0); //?
      }
      ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
      bne(AT, R0, LSuccess);
      delayed()->nop();

      move(boxReg, R0) ; // box is really EAX
      //if (os::is_MP()) { lock(); }
      cmpxchg(SP, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
      beq(AT, R0, LSuccess);
      delayed()->nop();
      // Intentional fall-through into slow-path

      bind (LGoSlowPath);
      ori(boxReg, boxReg, 1) ; // set ICC.ZF=0 to indicate failure
      move(AT, R0);
      b(DONE_LABEL) ;
      delayed()->nop();

      bind (LSuccess);
      move(boxReg, R0) ; // set ICC.ZF=1 to indicate success
      move(AT, 0x1) ;
      b(DONE_LABEL) ;
      delayed()->nop();
    }

    bind (Stacked);
    ld(tmpReg, Address(boxReg, 0)) ;
    //if (os::is_MP()) { lock(); }
    cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box

    if (EmitSync & 65536) {
      bind (CheckSucc);
    }
#endif

    bind(DONE_LABEL);

    // Avoid branch to branch on AMD processors
    if (EmitSync & 32768) { nop() ; }
  }
}
// Decoded view of the x87 FPU control word (debug-print support).
// Bit layout: [11:10] rounding control, [9:8] precision control,
// [5:0] exception masks (P, U, O, Z, D, I).
class ControlWord {
 public:
  int32_t _value;

  int rounding_control() const { return (_value >> 10) & 3 ; }
  int precision_control() const { return (_value >> 8) & 3 ; }
  bool precision() const { return ((_value >> 5) & 1) != 0; }
  bool underflow() const { return ((_value >> 4) & 1) != 0; }
  bool overflow() const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid() const { return ((_value >> 0) & 1) != 0; }

  // Print a one-line human-readable summary of the control word.
  void print() const {
    // rounding control. Initialize defensively: the switch is exhaustive
    // (the selector is masked to 0..3) but an uninitialized local would
    // draw maybe-uninitialized warnings and is fragile under edits.
    const char* rc = "";
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    }
    // precision control
    const char* pc = "";
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    }
    // flags: uppercase letter means the corresponding exception is masked
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision ()) ? 'P' : 'p';
    f[3] = (underflow ()) ? 'U' : 'u';
    f[4] = (overflow ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
// Decoded view of the x87 FPU status word (debug-print support).
class StatusWord {
 public:
  int32_t _value;

  bool busy() const { return ((_value >> 15) & 1) != 0; }
  bool C3() const { return ((_value >> 14) & 1) != 0; }
  bool C2() const { return ((_value >> 10) & 1) != 0; }
  bool C1() const { return ((_value >> 9) & 1) != 0; }
  bool C0() const { return ((_value >> 8) & 1) != 0; }
  int top() const { return (_value >> 11) & 7 ; }
  bool error_status() const { return ((_value >> 7) & 1) != 0; }
  bool stack_fault() const { return ((_value >> 6) & 1) != 0; }
  bool precision() const { return ((_value >> 5) & 1) != 0; }
  bool underflow() const { return ((_value >> 4) & 1) != 0; }
  bool overflow() const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid() const { return ((_value >> 0) & 1) != 0; }

  // One-line summary: raw value, exception flags, condition codes, TOS index.
  void print() const {
    // condition codes C3..C0 ('-' when clear)
    char cc[5];
    cc[4] = '\0';
    cc[0] = C3() ? '3' : '-';
    cc[1] = C2() ? '2' : '-';
    cc[2] = C1() ? '1' : '-';
    cc[3] = C0() ? '0' : '-';
    // exception/status flags ('-' when clear)
    char fl[9];
    fl[8] = '\0';
    fl[0] = error_status() ? 'E' : '-';
    fl[1] = stack_fault() ? 'S' : '-';
    fl[2] = precision() ? 'P' : '-';
    fl[3] = underflow() ? 'U' : '-';
    fl[4] = overflow() ? 'O' : '-';
    fl[5] = zero_divide() ? 'Z' : '-';
    fl[6] = denormalized() ? 'D' : '-';
    fl[7] = invalid() ? 'I' : '-';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, fl, cc, top());
  }

};
// Decoded view of the x87 FPU tag word: two tag bits per register slot.
class TagWord {
 public:
  int32_t _value;

  // Tag bits for physical register slot i (0..7).
  int tag_at(int i) const { return (_value >> (2 * i)) & 0x3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
// One 80-bit x87 register image: 64-bit mantissa (_m1:_m0) plus
// 16-bit sign/exponent field (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True iff this register holds the x87 "indefinite" QNaN bit pattern.
  bool is_indefinite() const {
    if (_ex != -1) return false;
    return _m0 == 0 && _m1 == (int32_t)0xC0000000;
  }

  void print() const {
    char sign_ch = (_ex < 0) ? '-' : '+';
    bool is_nan = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    const char* kind = is_nan ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign_ch, _ex, _m1, _m0, kind);
  }

};
3536 class FPU_State {
3537 public:
3538 enum {
3539 register_size = 10,
3540 number_of_registers = 8,
3541 register_mask = 7
3542 };
3544 ControlWord _control_word;
3545 StatusWord _status_word;
3546 TagWord _tag_word;
3547 int32_t _error_offset;
3548 int32_t _error_selector;
3549 int32_t _data_offset;
3550 int32_t _data_selector;
3551 int8_t _register[register_size * number_of_registers];
3553 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
3554 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
3556 const char* tag_as_string(int tag) const {
3557 switch (tag) {
3558 case 0: return "valid";
3559 case 1: return "zero";
3560 case 2: return "special";
3561 case 3: return "empty";
3562 }
3563 ShouldNotReachHere();
3564 return NULL;
3565 }
3567 void print() const {
3568 // print computation registers
3569 { int t = _status_word.top();
3570 for (int i = 0; i < number_of_registers; i++) {
3571 int j = (i - t) & register_mask;
3572 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
3573 st(j)->print();
3574 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
3575 }
3576 }
3577 printf("\n");
3578 // print control registers
3579 printf("ctrl = "); _control_word.print(); printf("\n");
3580 printf("stat = "); _status_word .print(); printf("\n");
3581 printf("tags = "); _tag_word .print(); printf("\n");
3582 }
3584 };
// Decoded view of the IA-32 EFLAGS register (debug-print support).
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const { return ((_value >> 11) & 1) != 0; }
  bool direction() const { return ((_value >> 10) & 1) != 0; }
  bool sign() const { return ((_value >> 7) & 1) != 0; }
  bool zero() const { return ((_value >> 6) & 1) != 0; }
  bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; }
  bool parity() const { return ((_value >> 2) & 1) != 0; }
  bool carry() const { return ((_value >> 0) & 1) != 0; }

  // One-line summary: raw value plus a letter per set flag ('-' when clear).
  void print() const {
    char bits[8];
    bits[7] = '\0';
    bits[0] = overflow() ? 'O' : '-';
    bits[1] = direction() ? 'D' : '-';
    bits[2] = sign() ? 'S' : '-';
    bits[3] = zero() ? 'Z' : '-';
    bits[4] = auxiliary_carry() ? 'A' : '-';
    bits[5] = parity() ? 'P' : '-';
    bits[6] = carry() ? 'C' : '-';
    printf("%08x flags = %s", _value, bits);
  }

};
// One 32-bit integer-unit register, printed in hex and decimal.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x %11d", _value, _value);
  }

};
3625 class IU_State {
3626 public:
3627 Flag_Register _eflags;
3628 IU_Register _rdi;
3629 IU_Register _rsi;
3630 IU_Register _rbp;
3631 IU_Register _rsp;
3632 IU_Register _rbx;
3633 IU_Register _rdx;
3634 IU_Register _rcx;
3635 IU_Register _rax;
3637 void print() const {
3638 // computation registers
3639 printf("rax, = "); _rax.print(); printf("\n");
3640 printf("rbx, = "); _rbx.print(); printf("\n");
3641 printf("rcx = "); _rcx.print(); printf("\n");
3642 printf("rdx = "); _rdx.print(); printf("\n");
3643 printf("rdi = "); _rdi.print(); printf("\n");
3644 printf("rsi = "); _rsi.print(); printf("\n");
3645 printf("rbp, = "); _rbp.print(); printf("\n");
3646 printf("rsp = "); _rsp.print(); printf("\n");
3647 printf("\n");
3648 // control registers
3649 printf("flgs = "); _eflags.print(); printf("\n");
3650 }
3651 };
3654 class CPU_State {
3655 public:
3656 FPU_State _fpu_state;
3657 IU_State _iu_state;
3659 void print() const {
3660 printf("--------------------------------------------------\n");
3661 _iu_state .print();
3662 printf("\n");
3663 _fpu_state.print();
3664 printf("--------------------------------------------------\n");
3665 }
3667 };
3670 /*
3671 static void _print_CPU_state(CPU_State* state) {
3672 state->print();
3673 };
3675 void MacroAssembler::print_CPU_state() {
3676 push_CPU_state();
3677 push(rsp); // pass CPU state
3678 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
3679 addptr(rsp, wordSize); // discard argument
3680 pop_CPU_state();
3681 }
3682 */
3684 void MacroAssembler::align(int modulus) {
3685 while (offset() % modulus != 0) nop();
3686 }
// Disabled x87 FPU-stack consistency checker, kept from the x86 port for
// reference; verify_FPU() below is a stub on MIPS.
#if 0
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true; // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}
#endif
// Stub: FPU state verification is not implemented on MIPS. The commented
// block below is the x86 implementation, retained as a porting reference.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  //FIXME aoqi
  // %%%%% need to implement this
  //Unimplemented();
  /*
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp); // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth); // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize); // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3(); // break if error condition
    bind(L);
  }
  pop_CPU_state();
  */
}
3772 #ifdef _LP64
3773 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3775 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
3776 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3777 #else
3778 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3780 Register caller_saved_fpu_registers[] = {};
3781 #endif
3783 //We preserve all caller-saved register
3784 void MacroAssembler::pushad(){
3785 int i;
3787 /* Fixed-point registers */
3788 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3789 daddi(SP, SP, -1 * len * wordSize);
3790 for (i = 0; i < len; i++)
3791 {
3792 #ifdef _LP64
3793 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3794 #else
3795 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3796 #endif
3797 }
3799 /* Floating-point registers */
3800 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3801 daddi(SP, SP, -1 * len * wordSize);
3802 for (i = 0; i < len; i++)
3803 {
3804 #ifdef _LP64
3805 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3806 #else
3807 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3808 #endif
3809 }
3810 };
3812 void MacroAssembler::popad(){
3813 int i;
3815 /* Floating-point registers */
3816 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3817 for (i = 0; i < len; i++)
3818 {
3819 #ifdef _LP64
3820 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3821 #else
3822 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3823 #endif
3824 }
3825 daddi(SP, SP, len * wordSize);
3827 /* Fixed-point registers */
3828 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3829 for (i = 0; i < len; i++)
3830 {
3831 #ifdef _LP64
3832 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3833 #else
3834 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3835 #endif
3836 }
3837 daddi(SP, SP, len * wordSize);
3838 };
// Push two registers: reg1 is stored at the higher stack address, reg2 at
// the lower (SP+0).
// NOTE(review): pop2() loads its first argument from SP+0, so
// push2(a, b); pop2(a, b); would swap a and b -- callers presumably invoke
// pop2 with reversed arguments; verify at call sites.
void MacroAssembler::push2(Register reg1, Register reg2) {
#ifdef _LP64
  daddi(SP, SP, -16);
  sd(reg2, SP, 0);
  sd(reg1, SP, 8);
#else
  addi(SP, SP, -8);
  sw(reg2, SP, 0);
  sw(reg1, SP, 4);
#endif
}
// Pop two registers: reg1 is loaded from SP+0 (the lower address), reg2
// from the higher address, then the stack frame is released.
// See the ordering note on push2() regarding argument order at call sites.
void MacroAssembler::pop2(Register reg1, Register reg2) {
#ifdef _LP64
  ld(reg1, SP, 0);
  ld(reg2, SP, 8);
  daddi(SP, SP, 16);
#else
  lw(reg1, SP, 0);
  lw(reg2, SP, 4);
  addi(SP, SP, 8);
#endif
}
3864 //for UseCompressedOops Option
// Load the klass pointer of the object in 'src' into 'dst'. With compressed
// class pointers (LP64 only) the 32-bit narrow klass is loaded and decoded;
// otherwise the full pointer is loaded directly.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if(UseCompressedClassPointers){
    lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else
#endif
  // Uncompressed path (and the only path on 32-bit).
  ld(dst, src, oopDesc::klass_offset_in_bytes());
}
3875 void MacroAssembler::store_klass(Register dst, Register src) {
3876 #ifdef _LP64
3877 if(UseCompressedClassPointers){
3878 encode_klass_not_null(src);
3879 sw(src, dst, oopDesc::klass_offset_in_bytes());
3880 } else {
3881 #endif
3882 sd(src, dst, oopDesc::klass_offset_in_bytes());
3883 }
3884 }
// Load the prototype mark word of src's klass into 'dst'
// (dst is clobbered as a temporary for the klass pointer).
void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  ld(dst, Address(dst, Klass::prototype_header_offset()));
}
3891 #ifdef _LP64
// Fill the 32-bit gap left next to the narrow klass field when compressed
// class pointers are in use; no-op otherwise.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  }
}
// Load an oop from 'src' into 'dst'; with compressed oops, load the 32-bit
// narrow oop and decode it to a full pointer.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
  if(UseCompressedOops){
    lwu(dst, src);
    decode_heap_oop(dst);
  } else{
    ld(dst, src);
  }
}
// Store the oop in 'src' to 'dst'; with compressed oops, 'src' is encoded
// in place (clobbered) and stored as a 32-bit narrow oop.
void MacroAssembler::store_heap_oop(Address dst, Register src){
  if(UseCompressedOops){
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    sw(src, dst);
  } else{
    sd(src, dst);
  }
}
#ifdef ASSERT
// Debug-only sanity check that compressed-pointer mode is active and the
// heap exists. The runtime heap-base comparison (x86 version) is still
// commented out on MIPS.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  /* if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
    jcc(Assembler::equal, ok);
    STOP(msg);
    bind(ok);
    pop(rscratch1);
  }*/
}
#endif
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compress the (possibly NULL) oop in 'r' in place: subtract the heap base
// (unless zero-based) and shift right by the object-alignment shift.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: no base subtraction needed.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(r, LogMinObjAlignmentInBytes);
    }
    return;
  }

  // NULL must encode to 0, so skip the base subtraction for a NULL oop.
  Label done;
  beq(r, R0, done);
  delayed()->nop();
  dsub(r, r, S5_heapbase);
  shr(r, LogMinObjAlignmentInBytes);
  bind(done);
}
// Compress the oop in 'r' in place, assuming it is not NULL (no NULL-check
// branch is emitted; a debug check traps NULL in ASSERT builds).
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(r, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    dsub(r, r, S5_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }

}
// Two-register variant: compress the non-NULL oop in 'src' into 'dst'
// (src is preserved when dst != src).
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(src, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    move(dst, src);
  }

  if (Universe::narrow_oop_base() != NULL) {
    dsub(dst, dst, S5_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(dst, LogMinObjAlignmentInBytes);
  }

}
// Decompress the (possibly 0) narrow oop in 'r' in place: shift left by the
// object-alignment shift and add the heap base (unless zero-based).
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
  } else {
    // 0 must decode to NULL, so skip the base addition for a 0 narrow oop.
    Label done;
    shl(r, LogMinObjAlignmentInBytes);
    beq(r, R0, done);
    delayed()->nop();
    dadd(r, r, S5_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}
// Decompress the narrow oop in 'r' in place, assuming it is not 0
// (no NULL-check branch is emitted).
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shl(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      dadd(r, r, S5_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}
// Two-register variant: decompress the non-0 narrow oop in 'src' into 'dst'
// (src is preserved when dst != src).
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");

  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      // Shift into dst directly, then add the base (always non-NULL here --
      // TODO confirm: this branch adds S5_heapbase unconditionally).
      dsll(dst, src, LogMinObjAlignmentInBytes);
      dadd(dst, dst, S5_heapbase);
    } else {
      if (dst != src) {
        move(dst, src);
      }
      shl(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        dadd(dst, dst, S5_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      move(dst, src);
    }
  }
}
// Compress the klass pointer in 'r' in place. Temporarily clobbers
// S5_heapbase to hold the narrow-klass base, restoring it via
// reinit_heapbase() afterwards.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // Use r12 as a scratch register in which to temporarily load the narrow_klass_base.
    assert(r != S5_heapbase, "Encoding a klass in r12");
    li48(S5_heapbase, (int64_t)Universe::narrow_klass_base());
    dsub(r, r, S5_heapbase);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shr(r, LogKlassAlignmentInBytes);
  }
  if (Universe::narrow_klass_base() != NULL) {
    // Restore S5_heapbase to the compressed-oop base.
    reinit_heapbase();
  }
}
// Two-register variant: compress the klass pointer in 'src' into 'dst'.
// When dst != src, dst itself serves as the scratch for the base, so
// S5_heapbase is left untouched.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
  } else {
    if (Universe::narrow_klass_base() != NULL) {
      li48(dst, (int64_t)Universe::narrow_klass_base());
      dsub(dst, src, dst);
    } else {
      move(dst, src);
    }
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      shr(dst, LogKlassAlignmentInBytes);
    }
  }
}
// Function instr_size_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL). Hence, if the instructions they
// generate change, then this method needs to be updated.
// Returns a byte count (4 bytes per MIPS instruction). The inherited
// comments below still name x86 instructions (mov64/leaq) -- the MIPS
// sequence actually emitted is li48/dadd/shl; TODO update the wording.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  if (Universe::narrow_klass_base() != NULL) {
    // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
    return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  } else {
    // longest load decode klass function, mov64, leaq
    return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  }
}
4120 void MacroAssembler::decode_klass_not_null(Register r) {
4121 // Note: it will change flags
4122 assert (UseCompressedClassPointers, "should only be used for compressed headers");
4123 assert(r != S5_heapbase, "Decoding a klass in r12");
4124 // Cannot assert, unverified entry point counts instructions (see .ad file)
4125 // vtableStubs also counts instructions in pd_code_size_limit.
4126 // Also do not verify_oop as this is called by verify_oop.
4127 if (Universe::narrow_klass_shift() != 0) {
4128 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
4129 shl(r, LogKlassAlignmentInBytes);
4130 }
4131 if (Universe::narrow_klass_base() != NULL) {
4132 li48(S5_heapbase, (int64_t)Universe::narrow_klass_base());
4133 dadd(r, r, S5_heapbase);
4134 reinit_heapbase();
4135 }
4136 }
// Two-register form: decompress the narrow klass in src into dst.
// dst = narrow_klass_base + (src << narrow_klass_shift).
// Clobbers S5_heapbase, which is restored via reinit_heapbase().
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");

  if (dst == src) {
    decode_klass_not_null(dst);
  } else {
    // Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
    // NOTE(review): unlike the one-register form, the base is loaded
    // unconditionally (li48 of 0 when narrow_klass_base() is NULL), and
    // there is no assert that dst != S5_heapbase — confirm callers never
    // pass S5_heapbase as dst.
    li48(S5_heapbase, (int64_t)Universe::narrow_klass_base());
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      dsll(dst, src, Address::times_8);
      dadd(dst, dst, S5_heapbase);
    } else {
      dadd(dst, src, S5_heapbase);
    }
    reinit_heapbase();
  }
}
4160 /*
4161 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
4162 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
4163 int oop_index = oop_recorder()->find_index(obj);
4164 RelocationHolder rspec = oop_Relocation::spec(oop_index);
4165 mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
4166 }
4167 */
// Add the 32-bit constant 'value' to reg using 32-bit arithmetic.
// Clobbers AT on every emitting path.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {
    // min_jint cannot be negated below, so handle it directly instead of
    // delegating to decrementl(reg, -value).
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
    return;
  }
  if (value < 0) { decrementl(reg, -value); return; }
  if (value == 0) { ; return; }  // nothing to emit

  if(Assembler::is_simm16(value)) {
    // 32-bit: the immediate fits, use a single addiu.
    NOT_LP64(addiu(reg, reg, value));
    // NOTE(review): on LP64 this simm16 path emits the same move+addu32
    // pair as the general case below, so the branch gains nothing there —
    // possibly intentional to keep a fixed instruction count; confirm
    // before "optimizing" to an immediate form.
    LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  }
}
// Subtract the 32-bit constant 'value' from reg using 32-bit arithmetic.
// Clobbers AT on every emitting path. Mirror image of incrementl().
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {
    // min_jint cannot be negated below, so handle it directly instead of
    // delegating to incrementl(reg, -value).
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
    return;
  }
  if (value < 0) { incrementl(reg, -value); return; }
  if (value == 0) { ; return; }  // nothing to emit

  if(Assembler::is_simm16(value)) {
    // 32-bit: add the negated immediate (safe: value > 0 and fits simm16).
    NOT_LP64(addiu(reg, reg, -value));
    // NOTE(review): on LP64 this simm16 path emits the same move+subu32
    // pair as the general case below — see the matching note in incrementl.
    LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  }
}
4205 void MacroAssembler::reinit_heapbase() {
4206 if (UseCompressedOops || UseCompressedClassPointers) {
4207 if (Universe::heap() != NULL) {
4208 if (Universe::narrow_oop_base() == NULL) {
4209 move(S5_heapbase, R0);
4210 } else {
4211 li48(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
4212 }
4213 } else {
4214 li48(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
4215 ld(S5_heapbase, S5_heapbase, 0);
4216 }
4217 }
4218 }
4219 #endif // _LP64
4221 void MacroAssembler::check_klass_subtype(Register sub_klass,
4222 Register super_klass,
4223 Register temp_reg,
4224 Label& L_success) {
4225 //implement ind gen_subtype_check
4226 Label L_failure;
4227 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
4228 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
4229 bind(L_failure);
4230 }
// Emit a runtime test of the byte flag at *flag_addr against 'value'.
// When they are equal, control branches past everything emitted between
// this constructor and the destructor (which binds _label). Clobbers AT.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->li(AT, (address)flag_addr);  // AT = &flag
  _masm->lb(AT,AT,0);                 // AT = *flag (sign-extended byte)
  _masm->addi(AT,AT,-value);          // AT = *flag - value
  _masm->beq(AT,R0,_label);           // skip the guarded code when equal
  _masm->delayed()->nop();            // branch delay slot
}
// Fast part of a klass subtype check: the identity check plus a single
// probe of sub_klass's super display at super_check_offset.
// Each of L_success/L_failure/L_slow_path may be NULL, meaning "fall
// through for that outcome"; at most one may be NULL at a time.
// super_check_offset defaults to the constant -1, which means the offset
// must be loaded here from super_klass (into temp_reg).
// Clobbers AT, and temp_reg when the offset is loaded.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  // Replace any NULL label by L_fallthrough so the code below can branch
  // to it unconditionally; only one outcome may fall through.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  //cmpptr(sub_klass, super_klass);
  //local_jcc(Assembler::equal, *L_success);
  beq(sub_klass, super_klass, *L_success);
  delayed()->nop();
  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    lwu(temp_reg, super_klass, sco_offset);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  // AT = *(sub_klass + super_check_offset): the display (or cache) slot
  // to compare against super_klass. Exactly one of the register/constant
  // parts of super_check_offset is non-zero here.
  dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  daddu(AT, sub_klass, AT);
  ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset known only at runtime: a mismatch is a failure only when the
    // offset was not the secondary-super-cache slot; otherwise slow path.
    beq(super_klass, AT, *L_success);
    delayed()->nop();
    addi(AT, super_check_offset.as_register(), -sc_offset);
    if (L_failure == &L_fallthrough) {
      beq(AT, R0, *L_slow_path);
      delayed()->nop();
    } else {
      bne(AT, R0, *L_failure);
      delayed()->nop();
      b(*L_slow_path);
      delayed()->nop();
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_slow_path);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_failure);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  }

  bind(L_fallthrough);
}
// Slow part of a klass subtype check: linear scan of sub_klass's
// secondary-supers array looking for super_klass. On a hit the result is
// written into the secondary_super_cache so the fast path succeeds next
// time. Either label may be NULL (fall through). Clobbers AT, temp_reg and
// temp2_reg (T9 substitutes when temp2_reg is noreg).
// NOTE(review): set_cond_codes is unused on MIPS (x86 leftover, see the
// commented block near the end).
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  else
    temp2_reg = T9;
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  // Replace a NULL label by L_fallthrough; only one outcome may fall through.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

#if 0
  assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
#endif

  // Get super_klass value into rax (even if it was in rdi or rcx).
  // (x86 register-spilling logic, kept for reference:)
  /*
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
  */
#ifndef PRODUCT
  // Bump the partial-subtype counter for diagnostics (32-bit builds only here).
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  //LP64_ONLY( lea(rcx, pst_counter_addr) );
  //LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  ld(temp_reg, secondary_supers_addr);
  // Load the array length. (Positive movl does right thing on LP64.)
  lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  // OpenJDK8 never compresses klass pointers in the secondary-super array,
  // so each entry is a full word. temp_reg walks the array, temp2_reg
  // counts down the remaining entries.
  Label Loop, subtype;
  bind(Loop);
  beq(temp2_reg, R0, *L_failure);   // scanned every entry: not a subtype
  delayed()->nop();
  ld(AT, temp_reg, 0);
  beq(AT, super_klass, subtype);
  delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);   // advance in delay slot
  b(Loop);
  delayed()->daddi(temp2_reg, temp2_reg, -1);           // decrement in delay slot

  bind(subtype);
  // Success: cache the super we found so the fast path hits next time.
  sd(super_klass, super_cache_addr);
  if (L_success != &L_fallthrough) {
    b(*L_success);
    delayed()->nop();
  }

  /*
  if (set_cond_codes) {
    // Special hack for the AD files: rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }
  */
  // Success. Cache the super we found and proceed in triumph.
#undef IS_A_TEMP

  bind(L_fallthrough);
}
4437 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
4438 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
4439 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
4440 verify_oop(oop_result, "broken oop in call_VM_base");
4441 }
4443 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
4444 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
4445 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
4446 }
4448 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
4449 int extra_slot_offset) {
4450 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
4451 int stackElementSize = Interpreter::stackElementSize;
4452 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
4453 #ifdef ASSERT
4454 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
4455 assert(offset1 - offset == stackElementSize, "correct arithmetic");
4456 #endif
4457 Register scale_reg = NOREG;
4458 Address::ScaleFactor scale_factor = Address::no_scale;
4459 if (arg_slot.is_constant()) {
4460 offset += arg_slot.as_constant() * stackElementSize;
4461 } else {
4462 scale_reg = arg_slot.as_register();
4463 scale_factor = Address::times_8;
4464 }
4465 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
4466 // offset += wordSize; // return PC is on stack
4467 if(scale_reg==NOREG) return Address(SP, offset);
4468 else {
4469 dsll(scale_reg, scale_reg, scale_factor);
4470 daddu(scale_reg, SP, scale_reg);
4471 return Address(scale_reg, offset);
4472 }
4473 }
// Bind the branch target emitted by the constructor: everything generated
// between construction and destruction is skipped when the flag matched.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
// Load a value of 1, 2, 4 or 8 bytes from src into dst, sign- or
// zero-extending sub-word sizes according to is_signed. On 32-bit builds an
// 8-byte load needs dst2 for the second word.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case 8:
    assert(dst2 != noreg, "second dest register required");
    lw(dst, src);
    lw(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case 8: ld(dst, src); break;
#endif
  // NOTE(review): lw sign-extends on MIPS64 regardless of is_signed —
  // confirm callers never rely on an unsigned 4-byte load here.
  case 4: lw(dst, src); break;
  case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
  case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
  default: ShouldNotReachHere();
  }
}
// Store the low 1, 2, 4 or 8 bytes of src to dst. On 32-bit builds an
// 8-byte store needs src2 for the second word. Signedness is irrelevant
// for stores, so there is no is_signed parameter.
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case 8:
    assert(src2 != noreg, "second source register required");
    sw(src, dst);
    sw(src2, dst.plus_disp(BytesPerInt));
    break;
#else
  case 8: sd(src, dst); break;
#endif
  case 4: sw(src, dst); break;
  case 2: sh(src, dst); break;
  case 1: sb(src, dst); break;
  default: ShouldNotReachHere();
  }
}
4515 // Look up the method for a megamorphic invokeinterface call.
4516 // The target method is determined by <intf_klass, itable_index>.
4517 // The receiver klass is in recv_klass.
4518 // On success, the result will be in method_result, and execution falls through.
4519 // On failure, execution transfers to the given label.
4520 void MacroAssembler::lookup_interface_method(Register recv_klass,
4521 Register intf_klass,
4522 RegisterOrConstant itable_index,
4523 Register method_result,
4524 Register scan_temp,
4525 Label& L_no_such_interface) {
4526 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
4527 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
4528 "caller must use same register for non-constant itable index as for method");
4530 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
4531 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
4532 int itentry_off = itableMethodEntry::method_offset_in_bytes();
4533 int scan_step = itableOffsetEntry::size() * wordSize;
4534 int vte_size = vtableEntry::size() * wordSize;
4535 Address::ScaleFactor times_vte_scale = Address::times_ptr;
4536 assert(vte_size == wordSize, "else adjust times_vte_scale");
4538 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
4540 // %%% Could store the aligned, prescaled offset in the klassoop.
4541 // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
4542 dsll(scan_temp, scan_temp, times_vte_scale);
4543 daddu(scan_temp, recv_klass, scan_temp);
4544 daddiu(scan_temp, scan_temp, vtable_base);
4545 if (HeapWordsPerLong > 1) {
4546 // Round up to align_object_offset boundary
4547 // see code for InstanceKlass::start_of_itable!
4548 round_to(scan_temp, BytesPerLong);
4549 }
4551 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
4552 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
4553 // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
4554 if (itable_index.is_constant()) {
4555 li48(AT, (int)itable_index.is_constant());
4556 dsll(AT, AT, (int)Address::times_ptr);
4557 } else {
4558 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
4559 }
4560 daddu(AT, AT, recv_klass);
4561 daddiu(recv_klass, AT, itentry_off);
4563 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
4564 // if (scan->interface() == intf) {
4565 // result = (klass + scan->offset() + itable_index);
4566 // }
4567 // }
4568 Label search, found_method;
4570 for (int peel = 1; peel >= 0; peel--) {
4571 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
4573 if (peel) {
4574 beq(intf_klass, method_result, found_method);
4575 nop();
4576 } else {
4577 bne(intf_klass, method_result, search);
4578 nop();
4579 // (invert the test to fall through to found_method...)
4580 }
4582 if (!peel) break;
4584 bind(search);
4586 // Check that the previous entry is non-null. A null entry means that
4587 // the receiver class doesn't implement the interface, and wasn't the
4588 // same as when the caller was compiled.
4589 beq(method_result, R0, L_no_such_interface);
4590 nop();
4591 daddiu(scan_temp, scan_temp, scan_step);
4592 }
4594 bind(found_method);
4596 // Got a hit.
4597 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
4598 ld(method_result, Address(recv_klass, scan_temp, Address::times_1));
4599 }
4602 // virtual method calling
4603 void MacroAssembler::lookup_virtual_method(Register recv_klass,
4604 RegisterOrConstant vtable_index,
4605 Register method_result) {
4606 Register tmp = GP;
4607 push(tmp);
4609 if (vtable_index.is_constant()) {
4610 assert_different_registers(recv_klass, method_result, tmp);
4611 } else {
4612 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
4613 }
4614 const int base = InstanceKlass::vtable_start_offset() * wordSize;
4615 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
4616 /*
4617 Address vtable_entry_addr(recv_klass,
4618 vtable_index, Address::times_ptr,
4619 base + vtableEntry::method_offset_in_bytes());
4620 */
4621 if (vtable_index.is_constant()) {
4622 li48(AT, vtable_index.as_constant());
4623 dsll(AT, AT, (int)Address::times_ptr);
4624 } else {
4625 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
4626 }
4627 li48(tmp, base + vtableEntry::method_offset_in_bytes());
4628 daddu(tmp, tmp, AT);
4629 daddu(tmp, tmp, recv_klass);
4630 ld(method_result, tmp, 0);
4632 pop(tmp);
4633 }