Mon, 10 Apr 2017 14:48:12 -0400
[C2] Remove unnecessary nops for code alignment.
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/assembler.hpp"
28 #include "asm/assembler.inline.hpp"
29 #include "gc_interface/collectedHeap.inline.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "memory/cardTableModRefBS.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "prims/methodHandles.hpp"
34 #include "runtime/biasedLocking.hpp"
35 #include "runtime/interfaceSupport.hpp"
36 #include "runtime/objectMonitor.hpp"
37 #include "runtime/os.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubRoutines.hpp"
40 #ifndef SERIALGC
41 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
42 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
43 #include "gc_implementation/g1/heapRegion.hpp"
44 #endif
45 #ifdef PRODUCT
46 #define BLOCK_COMMENT(str) /* nothing */
47 #define STOP(error) stop(error)
48 #else
49 #define BLOCK_COMMENT(str) block_comment(str)
50 #define STOP(error) block_comment(error); stop(error)
51 #endif
53 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Static scratch areas used by save_registers()/restore_registers() to dump
// and reload the full integer and FP register files (debugging aid).
intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};
// Print the saved integer and float register images (the static i[]/f[]
// scratch arrays) to the given stream.  Debugging aid.
void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}
// Locate the k-th slot of the scratch arrays via the classic null-pointer
// member-access trick.  NOTE(review): i[] and f[] are static members, so this
// yields their absolute addresses rather than member offsets; callers in
// save_registers/restore_registers pass a matching base register -- confirm.
int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
// Emit code that stores all 32 integer registers and all 32 float registers
// into the scratch arrays, addressed relative to A0 (see i_offset/f_offset).
void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k=0; k<32; k++) {
    __ sw (as_Register(k), A0, i_offset(k));
  }

  for(int k=0; k<32; k++) {
    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
// Emit code that reloads all 32 integer registers and all 32 float registers
// from the scratch arrays written by save_registers().
void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for(int k=0; k<32; k++) {
    __ lw (as_Register(k), A0, i_offset(k));
  }

  for(int k=0; k<32; k++) {
    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
100 // Implementation of AddressLiteral
// Build an AddressLiteral for 'target', deriving the relocation spec from
// the given relocation type.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  _rspec = rspec_from_rtype(rtype, target);
}
108 // Implementation of Address
// Combine an ArrayAddress's literal base and Address index part into a
// single Address carrying the base's relocation spec.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}
// exceedingly dangerous constructor
// Wraps a raw code location as a displacement-only Address (no base or
// index register) carrying the given relocation.
Address::Address(address loc, RelocationHolder spec) {
  _base = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp = (intptr_t) loc;
  _rspec = spec;
}
130 // Implementation of Assembler
// Mnemonic tables used by print_instruction(), indexed by the relevant
// instruction field (major opcode, function field, rt field, ...).  Empty
// strings mark encodings this disassembler does not handle.

// Major (bits 31..26) opcode mnemonics.
const char *Assembler::ops_name[] = {
  "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz",
  "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui",
  "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl",
  "daddi", "daddiu", "ldl", "ldr", "", "", "", "",
  "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu",
  "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache",
  "ll", "lwc1", "", "", "lld", "ldc1", "", "ld",
  "sc", "swc1", "", "", "scd", "sdc1", "", "sd"
};

// SPECIAL-opcode function-field (bits 5..0) mnemonics.
const char* Assembler::special_name[] = {
  "sll", "", "srl", "sra", "sllv", "", "srlv", "srav",
  "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync",
  "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra",
  "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu",
  "add", "addu", "sub", "subu", "and", "or", "xor", "nor",
  "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu",
  "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "",
  "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32"
};

// COP1 (FPU) function-field mnemonics.
const char* Assembler::cop1_name[] = {
  "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg",
  "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule",
  "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt"
};

// COP1X (indexed FP load/store and fused multiply-add) mnemonics.
const char* Assembler::cop1x_name[] = {
  "lwxc1", "ldxc1", "", "", "", "luxc1", "", "",
  "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx",
  "", "", "", "", "", "", "alnv.ps", "",
  "", "", "", "", "", "", "", "",
  "madd.s", "madd.d", "", "", "", "", "madd.ps", "",
  "msub.s", "msub.d", "", "", "", "", "msub.ps", "",
  "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "",
  "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", ""
};

// SPECIAL2 function-field mnemonics (includes Loongson gs* extensions).
const char* Assembler::special2_name[] = {
  "madd", "", "mul", "", "msub", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "",
  "", "", "", "", "gsmod", "gsdmod", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", ""
};

// SPECIAL3 function-field mnemonics (bit-field insert/extract etc.).
const char* Assembler::special3_name[] = {
  "ext", "", "", "", "ins", "dinsm", "dinsu", "dins",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "bshfl", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
};

// REGIMM-opcode rt-field mnemonics.
const char* Assembler::regimm_name[] = {
  "bltz", "bgez", "bltzl", "bgezl", "", "", "", "",
  "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "",
  "bltzal", "bgezal", "bltzall", "bgezall"
};

// Loongson LDC2-group mnemonics.
const char* Assembler::gs_ldc2_name[] = {
  "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1"
};

// Loongson LWC2-group mnemonics.
const char* Assembler::gs_lwc2_name[] = {
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt",
  "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/
  "gslq", ""
};

// Loongson SDC2-group mnemonics.
const char* Assembler::gs_sdc2_name[] = {
  "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1"
};

// Loongson SWC2-group mnemonics.
const char* Assembler::gs_swc2_name[] = {
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt",
  "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1",
  "gssq", ""
};
228 //misleading name, print only branch/jump instruction
229 void Assembler::print_instruction(int inst) {
230 const char *s;
231 switch( opcode(inst) ) {
232 default:
233 s = ops_name[opcode(inst)];
234 break;
235 case special_op:
236 s = special_name[special(inst)];
237 break;
238 case regimm_op:
239 s = special_name[rt(inst)];
240 break;
241 }
243 ::tty->print("%s", s);
244 }
// Patch the instruction(s) at 'branch' so they transfer control to 'target'.
// Handles two shapes: the 8-instruction far-branch stub shown below
// (recognized by its leading dadd), and a plain conditional branch whose
// 16-bit offset field is rewritten via patched_branch().
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;

/* *
  move(AT, RA); // dadd
  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
  nop();
  lui(T9, 0); // to be patched
  ori(T9, 0);
  daddu(T9, T9, RA);
  move(RA, AT);
  jr(T9);
  */
  if(special(stub_inst) == dadd_op) {
    // Far-branch stub (layout above): either patch the lui/ori immediate
    // pair, or -- if the target turns out to be near -- overwrite the whole
    // stub with "b + nops".
    jint *pc = (jint *)branch;

    assert(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op, "Not a branch label patch");
    if(!(opcode(pc[3]) == lui_op
        && opcode(pc[4]) == ori_op
        && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset))
    {
      // Target is far: split the 32-bit offset across the lui/ori
      // immediates.  The -12 compensates for the three stub instructions
      // before the lui, since the computed address is relative to RA
      // (which the stub's bgezal set to the lui's pc).
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    }
    else
    {
      /* revert to "beq + nop" */
      // Near target: replace the stub with a single branch, padding the
      // remaining slots with nops.
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2))
  {
    // Diagnostic dump: a plain branch cannot reach the target; show the
    // surrounding instruction words.
    tty->print_cr("Illegal patching: target=0x%lx", target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++)
    {
      tty->print("0x%lx, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
309 int Assembler::is_int_mask(int x) {
310 int xx = x;
311 int count = 0;
313 while (x != 0) {
314 x &= (x - 1);
315 count++;
316 }
318 if ((1<<count) == (xx+1)) {
319 return count;
320 } else {
321 return -1;
322 }
323 }
325 int Assembler::is_jlong_mask(jlong x) {
326 jlong xx = x;
327 int count = 0;
329 while (x != 0) {
330 x &= (x - 1);
331 count++;
332 }
334 if ((1<<count) == (xx+1)) {
335 return count;
336 } else {
337 return -1;
338 }
339 }
341 //without check, maybe fixed
342 int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
343 int v = (dest_pos - inst_pos - 4)>>2;
344 switch(opcode(inst)) {
345 case j_op:
346 case jal_op:
347 assert(false, "should not use j/jal here");
348 break;
349 default:
350 assert(is_simm16(v), "must be simm16");
351 #ifndef PRODUCT
352 if(!is_simm16(v))
353 {
354 tty->print_cr("must be simm16");
355 tty->print_cr("Inst: %lx", inst);
356 }
357 #endif
359 v = low16(v);
360 inst &= 0xffff0000;
361 break;
362 }
364 return inst | v;
365 }
367 int Assembler::branch_destination(int inst, int pos) {
368 int off;
370 switch(opcode(inst)) {
371 case j_op:
372 case jal_op:
373 assert(false, "should not use j/jal here");
374 break;
375 default:
376 off = expand(low16(inst), 15);
377 break;
378 }
380 return off ? pos + 4 + (off<<2) : 0;
381 }
383 int AbstractAssembler::code_fill_byte() {
384 return 0x00; // illegal instruction 0x00000000
385 }
387 // Now the Assembler instruction (identical for 32/64 bits)
// Address-taking forms of the loads and stores: each unpacks the base
// register and displacement from the Address and forwards to the
// (Register, base, disp) primitive of the same name.

void Assembler::lb(Register rt, Address src) {
  lb(rt, src.base(), src.disp());
}

void Assembler::lbu(Register rt, Address src) {
  lbu(rt, src.base(), src.disp());
}

void Assembler::ld(Register rt, Address src){
  ld(rt, src.base(), src.disp());
}

void Assembler::ldl(Register rt, Address src){
  ldl(rt, src.base(), src.disp());
}

void Assembler::ldr(Register rt, Address src){
  ldr(rt, src.base(), src.disp());
}

void Assembler::lh(Register rt, Address src){
  lh(rt, src.base(), src.disp());
}

void Assembler::lhu(Register rt, Address src){
  lhu(rt, src.base(), src.disp());
}

void Assembler::ll(Register rt, Address src){
  ll(rt, src.base(), src.disp());
}

void Assembler::lld(Register rt, Address src){
  lld(rt, src.base(), src.disp());
}

void Assembler::lw(Register rt, Address src){
  lw(rt, src.base(), src.disp());
}

// Load effective address: rt <- base + disp.  NOTE(review): assumes the
// displacement fits in addi/daddi's simm16 -- confirm at call sites.
void Assembler::lea(Register rt, Address src) {
#ifdef _LP64
  daddi(rt, src.base(), src.disp());
#else
  addi(rt, src.base(), src.disp());
#endif
}

void Assembler::lwl(Register rt, Address src){
  lwl(rt, src.base(), src.disp());
}

void Assembler::lwr(Register rt, Address src){
  lwr(rt, src.base(), src.disp());
}

void Assembler::lwu(Register rt, Address src){
  lwu(rt, src.base(), src.disp());
}

void Assembler::sb(Register rt, Address dst) {
  sb(rt, dst.base(), dst.disp());
}

void Assembler::sc(Register rt, Address dst) {
  sc(rt, dst.base(), dst.disp());
}

void Assembler::scd(Register rt, Address dst) {
  scd(rt, dst.base(), dst.disp());
}

void Assembler::sd(Register rt, Address dst) {
  sd(rt, dst.base(), dst.disp());
}

void Assembler::sdl(Register rt, Address dst) {
  sdl(rt, dst.base(), dst.disp());
}

void Assembler::sdr(Register rt, Address dst) {
  sdr(rt, dst.base(), dst.disp());
}

void Assembler::sh(Register rt, Address dst) {
  sh(rt, dst.base(), dst.disp());
}

void Assembler::sw(Register rt, Address dst) {
  sw(rt, dst.base(), dst.disp());
}

void Assembler::swl(Register rt, Address dst) {
  swl(rt, dst.base(), dst.disp());
}

void Assembler::swr(Register rt, Address dst) {
  swr(rt, dst.base(), dst.disp());
}

void Assembler::lwc1(FloatRegister rt, Address src) {
  lwc1(rt, src.base(), src.disp());
}

void Assembler::ldc1(FloatRegister rt, Address src) {
  ldc1(rt, src.base(), src.disp());
}

void Assembler::swc1(FloatRegister rt, Address dst) {
  swc1(rt, dst.base(), dst.disp());
}

void Assembler::sdc1(FloatRegister rt, Address dst) {
  sdc1(rt, dst.base(), dst.disp());
}
// Unconditional jump: the 26-bit instruction index is the target relative
// to the 256MB region of the delay-slot pc (the region bits are masked off).
// The caller is responsible for the delay slot (see has_delay_slot()).
void Assembler::j(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000))>>2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000))>>2;
#endif
  emit_long((j_op<<26) | dest);
  has_delay_slot();
}
// Jump-and-link: same region-relative 26-bit encoding as j(), with the
// return address written to RA by the hardware.  Caller fills the delay slot.
void Assembler::jal(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000))>>2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000))>>2;
#endif
  emit_long((jal_op<<26) | dest);
  has_delay_slot();
}
// Lowest address inside the code cache that can contain code.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

// Highest address inside the code cache at which an instruction can start.
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}
532 int MacroAssembler::call_size(address target, bool far, bool patchable) {
533 if (patchable) return 6 << Assembler::LogInstructionSize;
534 if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
535 return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
536 }
// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
// Conservatively requires the target to be reachable from both ends of
// the cache, since the emitting site may move.
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}
547 void MacroAssembler::general_jump(address target) {
548 if (reachable_from_cache(target)) {
549 j(target);
550 nop();
551 } else {
552 set64(T9, (long)target);
553 jr(T9);
554 nop();
555 }
556 }
558 int MacroAssembler::insts_for_general_jump(address target) {
559 if (reachable_from_cache(target)) {
560 //j(target);
561 //nop();
562 return 2;
563 } else {
564 //set64(T9, (long)target);
565 //jr(T9);
566 //nop();
567 return insts_for_set64((jlong)target) + 2;
568 }
569 }
// Emit a jump whose target can be patched later.  The near form is padded
// with leading nops so that both forms occupy the same fixed number of
// instruction slots (see insts_for_patchable_jump).
void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}
// Fixed instruction count of the patchable_jump sequence (both the padded
// near form and the patchable_set48 far form emit six instructions).
int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}
590 void MacroAssembler::general_call(address target) {
591 if (reachable_from_cache(target)) {
592 jal(target);
593 nop();
594 } else {
595 set64(T9, (long)target);
596 jalr(T9);
597 nop();
598 }
599 }
601 int MacroAssembler::insts_for_general_call(address target) {
602 if (reachable_from_cache(target)) {
603 //jal(target);
604 //nop();
605 return 2;
606 } else {
607 //set64(T9, (long)target);
608 //jalr(T9);
609 //nop();
610 return insts_for_set64((jlong)target) + 2;
611 }
612 }
// Emit a call whose target can be patched later.  As with patchable_jump,
// the near form is nop-padded so both forms occupy the same fixed number of
// instruction slots (see insts_for_patchable_call).
void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}
// Fixed instruction count of the patchable_call sequence (both the padded
// near form and the patchable_set48 far form emit six instructions).
int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}
// beq that can reach any target: emits a plain beq when the word offset
// fits in simm16, otherwise branches around an unconditional far jump
// using the inverted condition.
void MacroAssembler::beq_far(Register rs, Register rt, address entry)
{
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4))
  {
    Assembler::beq(rs, rt, offset(entry));
  }
  else
  {
    // Inverted condition: skip the far jump when rs != rt.
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
// Label form of beq_far: delegates to the address form when the label is
// already bound; otherwise always emits the inverted-condition far sequence
// (the final distance is unknown until the label binds).
void MacroAssembler::beq_far(Register rs, Register rt, Label& L)
{
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
// bne that can reach any target: emits a plain bne when the word offset
// fits in simm16, otherwise branches around an unconditional far jump
// using the inverted condition.
void MacroAssembler::bne_far(Register rs, Register rt, address entry)
{
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4))
  {
    Assembler::bne(rs, rt, offset(entry));
  }
  else
  {
    // Inverted condition: skip the far jump when rs == rt.
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
// Label form of bne_far: delegates to the address form when the label is
// already bound; otherwise always emits the inverted-condition far sequence.
void MacroAssembler::bne_far(Register rs, Register rt, Label& L)
{
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
// Unconditional branch to a label that may be arbitrarily far away.  For an
// unbound label this emits the 8-instruction stub shown below, which
// pd_patch_instruction later recognizes (by its leading dadd) and patches.
void MacroAssembler::b_far(Label& L)
{
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    // NOTE(review): 'dest' is unused; presumably kept (and volatile) as a
    // debugging aid -- confirm before removing.
    volatile address dest = target(L);
/*
MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   0x00000055651ed514: dadd at, ra, zero
   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

   0x00000055651ed51c: sll zero, zero, 0
   0x00000055651ed520: lui t9, 0x0
   0x00000055651ed524: ori t9, t9, 0x21b8
   0x00000055651ed528: daddu t9, t9, ra
   0x00000055651ed52c: dadd ra, at, zero
   0x00000055651ed530: jr t9
   0x00000055651ed534: sll zero, zero, 0
*/
    move(AT, RA);                                      // save RA in AT
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));  // RA <- pc of the lui
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);                                      // restore RA
    jr(T9);
  }
}
// Unconditional branch to a known address: a plain b when the word offset
// fits in simm16, otherwise a RA-relative computed jump built with the same
// bgezal trick as the label form.
void MacroAssembler::b_far(address entry)
{
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if(is_simm16((entry - pc() - 4) / 4))
  {
    b(offset(entry));
  }
  else
  {
    /* address must be bounded */
    move(AT, RA);                                      // save RA in AT
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));  // RA <- next pc
    nop();
    li32(T9, entry - pc());                            // distance from the li32 itself
    daddu(T9, T9, RA);
    move(RA, AT);                                      // restore RA
    jr(T9);
  }
}
767 // Implementation of MacroAssembler
769 // First all the versions that have distinct versions depending on 32/64 bit
770 // Unless the difference is trivial (1 line or so).
772 //#ifndef _LP64
774 // 32bit versions
// Register-plus-register addressed variants: compute the effective address
// into AT, then forward to the (disp, base) primitive of the same name.

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}
// Convert an AddressLiteral into a displacement-only Address carrying its
// relocation spec.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

// Convert an ArrayAddress into an Address via Address::make_array.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
// Atomically add 'inc' to the 32-bit counter at counter_addr via an LL/SC
// retry loop.  NOTE(review): 'inc' must fit in addi's simm16 -- confirm.
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if(!Use3A2000) sync();     // older cores need a barrier before ll
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);  // sc wrote 0: reservation lost, retry
  delayed()->nop();
}
// Emit the biased-locking fast path for monitorenter.  On success control
// reaches 'done' with the bias acquired; on CAS failure it goes to
// 'slow_case' (if non-NULL); if the object is not biasable it falls through
// to 'cas_label' for the normal CAS-based locking.  Returns the code offset
// of the instruction performing the implicit null check of obj_reg.
// If tmp_reg is noreg, T9 is used and preserved around each use via push/pop.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  // AT <- biased_lock_pattern - (mark & biased_lock_mask); zero iff biasable.
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();


  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  // swap_reg <- (mark ^ prototype ^ thread) with the age bits masked off;
  // zero means the bias is ours and the epoch is current.
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT,swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  // if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  // lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
// Biased-locking fast path for monitorexit: if the mark word carries the
// biased-lock pattern, the unlock is a no-op and control goes to 'done'.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// NOTE: we dont increment the SP after call like the x86 version, maybe this is a problem, FIXME.
// by yjl 6/27/2005
// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf
// by yjl 7/11/2005
// this method will handle the stack problem, you need not to preserve the stack space for the argument now
// by yjl 8/1/2005
// Call a leaf runtime routine, keeping SP 16-byte aligned across the call.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  //call(RuntimeAddress(entry_point));
  //increment(rsp, number_of_arguments * wordSize);
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  // If SP is not 16-byte aligned, temporarily adjust it by 8 for the call.
  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  {
    call(entry_point, relocInfo::runtime_call_type);
    delayed()->nop();
  }
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  {
    // Already aligned: call directly.
    call(entry_point, relocInfo::runtime_call_type);
    delayed()->nop();
  }
  bind(E);
}
// Jump to an arbitrary address via T9; no relocation record is emitted.
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}
// Jump to 'entry', recording relocation info of the given type.
// runtime_call_type and none need no relocation record on this path.
void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}
// Call 'entry'. C/C++ code assumes T9 holds the entry point on entry to the
// callee, so the target address is always materialized in T9 first.
// For more info, see class NativeCall.
void MacroAssembler::call(address entry) {
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}
// Call 'entry', recording relocation info of the given type.
// runtime_call_type and none need no relocation record on this path.
void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}
1183 void MacroAssembler::call(address entry, RelocationHolder& rh)
1184 {
1185 switch (rh.type()) {
1186 case relocInfo::runtime_call_type:
1187 case relocInfo::none:
1188 call(entry);
1189 break;
1190 default:
1191 {
1192 InstructionMark im(this);
1193 relocate(rh);
1194 call(entry);
1195 }
1196 break;
1197 }
1198 }
// Emit an inline-cache call: IC_Klass is preloaded with the non-oop sentinel
// word and the site is recorded as a virtual call so it can be patched later.
void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}
// Normalize r to a C boolean: any non-zero value becomes 1, zero stays 0.
void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  // only reached when r != 0
  move(r, 1);
  bind(L);
}
1217 #ifndef PRODUCT
1218 extern "C" void findpc(intptr_t x);
1219 #endif
1221 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
1222 // In order to get locks to work, we need to fake a in_VM state
1223 JavaThread* thread = JavaThread::current();
1224 JavaThreadState saved_state = thread->thread_state();
1225 thread->set_thread_state(_thread_in_vm);
1226 if (ShowMessageBoxOnError) {
1227 JavaThread* thread = JavaThread::current();
1228 JavaThreadState saved_state = thread->thread_state();
1229 thread->set_thread_state(_thread_in_vm);
1230 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1231 ttyLocker ttyl;
1232 BytecodeCounter::print();
1233 }
1234 // To see where a verify_oop failed, get $ebx+40/X for this frame.
1235 // This is the value of eip which points to where verify_oop will return.
1236 if (os::message_box(msg, "Execution stopped, print registers?")) {
1237 ttyLocker ttyl;
1238 tty->print_cr("eip = 0x%08x", eip);
1239 #ifndef PRODUCT
1240 tty->cr();
1241 findpc(eip);
1242 tty->cr();
1243 #endif
1244 tty->print_cr("rax, = 0x%08x", rax);
1245 tty->print_cr("rbx, = 0x%08x", rbx);
1246 tty->print_cr("rcx = 0x%08x", rcx);
1247 tty->print_cr("rdx = 0x%08x", rdx);
1248 tty->print_cr("rdi = 0x%08x", rdi);
1249 tty->print_cr("rsi = 0x%08x", rsi);
1250 tty->print_cr("rbp, = 0x%08x", rbp);
1251 tty->print_cr("rsp = 0x%08x", rsp);
1252 BREAKPOINT;
1253 }
1254 } else {
1255 ttyLocker ttyl;
1256 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1257 assert(false, "DEBUG MESSAGE");
1258 }
1259 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
1260 }
1262 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
1263 if ( ShowMessageBoxOnError ) {
1264 JavaThreadState saved_state = JavaThread::current()->thread_state();
1265 JavaThread::current()->set_thread_state(_thread_in_vm);
1266 {
1267 // In order to get locks work, we need to fake a in_VM state
1268 ttyLocker ttyl;
1269 ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
1270 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1271 BytecodeCounter::print();
1272 }
1274 // if (os::message_box(msg, "Execution stopped, print registers?"))
1275 // regs->print(::tty);
1276 }
1277 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
1278 }
1279 else
1280 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1281 }
// Emit code that prints 'msg' via MacroAssembler::debug and then traps (brk).
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve stack space for the single argument
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore the argument space
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}
// Emit code that prints 'msg' via MacroAssembler::debug and continues.
// All registers are preserved across the call.
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  // save the caller's A0 below SP before loading the message pointer
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
1327 void MacroAssembler::print_reg(Register reg) {
1328 /*
1329 char *s = getenv("PRINT_REG");
1330 if (s == NULL)
1331 return;
1332 if (strcmp(s, "1") != 0)
1333 return;
1334 */
1335 void * cur_pc = pc();
1336 pushad();
1337 NOT_LP64(push(FP);)
1339 li(A0, (long)reg->name());
1340 if (reg == SP)
1341 addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
1342 else if (reg == A0)
1343 ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
1344 else
1345 move(A1, reg);
1346 li(A2, (long)cur_pc);
1347 push(S2);
1348 move(AT, -(StackAlignmentInBytes));
1349 move(S2, SP); // use S2 as a sender SP holder
1350 andr(SP, SP, AT); // align stack as required by ABI
1351 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
1352 delayed()->nop();
1353 move(SP, S2); // use S2 as a sender SP holder
1354 pop(S2);
1355 NOT_LP64(pop(FP);)
1356 popad();
1358 /*
1359 pushad();
1360 #ifdef _LP64
1361 if (reg == SP)
1362 addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
1363 else
1364 move(A0, reg);
1365 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
1366 delayed()->nop();
1367 #else
1368 push(FP);
1369 move(A0, reg);
1370 dsrl32(A1, reg, 0);
1371 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
1372 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
1373 delayed()->nop();
1374 pop(FP);
1375 #endif
1376 popad();
1377 pushad();
1378 NOT_LP64(push(FP);)
1379 char b[50];
1380 sprintf((char *)b, " pc: %p\n",cur_pc);
1381 li(A0, (long)(char *)b);
1382 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1383 delayed()->nop();
1384 NOT_LP64(pop(FP);)
1385 popad();
1386 */
1387 }
1389 void MacroAssembler::print_reg(FloatRegister reg) {
1390 void * cur_pc = pc();
1391 pushad();
1392 NOT_LP64(push(FP);)
1393 li(A0, (long)reg->name());
1394 push(S2);
1395 move(AT, -(StackAlignmentInBytes));
1396 move(S2, SP); // use S2 as a sender SP holder
1397 andr(SP, SP, AT); // align stack as required by ABI
1398 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1399 delayed()->nop();
1400 move(SP, S2); // use S2 as a sender SP holder
1401 pop(S2);
1402 NOT_LP64(pop(FP);)
1403 popad();
1405 pushad();
1406 NOT_LP64(push(FP);)
1407 #if 1
1408 move(FP, SP);
1409 move(AT, -(StackAlignmentInBytes));
1410 andr(SP , SP , AT);
1411 mov_d(F12, reg);
1412 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
1413 delayed()->nop();
1414 move(SP, FP);
1415 #else
1416 mov_s(F12, reg);
1417 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
1418 //delayed()->nop();
1419 #endif
1420 NOT_LP64(pop(FP);)
1421 popad();
1423 #if 0
1424 pushad();
1425 NOT_LP64(push(FP);)
1426 char* b = new char[50];
1427 sprintf(b, " pc: %p\n", cur_pc);
1428 li(A0, (long)b);
1429 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
1430 delayed()->nop();
1431 NOT_LP64(pop(FP);)
1432 popad();
1433 #endif
1434 }
1436 void MacroAssembler::increment(Register reg, int imm) {
1437 if (!imm) return;
1438 if (is_simm16(imm)) {
1439 #ifdef _LP64
1440 daddiu(reg, reg, imm);
1441 #else
1442 addiu(reg, reg, imm);
1443 #endif
1444 } else {
1445 move(AT, imm);
1446 #ifdef _LP64
1447 daddu(reg, reg, AT);
1448 #else
1449 addu(reg, reg, AT);
1450 #endif
1451 }
1452 }
// Subtract 'imm' from 'reg' by delegating to increment with the negated value.
void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
// call_VM with no Java-visible arguments (the thread is passed implicitly).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}
// call_VM with one argument in A1 (A0 is reserved for the thread).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}
// call_VM with two arguments in A1/A2 (A0 is reserved for the thread).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  if (arg_2!=A2) move(A2, arg_2);
  // arg_2 must not live in A1, which may have been overwritten above
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}
// call_VM with three arguments in A1/A2/A3 (A0 is reserved for the thread).
// Arguments must not occupy earlier argument registers or they get smashed.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1!=A1) move(A1, arg_1);
  if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}
// call_VM with an explicit last_java_sp and no Java-visible arguments.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}
// call_VM with an explicit last_java_sp and one argument in A1.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
// call_VM with an explicit last_java_sp and two arguments in A1/A2.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
// call_VM with an explicit last_java_sp and three arguments in A1/A2/A3.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
1537 void MacroAssembler::call_VM_base(Register oop_result,
1538 Register java_thread,
1539 Register last_java_sp,
1540 address entry_point,
1541 int number_of_arguments,
1542 bool check_exceptions) {
1544 address before_call_pc;
1545 // determine java_thread register
1546 if (!java_thread->is_valid()) {
1547 #ifndef OPT_THREAD
1548 java_thread = T2;
1549 get_thread(java_thread);
1550 #else
1551 java_thread = TREG;
1552 #endif
1553 }
1554 // determine last_java_sp register
1555 if (!last_java_sp->is_valid()) {
1556 last_java_sp = SP;
1557 }
1558 // debugging support
1559 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
1560 assert(number_of_arguments <= 4 , "cannot have negative number of arguments");
1561 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
1562 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
1564 assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
1566 // set last Java frame before call
1567 before_call_pc = (address)pc();
1568 set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
1570 // do the call
1571 move(A0, java_thread);
1572 call(entry_point, relocInfo::runtime_call_type);
1573 delayed()->nop();
1575 // restore the thread (cannot use the pushed argument since arguments
1576 // may be overwritten by C code generated by an optimizing compiler);
1577 // however can use the register value directly if it is callee saved.
1578 #ifndef OPT_THREAD
1579 if (java_thread >=S0 && java_thread <=S7) {
1580 #ifdef ASSERT
1581 { Label L;
1582 get_thread(AT);
1583 beq(java_thread, AT, L);
1584 delayed()->nop();
1585 stop("MacroAssembler::call_VM_base: edi not callee saved?");
1586 bind(L);
1587 }
1588 #endif
1589 } else {
1590 get_thread(java_thread);
1591 }
1592 #endif
1594 // discard thread and arguments
1595 ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
1596 // reset last Java frame
1597 reset_last_Java_frame(java_thread, false, true);
1599 check_and_handle_popframe(java_thread);
1600 check_and_handle_earlyret(java_thread);
1601 if (check_exceptions) {
1602 // check for pending exceptions (java_thread is set upon return)
1603 Label L;
1604 #ifdef _LP64
1605 ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1606 #else
1607 lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
1608 #endif
1609 beq(AT, R0, L);
1610 delayed()->nop();
1611 li(AT, before_call_pc);
1612 push(AT);
1613 jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1614 delayed()->nop();
1615 bind(L);
1616 }
1618 // get oop result if there is one and reset the value in the thread
1619 if (oop_result->is_valid()) {
1620 #ifdef _LP64
1621 ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1622 sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1623 #else
1624 lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
1625 sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
1626 #endif
1627 verify_oop(oop_result);
1628 }
1629 }
// Align SP for the runtime call, keeping the incoming SP in V0 so that
// call_VM_base can record it as last_java_sp.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  //we also reserve space for java_thread here
#ifndef _LP64
  // NOTE(review): daddi is a MIPS64 instruction, yet this is the 32-bit
  // (!_LP64) branch -- looks like it should be addi/addiu; confirm.
  daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}
// Leaf call (no Java frame bookkeeping) with no register arguments.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
// Leaf call with one argument in A0.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}
// Leaf call with two arguments in A0/A1.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}
// Leaf call with three arguments in A0/A1/A2.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}
// Leaf call bypassing any InterpreterMacroAssembler override, no arguments.
void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}
// Leaf call bypassing any InterpreterMacroAssembler override, one argument in A0.
void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}
// Leaf call bypassing any InterpreterMacroAssembler override, two arguments in A0/A1.
void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}
// Leaf call bypassing any InterpreterMacroAssembler override, three arguments in A0..A2.
void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
// Early-return handling hook after a VM call: intentionally empty on this platform.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
// Popframe handling hook after a VM call: intentionally empty on this platform.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
// Emit an implicit null check for 'reg'. If 'offset' is too large to be
// covered by the OS signal mechanism, explicitly touch M[reg] so a NULL reg
// faults here; otherwise the later access at reg+offset does the job.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
// Standard prologue: save RA and FP, then establish the new frame pointer.
void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}
// Standard epilogue, mirror of enter(): pop the frame and reload RA/FP from
// the two slots enter() pushed.
void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, - 1 * wordSize);
  lw(FP, SP, - 2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, - 1 * wordSize);
  ld(FP, SP, - 2 * wordSize);
#endif
}
1738 /*
1739 void MacroAssembler::os_breakpoint() {
1740 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
1741 // (e.g., MSVC can't call ps() otherwise)
1742 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
1743 }
1744 */
// Clear the frame anchor in 'java_thread' after returning from C land.
// Clearing last_Java_sp marks the frame as not walkable; fp/pc are cleared
// optionally.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if(clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}
// Clear the current thread's frame anchor (thread fetched here when TREG is
// not dedicated). Same semantics as the register-taking overload.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp,os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp,AT);
  // the store itself is what matters, not the value written
  sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
1797 // Calls to C land
1798 //
1799 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
1800 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
1801 // has to be reset to 0. This is required to allow proper stack traversal.
// Record the last Java frame (sp, optional fp, optional pc) in the thread's
// frame anchor before calling into C land; required for stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    // relocate so the embedded pc can be found/patched later
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  // finally record last_java_sp
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}
// Record the last Java frame in the current thread's anchor (thread fetched
// here when TREG is not dedicated). Same semantics as the overload above.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}
1864 //////////////////////////////////////////////////////////////////////////////////
1865 #ifndef SERIALGC
1867 void MacroAssembler::g1_write_barrier_pre(Register obj,
1868 #ifndef _LP64
1869 Register thread,
1870 #endif
1871 Register tmp,
1872 Register tmp2,
1873 bool tosca_live) {
1874 /* LP64_ONLY(Register thread = r15_thread;)
1875 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1876 PtrQueue::byte_offset_of_active()));
1878 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1879 PtrQueue::byte_offset_of_index()));
1880 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1881 PtrQueue::byte_offset_of_buf()));
1884 Label done;
1885 Label runtime;
1887 // if (!marking_in_progress) goto done;
1888 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
1889 cmpl(in_progress, 0);
1890 } else {
1891 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
1892 cmpb(in_progress, 0);
1893 }
1894 jcc(Assembler::equal, done);
1896 // if (x.f == NULL) goto done;
1897 cmpptr(Address(obj, 0), NULL_WORD);
1898 jcc(Assembler::equal, done);
1900 // Can we store original value in the thread's buffer?
1902 LP64_ONLY(movslq(tmp, index);)
1903 movptr(tmp2, Address(obj, 0));
1904 #ifdef _LP64
1905 cmpq(tmp, 0);
1906 #else
1907 cmpl(index, 0);
1908 #endif
1909 jcc(Assembler::equal, runtime);
1910 #ifdef _LP64
1911 subq(tmp, wordSize);
1912 movl(index, tmp);
1913 addq(tmp, buffer);
1914 #else
1915 subl(index, wordSize);
1916 movl(tmp, buffer);
1917 addl(tmp, index);
1918 #endif
1919 movptr(Address(tmp, 0), tmp2);
1920 jmp(done);
1921 bind(runtime);
1922 // save the live input values
1923 if(tosca_live) push(rax);
1924 push(obj);
1925 #ifdef _LP64
1926 movq(c_rarg0, Address(obj, 0));
1927 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
1928 #else
1929 push(thread);
1930 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
1931 pop(thread);
1932 #endif
1933 pop(obj);
1934 if(tosca_live) pop(rax);
1935 bind(done);
1936 */
1937 }
1939 void MacroAssembler::g1_write_barrier_post(Register store_addr,
1940 Register new_val,
1941 #ifndef _LP64
1942 Register thread,
1943 #endif
1944 Register tmp,
1945 Register tmp2) {
1947 /*LP64_ONLY(Register thread = r15_thread;)
1948 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1949 PtrQueue::byte_offset_of_index()));
1950 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
1951 PtrQueue::byte_offset_of_buf()));
1952 BarrierSet* bs = Universe::heap()->barrier_set();
1953 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1954 Label done;
1955 Label runtime;
1957 // Does store cross heap regions?
1959 movptr(tmp, store_addr);
1960 xorptr(tmp, new_val);
1961 shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
1962 jcc(Assembler::equal, done);
1964 // crosses regions, storing NULL?
1966 cmpptr(new_val, (int32_t) NULL_WORD);
1967 jcc(Assembler::equal, done);
1969 // storing region crossing non-NULL, is card already dirty?
1971 ExternalAddress cardtable((address) ct->byte_map_base);
1972 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1973 #ifdef _LP64
1974 const Register card_addr = tmp;
1976 movq(card_addr, store_addr);
1977 shrq(card_addr, CardTableModRefBS::card_shift);
1979 lea(tmp2, cardtable);
1981 // get the address of the card
1982 addq(card_addr, tmp2);
1983 #else
1984 const Register card_index = tmp;
1986 movl(card_index, store_addr);
1987 shrl(card_index, CardTableModRefBS::card_shift);
1989 Address index(noreg, card_index, Address::times_1);
1990 const Register card_addr = tmp;
1991 lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
1992 #endif
1993 cmpb(Address(card_addr, 0), 0);
1994 jcc(Assembler::equal, done);
1996 // storing a region crossing, non-NULL oop, card is clean.
1997 // dirty card and log.
1999 movb(Address(card_addr, 0), 0);
2001 cmpl(queue_index, 0);
2002 jcc(Assembler::equal, runtime);
2003 subl(queue_index, wordSize);
2004 movptr(tmp2, buffer);
2005 #ifdef _LP64
2006 movslq(rscratch1, queue_index);
2007 addq(tmp2, rscratch1);
2008 movq(Address(tmp2, 0), card_addr);
2009 #else
2010 addl(tmp2, queue_index);
2011 movl(Address(tmp2, 0), card_index);
2012 #endif
2013 jmp(done);
2015 bind(runtime);
2016 // save the live input values
2017 push(store_addr);
2018 push(new_val);
2019 #ifdef _LP64
2020 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
2021 #else
2022 push(thread);
2023 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
2024 pop(thread);
2025 #endif
2026 pop(new_val);
2027 pop(store_addr);
2029 bind(done);
2030 */
2031 }
2033 #endif // SERIALGC
2034 //////////////////////////////////////////////////////////////////////////////////
// Does a store check for the oop in register obj. The content of
// register obj is destroyed afterwards.
void MacroAssembler::store_check(Register obj) {
  store_check_part_1(obj);
  store_check_part_2(obj);
}
// Store check variant taking the destination address; dst is unused because
// the card index is derived from obj itself.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: turn the oop in obj into its card index (destroys obj).
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}
// Part 2: dirty the card at index obj (as produced by part 1). Clobbers AT.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  li(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  // zero the card byte, then fence so the dirty card is globally visible
  sb(R0, AT, 0);
  sync();
}
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-allocates from the current thread's TLAB; branches (far) to slow_case
// when the TLAB cannot satisfy the request.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2);//blows t1&t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  // compute the new top in 'end'
  if (var_size_in_bytes == NOREG) {
    assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
    addi(end, obj, con_size_in_bytes);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  // if end exceeds tlab_end there is not enough room: take the slow path
  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();

  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  verify_tlab(t1, t2);
}
// Defines obj, preserves var_size_in_bytes.
// Allocates directly from the shared eden with a CAS retry loop; branches
// (far) to slow_case when inline contiguous allocation is unsupported, the
// size computation wraps, or the heap end is exceeded.
void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, AT);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    b_far(slow_case);
    delayed()->nop();
  } else {

#ifndef _LP64
    Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
    lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
#else
    Address heap_top(t1);
    li(t1, (long)Universe::heap()->top_addr());
#endif
    ld_ptr(obj, heap_top);

    Register end = t2;
    Label retry;

    bind(retry);
    // compute the new top in 'end'
    if (var_size_in_bytes == NOREG) {
      assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
      addi(end, obj, con_size_in_bytes);
    } else {
      add(end, obj, var_size_in_bytes);
    }
    // if end < obj then we wrapped around => object too long => slow case
    sltu(AT, end, obj);
    bne_far(AT, R0, slow_case);
    delayed()->nop();

    // heap end exceeded => slow case
    li(AT, (long)Universe::heap()->end_addr());
    sltu(AT, AT, end);
    bne_far(AT, R0, slow_case);
    delayed()->nop();
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    if (os::is_MP()) {
      ///lock();
    }

    // if someone beat us on the allocation, try again, otherwise continue
    cmpxchg(end, heap_top, obj);
    beq_far(AT, R0, retry);
    delayed()->nop();

  }
}
// Refill the current thread's TLAB from the shared eden, or decide to allocate
// the pending object directly in eden.
//   retry:     re-attempt the TLAB allocation after a successful refill
//   try_eden:  allocate this object directly in the shared eden (TLAB retained)
//   slow_case: fall back to the runtime
// If the old TLAB is discarded, its remainder is filled with a dummy int[]
// so heap parsing stays valid. Clobbers T0, T1, T3, T8, T9, AT (but NOT T2 —
// see the note below).
// C2 doesn't invoke this one.
void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  Register top = T0;
  Register t1  = T1;
  /* Jin: tlab_refill() is called in

     [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);

     In generate_code_for(), T2 has been assigned as a register(length), which is used
     after calling tlab_refill();
     Therefore, tlab_refill() should not use T2.

     Source:

     Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
        at java.lang.System.arraycopy(Native Method)
        at java.util.Arrays.copyOf(Arrays.java:2799)  <-- alloc_array
        at sun.misc.Resource.getBytes(Resource.java:117)
        at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
   */
  Register t2 = T9;
  Register t3 = T3;
  Register thread_reg = T8;
  Label do_refill, discard_tlab;
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
    // No allocation in the shared eden.
    b(slow_case);
    delayed()->nop();
  }

  get_thread(thread_reg);

  ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));

  // calculate amount of free space (in heap words)
  sub(t1, t1, top);
  shr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  slt(AT, t2, t1);
  beq(AT, R0, discard_tlab);
  delayed()->nop();

  // Retain
  // Bump the refill waste limit so repeated retention eventually discards.
#ifndef _LP64
  move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
#else
  li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
#endif
  add(t2, t2, AT);
  st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));

  if (TLABStats) {
    // increment number of slow_allocations
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
    addiu(AT, AT, 1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  }
  b(try_eden);
  delayed()->nop();

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    addi(AT, AT, 1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    // accumulate wastage -- t1 is amount free in tlab
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
    add(AT, AT, t1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  beq(top, R0, do_refill);
  delayed()->nop();

  // set up the mark word
  li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  st_ptr(AT, top, oopDesc::mark_offset_in_bytes());

  // set the length to the remaining space
  addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  sw(t1, top, arrayOopDesc::length_offset_in_bytes());

  // set klass to intArrayKlass
#ifndef _LP64
  lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
#else
  li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  ld_ptr(t1, AT, 0);
#endif
  //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  shl(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, t3, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    assert_different_registers(thread_reg, t1);
    ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
    shl(AT, LogHeapWordSize);
    beq(AT, t1, ok);
    delayed()->nop();
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
  }
#endif
  // Install the new TLAB [top, top + size - alignment_reserve) in the thread.
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  add(top, top, t1);
  addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  verify_tlab(t1, t2);
  b(retry);
  delayed()->nop();
}
// pi/4, kept for trigonometric range-reduction style code (not referenced in
// the visible code — presumably retained from the x86 template; TODO confirm).
static const double pi_4 = 0.7853981633974483;

// Emit a call to the shared runtime sin/cos/tan routine.
// The argument (a double) is expected in F12/F13 per the MIPS calling
// convention; the result is returned in the FP result register.
//   trig: 's' (sin), 'c' (cos) or 't' (tan)
//   num_fpu_regs_in_use: unused here; kept for interface compatibility
// the x86 version is too clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME
// must get argument(a double) in F12/F13
//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
//We need to preseve the register which maybe modified during the Call @Jerome
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  //save all modified register here
  //  if (preserve_cpu_regs) {
  //  }
  //FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9
  pushad();
  //we should preserve the stack space before we call
  addi(SP, SP, -wordSize * 2);
  switch (trig){
    case 's' :
      call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
      delayed()->nop();
      break;
    case 'c':
      call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
      delayed()->nop();
      break;
    case 't':
      call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
      delayed()->nop();
      break;
    default:assert (false, "bad intrinsic");
      break;

  }

  addi(SP, SP, wordSize * 2);
  popad();
  //  if (preserve_cpu_regs) {
  //  }
}
#ifdef _LP64
// Load the immediate 'imm' into rd using the shortest suitable sequence:
// li32 for sign-extended 32-bit values, ori+dsll+ori for zero-extended 32-bit
// values, li48 for positive 48-bit addresses, li64 otherwise.
// The emitted length varies with the value — NOT patchable; use
// patchable_set48/li48 when a fixed-length site is required.
void MacroAssembler::li(Register rd, long imm) {
  if (imm <= max_jint && imm >= min_jint) {
    li32(rd, (int)imm);
  } else if (julong(imm) <= 0xFFFFFFFF) {
    assert_not_delayed();
    // lui sign-extends, so we can't use that.
    ori(rd, R0, julong(imm) >> 16);
    dsll(rd, rd, 16);
    ori(rd, rd, split_low(imm));
    //aoqi_test
    //} else if ((imm > 0) && ((imm >> 48) == 0)) {
  } else if ((imm > 0) && is_simm16(imm >> 32)) {
    /* A 48-bit address */
    li48(rd, imm);
  } else {
    li64(rd, imm);
  }
}
#else
// 32-bit VM: every long fits the li32 path.
void MacroAssembler::li(Register rd, long imm) {
  li32(rd, (int)imm);
}
#endif
// Load a (sign-extended) 32-bit immediate into reg: one addiu/addi for
// simm16 values, otherwise lui plus an optional ori for the low half.
void MacroAssembler::li32(Register reg, int imm) {
  if (is_simm16(imm)) {
    /* Jin: for imm < 0, we should use addi instead of addiu.
     *
     *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
     *
     *  78 move [int:-1|I] [a0|I]
     *    : daddi a0, zero, 0xffffffff  (correct)
     *    : daddiu a0, zero, 0xffffffff (incorrect)
     */
    if (imm >= 0)
      addiu(reg, R0, imm);
    else
      addi(reg, R0, imm);
  } else {
    lui(reg, split_low(imm >> 16));
    if (split_low(imm))       // skip the ori when the low 16 bits are zero
      ori(reg, reg, split_low(imm));
  }
}
2396 #ifdef _LP64
// Load the 64-bit constant 'value' into d with the shortest sequence
// (1..6 instructions). The length varies by value, so this site is not
// patchable; insts_for_set64() must mirror the branch structure here.
void MacroAssembler::set64(Register d, jlong value) {
  assert_not_delayed();

  int hi = (int)(value >> 32);
  int lo = (int)(value & ~0);

  if (value == lo) {  // 32-bit integer
    if (is_simm16(value)) {
      daddiu(d, R0, value);
    } else {
      lui(d, split_low(value >> 16));
      if (split_low(value)) {
        ori(d, d, split_low(value));
      }
    }
  } else if (hi == 0) {  // hardware zero-extends to upper 32
    // lui would sign-extend, so build the value with ori/dsll/ori instead.
    ori(d, R0, julong(value) >> 16);
    dsll(d, d, 16);
    if (split_low(value)) {
      ori(d, d, split_low(value));
    }
  } else if ((value> 0) && is_simm16(value >> 32)) {  // li48
    // 4 insts
    li48(d, value);
  } else {  // li64
    // 6 insts
    li64(d, value);
  }
}
2428 int MacroAssembler::insts_for_set64(jlong value) {
2429 int hi = (int)(value >> 32);
2430 int lo = (int)(value & ~0);
2432 int count = 0;
2434 if (value == lo) { // 32-bit integer
2435 if (is_simm16(value)) {
2436 //daddiu(d, R0, value);
2437 count++;
2438 } else {
2439 //lui(d, split_low(value >> 16));
2440 count++;
2441 if (split_low(value)) {
2442 //ori(d, d, split_low(value));
2443 count++;
2444 }
2445 }
2446 } else if (hi == 0) { // hardware zero-extends to upper 32
2447 //ori(d, R0, julong(value) >> 16);
2448 //dsll(d, d, 16);
2449 count += 2;
2450 if (split_low(value)) {
2451 //ori(d, d, split_low(value));
2452 count++;
2453 }
2454 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2455 // 4 insts
2456 //li48(d, value);
2457 count += 4;
2458 } else { // li64
2459 // 6 insts
2460 //li64(d, value);
2461 count += 6;
2462 }
2464 return count;
2465 }
2467 void MacroAssembler::patchable_set48(Register d, jlong value) {
2468 assert_not_delayed();
2470 int hi = (int)(value >> 32);
2471 int lo = (int)(value & ~0);
2473 int count = 0;
2475 if (value == lo) { // 32-bit integer
2476 if (is_simm16(value)) {
2477 daddiu(d, R0, value);
2478 count += 1;
2479 } else {
2480 lui(d, split_low(value >> 16));
2481 count += 1;
2482 if (split_low(value)) {
2483 ori(d, d, split_low(value));
2484 count += 1;
2485 }
2486 }
2487 } else if (hi == 0) { // hardware zero-extends to upper 32
2488 ori(d, R0, julong(value) >> 16);
2489 dsll(d, d, 16);
2490 count += 2;
2491 if (split_low(value)) {
2492 ori(d, d, split_low(value));
2493 count += 1;
2494 }
2495 } else if ((value> 0) && is_simm16(value >> 32)) { // li48
2496 // 4 insts
2497 li48(d, value);
2498 count += 4;
2499 } else { // li64
2500 tty->print_cr("value = 0x%x", value);
2501 guarantee(false, "Not supported yet !");
2502 }
2504 for (count; count < 4; count++) {
2505 nop();
2506 }
2507 }
2509 void MacroAssembler::patchable_set32(Register d, jlong value) {
2510 assert_not_delayed();
2512 int hi = (int)(value >> 32);
2513 int lo = (int)(value & ~0);
2515 int count = 0;
2517 if (value == lo) { // 32-bit integer
2518 if (is_simm16(value)) {
2519 daddiu(d, R0, value);
2520 count += 1;
2521 } else {
2522 lui(d, split_low(value >> 16));
2523 count += 1;
2524 if (split_low(value)) {
2525 ori(d, d, split_low(value));
2526 count += 1;
2527 }
2528 }
2529 } else if (hi == 0) { // hardware zero-extends to upper 32
2530 ori(d, R0, julong(value) >> 16);
2531 dsll(d, d, 16);
2532 count += 2;
2533 if (split_low(value)) {
2534 ori(d, d, split_low(value));
2535 count += 1;
2536 }
2537 } else {
2538 tty->print_cr("value = 0x%x", value);
2539 guarantee(false, "Not supported yet !");
2540 }
2542 for (count; count < 3; count++) {
2543 nop();
2544 }
2545 }
2547 void MacroAssembler::patchable_call32(Register d, jlong value) {
2548 assert_not_delayed();
2550 int hi = (int)(value >> 32);
2551 int lo = (int)(value & ~0);
2553 int count = 0;
2555 if (value == lo) { // 32-bit integer
2556 if (is_simm16(value)) {
2557 daddiu(d, R0, value);
2558 count += 1;
2559 } else {
2560 lui(d, split_low(value >> 16));
2561 count += 1;
2562 if (split_low(value)) {
2563 ori(d, d, split_low(value));
2564 count += 1;
2565 }
2566 }
2567 } else {
2568 tty->print_cr("value = 0x%x", value);
2569 guarantee(false, "Not supported yet !");
2570 }
2572 for (count; count < 2; count++) {
2573 nop();
2574 }
2575 }
// Load the compressed (narrow) encoding of Klass* k into dst, recording a
// metadata relocation so the patchable 4-instruction sequence can be updated.
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert(UseCompressedClassPointers, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");

  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  long narrowKlass = (long)Klass::encode_klass(k);

  relocate(rspec, Assembler::narrow_oop_operand);
  patchable_set48(dst, narrowKlass);
}
// Load a narrow-oop constant for 'obj' into dst with an oop relocation.
// NOTE(review): unlike set_narrow_klass, this emits the recorder *index*
// rather than an encoded oop — presumably the real value is patched in via
// the relocation later; confirm against the relocation code before changing.
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(UseCompressedOops, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");

  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);

  relocate(rspec, Assembler::narrow_oop_operand);
  patchable_set48(dst, oop_index);
}
// Load an arbitrary 64-bit immediate into rd: fixed 6-instruction sequence
// (lui/ori building 16 bits at a time with two 16-bit shifts).
void MacroAssembler::li64(Register rd, long imm) {
  assert_not_delayed();
  lui(rd, imm >> 48);
  ori(rd, rd, split_low(imm >> 32));
  dsll(rd, rd, 16);
  ori(rd, rd, split_low(imm >> 16));
  dsll(rd, rd, 16);
  ori(rd, rd, split_low(imm));
}
// Load a 48-bit immediate (bits 32..47 must be simm16) into rd: fixed
// 4-instruction sequence, used for patchable address materialization.
void MacroAssembler::li48(Register rd, long imm) {
  assert_not_delayed();
  assert(is_simm16(imm >> 32), "Not a 48-bit address");
  lui(rd, imm >> 32);
  ori(rd, rd, split_low(imm >> 16));
  dsll(rd, rd, 16);
  ori(rd, rd, split_low(imm));
}
2619 #endif
// Emit a runtime oop check of 'reg' (no-op unless VerifyOops): saves the
// caller-visible scratch registers, passes an error string in A0 and the oop
// in A1, and calls the verify_oop_subroutine stub indirectly through T9.
// NOTE: i dont push eax as i486.
// the x86 save eax for it use eax as the jump register
void MacroAssembler::verify_oop(Register reg, const char* s) {
  /*
     if (!VerifyOops) return;

     // Pass register number to verify_oop_subroutine
     char* b = new char[strlen(s) + 50];
     sprintf(b, "verify_oop: %s: %s", reg->name(), s);
     push(rax);                          // save rax,
     push(reg);                          // pass register argument
     ExternalAddress buffer((address) b);
     // avoid using pushptr, as it modifies scratch registers
     // and our contract is not to modify anything
     movptr(rax, buffer.addr());
     push(rax);
     // call indirectly to solve generation ordering problem
     movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
     call(rax);
   */
  if (!VerifyOops) return;
  const char * b = NULL;
  stringStream ss;
  ss.print("verify_oop: %s: %s", reg->name(), s);
  b = code_string(ss.as_string());
#ifdef _LP64
  pushad();
  move(A1, reg);
  li(A0, (long)b);
  // Load the stub entry indirectly; the stub may not exist yet at generation time.
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  ld(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  popad();
#else
  // Pass register number to verify_oop_subroutine
  sw(T0, SP, - wordSize);
  sw(T1, SP, - 2*wordSize);
  sw(RA, SP, - 3*wordSize);
  sw(A0, SP ,- 4*wordSize);
  sw(A1, SP ,- 5*wordSize);
  sw(AT, SP ,- 6*wordSize);
  sw(T9, SP ,- 7*wordSize);
  addiu(SP, SP, - 7 * wordSize);
  move(A1, reg);
  li(A0, (long)b);
  // call indirectly to solve generation ordering problem
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  lw(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  lw(T0, SP, 6* wordSize);
  lw(T1, SP, 5* wordSize);
  lw(RA, SP, 4* wordSize);
  lw(A0, SP, 3* wordSize);
  lw(A1, SP, 2* wordSize);
  lw(AT, SP, 1* wordSize);
  lw(T9, SP, 0* wordSize);
  addiu(SP, SP, 7 * wordSize);
#endif
}
// Like verify_oop, but the oop to check lives in memory at 'addr'.
// Emits a single nop when VerifyOops is off so the code size at the call
// site stays deterministic.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) {
    nop();
    return;
  }
  // Pass register number to verify_oop_subroutine
  const char * b = NULL;
  stringStream ss;
  ss.print("verify_oop_addr: %s", s);
  b = code_string(ss.as_string());

  st_ptr(T0, SP, - wordSize);
  st_ptr(T1, SP, - 2*wordSize);
  st_ptr(RA, SP, - 3*wordSize);
  st_ptr(A0, SP, - 4*wordSize);
  st_ptr(A1, SP, - 5*wordSize);
  st_ptr(AT, SP, - 6*wordSize);
  st_ptr(T9, SP, - 7*wordSize);
  ld_ptr(A1, addr);   // addr may use SP, so load from it before change SP
  addiu(SP, SP, - 7 * wordSize);

  li(A0, (long)b);
  // call indirectly to solve generation ordering problem
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  ld_ptr(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  ld_ptr(T0, SP, 6* wordSize);
  ld_ptr(T1, SP, 5* wordSize);
  ld_ptr(RA, SP, 4* wordSize);
  ld_ptr(A0, SP, 3* wordSize);
  ld_ptr(A1, SP, 2* wordSize);
  ld_ptr(AT, SP, 1* wordSize);
  ld_ptr(T9, SP, 0* wordSize);
  addiu(SP, SP, 7 * wordSize);
}
// Emit the body of the shared oop-verification stub: bumps the verify
// counter, range-checks the oop against the verify mask/bits, checks the
// klass is non-NULL, and calls MacroAssembler::debug on failure.
// used registers : T0, T1
void MacroAssembler::verify_oop_subroutine() {
  // RA: ra
  // A0: char* error message
  // A1: oop object to verify

  Label exit, error;
  // increment counter
  li(T0, (long)StubRoutines::verify_oop_count_addr());
  lw(AT, T0, 0);
#ifdef _LP64
  //FIXME, aoqi: rewrite addi, addu, etc in 64bits mode.
  daddi(AT, AT, 1);
#else
  addi(AT, AT, 1);
#endif
  sw(AT, T0, 0);

  // make sure object is 'reasonable'
  beq(A1, R0, exit);         // if obj is NULL it is ok
  delayed()->nop();

  // Check if the oop is in the right area of memory
  //const int oop_mask = Universe::verify_oop_mask();
  //const int oop_bits = Universe::verify_oop_bits();
  const uintptr_t oop_mask = Universe::verify_oop_mask();
  const uintptr_t oop_bits = Universe::verify_oop_bits();
  li(AT, oop_mask);
  andr(T0, A1, AT);
  li(AT, oop_bits);
  bne(T0, AT, error);
  delayed()->nop();

  // make sure klass is 'reasonable'
  //add for compressedoops
  reinit_heapbase();
  //add for compressedoops
  load_klass(T0, A1);
  beq(T0, R0, error);        // if klass is NULL it is broken
  delayed()->nop();
#if 0
  //FIXME:wuhui.
  // Check if the klass is in the right area of memory
  //const int klass_mask = Universe::verify_klass_mask();
  //const int klass_bits = Universe::verify_klass_bits();
  const uintptr_t klass_mask = Universe::verify_klass_mask();
  const uintptr_t klass_bits = Universe::verify_klass_bits();

  li(AT, klass_mask);
  andr(T1, T0, AT);
  li(AT, klass_bits);
  bne(T1, AT, error);
  delayed()->nop();
  // make sure klass' klass is 'reasonable'
  //add for compressedoops
  load_klass(T0, T0);
  beq(T0, R0, error);  // if klass' klass is NULL it is broken
  delayed()->nop();

  li(AT, klass_mask);
  andr(T1, T0, AT);
  li(AT, klass_bits);
  bne(T1, AT, error);
  delayed()->nop();     // if klass not in right area of memory it is broken too.
#endif
  // return if everything seems ok
  bind(exit);

  jr(RA);
  delayed()->nop();

  // handle errors
  bind(error);
  pushad();
#ifndef _LP64
  // keep the stack 8-byte aligned for the runtime call on 32-bit
  addi(SP, SP, (-1) * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  addiu(SP, SP, 1 * wordSize);
#endif
  popad();
  jr(RA);
  delayed()->nop();
}
// Debug-only sanity check of the current thread's TLAB invariants
// (start <= top <= end). Emits nothing unless UseTLAB && VerifyOops.
// Clobbers t1, t2 and AT.
void MacroAssembler::verify_tlab(Register t1, Register t2) {
#ifdef ASSERT
  assert_different_registers(t1, t2, AT);
  if (UseTLAB && VerifyOops) {
    Label next, ok;

    get_thread(t1);

    ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
    sltu(AT, t2, AT);
    beq(AT, R0, next);
    delayed()->nop();

    stop("assert(top >= start)");

    bind(next);
    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
    sltu(AT, AT, t2);
    beq(AT, R0, ok);
    delayed()->nop();

    stop("assert(top <= end)");

    bind(ok);

    // x86 reference implementation, kept for comparison:
    /*
       Label next, ok;
       Register t1 = rsi;
       Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

       push(t1);
       NOT_LP64(push(thread_reg));
       NOT_LP64(get_thread(thread_reg));

       movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
       cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
       jcc(Assembler::aboveEqual, next);
       stop("assert(top >= start)");
       should_not_reach_here();

       bind(next);
       movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
       cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
       jcc(Assembler::aboveEqual, ok);
       stop("assert(top <= end)");
       should_not_reach_here();

       bind(ok);
       NOT_LP64(pop(thread_reg));
       pop(t1);
     */
  }
#endif
}
// Return the delayed value either as an immediate (when it has already been
// materialized at *delayed_value_addr) or as code that computes it into tmp.
// NOTE(review): the indirect load of tmp is commented out below, so on the
// register path tmp is offset-adjusted without ever being loaded — looks
// incomplete; confirm against the upstream x86 implementation before relying
// on the non-constant path.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);
  AddressLiteral a(delayed_value_addr);
  // load indirectly to solve generation ordering problem
  //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  //ld(tmp, a);
  /* #ifdef ASSERT
     { Label L;
     testptr(tmp, tmp);
     if (WizardMode) {
     jcc(Assembler::notZero, L);
     char* buf = new char[40];
     sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
     STOP(buf);
     } else {
     jccb(Assembler::notZero, L);
     hlt();
     }
     bind(L);
     }
     #endif */
  if (offset != 0)
    daddi(tmp,tmp, offset);

  return RegisterOrConstant(tmp);
}
// Byte-swap the low 16 bits of reg and sign-extend the result
// (signed halfword bswap). Clobbers AT.
void MacroAssembler::hswap(Register reg) {
  //short
  //andi(reg, reg, 0xffff);
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  sra(reg, reg, 16);    // arithmetic shift: keeps the sign of the swapped value
  orr(reg, reg, AT);
}
// Byte-swap the low 16 bits of reg, zero-extended
// (unsigned halfword bswap). Clobbers AT.
void MacroAssembler::huswap(Register reg) {
#ifdef _LP64
  dsrl(AT, reg, 8);
  dsll(reg, reg, 24);
  dsrl(reg, reg, 16);
  orr(reg, reg, AT);
  andi(reg, reg, 0xffff);   // mask to an unsigned 16-bit result
#else
  //andi(reg, reg, 0xffff);
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  srl(reg, reg, 16);
  orr(reg, reg, AT);
#endif
}
// 32-bit byte swap (bswap) of reg using only one extra register (AT).
// The byte-lane comments below trace the value "1 2 3 4" through the steps.
// something funny to do this will only one more register AT
// 32 bits
// by yjl 6/29/2005
void MacroAssembler::swap(Register reg) {
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  orr(reg, reg, AT);
  //reg : 4 1 2 3
  srl(AT, AT, 16);
  xorr(AT, AT, reg);
  andi(AT, AT, 0xff);
  //AT : 0 0 0 1^3);
  xorr(reg, reg, AT);
  //reg : 4 1 2 1
  sll(AT, AT, 16);
  xorr(reg, reg, AT);
  //reg : 4 3 2 1
}
#ifdef _LP64

/* do 32-bit CAS using MIPS64 lld/scd

   Jin: cas_int should only compare 32-bits of the memory value.
        However, lld/scd will do 64-bit operation, which violates the intention of cas_int.
        To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
        two halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval,
        plus the high-32 bits or memory value, are stored together with SCD.

Example:

        double d = 3.1415926;
        System.err.println("hello" + d);

  sun.misc.FloatingDecimal$1.<init>()
   |
   `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()

  38 cas_int [a7a7|J] [a0|I] [a6|I]
// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
// a6: 0x4ab325aa

again:
0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"

0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
0x00000055647f3c68: dsll32 t8, t8, 0
0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
0x00000055647f3c70: sll zero, zero, 0

0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
0x00000055647f3c7c: ori v1, v1, 0xffffffff
0x00000055647f3c80: and v1, a6, v1
0x00000055647f3c84: or at, t8, v1
0x00000055647f3c88: scd at, 0x0(a7)
0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
0x00000055647f3c90: sll zero, zero, 0
0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
0x00000055647f3c98: sll zero, zero, 0
nequal:
0x00000055647f45a4: dadd a0, t9, zero
0x00000055647f45a8: dadd at, zero, zero
done:
*/

// Atomic 32-bit compare-and-swap: if *dest == c_reg, store x_reg and set
// AT = 1; otherwise set c_reg to the observed value and AT = 0.
void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
#if 0
  // Old lld/scd-based simulation of a 32-bit CAS (disabled; see comment above).
  Label done, again, nequal;
  bind(again);

  sync();
  lld(AT, dest);

  /* T9: 32 bits, sign extended
   * V1: low 32 bits, sign unextended
   * T8: high 32 bits (may be another variables's space)
   */
  sll(T9, AT, 0);  // Use 32-bit sll to extend bit 31
  dsrl32(T8, AT, 0);
  dsll32(T8, T8, 0);

  bne(T9, c_reg, nequal);
  delayed()->nop();

  ori(V1, R0, 0xFFFF);
  dsll(V1, V1, 16);
  ori(V1, V1, 0xFFFF);
  andr(V1, x_reg, V1);
  orr(AT, T8, V1);
  scd(AT, dest);
  beq(AT, R0, again);
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  move(c_reg, T9);
  move(AT, R0);

  bind(done);
#else

  /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  Label done, again, nequal;

  bind(again);

  // Loongson 3A2000 does not need the leading sync before ll/sc.
  if(!Use3A2000) sync();
  ll(AT, dest);
  bne(AT, c_reg, nequal);
  delayed()->nop();

  move(AT, x_reg);
  sc(AT, dest);
  beq(AT, R0, again);   // store-conditional failed: another CPU intervened
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  move(c_reg, AT);
  move(AT, R0);

  bind(done);
#endif
}
#endif  // cmpxchg32
// Atomic pointer-width compare-and-swap using ll/sc (lld/scd on 64-bit):
// if *dest == c_reg, store x_reg and set AT = 1; otherwise set c_reg to
// the observed value and AT = 0.
void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  Label done, again, nequal;

  bind(again);
#ifdef _LP64
  // Loongson 3A2000 does not need the leading sync before ll/sc.
  if(!Use3A2000) sync();
  lld(AT, dest);
#else
  if(!Use3A2000) sync();
  ll(AT, dest);
#endif
  bne(AT, c_reg, nequal);
  delayed()->nop();

  move(AT, x_reg);
#ifdef _LP64
  scd(AT, dest);
#else
  sc(AT, dest);
#endif
  beq(AT, R0, again);   // store-conditional failed: retry
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  move(c_reg, AT);
  move(AT, R0);

  bind(done);
}
// Atomic 64-bit compare-and-swap built from two 32-bit register pairs:
// packs {Hi,Lo} into single 64-bit values, then lld/scd-loops as in
// cmpxchg(). On success AT = 1; on failure c_regLo receives the observed
// value and AT = 0. Clobbers the Hi registers of both pairs.
void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  Label done, again, nequal;

  // x_reg = (x_regHi << 32) | (x_regLo & 0xFFFFFFFF)
  Register x_reg = x_regLo;
  dsll32(x_regHi, x_regHi, 0);
  dsll32(x_regLo, x_regLo, 0);
  dsrl32(x_regLo, x_regLo, 0);
  orr(x_reg, x_regLo, x_regHi);

  // c_reg = (c_regHi << 32) | (c_regLo & 0xFFFFFFFF)
  Register c_reg = c_regLo;
  dsll32(c_regHi, c_regHi, 0);
  dsll32(c_regLo, c_regLo, 0);
  dsrl32(c_regLo, c_regLo, 0);
  orr(c_reg, c_regLo, c_regHi);

  bind(again);

  // Loongson 3A2000 does not need the leading sync before ll/sc.
  if(!Use3A2000) sync();
  lld(AT, dest);
  bne(AT, c_reg, nequal);
  delayed()->nop();

  //move(AT, x_reg);
  dadd(AT, x_reg, R0);
  scd(AT, dest);
  beq(AT, R0, again);   // store-conditional failed: retry
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  //move(c_reg, AT);
  //move(AT, R0);
  dadd(c_reg, AT, R0);
  dadd(AT, R0, R0);
  bind(done);
}
// Single-precision float remainder: fd = fs - trunc(fs/ft) * ft.
// be sure the three register is different
void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  assert_different_registers(tmp, fs, ft);
  div_s(tmp, fs, ft);
  trunc_l_s(tmp, tmp);   // truncate the quotient toward zero
  cvt_s_l(tmp, tmp);
  mul_s(tmp, tmp, ft);
  sub_s(fd, fs, tmp);
}
// Double-precision float remainder: fd = fs - trunc(fs/ft) * ft.
// be sure the three register is different
void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  assert_different_registers(tmp, fs, ft);
  div_d(tmp, fs, ft);
  trunc_l_d(tmp, tmp);   // truncate the quotient toward zero
  cvt_d_l(tmp, tmp);
  mul_d(tmp, tmp, ft);
  sub_d(fd, fs, tmp);
}
3145 // Fast_Lock and Fast_Unlock used by C2
3147 // Because the transitions from emitted code to the runtime
3148 // monitorenter/exit helper stubs are so slow it's critical that
3149 // we inline both the stack-locking fast-path and the inflated fast path.
3150 //
3151 // See also: cmpFastLock and cmpFastUnlock.
3152 //
3153 // What follows is a specialized inline transliteration of the code
3154 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3155 // another option would be to emit TrySlowEnter and TrySlowExit methods
3156 // at startup-time. These methods would accept arguments as
3157 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3158 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3159 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3160 // In practice, however, the # of lock sites is bounded and is usually small.
3161 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3162 // if the processor uses simple bimodal branch predictors keyed by EIP
3163 // Since the helper routines would be called from multiple synchronization
3164 // sites.
3165 //
3166 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3167 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3168 // to those specialized methods. That'd give us a mostly platform-independent
3169 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
3171 // to park() or unpark() threads. We'd also need a few more unsafe operators
3172 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3173 // (b) explicit barriers or fence operations.
3174 //
3175 // TODO:
3176 //
3177 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3178 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3179 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3180 // the lock operators would typically be faster than reifying Self.
3181 //
3182 // * Ideally I'd define the primitives as:
3183 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3184 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3185 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3186 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3187 // Furthermore the register assignments are overconstrained, possibly resulting in
3188 // sub-optimal code near the synchronization site.
3189 //
3190 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3191 // Alternately, use a better sp-proximity test.
3192 //
3193 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3194 // Either one is sufficient to uniquely identify a thread.
3195 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3196 //
3197 // * Intrinsify notify() and notifyAll() for the common cases where the
3198 // object is locked by the calling thread but the waitlist is empty.
3199 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3200 //
3201 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3202 // But beware of excessive branch density on AMD Opterons.
3203 //
3204 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3205 // or failure of the fast-path. If the fast-path fails then we pass
3206 // control to the slow-path, typically in C. In Fast_Lock and
3207 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3208 // will emit a conditional branch immediately after the node.
3209 // So we have branches to branches and lots of ICC.ZF games.
3210 // Instead, it might be better to have C2 pass a "FailureLabel"
3211 // into Fast_Lock and Fast_Unlock. In the case of success, control
3212 // will drop through the node. ICC.ZF is undefined at exit.
3213 // In the case of failure, the node will branch directly to the
3214 // FailureLabel
3217 // obj: object to lock
3218 // box: on-stack box address (displaced header location) - KILLED
3219 // rax,: tmp -- KILLED
3220 // scr: tmp -- KILLED
void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  // Emits the C2 "FastLock" fast path for monitor enter.
  // Success/failure is communicated through AT: at DONE_LABEL, AT != 0 means
  // the lock was acquired on the fast path; AT == 0 forces the slow path.
  // (On this port cmpxchg leaves its success flag in AT -- see the branches
  // on AT immediately following each cmpxchg below.)

  // Ensure the register assignments are disjoint
  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (objReg != scrReg, "") ;
  guarantee (boxReg != tmpReg, "") ;
  guarantee (boxReg != scrReg, "") ;

  block_comment("FastLock");
  /*
     move(AT, 0x0);
     return;
   */
  if (PrintBiasedLockingStatistics) {
    push(tmpReg);
    atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
    pop(tmpReg);
  }

  if (EmitSync & 1) {
    // set box->dhw = unused_mark (3)
    // Force all sync thru slow-path: slow_enter() and slow_exit()
    move (AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
    sd(AT, Address(boxReg, 0));
    move (AT, (int32_t)0) ;  // Eflags.ZF = 0
  } else
  if (EmitSync & 2) {
    // Legacy stack-locking only variant (no inflated-monitor fast path).
    Label DONE_LABEL ;
    if (UseBiasedLocking) {
      // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
    }

    ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
    ori(tmpReg, tmpReg, 0x1);
    sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS

    cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
    bne(AT, R0, DONE_LABEL);                  // CAS succeeded -> locked
    delayed()->nop();

    // Recursive locking: the markword holds a stack address inside our frame
    // iff it is within one page of SP. Store 0 into the box on a hit.
    dsubu(tmpReg, tmpReg, SP);
    li(AT, (7 - os::vm_page_size() ));
    andr(tmpReg, tmpReg, AT);
    sd(tmpReg, Address(boxReg, 0));
    bind(DONE_LABEL) ;
  } else {
    // Possible cases that we'll encounter in fast_lock
    // ------------------------------------------------
    // * Inflated
    //    -- unlocked
    //    -- Locked
    //       = by self
    //       = by other
    // * biased
    //    -- by Self
    //    -- by other
    // * neutral
    // * stack-locked
    //    -- by self
    //       = sp-proximity test hits
    //       = sp-proximity test generates false-negative
    //    -- by other
    //

    Label IsInflated, DONE_LABEL, PopDone ;

    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
    // order to reduce the number of conditional branches in the most common cases.
    // Beware -- there's a subtle invariant that fetch of the markword
    // at [FETCH], below, will never observe a biased encoding (*101b).
    // If this invariant is not held we risk exclusion (safety) failure.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
    }

    ld(tmpReg, Address(objReg, 0)) ;         // [FETCH] the markword of the object.
    andi(AT, tmpReg, markOopDesc::monitor_value);
    bne(AT, R0, IsInflated);                 // inflated vs stack-locked|neutral|bias
    delayed()->nop();

    // Attempt stack-locking ...
    ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
    sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS

    cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg

    if (PrintBiasedLockingStatistics) {
      Label L;
      beq(AT, R0, L);                        // CAS failed -> skip the counter bump
      delayed()->nop();
      push(T0);
      push(T1);
      atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
      pop(T1);
      pop(T0);
      bind(L);
    }
    bne(AT, R0, DONE_LABEL);                 // stack-lock CAS succeeded
    delayed()->nop();

    // Recursive locking
    // The object is stack-locked: markword contains stack pointer to BasicLock.
    // Locked by current thread if difference with current SP is less than one page.
    dsubu(tmpReg, tmpReg, SP);
    li(AT, 7 - os::vm_page_size() );
    andr(tmpReg, tmpReg, AT);
    sd(tmpReg, Address(boxReg, 0));
    if (PrintBiasedLockingStatistics) {
      Label L;
      // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
      bne(tmpReg, R0, L);
      delayed()->nop();
      push(T0);
      push(T1);
      atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
      pop(T1);
      pop(T0);
      bind(L);
    }
    sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 -- success iff recursive */

    b(DONE_LABEL) ;
    delayed()->nop();

    bind(IsInflated) ;

    // TODO: someday avoid the ST-before-CAS penalty by
    // relocating (deferring) the following ST.
    // We should also think about trying a CAS without having
    // fetched _owner.  If the CAS is successful we may
    // avoid an RTO->RTS upgrade on the $line.
    // Without cast to int32_t a movptr will destroy r10 which is typically obj
    li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
    sd(AT, Address(boxReg, 0));

    move(boxReg, tmpReg) ;                   // boxReg now points at the ObjectMonitor
    ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    // AT = (tmpReg == 0) ? 1 : 0 -- owned monitor means slow path (AT stays 0)
    sltiu(AT, tmpReg, 1); /* Jin: AT = !tmpReg; */
    bne(tmpReg, R0, DONE_LABEL);
    delayed()->nop();

    // It's inflated and appears unlocked: try to CAS ourselves into _owner.
    if (os::is_MP()) {
      //lock();
    }
    cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
    // Intentional fall-through into DONE_LABEL ...

    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    bind(DONE_LABEL);

    // Avoid branch-to-branch on AMD processors
    // This appears to be superstition.
    if (EmitSync & 32) nop() ;

    // At DONE_LABEL the AT "ZFlag" is set as follows ...
    // Fast_Unlock uses the same protocol.
    // ZFlag == 1 -> Success
    // ZFlag == 0 -> Failure - force control through the slow-path
  }
}
3394 // obj: object to unlock
3395 // box: box address (displaced header location), killed. Must be EAX.
3396 // rbx,: killed tmp; cannot be obj nor box.
3397 //
3398 // Some commentary on balanced locking:
3399 //
3400 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3401 // Methods that don't have provably balanced locking are forced to run in the
3402 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3403 // The interpreter provides two properties:
3404 // I1: At return-time the interpreter automatically and quietly unlocks any
//     objects acquired by the current activation (frame).  Recall that the
3406 // interpreter maintains an on-stack list of locks currently held by
3407 // a frame.
// I2:  If a method attempts to unlock an object that is not held by the
//      frame the interpreter throws IMSX.
3410 //
3411 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3412 // B() doesn't have provably balanced locking so it runs in the interpreter.
3413 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3414 // is still locked by A().
3415 //
3416 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3417 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3418 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3419 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  // Emits the C2 "FastUnlock" fast path for monitor exit.
  // On exit AT carries the success flag (same protocol as fast_lock):
  // AT != 0 -> unlocked on the fast path, AT == 0 -> take the slow path.

  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (boxReg != tmpReg, "") ;

  block_comment("FastUnlock");

  /*
     move(AT, 0x0);
     return;
   */

  if (EmitSync & 4) {
    // Disable - inhibit all inlining.  Force control through the slow-path
    move(AT, R0);
  } else
  if (EmitSync & 8) {
    Label DONE_LABEL ;
    if (UseBiasedLocking) {
      biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }
    // classic stack-locking code ...
    ld(tmpReg, Address(boxReg, 0)) ;
    beq(tmpReg, R0, DONE_LABEL) ;            // displaced header == 0 -> recursive unlock
    move(AT, 0x1);  // delay slot

    cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
    bind(DONE_LABEL);
  } else {
    Label DONE_LABEL, Stacked, CheckSucc, Inflated ;

    // Critically, the biased locking test must have precedence over
    // and appear before the (box->dhw == 0) recursive stack-lock test.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }

    ld(AT, Address(boxReg, 0)) ;             // Examine the displaced header
    beq(AT, R0, DONE_LABEL) ;                // 0 indicates recursive stack-lock
    delayed()->daddiu(AT, R0, 0x1);          // report success for the recursive case

    ld(tmpReg, Address(objReg, 0)) ;         // Examine the object's markword
    andi(AT, tmpReg, markOopDesc::monitor_value) ;        // Inflated?
    beq(AT, R0, Stacked) ;                   // monitor bit clear -> stack-locked
    delayed()->nop();

    bind(Inflated) ;
    // It's inflated.
    // Despite our balanced locking property we still check that m->_owner == Self
    // as java routines or native JNI code called by this thread might
    // have released the lock.
    // Refer to the comments in synchronizer.cpp for how we might encode extra
    // state in _succ so we can avoid fetching EntryList|cxq.
    //
    // I'd like to add more cases in fast_lock() and fast_unlock() --
    // such as recursive enter and exit -- but we have to be wary of
    // I$ bloat, T$ effects and BP$ effects.
    //
    // If there's no contention try a 1-0 exit.  That is, exit without
    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock.  Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
    // each other and there's no need for an explicit barrier (fence).
    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef OPT_THREAD
    get_thread (TREG) ;
#endif

    // It's inflated: fail fast unless we own it and _recursions == 0.
    ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    xorr(boxReg, boxReg, TREG);              // 0 iff owned by this thread

    ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
    orr(boxReg, boxReg, AT);                 // also 0 only if not recursive

    move(AT, R0);                            // pre-set failure
    bne(boxReg, R0, DONE_LABEL);
    delayed()->nop();

    // Any waiters on cxq or EntryList?  If so we must hand off carefully.
    ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
    ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
    orr(boxReg, boxReg, AT);

    move(AT, R0);
    bne(boxReg, R0, CheckSucc);
    delayed()->nop();

    // Uncontended 1-0 exit: release barrier, then clear _owner.
    sync();
    sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    move(AT, 0x1);                           // success
    b(DONE_LABEL);
    delayed()->nop();

    if ((EmitSync & 65536) == 0) {
      Label LSuccess, LGoSlowPath ;
      bind (CheckSucc);
      ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
      beq(AT, R0, LGoSlowPath);              // no successor -> slow path must wake someone
      delayed()->nop();

      // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
      // the explicit ST;MEMBAR combination, but masm doesn't currently support
      // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
      // are all faster when the write buffer is populated.
      xorr(boxReg, boxReg, boxReg);
      sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      if (os::is_MP()) {
        // lock ();
      }
      // Re-check _succ: if it is still set the successor will re-acquire.
      ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
      bne(AT, R0, LSuccess);
      delayed()->nop();

#ifndef OPT_THREAD
      get_thread (TREG) ;
#endif
      // Successor vanished: try to re-acquire the lock; if we get it back we
      // must enter the slow path to perform the handoff ourselves.
      move(boxReg, R0) ;                     // box is really EAX
      //if (os::is_MP()) { lock(); }
      cmpxchg(TREG, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
      beq(AT, R0, LSuccess);                 // CAS failed -> someone else owns it: success
      delayed()->nop();
      // Intentional fall-through into slow-path

      bind (LGoSlowPath);
      move(AT, R0);                          // failure -> slow path
      b(DONE_LABEL) ;
      delayed()->nop();

      bind (LSuccess);
      move(AT, 0);
      sltiu(AT, boxReg, 1) ;                 // set ICC.ZF=1 to indicate success
      b(DONE_LABEL) ;
      delayed()->nop();
    }

    // Stack-locked case: CAS the displaced header back into the object.
    bind (Stacked);
    ld(tmpReg, Address(boxReg, 0)) ;
    //if (os::is_MP()) { lock(); }
    cmpxchg(tmpReg, Address(objReg, 0), boxReg);

    if (EmitSync & 65536) {
      bind (CheckSucc);
    }

    bind(DONE_LABEL);

    // Avoid branch to branch on AMD processors
    if (EmitSync & 32768) { nop() ; }
  }
}
// x87 FPU control word: rounding control, precision control and the six
// exception-mask bits.  Used only for debug printing of saved FPU state.
class ControlWord {
 public:
  int32_t _value;

  // Rounding control field (bits 11:10): 0 = nearest, 1 = down, 2 = up, 3 = chop.
  int  rounding_control() const { return (_value >> 10) & 3; }
  // Precision control field (bits 9:8): 0 = 24 bits, 2 = 53 bits, 3 = 64 bits.
  int  precision_control() const { return (_value >> 8) & 3; }
  bool precision()    const { return ((_value >>  5) & 1) != 0; }
  bool underflow()    const { return ((_value >>  4) & 1) != 0; }
  bool overflow()     const { return ((_value >>  3) & 1) != 0; }
  bool zero_divide()  const { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const { return ((_value >>  1) & 1) != 0; }
  bool invalid()      const { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // rounding control
    // Note: the fields are masked to two bits so the defaults are unreachable;
    // they exist to keep rc/pc provably initialized (silences -Wmaybe-uninitialized
    // and removes the stray ';' after the original switch statements).
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up  "; break;
      case 3: rc = "chop      "; break;
      default: rc = "?";         break;
    }
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
      default: pc = "?";       break;
    }
    // flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
// x87 FPU status word: busy bit, condition codes C0..C3, top-of-stack index
// and the exception flag bits.  Debug-printing helper only.
class StatusWord {
 public:
  int32_t _value;

  bool busy()         const { return (_value & (1 << 15)) != 0; }
  bool C3()           const { return (_value & (1 << 14)) != 0; }
  bool C2()           const { return (_value & (1 << 10)) != 0; }
  bool C1()           const { return (_value & (1 <<  9)) != 0; }
  bool C0()           const { return (_value & (1 <<  8)) != 0; }
  // Top-of-stack pointer, bits 13:11.
  int  top()          const { return (_value >> 11) & 7; }
  bool error_status() const { return (_value & (1 <<  7)) != 0; }
  bool stack_fault()  const { return (_value & (1 <<  6)) != 0; }
  bool precision()    const { return (_value & (1 <<  5)) != 0; }
  bool underflow()    const { return (_value & (1 <<  4)) != 0; }
  bool overflow()     const { return (_value & (1 <<  3)) != 0; }
  bool zero_divide()  const { return (_value & (1 <<  2)) != 0; }
  bool denormalized() const { return (_value & (1 <<  1)) != 0; }
  bool invalid()      const { return (_value & (1 <<  0)) != 0; }

  void print() const {
    // condition codes
    char cond[5];
    cond[0] = C3() ? '3' : '-';
    cond[1] = C2() ? '2' : '-';
    cond[2] = C1() ? '1' : '-';
    cond[3] = C0() ? '0' : '-';
    cond[4] = '\0';
    // flags
    char flags[9];
    flags[0] = error_status() ? 'E' : '-';
    flags[1] = stack_fault () ? 'S' : '-';
    flags[2] = precision   () ? 'P' : '-';
    flags[3] = underflow   () ? 'U' : '-';
    flags[4] = overflow    () ? 'O' : '-';
    flags[5] = zero_divide () ? 'Z' : '-';
    flags[6] = denormalized() ? 'D' : '-';
    flags[7] = invalid     () ? 'I' : '-';
    flags[8] = '\0';
    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, flags, cond, top());
  }

};
// x87 FPU tag word: a 2-bit tag per physical register (valid/zero/special/empty).
class TagWord {
 public:
  int32_t _value;

  // Tag for physical register i (0..7).
  int tag_at(int i) const { return (_value >> (2 * i)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
// One 80-bit x87 register image: 64-bit mantissa (_m1:_m0) and 16-bit
// sign+exponent (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the x87 "indefinite" QNaN bit pattern.
  bool is_indefinite() const {
    if (_ex != -1) return false;
    return _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  }

};
3704 class FPU_State {
3705 public:
3706 enum {
3707 register_size = 10,
3708 number_of_registers = 8,
3709 register_mask = 7
3710 };
3712 ControlWord _control_word;
3713 StatusWord _status_word;
3714 TagWord _tag_word;
3715 int32_t _error_offset;
3716 int32_t _error_selector;
3717 int32_t _data_offset;
3718 int32_t _data_selector;
3719 int8_t _register[register_size * number_of_registers];
3721 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
3722 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
3724 const char* tag_as_string(int tag) const {
3725 switch (tag) {
3726 case 0: return "valid";
3727 case 1: return "zero";
3728 case 2: return "special";
3729 case 3: return "empty";
3730 }
3731 ShouldNotReachHere();
3732 return NULL;
3733 }
3735 void print() const {
3736 // print computation registers
3737 { int t = _status_word.top();
3738 for (int i = 0; i < number_of_registers; i++) {
3739 int j = (i - t) & register_mask;
3740 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
3741 st(j)->print();
3742 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
3743 }
3744 }
3745 printf("\n");
3746 // print control registers
3747 printf("ctrl = "); _control_word.print(); printf("\n");
3748 printf("stat = "); _status_word .print(); printf("\n");
3749 printf("tags = "); _tag_word .print(); printf("\n");
3750 }
3752 };
// x86 EFLAGS image used when dumping CPU state for debugging.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow()        const { return (_value & (1 << 11)) != 0; }
  bool direction()       const { return (_value & (1 << 10)) != 0; }
  bool sign()            const { return (_value & (1 <<  7)) != 0; }
  bool zero()            const { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const { return (_value & (1 <<  4)) != 0; }
  bool parity()          const { return (_value & (1 <<  2)) != 0; }
  bool carry()           const { return (_value & (1 <<  0)) != 0; }

  void print() const {
    // flags
    char flags[8];
    flags[0] = overflow       () ? 'O' : '-';
    flags[1] = direction      () ? 'D' : '-';
    flags[2] = sign           () ? 'S' : '-';
    flags[3] = zero           () ? 'Z' : '-';
    flags[4] = auxiliary_carry() ? 'A' : '-';
    flags[5] = parity         () ? 'P' : '-';
    flags[6] = carry          () ? 'C' : '-';
    flags[7] = '\0';
    // output
    printf("%08x  flags = %s", _value, flags);
  }

};
// One general-purpose register value, printed in hex and decimal.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    const int32_t v = _value;
    printf("%08x  %11d", v, v);
  }

};
3793 class IU_State {
3794 public:
3795 Flag_Register _eflags;
3796 IU_Register _rdi;
3797 IU_Register _rsi;
3798 IU_Register _rbp;
3799 IU_Register _rsp;
3800 IU_Register _rbx;
3801 IU_Register _rdx;
3802 IU_Register _rcx;
3803 IU_Register _rax;
3805 void print() const {
3806 // computation registers
3807 printf("rax, = "); _rax.print(); printf("\n");
3808 printf("rbx, = "); _rbx.print(); printf("\n");
3809 printf("rcx = "); _rcx.print(); printf("\n");
3810 printf("rdx = "); _rdx.print(); printf("\n");
3811 printf("rdi = "); _rdi.print(); printf("\n");
3812 printf("rsi = "); _rsi.print(); printf("\n");
3813 printf("rbp, = "); _rbp.print(); printf("\n");
3814 printf("rsp = "); _rsp.print(); printf("\n");
3815 printf("\n");
3816 // control registers
3817 printf("flgs = "); _eflags.print(); printf("\n");
3818 }
3819 };
3822 class CPU_State {
3823 public:
3824 FPU_State _fpu_state;
3825 IU_State _iu_state;
3827 void print() const {
3828 printf("--------------------------------------------------\n");
3829 _iu_state .print();
3830 printf("\n");
3831 _fpu_state.print();
3832 printf("--------------------------------------------------\n");
3833 }
3835 };
3838 /*
3839 static void _print_CPU_state(CPU_State* state) {
3840 state->print();
3841 };
3843 void MacroAssembler::print_CPU_state() {
3844 push_CPU_state();
3845 push(rsp); // pass CPU state
3846 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
3847 addptr(rsp, wordSize); // discard argument
3848 pop_CPU_state();
3849 }
3850 */
3852 void MacroAssembler::align(int modulus) {
3853 while (offset() % modulus != 0) nop();
3854 }
// Verify FPU stack depth in debug builds.  Not implemented on this port;
// the x86 implementation is kept below as a reference for a future port.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  //FIXME aoqi
  // %%%%% need to implement this
  //Unimplemented();
  /*
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
  */
}
3882 #ifdef _LP64
3883 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3885 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
3886 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3887 #else
3888 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3890 Register caller_saved_fpu_registers[] = {};
3891 #endif
3893 //We preserve all caller-saved register
3894 void MacroAssembler::pushad(){
3895 int i;
3897 /* Fixed-point registers */
3898 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3899 daddi(SP, SP, -1 * len * wordSize);
3900 for (i = 0; i < len; i++)
3901 {
3902 #ifdef _LP64
3903 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3904 #else
3905 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3906 #endif
3907 }
3909 /* Floating-point registers */
3910 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3911 daddi(SP, SP, -1 * len * wordSize);
3912 for (i = 0; i < len; i++)
3913 {
3914 #ifdef _LP64
3915 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3916 #else
3917 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3918 #endif
3919 }
3920 };
3922 void MacroAssembler::popad(){
3923 int i;
3925 /* Floating-point registers */
3926 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3927 for (i = 0; i < len; i++)
3928 {
3929 #ifdef _LP64
3930 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3931 #else
3932 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3933 #endif
3934 }
3935 daddi(SP, SP, len * wordSize);
3937 /* Fixed-point registers */
3938 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3939 for (i = 0; i < len; i++)
3940 {
3941 #ifdef _LP64
3942 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3943 #else
3944 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3945 #endif
3946 }
3947 daddi(SP, SP, len * wordSize);
3948 };
// Push two registers in one SP adjustment; reg2 ends up at the lower address,
// reg1 at the higher.  On LP64 the frame slot is 16 bytes, keeping SP
// 16-byte aligned.
void MacroAssembler::push2(Register reg1, Register reg2) {
#ifdef _LP64
  daddi(SP, SP, -16);
  sd(reg2, SP, 0);
  sd(reg1, SP, 8);
#else
  addi(SP, SP, -8);
  sw(reg2, SP, 0);
  sw(reg1, SP, 4);
#endif
}
// Pop two registers pushed with push2.
// NOTE(review): pop2 loads reg1 from offset 0, where push2 stored reg2 —
// callers must pass the registers in the inverse order to round-trip a
// push2/pop2 pair; confirm against call sites.
void MacroAssembler::pop2(Register reg1, Register reg2) {
#ifdef _LP64
  ld(reg1, SP, 0);
  ld(reg2, SP, 8);
  daddi(SP, SP, 16);
#else
  lw(reg1, SP, 0);
  lw(reg2, SP, 4);
  addi(SP, SP, 8);
#endif
}
3974 //for UseCompressedOops Option
3975 void MacroAssembler::load_klass(Register dst, Register src) {
3976 #ifdef _LP64
3977 if(UseCompressedClassPointers){
3978 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3979 decode_klass_not_null(dst);
3980 } else
3981 #endif
3982 ld(dst, src, oopDesc::klass_offset_in_bytes());
3983 }
3985 void MacroAssembler::store_klass(Register dst, Register src) {
3986 #ifdef _LP64
3987 if(UseCompressedClassPointers){
3988 encode_klass_not_null(src);
3989 sw(src, dst, oopDesc::klass_offset_in_bytes());
3990 } else {
3991 #endif
3992 sd(src, dst, oopDesc::klass_offset_in_bytes());
3993 }
3994 }
// Load the prototype mark word of src's klass into dst (used by biased
// locking).  Clobbers dst.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  ld(dst, Address(dst, Klass::prototype_header_offset()));
}
4001 #ifdef _LP64
// Store src into the 32-bit klass-gap field that exists in the object header
// only when compressed class pointers are enabled; no-op otherwise.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  }
}
4008 void MacroAssembler::load_heap_oop(Register dst, Address src) {
4009 if(UseCompressedOops){
4010 lwu(dst, src);
4011 decode_heap_oop(dst);
4012 } else{
4013 ld(dst, src);
4014 }
4015 }
4017 void MacroAssembler::store_heap_oop(Address dst, Register src){
4018 if(UseCompressedOops){
4019 assert(!dst.uses(src), "not enough registers");
4020 encode_heap_oop(src);
4021 sw(src, dst);
4022 } else{
4023 sd(src, dst);
4024 }
4025 }
4027 #ifdef ASSERT
// Debug-only sanity check that the compressed-oop heap base register still
// matches Universe::narrow_ptrs_base.  The runtime check itself is not yet
// implemented on this port; the x86 body is kept below as a reference.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
/*  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
    jcc(Assembler::equal, ok);
    STOP(msg);
    bind(ok);
    pop(rscratch1);
  }*/
}
4041 #endif
4044 // Algorithm must match oop.inline.hpp encode_heap_oop.
// Compress the oop in r in place: narrow = (r - base) >> shift, with NULL
// preserved.  Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based mode: encoding is just a shift (or nothing at all).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(r, LogMinObjAlignmentInBytes);
    }
    return;
  }

  // Heap-based mode.  movz substitutes the heap base when r == 0, so the
  // subtraction below maps a NULL oop to a zero narrow oop.
  movz(r, S5_heapbase, r);
  dsub(r, r, S5_heapbase);
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }
}
// Compress the oop in src into dst: narrow = (src - base) >> shift, with
// NULL preserved.  src is left untouched when dst != src.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(src, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based mode: just shift (or copy).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      // In-place: movz maps NULL to the heap base so the subtract yields 0.
      movz(dst, S5_heapbase, dst);
      dsub(dst, dst, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
    } else {
      // src survives, so encode unconditionally and patch NULL afterwards:
      // movz clears dst when the original src was 0.
      dsub(dst, src, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
      movz(dst, R0, src);
    }
  }
}
// Compress the oop in r in place, assuming r is known non-NULL (no NULL
// preservation needed, so no movz).
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(r, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    dsub(r, r, S5_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }

}
// Compress the oop in src into dst, assuming src is known non-NULL.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(src, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");

  if (Universe::narrow_oop_base() != NULL) {
    // Heap-based: subtract the base, then shift if configured.
    dsub(dst, src, S5_heapbase);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(dst, LogMinObjAlignmentInBytes);
    }
  } else {
    // Zero-based: shift or plain copy.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  }
}
// Decompress the narrow oop in r in place: oop = (r << shift) + base, with a
// zero narrow oop decoding back to NULL.
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based mode: just shift (or nothing).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
  } else {
    // Heap-based mode: save the original narrow value in AT so the final
    // movz can map a zero narrow oop back to NULL after the base is added.
    move(AT, r);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
    dadd(r, r, S5_heapbase);
    movz(r, R0, AT);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}
// Decompress the narrow oop in src into dst: oop = (src << shift) + base,
// with a zero narrow oop decoding back to NULL.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based mode: shift or copy.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsll(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      // In-place: stash the original narrow value in AT, decode, then use
      // movz to restore NULL if the original value was zero.
      move(AT, dst);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shl(dst, LogMinObjAlignmentInBytes);
      }
      dadd(dst, dst, S5_heapbase);
      movz(dst, R0, AT);
    } else {
      // src survives: decode into dst, then movz clears dst when src == 0.
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        dsll(dst, src, LogMinObjAlignmentInBytes);
        daddu(dst, dst, S5_heapbase);
      } else {
        daddu(dst, src, S5_heapbase);
      }
      movz(dst, R0, src);
    }
  }
  verify_oop(dst, "broken oop in decode_heap_oop");
}
// Decode a compressed oop in r, in place, when the oop is known non-NULL:
// no NULL-preserving movz is needed, so the sequence is shorter.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shl(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      daddu(r, r, S5_heapbase);
    }
  } else {
    // shift == 0 implies zero-based encoding; the value is already the oop.
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}
// Decode a known non-NULL compressed oop from src into dst.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");

  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      // times_8 case adds S5_heapbase unconditionally; reinit_heapbase()
      // loads R0 into S5_heapbase when the base is NULL, so this is safe.
      dsll(dst, src, LogMinObjAlignmentInBytes);
      daddu(dst, dst, S5_heapbase);
    } else {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        daddu(dst, dst, S5_heapbase);
      }
    }
  } else {
    // shift == 0 implies zero-based encoding; only a register move is needed.
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      move(dst, src);
    }
  }
}
// Encode a known non-NULL klass pointer in r, in place:
// r = (r - narrow_klass_base) >> narrow_klass_shift.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // AT is used as scratch for the base constant, so r must not alias it.
    assert(r != AT, "Encoding a klass in AT");
    set64(AT, (int64_t)Universe::narrow_klass_base());
    dsub(r, r, AT);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shr(r, LogKlassAlignmentInBytes);
  }
  // Not neccessary for MIPS at all.
  //if (Universe::narrow_klass_base() != NULL) {
  // reinit_heapbase();
  //}
}
// Encode a known non-NULL klass pointer from src into dst.
// Unlike the one-register form, dst itself can hold the base constant,
// so AT is not clobbered on this path.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    // In-place: delegate to the single-register form (which uses AT).
    encode_klass_not_null(src);
  } else {
    if (Universe::narrow_klass_base() != NULL) {
      // dst = src - base, then optionally shift right by the alignment.
      set64(dst, (int64_t)Universe::narrow_klass_base());
      dsub(dst, src, dst);
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        shr(dst, LogKlassAlignmentInBytes);
      }
    } else {
      // Zero-based: just shift (or move if there is no shift either).
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        dsrl(dst, src, LogKlassAlignmentInBytes);
      } else {
        move(dst, src);
      }
    }
  }
}
4286 // Function instr_size_for_decode_klass_not_null() counts the instructions
4287 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
4288 // when (Universe::heap() != NULL). Hence, if the instructions they
4289 // generate change, then this method needs to be updated.
4290 int MacroAssembler::instr_size_for_decode_klass_not_null() {
4291 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
4292 if (Universe::narrow_klass_base() != NULL) {
4293 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()).
4294 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
4295 } else {
4296 // longest load decode klass function, mov64, leaq
4297 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
4298 }
4299 }
// Decode a known non-NULL narrow klass in r, in place:
// r = (r << narrow_klass_shift) + narrow_klass_base.
void MacroAssembler::decode_klass_not_null(Register r) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");
  // AT is used as scratch for the base constant below.
  assert(r != AT, "Decoding a klass in AT");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shl(r, LogKlassAlignmentInBytes);
  }
  if (Universe::narrow_klass_base() != NULL) {
    set64(AT, (int64_t)Universe::narrow_klass_base());
    daddu(r, r, AT);
    //Not neccessary for MIPS at all.
    //reinit_heapbase();
  }
}
// Decode a known non-NULL narrow klass from src into dst.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");

  if (dst == src) {
    // In-place: delegate to the single-register form.
    decode_klass_not_null(dst);
  } else {
    // Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
    // Load the base into dst first; a NULL base loads 0, making the
    // daddu below a plain copy-with-shift of src.
    set64(dst, (int64_t)Universe::narrow_klass_base());
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      dsll(AT, src, Address::times_8);
      daddu(dst, dst, AT);
    } else {
      daddu(dst, src, dst);
    }
  }
}
// Add a 32-bit immediate to reg using 32-bit arithmetic (addu32 on LP64
// keeps the result a properly sign-extended 32-bit value).
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {
    // -min_jint overflows, so min_jint cannot be routed through decrementl.
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
    return;
  }
  if (value < 0) { decrementl(reg, -value); return; }
  if (value == 0) { ; return; }  // nothing to emit

  if(Assembler::is_simm16(value)) {
    // 32-bit VM can use a single addiu; LP64 still materializes the
    // immediate in AT so the add stays 32-bit (addu32).
    NOT_LP64(addiu(reg, reg, value));
    LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  }
}
// Subtract a 32-bit immediate from reg using 32-bit arithmetic
// (mirror image of incrementl).
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {
    // -min_jint overflows, so min_jint cannot be routed through incrementl.
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
    return;
  }
  if (value < 0) { incrementl(reg, -value); return; }
  if (value == 0) { ; return; }  // nothing to emit

  if(Assembler::is_simm16(value)) {
    // 32-bit VM: single addiu with the negated immediate.
    NOT_LP64(addiu(reg, reg, -value));
    LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  }
}
// Reload S5_heapbase with the compressed-oops/klass base pointer.
// Before the heap exists the base is not yet final, so it is loaded
// indirectly through narrow_ptrs_base_addr().
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      if (Universe::narrow_oop_base() == NULL) {
        // Zero-based encoding: keep S5_heapbase at 0 so unconditional
        // daddu with it is a no-op.
        move(S5_heapbase, R0);
      } else {
        set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
      }
    } else {
      // Heap not initialized yet: load the base through its address.
      set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
      ld(S5_heapbase, S5_heapbase, 0);
    }
  }
}
4390 #endif // _LP64
// Full subtype check: branches to L_success if sub_klass is a subtype of
// super_klass, otherwise falls through (via the local L_failure label).
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  //implement ind gen_subtype_check
  Label L_failure;
  // Fast path handles trivial/primary-super cases; anything undecided
  // falls through into the slow linear scan of the secondary supers.
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}
// Emit code that skips the instructions generated between this constructor
// and the destructor (which binds _label) when *flag_addr == value.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->li(AT, (address)flag_addr);
  _masm->lb(AT,AT,0);               // load the current flag byte
  _masm->addi(AT,AT,-value);        // AT == 0 iff flag equals 'value'
  _masm->beq(AT,R0,_label);
  _masm->delayed()->nop();          // branch delay slot
}
// Fast path of the subtype check: identity compare plus the primary-super
// display lookup via super_klass->super_check_offset.  Undecided cases
// (secondary supers) go to L_slow_path; any NULL label means fall-through.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label* L_success,
                           Label* L_failure,
                           Label* L_slow_path,
                           RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  // The offset must be loaded from super_klass unless the caller passed it in.
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  //cmpptr(sub_klass, super_klass);
  //local_jcc(Assembler::equal, *L_success);
  beq(sub_klass, super_klass, *L_success);
  delayed()->nop();
  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    lwu(temp_reg, super_klass, sco_offset);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  // AT = *(sub_klass + super_check_offset): the candidate super at the
  // display slot (or the secondary-super cache when offset == sc_offset).
  dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  daddu(AT, sub_klass, AT);
  ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset known only at runtime: must distinguish a display miss
    // (fail) from a cache-slot miss (slow path).
    beq(super_klass, AT, *L_success);
    delayed()->nop();
    addi(AT, super_check_offset.as_register(), -sc_offset);
    if (L_failure == &L_fallthrough) {
      beq(AT, R0, *L_slow_path);
      delayed()->nop();
    } else {
      bne(AT, R0, *L_failure);
      delayed()->nop();
      b(*L_slow_path);
      delayed()->nop();
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_slow_path);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_failure);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  }

  bind(L_fallthrough);

}
// Slow path of the subtype check: linear scan of sub_klass's
// secondary-supers array for super_klass.  On a hit, the result is
// stored into the secondary-super cache so the fast path wins next time.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Register temp2_reg,
                           Label* L_success,
                           Label* L_failure,
                           bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  else
    temp2_reg = T9;  // default scratch when the caller supplies none
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

#if 0
  assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
#endif

  // Get super_klass value into rax (even if it was in rdi or rcx).
  // (x86 register-spilling logic, kept for reference; not needed on MIPS.)
  /*
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
  */
#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  //LP64_ONLY( lea(rcx, pst_counter_addr) );
  //LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  ld(temp_reg, secondary_supers_addr);
  // Load the array length. (Positive movl does right thing on LP64.)
  lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
  // Loop: temp_reg walks the array, temp2_reg counts remaining elements.
  Label Loop, subtype;
  bind(Loop);
  beq(temp2_reg, R0, *L_failure);        // exhausted the array: not a subtype
  delayed()->nop();
  ld(AT, temp_reg, 0);
  beq(AT, super_klass, subtype);
  delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);  // advance in the delay slot
  b(Loop);
  delayed()->daddi(temp2_reg, temp2_reg, -1);          // decrement count in the delay slot

  bind(subtype);
  // Success.  Cache the super we found so the fast path hits next time.
  sd(super_klass, super_cache_addr);
  if (L_success != &L_fallthrough) {
    b(*L_success);
    delayed()->nop();
  }

  /*
  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }
  */
  // Success.  Cache the super we found and proceed in triumph.
#undef IS_A_TEMP

  bind(L_fallthrough);
}
// Fetch the oop result left by a VM call into oop_result and clear the
// thread-local slot so the GC does not see a stale root.
void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop(oop_result, "broken oop in call_VM_base");
}
// Fetch the metadata result left by a VM call and clear the slot.
// No verify_oop: the value is Metadata*, not an oop.
void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
}
// Compute the address of an interpreter expression-stack argument slot.
// arg_slot may be a constant (folded into the displacement) or a register
// (scaled in place — note the register is clobbered in that case).
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register scale_reg    = NOREG;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg = arg_slot.as_register();
    scale_factor = Address::times_8;
  }
  // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
  //  offset += wordSize;           // return PC is on stack
  if(scale_reg==NOREG) return Address(SP, offset);
  else {
    // Variable slot: fold the scaled index into scale_reg and address
    // relative to it (clobbers scale_reg).
    dsll(scale_reg, scale_reg, scale_factor);
    daddu(scale_reg, SP, scale_reg);
    return Address(scale_reg, offset);
  }
}
// Bind the skip target: the branch emitted by the constructor lands here,
// just past the guarded code.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
// Load a value of the given byte size from src into dst, sign- or
// zero-extending sub-word sizes per is_signed.  On 32-bit VMs an 8-byte
// load needs a second destination register (dst2) for the high word.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    lw(dst,  src);
    lw(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  ld(dst, src); break;
#endif
  case  4:  lw(dst, src); break;
  case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  case  1:  is_signed ? lb( dst, src) : lbu( dst, src); break;
  default:  ShouldNotReachHere();
  }
}
// Store a value of the given byte size from src to dst.  On 32-bit VMs an
// 8-byte store takes the high word from src2.  Stores need no signedness.
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    sw(src,  dst);
    sw(src2, dst.plus_disp(BytesPerInt));
    break;
#else
  case  8:  sd(src, dst); break;
#endif
  case  4:  sw(src, dst); break;
  case  2:  sh(src, dst); break;
  case  1:  sb(src, dst); break;
  default:  ShouldNotReachHere();
  }
}
4686 // Look up the method for a megamorphic invokeinterface call.
4687 // The target method is determined by <intf_klass, itable_index>.
4688 // The receiver klass is in recv_klass.
4689 // On success, the result will be in method_result, and execution falls through.
4690 // On failure, execution transfers to the given label.
4691 void MacroAssembler::lookup_interface_method(Register recv_klass,
4692 Register intf_klass,
4693 RegisterOrConstant itable_index,
4694 Register method_result,
4695 Register scan_temp,
4696 Label& L_no_such_interface) {
4697 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
4698 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
4699 "caller must use same register for non-constant itable index as for method");
4701 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
4702 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
4703 int itentry_off = itableMethodEntry::method_offset_in_bytes();
4704 int scan_step = itableOffsetEntry::size() * wordSize;
4705 int vte_size = vtableEntry::size() * wordSize;
4706 Address::ScaleFactor times_vte_scale = Address::times_ptr;
4707 assert(vte_size == wordSize, "else adjust times_vte_scale");
4709 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
4711 // %%% Could store the aligned, prescaled offset in the klassoop.
4712 // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
4713 dsll(scan_temp, scan_temp, times_vte_scale);
4714 daddu(scan_temp, recv_klass, scan_temp);
4715 daddiu(scan_temp, scan_temp, vtable_base);
4716 if (HeapWordsPerLong > 1) {
4717 // Round up to align_object_offset boundary
4718 // see code for InstanceKlass::start_of_itable!
4719 round_to(scan_temp, BytesPerLong);
4720 }
4722 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
4723 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
4724 // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
4725 if (itable_index.is_constant()) {
4726 set64(AT, (int)itable_index.is_constant());
4727 dsll(AT, AT, (int)Address::times_ptr);
4728 } else {
4729 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
4730 }
4731 daddu(AT, AT, recv_klass);
4732 daddiu(recv_klass, AT, itentry_off);
4734 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
4735 // if (scan->interface() == intf) {
4736 // result = (klass + scan->offset() + itable_index);
4737 // }
4738 // }
4739 Label search, found_method;
4741 for (int peel = 1; peel >= 0; peel--) {
4742 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
4744 if (peel) {
4745 beq(intf_klass, method_result, found_method);
4746 nop();
4747 } else {
4748 bne(intf_klass, method_result, search);
4749 nop();
4750 // (invert the test to fall through to found_method...)
4751 }
4753 if (!peel) break;
4755 bind(search);
4757 // Check that the previous entry is non-null. A null entry means that
4758 // the receiver class doesn't implement the interface, and wasn't the
4759 // same as when the caller was compiled.
4760 beq(method_result, R0, L_no_such_interface);
4761 nop();
4762 daddiu(scan_temp, scan_temp, scan_step);
4763 }
4765 bind(found_method);
4767 // Got a hit.
4768 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
4769 //ld(method_result, Address(recv_klass, scan_temp, Address::times_1));
4770 if(UseLoongsonISA) {
4771 gsldx(method_result, recv_klass, scan_temp, 0);
4772 } else {
4773 daddu(AT, recv_klass, scan_temp);
4774 ld(method_result, AT);
4775 }
4776 }
// virtual method calling
// Load the Method* for the given vtable index of recv_klass into
// method_result.  GP is spilled and used as a scratch register.
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  Register tmp = GP;
  push(tmp);

  if (vtable_index.is_constant()) {
    assert_different_registers(recv_klass, method_result, tmp);
  } else {
    assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  }
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  /*
  Address vtable_entry_addr(recv_klass,
                            vtable_index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
  */
  // AT = vtable_index * wordSize (scaled slot offset)
  if (vtable_index.is_constant()) {
    set64(AT, vtable_index.as_constant());
    dsll(AT, AT, (int)Address::times_ptr);
  } else {
    dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  }
  // method_result = *(recv_klass + base + method_offset + AT)
  set64(tmp, base + vtableEntry::method_offset_in_bytes());
  daddu(tmp, tmp, AT);
  daddu(tmp, tmp, recv_klass);
  ld(method_result, tmp, 0);

  pop(tmp);
}