Fri, 31 Mar 2017 12:43:02 -0400
[C2] Optimize the oop/klass encoding and decoding (Follows a4946a9e94b0).
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for (k = 0; k < sizeof(i) / sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for (k = 0; k < sizeof(f) / sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }

void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ sw(as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ swc1(as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ lw(as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ lwc1(as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  _rspec = rspec_from_rtype(rtype, target);
}

// Implementation of Address

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}
// Implementation of Assembler

const char *Assembler::ops_name[] = {
  "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz",
  "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui",
  "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl",
  "daddi", "daddiu", "ldl", "ldr", "", "", "", "",
  "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu",
  "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache",
  "ll", "lwc1", "", "", "lld", "ldc1", "", "ld",
  "sc", "swc1", "", "", "scd", "sdc1", "", "sd"
};

const char* Assembler::special_name[] = {
  "sll", "", "srl", "sra", "sllv", "", "srlv", "srav",
  "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync",
  "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra",
  "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu",
  "add", "addu", "sub", "subu", "and", "or", "xor", "nor",
  "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu",
  "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "",
  "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32"
};

const char* Assembler::cop1_name[] = {
  "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg",
  "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule",
  "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt"
};

const char* Assembler::cop1x_name[] = {
  "lwxc1", "ldxc1", "", "", "", "luxc1", "", "",
  "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx",
  "", "", "", "", "", "", "alnv.ps", "",
  "", "", "", "", "", "", "", "",
  "madd.s", "madd.d", "", "", "", "", "madd.ps", "",
  "msub.s", "msub.d", "", "", "", "", "msub.ps", "",
  "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "",
  "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", ""
};

const char* Assembler::special2_name[] = {
  "madd", "", "mul", "", "msub", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "",
  "", "", "", "", "gsmod", "gsdmod", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", ""
};

const char* Assembler::special3_name[] = {
  "ext", "", "", "", "ins", "dinsm", "dinsu", "dins",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "bshfl", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", ""
};

const char* Assembler::regimm_name[] = {
  "bltz", "bgez", "bltzl", "bgezl", "", "", "", "",
  "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "",
  "bltzal", "bgezal", "bltzall", "bgezall"
};

const char* Assembler::gs_ldc2_name[] = {
  "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1"
};

const char* Assembler::gs_lwc2_name[] = {
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt",
  "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "", /* LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits. */
  "gslq", ""
};

const char* Assembler::gs_sdc2_name[] = {
  "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1"
};

const char* Assembler::gs_swc2_name[] = {
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt",
  "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1",
  "gssq", ""
};
// Note: misleading name; this prints only branch/jump instructions.
void Assembler::print_instruction(int inst) {
  const char *s;
  switch (opcode(inst)) {
  default:
    s = ops_name[opcode(inst)];
    break;
  case special_op:
    s = special_name[special(inst)];
    break;
  case regimm_op:
    s = regimm_name[rt(inst)];
    break;
  }

  ::tty->print("%s", s);
}
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;

  /* The far-branch stub emitted by b_far:
       move(AT, RA); // dadd
       emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
       nop();
       lui(T9, 0); // to be patched
       ori(T9, T9, 0);
       daddu(T9, T9, RA);
       move(RA, AT);
       jr(T9);
  */
  if (special(stub_inst) == dadd_op) {
    jint *pc = (jint *)branch;

    assert(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op, "Not a branch label patch");
    if (!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      // The offset fits in a signed 16-bit field: revert to "b + nop".
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
#undef __
    }
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
      tty->print("0x%x, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
int Assembler::is_int_mask(int x) {
  int xx = x;
  int count = 0;

  while (x != 0) {
    x &= (x - 1);
    count++;
  }

  if ((1 << count) == (xx + 1)) {
    return count;
  } else {
    return -1;
  }
}

int Assembler::is_jlong_mask(jlong x) {
  jlong xx = x;
  int count = 0;

  while (x != 0) {
    x &= (x - 1);
    count++;
  }

  // Shift a jlong, not an int: an int shift overflows for masks of 31 or
  // more bits.
  if (((jlong)1 << count) == (xx + 1)) {
    return count;
  } else {
    return -1;
  }
}
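// How the mask test above works: x &= (x - 1) clears the lowest set bit,
// so the loop computes the population count. x is a contiguous low-order
// mask exactly when x == 2^count - 1, i.e. (1 << count) == x + 1.
// Worked examples: is_int_mask(0x00ff) -> popcount 8, 0x100 == 0xff + 1,
// returns 8; is_int_mask(0x00fe) -> popcount 7, 0x80 != 0xfe + 1, returns -1.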
// Patches the 16-bit offset field of a conditional branch; j/jal are
// deliberately unsupported here.
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
  int v = (dest_pos - inst_pos - 4) >> 2;
  switch (opcode(inst)) {
  case j_op:
  case jal_op:
    assert(false, "should not use j/jal here");
    break;
  default:
    assert(is_simm16(v), "must be simm16");
#ifndef PRODUCT
    if (!is_simm16(v)) {
      tty->print_cr("must be simm16");
      tty->print_cr("Inst: %x", inst);
    }
#endif

    v = low16(v);
    inst &= 0xffff0000;
    break;
  }

  return inst | v;
}
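// Worked example of the encoding: MIPS branch offsets count instruction
// words relative to the delay slot (PC + 4). Patching an instruction at
// inst_pos = 0x100 to reach dest_pos = 0x120 stores
// (0x120 - 0x100 - 4) >> 2 = 7 in the low 16 bits, and the hardware
// recomputes the target as inst_pos + 4 + (7 << 2) = 0x120, which is
// exactly what branch_destination below does in reverse.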
int Assembler::branch_destination(int inst, int pos) {
  int off = 0; // initialized so the unsupported j/jal path stays defined

  switch (opcode(inst)) {
  case j_op:
  case jal_op:
    assert(false, "should not use j/jal here");
    break;
  default:
    off = expand(low16(inst), 15);
    break;
  }

  return off ? pos + 4 + (off << 2) : 0;
}

int AbstractAssembler::code_fill_byte() {
  return 0x00; // illegal instruction 0x00000000
}
// Now the Assembler instructions (identical for 32/64 bit)

void Assembler::lb(Register rt, Address src) {
  lb(rt, src.base(), src.disp());
}

void Assembler::lbu(Register rt, Address src) {
  lbu(rt, src.base(), src.disp());
}

void Assembler::ld(Register rt, Address src) {
  ld(rt, src.base(), src.disp());
}

void Assembler::ldl(Register rt, Address src) {
  ldl(rt, src.base(), src.disp());
}

void Assembler::ldr(Register rt, Address src) {
  ldr(rt, src.base(), src.disp());
}

void Assembler::lh(Register rt, Address src) {
  lh(rt, src.base(), src.disp());
}

void Assembler::lhu(Register rt, Address src) {
  lhu(rt, src.base(), src.disp());
}

void Assembler::ll(Register rt, Address src) {
  ll(rt, src.base(), src.disp());
}

void Assembler::lld(Register rt, Address src) {
  lld(rt, src.base(), src.disp());
}

void Assembler::lw(Register rt, Address src) {
  lw(rt, src.base(), src.disp());
}

void Assembler::lea(Register rt, Address src) {
#ifdef _LP64
  daddi(rt, src.base(), src.disp());
#else
  addi(rt, src.base(), src.disp());
#endif
}

void Assembler::lwl(Register rt, Address src) {
  lwl(rt, src.base(), src.disp());
}

void Assembler::lwr(Register rt, Address src) {
  lwr(rt, src.base(), src.disp());
}

void Assembler::lwu(Register rt, Address src) {
  lwu(rt, src.base(), src.disp());
}

void Assembler::sb(Register rt, Address dst) {
  sb(rt, dst.base(), dst.disp());
}

void Assembler::sc(Register rt, Address dst) {
  sc(rt, dst.base(), dst.disp());
}

void Assembler::scd(Register rt, Address dst) {
  scd(rt, dst.base(), dst.disp());
}

void Assembler::sd(Register rt, Address dst) {
  sd(rt, dst.base(), dst.disp());
}

void Assembler::sdl(Register rt, Address dst) {
  sdl(rt, dst.base(), dst.disp());
}

void Assembler::sdr(Register rt, Address dst) {
  sdr(rt, dst.base(), dst.disp());
}

void Assembler::sh(Register rt, Address dst) {
  sh(rt, dst.base(), dst.disp());
}

void Assembler::sw(Register rt, Address dst) {
  sw(rt, dst.base(), dst.disp());
}

void Assembler::swl(Register rt, Address dst) {
  swl(rt, dst.base(), dst.disp());
}

void Assembler::swr(Register rt, Address dst) {
  swr(rt, dst.base(), dst.disp());
}

void Assembler::lwc1(FloatRegister rt, Address src) {
  lwc1(rt, src.base(), src.disp());
}

void Assembler::ldc1(FloatRegister rt, Address src) {
  ldc1(rt, src.base(), src.disp());
}

void Assembler::swc1(FloatRegister rt, Address dst) {
  swc1(rt, dst.base(), dst.disp());
}

void Assembler::sdc1(FloatRegister rt, Address dst) {
  sdc1(rt, dst.base(), dst.disp());
}
void Assembler::j(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000)) >> 2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000)) >> 2;
#endif
  emit_long((j_op<<26) | dest);
  has_delay_slot();
}

void Assembler::jal(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000)) >> 2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000)) >> 2;
#endif
  emit_long((jal_op<<26) | dest);
  has_delay_slot();
}

static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}
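// For reference: j/jal encode a 26-bit instruction-word index, so they can
// only reach targets inside the same 256 MB (2^28-byte) aligned region as
// the instruction in their delay slot (assuming fit_in_jal tests exactly
// that). Checking against both the lowest and the highest code cache
// address is therefore a conservative test that the target stays reachable
// wherever this code is later relocated within the cache.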
void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    nop();
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    nop();
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}
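// A note on the four leading nops in the patchable variants: call_size()
// reports 6 instruction slots for a patchable call, which matches the far
// form (patchable_set48 + jalr + delay-slot nop). Padding the near form
// (j/jal + nop) to the same fixed size presumably keeps every patchable
// site repatchable in place, whichever form was emitted first.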
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;

    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;

    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;

    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;

    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
    /*
    MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
       0x00000055651ed514: dadd at, ra, zero
       0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

       0x00000055651ed51c: sll zero, zero, 0
       0x00000055651ed520: lui t9, 0x0
       0x00000055651ed524: ori t9, t9, 0x21b8
       0x00000055651ed528: daddu t9, t9, ra
       0x00000055651ed52c: dadd ra, at, zero
       0x00000055651ed530: jr t9
       0x00000055651ed534: sll zero, zero, 0
    */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* address must be bounded */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}
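// How the far-branch stub obtains the PC: MIPS has no architected
// "read PC" instruction, so after saving RA in AT the stub issues
// "bgezal zero, 1". That branch is always taken (zero >= 0), and both its
// target and its link value are the address of the lui, i.e. branch + 12;
// the patched 32-bit displacement is measured from there, which is why
// pd_patch_instruction above stores offset - 12. Adding the displacement
// to RA and restoring RA from AT then yields a register-indirect jump.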
// Implementation of MacroAssembler

// First all the versions that have distinct implementations depending on
// 32/64 bit, unless the difference is trivial (one line or so).

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if (!Use3A2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
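// LL/SC notes for the loop above: ll loads the counter and opens a link on
// its cache line; sc stores only if the link was not broken in between,
// writing 1 to tmp_reg2 on success and 0 on failure, so the beq against R0
// retries the read-modify-write until it commits atomically. The leading
// sync appears to be a memory-ordering workaround for Loongson cores older
// than the 3A2000 (hence the Use3A2000 guard).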
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// NOTE: unlike the x86 version, we do not increment SP after the call here;
// the stack pointer adjustment happens elsewhere, see
// InterpreterMacroAssembler::super_call_VM_leaf. This method handles the
// stack alignment itself, so callers need not reserve stack space for the
// arguments. (Notes by yjl, 6/27/2005 through 8/1/2005.)
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  //call(RuntimeAddress(entry_point));
  //increment(rsp, number_of_arguments * wordSize);
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  {
    call(entry_point, relocInfo::runtime_call_type);
    delayed()->nop();
  }
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  {
    call(entry_point, relocInfo::runtime_call_type);
    delayed()->nop();
  }
  bind(E);
}
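// The andi/beq sequence above keeps SP 16-byte aligned across the C call,
// as the ABI requires: SP is always at least 8-byte aligned here, so the
// only possible misalignment is 8, and dropping SP by 8 just for the call
// restores 16-byte alignment before the callee runs.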
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}
void MacroAssembler::call(address entry) {
  // C/C++ code assumes T9 holds the entry point, so we always move entry
  // into T9 first. Maybe there is a more graceful way to handle this
  // (FIXME, yjl 6/27/2005). For more information, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}
void MacroAssembler::call(address entry, RelocationHolder& rh) {
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}
void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if (ShowMessageBoxOnError) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }

      //if (os::message_box(msg, "Execution stopped, print registers?"))
      //  regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
}
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument (yjl 7/10/2005)
  addiu(SP, SP, -1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore the space reserved for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
void MacroAssembler::print_reg(Register reg) {
/*
  char *s = getenv("PRINT_REG");
  if (s == NULL)
    return;
  if (strcmp(s, "1") != 0)
    return;
*/
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); // A0 has been modified by li(A0, (long)reg->name()). Ugly code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

/*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();

  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
*/
}
void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
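// Immediates that fit a signed 16-bit field take the single addiu/daddiu
// instruction; larger values are first materialized in the AT scratch
// register and added with addu/daddu. decrement() simply negates the
// immediate and reuses increment().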
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  if (java_thread >= S0 && java_thread <= S7) {
#ifdef ASSERT
    { Label L;
      get_thread(AT);
      beq(java_thread, AT, L);
      delayed()->nop();
      stop("MacroAssembler::call_VM_base: java_thread not callee saved?");
      bind(L);
    }
#endif
  } else {
    get_thread(java_thread);
  }
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  move(V0, SP);
  // we also reserve space for java_thread here
#ifndef _LP64
  // addi, not daddi: this path is only compiled for 32-bit builds
  addi(SP, SP, (1 + number_of_arguments) * (-wordSize));
#endif
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);

}
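// The pre-alignment SP is parked in V0 and passed to call_VM_base as
// last_java_sp; after the runtime call, call_VM_base reloads SP from the
// thread's last_Java_sp, which discards the outgoing arguments and undoes
// the andr-based alignment in one step.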
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  if (arg_0 != A0) move(A0, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  if (arg_0 != A0) move(A0, arg_0);
  if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1) {
  if (arg_1 != A0) move(A0, arg_1);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point,
                                        Register arg_1,
                                        Register arg_2,
                                        Register arg_3) {
  if (arg_1 != A0) move(A0, arg_1);
  if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    lw(AT, reg, 0);
    /* Jin
    nop();
    nop();
    nop();
    */
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
void MacroAssembler::enter() {
  push2(RA, FP);
  move(FP, SP);
}

void MacroAssembler::leave() {
#ifndef _LP64
  //move(SP, FP);
  //pop2(FP, RA);
  addi(SP, FP, 2 * wordSize);
  lw(RA, SP, -1 * wordSize);
  lw(FP, SP, -2 * wordSize);
#else
  daddi(SP, FP, 2 * wordSize);
  ld(RA, SP, -1 * wordSize);
  ld(FP, SP, -2 * wordSize);
#endif
}

/*
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os::breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
*/
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T1;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // we must set sp to zero to clear frame
  st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is possible
  // that we need it only for debugging
  if (clear_fp)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));

  if (clear_pc)
    st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // we must set sp to zero to clear frame
  sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  }
}
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  move(tmp, thread);
  srl(tmp, tmp, os::get_serialize_page_shift_count());
  move(AT, (os::vm_page_size() - sizeof(int)));
  andr(tmp, tmp, AT);
  sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
}
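// For context: this is HotSpot's serialization-page trick. Instead of every
// thread executing a memory barrier, the VM thread mprotects the shared
// page; any mutator caught mid-store to it takes a fault, which serializes
// its memory state with the VM thread. Hashing the thread pointer to a
// page offset keeps concurrent writers on separate cache lines.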
// Calls to C land
//
// When entering C land, the FP & SP of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}
//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
/* LP64_ONLY(Register thread = r15_thread;)
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
  cmpptr(Address(obj, 0), NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

  LP64_ONLY(movslq(tmp, index);)
  movptr(tmp2, Address(obj, 0));
#ifdef _LP64
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if (tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  movq(c_rarg0, Address(obj, 0));
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if (tosca_live) pop(rax);
  bind(done);
*/
}

void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

/* LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
*/
}

#endif // SERIALGC
2000 //////////////////////////////////////////////////////////////////////////////////
2003 void MacroAssembler::store_check(Register obj) {
2004 // Does a store check for the oop in register obj. The content of
2005 // register obj is destroyed afterwards.
2006 store_check_part_1(obj);
2007 store_check_part_2(obj);
2008 }
2010 void MacroAssembler::store_check(Register obj, Address dst) {
2011 store_check(obj);
2012 }
2015 // split the store check operation so that other instructions can be scheduled in between
2016 void MacroAssembler::store_check_part_1(Register obj) {
2017 BarrierSet* bs = Universe::heap()->barrier_set();
2018 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
2019 #ifdef _LP64
2020 dsrl(obj, obj, CardTableModRefBS::card_shift);
2021 #else
2022 shr(obj, CardTableModRefBS::card_shift);
2023 #endif
2024 }
2026 void MacroAssembler::store_check_part_2(Register obj) {
2027 BarrierSet* bs = Universe::heap()->barrier_set();
2028 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
2029 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
2030 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
2032 li(AT, (long)ct->byte_map_base);
2033 #ifdef _LP64
2034 dadd(AT, AT, obj);
2035 #else
2036 add(AT, AT, obj);
2037 #endif
2038 sb(R0, AT, 0);
2039 sync();
2040 }
2042 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
2043 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
2044 Register t1, Register t2, Label& slow_case) {
2045 assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
2047 Register end = t2;
2048 #ifndef OPT_THREAD
2049 Register thread = t1;
2050 get_thread(thread);
2051 #else
2052 Register thread = TREG;
2053 #endif
2054 verify_tlab(t1, t2);//blows t1&t2
2056 ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
2058 if (var_size_in_bytes == NOREG) {
2059 // We don't think we need to move con_size_in_bytes to a register first.
2060 // by yjl 8/17/2005
2061 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
2062 addi(end, obj, con_size_in_bytes);
2063 } else {
2064 add(end, obj, var_size_in_bytes);
2065 }
2067 ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
2068 sltu(AT, AT, end);
2069 bne_far(AT, R0, slow_case);
2070 delayed()->nop();
2073 // update the tlab top pointer
2074 st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
2076 // recover var_size_in_bytes if necessary
2077 /*if (var_size_in_bytes == end) {
2078 sub(var_size_in_bytes, end, obj);
2079 }*/
2081 verify_tlab(t1, t2);
2082 }
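// Illustrative sketch (not part of the numbered source) of the TLAB fast path above:
//
//   obj = thread->tlab_top();
//   end = obj + size;                        // con_size_in_bytes or var_size_in_bytes
//   if (end > thread->tlab_end()) goto slow_case;
//   thread->set_tlab_top(end);               // plain bump-pointer store
//
// The TLAB is thread-local, so unlike eden_allocate below no CAS is needed.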
2084 // Defines obj, preserves var_size_in_bytes
2085 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
2086 Register t1, Register t2, Label& slow_case) {
2087 assert_different_registers(obj, var_size_in_bytes, t1, AT);
2088 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
2089 // No allocation in the shared eden.
2090 b_far(slow_case);
2091 delayed()->nop();
2092 } else {
2094 #ifndef _LP64
2095 Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
2096 lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
2097 #else
2098 Address heap_top(t1);
2099 li(t1, (long)Universe::heap()->top_addr());
2100 #endif
2101 ld_ptr(obj, heap_top);
2103 Register end = t2;
2104 Label retry;
2106 bind(retry);
2107 if (var_size_in_bytes == NOREG) {
2108 // We don't think we need to move con_size_in_bytes to a register first.
2109 // by yjl 8/17/2005
2110 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
2111 addi(end, obj, con_size_in_bytes);
2112 } else {
2113 add(end, obj, var_size_in_bytes);
2114 }
2115 // if end < obj then we wrapped around => object too long => slow case
2116 sltu(AT, end, obj);
2117 bne_far(AT, R0, slow_case);
2118 delayed()->nop();
2120 //lui(AT, split_high((int)Universe::heap()->end_addr()));
2121 //lw(AT, AT, split_low((int)Universe::heap()->end_addr()));
2122 li(AT, (long)Universe::heap()->end_addr());
2123 sltu(AT, AT, end);
2124 bne_far(AT, R0, slow_case);
2125 delayed()->nop();
2126 // Compare obj with the top addr, and if still equal, store the new top addr in
2127 // end at the address of the top addr pointer. Sets ZF if was equal, and clears
2128 // it otherwise. Use lock prefix for atomicity on MPs.
2129 if (os::is_MP()) {
2130 ///lock();
2131 }
2133 // if someone beat us on the allocation, try again, otherwise continue
2134 cmpxchg(end, heap_top, obj);
2135 beq_far(AT, R0, retry); //by yyq
2136 delayed()->nop();
2138 }
2139 }
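// Illustrative sketch (not part of the numbered source) of the lock-free fast
// path emitted above, assuming a contiguous shared eden:
//
//   do {
//     obj = *heap_top;                       // current allocation frontier
//     end = obj + size;
//     if (end < obj) goto slow_case;         // wrapped around: object too long
//     if (end > *heap_end) goto slow_case;   // eden exhausted
//   } while (cmpxchg(heap_top, obj, end) failed);  // another thread won; retry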
2141 // C2 doesn't invoke this one.
2142 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
2143 Register top = T0;
2144 Register t1 = T1;
2145 /* Jin: tlab_refill() is called in
2147 [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
2149 In generate_code_for(), T2 has been assigned as a register (length), which is used
2150 after calling tlab_refill();
2151 Therefore, tlab_refill() should not use T2.
2153 Source:
2155 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
2156 at java.lang.System.arraycopy(Native Method)
2157 at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array
2158 at sun.misc.Resource.getBytes(Resource.java:117)
2159 at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
2160 at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
2161 at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
2162 */
2163 Register t2 = T9;
2164 Register t3 = T3;
2165 Register thread_reg = T8;
2166 Label do_refill, discard_tlab;
2167 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
2168 // No allocation in the shared eden.
2169 b(slow_case);
2170 delayed()->nop();
2171 }
2173 get_thread(thread_reg);
2175 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
2176 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
2178 // calculate amount of free space
2179 sub(t1, t1, top);
2180 shr(t1, LogHeapWordSize);
2182 // Retain tlab and allocate object in shared space if
2183 // the amount free in the tlab is too large to discard.
2184 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
2185 slt(AT, t2, t1);
2186 beq(AT, R0, discard_tlab);
2187 delayed()->nop();
2189 // Retain
2191 #ifndef _LP64
2192 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
2193 #else
2194 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
2195 #endif
2196 add(t2, t2, AT);
2197 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
2199 if (TLABStats) {
2200 // increment number of slow_allocations
2201 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
2202 addiu(AT, AT, 1);
2203 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
2204 }
2205 b(try_eden);
2206 delayed()->nop();
2208 bind(discard_tlab);
2209 if (TLABStats) {
2210 // increment number of refills
2211 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
2212 addi(AT, AT, 1);
2213 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
2214 // accumulate wastage -- t1 is amount free in tlab
2215 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
2216 add(AT, AT, t1);
2217 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
2218 }
2220 // if tlab is currently allocated (top or end != null) then
2221 // fill [top, end + alignment_reserve) with array object
2222 beq(top, R0, do_refill);
2223 delayed()->nop();
2225 // set up the mark word
2226 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
2227 st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
2229 // set the length to the remaining space
2230 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
2231 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
2232 shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
2233 sw(t1, top, arrayOopDesc::length_offset_in_bytes());
2235 // set klass to intArrayKlass
2236 #ifndef _LP64
2237 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
2238 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
2239 #else
2240 li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
2241 ld_ptr(t1, AT, 0);
2242 #endif
2243 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
2244 store_klass(top, t1);
2246 // refill the tlab with an eden allocation
2247 bind(do_refill);
2248 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
2249 shl(t1, LogHeapWordSize);
2250 // add object_size ??
2251 eden_allocate(top, t1, 0, t2, t3, slow_case);
2253 // Check that t1 was preserved in eden_allocate.
2254 #ifdef ASSERT
2255 if (UseTLAB) {
2256 Label ok;
2257 assert_different_registers(thread_reg, t1);
2258 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
2259 shl(AT, LogHeapWordSize);
2260 beq(AT, t1, ok);
2261 delayed()->nop();
2262 stop("assert(t1 != tlab size)");
2263 should_not_reach_here();
2265 bind(ok);
2266 }
2267 #endif
2268 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
2269 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
2270 add(top, top, t1);
2271 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
2272 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
2273 verify_tlab(t1, t2);
2274 b(retry);
2275 delayed()->nop();
2276 }
2278 static const double pi_4 = 0.7853981633974483;
2280 // the x86 version is too clumsy; we don't think we need that fuss. Maybe we're wrong, FIXME
2281 // must get argument(a double) in F12/F13
2282 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
2283 // We need to preserve the registers which may be modified during the call. @Jerome
2284 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
2285 //save all modified register here
2286 // if (preserve_cpu_regs) {
2287 // }
2288 // FIXME: in the disassembly of trigfunc, only V0, V1, T9, SP and RA are used, so we only need to save V0, V1 and T9
2289 pushad();
2290 // we should reserve the stack space before we call
2291 addi(SP, SP, -wordSize * 2);
2292 switch (trig) {
2293 case 's' :
2294 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
2295 delayed()->nop();
2296 break;
2297 case 'c':
2298 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
2299 delayed()->nop();
2300 break;
2301 case 't':
2302 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
2303 delayed()->nop();
2304 break;
2305 default: assert(false, "bad intrinsic");
2306 break;
2308 }
2310 addi(SP, SP, wordSize * 2);
2311 popad();
2312 // if (preserve_cpu_regs) {
2313 // }
2314 }
2316 #ifdef _LP64
2317 void MacroAssembler::li(Register rd, long imm) {
2318 if (imm <= max_jint && imm >= min_jint) {
2319 li32(rd, (int)imm);
2320 } else if (julong(imm) <= 0xFFFFFFFF) {
2321 assert_not_delayed();
2322 // lui sign-extends, so we can't use that.
2323 ori(rd, R0, julong(imm) >> 16);
2324 dsll(rd, rd, 16);
2325 ori(rd, rd, split_low(imm));
2326 //aoqi_test
2327 //} else if ((imm > 0) && ((imm >> 48) == 0)) {
2328 } else if ((imm > 0) && is_simm16(imm >> 32)) {
2329 /* A 48-bit address */
2330 li48(rd, imm);
2331 } else {
2332 li64(rd, imm);
2333 }
2334 }
2335 #else
2336 void MacroAssembler::li(Register rd, long imm) {
2337 li32(rd, (int)imm);
2338 }
2339 #endif
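// Worked examples for the dispatch in li() above (illustrative, _LP64 case;
// instruction counts match insts_for_set64() below):
//   li(rd, 0x1234)         -> addiu rd, R0, 0x1234 (1 insn, fits simm16)
//   li(rd, 0xdeadbeefL)    -> ori/dsll/ori zero-extended sequence (3 insns)
//   li(rd, 0x556789abcdef) -> li48: lui/ori/dsll/ori (4 insns, 48-bit address)
//   all other values       -> li64 (6 insns)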
2341 void MacroAssembler::li32(Register reg, int imm) {
2342 if (is_simm16(imm)) {
2343 /* Jin: for imm < 0, we should use addi instead of addiu.
2344 *
2345 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
2346 *
2347 * 78 move [int:-1|I] [a0|I]
2348 * : daddi a0, zero, 0xffffffff (correct)
2349 * : daddiu a0, zero, 0xffffffff (incorrect)
2350 */
2351 if (imm >= 0)
2352 addiu(reg, R0, imm);
2353 else
2354 addi(reg, R0, imm);
2355 } else {
2356 lui(reg, split_low(imm >> 16));
2357 if (split_low(imm))
2358 ori(reg, reg, split_low(imm));
2359 }
2360 }
2362 #ifdef _LP64
2363 void MacroAssembler::set64(Register d, jlong value) {
2364 assert_not_delayed();
2366 int hi = (int)(value >> 32);
2367 int lo = (int)(value & ~0);
2369 if (value == lo) { // 32-bit integer
2370 if (is_simm16(value)) {
2371 daddiu(d, R0, value);
2372 } else {
2373 lui(d, split_low(value >> 16));
2374 if (split_low(value)) {
2375 ori(d, d, split_low(value));
2376 }
2377 }
2378 } else if (hi == 0) { // hardware zero-extends to upper 32
2379 ori(d, R0, julong(value) >> 16);
2380 dsll(d, d, 16);
2381 if (split_low(value)) {
2382 ori(d, d, split_low(value));
2383 }
2384 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2385 // 4 insts
2386 li48(d, value);
2387 } else { // li64
2388 // 6 insts
2389 li64(d, value);
2390 }
2391 }
2394 int MacroAssembler::insts_for_set64(jlong value) {
2395 int hi = (int)(value >> 32);
2396 int lo = (int)(value & ~0);
2398 int count = 0;
2400 if (value == lo) { // 32-bit integer
2401 if (is_simm16(value)) {
2402 //daddiu(d, R0, value);
2403 count++;
2404 } else {
2405 //lui(d, split_low(value >> 16));
2406 count++;
2407 if (split_low(value)) {
2408 //ori(d, d, split_low(value));
2409 count++;
2410 }
2411 }
2412 } else if (hi == 0) { // hardware zero-extends to upper 32
2413 //ori(d, R0, julong(value) >> 16);
2414 //dsll(d, d, 16);
2415 count += 2;
2416 if (split_low(value)) {
2417 //ori(d, d, split_low(value));
2418 count++;
2419 }
2420 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2421 // 4 insts
2422 //li48(d, value);
2423 count += 4;
2424 } else { // li64
2425 // 6 insts
2426 //li64(d, value);
2427 count += 6;
2428 }
2430 return count;
2431 }
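// Note: insts_for_set64() intentionally mirrors set64() branch for branch,
// counting the instructions set64() would emit without emitting them. Any
// change to set64()'s selection logic must be mirrored here.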
2433 void MacroAssembler::patchable_set48(Register d, jlong value) {
2434 assert_not_delayed();
2436 int hi = (int)(value >> 32);
2437 int lo = (int)(value & ~0);
2439 int count = 0;
2441 if (value == lo) { // 32-bit integer
2442 if (is_simm16(value)) {
2443 daddiu(d, R0, value);
2444 count += 1;
2445 } else {
2446 lui(d, split_low(value >> 16));
2447 count += 1;
2448 if (split_low(value)) {
2449 ori(d, d, split_low(value));
2450 count += 1;
2451 }
2452 }
2453 } else if (hi == 0) { // hardware zero-extends to upper 32
2454 ori(d, R0, julong(value) >> 16);
2455 dsll(d, d, 16);
2456 count += 2;
2457 if (split_low(value)) {
2458 ori(d, d, split_low(value));
2459 count += 1;
2460 }
2461 } else if ((value > 0) && is_simm16(value >> 32)) { // li48
2462 // 4 insts
2463 li48(d, value);
2464 count += 4;
2465 } else { // li64
2466 tty->print_cr("value = 0x%lx", value);
2467 guarantee(false, "Not supported yet !");
2468 }
2470 for (; count < 4; count++) {
2471 nop();
2472 }
2473 }
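// The trailing nop loop pads every patchable_set48() sequence to exactly four
// instructions regardless of the value emitted; this keeps the patch site a
// fixed size, so the relocations recorded by callers (e.g. set_last_Java_frame,
// set_narrow_oop) can later rewrite it in place with a different 48-bit value.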
2475 void MacroAssembler::patchable_set32(Register d, jlong value) {
2476 assert_not_delayed();
2478 int hi = (int)(value >> 32);
2479 int lo = (int)(value & ~0);
2481 int count = 0;
2483 if (value == lo) { // 32-bit integer
2484 if (is_simm16(value)) {
2485 daddiu(d, R0, value);
2486 count += 1;
2487 } else {
2488 lui(d, split_low(value >> 16));
2489 count += 1;
2490 if (split_low(value)) {
2491 ori(d, d, split_low(value));
2492 count += 1;
2493 }
2494 }
2495 } else if (hi == 0) { // hardware zero-extends to upper 32
2496 ori(d, R0, julong(value) >> 16);
2497 dsll(d, d, 16);
2498 count += 2;
2499 if (split_low(value)) {
2500 ori(d, d, split_low(value));
2501 count += 1;
2502 }
2503 } else {
2504 tty->print_cr("value = 0x%lx", value);
2505 guarantee(false, "Not supported yet !");
2506 }
2508 for (; count < 3; count++) {
2509 nop();
2510 }
2511 }
2513 void MacroAssembler::patchable_call32(Register d, jlong value) {
2514 assert_not_delayed();
2516 int hi = (int)(value >> 32);
2517 int lo = (int)(value & ~0);
2519 int count = 0;
2521 if (value == lo) { // 32-bit integer
2522 if (is_simm16(value)) {
2523 daddiu(d, R0, value);
2524 count += 1;
2525 } else {
2526 lui(d, split_low(value >> 16));
2527 count += 1;
2528 if (split_low(value)) {
2529 ori(d, d, split_low(value));
2530 count += 1;
2531 }
2532 }
2533 } else {
2534 tty->print_cr("value = 0x%lx", value);
2535 guarantee(false, "Not supported yet !");
2536 }
2538 for (; count < 2; count++) {
2539 nop();
2540 }
2541 }
2543 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2544 assert(UseCompressedClassPointers, "should only be used for compressed header");
2545 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2547 int klass_index = oop_recorder()->find_index(k);
2548 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2549 long narrowKlass = (long)Klass::encode_klass(k);
2551 relocate(rspec, Assembler::narrow_oop_operand);
2552 patchable_set48(dst, narrowKlass);
2553 }
2556 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2557 assert(UseCompressedOops, "should only be used for compressed header");
2558 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2560 int oop_index = oop_recorder()->find_index(obj);
2561 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2563 relocate(rspec, Assembler::narrow_oop_operand);
2564 patchable_set48(dst, oop_index);
2565 }
2567 void MacroAssembler::li64(Register rd, long imm) {
2568 assert_not_delayed();
2569 lui(rd, imm >> 48);
2570 ori(rd, rd, split_low(imm >> 32));
2571 dsll(rd, rd, 16);
2572 ori(rd, rd, split_low(imm >> 16));
2573 dsll(rd, rd, 16);
2574 ori(rd, rd, split_low(imm));
2575 }
2577 void MacroAssembler::li48(Register rd, long imm) {
2578 assert_not_delayed();
2579 assert(is_simm16(imm >> 32), "Not a 48-bit address");
2580 lui(rd, imm >> 32);
2581 ori(rd, rd, split_low(imm >> 16));
2582 dsll(rd, rd, 16);
2583 ori(rd, rd, split_low(imm));
2584 }
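// Example expansion (illustrative): li48(rd, 0x556789abcdef) emits
//   lui  rd, 0x5567      // rd = 0x0000000055670000 (lui sign-extends)
//   ori  rd, rd, 0x89ab  // rd = 0x00000000556789ab
//   dsll rd, rd, 16      // rd = 0x0000556789ab0000
//   ori  rd, rd, 0xcdef  // rd = 0x0000556789abcdef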
2585 #endif
2586 // NOTE: we don't push eax as i486 does.
2587 // x86 saves eax because it uses eax as the jump register
2588 void MacroAssembler::verify_oop(Register reg, const char* s) {
2589 /*
2590 if (!VerifyOops) return;
2592 // Pass register number to verify_oop_subroutine
2593 char* b = new char[strlen(s) + 50];
2594 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
2595 push(rax); // save rax,
2596 push(reg); // pass register argument
2597 ExternalAddress buffer((address) b);
2598 // avoid using pushptr, as it modifies scratch registers
2599 // and our contract is not to modify anything
2600 movptr(rax, buffer.addr());
2601 push(rax);
2602 // call indirectly to solve generation ordering problem
2603 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2604 call(rax);
2605 */
2606 if (!VerifyOops) return;
2607 const char * b = NULL;
2608 stringStream ss;
2609 ss.print("verify_oop: %s: %s", reg->name(), s);
2610 b = code_string(ss.as_string());
2611 #ifdef _LP64
2612 pushad();
2613 move(A1, reg);
2614 li(A0, (long)b);
2615 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2616 ld(T9, AT, 0);
2617 jalr(T9);
2618 delayed()->nop();
2619 popad();
2620 #else
2621 // Pass register number to verify_oop_subroutine
2622 sw(T0, SP, - wordSize);
2623 sw(T1, SP, - 2*wordSize);
2624 sw(RA, SP, - 3*wordSize);
2625 sw(A0, SP, - 4*wordSize);
2626 sw(A1, SP, - 5*wordSize);
2627 sw(AT, SP, - 6*wordSize);
2628 sw(T9, SP, - 7*wordSize);
2629 addiu(SP, SP, - 7 * wordSize);
2630 move(A1, reg);
2631 li(A0, (long)b);
2632 // call indirectly to solve generation ordering problem
2633 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2634 lw(T9, AT, 0);
2635 jalr(T9);
2636 delayed()->nop();
2637 lw(T0, SP, 6* wordSize);
2638 lw(T1, SP, 5* wordSize);
2639 lw(RA, SP, 4* wordSize);
2640 lw(A0, SP, 3* wordSize);
2641 lw(A1, SP, 2* wordSize);
2642 lw(AT, SP, 1* wordSize);
2643 lw(T9, SP, 0* wordSize);
2644 addiu(SP, SP, 7 * wordSize);
2645 #endif
2646 }
2649 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
2650 if (!VerifyOops) {
2651 nop();
2652 return;
2653 }
2654 // Pass register number to verify_oop_subroutine
2655 const char * b = NULL;
2656 stringStream ss;
2657 ss.print("verify_oop_addr: %s", s);
2658 b = code_string(ss.as_string());
2660 st_ptr(T0, SP, - wordSize);
2661 st_ptr(T1, SP, - 2*wordSize);
2662 st_ptr(RA, SP, - 3*wordSize);
2663 st_ptr(A0, SP, - 4*wordSize);
2664 st_ptr(A1, SP, - 5*wordSize);
2665 st_ptr(AT, SP, - 6*wordSize);
2666 st_ptr(T9, SP, - 7*wordSize);
2667 ld_ptr(A1, addr); // addr may use SP, so load from it before change SP
2668 addiu(SP, SP, - 7 * wordSize);
2670 li(A0, (long)b);
2671 // call indirectly to solve generation ordering problem
2672 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
2673 ld_ptr(T9, AT, 0);
2674 jalr(T9);
2675 delayed()->nop();
2676 ld_ptr(T0, SP, 6* wordSize);
2677 ld_ptr(T1, SP, 5* wordSize);
2678 ld_ptr(RA, SP, 4* wordSize);
2679 ld_ptr(A0, SP, 3* wordSize);
2680 ld_ptr(A1, SP, 2* wordSize);
2681 ld_ptr(AT, SP, 1* wordSize);
2682 ld_ptr(T9, SP, 0* wordSize);
2683 addiu(SP, SP, 7 * wordSize);
2684 }
2686 // used registers : T0, T1
2687 void MacroAssembler::verify_oop_subroutine() {
2688 // RA: ra
2689 // A0: char* error message
2690 // A1: oop object to verify
2692 Label exit, error;
2693 // increment counter
2694 li(T0, (long)StubRoutines::verify_oop_count_addr());
2695 lw(AT, T0, 0);
2696 #ifdef _LP64
2697 //FIXME, aoqi: rewrite addi, addu, etc. in 64-bit mode.
2698 daddi(AT, AT, 1);
2699 #else
2700 addi(AT, AT, 1);
2701 #endif
2702 sw(AT, T0, 0);
2704 // make sure object is 'reasonable'
2705 beq(A1, R0, exit); // if obj is NULL it is ok
2706 delayed()->nop();
2708 // Check if the oop is in the right area of memory
2709 //const int oop_mask = Universe::verify_oop_mask();
2710 //const int oop_bits = Universe::verify_oop_bits();
2711 const uintptr_t oop_mask = Universe::verify_oop_mask();
2712 const uintptr_t oop_bits = Universe::verify_oop_bits();
2713 li(AT, oop_mask);
2714 andr(T0, A1, AT);
2715 li(AT, oop_bits);
2716 bne(T0, AT, error);
2717 delayed()->nop();
2719 // make sure klass is 'reasonable'
2720 //add for compressedoops
2721 reinit_heapbase();
2722 //add for compressedoops
2723 load_klass(T0, A1);
2724 beq(T0, R0, error); // if klass is NULL it is broken
2725 delayed()->nop();
2726 #if 0
2727 //FIXME:wuhui.
2728 // Check if the klass is in the right area of memory
2729 //const int klass_mask = Universe::verify_klass_mask();
2730 //const int klass_bits = Universe::verify_klass_bits();
2731 const uintptr_t klass_mask = Universe::verify_klass_mask();
2732 const uintptr_t klass_bits = Universe::verify_klass_bits();
2734 li(AT, klass_mask);
2735 andr(T1, T0, AT);
2736 li(AT, klass_bits);
2737 bne(T1, AT, error);
2738 delayed()->nop();
2739 // make sure klass' klass is 'reasonable'
2740 //add for compressedoops
2741 load_klass(T0, T0);
2742 beq(T0, R0, error); // if klass' klass is NULL it is broken
2743 delayed()->nop();
2745 li(AT, klass_mask);
2746 andr(T1, T0, AT);
2747 li(AT, klass_bits);
2748 bne(T1, AT, error);
2749 delayed()->nop(); // if klass not in right area of memory it is broken too.
2750 #endif
2751 // return if everything seems ok
2752 bind(exit);
2754 jr(RA);
2755 delayed()->nop();
2757 // handle errors
2758 bind(error);
2759 pushad();
2760 #ifndef _LP64
2761 addi(SP, SP, (-1) * wordSize);
2762 #endif
2763 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
2764 delayed()->nop();
2765 #ifndef _LP64
2766 addiu(SP, SP, 1 * wordSize);
2767 #endif
2768 popad();
2769 jr(RA);
2770 delayed()->nop();
2771 }
2773 void MacroAssembler::verify_tlab(Register t1, Register t2) {
2774 #ifdef ASSERT
2775 assert_different_registers(t1, t2, AT);
2776 if (UseTLAB && VerifyOops) {
2777 Label next, ok;
2779 get_thread(t1);
2781 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
2782 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
2783 sltu(AT, t2, AT);
2784 beq(AT, R0, next);
2785 delayed()->nop();
2787 stop("assert(top >= start)");
2789 bind(next);
2790 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
2791 sltu(AT, AT, t2);
2792 beq(AT, R0, ok);
2793 delayed()->nop();
2795 stop("assert(top <= end)");
2797 bind(ok);
2799 /*
2800 Label next, ok;
2801 Register t1 = rsi;
2802 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
2804 push(t1);
2805 NOT_LP64(push(thread_reg));
2806 NOT_LP64(get_thread(thread_reg));
2808 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
2809 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
2810 jcc(Assembler::aboveEqual, next);
2811 stop("assert(top >= start)");
2812 should_not_reach_here();
2814 bind(next);
2815 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
2816 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
2817 jcc(Assembler::aboveEqual, ok);
2818 stop("assert(top <= end)");
2819 should_not_reach_here();
2821 bind(ok);
2822 NOT_LP64(pop(thread_reg));
2823 pop(t1);
2824 */
2825 }
2826 #endif
2827 }
2828 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2829 Register tmp,
2830 int offset) {
2831 intptr_t value = *delayed_value_addr;
2832 if (value != 0)
2833 return RegisterOrConstant(value + offset);
2834 AddressLiteral a(delayed_value_addr);
2835 // load indirectly to solve generation ordering problem
2836 //movptr(tmp, ExternalAddress((address) delayed_value_addr));
2837 //ld(tmp, a);
2838 /* #ifdef ASSERT
2839 { Label L;
2840 testptr(tmp, tmp);
2841 if (WizardMode) {
2842 jcc(Assembler::notZero, L);
2843 char* buf = new char[40];
2844 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
2845 STOP(buf);
2846 } else {
2847 jccb(Assembler::notZero, L);
2848 hlt();
2849 }
2850 bind(L);
2851 }
2852 #endif*/
2853 if (offset != 0)
2854 daddi(tmp, tmp, offset);
2856 return RegisterOrConstant(tmp);
2857 }
2859 void MacroAssembler::hswap(Register reg) {
2860 //short
2861 //andi(reg, reg, 0xffff);
2862 srl(AT, reg, 8);
2863 sll(reg, reg, 24);
2864 sra(reg, reg, 16);
2865 orr(reg, reg, AT);
2866 }
2868 void MacroAssembler::huswap(Register reg) {
2869 #ifdef _LP64
2870 dsrl(AT, reg, 8);
2871 dsll(reg, reg, 24);
2872 dsrl(reg, reg, 16);
2873 orr(reg, reg, AT);
2874 andi(reg, reg, 0xffff);
2875 #else
2876 //andi(reg, reg, 0xffff);
2877 srl(AT, reg, 8);
2878 sll(reg, reg, 24);
2879 srl(reg, reg, 16);
2880 orr(reg, reg, AT);
2881 #endif
2882 }
2884 // Something funny: do this with only one more register (AT).
2885 // 32 bits
2886 // by yjl 6/29/2005
2887 void MacroAssembler::swap(Register reg) {
2888 srl(AT, reg, 8);
2889 sll(reg, reg, 24);
2890 orr(reg, reg, AT);
2891 //reg : 4 1 2 3
2892 srl(AT, AT, 16);
2893 xorr(AT, AT, reg);
2894 andi(AT, AT, 0xff);
2895 //AT : 0 0 0 1^3
2896 xorr(reg, reg, AT);
2897 //reg : 4 1 2 1
2898 sll(AT, AT, 16);
2899 xorr(reg, reg, AT);
2900 //reg : 4 3 2 1
2901 }
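// Net effect (illustrative): swap() reverses the byte order of a 32-bit word,
// e.g. 0x01020304 -> 0x04030201, using only AT as scratch. The srl/sll/orr
// prefix rotates the word right by 8 bits (1 2 3 4 -> 4 1 2 3) and the
// xor/andi trick then swaps the two bytes still out of place (-> 4 3 2 1).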
2903 #ifdef _LP64
2905 /* do 32-bit CAS using MIPS64 lld/scd
2907 Jin: cas_int should only compare 32 bits of the memory value.
2908 However, lld/scd will do a 64-bit operation, which violates the intention of cas_int.
2909 To simulate a 32-bit atomic operation, the value loaded with LLD is split into
2910 two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
2911 plus the high 32 bits of the memory value, are stored together with SCD.
2913 Example:
2915 double d = 3.1415926;
2916 System.err.println("hello" + d);
2918 sun.misc.FloatingDecimal$1.<init>()
2919 |
2920 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
2922 38 cas_int [a7a7|J] [a0|I] [a6|I]
2923 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
2924 // a6: 0x4ab325aa
2926 again:
2927 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63"
2929 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended)
2930 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits
2931 0x00000055647f3c68: dsll32 t8, t8, 0
2932 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal
2933 0x00000055647f3c70: sll zero, zero, 0
2935 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended)
2936 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF;
2937 0x00000055647f3c7c: ori v1, v1, 0xffffffff
2938 0x00000055647f3c80: and v1, a6, v1
2939 0x00000055647f3c84: or at, t8, v1
2940 0x00000055647f3c88: scd at, 0x0(a7)
2941 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again
2942 0x00000055647f3c90: sll zero, zero, 0
2943 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done
2944 0x00000055647f3c98: sll zero, zero, 0
2945 nequal:
2946 0x00000055647f45a4: dadd a0, t9, zero
2947 0x00000055647f45a8: dadd at, zero, zero
2948 done:
2949 */
2951 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
2952 #if 0
2953 Label done, again, nequal;
2954 bind(again);
2956 sync();
2957 lld(AT, dest);
2959 /* T9: 32 bits, sign extended
2960 * V1: low 32 bits, sign unextended
2961 * T8: high 32 bits (may be another variables's space)
2962 */
2963 sll(T9, AT, 0); // Use 32-bit sll to extend bit 31
2964 dsrl32(T8, AT, 0);
2965 dsll32(T8, T8, 0);
2967 bne(T9, c_reg, nequal);
2968 delayed()->nop();
2970 ori(V1, R0, 0xFFFF);
2971 dsll(V1, V1, 16);
2972 ori(V1, V1, 0xFFFF);
2973 andr(V1, x_reg, V1);
2974 orr(AT, T8, V1);
2975 scd(AT, dest);
2976 beq(AT, R0, again);
2977 delayed()->nop();
2978 b(done);
2979 delayed()->nop();
2981 // not xchged
2982 bind(nequal);
2983 move(c_reg, T9);
2984 move(AT, R0);
2986 bind(done);
2987 #else
2989 /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
2990 Label done, again, nequal;
2992 bind(again);
2994 if(!Use3A2000) sync();
2995 ll(AT, dest);
2996 bne(AT, c_reg, nequal);
2997 delayed()->nop();
2999 move(AT, x_reg);
3000 sc(AT, dest);
3001 beq(AT, R0, again);
3002 delayed()->nop();
3003 b(done);
3004 delayed()->nop();
3006 // not xchged
3007 bind(nequal);
3008 sync();
3009 move(c_reg, AT);
3010 move(AT, R0);
3012 bind(done);
3013 #endif
3014 }
3015 #endif // cmpxchg32
3017 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
3018 Label done, again, nequal;
3020 bind(again);
3021 #ifdef _LP64
3022 if(!Use3A2000) sync();
3023 lld(AT, dest);
3024 #else
3025 if(!Use3A2000) sync();
3026 ll(AT, dest);
3027 #endif
3028 bne(AT, c_reg, nequal);
3029 delayed()->nop();
3031 move(AT, x_reg);
3032 #ifdef _LP64
3033 scd(AT, dest);
3034 #else
3035 sc(AT, dest);
3036 #endif
3037 beq(AT, R0, again);
3038 delayed()->nop();
3039 b(done);
3040 delayed()->nop();
3042 // not xchged
3043 bind(nequal);
3044 sync();
3045 move(c_reg, AT);
3046 move(AT, R0);
3048 bind(done);
3049 }
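// Contract of cmpxchg()/cmpxchg32() (as implemented above): on return AT == 1
// if the memory word equalled c_reg and x_reg was stored, otherwise AT == 0 and
// c_reg holds the value actually observed. Callers such as eden_allocate()
// branch on AT to decide between success and retry.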
3051 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
3052 Label done, again, nequal;
3054 Register x_reg = x_regLo;
3055 dsll32(x_regHi, x_regHi, 0);
3056 dsll32(x_regLo, x_regLo, 0);
3057 dsrl32(x_regLo, x_regLo, 0);
3058 orr(x_reg, x_regLo, x_regHi);
3060 Register c_reg = c_regLo;
3061 dsll32(c_regHi, c_regHi, 0);
3062 dsll32(c_regLo, c_regLo, 0);
3063 dsrl32(c_regLo, c_regLo, 0);
3064 orr(c_reg, c_regLo, c_regHi);
3066 bind(again);
3068 if(!Use3A2000) sync();
3069 lld(AT, dest);
3070 bne(AT, c_reg, nequal);
3071 delayed()->nop();
3073 //move(AT, x_reg);
3074 dadd(AT, x_reg, R0);
3075 scd(AT, dest);
3076 beq(AT, R0, again);
3077 delayed()->nop();
3078 b(done);
3079 delayed()->nop();
3081 // not xchged
3082 bind(nequal);
3083 sync();
3084 //move(c_reg, AT);
3085 //move(AT, R0);
3086 dadd(c_reg, AT, R0);
3087 dadd(AT, R0, R0);
3088 bind(done);
3089 }
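// cmpxchg8() first packs each Lo/Hi register pair into one 64-bit value,
// roughly x_reg = (x_regHi << 32) | (x_regLo & 0xFFFFFFFF) (the dsll32 plus
// the dsll32/dsrl32 zero-extension), then runs the usual 64-bit lld/scd loop
// on the packed values. Note the Lo/Hi inputs are clobbered by the packing.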
3091 // be sure the three registers are different
3092 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
3093 assert_different_registers(tmp, fs, ft);
3094 div_s(tmp, fs, ft);
3095 trunc_l_s(tmp, tmp);
3096 cvt_s_l(tmp, tmp);
3097 mul_s(tmp, tmp, ft);
3098 sub_s(fd, fs, tmp);
3099 }
3101 // be sure the three registers are different
3102 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
3103 assert_different_registers(tmp, fs, ft);
3104 div_d(tmp, fs, ft);
3105 trunc_l_d(tmp, tmp);
3106 cvt_d_l(tmp, tmp);
3107 mul_d(tmp, tmp, ft);
3108 sub_d(fd, fs, tmp);
3109 }
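// Both rem_s() and rem_d() compute the truncated-division remainder, i.e. the
// same value as C's fmodf()/fmod():
//   fd = fs - trunc(fs / ft) * ft
// using FPU divide, truncate-to-long, convert-back, multiply and subtract.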
3111 // Fast_Lock and Fast_Unlock used by C2
3113 // Because the transitions from emitted code to the runtime
3114 // monitorenter/exit helper stubs are so slow it's critical that
3115 // we inline both the stack-locking fast-path and the inflated fast path.
3116 //
3117 // See also: cmpFastLock and cmpFastUnlock.
3118 //
3119 // What follows is a specialized inline transliteration of the code
3120 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3121 // another option would be to emit TrySlowEnter and TrySlowExit methods
3122 // at startup-time. These methods would accept arguments as
3123 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3124 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3125 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3126 // In practice, however, the # of lock sites is bounded and is usually small.
3127 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3128 // if the processor uses simple bimodal branch predictors keyed by EIP
3129 // Since the helper routines would be called from multiple synchronization
3130 // sites.
3131 //
3132 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3133 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3134 // to those specialized methods. That'd give us a mostly platform-independent
3135 // implementation that the JITs could optimize and inline at their pleasure.
3136 // Done correctly, the only time we'd need to cross to native code would be
3137 // to park() or unpark() threads. We'd also need a few more unsafe operators
3138 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3139 // (b) explicit barriers or fence operations.
3140 //
3141 // TODO:
3142 //
3143 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3144 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3145 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3146 // the lock operators would typically be faster than reifying Self.
3147 //
3148 // * Ideally I'd define the primitives as:
3149 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3150 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3151 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3152 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3153 // Furthermore the register assignments are overconstrained, possibly resulting in
3154 // sub-optimal code near the synchronization site.
3155 //
3156 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3157 // Alternately, use a better sp-proximity test.
3158 //
3159 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3160 // Either one is sufficient to uniquely identify a thread.
3161 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3162 //
3163 // * Intrinsify notify() and notifyAll() for the common cases where the
3164 // object is locked by the calling thread but the waitlist is empty.
3165 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3166 //
3167 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3168 // But beware of excessive branch density on AMD Opterons.
3169 //
3170 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3171 // or failure of the fast-path. If the fast-path fails then we pass
3172 // control to the slow-path, typically in C. In Fast_Lock and
3173 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3174 // will emit a conditional branch immediately after the node.
3175 // So we have branches to branches and lots of ICC.ZF games.
3176 // Instead, it might be better to have C2 pass a "FailureLabel"
3177 // into Fast_Lock and Fast_Unlock. In the case of success, control
3178 // will drop through the node. ICC.ZF is undefined at exit.
3179 // In the case of failure, the node will branch directly to the
3180 // FailureLabel
3183 // obj: object to lock
3184 // box: on-stack box address (displaced header location) - KILLED
3185 // rax,: tmp -- KILLED
3186 // scr: tmp -- KILLED
3187 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
3189 // Ensure the register assignments are disjoint
3190 guarantee (objReg != boxReg, "") ;
3191 guarantee (objReg != tmpReg, "") ;
3192 guarantee (objReg != scrReg, "") ;
3193 guarantee (boxReg != tmpReg, "") ;
3194 guarantee (boxReg != scrReg, "") ;
3197 block_comment("FastLock");
3198 /*
3199 move(AT, 0x0);
3200 return;
3201 */
3202 if (PrintBiasedLockingStatistics) {
3203 push(tmpReg);
3204 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
3205 pop(tmpReg);
3206 }
3208 if (EmitSync & 1) {
3209 // set box->dhw = unused_mark (3)
3210 // Force all sync thru slow-path: slow_enter() and slow_exit()
3211 move (AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
3212 sd(AT, Address(boxReg, 0));
3213 move (AT, (int32_t)0) ; // Eflags.ZF = 0
3214 } else
3215 if (EmitSync & 2) {
3216 Label DONE_LABEL ;
3217 if (UseBiasedLocking) {
3218 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3219 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
3220 }
3222 ld(tmpReg, Address(objReg, 0)) ; // fetch markword
3223 ori(tmpReg, tmpReg, 0x1);
3224 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
3226 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
3227 bne(AT, R0, DONE_LABEL);
3228 delayed()->nop();
3230 // Recursive locking
3231 dsubu(tmpReg, tmpReg, SP);
3232 li(AT, (7 - os::vm_page_size() ));
3233 andr(tmpReg, tmpReg, AT);
3234 sd(tmpReg, Address(boxReg, 0));
3235 bind(DONE_LABEL) ;
3236 } else {
3237 // Possible cases that we'll encounter in fast_lock
3238 // ------------------------------------------------
3239 // * Inflated
3240 // -- unlocked
3241 // -- Locked
3242 // = by self
3243 // = by other
3244 // * biased
3245 // -- by Self
3246 // -- by other
3247 // * neutral
3248 // * stack-locked
3249 // -- by self
3250 // = sp-proximity test hits
3251 // = sp-proximity test generates false-negative
3252 // -- by other
3253 //
3255 Label IsInflated, DONE_LABEL, PopDone ;
3257 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
3258 // order to reduce the number of conditional branches in the most common cases.
3259 // Beware -- there's a subtle invariant that fetch of the markword
3260 // at [FETCH], below, will never observe a biased encoding (*101b).
3261 // If this invariant is not held we risk exclusion (safety) failure.
3262 if (UseBiasedLocking && !UseOptoBiasInlining) {
3263 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
3264 }
3266 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
3267 andi(AT, tmpReg, markOopDesc::monitor_value);
3268 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
3269 delayed()->nop();
3271 // Attempt stack-locking ...
3272 ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
3273 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
3275 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
3277 if (PrintBiasedLockingStatistics) {
3278 Label L;
3279 beq(AT, R0, L);
3280 delayed()->nop();
3281 push(T0);
3282 push(T1);
3283 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
3284 pop(T1);
3285 pop(T0);
3286 bind(L);
3287 }
3288 bne(AT, R0, DONE_LABEL);
3289 delayed()->nop();
3291 // Recursive locking
3292 // The object is stack-locked: markword contains stack pointer to BasicLock.
3293 // Locked by current thread if difference with current SP is less than one page.
3294 dsubu(tmpReg, tmpReg, SP);
3295 li(AT, 7 - os::vm_page_size() );
3296 andr(tmpReg, tmpReg, AT);
3297 sd(tmpReg, Address(boxReg, 0));
3298 if (PrintBiasedLockingStatistics) {
3299 Label L;
3300 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
3301 bne(tmpReg, R0, L);
3302 delayed()->nop();
3303 push(T0);
3304 push(T1);
3305 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
3306 pop(T1);
3307 pop(T0);
3308 bind(L);
3309 }
3310 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
3312 b(DONE_LABEL) ;
3313 delayed()->nop();
3315 bind(IsInflated) ;
3317 // TODO: someday avoid the ST-before-CAS penalty by
3318 // relocating (deferring) the following ST.
3319 // We should also think about trying a CAS without having
3320 // fetched _owner. If the CAS is successful we may
3321 // avoid an RTO->RTS upgrade on the $line.
3322 // Without cast to int32_t a movptr will destroy r10 which is typically obj
3323 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
3324 sd(AT, Address(boxReg, 0));
3326 move(boxReg, tmpReg) ;
3327 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3328 // AT = (tmpReg == 0) ? 1:0
3329 sltiu(AT, tmpReg, 1); /* Jin: AT = !tmpReg; */
3330 bne(tmpReg, R0, DONE_LABEL);
3331 delayed()->nop();
3333 // It's inflated and appears unlocked
3334 if (os::is_MP()) {
3335 //lock();
3336 }
3337 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
3338 // Intentional fall-through into DONE_LABEL ...
3341 // DONE_LABEL is a hot target - we'd really like to place it at the
3342 // start of cache line by padding with NOPs.
3343 // See the AMD and Intel software optimization manuals for the
3344 // most efficient "long" NOP encodings.
3345 // Unfortunately none of our alignment mechanisms suffice.
3346 bind(DONE_LABEL);
3348 // Avoid branch-to-branch on AMD processors
3349 // This appears to be superstition.
3350 if (EmitSync & 32) nop() ;
3353 // At DONE_LABEL the icc ZFlag is set as follows ...
3354 // Fast_Unlock uses the same protocol.
3355 // ZFlag == 1 -> Success
3356 // ZFlag == 0 -> Failure - force control through the slow-path
3357 }
3358 }
3360 // obj: object to unlock
3361 // box: box address (displaced header location), killed. Must be EAX.
3362 // rbx,: killed tmp; cannot be obj nor box.
3363 //
3364 // Some commentary on balanced locking:
3365 //
3366 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3367 // Methods that don't have provably balanced locking are forced to run in the
3368 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3369 // The interpreter provides two properties:
3370 // I1: At return-time the interpreter automatically and quietly unlocks any
3371 // objects acquired in the current activation (frame). Recall that the
3372 // interpreter maintains an on-stack list of locks currently held by
3373 // a frame.
3374 // I2: If a method attempts to unlock an object that is not held by the
3375 // frame, the interpreter throws IMSX.
3376 //
3377 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3378 // B() doesn't have provably balanced locking so it runs in the interpreter.
3379 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3380 // is still locked by A().
3381 //
3382 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3383 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3384 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3385 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3387 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
3389 guarantee (objReg != boxReg, "") ;
3390 guarantee (objReg != tmpReg, "") ;
3391 guarantee (boxReg != tmpReg, "") ;
3395 block_comment("FastUnlock");
3397 /*
3398 move(AT, 0x0);
3399 return;
3400 */
3402 if (EmitSync & 4) {
3403 // Disable - inhibit all inlining. Force control through the slow-path
3404 move(AT, R0);
3405 } else
3406 if (EmitSync & 8) {
3407 Label DONE_LABEL ;
3408 if (UseBiasedLocking) {
3409 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3410 }
3411 // classic stack-locking code ...
3412 ld(tmpReg, Address(boxReg, 0)) ;
3413 beq(tmpReg, R0, DONE_LABEL) ;
3414 move(AT, 0x1); // delay slot
3416 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box
3417 bind(DONE_LABEL);
3418 } else {
3419 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3421 // Critically, the biased locking test must have precedence over
3422 // and appear before the (box->dhw == 0) recursive stack-lock test.
3423 if (UseBiasedLocking && !UseOptoBiasInlining) {
3424 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3425 }
3427 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header
3428 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock
3429 delayed()->daddiu(AT, R0, 0x1);
3431 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3432 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated?
3433 beq(AT, R0, Stacked) ; // Inflated?
3434 delayed()->nop();
3436 bind(Inflated) ;
3437 // It's inflated.
3438 // Despite our balanced locking property we still check that m->_owner == Self
3439 // as java routines or native JNI code called by this thread might
3440 // have released the lock.
3441 // Refer to the comments in synchronizer.cpp for how we might encode extra
3442 // state in _succ so we can avoid fetching EntryList|cxq.
3443 //
3444 // I'd like to add more cases in fast_lock() and fast_unlock() --
3445 // such as recursive enter and exit -- but we have to be wary of
3446 // I$ bloat, T$ effects and BP$ effects.
3447 //
3448 // If there's no contention try a 1-0 exit. That is, exit without
3449 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3450 // we detect and recover from the race that the 1-0 exit admits.
3451 //
3452 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3453 // before it STs null into _owner, releasing the lock. Updates
3454 // to data protected by the critical section must be visible before
3455 // we drop the lock (and thus before any other thread could acquire
3456 // the lock and observe the fields protected by the lock).
3457 // IA32's memory-model is SPO, so STs are ordered with respect to
3458 // each other and there's no need for an explicit barrier (fence).
3459 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3460 #ifndef OPT_THREAD
3461 get_thread (TREG) ;
3462 #endif
3464 // It's inflated
3465 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3466 xorr(boxReg, boxReg, TREG);
3468 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3469 orr(boxReg, boxReg, AT);
3471 move(AT, R0);
3472 bne(boxReg, R0, DONE_LABEL);
3473 delayed()->nop();
3475 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3476 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3477 orr(boxReg, boxReg, AT);
3479 move(AT, R0);
3480 bne(boxReg, R0, CheckSucc);
3481 delayed()->nop();
3483 sync();
3484 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3485 move(AT, 0x1);
3486 b(DONE_LABEL);
3487 delayed()->nop();
3490 if ((EmitSync & 65536) == 0) {
3491 Label LSuccess, LGoSlowPath ;
3492 bind (CheckSucc);
3493 ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
3494 beq(AT, R0, LGoSlowPath);
3495 delayed()->nop();
3497 // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3498 // explicit ST;MEMBAR combination, but masm doesn't currently support
3499 // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc
3500 // are all faster when the write buffer is populated.
3501 xorr(boxReg, boxReg, boxReg);
3502 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3503 if (os::is_MP()) {
3504 // lock ();
3505 }
3506 ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
3507 bne(AT, R0, LSuccess);
3508 delayed()->nop();
3510 #ifndef OPT_THREAD
3511 get_thread (TREG) ;
3512 #endif
3513 move(boxReg, R0) ; // box is really EAX
3514 //if (os::is_MP()) { lock(); }
3515 cmpxchg(TREG, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
3516 beq(AT, R0, LSuccess);
3517 delayed()->nop();
3518 // Intentional fall-through into slow-path
3520 bind (LGoSlowPath);
3521 move(AT, R0);
3522 b(DONE_LABEL) ;
3523 delayed()->nop();
3526 bind (LSuccess);
3527 move(AT, 0);
3528 sltiu(AT, boxReg, 1) ; // set ICC.ZF=1 to indicate success
3529 b(DONE_LABEL) ;
3530 delayed()->nop();
3531 }
3533 bind (Stacked);
3534 ld(tmpReg, Address(boxReg, 0)) ;
3535 //if (os::is_MP()) { lock(); }
3536 cmpxchg(tmpReg, Address(objReg, 0), boxReg);
3538 if (EmitSync & 65536) {
3539 bind (CheckSucc);
3540 }
3542 bind(DONE_LABEL);
3544 // Avoid branch to branch on AMD processors
3545 if (EmitSync & 32768) { nop() ; }
3546 }
3547 }
3549 class ControlWord {
3550 public:
3551 int32_t _value;
3553 int rounding_control() const { return (_value >> 10) & 3 ; }
3554 int precision_control() const { return (_value >> 8) & 3 ; }
3555 bool precision() const { return ((_value >> 5) & 1) != 0; }
3556 bool underflow() const { return ((_value >> 4) & 1) != 0; }
3557 bool overflow() const { return ((_value >> 3) & 1) != 0; }
3558 bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
3559 bool denormalized() const { return ((_value >> 1) & 1) != 0; }
3560 bool invalid() const { return ((_value >> 0) & 1) != 0; }
3562 void print() const {
3563 // rounding control
3564 const char* rc;
3565 switch (rounding_control()) {
3566 case 0: rc = "round near"; break;
3567 case 1: rc = "round down"; break;
3568 case 2: rc = "round up "; break;
3569 case 3: rc = "chop "; break;
3570 };
3571 // precision control
3572 const char* pc;
3573 switch (precision_control()) {
3574 case 0: pc = "24 bits "; break;
3575 case 1: pc = "reserved"; break;
3576 case 2: pc = "53 bits "; break;
3577 case 3: pc = "64 bits "; break;
3578 };
3579 // flags
3580 char f[9];
3581 f[0] = ' ';
3582 f[1] = ' ';
3583 f[2] = (precision ()) ? 'P' : 'p';
3584 f[3] = (underflow ()) ? 'U' : 'u';
3585 f[4] = (overflow ()) ? 'O' : 'o';
3586 f[5] = (zero_divide ()) ? 'Z' : 'z';
3587 f[6] = (denormalized()) ? 'D' : 'd';
3588 f[7] = (invalid ()) ? 'I' : 'i';
3589 f[8] = '\x0';
3590 // output
3591 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
3592 }
3594 };
3596 class StatusWord {
3597 public:
3598 int32_t _value;
3600 bool busy() const { return ((_value >> 15) & 1) != 0; }
3601 bool C3() const { return ((_value >> 14) & 1) != 0; }
3602 bool C2() const { return ((_value >> 10) & 1) != 0; }
3603 bool C1() const { return ((_value >> 9) & 1) != 0; }
3604 bool C0() const { return ((_value >> 8) & 1) != 0; }
3605 int top() const { return (_value >> 11) & 7 ; }
3606 bool error_status() const { return ((_value >> 7) & 1) != 0; }
3607 bool stack_fault() const { return ((_value >> 6) & 1) != 0; }
3608 bool precision() const { return ((_value >> 5) & 1) != 0; }
3609 bool underflow() const { return ((_value >> 4) & 1) != 0; }
3610 bool overflow() const { return ((_value >> 3) & 1) != 0; }
3611 bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
3612 bool denormalized() const { return ((_value >> 1) & 1) != 0; }
3613 bool invalid() const { return ((_value >> 0) & 1) != 0; }
3615 void print() const {
3616 // condition codes
3617 char c[5];
3618 c[0] = (C3()) ? '3' : '-';
3619 c[1] = (C2()) ? '2' : '-';
3620 c[2] = (C1()) ? '1' : '-';
3621 c[3] = (C0()) ? '0' : '-';
3622 c[4] = '\x0';
3623 // flags
3624 char f[9];
3625 f[0] = (error_status()) ? 'E' : '-';
3626 f[1] = (stack_fault ()) ? 'S' : '-';
3627 f[2] = (precision ()) ? 'P' : '-';
3628 f[3] = (underflow ()) ? 'U' : '-';
3629 f[4] = (overflow ()) ? 'O' : '-';
3630 f[5] = (zero_divide ()) ? 'Z' : '-';
3631 f[6] = (denormalized()) ? 'D' : '-';
3632 f[7] = (invalid ()) ? 'I' : '-';
3633 f[8] = '\x0';
3634 // output
3635 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
3636 }
3638 };
3640 class TagWord {
3641 public:
3642 int32_t _value;
3644 int tag_at(int i) const { return (_value >> (i*2)) & 3; }
3646 void print() const {
3647 printf("%04x", _value & 0xFFFF);
3648 }
3650 };
3652 class FPU_Register {
3653 public:
3654 int32_t _m0;
3655 int32_t _m1;
3656 int16_t _ex;
3658 bool is_indefinite() const {
3659 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
3660 }
3662 void print() const {
3663 char sign = (_ex < 0) ? '-' : '+';
3664 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " ";
3665 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
3666 };
3668 };
3670 class FPU_State {
3671 public:
3672 enum {
3673 register_size = 10,
3674 number_of_registers = 8,
3675 register_mask = 7
3676 };
3678 ControlWord _control_word;
3679 StatusWord _status_word;
3680 TagWord _tag_word;
3681 int32_t _error_offset;
3682 int32_t _error_selector;
3683 int32_t _data_offset;
3684 int32_t _data_selector;
3685 int8_t _register[register_size * number_of_registers];
3687 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
3688 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
3690 const char* tag_as_string(int tag) const {
3691 switch (tag) {
3692 case 0: return "valid";
3693 case 1: return "zero";
3694 case 2: return "special";
3695 case 3: return "empty";
3696 }
3697 ShouldNotReachHere();
3698 return NULL;
3699 }
3701 void print() const {
3702 // print computation registers
3703 { int t = _status_word.top();
3704 for (int i = 0; i < number_of_registers; i++) {
3705 int j = (i - t) & register_mask;
3706 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
3707 st(j)->print();
3708 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
3709 }
3710 }
3711 printf("\n");
3712 // print control registers
3713 printf("ctrl = "); _control_word.print(); printf("\n");
3714 printf("stat = "); _status_word .print(); printf("\n");
3715 printf("tags = "); _tag_word .print(); printf("\n");
3716 }
3718 };
3720 class Flag_Register {
3721 public:
3722 int32_t _value;
3724 bool overflow() const { return ((_value >> 11) & 1) != 0; }
3725 bool direction() const { return ((_value >> 10) & 1) != 0; }
3726 bool sign() const { return ((_value >> 7) & 1) != 0; }
3727 bool zero() const { return ((_value >> 6) & 1) != 0; }
3728 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; }
3729 bool parity() const { return ((_value >> 2) & 1) != 0; }
3730 bool carry() const { return ((_value >> 0) & 1) != 0; }
3732 void print() const {
3733 // flags
3734 char f[8];
3735 f[0] = (overflow ()) ? 'O' : '-';
3736 f[1] = (direction ()) ? 'D' : '-';
3737 f[2] = (sign ()) ? 'S' : '-';
3738 f[3] = (zero ()) ? 'Z' : '-';
3739 f[4] = (auxiliary_carry()) ? 'A' : '-';
3740 f[5] = (parity ()) ? 'P' : '-';
3741 f[6] = (carry ()) ? 'C' : '-';
3742 f[7] = '\x0';
3743 // output
3744 printf("%08x flags = %s", _value, f);
3745 }
3747 };
3749 class IU_Register {
3750 public:
3751 int32_t _value;
3753 void print() const {
3754 printf("%08x %11d", _value, _value);
3755 }
3757 };
3759 class IU_State {
3760 public:
3761 Flag_Register _eflags;
3762 IU_Register _rdi;
3763 IU_Register _rsi;
3764 IU_Register _rbp;
3765 IU_Register _rsp;
3766 IU_Register _rbx;
3767 IU_Register _rdx;
3768 IU_Register _rcx;
3769 IU_Register _rax;
3771 void print() const {
3772 // computation registers
3773 printf("rax, = "); _rax.print(); printf("\n");
3774 printf("rbx, = "); _rbx.print(); printf("\n");
3775 printf("rcx = "); _rcx.print(); printf("\n");
3776 printf("rdx = "); _rdx.print(); printf("\n");
3777 printf("rdi = "); _rdi.print(); printf("\n");
3778 printf("rsi = "); _rsi.print(); printf("\n");
3779 printf("rbp, = "); _rbp.print(); printf("\n");
3780 printf("rsp = "); _rsp.print(); printf("\n");
3781 printf("\n");
3782 // control registers
3783 printf("flgs = "); _eflags.print(); printf("\n");
3784 }
3785 };
3788 class CPU_State {
3789 public:
3790 FPU_State _fpu_state;
3791 IU_State _iu_state;
3793 void print() const {
3794 printf("--------------------------------------------------\n");
3795 _iu_state .print();
3796 printf("\n");
3797 _fpu_state.print();
3798 printf("--------------------------------------------------\n");
3799 }
3801 };
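// Note: ControlWord through CPU_State above describe IA-32/x87 machine state.
// They are carried over from the x86 macro assembler and are reachable only
// through the commented-out print_CPU_state()/verify_FPU() debug paths below.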
3804 /*
3805 static void _print_CPU_state(CPU_State* state) {
3806 state->print();
3807 };
3809 void MacroAssembler::print_CPU_state() {
3810 push_CPU_state();
3811 push(rsp); // pass CPU state
3812 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
3813 addptr(rsp, wordSize); // discard argument
3814 pop_CPU_state();
3815 }
3816 */
3818 void MacroAssembler::align(int modulus) {
3819 while (offset() % modulus != 0) nop();
3820 }
3823 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3824 //FIXME aoqi
3825 // %%%%% need to implement this
3826 //Unimplemented();
3827 /*
3828 if (!VerifyFPU) return;
3829 push_CPU_state();
3830 push(rsp); // pass CPU state
3831 ExternalAddress msg((address) s);
3832 // pass message string s
3833 pushptr(msg.addr());
3834 push(stack_depth); // pass stack depth
3835 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
3836 addptr(rsp, 3 * wordSize); // discard arguments
3837 // check for error
3838 { Label L;
3839 testl(rax, rax);
3840 jcc(Assembler::notZero, L);
3841 int3(); // break if error condition
3842 bind(L);
3843 }
3844 pop_CPU_state();
3845 */
3846 }
3848 #ifdef _LP64
3849 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3851 /* FIXME: Jin: In MIPS64, F0~F23 are all caller-saved, but only F0, F12 and F13 are preserved here. */
3852 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
3853 #else
3854 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
3856 FloatRegister caller_saved_fpu_registers[] = {};
3857 #endif
3859 // We preserve all caller-saved registers.
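// Layout after pushad(): the GPR block is stored first (element 0 of
// caller_saved_registers lands in the highest slot, the last element at
// offset 0), then SP drops again for the FPU block, laid out the same way.
// popad() restores the two blocks in the reverse order.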
3860 void MacroAssembler::pushad() {
3861 int i;
3863 /* Fixed-point registers */
3864 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3865 daddi(SP, SP, -1 * len * wordSize);
3866 for (i = 0; i < len; i++)
3867 {
3868 #ifdef _LP64
3869 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3870 #else
3871 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3872 #endif
3873 }
3875 /* Floating-point registers */
3876 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3877 daddi(SP, SP, -1 * len * wordSize);
3878 for (i = 0; i < len; i++)
3879 {
3880 #ifdef _LP64
3881 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3882 #else
3883 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3884 #endif
3885 }
3886 }
3888 void MacroAssembler::popad() {
3889 int i;
3891 /* Floating-point registers */
3892 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
3893 for (i = 0; i < len; i++)
3894 {
3895 #ifdef _LP64
3896 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3897 #else
3898 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
3899 #endif
3900 }
3901 daddi(SP, SP, len * wordSize);
3903 /* Fixed-point registers */
3904 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
3905 for (i = 0; i < len; i++)
3906 {
3907 #ifdef _LP64
3908 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3909 #else
3910 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
3911 #endif
3912 }
3913 daddi(SP, SP, len * wordSize);
3914 }
3916 void MacroAssembler::push2(Register reg1, Register reg2) {
3917 #ifdef _LP64
3918 daddi(SP, SP, -16);
3919 sd(reg2, SP, 0);
3920 sd(reg1, SP, 8);
3921 #else
3922 addi(SP, SP, -8);
3923 sw(reg2, SP, 0);
3924 sw(reg1, SP, 4);
3925 #endif
3926 }
3928 void MacroAssembler::pop2(Register reg1, Register reg2) {
3929 #ifdef _LP64
3930 ld(reg1, SP, 0);
3931 ld(reg2, SP, 8);
3932 daddi(SP, SP, 16);
3933 #else
3934 lw(reg1, SP, 0);
3935 lw(reg2, SP, 4);
3936 addi(SP, SP, 8);
3937 #endif
3938 }
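// Note the slot order above: push2(a, b) stores b at SP+0 and a above it,
// while pop2(x, y) loads x from SP+0. A push2(a, b) is therefore undone by
// pop2(b, a), not by pop2(a, b).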
3940 //for UseCompressedOops Option
3941 void MacroAssembler::load_klass(Register dst, Register src) {
3942 #ifdef _LP64
3943 if (UseCompressedClassPointers) {
3944 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
3945 decode_klass_not_null(dst);
3946 } else
3947 #endif
3948 ld(dst, src, oopDesc::klass_offset_in_bytes());
3949 }
3951 void MacroAssembler::store_klass(Register dst, Register src) {
3952 #ifdef _LP64
3953 if (UseCompressedClassPointers) {
3954 encode_klass_not_null(src);
3955 sw(src, dst, oopDesc::klass_offset_in_bytes());
3956 } else
3957 #endif
3958 sd(src, dst, oopDesc::klass_offset_in_bytes());
3960 }
3962 void MacroAssembler::load_prototype_header(Register dst, Register src) {
3963 load_klass(dst, src);
3964 ld(dst, Address(dst, Klass::prototype_header_offset()));
3965 }
3967 #ifdef _LP64
3968 void MacroAssembler::store_klass_gap(Register dst, Register src) {
3969 if (UseCompressedClassPointers) {
3970 sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
3971 }
3972 }
3974 void MacroAssembler::load_heap_oop(Register dst, Address src) {
3975 if (UseCompressedOops) {
3976 lwu(dst, src);
3977 decode_heap_oop(dst);
3978 } else {
3979 ld(dst, src);
3980 }
3981 }
3983 void MacroAssembler::store_heap_oop(Address dst, Register src){
3984 if (UseCompressedOops) {
3985 assert(!dst.uses(src), "not enough registers");
3986 encode_heap_oop(src);
3987 sw(src, dst);
3988 } else {
3989 sd(src, dst);
3990 }
3991 }
3993 #ifdef ASSERT
3994 void MacroAssembler::verify_heapbase(const char* msg) {
3995 assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
3996 assert (Universe::heap() != NULL, "java heap should be initialized");
3997 /* if (CheckCompressedOops) {
3998 Label ok;
3999 push(rscratch1); // cmpptr trashes rscratch1
4000 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
4001 jcc(Assembler::equal, ok);
4002 STOP(msg);
4003 bind(ok);
4004 pop(rscratch1);
4005 }*/
4006 }
4007 #endif
4010 // Algorithm must match oop.inline.hpp encode_heap_oop.
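// With a non-NULL base: narrow = (oop - base) >> shift, and NULL maps to 0.
// E.g. base = 0x100000000, shift = 3: oop 0x100000040 encodes to 0x8.
// The movz below first substitutes the heap base for a NULL oop, so the
// subtraction yields 0 without needing a branch.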
4011 void MacroAssembler::encode_heap_oop(Register r) {
4012 #ifdef ASSERT
4013 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
4014 #endif
4015 verify_oop(r, "broken oop in encode_heap_oop");
4016 if (Universe::narrow_oop_base() == NULL) {
4017 if (Universe::narrow_oop_shift() != 0) {
4018 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4019 shr(r, LogMinObjAlignmentInBytes);
4020 }
4021 return;
4022 }
4024 movz(r, S5_heapbase, r);
4025 dsub(r, r, S5_heapbase);
4026 if (Universe::narrow_oop_shift() != 0) {
4027 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4028 shr(r, LogMinObjAlignmentInBytes);
4029 }
4030 }
4032 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
4033 #ifdef ASSERT
4034 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
4035 #endif
4036 verify_oop(src, "broken oop in encode_heap_oop");
4037 if (Universe::narrow_oop_base() == NULL) {
4038 if (Universe::narrow_oop_shift() != 0) {
4039 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4040 dsrl(dst, src, LogMinObjAlignmentInBytes);
4041 } else {
4042 if (dst != src) move(dst, src);
4043 }
4044 } else {
4045 if (dst == src) {
4046 movz(dst, S5_heapbase, dst);
4047 dsub(dst, dst, S5_heapbase);
4048 if (Universe::narrow_oop_shift() != 0) {
4049 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4050 shr(dst, LogMinObjAlignmentInBytes);
4051 }
4052 } else {
4053 dsub(dst, src, S5_heapbase);
4054 if (Universe::narrow_oop_shift() != 0) {
4055 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4056 shr(dst, LogMinObjAlignmentInBytes);
4057 }
4058 movz(dst, R0, src);
4059 }
4060 }
4061 }
4063 void MacroAssembler::encode_heap_oop_not_null(Register r) {
4064 assert (UseCompressedOops, "should be compressed");
4065 #ifdef ASSERT
4066 if (CheckCompressedOops) {
4067 Label ok;
4068 bne(r, R0, ok);
4069 delayed()->nop();
4070 stop("null oop passed to encode_heap_oop_not_null");
4071 bind(ok);
4072 }
4073 #endif
4074 verify_oop(r, "broken oop in encode_heap_oop_not_null");
4075 if (Universe::narrow_oop_base() != NULL) {
4076 dsub(r, r, S5_heapbase);
4077 }
4078 if (Universe::narrow_oop_shift() != 0) {
4079 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4080 shr(r, LogMinObjAlignmentInBytes);
4081 }
4083 }
4085 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
4086 assert (UseCompressedOops, "should be compressed");
4087 #ifdef ASSERT
4088 if (CheckCompressedOops) {
4089 Label ok;
4090 bne(src, R0, ok);
4091 delayed()->nop();
4092 stop("null oop passed to encode_heap_oop_not_null2");
4093 bind(ok);
4094 }
4095 #endif
4096 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
4098 if (Universe::narrow_oop_base() != NULL) {
4099 dsub(dst, src, S5_heapbase);
4100 if (Universe::narrow_oop_shift() != 0) {
4101 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4102 shr(dst, LogMinObjAlignmentInBytes);
4103 }
4104 } else {
4105 if (Universe::narrow_oop_shift() != 0) {
4106 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4107 dsrl(dst, src, LogMinObjAlignmentInBytes);
4108 } else {
4109 if (dst != src) move(dst, src);
4110 }
4111 }
4112 }
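// Decoding is the inverse: oop = (narrow << shift) + base, with narrow == 0
// mapped back to NULL. The original value is kept in AT (or in src) so the
// trailing movz can force the result to NULL instead of the bare heap base.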
4114 void MacroAssembler::decode_heap_oop(Register r) {
4115 #ifdef ASSERT
4116 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
4117 #endif
4118 if (Universe::narrow_oop_base() == NULL) {
4119 if (Universe::narrow_oop_shift() != 0) {
4120 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4121 shl(r, LogMinObjAlignmentInBytes);
4122 }
4123 } else {
4124 move(AT, r);
4125 if (Universe::narrow_oop_shift() != 0) {
4126 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4127 shl(r, LogMinObjAlignmentInBytes);
4128 }
4129 dadd(r, r, S5_heapbase);
4130 movz(r, R0, AT);
4131 }
4132 verify_oop(r, "broken oop in decode_heap_oop");
4133 }
4135 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
4136 #ifdef ASSERT
4137 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
4138 #endif
4139 if (Universe::narrow_oop_base() == NULL) {
4140 if (Universe::narrow_oop_shift() != 0) {
4141 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4142 dsll(dst, src, LogMinObjAlignmentInBytes);
4143 } else {
4144 if (dst != src) move(dst, src);
4145 }
4146 } else {
4147 if (dst == src) {
4148 move(AT, dst);
4149 if (Universe::narrow_oop_shift() != 0) {
4150 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4151 shl(dst, LogMinObjAlignmentInBytes);
4152 }
4153 dadd(dst, dst, S5_heapbase);
4154 movz(dst, R0, AT);
4155 } else {
4156 if (Universe::narrow_oop_shift() != 0) {
4157 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4158 dsll(dst, src, LogMinObjAlignmentInBytes);
4159 daddu(dst, dst, S5_heapbase);
4160 } else {
4161 daddu(dst, src, S5_heapbase);
4162 }
4163 movz(dst, R0, src);
4164 }
4165 }
4166 verify_oop(dst, "broken oop in decode_heap_oop");
4167 }
4169 void MacroAssembler::decode_heap_oop_not_null(Register r) {
4170 // Note: unlike the x86 version, this changes no condition flags (MIPS has none).
4171 assert (UseCompressedOops, "should only be used for compressed headers");
4172 assert (Universe::heap() != NULL, "java heap should be initialized");
4173 // Cannot assert, unverified entry point counts instructions (see .ad file)
4174 // vtableStubs also counts instructions in pd_code_size_limit.
4175 // Also do not verify_oop as this is called by verify_oop.
4176 if (Universe::narrow_oop_shift() != 0) {
4177 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4178 shl(r, LogMinObjAlignmentInBytes);
4179 if (Universe::narrow_oop_base() != NULL) {
4180 daddu(r, r, S5_heapbase);
4181 }
4182 } else {
4183 assert (Universe::narrow_oop_base() == NULL, "sanity");
4184 }
4185 }
4187 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
4188 assert (UseCompressedOops, "should only be used for compressed headers");
4189 assert (Universe::heap() != NULL, "java heap should be initialized");
4191 // Cannot assert, unverified entry point counts instructions (see .ad file)
4192 // vtableStubs also counts instructions in pd_code_size_limit.
4193 // Also do not verify_oop as this is called by verify_oop.
4194 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
4195 if (Universe::narrow_oop_shift() != 0) {
4196 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4197 if (LogMinObjAlignmentInBytes == Address::times_8) {
4198 dsll(dst, src, LogMinObjAlignmentInBytes);
4199 daddu(dst, dst, S5_heapbase);
4200 } else {
4201 dsll(dst, src, LogMinObjAlignmentInBytes);
4202 if (Universe::narrow_oop_base() != NULL) {
4203 daddu(dst, dst, S5_heapbase);
4204 }
4205 }
4206 } else {
4207 assert (Universe::narrow_oop_base() == NULL, "sanity");
4208 if (dst != src) {
4209 move(dst, src);
4210 }
4211 }
4212 }
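// Klass pointers are never NULL, so the klass variants below need no movz
// NULL check: encode is (klass - narrow_klass_base()) >> narrow_klass_shift(),
// decode the inverse. AT serves as scratch for the 64-bit base constant,
// hence the asserts that the value register is not AT.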
4214 void MacroAssembler::encode_klass_not_null(Register r) {
4215 if (Universe::narrow_klass_base() != NULL) {
4216 assert(r != AT, "Encoding a klass in AT");
4217 set64(AT, (int64_t)Universe::narrow_klass_base());
4218 dsub(r, r, AT);
4219 }
4220 if (Universe::narrow_klass_shift() != 0) {
4221 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
4222 shr(r, LogKlassAlignmentInBytes);
4223 }
4224 // Not necessary for MIPS at all.
4225 //if (Universe::narrow_klass_base() != NULL) {
4226 // reinit_heapbase();
4227 //}
4228 }
4230 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
4231 if (dst == src) {
4232 encode_klass_not_null(src);
4233 } else {
4234 if (Universe::narrow_klass_base() != NULL) {
4235 set64(dst, (int64_t)Universe::narrow_klass_base());
4236 dsub(dst, src, dst);
4237 if (Universe::narrow_klass_shift() != 0) {
4238 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
4239 shr(dst, LogKlassAlignmentInBytes);
4240 }
4241 } else {
4242 if (Universe::narrow_klass_shift() != 0) {
4243 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
4244 dsrl(dst, src, LogKlassAlignmentInBytes);
4245 } else {
4246 move(dst, src);
4247 }
4248 }
4249 }
4250 }
4252 // Function instr_size_for_decode_klass_not_null() counts the bytes of code
4253 // generated by decode_klass_not_null(register r) when (Universe::heap() != NULL).
4254 // (The reinit_heapbase() needed on x86 is unnecessary on MIPS.) Hence, if the
4255 // instructions emitted there change, this method needs to be updated to match.
4256 int MacroAssembler::instr_size_for_decode_klass_not_null() {
4257 assert (UseCompressedClassPointers, "only for compressed klass ptrs");
4258 if (Universe::narrow_klass_base() != NULL) {
4259 // Worst case: set64(base) + daddu, plus shl when shifted; 4 bytes per instruction.
4260 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
4261 } else {
4262 // No base to add: just a shl when shifted, nothing otherwise.
4263 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
4264 }
4265 }
4267 void MacroAssembler::decode_klass_not_null(Register r) {
4268 assert (UseCompressedClassPointers, "should only be used for compressed headers");
4269 assert(r != AT, "Decoding a klass in AT");
4270 // Cannot assert, unverified entry point counts instructions (see .ad file)
4271 // vtableStubs also counts instructions in pd_code_size_limit.
4272 // Also do not verify_oop as this is called by verify_oop.
4273 if (Universe::narrow_klass_shift() != 0) {
4274 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
4275 shl(r, LogKlassAlignmentInBytes);
4276 }
4277 if (Universe::narrow_klass_base() != NULL) {
4278 set64(AT, (int64_t)Universe::narrow_klass_base());
4279 daddu(r, r, AT);
4280 // Not necessary for MIPS at all.
4281 //reinit_heapbase();
4282 }
4283 }
4285 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
4286 assert (UseCompressedClassPointers, "should only be used for compressed headers");
4288 if (dst == src) {
4289 decode_klass_not_null(dst);
4290 } else {
4291 // Cannot assert, unverified entry point counts instructions (see .ad file)
4292 // vtableStubs also counts instructions in pd_code_size_limit.
4293 // Also do not verify_oop as this is called by verify_oop.
4294 set64(dst, (int64_t)Universe::narrow_klass_base());
4295 if (Universe::narrow_klass_shift() != 0) {
4296 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
4297 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
4298 dsll(AT, src, Address::times_8);
4299 daddu(dst, dst, AT);
4300 } else {
4301 daddu(dst, src, dst);
4302 }
4303 }
4304 }
4306 void MacroAssembler::incrementl(Register reg, int value) {
4307 if (value == min_jint) {
4308 move(AT, value);
4309 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
4310 return;
4311 }
4312 if (value < 0) { decrementl(reg, -value); return; }
4313 if (value == 0) return;
4315 if (Assembler::is_simm16(value)) {
4316 NOT_LP64(addiu(reg, reg, value));
4317 LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
4318 } else {
4319 move(AT, value);
4320 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
4321 }
4322 }
4324 void MacroAssembler::decrementl(Register reg, int value) {
4325 if (value == min_jint) {
4326 move(AT, value);
4327 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
4328 return;
4329 }
4330 if (value < 0) { incrementl(reg, -value); return; }
4331 if (value == 0) return;
4333 if (Assembler::is_simm16(value)) {
4334 NOT_LP64(addiu(reg, reg, -value));
4335 LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
4336 } else {
4337 move(AT, value);
4338 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
4339 }
4340 }
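// S5_heapbase caches Universe::narrow_ptrs_base() for the encode/decode
// sequences above. Before the heap is initialized the final base is unknown,
// so it is loaded indirectly through narrow_ptrs_base_addr().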
4342 void MacroAssembler::reinit_heapbase() {
4343 if (UseCompressedOops || UseCompressedClassPointers) {
4344 if (Universe::heap() != NULL) {
4345 if (Universe::narrow_oop_base() == NULL) {
4346 move(S5_heapbase, R0);
4347 } else {
4348 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
4349 }
4350 } else {
4351 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
4352 ld(S5_heapbase, S5_heapbase, 0);
4353 }
4354 }
4355 }
4356 #endif // _LP64
4358 void MacroAssembler::check_klass_subtype(Register sub_klass,
4359 Register super_klass,
4360 Register temp_reg,
4361 Label& L_success) {
4362 // cf. the interpreter's gen_subtype_check()
4363 Label L_failure;
4364 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
4365 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
4366 bind(L_failure);
4367 }
4369 SkipIfEqual::SkipIfEqual(
4370 MacroAssembler* masm, const bool* flag_addr, bool value) {
4371 _masm = masm;
4372 _masm->li(AT, (address)flag_addr);
4373 _masm->lb(AT, AT, 0);
4374 _masm->addi(AT, AT, -value);
4375 _masm->beq(AT, R0, _label);
4376 _masm->delayed()->nop();
4377 }
4378 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
4379 Register super_klass,
4380 Register temp_reg,
4381 Label* L_success,
4382 Label* L_failure,
4383 Label* L_slow_path,
4384 RegisterOrConstant super_check_offset) {
4385 assert_different_registers(sub_klass, super_klass, temp_reg);
4386 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
4387 if (super_check_offset.is_register()) {
4388 assert_different_registers(sub_klass, super_klass,
4389 super_check_offset.as_register());
4390 } else if (must_load_sco) {
4391 assert(temp_reg != noreg, "supply either a temp or a register offset");
4392 }
4394 Label L_fallthrough;
4395 int label_nulls = 0;
4396 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
4397 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
4398 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
4399 assert(label_nulls <= 1, "at most one NULL in the batch");
4401 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
4402 int sco_offset = in_bytes(Klass::super_check_offset_offset());
4403 // If the pointers are equal, we are done (e.g., String[] elements).
4404 // This self-check enables sharing of secondary supertype arrays among
4405 // non-primary types such as array-of-interface. Otherwise, each such
4406 // type would need its own customized SSA.
4407 // We move this check to the front of the fast path because many
4408 // type checks are in fact trivially successful in this manner,
4409 // so we get a nicely predicted branch right at the start of the check.
4410 //cmpptr(sub_klass, super_klass);
4411 //local_jcc(Assembler::equal, *L_success);
4412 beq(sub_klass, super_klass, *L_success);
4413 delayed()->nop();
4414 // Check the supertype display:
4415 if (must_load_sco) {
4416 // super_check_offset is a u4 field; lwu zero-extends it correctly on LP64.
4417 lwu(temp_reg, super_klass, sco_offset);
4418 super_check_offset = RegisterOrConstant(temp_reg);
4419 }
4420 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
4421 daddu(AT, sub_klass, AT);
4422 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
4424 // This check has worked decisively for primary supers.
4425 // Secondary supers are sought in the super_cache ('super_cache_addr').
4426 // (Secondary supers are interfaces and very deeply nested subtypes.)
4427 // This works in the same check above because of a tricky aliasing
4428 // between the super_cache and the primary super display elements.
4429 // (The 'super_check_addr' can address either, as the case requires.)
4430 // Note that the cache is updated below if it does not help us find
4431 // what we need immediately.
4432 // So if it was a primary super, we can just fail immediately.
4433 // Otherwise, it's the slow path for us (no success at this point).
4435 if (super_check_offset.is_register()) {
4436 beq(super_klass, AT, *L_success);
4437 delayed()->nop();
4438 addi(AT, super_check_offset.as_register(), -sc_offset);
4439 if (L_failure == &L_fallthrough) {
4440 beq(AT, R0, *L_slow_path);
4441 delayed()->nop();
4442 } else {
4443 bne(AT, R0, *L_failure);
4444 delayed()->nop();
4445 b(*L_slow_path);
4446 delayed()->nop();
4447 }
4448 } else if (super_check_offset.as_constant() == sc_offset) {
4449 // Need a slow path; fast failure is impossible.
4450 if (L_slow_path == &L_fallthrough) {
4451 beq(super_klass, AT, *L_success);
4452 delayed()->nop();
4453 } else {
4454 bne(super_klass, AT, *L_slow_path);
4455 delayed()->nop();
4456 b(*L_success);
4457 delayed()->nop();
4458 }
4459 } else {
4460 // No slow path; it's a fast decision.
4461 if (L_failure == &L_fallthrough) {
4462 beq(super_klass, AT, *L_success);
4463 delayed()->nop();
4464 } else {
4465 bne(super_klass, AT, *L_failure);
4466 delayed()->nop();
4467 b(*L_success);
4468 delayed()->nop();
4469 }
4470 }
4472 bind(L_fallthrough);
4474 }
4477 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
4478 Register super_klass,
4479 Register temp_reg,
4480 Register temp2_reg,
4481 Label* L_success,
4482 Label* L_failure,
4483 bool set_cond_codes) {
4484 assert_different_registers(sub_klass, super_klass, temp_reg);
4485 if (temp2_reg != noreg)
4486 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
4487 else
4488 temp2_reg = T9;
4489 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
4491 Label L_fallthrough;
4492 int label_nulls = 0;
4493 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
4494 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
4495 assert(label_nulls <= 1, "at most one NULL in the batch");
4497 // a couple of useful fields in sub_klass:
4498 int ss_offset = in_bytes(Klass::secondary_supers_offset());
4499 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
4500 Address secondary_supers_addr(sub_klass, ss_offset);
4501 Address super_cache_addr( sub_klass, sc_offset);
4503 // Do a linear scan of the secondary super-klass chain.
4504 // This code is rarely used, so simplicity is a virtue here.
4505 // The repne_scan instruction uses fixed registers, which we must spill.
4506 // Don't worry too much about pre-existing connections with the input regs.
4508 #if 0
4509 assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
4510 assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
4511 #endif
4513 // Get super_klass value into rax (even if it was in rdi or rcx).
4514 /*
4515 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
4516 if (super_klass != rax || UseCompressedOops) {
4517 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
4518 mov(rax, super_klass);
4519 }
4520 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
4521 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
4522 */
4523 #ifndef PRODUCT
4524 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
4525 ExternalAddress pst_counter_addr((address) pst_counter);
4526 NOT_LP64( incrementl(pst_counter_addr) );
4527 //LP64_ONLY( lea(rcx, pst_counter_addr) );
4528 //LP64_ONLY( incrementl(Address(rcx, 0)) );
4529 #endif //PRODUCT
4531 // We will consult the secondary-super array.
4532 ld(temp_reg, secondary_supers_addr);
4533 // Load the array length (an int; lw sign-extends, but the length is non-negative).
4534 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
4535 // Skip to start of data.
4536 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
4538 // Scan temp2_reg entries starting at [temp_reg] for an occurrence of super_klass:
4539 // temp2_reg holds the remaining element count and temp_reg the current scan
4540 // address. The loop below branches to L_failure once the count reaches zero,
4541 // and to 'subtype' on a hit.
4544 /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
4545 Label Loop, subtype;
4546 bind(Loop);
4547 beq(temp2_reg, R0, *L_failure);
4548 delayed()->nop();
4549 ld(AT, temp_reg, 0);
4550 beq(AT, super_klass, subtype);
4551 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
4552 b(Loop);
4553 delayed()->daddi(temp2_reg, temp2_reg, -1);
4555 bind(subtype);
4556 sd(super_klass, super_cache_addr);
4557 if (L_success != &L_fallthrough) {
4558 b(*L_success);
4559 delayed()->nop();
4560 }
4562 /*
4563 if (set_cond_codes) {
4564 // Special hack for the AD files: rdi is guaranteed non-zero.
4565 assert(!pushed_rdi, "rdi must be left non-NULL");
4566 // Also, the condition codes are properly set Z/NZ on succeed/failure.
4567 }
4568 */
4569 // Success. Cache the super we found and proceed in triumph.
4570 #undef IS_A_TEMP
4572 bind(L_fallthrough);
4573 }
4574 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
4575 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
4576 sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
4577 verify_oop(oop_result, "broken oop in call_VM_base");
4578 }
4580 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
4581 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
4582 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
4583 }
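// argument_address(): for a constant arg_slot the result is simply
// Address(SP, expr_offset + arg_slot * stackElementSize); for a register slot
// the scaled index is folded into scale_reg first, which clobbers the
// caller's register.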
4585 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
4586 int extra_slot_offset) {
4587 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
4588 int stackElementSize = Interpreter::stackElementSize;
4589 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
4590 #ifdef ASSERT
4591 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
4592 assert(offset1 - offset == stackElementSize, "correct arithmetic");
4593 #endif
4594 Register scale_reg = NOREG;
4595 Address::ScaleFactor scale_factor = Address::no_scale;
4596 if (arg_slot.is_constant()) {
4597 offset += arg_slot.as_constant() * stackElementSize;
4598 } else {
4599 scale_reg = arg_slot.as_register();
4600 scale_factor = Address::times_8;
4601 }
4602 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
4603 // offset += wordSize; // return PC is on stack
4604 if (scale_reg == NOREG) return Address(SP, offset);
4605 else {
4606 dsll(scale_reg, scale_reg, scale_factor);
4607 daddu(scale_reg, SP, scale_reg);
4608 return Address(scale_reg, offset);
4609 }
4610 }
4612 SkipIfEqual::~SkipIfEqual() {
4613 _masm->bind(_label);
4614 }
4616 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
4617 switch (size_in_bytes) {
4618 #ifndef _LP64
4619 case 8:
4620 assert(dst2 != noreg, "second dest register required");
4621 lw(dst, src);
4622 lw(dst2, src.plus_disp(BytesPerInt));
4623 break;
4624 #else
4625 case 8: ld(dst, src); break;
4626 #endif
4627 case 4: lw(dst, src); break;
4628 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break;
4629 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break;
4630 default: ShouldNotReachHere();
4631 }
4632 }
4634 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
4635 switch (size_in_bytes) {
4636 #ifndef _LP64
4637 case 8:
4638 assert(src2 != noreg, "second source register required");
4639 sw(src, dst);
4640 sw(src2, dst.plus_disp(BytesPerInt));
4641 break;
4642 #else
4643 case 8: sd(src, dst); break;
4644 #endif
4645 case 4: sw(src, dst); break;
4646 case 2: sh(src, dst); break;
4647 case 1: sb(src, dst); break;
4648 default: ShouldNotReachHere();
4649 }
4650 }
4652 // Look up the method for a megamorphic invokeinterface call.
4653 // The target method is determined by <intf_klass, itable_index>.
4654 // The receiver klass is in recv_klass.
4655 // On success, the result will be in method_result, and execution falls through.
4656 // On failure, execution transfers to the given label.
4657 void MacroAssembler::lookup_interface_method(Register recv_klass,
4658 Register intf_klass,
4659 RegisterOrConstant itable_index,
4660 Register method_result,
4661 Register scan_temp,
4662 Label& L_no_such_interface) {
4663 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
4664 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
4665 "caller must use same register for non-constant itable index as for method");
4667 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
4668 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
4669 int itentry_off = itableMethodEntry::method_offset_in_bytes();
4670 int scan_step = itableOffsetEntry::size() * wordSize;
4671 int vte_size = vtableEntry::size() * wordSize;
4672 Address::ScaleFactor times_vte_scale = Address::times_ptr;
4673 assert(vte_size == wordSize, "else adjust times_vte_scale");
4675 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
4677 // %%% Could store the aligned, prescaled offset in the klassoop.
4678 // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
4679 dsll(scan_temp, scan_temp, times_vte_scale);
4680 daddu(scan_temp, recv_klass, scan_temp);
4681 daddiu(scan_temp, scan_temp, vtable_base);
4682 if (HeapWordsPerLong > 1) {
4683 // Round up to align_object_offset boundary
4684 // see code for InstanceKlass::start_of_itable!
4685 round_to(scan_temp, BytesPerLong);
4686 }
4688 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
4689 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
4690 // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
4691 if (itable_index.is_constant()) {
4692 set64(AT, (int)itable_index.as_constant());
4693 dsll(AT, AT, (int)Address::times_ptr);
4694 } else {
4695 dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
4696 }
4697 daddu(AT, AT, recv_klass);
4698 daddiu(recv_klass, AT, itentry_off);
4700 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
4701 // if (scan->interface() == intf) {
4702 // result = (klass + scan->offset() + itable_index);
4703 // }
4704 // }
4705 Label search, found_method;
4707 for (int peel = 1; peel >= 0; peel--) {
4708 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
4710 if (peel) {
4711 beq(intf_klass, method_result, found_method);
4712 nop();
4713 } else {
4714 bne(intf_klass, method_result, search);
4715 nop();
4716 // (invert the test to fall through to found_method...)
4717 }
4719 if (!peel) break;
4721 bind(search);
4723 // Check that the previous entry is non-null. A null entry means that
4724 // the receiver class doesn't implement the interface, and wasn't the
4725 // same as when the caller was compiled.
4726 beq(method_result, R0, L_no_such_interface);
4727 nop();
4728 daddiu(scan_temp, scan_temp, scan_step);
4729 }
4731 bind(found_method);
4733 // Got a hit.
4734 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
4735 //ld(method_result, Address(recv_klass, scan_temp, Address::times_1));
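// gsldx is the Loongson-EXT indexed load (base + index + offset); plain MIPS
// has no such addressing mode, so the fallback forms the address in AT first.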
4736 if (UseLoongsonISA) {
4737 gsldx(method_result, recv_klass, scan_temp, 0);
4738 } else {
4739 daddu(AT, recv_klass, scan_temp);
4740 ld(method_result, AT);
4741 }
4742 }
4745 // virtual method calling
4746 void MacroAssembler::lookup_virtual_method(Register recv_klass,
4747 RegisterOrConstant vtable_index,
4748 Register method_result) {
4749 Register tmp = GP;
4750 push(tmp);
4752 if (vtable_index.is_constant()) {
4753 assert_different_registers(recv_klass, method_result, tmp);
4754 } else {
4755 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
4756 }
4757 const int base = InstanceKlass::vtable_start_offset() * wordSize;
4758 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
4759 /*
4760 Address vtable_entry_addr(recv_klass,
4761 vtable_index, Address::times_ptr,
4762 base + vtableEntry::method_offset_in_bytes());
4763 */
4764 if (vtable_index.is_constant()) {
4765 set64(AT, vtable_index.as_constant());
4766 dsll(AT, AT, (int)Address::times_ptr);
4767 } else {
4768 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
4769 }
4770 set64(tmp, base + vtableEntry::method_offset_in_bytes());
4771 daddu(tmp, tmp, AT);
4772 daddu(tmp, tmp, recv_klass);
4773 ld(method_result, tmp, 0);
4775 pop(tmp);
4776 }