src/cpu/mips/vm/assembler_mips.cpp

author       fujie
date         Mon, 10 Apr 2017 14:48:12 -0400
changeset    397:1e8b8bc62356
parent       391:910b77f150c4
child        404:11676c4f1569
permissions  -rw-r--r--

[C2] Remove unnecessary nops for code alignment.

/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
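
// Static scratch arrays used by the debugging helpers below: filled by
// save_registers()/restore_registers() and dumped by print().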
intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for (k = 0; k < sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for (k = 0; k < sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }

void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ sw(as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ swc1(as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ lw(as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ lwc1(as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  _rspec = rspec_from_rtype(rtype, target);
}

// Implementation of Address

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

// Implementation of Assembler
const char *Assembler::ops_name[] = {
  "special",  "regimm",   "j",      "jal",    "beq",      "bne",      "blez",   "bgtz",
  "addi",     "addiu",    "slti",   "sltiu",  "andi",     "ori",      "xori",   "lui",
  "cop0",     "cop1",     "cop2",   "cop3",   "beql",     "bnel",     "bleql",  "bgtzl",
  "daddi",    "daddiu",   "ldl",    "ldr",    "",         "",         "",       "",
  "lb",       "lh",       "lwl",    "lw",     "lbu",      "lhu",      "lwr",    "lwu",
  "sb",       "sh",       "swl",    "sw",     "sdl",      "sdr",      "swr",    "cache",
  "ll",       "lwc1",     "",       "",       "lld",      "ldc1",     "",       "ld",
  "sc",       "swc1",     "",       "",       "scd",      "sdc1",     "",       "sd"
};

const char* Assembler::special_name[] = {
  "sll",      "",         "srl",      "sra",      "sllv",     "",         "srlv",     "srav",
  "jr",       "jalr",     "movz",     "movn",     "syscall",  "break",    "",         "sync",
  "mfhi",     "mthi",     "mflo",     "mtlo",     "dsll",     "",         "dsrl",     "dsra",
  "mult",     "multu",    "div",      "divu",     "dmult",    "dmultu",   "ddiv",     "ddivu",
  "add",      "addu",     "sub",      "subu",     "and",      "or",       "xor",      "nor",
  "",         "",         "slt",      "sltu",     "dadd",     "daddu",    "dsub",     "dsubu",
  "tge",      "tgeu",     "tlt",      "tltu",     "teq",      "",         "tne",      "",
  "dsll",     "",         "dsrl",     "dsra",     "dsll32",   "",         "dsrl32",   "dsra32"
};

const char* Assembler::cop1_name[] = {
  "add",      "sub",      "mul",      "div",      "sqrt",     "abs",      "mov",      "neg",
  "round.l",  "trunc.l",  "ceil.l",   "floor.l",  "round.w",  "trunc.w",  "ceil.w",   "floor.w",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "c.f",      "c.un",     "c.eq",     "c.ueq",    "c.olt",    "c.ult",    "c.ole",    "c.ule",
  "c.sf",     "c.ngle",   "c.seq",    "c.ngl",    "c.lt",     "c.nge",    "c.le",     "c.ngt"
};

const char* Assembler::cop1x_name[] = {
  "lwxc1",    "ldxc1",    "",         "",         "",         "luxc1",    "",         "",
  "swxc1",    "sdxc1",    "",         "",         "",         "suxc1",    "",         "prefx",
  "",         "",         "",         "",         "",         "",         "alnv.ps",  "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "madd.s",   "madd.d",   "",         "",         "",         "",         "madd.ps",  "",
  "msub.s",   "msub.d",   "",         "",         "",         "",         "msub.ps",  "",
  "nmadd.s",  "nmadd.d",  "",         "",         "",         "",         "nmadd.ps", "",
  "nmsub.s",  "nmsub.d",  "",         "",         "",         "",         "nmsub.ps", ""
};

const char* Assembler::special2_name[] = {
  "madd",     "",         "mul",      "",         "msub",     "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "gsdmult",  "",         "",         "gsdiv",    "gsddiv",   "",         "",
  "",         "",         "",         "",         "gsmod",    "gsdmod",   "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         ""
};

const char* Assembler::special3_name[] = {
  "ext",      "",         "",         "",         "ins",      "dinsm",    "dinsu",    "dins",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "bshfl",    "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
};

const char* Assembler::regimm_name[] = {
  "bltz",     "bgez",     "bltzl",    "bgezl",    "",         "",         "",         "",
  "tgei",     "tgeiu",    "tlti",     "tltiu",    "teqi",     "",         "tnei",     "",
  "bltzal",   "bgezal",   "bltzall",  "bgezall"
};

const char* Assembler::gs_ldc2_name[] = {
  "gslbx",    "gslhx",    "gslwx",    "gsldx",    "",         "",         "gslwxc1",  "gsldxc1"
};

const char* Assembler::gs_lwc2_name[] = {
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "gslble",   "gslbgt",   "gslhle",   "gslhgt",   "gslwle",   "gslwgt",   "gsldle",   "gsldgt",
  "",         "",         "",         "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "", /* LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits. */
  "gslq",     ""
};

const char* Assembler::gs_sdc2_name[] = {
  "gssbx",    "gsshx",    "gsswx",    "gssdx",    "",         "",         "gsswxc1",  "gssdxc1"
};

const char* Assembler::gs_swc2_name[] = {
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "gssble",   "gssbgt",   "gsshle",   "gsshgt",   "gsswle",   "gsswgt",   "gssdle",   "gssdgt",
  "",         "",         "",         "",         "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1",
  "gssq",     ""
};

// Misleading name: this prints only branch/jump instructions.
void Assembler::print_instruction(int inst) {
  const char *s;
  switch (opcode(inst)) {
  default:
    s = ops_name[opcode(inst)];
    break;
  case special_op:
    s = special_name[special(inst)];
    break;
  case regimm_op:
    s = regimm_name[rt(inst)]; // regimm instructions are decoded by their rt field
    break;
  }

  ::tty->print("%s", s);
}

void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;

/* The far-branch stub patched here has the form:
  move(AT, RA); // dadd
  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
  nop();
  lui(T9, 0);       // to be patched
  ori(T9, T9, 0);
  daddu(T9, T9, RA);
  move(RA, AT);
  jr(T9);
 */
  if (special(stub_inst) == dadd_op) {
    jint *pc = (jint *)branch;

    assert(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op, "Not a branch label patch");
    if (!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      /* near target: revert the stub to "b + nops" */
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
#undef __
    }
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", (long)target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
      tty->print("0x%x, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
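
// If x is a contiguous low-order bit mask (x == (1 << n) - 1), return its
// width n; otherwise return -1.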
int Assembler::is_int_mask(int x) {
  int xx = x;
  int count = 0;

  while (x != 0) {
    x &= (x - 1); // clear the lowest set bit
    count++;
  }

  if ((1 << count) == (xx + 1)) {
    return count;
  } else {
    return -1;
  }
}
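
// jlong variant of is_int_mask(): returns the width of a contiguous
// low-order bit mask, or -1 if x is not of that form.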
int Assembler::is_jlong_mask(jlong x) {
  jlong xx = x;
  int count = 0;

  while (x != 0) {
    x &= (x - 1);
    count++;
  }

  if (((jlong)1 << count) == (xx + 1)) { // use a 64-bit shift: count may exceed 31
    return count;
  } else {
    return -1;
  }
}

// Patch the 16-bit branch offset of `inst` (located at inst_pos) so that it
// branches to dest_pos. No range check is performed in product builds.
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
  int v = (dest_pos - inst_pos - 4) >> 2;
  switch (opcode(inst)) {
  case j_op:
  case jal_op:
    assert(false, "should not use j/jal here");
    break;
  default:
    assert(is_simm16(v), "must be simm16");
#ifndef PRODUCT
    if (!is_simm16(v)) {
      tty->print_cr("must be simm16");
      tty->print_cr("Inst: %x", inst);
    }
#endif

    v = low16(v);
    inst &= 0xffff0000;
    break;
  }

  return inst | v;
}
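
// Decode the branch offset stored in `inst` at position `pos` and return the
// absolute destination, or 0 if the encoded offset is 0 (label not yet bound).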
int Assembler::branch_destination(int inst, int pos) {
  int off;

  switch (opcode(inst)) {
  case j_op:
  case jal_op:
    assert(false, "should not use j/jal here");
    break;
  default:
    off = expand(low16(inst), 15);
    break;
  }

  return off ? pos + 4 + (off << 2) : 0;
}

int AbstractAssembler::code_fill_byte() {
  return 0x00;                  // illegal instruction 0x00000000
}

// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::lb(Register rt, Address src) {
  lb(rt, src.base(), src.disp());
}

void Assembler::lbu(Register rt, Address src) {
  lbu(rt, src.base(), src.disp());
}

void Assembler::ld(Register rt, Address src) {
  ld(rt, src.base(), src.disp());
}

void Assembler::ldl(Register rt, Address src) {
  ldl(rt, src.base(), src.disp());
}

void Assembler::ldr(Register rt, Address src) {
  ldr(rt, src.base(), src.disp());
}

void Assembler::lh(Register rt, Address src) {
  lh(rt, src.base(), src.disp());
}

void Assembler::lhu(Register rt, Address src) {
  lhu(rt, src.base(), src.disp());
}

void Assembler::ll(Register rt, Address src) {
  ll(rt, src.base(), src.disp());
}

void Assembler::lld(Register rt, Address src) {
  lld(rt, src.base(), src.disp());
}

void Assembler::lw(Register rt, Address src) {
  lw(rt, src.base(), src.disp());
}
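
// lea: materialize the effective address base + disp into rt (no memory access).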
void Assembler::lea(Register rt, Address src) {
#ifdef _LP64
  daddi(rt, src.base(), src.disp());
#else
  addi(rt, src.base(), src.disp());
#endif
}

void Assembler::lwl(Register rt, Address src) {
  lwl(rt, src.base(), src.disp());
}

void Assembler::lwr(Register rt, Address src) {
  lwr(rt, src.base(), src.disp());
}

void Assembler::lwu(Register rt, Address src) {
  lwu(rt, src.base(), src.disp());
}

void Assembler::sb(Register rt, Address dst) {
  sb(rt, dst.base(), dst.disp());
}

void Assembler::sc(Register rt, Address dst) {
  sc(rt, dst.base(), dst.disp());
}

void Assembler::scd(Register rt, Address dst) {
  scd(rt, dst.base(), dst.disp());
}

void Assembler::sd(Register rt, Address dst) {
  sd(rt, dst.base(), dst.disp());
}

void Assembler::sdl(Register rt, Address dst) {
  sdl(rt, dst.base(), dst.disp());
}

void Assembler::sdr(Register rt, Address dst) {
  sdr(rt, dst.base(), dst.disp());
}

void Assembler::sh(Register rt, Address dst) {
  sh(rt, dst.base(), dst.disp());
}

void Assembler::sw(Register rt, Address dst) {
  sw(rt, dst.base(), dst.disp());
}

void Assembler::swl(Register rt, Address dst) {
  swl(rt, dst.base(), dst.disp());
}

void Assembler::swr(Register rt, Address dst) {
  swr(rt, dst.base(), dst.disp());
}

void Assembler::lwc1(FloatRegister rt, Address src) {
  lwc1(rt, src.base(), src.disp());
}

void Assembler::ldc1(FloatRegister rt, Address src) {
  ldc1(rt, src.base(), src.disp());
}

void Assembler::swc1(FloatRegister rt, Address dst) {
  swc1(rt, dst.base(), dst.disp());
}

void Assembler::sdc1(FloatRegister rt, Address dst) {
  sdc1(rt, dst.base(), dst.disp());
}
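
// J-type jumps replace only the low 28 bits of the PC: the target must lie in
// the same 256 MB region as the delay-slot instruction (pc + 4).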
void Assembler::j(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000)) >> 2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000)) >> 2;
#endif
  emit_long((j_op << 26) | dest);
  has_delay_slot();
}

void Assembler::jal(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000)) >> 2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000)) >> 2;
#endif
  emit_long((jal_op << 26) | dest);
  has_delay_slot();
}

static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}
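
// Number of bytes emitted for a call to `target`, depending on whether the
// call site must be patchable and whether the target is out of jal range.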
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}

void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_jump(address target) {
  if (reachable_from_cache(target)) {
    //j(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_jump(address target) {
  return 6;
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_general_call(address target) {
  if (reachable_from_cache(target)) {
    //jal(target);
    //nop();
    return 2;
  } else {
    //set64(T9, (long)target);
    //jalr(T9);
    //nop();
    return insts_for_set64((jlong)target) + 2;
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}

int MacroAssembler::insts_for_patchable_call(address target) {
  return 6;
}

void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
/*
MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   0x00000055651ed514: dadd at, ra, zero
   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

   0x00000055651ed51c: sll zero, zero, 0
   0x00000055651ed520: lui t9, 0x0
   0x00000055651ed524: ori t9, t9, 0x21b8
   0x00000055651ed528: daddu t9, t9, ra
   0x00000055651ed52c: dadd ra, at, zero
   0x00000055651ed530: jr t9
   0x00000055651ed534: sll zero, zero, 0
*/
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* address must be bounded */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit,
// unless the difference is trivial (1 line or so).

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if (!Use3A2000) sync();
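  // LL/SC retry loop: load-linked the counter, add the increment, and
  // store-conditional; loop until the store succeeds.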
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
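  // Returns the code offset at which the object's header is first accessed,
  // used for implicit null-check bookkeeping.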
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}

// NOTE: we don't increment the SP after the call like the x86 version does;
// maybe this is a problem, FIXME. (yjl, 6/27/2005)
// The stack pointer adjustment is needed: see
// InterpreterMacroAssembler::super_call_VM_leaf. (yjl, 7/11/2005)
// This method handles the stack itself; callers no longer need to reserve
// stack space for the arguments. (yjl, 8/1/2005)
void MacroAssembler::call_VM_leaf_base(address entry_point,
    int number_of_arguments) {
  //call(RuntimeAddress(entry_point));
  //increment(rsp, number_of_arguments * wordSize);
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

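  // The ABI requires a 16-byte-aligned SP at calls. If SP is only 8-byte
  // aligned, push it down by 8 for the duration of the call.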
  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);

  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);

  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  bind(E);
}

void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::call(address entry) {
// C/C++ code assumes T9 is the entry point, so we always move entry into T9.
// Maybe there is a more graceful way to handle this. FIXME (yjl, 6/27/2005)
// For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh) {
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if (ShowMessageBoxOnError) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }
      //if (os::message_box(msg, "Execution stopped, print registers?"))
      //  regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}

void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the argument (yjl, 7/10/2005)
  addiu(SP, SP, -1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore the space reserved for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}

void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}

void MacroAssembler::print_reg(Register reg) {
/*
char *s = getenv("PRINT_REG");
if (s == NULL)
  return;
if (strcmp(s, "1") != 0)
  return;
*/
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

/*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int), relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();
  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
*/
}

void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float), relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}

void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  if (java_thread >= S0 && java_thread <= S7) {
#ifdef ASSERT
    { Label L;
      get_thread(AT);
      beq(java_thread, AT, L);
      delayed()->nop();
      stop("MacroAssembler::call_VM_base: java_thread not callee saved?");
      bind(L);
    }
#endif
  } else {
    get_thread(java_thread);
  }
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}

  1631 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  1633   move(V0, SP);
  1634   //we also reserve space for java_thread here
  1635 #ifndef _LP64
  1636   daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
  1637 #endif
  1638   move(AT, -(StackAlignmentInBytes));
  1639   andr(SP, SP, AT);
  1640   call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
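// For illustration: with StackAlignmentInBytes == 16 (an example value),
//   move(AT, -16);       // AT = 0x...fffffff0
//   andr(SP, SP, AT);    // SP &= ~15
// rounds SP down to the next 16-byte boundary, e.g. 0x1238 -> 0x1230,
// as required before calling out to the runtime.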
  1644 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  1645   call_VM_leaf_base(entry_point, number_of_arguments);
  1648 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  1649   if (arg_0 != A0) move(A0, arg_0);
  1650   call_VM_leaf(entry_point, 1);
  1653 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  1654   if (arg_0 != A0) move(A0, arg_0);
  1655   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1656   call_VM_leaf(entry_point, 2);
  1659 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  1660   if (arg_0 != A0) move(A0, arg_0);
  1661   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1662   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  1663   call_VM_leaf(entry_point, 3);
  1665 void MacroAssembler::super_call_VM_leaf(address entry_point) {
  1666 	MacroAssembler::call_VM_leaf_base(entry_point, 0);
  1670 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1671                                                    Register arg_1) {
  1672   if (arg_1 != A0) move(A0, arg_1);
  1673   MacroAssembler::call_VM_leaf_base(entry_point, 1);
  1677 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1678                                                    Register arg_1,
  1679                                                    Register arg_2) {
  1680   if (arg_1 != A0) move(A0, arg_1);
  1681   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1682   MacroAssembler::call_VM_leaf_base(entry_point, 2);
  1684 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1685                                                    Register arg_1,
  1686                                                    Register arg_2,
  1687                                                    Register arg_3) {
  1688   if (arg_1 != A0) move(A0, arg_1);
  1689   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1690   if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  1691   MacroAssembler::call_VM_leaf_base(entry_point, 3);
  1694 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
  1697 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
  1700 void MacroAssembler::null_check(Register reg, int offset) {
  1701   if (needs_explicit_null_check(offset)) {
  1702     // provoke OS NULL exception if reg = NULL by
  1703     // accessing M[reg] w/o changing any (non-CC) registers
  1704     // NOTE: the lw below is plenty to provoke a SEGV
  1705     lw(AT, reg, 0);
  1706 /* Jin
  1707     nop();	
  1708     nop();
  1709     nop();
  1710 */
  1711     // Note: should probably use testl(rax, Address(reg, 0));
  1712     //       may be shorter code (however, this version of
  1713     //       testl needs to be implemented first)
  1714   } else {
  1715     // nothing to do, (later) access of M[reg + offset]
  1716     // will provoke OS NULL exception if reg = NULL
  1720 void MacroAssembler::enter() {
  1721   push2(RA, FP);
  1722   move(FP, SP);
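// For illustration, after enter() the new frame looks like this
// (higher addresses at the top), matching what leave() undoes:
//   [ saved RA ]  <- FP + 1*wordSize
//   [ saved FP ]  <- FP == SP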
  1725 void MacroAssembler::leave() {
  1726 #ifndef _LP64
  1727   //move(SP, FP);
  1728   //pop2(FP, RA);
  1729   addi(SP, FP, 2 * wordSize);
  1730   lw(RA, SP, - 1 * wordSize);
  1731   lw(FP, SP, - 2 * wordSize);
  1732 #else
  1733   daddi(SP, FP, 2 * wordSize);
  1734   ld(RA, SP, - 1 * wordSize);
  1735   ld(FP, SP, - 2 * wordSize);
  1736 #endif
  1738 /*
  1739 void MacroAssembler::os_breakpoint() {
  1740   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  1741   // (e.g., MSVC can't call ps() otherwise)
  1742   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
  1744 */
  1745 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  1746   // determine java_thread register
  1747   if (!java_thread->is_valid()) {
  1748 #ifndef OPT_THREAD
  1749     java_thread = T1;
  1750     get_thread(java_thread);
  1751 #else
  1752     java_thread = TREG;
  1753 #endif
  1755   // we must set sp to zero to clear frame
  1756   st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1757   // must clear fp, so that compiled frames are not confused; it is possible
  1758   // that we need it only for debugging
  1759   if(clear_fp)	
  1760     st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1762   if (clear_pc)
  1763     st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1766 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
  1767                                            bool clear_pc) {
  1768   Register thread = TREG;
  1769 #ifndef OPT_THREAD
  1770   get_thread(thread);
  1771 #endif
  1772   // we must set sp to zero to clear frame
  1773   sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  1774   // must clear fp, so that compiled frames are not confused; it is
  1775   // possible that we need it only for debugging
  1776   if (clear_fp) {
  1777     sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  1780   if (clear_pc) {
  1781     sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  1785 // Write serialization page so VM thread can do a pseudo remote membar.
  1786 // We use the current thread pointer to calculate a thread specific
  1787 // offset to write to within the page. This minimizes bus traffic
  1788 // due to cache line collision.
  1789 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  1790   move(tmp, thread);
  1791   srl(tmp, tmp, os::get_serialize_page_shift_count());
  1792   move(AT, (os::vm_page_size() - sizeof(int)));
  1793   andr(tmp, tmp, AT);
  1794   sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
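// For illustration: the store above lands at
//   serialize_page + ((thread >> shift) & (page_size - sizeof(int)))
// so each thread tends to hit its own word of the page, keeping the
// serializing stores of different threads off each other's cache lines.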
  1797 // Calls to C land
  1798 //
  1799 // When entering C land, the fp & sp of the last Java frame have to be recorded
  1800 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
  1801 // has to be reset to 0. This is required to allow proper stack traversal.
  1802 void MacroAssembler::set_last_Java_frame(Register java_thread,
  1803                                          Register last_java_sp,
  1804                                          Register last_java_fp,
  1805                                          address  last_java_pc) {
  1806   // determine java_thread register
  1807   if (!java_thread->is_valid()) {
  1808 #ifndef OPT_THREAD
  1809     java_thread = T2;
  1810     get_thread(java_thread);
  1811 #else
  1812     java_thread = TREG;
  1813 #endif
  1815   // determine last_java_sp register
  1816   if (!last_java_sp->is_valid()) {
  1817     last_java_sp = SP;
  1820   // last_java_fp is optional
  1822   if (last_java_fp->is_valid()) {
  1823     st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1826   // last_java_pc is optional
  1828   if (last_java_pc != NULL) {
  1829     relocate(relocInfo::internal_pc_type);
  1830     patchable_set48(AT, (long)last_java_pc);
  1831     st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1833   st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
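  // Note the ordering: fp and pc are stored before sp.  A non-zero
  // last_Java_sp is what marks the frame anchor as walkable, so it is
  // written last, once the rest of the anchor is consistent.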
  1836 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  1837                                          Register last_java_fp,
  1838                                          address  last_java_pc) {
  1839   // determine last_java_sp register
  1840   if (!last_java_sp->is_valid()) {
  1841     last_java_sp = SP; 
  1844   Register thread = TREG;
  1845 #ifndef OPT_THREAD
  1846   get_thread(thread);
  1847 #endif
  1848   // last_java_fp is optional
  1849   if (last_java_fp->is_valid()) {
  1850     sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  1853   // last_java_pc is optional
  1854   if (last_java_pc != NULL) {
  1855     Address java_pc(thread,
  1856                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  1857     li(AT, (intptr_t)(last_java_pc));
  1858     sd(AT, java_pc);
  1861   sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
  1864 //////////////////////////////////////////////////////////////////////////////////
  1865 #ifndef SERIALGC
  1867 void MacroAssembler::g1_write_barrier_pre(Register obj,
  1868 #ifndef _LP64
  1869                                           Register thread,
  1870 #endif
  1871                                           Register tmp,
  1872                                           Register tmp2,
  1873                                           bool tosca_live) {
  1874 /*  LP64_ONLY(Register thread = r15_thread;)
  1875   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1876                                        PtrQueue::byte_offset_of_active()));
  1878   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1879                                        PtrQueue::byte_offset_of_index()));
  1880   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1881                                        PtrQueue::byte_offset_of_buf()));
  1884   Label done;
  1885   Label runtime;
  1887   // if (!marking_in_progress) goto done;
  1888   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
  1889     cmpl(in_progress, 0);
  1890   } else {
  1891     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
  1892     cmpb(in_progress, 0);
  1894   jcc(Assembler::equal, done);
  1896   // if (x.f == NULL) goto done;
  1897   cmpptr(Address(obj, 0), NULL_WORD);
  1898   jcc(Assembler::equal, done);
  1900   // Can we store original value in the thread's buffer?
  1902   LP64_ONLY(movslq(tmp, index);)
  1903   movptr(tmp2, Address(obj, 0));
  1904 #ifdef _LP64
  1905   cmpq(tmp, 0);
  1906 #else
  1907   cmpl(index, 0);
  1908 #endif
  1909   jcc(Assembler::equal, runtime);
  1910 #ifdef _LP64
  1911   subq(tmp, wordSize);
  1912   movl(index, tmp);
  1913   addq(tmp, buffer);
  1914 #else
  1915   subl(index, wordSize);
  1916   movl(tmp, buffer);
  1917   addl(tmp, index);
  1918 #endif
  1919   movptr(Address(tmp, 0), tmp2);
  1920   jmp(done);
  1921   bind(runtime);
  1922   // save the live input values
  1923   if(tosca_live) push(rax);
  1924   push(obj);
  1925 #ifdef _LP64
  1926   movq(c_rarg0, Address(obj, 0));
  1927   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
  1928 #else
  1929   push(thread);
  1930   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  1931   pop(thread);
  1932 #endif
  1933   pop(obj);
  1934   if(tosca_live) pop(rax);
  1935   bind(done);
  1936 */
  1939 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1940                                            Register new_val,
  1941 #ifndef _LP64
  1942                                            Register thread,
  1943 #endif
  1944                                            Register tmp,
  1945                                            Register tmp2) {
  1947   /*LP64_ONLY(Register thread = r15_thread;)
  1948   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1949                                        PtrQueue::byte_offset_of_index()));
  1950   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1951                                        PtrQueue::byte_offset_of_buf()));
  1952   BarrierSet* bs = Universe::heap()->barrier_set();
  1953   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1954   Label done;
  1955   Label runtime;
  1957   // Does store cross heap regions?
  1959   movptr(tmp, store_addr);
  1960   xorptr(tmp, new_val);
  1961   shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  1962   jcc(Assembler::equal, done);
  1964   // crosses regions, storing NULL?
  1966   cmpptr(new_val, (int32_t) NULL_WORD);
  1967   jcc(Assembler::equal, done);
  1969   // storing region crossing non-NULL, is card already dirty?
  1971   ExternalAddress cardtable((address) ct->byte_map_base);
  1972   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1973 #ifdef _LP64
  1974   const Register card_addr = tmp;
  1976   movq(card_addr, store_addr);
  1977   shrq(card_addr, CardTableModRefBS::card_shift);
  1979   lea(tmp2, cardtable);
  1981   // get the address of the card
  1982   addq(card_addr, tmp2);
  1983 #else
  1984   const Register card_index = tmp;
  1986   movl(card_index, store_addr);
  1987   shrl(card_index, CardTableModRefBS::card_shift);
  1989   Address index(noreg, card_index, Address::times_1);
  1990   const Register card_addr = tmp;
  1991   lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
  1992 #endif
  1993   cmpb(Address(card_addr, 0), 0);
  1994   jcc(Assembler::equal, done);
  1996   // storing a region crossing, non-NULL oop, card is clean.
  1997   // dirty card and log.
  1999   movb(Address(card_addr, 0), 0);
  2001   cmpl(queue_index, 0);
  2002   jcc(Assembler::equal, runtime);
  2003   subl(queue_index, wordSize);
  2004   movptr(tmp2, buffer);
  2005 #ifdef _LP64
  2006   movslq(rscratch1, queue_index);
  2007   addq(tmp2, rscratch1);
  2008   movq(Address(tmp2, 0), card_addr);
  2009 #else
  2010   addl(tmp2, queue_index);
  2011   movl(Address(tmp2, 0), card_index);
  2012 #endif
  2013   jmp(done);
  2015   bind(runtime);
  2016   // save the live input values
  2017   push(store_addr);
  2018   push(new_val);
  2019 #ifdef _LP64
  2020   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
  2021 #else
  2022   push(thread);
  2023   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  2024   pop(thread);
  2025 #endif
  2026   pop(new_val);
  2027   pop(store_addr);
  2029   bind(done);
  2030 */
  2033 #endif // SERIALGC
  2034 //////////////////////////////////////////////////////////////////////////////////
  2037 void MacroAssembler::store_check(Register obj) {
  2038   // Does a store check for the oop in register obj. The content of
  2039   // register obj is destroyed afterwards.
  2040   store_check_part_1(obj);
  2041   store_check_part_2(obj);
  2044 void MacroAssembler::store_check(Register obj, Address dst) {
  2045   store_check(obj);
  2049 // split the store check operation so that other instructions can be scheduled in between
  2050 void MacroAssembler::store_check_part_1(Register obj) {
  2051   BarrierSet* bs = Universe::heap()->barrier_set();
  2052   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  2053 #ifdef _LP64
  2054   dsrl(obj, obj, CardTableModRefBS::card_shift);
  2055 #else
  2056   shr(obj, CardTableModRefBS::card_shift);
  2057 #endif
  2060 void MacroAssembler::store_check_part_2(Register obj) {
  2061   BarrierSet* bs = Universe::heap()->barrier_set();
  2062   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  2063   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  2064   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  2066   li(AT, (long)ct->byte_map_base);
  2067 #ifdef _LP64
  2068   dadd(AT, AT, obj);
  2069 #else
  2070   add(AT, AT, obj);
  2071 #endif
  2072   sb(R0, AT, 0);
  2073   sync();
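// For illustration, parts 1 and 2 together compute
//   ct->byte_map_base[obj >> CardTableModRefBS::card_shift] = 0;  // 0 == dirty
// e.g. with 512-byte cards (card_shift == 9), stores into the same 512-byte
// region of the heap dirty the same byte of the card table.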
  2076 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  2077 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  2078                                    Register t1, Register t2, Label& slow_case) {
  2079   assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  2081   Register end = t2;
  2082 #ifndef OPT_THREAD
  2083   Register thread = t1;
  2084   get_thread(thread);
  2085 #else
  2086   Register thread = TREG;
  2087 #endif
  2088   verify_tlab(t1, t2);//blows t1&t2
  2090   ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  2092   if (var_size_in_bytes == NOREG) {
  2093     // I don't think we need to move con_size_in_bytes to a register first.
  2094     // by yjl 8/17/2005
  2095     assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  2096     addi(end, obj, con_size_in_bytes);
  2097   } else {
  2098     add(end, obj, var_size_in_bytes);
  2101   ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  2102   sltu(AT, AT, end);
  2103   bne_far(AT, R0, slow_case);
  2104   delayed()->nop();
  2107   // update the tlab top pointer
  2108   st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  2110   // recover var_size_in_bytes if necessary
  2111   /*if (var_size_in_bytes == end) {
  2112     sub(var_size_in_bytes, end, obj);
  2113     }*/
  2115   verify_tlab(t1, t2);
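// For illustration, the fast path above is, in C-like pseudo-code:
//   obj = thread->tlab_top();
//   end = obj + size;
//   if (thread->tlab_end() < end) goto slow_case;
//   thread->set_tlab_top(end);           // bump-pointer allocation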
  2118 // Defines obj, preserves var_size_in_bytes
  2119 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  2120 		Register t1, Register t2, Label& slow_case) {
  2121   assert_different_registers(obj, var_size_in_bytes, t1, AT);
  2122   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  2123     // No allocation in the shared eden.
  2124     b_far(slow_case);
  2125     delayed()->nop();
  2126   } else {
  2128 #ifndef _LP64
  2129     Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  2130     lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  2131 #else
  2132     Address heap_top(t1);
  2133     li(t1, (long)Universe::heap()->top_addr());
  2134 #endif
  2135     ld_ptr(obj, heap_top);
  2137     Register end = t2;
  2138     Label retry;
  2140     bind(retry);
  2141     if (var_size_in_bytes == NOREG) {
  2142     // I don't think we need to move con_size_in_bytes to a register first.
  2143     // by yjl 8/17/2005
  2144       assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  2145       addi(end, obj, con_size_in_bytes);
  2146     } else {
  2147       add(end, obj, var_size_in_bytes);
  2149     // if end < obj then we wrapped around => object too long => slow case
  2150     sltu(AT, end, obj);
  2151     bne_far(AT, R0, slow_case);
  2152     delayed()->nop();
  2154     //lui(AT, split_high((int)Universe::heap()->end_addr()));
  2155     //lw(AT, AT, split_low((int)Universe::heap()->end_addr()));
  2156     li(AT, (long)Universe::heap()->end_addr());
  2157     sltu(AT, AT, end);
  2158     bne_far(AT, R0, slow_case);
  2159     delayed()->nop();
  2160     // Compare obj with the top addr, and if still equal, store the new top addr in
  2161     // end at the address of the top addr pointer. Sets ZF if was equal, and clears
  2162     // it otherwise. Use lock prefix for atomicity on MPs.
  2163     if (os::is_MP()) {
  2164     	///lock();
  2167     // if someone beat us on the allocation, try again, otherwise continue
  2168     cmpxchg(end, heap_top, obj);
  2169     beq_far(AT, R0, retry);    //by yyq
  2170     delayed()->nop();
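    // For illustration, the retry loop above is roughly:
    //   obj = *heap_top;
    //   do {
    //     end = obj + size;
    //     if (end < obj || heap->end() < end) goto slow_case;  // wrap or heap full
    //   } while (!CAS(heap_top, &obj, end));  // cmpxchg refreshes obj on failure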
  2175 // C2 doesn't invoke this one.
  2176 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  2177 	Register top = T0;
  2178 	Register t1  = T1;
  2179 /* Jin: tlab_refill() is called in 
  2181      [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
  2183   In generate_code_for(), T2 has been assigned to hold the array length, and it is
  2184  used after the call to tlab_refill();
  2185   therefore, tlab_refill() must not use T2.
  2187  Source:
  2189 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
  2190         at java.lang.System.arraycopy(Native Method)
  2191         at java.util.Arrays.copyOf(Arrays.java:2799)	<-- alloc_array
  2192         at sun.misc.Resource.getBytes(Resource.java:117)
  2193         at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
  2194         at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
  2195         at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
  2196  */
  2197 	Register t2  = T9;
  2198 	Register t3  = T3;
  2199 	Register thread_reg = T8;
  2200 	Label do_refill, discard_tlab;
  2201 	if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  2202 		// No allocation in the shared eden.
  2203 		b(slow_case);
  2204 		delayed()->nop();
  2207 	get_thread(thread_reg);
  2209 	ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  2210 	ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  2212 	// calculate amount of free space
  2213 	sub(t1, t1, top);
  2214 	shr(t1, LogHeapWordSize);
  2216 	// Retain tlab and allocate object in shared space if
  2217 	// the amount free in the tlab is too large to discard.
  2218 	ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  2219 	slt(AT, t2, t1);
  2220 	beq(AT, R0, discard_tlab);
  2221 	delayed()->nop();
  2223 	// Retain
  2225 #ifndef _LP64
  2226 	move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  2227 #else
  2228 	li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  2229 #endif
  2230 	add(t2, t2, AT);
  2231 	st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  2233 	if (TLABStats) {
  2234 		// increment number of slow_allocations
  2235 		lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  2236 		addiu(AT, AT, 1);
  2237 		sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  2239 	b(try_eden);
  2240 	delayed()->nop();
  2242   bind(discard_tlab);
  2243 	if (TLABStats) {
  2244 		// increment number of refills
  2245 		lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  2246 		addi(AT, AT, 1);
  2247 		sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  2248 		// accumulate wastage -- t1 is amount free in tlab
  2249 		lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  2250 		add(AT, AT, t1);
  2251 		sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  2254 	// if tlab is currently allocated (top or end != null) then
  2255 	// fill [top, end + alignment_reserve) with array object
  2256 	beq(top, R0, do_refill);
  2257 	delayed()->nop();
  2259 	// set up the mark word
  2260 	li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  2261 	st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  2263 	// set the length to the remaining space
  2264 	addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  2265 	addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  2266 	shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  2267 	sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  2269 	// set klass to intArrayKlass
  2270 #ifndef _LP64
  2271 	lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  2272 	lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  2273 #else
  2274 	li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  2275 	ld_ptr(t1, AT, 0);
  2276 #endif
  2277 	//st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  2278 	store_klass(top, t1);
  2280 	// refill the tlab with an eden allocation
  2281 	bind(do_refill);
  2282 	ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  2283 	shl(t1, LogHeapWordSize);
  2284 	// add object_size ??
  2285 	eden_allocate(top, t1, 0, t2, t3, slow_case);
  2287 	// Check that t1 was preserved in eden_allocate.
  2288 #ifdef ASSERT
  2289 	if (UseTLAB) {
  2290 		Label ok;
  2291 		assert_different_registers(thread_reg, t1);
  2292 		ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  2293 		shl(AT, LogHeapWordSize);
  2294 		beq(AT, t1, ok);
  2295 		delayed()->nop();
  2296 		stop("assert(t1 != tlab size)");
  2297 		should_not_reach_here();
  2299 		bind(ok);
  2301 #endif
  2302 	st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  2303 	st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  2304 	add(top, top, t1);	
  2305 	addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  2306 	st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  2307 	verify_tlab(t1, t2);
  2308 	b(retry);
  2309 	delayed()->nop();
  2312 static const double     pi_4 =  0.7853981633974483;
  2314 // The x86 version is too clumsy; I don't think we need that fuss. Maybe I'm wrong. FIXME
  2315 // must get the argument (a double) in F12/F13
  2316 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  2317 // We need to preserve any registers that may be modified during the call. @Jerome
  2318 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  2319 // save all modified registers here
  2320 //	if (preserve_cpu_regs) {
  2321 //	}
  2322 // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA, so we only need to save V0, V1 and T9.
  2323 	pushad();
  2324 // reserve the stack space before we call
  2325 	addi(SP, SP, -wordSize * 2);
  2326         switch (trig){
  2327 		case 's' :
  2328               		call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
  2329 			delayed()->nop();
  2330 			break;
  2331 		case 'c':	
  2332 			call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
  2333 			delayed()->nop();
  2334 			break;
  2335 		case 't':
  2336 			call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
  2337 			delayed()->nop();
  2338 			break;
  2339 		default:assert (false, "bad intrinsic");
  2340 		break;
  2344 	addi(SP, SP, wordSize * 2);
  2345 	popad();
  2346 //	if (preserve_cpu_regs) {
  2347 //	}
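// For illustration, a typical use computes sin of the double argument
// already in F12 and leaves the result in the FP return register:
//   __ trigfunc('s');   // emits a call to SharedRuntime::dsin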
  2350 #ifdef _LP64
  2351 void MacroAssembler::li(Register rd, long imm) {
  2352   if (imm <= max_jint && imm >= min_jint) {
  2353     li32(rd, (int)imm);
  2354   } else if (julong(imm) <= 0xFFFFFFFF) {
  2355     assert_not_delayed();
  2356     // lui sign-extends, so we can't use that.
  2357     ori(rd, R0, julong(imm) >> 16);
  2358     dsll(rd, rd, 16);
  2359     ori(rd, rd, split_low(imm));
  2360   //aoqi_test
  2361   //} else if ((imm > 0) && ((imm >> 48) == 0)) {
  2362   } else if ((imm > 0) && is_simm16(imm >> 32)) {
  2363     /* A 48-bit address */
  2364     li48(rd, imm);
  2365   } else {
  2366     li64(rd, imm);
  2369 #else
  2370 void MacroAssembler::li(Register rd, long imm) {
  2371   li32(rd, (int)imm);
  2373 #endif
  2375 void MacroAssembler::li32(Register reg, int imm) {
  2376   if (is_simm16(imm)) {
  2377     /* Jin: for imm < 0, we should use addi instead of addiu.
  2379      *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  2381      *  78 move [int:-1|I] [a0|I]
  2382      *    : daddi a0, zero, 0xffffffff  (correct)
  2383      *    : daddiu a0, zero, 0xffffffff (incorrect)
  2384      */
  2385     if (imm >= 0)
  2386       addiu(reg, R0, imm);
  2387     else
  2388       addi(reg, R0, imm);
  2389   } else {
  2390     lui(reg, split_low(imm >> 16));
  2391     if (split_low(imm))
  2392       ori(reg, reg, split_low(imm));
  2396 #ifdef _LP64
  2397 void MacroAssembler::set64(Register d, jlong value) {
  2398   assert_not_delayed();
  2400   int hi = (int)(value >> 32);
  2401   int lo = (int)(value & ~0);
  2403   if (value == lo) {  // 32-bit integer
  2404     if (is_simm16(value)) {
  2405       daddiu(d, R0, value);
  2406     } else {
  2407       lui(d, split_low(value >> 16));
  2408       if (split_low(value)) {
  2409         ori(d, d, split_low(value));
  2412   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2413       ori(d, R0, julong(value) >> 16);
  2414       dsll(d, d, 16);
  2415       if (split_low(value)) {
  2416         ori(d, d, split_low(value));
  2418   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2419     // 4 insts
  2420     li48(d, value);
  2421   } else {  // li64
  2422     // 6 insts
  2423     li64(d, value);
  2428 int MacroAssembler::insts_for_set64(jlong value) {
  2429   int hi = (int)(value >> 32);
  2430   int lo = (int)(value & ~0);
  2432   int count = 0;
  2434   if (value == lo) {  // 32-bit integer
  2435     if (is_simm16(value)) {
  2436       //daddiu(d, R0, value);
  2437       count++;
  2438     } else {
  2439       //lui(d, split_low(value >> 16));
  2440       count++;
  2441       if (split_low(value)) {
  2442         //ori(d, d, split_low(value));
  2443         count++;
  2446   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2447       //ori(d, R0, julong(value) >> 16);
  2448       //dsll(d, d, 16);
  2449       count += 2;
  2450       if (split_low(value)) {
  2451         //ori(d, d, split_low(value));
  2452         count++;
  2454   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2455     // 4 insts
  2456     //li48(d, value);
  2457     count += 4;
  2458   } else {  // li64
  2459     // 6 insts
  2460     //li64(d, value);
  2461     count += 6;
  2464   return count; 
  2467 void MacroAssembler::patchable_set48(Register d, jlong value) {
  2468   assert_not_delayed();
  2470   int hi = (int)(value >> 32);
  2471   int lo = (int)(value & ~0);
  2473   int count = 0;
  2475   if (value == lo) {  // 32-bit integer
  2476     if (is_simm16(value)) {
  2477       daddiu(d, R0, value);
  2478       count += 1;
  2479     } else {
  2480       lui(d, split_low(value >> 16));
  2481       count += 1;
  2482       if (split_low(value)) {
  2483         ori(d, d, split_low(value));
  2484         count += 1;
  2487   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2488       ori(d, R0, julong(value) >> 16);
  2489       dsll(d, d, 16);
  2490       count += 2;
  2491       if (split_low(value)) {
  2492         ori(d, d, split_low(value));
  2493         count += 1;
  2495   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2496     // 4 insts
  2497     li48(d, value);
  2498     count += 4;
  2499   } else {  // li64
  2500     tty->print_cr("value = 0x%lx", value);
  2501     guarantee(false, "Not supported yet !");
  2504   for (; count < 4; count++) {
  2505     nop();
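  // The nop padding above keeps patchable_set48 at a fixed length of four
  // instructions, so later code patching can rewrite the immediate in place
  // without shifting any of the following instructions.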
  2509 void MacroAssembler::patchable_set32(Register d, jlong value) {
  2510   assert_not_delayed();
  2512   int hi = (int)(value >> 32);
  2513   int lo = (int)(value & ~0);
  2515   int count = 0;
  2517   if (value == lo) {  // 32-bit integer
  2518     if (is_simm16(value)) {
  2519       daddiu(d, R0, value);
  2520       count += 1;
  2521     } else {
  2522       lui(d, split_low(value >> 16));
  2523       count += 1;
  2524       if (split_low(value)) {
  2525         ori(d, d, split_low(value));
  2526         count += 1;
  2529   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2530       ori(d, R0, julong(value) >> 16);
  2531       dsll(d, d, 16);
  2532       count += 2;
  2533       if (split_low(value)) {
  2534         ori(d, d, split_low(value));
  2535         count += 1;
  2537   } else {
  2538     tty->print_cr("value = 0x%lx", value);
  2539     guarantee(false, "Not supported yet !");
  2542   for (; count < 3; count++) {
  2543     nop();
  2547 void MacroAssembler::patchable_call32(Register d, jlong value) {
  2548   assert_not_delayed();
  2550   int hi = (int)(value >> 32);
  2551   int lo = (int)(value & ~0);
  2553   int count = 0;
  2555   if (value == lo) {  // 32-bit integer
  2556     if (is_simm16(value)) {
  2557       daddiu(d, R0, value);
  2558       count += 1;
  2559     } else {
  2560       lui(d, split_low(value >> 16));
  2561       count += 1;
  2562       if (split_low(value)) {
  2563         ori(d, d, split_low(value));
  2564         count += 1;
  2567   } else {
  2568     tty->print_cr("value = 0x%lx", value);
  2569     guarantee(false, "Not supported yet !");
  2572   for (; count < 2; count++) {
  2573     nop();
  2577 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  2578   assert(UseCompressedClassPointers, "should only be used for compressed header");
  2579   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2581   int klass_index = oop_recorder()->find_index(k);
  2582   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2583   long narrowKlass = (long)Klass::encode_klass(k);
  2585   relocate(rspec, Assembler::narrow_oop_operand);
  2586   patchable_set48(dst, narrowKlass);
  2590 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  2591   assert(UseCompressedOops, "should only be used for compressed header");
  2592   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2594   int oop_index = oop_recorder()->find_index(obj);
  2595   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2597   relocate(rspec, Assembler::narrow_oop_operand);
  2598   patchable_set48(dst, oop_index);
  2601 void MacroAssembler::li64(Register rd, long imm) {
  2602   assert_not_delayed();
  2603   lui(rd, imm >> 48);
  2604   ori(rd, rd, split_low(imm >> 32));
  2605   dsll(rd, rd, 16);
  2606   ori(rd, rd, split_low(imm >> 16));
  2607   dsll(rd, rd, 16);
  2608   ori(rd, rd, split_low(imm));
  2611 void MacroAssembler::li48(Register rd, long imm) {
  2612   assert_not_delayed();
  2613   assert(is_simm16(imm >> 32), "Not a 48-bit address");
  2614   lui(rd, imm >> 32);
  2615   ori(rd, rd, split_low(imm >> 16));
  2616   dsll(rd, rd, 16);
  2617   ori(rd, rd, split_low(imm));
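// For illustration, li48 builds a 48-bit value in four fixed instructions,
// e.g. for imm == 0x55647f335400:
//   lui  rd, 0x5564        // rd = 0x0000000055640000
//   ori  rd, rd, 0x7f33    // rd = 0x0000000055647f33
//   dsll rd, rd, 16        // rd = 0x000055647f330000
//   ori  rd, rd, 0x5400    // rd = 0x000055647f335400
// The is_simm16(imm >> 32) guard keeps lui's sign-extension harmless.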
  2619 #endif
  2620 // NOTE: unlike i486, we do not push eax here.
  2621 // The x86 version saves eax because it uses eax as the jump register.
  2622 void MacroAssembler::verify_oop(Register reg, const char* s) {
  2623   /*
  2624      if (!VerifyOops) return;
  2626   // Pass register number to verify_oop_subroutine
  2627   char* b = new char[strlen(s) + 50];
  2628   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  2629   push(rax);                          // save rax,
  2630   push(reg);                          // pass register argument
  2631   ExternalAddress buffer((address) b);
  2632   // avoid using pushptr, as it modifies scratch registers
  2633   // and our contract is not to modify anything
  2634   movptr(rax, buffer.addr());
  2635   push(rax);
  2636   // call indirectly to solve generation ordering problem
  2637   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  2638   call(rax);
  2639    */
  2640   if (!VerifyOops) return;
  2641   const char * b = NULL; 
  2642   stringStream ss;
  2643   ss.print("verify_oop: %s: %s", reg->name(), s);
  2644   b = code_string(ss.as_string());
  2645 #ifdef _LP64
  2646   pushad();
  2647   move(A1, reg);
  2648   li(A0, (long)b);
  2649   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2650   ld(T9, AT, 0);
  2651   jalr(T9);
  2652   delayed()->nop();
  2653   popad();
  2654 #else
  2655   // Pass register number to verify_oop_subroutine
  2656   sw(T0, SP, - wordSize);
  2657   sw(T1, SP, - 2*wordSize);
  2658   sw(RA, SP, - 3*wordSize);
  2659   sw(A0, SP ,- 4*wordSize);	
  2660   sw(A1, SP ,- 5*wordSize);	
  2661   sw(AT, SP ,- 6*wordSize);	
  2662   sw(T9, SP ,- 7*wordSize);	
  2663   addiu(SP, SP, - 7 * wordSize);
  2664   move(A1, reg);
  2665   li(A0, (long)b);
  2666   // call indirectly to solve generation ordering problem
  2667   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());        	
  2668   lw(T9, AT, 0);
  2669   jalr(T9);
  2670   delayed()->nop();
  2671   lw(T0, SP, 6* wordSize);
  2672   lw(T1, SP, 5* wordSize);
  2673   lw(RA, SP, 4* wordSize);
  2674   lw(A0, SP, 3* wordSize);
  2675   lw(A1, SP, 2* wordSize);
  2676   lw(AT, SP, 1* wordSize);
  2677   lw(T9, SP, 0* wordSize);
  2678   addiu(SP, SP, 7 * wordSize);
  2679 #endif
  2683 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  2684 	if (!VerifyOops) {
  2685 		nop();
  2686 		return;
  2688 	// Pass register number to verify_oop_subroutine
  2689 	const char * b = NULL;
  2690 	stringStream ss;
  2691 	ss.print("verify_oop_addr: %s",  s);
  2692 	b = code_string(ss.as_string());
  2694 	st_ptr(T0, SP, - wordSize);
  2695 	st_ptr(T1, SP, - 2*wordSize);
  2696 	st_ptr(RA, SP, - 3*wordSize);
  2697 	st_ptr(A0, SP, - 4*wordSize);	
  2698 	st_ptr(A1, SP, - 5*wordSize);	
  2699 	st_ptr(AT, SP, - 6*wordSize);	
  2700 	st_ptr(T9, SP, - 7*wordSize);	
  2701 	ld_ptr(A1, addr);   // addr may use SP, so load from it before changing SP
  2702 	addiu(SP, SP, - 7 * wordSize);
  2704 	li(A0, (long)b);
  2705 	// call indirectly to solve generation ordering problem
  2706 	li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());        	
  2707 	ld_ptr(T9, AT, 0);
  2708 	jalr(T9);
  2709 	delayed()->nop();
  2710 	ld_ptr(T0, SP, 6* wordSize);
  2711 	ld_ptr(T1, SP, 5* wordSize);
  2712 	ld_ptr(RA, SP, 4* wordSize);
  2713 	ld_ptr(A0, SP, 3* wordSize);
  2714 	ld_ptr(A1, SP, 2* wordSize);
  2715 	ld_ptr(AT, SP, 1* wordSize);
  2716 	ld_ptr(T9, SP, 0* wordSize);
  2717 	addiu(SP, SP, 7 * wordSize);
  2720 // used registers :  T0, T1
  2721 void MacroAssembler::verify_oop_subroutine() {
  2722   // RA: ra
  2723   // A0: char* error message    
  2724   // A1: oop   object to verify 
  2726   Label exit, error;
  2727   // increment counter
  2728   li(T0, (long)StubRoutines::verify_oop_count_addr());
  2729   lw(AT, T0, 0);
  2730 #ifdef _LP64
  2731 //FIXME, aoqi: rewrite addi, addu, etc. in 64-bit mode.
  2732   daddi(AT, AT, 1);
  2733 #else
  2734   addi(AT, AT, 1);
  2735 #endif
  2736   sw(AT, T0, 0);
  2738   // make sure object is 'reasonable'
  2739   beq(A1, R0, exit);         // if obj is NULL it is ok
  2740   delayed()->nop();
  2742   // Check if the oop is in the right area of memory
  2743   //const int oop_mask = Universe::verify_oop_mask();
  2744   //const int oop_bits = Universe::verify_oop_bits();
  2745   const uintptr_t oop_mask = Universe::verify_oop_mask();
  2746   const uintptr_t oop_bits = Universe::verify_oop_bits();
  2747   li(AT, oop_mask);
  2748   andr(T0, A1, AT);
  2749   li(AT, oop_bits);
  2750   bne(T0, AT, error);
  2751   delayed()->nop();
  2753   // make sure klass is 'reasonable'
  2754   //add for compressedoops
  2755   reinit_heapbase();
  2756   //add for compressedoops
  2757   load_klass(T0, A1);
  2758   beq(T0, R0, error);                        // if klass is NULL it is broken
  2759   delayed()->nop();
  2760   #if 0
  2761   //FIXME:wuhui.
  2762   // Check if the klass is in the right area of memory
  2763   //const int klass_mask = Universe::verify_klass_mask();
  2764   //const int klass_bits = Universe::verify_klass_bits();
  2765   const uintptr_t klass_mask = Universe::verify_klass_mask();
  2766   const uintptr_t klass_bits = Universe::verify_klass_bits();
  2768   li(AT, klass_mask);
  2769   andr(T1, T0, AT);
  2770   li(AT, klass_bits);
  2771   bne(T1, AT, error);
  2772   delayed()->nop();
  2773   // make sure klass' klass is 'reasonable'
  2774   //add for compressedoops
  2775   load_klass(T0, T0);
  2776   beq(T0, R0, error);  // if klass' klass is NULL it is broken
  2777   delayed()->nop();
  2779   li(AT, klass_mask);
  2780   andr(T1, T0, AT);
  2781   li(AT, klass_bits);
  2782   bne(T1, AT, error);
  2783   delayed()->nop();     // if klass not in right area of memory it is broken too.
  2784 #endif
  2785   // return if everything seems ok
  2786   bind(exit);
  2788   jr(RA);
  2789   delayed()->nop();
  2791   // handle errors
  2792   bind(error);
  2793   pushad();
  2794 #ifndef _LP64
  2795   addi(SP, SP, (-1) * wordSize);
  2796 #endif
  2797   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  2798   delayed()->nop();
  2799 #ifndef _LP64
  2800   addiu(SP, SP, 1 * wordSize);
  2801 #endif
  2802   popad();	
  2803   jr(RA);
  2804   delayed()->nop();
  2807 void MacroAssembler::verify_tlab(Register t1, Register t2) {
  2808 #ifdef ASSERT
  2809   assert_different_registers(t1, t2, AT);
  2810   if (UseTLAB && VerifyOops) {
  2811     Label next, ok;
  2813     get_thread(t1);
  2815     ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  2816     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  2817     sltu(AT, t2, AT);
  2818     beq(AT, R0, next);
  2819     delayed()->nop();
  2821     stop("assert(top >= start)");
  2823     bind(next);
  2824     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  2825     sltu(AT, AT, t2);
  2826     beq(AT, R0, ok);
  2827     delayed()->nop();
  2829     stop("assert(top <= end)");
  2831     bind(ok);
  2833     /*
  2834        Label next, ok;
  2835        Register t1 = rsi;
  2836        Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
  2838        push(t1);
  2839        NOT_LP64(push(thread_reg));
  2840        NOT_LP64(get_thread(thread_reg));
  2842        movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  2843        cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  2844        jcc(Assembler::aboveEqual, next);
  2845        stop("assert(top >= start)");
  2846        should_not_reach_here();
  2848        bind(next);
  2849        movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
  2850        cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  2851        jcc(Assembler::aboveEqual, ok);
  2852        stop("assert(top <= end)");
  2853        should_not_reach_here();
  2855        bind(ok);
  2856        NOT_LP64(pop(thread_reg));
  2857        pop(t1);
  2858      */
  2860 #endif
  2862  RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  2863                                                        Register tmp,
  2864                                                        int offset) {
  2865    intptr_t value = *delayed_value_addr;
  2866    if (value != 0)
  2867      return RegisterOrConstant(value + offset);
  2868    AddressLiteral a(delayed_value_addr);
  2869    // load indirectly to solve generation ordering problem
  2870    //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  2871    //ld(tmp, a);
  2872   /* #ifdef ASSERT
  2873    { Label L;
  2874      testptr(tmp, tmp);
  2875      if (WizardMode) {
  2876             jcc(Assembler::notZero, L);
  2877             char* buf = new char[40];
  2878             sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
  2879             STOP(buf);
  2880                       } else {
  2881             jccb(Assembler::notZero, L);
  2882             hlt();
  2884      bind(L);
  2886    #endif*/
  2887    if (offset != 0)
  2888      daddi(tmp, tmp, offset);
  2890    return RegisterOrConstant(tmp);
  2893 void MacroAssembler::hswap(Register reg) {
  2894   //short
  2895   //andi(reg, reg, 0xffff);
  2896   srl(AT, reg, 8);
  2897   sll(reg, reg, 24);
  2898   sra(reg, reg, 16);
  2899   orr(reg, reg, AT);
  2902 void MacroAssembler::huswap(Register reg) {
  2903 #ifdef _LP64
  2904   dsrl(AT, reg, 8);
  2905   dsll(reg, reg, 24);
  2906   dsrl(reg, reg, 16);
  2907   orr(reg, reg, AT);
  2908   andi(reg, reg, 0xffff);
  2909 #else
  2910   //andi(reg, reg, 0xffff);
  2911   srl(AT, reg, 8);
  2912   sll(reg, reg, 24);
  2913   srl(reg, reg, 16);
  2914   orr(reg, reg, AT);
  2915 #endif
  2918 // A tricky byte swap that needs only one extra register (AT)
  2919 // 32 bits
  2920 // by yjl 6/29/2005
  2921 void MacroAssembler::swap(Register reg) {
  2922 	srl(AT, reg, 8);
  2923 	sll(reg, reg, 24);
  2924 	orr(reg, reg, AT);
  2925 	//reg : 4 1 2 3
  2926 	srl(AT, AT, 16);
  2927 	xorr(AT, AT, reg);
  2928 	andi(AT, AT, 0xff);
  2929 	//AT : 0 0 0 1^3);
  2930 	xorr(reg, reg, AT);
  2931 	//reg : 4 1 2 1
  2932 	sll(AT, AT, 16);
  2933 	xorr(reg, reg, AT);
  2934 	//reg : 4 3 2 1
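	// For illustration: 0x11223344 -> 0x44332211.  The rotate produces
	// 0x44112233; AT then holds (0x11 ^ 0x33) == 0x22, and the two final
	// xors apply it to both middle bytes, swapping them.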
  2937 #ifdef _LP64
  2939 /* do 32-bit CAS using MIPS64 lld/scd
  2941   Jin: cas_int should only compare the low 32 bits of the memory value.
  2942        However, lld/scd operate on 64 bits, which violates the intention of cas_int.
  2943        To simulate a 32-bit atomic operation, the value loaded with LLD is split into
  2944        two halves, and only the low 32 bits are compared. If they are equal, the low 32
  2945        bits of newval, plus the high 32 bits of the memory value, are stored together with SCD.
  2947 Example:
  2949       double d = 3.1415926;
  2950       System.err.println("hello" + d);
  2952   sun.misc.FloatingDecimal$1.<init>()
  2954    `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  2956   38 cas_int [a7a7|J] [a0|I] [a6|I]   
  2957 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  2958 // a6: 0x4ab325aa
  2960 again:
  2961    0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  2963    0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  2964    0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  2965    0x00000055647f3c68: dsll32 t8, t8, 0
  2966    0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  2967    0x00000055647f3c70: sll zero, zero, 0
  2969    0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  2970    0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  2971    0x00000055647f3c7c: ori v1, v1, 0xffffffff
  2972    0x00000055647f3c80: and v1, a6, v1 
  2973    0x00000055647f3c84: or at, t8, v1 
  2974    0x00000055647f3c88: scd at, 0x0(a7)
  2975    0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  2976    0x00000055647f3c90: sll zero, zero, 0
  2977    0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  2978    0x00000055647f3c98: sll zero, zero, 0
  2979 nequal:
  2980    0x00000055647f45a4: dadd a0, t9, zero
  2981    0x00000055647f45a8: dadd at, zero, zero
  2982 done:
  2983 */
  2985 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  2986 #if 0
  2987   Label done, again, nequal;
  2988   bind(again);
  2990   sync();
  2991   lld(AT, dest);
  2993   /* T9:  32 bits, sign extended
  2994    * V1: low 32 bits, sign unextended
  2995    * T8: high 32 bits (may be another variables's space)
  2996    * T8: high 32 bits (may be another variable's space)
  2997   sll(T9, AT, 0);	// Use 32-bit sll to extend bit 31
  2998   dsrl32(T8, AT, 0);
  2999   dsll32(T8, T8, 0);
  3001   bne(T9, c_reg, nequal);
  3002   delayed()->nop(); 
  3004   ori(V1, R0, 0xFFFF);
  3005   dsll(V1, V1, 16);
  3006   ori(V1, V1, 0xFFFF);
  3007   andr(V1, x_reg, V1);
  3008   orr(AT, T8, V1);
  3009   scd(AT, dest);
  3010   beq(AT, R0, again);
  3011   delayed()->nop();
  3012   b(done);
  3013   delayed()->nop();
  3015   // not xchged
  3016   bind(nequal);
  3017   move(c_reg, T9);
  3018   move(AT, R0);
  3020   bind(done);
  3021 #else
  3023   /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  3024   Label done, again, nequal;
  3026   bind(again);
  3028   if(!Use3A2000) sync();
  3029   ll(AT, dest);
  3030   bne(AT, c_reg, nequal);
  3031   delayed()->nop(); 
  3033   move(AT, x_reg);
  3034   sc(AT, dest);
  3035   beq(AT, R0, again);
  3036   delayed()->nop();
  3037   b(done);
  3038   delayed()->nop();
  3040   // not xchged
  3041   bind(nequal);
  3042   sync();
  3043   move(c_reg, AT);
  3044   move(AT, R0);
  3046   bind(done);
  3047 #endif
  3049 #endif	// cmpxchg32
  3051 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  3052   Label done, again, nequal;
  3054   bind(again);
  3055 #ifdef _LP64
  3056   if(!Use3A2000) sync();
  3057   lld(AT, dest);
  3058 #else
  3059   if(!Use3A2000) sync();
  3060   ll(AT, dest);
  3061 #endif
  3062   bne(AT, c_reg, nequal);
  3063   delayed()->nop(); 
  3065   move(AT, x_reg);
  3066 #ifdef _LP64
  3067   scd(AT, dest);
  3068 #else
  3069   sc(AT, dest);
  3070 #endif
  3071   beq(AT, R0, again);
  3072   delayed()->nop();
  3073   b(done);
  3074   delayed()->nop();
  3076   // not xchged
  3077   bind(nequal);
  3078   sync();
  3079   move(c_reg, AT);
  3080   move(AT, R0);
  3082   bind(done);
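  // For illustration, the loop above implements compare-and-swap:
  //   do {
  //     old = *dest;                                        // ll/lld (load-linked)
  //     if (old != c_reg) { c_reg = old; AT = 0; break; }   // fail: return old value
  //   } while (!store_conditional(dest, x_reg));            // sc/scd
  //   // on success the sc/scd leaves AT == 1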
  3085 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  3086 	Label done, again, nequal;
  3088 	Register x_reg = x_regLo;
  3089 	dsll32(x_regHi, x_regHi, 0);
  3090 	dsll32(x_regLo, x_regLo, 0);
  3091 	dsrl32(x_regLo, x_regLo, 0);
  3092 	orr(x_reg, x_regLo, x_regHi);
  3094 	Register c_reg = c_regLo;
  3095 	dsll32(c_regHi, c_regHi, 0);
  3096 	dsll32(c_regLo, c_regLo, 0);
  3097 	dsrl32(c_regLo, c_regLo, 0);
  3098 	orr(c_reg, c_regLo, c_regHi);
  3100 	bind(again);
  3102         if(!Use3A2000) sync();
  3103 	lld(AT, dest);
  3104 	bne(AT, c_reg, nequal);
  3105 	delayed()->nop(); 
  3107 	//move(AT, x_reg);
  3108 	dadd(AT, x_reg, R0);
  3109 	scd(AT, dest);
  3110 	beq(AT, R0, again);
  3111 	delayed()->nop();
  3112 	b(done);
  3113 	delayed()->nop();
  3115 	// not xchged
  3116 	bind(nequal);
  3117 	sync();
  3118 	//move(c_reg, AT);
  3119 	//move(AT, R0);
  3120 	dadd(c_reg, AT, R0);
  3121 	dadd(AT, R0, R0);
  3122 	bind(done);
  3125 // make sure the three registers are different
  3126 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {    
  3127   assert_different_registers(tmp, fs, ft); 
  3128 	div_s(tmp, fs, ft); 
  3129 	trunc_l_s(tmp, tmp); 
  3130 	cvt_s_l(tmp, tmp); 
  3131 	mul_s(tmp, tmp, ft); 
  3132 	sub_s(fd, fs, tmp); 
  3135 // make sure the three registers are different
  3136 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {    
  3137 	assert_different_registers(tmp, fs, ft); 
  3138 	div_d(tmp, fs, ft); 
  3139 	trunc_l_d(tmp, tmp); 
  3140 	cvt_d_l(tmp, tmp); 
  3141 	mul_d(tmp, tmp, ft); 
  3142 	sub_d(fd, fs, tmp); 
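// For illustration, both rem_s and rem_d compute the truncated-division
// remainder fd = fs - trunc(fs / ft) * ft, e.g. rem_d(7.5, 2.0):
// trunc(3.75) == 3.0, so fd = 7.5 - 3.0 * 2.0 = 1.5.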
  3145 // Fast_Lock and Fast_Unlock used by C2
  3147 // Because the transitions from emitted code to the runtime
  3148 // monitorenter/exit helper stubs are so slow it's critical that
  3149 // we inline both the stack-locking fast-path and the inflated fast path.
  3150 //
  3151 // See also: cmpFastLock and cmpFastUnlock.
  3152 //
  3153 // What follows is a specialized inline transliteration of the code
  3154 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  3155 // another option would be to emit TrySlowEnter and TrySlowExit methods
  3156 // at startup-time.  These methods would accept arguments as
  3157 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  3158 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  3159 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  3160 // In practice, however, the # of lock sites is bounded and is usually small.
  3161 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  3162 // if the processor uses simple bimodal branch predictors keyed by EIP,
  3163 // since the helper routines would be called from multiple synchronization
  3164 // sites.
  3165 //
  3166 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  3167 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  3168 // to those specialized methods.  That'd give us a mostly platform-independent
  3169 // implementation that the JITs could optimize and inline at their pleasure.
  3170 // Done correctly, the only time we'd need to cross to native code would be
  3171 // to park() or unpark() threads.  We'd also need a few more unsafe operators
  3172 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  3173 // (b) explicit barriers or fence operations.
  3174 //
  3175 // TODO:
  3176 //
  3177 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  3178 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  3179 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  3180 //    the lock operators would typically be faster than reifying Self.
  3181 //
  3182 // *  Ideally I'd define the primitives as:
  3183 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  3184 //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  3185 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  3186 //    Instead, we're stuck with rather awkward and brittle register assignments below.
  3187 //    Furthermore the register assignments are overconstrained, possibly resulting in
  3188 //    sub-optimal code near the synchronization site.
  3189 //
  3190 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  3191 //    Alternately, use a better sp-proximity test.
  3192 //
  3193 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  3194 //    Either one is sufficient to uniquely identify a thread.
  3195 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  3196 //
  3197 // *  Intrinsify notify() and notifyAll() for the common cases where the
  3198 //    object is locked by the calling thread but the waitlist is empty.
  3199 //    This avoids the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  3200 //
  3201 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  3202 //    But beware of excessive branch density on AMD Opterons.
  3203 //
  3204 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  3205 //    or failure of the fast-path.  If the fast-path fails then we pass
  3206 //    control to the slow-path, typically in C.  In Fast_Lock and
  3207 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  3208 //    will emit a conditional branch immediately after the node.
  3209 //    So we have branches to branches and lots of ICC.ZF games.
  3210 //    Instead, it might be better to have C2 pass a "FailureLabel"
  3211 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  3212 //    will drop through the node.  ICC.ZF is undefined at exit.
  3213 //    In the case of failure, the node will branch directly to the
  3214 //    FailureLabel
  3217 // obj: object to lock
  3218 // box: on-stack box address (displaced header location) - KILLED
  3219 // tmpReg: tmp -- KILLED
  3220 // scr: tmp -- KILLED
  3221 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  3223   // Ensure the register assignments are disjoint
  3224   guarantee (objReg != boxReg, "") ;
  3225   guarantee (objReg != tmpReg, "") ;
  3226   guarantee (objReg != scrReg, "") ;
  3227   guarantee (boxReg != tmpReg, "") ;
  3228   guarantee (boxReg != scrReg, "") ;
  3231   block_comment("FastLock");
  3232   /*
  3233      move(AT, 0x0);
  3234      return;
  3235      */
  3236   if (PrintBiasedLockingStatistics) {
  3237     push(tmpReg);
  3238     atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  3239     pop(tmpReg);
  3242   if (EmitSync & 1) {
  3243     // set box->dhw = unused_mark (3)
  3244     // Force all sync thru slow-path: slow_enter() and slow_exit()
  3245     move (AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  3246     sd(AT, Address(boxReg, 0));
  3247     move (AT, (int32_t)0) ;	// Eflags.ZF = 0
  3248   } else
  3249     if (EmitSync & 2) {
  3250       Label DONE_LABEL ;
  3251       if (UseBiasedLocking) {
  3252         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  3253         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  3256       ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  3257       ori(tmpReg, tmpReg, 0x1);
  3258       sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  3260       cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  3261       bne(AT, R0, DONE_LABEL);
  3262       delayed()->nop();
  3264       // Recursive locking
  3265       dsubu(tmpReg, tmpReg, SP);
  3266       li(AT, (7 - os::vm_page_size() ));
  3267       andr(tmpReg, tmpReg, AT);
  3268       sd(tmpReg, Address(boxReg, 0));
  3269       bind(DONE_LABEL) ;
  3270     } else {
  3271       // Possible cases that we'll encounter in fast_lock
  3272       // ------------------------------------------------
  3273       // * Inflated
  3274       //    -- unlocked
  3275       //    -- Locked
  3276       //       = by self
  3277       //       = by other
  3278       // * biased
  3279       //    -- by Self
  3280       //    -- by other
  3281       // * neutral
  3282       // * stack-locked
  3283       //    -- by self
  3284       //       = sp-proximity test hits
  3285       //       = sp-proximity test generates false-negative
  3286       //    -- by other
  3287       //
  3289       Label IsInflated, DONE_LABEL, PopDone ;
  3291       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  3292       // order to reduce the number of conditional branches in the most common cases.
  3293       // Beware -- there's a subtle invariant that fetch of the markword
  3294       // at [FETCH], below, will never observe a biased encoding (*101b).
  3295       // If this invariant is not held we risk exclusion (safety) failure.
  3296       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3297         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  3300       ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  3301       andi(AT, tmpReg, markOopDesc::monitor_value);
  3302       bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  3303       delayed()->nop();
  3305       // Attempt stack-locking ...
  3306       ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  3307       sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  3309       cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  3311       if (PrintBiasedLockingStatistics) {
  3312         Label L;
  3313         beq(AT, R0, L);
  3314         delayed()->nop();
  3315         push(T0);
  3316         push(T1);
  3317         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  3318         pop(T1);
  3319         pop(T0);
  3320         bind(L);
  3322       bne(AT, R0, DONE_LABEL);
  3323       delayed()->nop();
  3325       // Recursive locking
  3326       // The object is stack-locked: markword contains stack pointer to BasicLock.
  3327       // Locked by current thread if difference with current SP is less than one page.
  3328       dsubu(tmpReg, tmpReg, SP);
  3329       li(AT, 7 - os::vm_page_size() );
  3330       andr(tmpReg, tmpReg, AT);
  3331       sd(tmpReg, Address(boxReg, 0));
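       // Illustrative arithmetic, assuming a 4K page: 7 - 4096 == -4089
       // == 0x...FFFFF007, so the AND clears bits 3..11 while keeping the
       // low 3 bits and everything above the page offset.  The result is
       // 0 exactly when the markword is an 8-byte-aligned address less
       // than one page above SP -- a stack-lock owned by this thread --
       // and storing that 0 into the box records the recursive case.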
  3332       if (PrintBiasedLockingStatistics) {
  3333         Label L;
  3334         // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  3335         bne(tmpReg, R0, L);
  3336         delayed()->nop();
  3337         push(T0);
  3338         push(T1);
  3339         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  3340         pop(T1);
  3341         pop(T0);
  3342         bind(L);
  3344       sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
  3346       b(DONE_LABEL) ;
  3347       delayed()->nop();
  3349       bind(IsInflated) ;
  3351       // TODO: someday avoid the ST-before-CAS penalty by
  3352       // relocating (deferring) the following ST.
  3353       // We should also think about trying a CAS without having
  3354       // fetched _owner.  If the CAS is successful we may
  3355       // avoid an RTO->RTS upgrade on the $line.
  3356       // Without cast to int32_t a movptr will destroy r10 which is typically obj
  3357       li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  3358       sd(AT, Address(boxReg, 0));
  3360       move(boxReg, tmpReg) ;
  3361       ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3362       // AT = (tmpReg == 0) ? 1:0
  3363       sltiu(AT, tmpReg, 1);  /* Jin: AT = !tmpReg; */
  3364       bne(tmpReg, R0, DONE_LABEL);
  3365       delayed()->nop();
  3367       // It's inflated and appears unlocked
  3368       if (os::is_MP()) {
  3369         //lock();
  3371       cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  3372       // Intentional fall-through into DONE_LABEL ...
  3375       // DONE_LABEL is a hot target - we'd really like to place it at the
  3376       // start of cache line by padding with NOPs.
  3377       // See the AMD and Intel software optimization manuals for the
  3378       // most efficient "long" NOP encodings.
  3379       // Unfortunately none of our alignment mechanisms suffice.
  3380       bind(DONE_LABEL);
  3382       // Avoid branch-to-branch on AMD processors
  3383       // This appears to be superstition.
  3384       if (EmitSync & 32) nop() ;
  3387       // At DONE_LABEL the icc ZFlag is set as follows ...
  3388       // Fast_Unlock uses the same protocol.
  3389       // ZFlag == 1 -> Success
  3390       // ZFlag == 0 -> Failure - force control through the slow-path
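       // On MIPS the role of ICC.ZF is played by AT (1 on success, 0 on
       // failure).  Illustrative only -- a caller such as the C2 lock node
       // would typically consume it as:
       //   fast_lock(obj, box, tmp, scr);
       //   beq(AT, R0, slow_case);   // AT == 0 -> take the slow path
       //   delayed()->nop();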
  3394 // obj: object to unlock
  3395 // box: box address (displaced header location) -- KILLED
  3396 // tmp: scratch -- KILLED; cannot be obj nor box
  3397 //
  3398 // Some commentary on balanced locking:
  3399 //
  3400 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  3401 // Methods that don't have provably balanced locking are forced to run in the
  3402 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  3403 // The interpreter provides two properties:
  3404 // I1:  At return-time the interpreter automatically and quietly unlocks any
  3405 //      objects acquired by the current activation (frame).  Recall that the
  3406 //      interpreter maintains an on-stack list of locks currently held by
  3407 //      a frame.
  3408 // I2:  If a method attempts to unlock an object that is not held by
  3409 //      the frame, the interpreter throws IMSX.
  3410 //
  3411 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
  3412 // B() doesn't have provably balanced locking so it runs in the interpreter.
  3413 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  3414 // is still locked by A().
  3415 //
  3416 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  3417 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  3418 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  3419 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  3421 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  3423   guarantee (objReg != boxReg, "") ;
  3424   guarantee (objReg != tmpReg, "") ;
  3425   guarantee (boxReg != tmpReg, "") ;
  3429   block_comment("FastUnlock");
  3431   /*
  3432      move(AT, 0x0);
  3433      return;
  3434      */
  3436   if (EmitSync & 4) {
  3437     // Disable - inhibit all inlining.  Force control through the slow-path
  3438     move(AT, R0);
  3439   } else
  3440     if (EmitSync & 8) {
  3441       Label DONE_LABEL ;
  3442       if (UseBiasedLocking) {
  3443         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3445       // classic stack-locking code ...
  3446       ld(tmpReg, Address(boxReg, 0)) ;
  3447       beq(tmpReg, R0, DONE_LABEL) ;
  3448       move(AT, 0x1);  // delay slot
  3450       cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
  3451       bind(DONE_LABEL);
  3452     } else {
  3453       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  3455       // Critically, the biased locking test must have precedence over
  3456       // and appear before the (box->dhw == 0) recursive stack-lock test.
  3457       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3458         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3461       ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  3462       beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  3463       delayed()->daddiu(AT, R0, 0x1);
  3465       ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  3466       andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
  3467       beq(AT, R0, Stacked) ;                     // Inflated?
  3468       delayed()->nop();
  3470       bind(Inflated) ;
  3471       // It's inflated.
  3472       // Despite our balanced locking property we still check that m->_owner == Self
  3473       // as java routines or native JNI code called by this thread might
  3474       // have released the lock.
  3475       // Refer to the comments in synchronizer.cpp for how we might encode extra
  3476       // state in _succ so we can avoid fetching EntryList|cxq.
  3477       //
  3478       // I'd like to add more cases in fast_lock() and fast_unlock() --
  3479       // such as recursive enter and exit -- but we have to be wary of
  3480       // I$ bloat, T$ effects and BP$ effects.
  3481       //
  3482       // If there's no contention try a 1-0 exit.  That is, exit without
  3483       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  3484       // we detect and recover from the race that the 1-0 exit admits.
  3485       //
  3486       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  3487       // before it STs null into _owner, releasing the lock.  Updates
  3488       // to data protected by the critical section must be visible before
  3489       // we drop the lock (and thus before any other thread could acquire
  3490       // the lock and observe the fields protected by the lock).
  3491       // IA32's memory-model is SPO, so STs are ordered with respect to
  3492       // each other and there's no need for an explicit barrier (fence).
  3493       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  3494 #ifndef OPT_THREAD
  3495       get_thread (TREG) ;
  3496 #endif
  3498       // It's inflated
  3499       ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3500       xorr(boxReg, boxReg, TREG);
  3502       ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
  3503       orr(boxReg, boxReg, AT);
  3505       move(AT, R0);
  3506       bne(boxReg, R0, DONE_LABEL);
  3507       delayed()->nop();
  3509       ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
  3510       ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
  3511       orr(boxReg, boxReg, AT);
  3513       move(AT, R0);
  3514       bne(boxReg, R0, CheckSucc);
  3515       delayed()->nop();
  3517       sync();
  3518       sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3519       move(AT, 0x1);
  3520       b(DONE_LABEL);
  3521       delayed()->nop();
  3524       if ((EmitSync & 65536) == 0) {
  3525         Label LSuccess, LGoSlowPath ;
  3526         bind (CheckSucc);
  3527         ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
  3528         beq(AT, R0, LGoSlowPath);
  3529         delayed()->nop();
  3531         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
  3532         // the explicit ST;MEMBAR combination, but masm doesn't currently support
  3533         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
  3534         // are all faster when the write buffer is populated.
  3535         xorr(boxReg, boxReg, boxReg);
  3536         sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3537         if (os::is_MP()) {
  3538           // lock ();
  3540         ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
  3541         bne(AT, R0, LSuccess);
  3542         delayed()->nop();
  3544 #ifndef OPT_THREAD
  3545         get_thread (TREG) ;
  3546 #endif
  3547         move(boxReg, R0) ;                  // box is really EAX
  3548         //if (os::is_MP()) { lock(); }
  3549         cmpxchg(TREG, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
  3550         beq(AT, R0, LSuccess);
  3551         delayed()->nop();
  3552         // Intentional fall-through into slow-path
  3554         bind  (LGoSlowPath);
  3555         move(AT, R0);
  3556         b(DONE_LABEL) ;
  3557         delayed()->nop();
  3560         bind  (LSuccess);
  3561         move(AT, 0);
  3562         sltiu(AT, boxReg, 1) ;                 // set ICC.ZF=1 to indicate success
  3563         b(DONE_LABEL) ;
  3564         delayed()->nop();
  3567       bind  (Stacked);
  3568       ld(tmpReg, Address(boxReg, 0)) ;
  3569       //if (os::is_MP()) { lock(); }
  3570       cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  3572       if (EmitSync & 65536) {
  3573         bind (CheckSucc);
  3576       bind(DONE_LABEL);
  3578       // Avoid branch to branch on AMD processors
  3579       if (EmitSync & 32768) { nop() ; }
  3583 class ControlWord {
  3584  public:
  3585   int32_t _value;
  3587   int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  3588   int  precision_control() const       { return  (_value >>  8) & 3      ; }
  3589   bool precision() const               { return ((_value >>  5) & 1) != 0; }
  3590   bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  3591   bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  3592   bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  3593   bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  3594   bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  3596   void print() const {
  3597     // rounding control
  3598     const char* rc;
  3599     switch (rounding_control()) {
  3600       case 0: rc = "round near"; break;
  3601       case 1: rc = "round down"; break;
  3602       case 2: rc = "round up  "; break;
  3603       case 3: rc = "chop      "; break;
  3604     };
  3605     // precision control
  3606     const char* pc;
  3607     switch (precision_control()) {
  3608       case 0: pc = "24 bits "; break;
  3609       case 1: pc = "reserved"; break;
  3610       case 2: pc = "53 bits "; break;
  3611       case 3: pc = "64 bits "; break;
  3612     };
  3613     // flags
  3614     char f[9];
  3615     f[0] = ' ';
  3616     f[1] = ' ';
  3617     f[2] = (precision   ()) ? 'P' : 'p';
  3618     f[3] = (underflow   ()) ? 'U' : 'u';
  3619     f[4] = (overflow    ()) ? 'O' : 'o';
  3620     f[5] = (zero_divide ()) ? 'Z' : 'z';
  3621     f[6] = (denormalized()) ? 'D' : 'd';
  3622     f[7] = (invalid     ()) ? 'I' : 'i';
  3623     f[8] = '\x0';
  3624     // output
  3625     printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  3628 };
  3630 class StatusWord {
  3631  public:
  3632   int32_t _value;
  3634   bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  3635   bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  3636   bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  3637   bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  3638   bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  3639   int  top() const                     { return  (_value >> 11) & 7      ; }
  3640   bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  3641   bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  3642   bool precision() const               { return ((_value >>  5) & 1) != 0; }
  3643   bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  3644   bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  3645   bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  3646   bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  3647   bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  3649   void print() const {
  3650     // condition codes
  3651     char c[5];
  3652     c[0] = (C3()) ? '3' : '-';
  3653     c[1] = (C2()) ? '2' : '-';
  3654     c[2] = (C1()) ? '1' : '-';
  3655     c[3] = (C0()) ? '0' : '-';
  3656     c[4] = '\x0';
  3657     // flags
  3658     char f[9];
  3659     f[0] = (error_status()) ? 'E' : '-';
  3660     f[1] = (stack_fault ()) ? 'S' : '-';
  3661     f[2] = (precision   ()) ? 'P' : '-';
  3662     f[3] = (underflow   ()) ? 'U' : '-';
  3663     f[4] = (overflow    ()) ? 'O' : '-';
  3664     f[5] = (zero_divide ()) ? 'Z' : '-';
  3665     f[6] = (denormalized()) ? 'D' : '-';
  3666     f[7] = (invalid     ()) ? 'I' : '-';
  3667     f[8] = '\x0';
  3668     // output
  3669     printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  3672 };
  3674 class TagWord {
  3675  public:
  3676   int32_t _value;
  3678   int tag_at(int i) const              { return (_value >> (i*2)) & 3; }
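       // Tag values follow the x87 encoding, matching tag_as_string()
       // in FPU_State below: 0 = valid, 1 = zero, 2 = special, 3 = empty.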
  3680   void print() const {
  3681     printf("%04x", _value & 0xFFFF);
  3684 };
  3686 class FPU_Register {
  3687  public:
  3688   int32_t _m0;
  3689   int32_t _m1;
  3690   int16_t _ex;
  3692   bool is_indefinite() const           {
  3693     return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  3696   void print() const {
  3697     char  sign = (_ex < 0) ? '-' : '+';
  3698     const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
  3699     printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  3700   };
  3702 };
  3704 class FPU_State {
  3705  public:
  3706   enum {
  3707     register_size       = 10,
  3708     number_of_registers =  8,
  3709     register_mask       =  7
  3710   };
  3712   ControlWord  _control_word;
  3713   StatusWord   _status_word;
  3714   TagWord      _tag_word;
  3715   int32_t      _error_offset;
  3716   int32_t      _error_selector;
  3717   int32_t      _data_offset;
  3718   int32_t      _data_selector;
  3719   int8_t       _register[register_size * number_of_registers];
  3721   int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  3722   FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
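       // Example: with _status_word.top() == 5, tag_for_st(0) reads the
       // tag of physical register 5 and tag_for_st(3) reads register
       // (5 + 3) & 7 == 0.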
  3724   const char* tag_as_string(int tag) const {
  3725     switch (tag) {
  3726       case 0: return "valid";
  3727       case 1: return "zero";
  3728       case 2: return "special";
  3729       case 3: return "empty";
  3731     ShouldNotReachHere();
  3732     return NULL;
  3735   void print() const {
  3736     // print computation registers
  3737     { int t = _status_word.top();
  3738       for (int i = 0; i < number_of_registers; i++) {
  3739         int j = (i - t) & register_mask;
  3740         printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
  3741         st(j)->print();
  3742         printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
  3745     printf("\n");
  3746     // print control registers
  3747     printf("ctrl = "); _control_word.print(); printf("\n");
  3748     printf("stat = "); _status_word .print(); printf("\n");
  3749     printf("tags = "); _tag_word    .print(); printf("\n");
  3752 };
  3754 class Flag_Register {
  3755  public:
  3756   int32_t _value;
  3758   bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  3759   bool direction() const               { return ((_value >> 10) & 1) != 0; }
  3760   bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  3761   bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  3762   bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  3763   bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  3764   bool carry() const                   { return ((_value >>  0) & 1) != 0; }
  3766   void print() const {
  3767     // flags
  3768     char f[8];
  3769     f[0] = (overflow       ()) ? 'O' : '-';
  3770     f[1] = (direction      ()) ? 'D' : '-';
  3771     f[2] = (sign           ()) ? 'S' : '-';
  3772     f[3] = (zero           ()) ? 'Z' : '-';
  3773     f[4] = (auxiliary_carry()) ? 'A' : '-';
  3774     f[5] = (parity         ()) ? 'P' : '-';
  3775     f[6] = (carry          ()) ? 'C' : '-';
  3776     f[7] = '\x0';
  3777     // output
  3778     printf("%08x  flags = %s", _value, f);
  3781 };
  3783 class IU_Register {
  3784  public:
  3785   int32_t _value;
  3787   void print() const {
  3788     printf("%08x  %11d", _value, _value);
  3791 };
  3793 class IU_State {
  3794  public:
  3795   Flag_Register _eflags;
  3796   IU_Register   _rdi;
  3797   IU_Register   _rsi;
  3798   IU_Register   _rbp;
  3799   IU_Register   _rsp;
  3800   IU_Register   _rbx;
  3801   IU_Register   _rdx;
  3802   IU_Register   _rcx;
  3803   IU_Register   _rax;
  3805   void print() const {
  3806     // computation registers
  3807     printf("rax  = "); _rax.print(); printf("\n");
  3808     printf("rbx  = "); _rbx.print(); printf("\n");
  3809     printf("rcx  = "); _rcx.print(); printf("\n");
  3810     printf("rdx  = "); _rdx.print(); printf("\n");
  3811     printf("rdi  = "); _rdi.print(); printf("\n");
  3812     printf("rsi  = "); _rsi.print(); printf("\n");
  3813     printf("rbp  = "); _rbp.print(); printf("\n");
  3814     printf("rsp  = "); _rsp.print(); printf("\n");
  3815     printf("\n");
  3816     // control registers
  3817     printf("flgs = "); _eflags.print(); printf("\n");
  3819 };
  3822 class CPU_State {
  3823  public:
  3824   FPU_State _fpu_state;
  3825   IU_State  _iu_state;
  3827   void print() const {
  3828     printf("--------------------------------------------------\n");
  3829     _iu_state .print();
  3830     printf("\n");
  3831     _fpu_state.print();
  3832     printf("--------------------------------------------------\n");
  3835 };
  3838 /*
  3839 static void _print_CPU_state(CPU_State* state) {
  3840   state->print();
  3841 };
  3843 void MacroAssembler::print_CPU_state() {
  3844   push_CPU_state();
  3845   push(rsp);                // pass CPU state
  3846   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  3847   addptr(rsp, wordSize);       // discard argument
  3848   pop_CPU_state();
  3850 */
  3852 void MacroAssembler::align(int modulus) {
  3853   while (offset() % modulus != 0) nop();
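       // Example: MIPS instructions are 4 bytes, so align(8) at offset()
       // == 20 emits a single nop to advance to 24, while align(4)
       // normally emits nothing.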
  3857 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  3858 	//FIXME aoqi
  3859 	// %%%%% need to implement this
  3860 	//Unimplemented();
  3861 	/*
  3862 	if (!VerifyFPU) return;
  3863   push_CPU_state();
  3864   push(rsp);                // pass CPU state
  3865   ExternalAddress msg((address) s);
  3866   // pass message string s
  3867   pushptr(msg.addr());
  3868   push(stack_depth);        // pass stack depth
  3869   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  3870   addptr(rsp, 3 * wordSize);   // discard arguments
  3871   // check for error
  3872   { Label L;
  3873     testl(rax, rax);
  3874     jcc(Assembler::notZero, L);
  3875     int3();                  // break if error condition
  3876     bind(L);
  3878   pop_CPU_state();
  3879 	*/
  3882 #ifdef _LP64
  3883 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3885 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
  3886 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
  3887 #else
  3888 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3890 FloatRegister caller_saved_fpu_registers[] = {};
  3891 #endif
  3893 // We preserve all caller-saved registers.
  3894 void  MacroAssembler::pushad(){
  3895   int i;
  3897   /* Fixed-point registers */
  3898   int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3899   daddi(SP, SP, -1 * len * wordSize);
  3900   for (i = 0; i < len; i++)
  3902 #ifdef _LP64
  3903     sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3904 #else
  3905     sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3906 #endif
  3909   /* Floating-point registers */
  3910   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3911   daddi(SP, SP, -1 * len * wordSize);
  3912   for (i = 0; i < len; i++)
  3914 #ifdef _LP64
  3915     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3916 #else
  3917     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3918 #endif
  3920 };
  3922 void  MacroAssembler::popad(){
  3923   int i;
  3925   /* Floating-point registers */
  3926   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3927   for (i = 0; i < len; i++)
  3929 #ifdef _LP64
  3930     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3931 #else
  3932     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3933 #endif
  3935   daddi(SP, SP, len * wordSize);
  3937   /* Fixed-point registers */
  3938   len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3939   for (i = 0; i < len; i++)
  3941 #ifdef _LP64
  3942     ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3943 #else
  3944     lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3945 #endif
  3947   daddi(SP, SP, len * wordSize);
  3948 };
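       // pushad()/popad() must be strictly paired: popad() restores the
       // floating-point registers first because they were pushed last,
       // then the fixed-point registers, mirroring pushad() in reverse.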
  3950 void MacroAssembler::push2(Register reg1, Register reg2) {
  3951 #ifdef _LP64
  3952   daddi(SP, SP, -16);
  3953   sd(reg2, SP, 0);
  3954   sd(reg1, SP, 8);
  3955 #else
  3956   addi(SP, SP, -8);
  3957   sw(reg2, SP, 0);
  3958   sw(reg1, SP, 4);
  3959 #endif
  3962 void MacroAssembler::pop2(Register reg1, Register reg2) {
  3963 #ifdef _LP64
  3964   ld(reg1, SP, 0);
  3965   ld(reg2, SP, 8);
  3966   daddi(SP, SP, 16);
  3967 #else
  3968   lw(reg1, SP, 0);
  3969   lw(reg2, SP, 4);
  3970   addi(SP, SP, 8);
  3971 #endif
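       // Note the operand order: push2(a, b) stores b at SP + 0 and a one
       // word above it, so the matching restore is pop2(b, a).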
  3974 // For the UseCompressedOops option
  3975 void MacroAssembler::load_klass(Register dst, Register src) {
  3976 #ifdef _LP64
  3977   if (UseCompressedClassPointers) {
  3978     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  3979     decode_klass_not_null(dst);
  3980   } else
  3981 #endif
  3982     ld(dst, src, oopDesc::klass_offset_in_bytes());
  3985 void MacroAssembler::store_klass(Register dst, Register src) {
  3986 #ifdef _LP64
  3987   if (UseCompressedClassPointers) {
  3988     encode_klass_not_null(src);
  3989     sw(src, dst, oopDesc::klass_offset_in_bytes());
  3990   } else {
  3991 #endif
  3992     sd(src, dst, oopDesc::klass_offset_in_bytes());
  3996 void MacroAssembler::load_prototype_header(Register dst, Register src) {
  3997   load_klass(dst, src);
  3998   ld(dst, Address(dst, Klass::prototype_header_offset()));
  4001 #ifdef _LP64
  4002 void MacroAssembler::store_klass_gap(Register dst, Register src) {
  4003   if (UseCompressedClassPointers) {
  4004     sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  4008 void MacroAssembler::load_heap_oop(Register dst, Address src) {
  4009   if (UseCompressedOops) {
  4010     lwu(dst, src);
  4011     decode_heap_oop(dst);
  4012   } else {
  4013     ld(dst, src);
  4017 void MacroAssembler::store_heap_oop(Address dst, Register src){
  4018   if (UseCompressedOops) {
  4019     assert(!dst.uses(src), "not enough registers");
  4020     encode_heap_oop(src);
  4021     sw(src, dst);
  4022   } else {
  4023     sd(src, dst);
  4027 #ifdef ASSERT
  4028 void MacroAssembler::verify_heapbase(const char* msg) {
  4029   assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  4030   assert (Universe::heap() != NULL, "java heap should be initialized");
  4031 /*  if (CheckCompressedOops) {
  4032     Label ok;
  4033     push(rscratch1); // cmpptr trashes rscratch1
  4034     cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
  4035     jcc(Assembler::equal, ok);
  4036     STOP(msg);
  4037     bind(ok);
  4038     pop(rscratch1);
  4039   }*/
  4041 #endif
  4044 // Algorithm must match oop.inline.hpp encode_heap_oop.
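       // Worked example (base and shift values are illustrative): with
       // narrow_oop_base == 0x100000000 and narrow_oop_shift == 3, the oop
       // 0x100000040 encodes to (0x100000040 - 0x100000000) >> 3 == 0x8,
       // and decoding reverses this: (0x8 << 3) + 0x100000000.  A NULL oop
       // must encode to 0, which the movz below arranges by substituting
       // the heap base before the subtraction.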
  4045 void MacroAssembler::encode_heap_oop(Register r) {
  4046 #ifdef ASSERT
  4047   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  4048 #endif
  4049   verify_oop(r, "broken oop in encode_heap_oop");
  4050   if (Universe::narrow_oop_base() == NULL) {
  4051     if (Universe::narrow_oop_shift() != 0) { 
  4052       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4053       shr(r, LogMinObjAlignmentInBytes);
  4055     return;
  4058     movz(r, S5_heapbase, r);
  4059     dsub(r, r, S5_heapbase);
  4060     if (Universe::narrow_oop_shift() != 0) {
  4061       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4062       shr(r, LogMinObjAlignmentInBytes);
  4066 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  4067 #ifdef ASSERT
  4068   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  4069 #endif
  4070   verify_oop(src, "broken oop in encode_heap_oop");
  4071   if (Universe::narrow_oop_base() == NULL) {
  4072     if (Universe::narrow_oop_shift() != 0) { 
  4073       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4074       dsrl(dst, src, LogMinObjAlignmentInBytes);
  4075     } else {
  4076       if (dst != src) move(dst, src);
  4078   } else {
  4079     if (dst == src) {
  4080       movz(dst, S5_heapbase, dst);
  4081       dsub(dst, dst, S5_heapbase);
  4082       if (Universe::narrow_oop_shift() != 0) {
  4083         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4084         shr(dst, LogMinObjAlignmentInBytes);
  4086     } else {
  4087       dsub(dst, src, S5_heapbase);
  4088       if (Universe::narrow_oop_shift() != 0) {
  4089         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4090         shr(dst, LogMinObjAlignmentInBytes);
  4092       movz(dst, R0, src);
  4097 void MacroAssembler::encode_heap_oop_not_null(Register r) {
  4098     assert (UseCompressedOops, "should be compressed");
  4099 #ifdef ASSERT
  4100   if (CheckCompressedOops) {
  4101     Label ok;
  4102     bne(r, R0, ok);
  4103     delayed()->nop();
  4104     stop("null oop passed to encode_heap_oop_not_null");
  4105     bind(ok);
  4107 #endif
  4108   verify_oop(r, "broken oop in encode_heap_oop_not_null");
  4109   if (Universe::narrow_oop_base() != NULL) {
  4110     dsub(r, r, S5_heapbase);
  4112   if (Universe::narrow_oop_shift() != 0) {
  4113     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4114     shr(r, LogMinObjAlignmentInBytes);
  4119 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  4120     assert (UseCompressedOops, "should be compressed");
  4121 #ifdef ASSERT
  4122   if (CheckCompressedOops) {
  4123     Label ok;
  4124     bne(src, R0, ok);
  4125     delayed()->nop();
  4126     stop("null oop passed to encode_heap_oop_not_null2");
  4127     bind(ok);
  4129 #endif
  4130   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  4132   if (Universe::narrow_oop_base() != NULL) {
  4133     dsub(dst, src, S5_heapbase);
  4134     if (Universe::narrow_oop_shift() != 0) {
  4135       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4136       shr(dst, LogMinObjAlignmentInBytes);
  4138     } else {
  4139         if (Universe::narrow_oop_shift() != 0) {
  4140     	  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4141           dsrl(dst, src, LogMinObjAlignmentInBytes);
  4142         } else {
  4143           if (dst != src) move(dst, src);
  4148 void  MacroAssembler::decode_heap_oop(Register r) {
  4149 #ifdef ASSERT
  4150   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  4151 #endif
  4152   if (Universe::narrow_oop_base() == NULL) {
  4153     if (Universe::narrow_oop_shift() != 0) {
  4154       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4155       shl(r, LogMinObjAlignmentInBytes);
  4157   } else {
  4158     move(AT, r);
  4159     if (Universe::narrow_oop_shift() != 0) {
  4160       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4161       shl(r, LogMinObjAlignmentInBytes);
  4163     dadd(r, r, S5_heapbase);
  4164     movz(r, R0, AT);
  4166   verify_oop(r, "broken oop in decode_heap_oop");
  4169 void  MacroAssembler::decode_heap_oop(Register dst, Register src) {
  4170 #ifdef ASSERT
  4171   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  4172 #endif
  4173   if (Universe::narrow_oop_base() == NULL) {
  4174     if (Universe::narrow_oop_shift() != 0) {
  4175       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4176       dsll(dst, src, LogMinObjAlignmentInBytes);
  4177     } else {
  4178       if (dst != src) move(dst, src);
  4180   } else {
  4181     if (dst == src) {
  4182       move(AT, dst);
  4183       if (Universe::narrow_oop_shift() != 0) {
  4184         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4185         shl(dst, LogMinObjAlignmentInBytes);
  4187       dadd(dst, dst, S5_heapbase);
  4188       movz(dst, R0, AT);
  4189     } else {
  4190       if (Universe::narrow_oop_shift() != 0) {
  4191         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4192         dsll(dst, src, LogMinObjAlignmentInBytes);
  4193         daddu(dst, dst, S5_heapbase);
  4194       } else {
  4195         daddu(dst, src, S5_heapbase);
  4197       movz(dst, R0, src);
  4200   verify_oop(dst, "broken oop in decode_heap_oop");
  4203 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  4204   // Note: it will change flags
  4205   assert (UseCompressedOops, "should only be used for compressed headers");
  4206   assert (Universe::heap() != NULL, "java heap should be initialized");
  4207   // Cannot assert, unverified entry point counts instructions (see .ad file)
  4208   // vtableStubs also counts instructions in pd_code_size_limit.
  4209   // Also do not verify_oop as this is called by verify_oop.
  4210   if (Universe::narrow_oop_shift() != 0) {
  4211     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4212     shl(r, LogMinObjAlignmentInBytes);
  4213     if (Universe::narrow_oop_base() != NULL) {
  4214       daddu(r, r, S5_heapbase);
  4216   } else {
  4217     assert (Universe::narrow_oop_base() == NULL, "sanity");
  4221 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  4222   assert (UseCompressedOops, "should only be used for compressed headers");
  4223   assert (Universe::heap() != NULL, "java heap should be initialized");
  4225   // Cannot assert, unverified entry point counts instructions (see .ad file)
  4226   // vtableStubs also counts instructions in pd_code_size_limit.
  4227   // Also do not verify_oop as this is called by verify_oop.
  4228   //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  4229   if (Universe::narrow_oop_shift() != 0) {
  4230     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4231     if (LogMinObjAlignmentInBytes == Address::times_8) {
  4232       dsll(dst, src, LogMinObjAlignmentInBytes);
  4233       daddu(dst, dst, S5_heapbase);
  4234     } else {
  4235       dsll(dst, src, LogMinObjAlignmentInBytes);
  4236       if (Universe::narrow_oop_base() != NULL) {
  4237         daddu(dst, dst, S5_heapbase);
  4240   } else {
  4241     assert (Universe::narrow_oop_base() == NULL, "sanity");
  4242     if (dst != src) {
  4243       move(dst, src);
  4248 void MacroAssembler::encode_klass_not_null(Register r) {
  4249   if (Universe::narrow_klass_base() != NULL) {
  4250     assert(r != AT, "Encoding a klass in AT");
  4251     set64(AT, (int64_t)Universe::narrow_klass_base());
  4252     dsub(r, r, AT);
  4254   if (Universe::narrow_klass_shift() != 0) {
  4255     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4256     shr(r, LogKlassAlignmentInBytes);
  4258   // Not necessary for MIPS at all.
  4259   //if (Universe::narrow_klass_base() != NULL) {
  4260   //  reinit_heapbase();
  4261   //}
  4264 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  4265   if (dst == src) {
  4266     encode_klass_not_null(src);
  4267   } else {
  4268     if (Universe::narrow_klass_base() != NULL) {
  4269       set64(dst, (int64_t)Universe::narrow_klass_base());
  4270       dsub(dst, src, dst);
  4271       if (Universe::narrow_klass_shift() != 0) {
  4272         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4273         shr(dst, LogKlassAlignmentInBytes);
  4275     } else {
  4276       if (Universe::narrow_klass_shift() != 0) {
  4277         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4278         dsrl(dst, src, LogKlassAlignmentInBytes);
  4279       } else {
  4280         move(dst, src);
  4286 // Function instr_size_for_decode_klass_not_null() counts the instructions
  4287 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
  4288 // when (Universe::heap() != NULL).  Hence, if the instructions they
  4289 // generate change, then this method needs to be updated.
  4290 int MacroAssembler::instr_size_for_decode_klass_not_null() {
  4291   assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  4292   if (Universe::narrow_klass_base() != NULL) {
  4293     // Worst case: set64 + daddu + shl? (+ reinit_heapbase()); MIPS instructions are 4 bytes each.
  4294     return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  4295   } else {
  4296     // No base to subtract: at most a single shift instruction.
  4297     return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  4301 void  MacroAssembler::decode_klass_not_null(Register r) { 
  4302   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  4303   assert(r != AT, "Decoding a klass in AT");
  4304   // Cannot assert, unverified entry point counts instructions (see .ad file)
  4305   // vtableStubs also counts instructions in pd_code_size_limit.
  4306   // Also do not verify_oop as this is called by verify_oop.
  4307   if (Universe::narrow_klass_shift() != 0) { 
  4308     assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4309     shl(r, LogKlassAlignmentInBytes);
  4311   if (Universe::narrow_klass_base() != NULL) {
  4312     set64(AT, (int64_t)Universe::narrow_klass_base());
  4313     daddu(r, r, AT);
  4314     // Not necessary for MIPS at all.
  4315     //reinit_heapbase();
  4319 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  4320   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  4322   if (dst == src) {
  4323     decode_klass_not_null(dst);
  4324   } else {
  4325     // Cannot assert, unverified entry point counts instructions (see .ad file)
  4326     // vtableStubs also counts instructions in pd_code_size_limit.
  4327     // Also do not verify_oop as this is called by verify_oop.
  4328     set64(dst, (int64_t)Universe::narrow_klass_base());
  4329     if (Universe::narrow_klass_shift() != 0) {
  4330       assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4331       assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  4332       dsll(AT, src, Address::times_8);
  4333       daddu(dst, dst, AT);
  4334     } else {
  4335       daddu(dst, src, dst);
  4340 void MacroAssembler::incrementl(Register reg, int value) {
  4341   if (value == min_jint) {
  4342      move(AT, value);
  4343      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  4344      return; 
  4346   if (value <  0) { decrementl(reg, -value); return; }
  4347   if (value == 0) {                        ; return; }
  4349   if(Assembler::is_simm16(value)) {
  4350      NOT_LP64(addiu(reg, reg, value));
  4351      LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  4352   } else {
  4353      move(AT, value);
  4354      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  4358 void MacroAssembler::decrementl(Register reg, int value) {
  4359   if (value == min_jint) {
  4360      move(AT, value);
  4361      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  4362      return;
  4364   if (value <  0) { incrementl(reg, -value); return; }
  4365   if (value == 0) {                        ; return; }
  4367   if(Assembler::is_simm16(value)) {
  4368      NOT_LP64(addiu(reg, reg, -value));
  4369      LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  4370   } else {
  4371      move(AT, value);
  4372      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  4376 void MacroAssembler::reinit_heapbase() {
  4377   if (UseCompressedOops || UseCompressedClassPointers) {
  4378     if (Universe::heap() != NULL) {
  4379       if (Universe::narrow_oop_base() == NULL) {
  4380         move(S5_heapbase, R0);
  4381       } else {
  4382         set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
  4384     } else {
  4385       set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
  4386       ld(S5_heapbase, S5_heapbase, 0);
  4390 #endif // _LP64
  4392 void MacroAssembler::check_klass_subtype(Register sub_klass,
  4393                            Register super_klass,
  4394                            Register temp_reg,
  4395                            Label& L_success) {
  4396 // Implemented along the lines of the interpreter's gen_subtype_check.
  4397   Label L_failure;
  4398   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  4399   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  4400   bind(L_failure);
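       // Usage note: on a successful subtype check control transfers to
       // L_success; on failure it falls through here, so callers emit the
       // failure handling (e.g. a branch to a slow path) immediately after
       // this call.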
  4403 SkipIfEqual::SkipIfEqual(
  4404     MacroAssembler* masm, const bool* flag_addr, bool value) {
  4405   _masm = masm;
  4406   _masm->li(AT, (address)flag_addr);
  4407   _masm->lb(AT, AT, 0);
  4408   _masm->addi(AT, AT, -value);
  4409   _masm->beq(AT, R0, _label);
  4410   _masm->delayed()->nop();
  4412 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  4413                                                    Register super_klass,
  4414                                                    Register temp_reg,
  4415                                                    Label* L_success,
  4416                                                    Label* L_failure,
  4417                                                    Label* L_slow_path,
  4418                                         RegisterOrConstant super_check_offset) {
  4419   assert_different_registers(sub_klass, super_klass, temp_reg);
  4420   bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  4421   if (super_check_offset.is_register()) {
  4422     assert_different_registers(sub_klass, super_klass,
  4423                                super_check_offset.as_register());
  4424   } else if (must_load_sco) {
  4425     assert(temp_reg != noreg, "supply either a temp or a register offset");
  4428   Label L_fallthrough;
  4429   int label_nulls = 0;
  4430   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  4431   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  4432   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  4433   assert(label_nulls <= 1, "at most one NULL in the batch");
  4435   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  4436   int sco_offset = in_bytes(Klass::super_check_offset_offset());
  4437   // If the pointers are equal, we are done (e.g., String[] elements).
  4438   // This self-check enables sharing of secondary supertype arrays among
  4439   // non-primary types such as array-of-interface.  Otherwise, each such
  4440   // type would need its own customized SSA.
  4441   // We move this check to the front of the fast path because many
  4442   // type checks are in fact trivially successful in this manner,
  4443   // so we get a nicely predicted branch right at the start of the check.
  4444   //cmpptr(sub_klass, super_klass);
  4445   //local_jcc(Assembler::equal, *L_success);
  4446   beq(sub_klass, super_klass, *L_success);
  4447   delayed()->nop();
  4448   // Check the supertype display:
  4449   if (must_load_sco) {
  4450     // Positive movl does right thing on LP64.
  4451     lwu(temp_reg, super_klass, sco_offset);
  4452     super_check_offset = RegisterOrConstant(temp_reg);
  4454   dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  4455   daddu(AT, sub_klass, AT);
  4456   ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
  4458   // This check has worked decisively for primary supers.
  4459   // Secondary supers are sought in the super_cache ('super_cache_addr').
  4460   // (Secondary supers are interfaces and very deeply nested subtypes.)
  4461   // This works in the same check above because of a tricky aliasing
  4462   // between the super_cache and the primary super display elements.
  4463   // (The 'super_check_addr' can address either, as the case requires.)
  4464   // Note that the cache is updated below if it does not help us find
  4465   // what we need immediately.
  4466   // So if it was a primary super, we can just fail immediately.
  4467   // Otherwise, it's the slow path for us (no success at this point).
  4469   if (super_check_offset.is_register()) {
  4470     beq(super_klass, AT, *L_success);
  4471     delayed()->nop();
  4472     addi(AT, super_check_offset.as_register(), -sc_offset);
  4473     if (L_failure == &L_fallthrough) {
  4474       beq(AT, R0, *L_slow_path);
  4475       delayed()->nop();
  4476     } else {
  4477       bne(AT, R0, *L_failure);
  4478       delayed()->nop();
  4479       b(*L_slow_path);
  4480       delayed()->nop();
  4482   } else if (super_check_offset.as_constant() == sc_offset) {
  4483     // Need a slow path; fast failure is impossible.
  4484     if (L_slow_path == &L_fallthrough) {
  4485       beq(super_klass, AT, *L_success);
  4486       delayed()->nop();
  4487     } else {
  4488       bne(super_klass, AT, *L_slow_path);
  4489       delayed()->nop();
  4490       b(*L_success);
  4491       delayed()->nop();
  4493   } else {
  4494     // No slow path; it's a fast decision.
  4495     if (L_failure == &L_fallthrough) {
  4496       beq(super_klass, AT, *L_success);
  4497       delayed()->nop();
  4498     } else {
  4499       bne(super_klass, AT, *L_failure);
  4500       delayed()->nop();
  4501       b(*L_success);
  4502       delayed()->nop();
  4506   bind(L_fallthrough);
  4511 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  4512                                                    Register super_klass,
  4513                                                    Register temp_reg,
  4514                                                    Register temp2_reg,
  4515                                                    Label* L_success,
  4516                                                    Label* L_failure,
  4517                                                    bool set_cond_codes) {
  4518   assert_different_registers(sub_klass, super_klass, temp_reg);
  4519   if (temp2_reg != noreg)
  4520     assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  4521   else
  4522     temp2_reg = T9;
  4523 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  4525   Label L_fallthrough;
  4526   int label_nulls = 0;
  4527   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  4528   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  4529   assert(label_nulls <= 1, "at most one NULL in the batch");
  4531   // a couple of useful fields in sub_klass:
  4532   int ss_offset = in_bytes(Klass::secondary_supers_offset());
  4533   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  4534   Address secondary_supers_addr(sub_klass, ss_offset);
  4535   Address super_cache_addr(     sub_klass, sc_offset);
  4537   // Do a linear scan of the secondary super-klass chain.
  4538   // This code is rarely used, so simplicity is a virtue here.
  4539   // The repne_scan instruction uses fixed registers, which we must spill.
  4540   // Don't worry too much about pre-existing connections with the input regs.
  4542 #if 0
  4543   assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
  4544   assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
  4545 #endif
  4547   // Get super_klass value into rax (even if it was in rdi or rcx).
  4548 /*
  4549   bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  4550   if (super_klass != rax || UseCompressedOops) {
  4551     if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
  4552     mov(rax, super_klass);
  4554   if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  4555   if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
  4556 */
  4557 #ifndef PRODUCT
  4558   int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  4559   ExternalAddress pst_counter_addr((address) pst_counter);
  4560   NOT_LP64(  incrementl(pst_counter_addr) );
  4561   //LP64_ONLY( lea(rcx, pst_counter_addr) );
  4562   //LP64_ONLY( incrementl(Address(rcx, 0)) );
  4563 #endif //PRODUCT
  4565   // We will consult the secondary-super array.
  4566   ld(temp_reg, secondary_supers_addr);
  4567   // Load the array length.  (Positive movl does right thing on LP64.)
  4568   lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  4569   // Skip to start of data.
  4570   daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
  4572   // The x86 version scans with 'repne scasq'; here an explicit loop
  4573   // does the work: temp2_reg counts the remaining entries and temp_reg
  4574   // walks the secondary-super array, branching to L_failure when the
  4575   // count is exhausted and to 'subtype' on a match.
  4578   /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
  4579   Label Loop, subtype;
  4580   bind(Loop);
  4581   beq(temp2_reg, R0, *L_failure);
  4582   delayed()->nop();
  4583   ld(AT, temp_reg, 0);
  4584   beq(AT, super_klass, subtype);
  4585   delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  4586   b(Loop);
  4587   delayed()->daddi(temp2_reg, temp2_reg, -1); 
  4589   bind(subtype);
  4590   sd(super_klass, super_cache_addr);
  4591   if (L_success != &L_fallthrough) {
  4592     b(*L_success);
  4593     delayed()->nop();
  4596 /*
  4597   if (set_cond_codes) {
  4598     // Special hack for the AD files:  rdi is guaranteed non-zero.
  4599     assert(!pushed_rdi, "rdi must be left non-NULL");
  4600     // Also, the condition codes are properly set Z/NZ on succeed/failure.
  4602 */
  4603   // Success.  Cache the super we found and proceed in triumph.
  4604 #undef IS_A_TEMP
  4606   bind(L_fallthrough);
  4608 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  4609   ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  4610   sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  4611   verify_oop(oop_result, "broken oop in call_VM_base");
  4614 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  4615   ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  4616   sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
  4619 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  4620                                          int extra_slot_offset) {
  4621   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  4622   int stackElementSize = Interpreter::stackElementSize;
  4623   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  4624 #ifdef ASSERT
  4625   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  4626   assert(offset1 - offset == stackElementSize, "correct arithmetic");
  4627 #endif
  4628   Register             scale_reg    = NOREG;
  4629   Address::ScaleFactor scale_factor = Address::no_scale;
  4630   if (arg_slot.is_constant()) {
  4631     offset += arg_slot.as_constant() * stackElementSize;
  4632   } else {
  4633     scale_reg    = arg_slot.as_register();
  4634     scale_factor = Address::times_8;
  4636   // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
  4637   //  offset += wordSize;           // return PC is on stack
  4638   if (scale_reg == NOREG) return Address(SP, offset);
  4639   else {
  4640     dsll(scale_reg, scale_reg, scale_factor);
  4641     daddu(scale_reg, SP, scale_reg);
  4642     return Address(scale_reg, offset);
  4646 SkipIfEqual::~SkipIfEqual() {
  4647   _masm->bind(_label);
  4650 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  4651   switch (size_in_bytes) {
  4652 #ifndef _LP64
  4653   case  8:
  4654     assert(dst2 != noreg, "second dest register required");
  4655     lw(dst,  src);
  4656     lw(dst2, src.plus_disp(BytesPerInt));
  4657     break;
  4658 #else
  4659   case  8:  ld(dst, src); break;
  4660 #endif
  4661   case  4:  lw(dst, src); break;
  4662   case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  4663   case  1:  is_signed ? lb( dst, src) : lbu( dst, src); break;
  4664   default:  ShouldNotReachHere();
  4668 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  4669   switch (size_in_bytes) {
  4670 #ifndef _LP64
  4671   case  8:
  4672     assert(src2 != noreg, "second source register required");
  4673     sw(src, dst);
  4674     sw(src2, dst.plus_disp(BytesPerInt));
  4675     break;
  4676 #else
  4677   case  8:  sd(src, dst); break;
  4678 #endif
  4679   case  4:  sw(src, dst); break;
  4680   case  2:  sh(src, dst); break;
  4681   case  1:  sb(src, dst); break;
  4682   default:  ShouldNotReachHere();
  4686 // Look up the method for a megamorphic invokeinterface call.
  4687 // The target method is determined by <intf_klass, itable_index>.
  4688 // The receiver klass is in recv_klass.
  4689 // On success, the result will be in method_result, and execution falls through.
  4690 // On failure, execution transfers to the given label.
  4691 void MacroAssembler::lookup_interface_method(Register recv_klass,
  4692                                              Register intf_klass,
  4693                                              RegisterOrConstant itable_index,
  4694                                              Register method_result,
  4695                                              Register scan_temp,
  4696                                              Label& L_no_such_interface) {
  4697   assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  4698   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
  4699          "caller must use same register for non-constant itable index as for method");
  4701   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  4702   int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  4703   int itentry_off = itableMethodEntry::method_offset_in_bytes();
  4704   int scan_step   = itableOffsetEntry::size() * wordSize;
  4705   int vte_size    = vtableEntry::size() * wordSize;
  4706   Address::ScaleFactor times_vte_scale = Address::times_ptr;
  4707   assert(vte_size == wordSize, "else adjust times_vte_scale");
  4709   lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
  4711   // %%% Could store the aligned, prescaled offset in the klassoop.
  4712 //  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
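// [Editorial note] The three instructions below open-code the x86-style lea
// from the comment above:
//   scan_temp = recv_klass + (vtable_length << times_vte_scale) + vtable_base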
  4713   dsll(scan_temp, scan_temp, times_vte_scale);
  4714   daddu(scan_temp, recv_klass, scan_temp);
  4715   daddiu(scan_temp, scan_temp, vtable_base);
  4716   if (HeapWordsPerLong > 1) {
  4717     // Round up to align_object_offset boundary
  4718     // see code for InstanceKlass::start_of_itable!
  4719     round_to(scan_temp, BytesPerLong);
  4720   }
  4722   // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  4723   assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  4724 //  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
  4725   if (itable_index.is_constant()) {
  4726     set64(AT, (int)itable_index.as_constant());
  4727     dsll(AT, AT, (int)Address::times_ptr);
  4728   } else {
  4729     dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
  4730   }
  4731   daddu(AT, AT, recv_klass);
  4732   daddiu(recv_klass, AT, itentry_off);
  4734   // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  4735   //   if (scan->interface() == intf) {
  4736   //     result = (klass + scan->offset() + itable_index);
  4737   //   }
  4738   // }
  4739   Label search, found_method;
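// [Editorial note] The scan loop below is peeled once by hand: the peel == 1
// pass tests the first itable entry and branches to found_method on a hit;
// the peel == 0 pass is the steady-state scan entered via the search label,
// with the test inverted so that a hit falls through to found_method.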
  4741   for (int peel = 1; peel >= 0; peel--) {
  4742     ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  4744     if (peel) {
  4745       beq(intf_klass, method_result, found_method);
  4746       nop();
  4747     } else {
  4748       bne(intf_klass, method_result, search);
  4749       nop();
  4750       // (invert the test to fall through to found_method...)
  4751     }
  4753     if (!peel)  break;
  4755     bind(search);
  4757     // Check that the previous entry is non-null.  A null entry means that
  4758     // the receiver class doesn't implement the interface, and wasn't the
  4759     // same as when the caller was compiled.
  4760     beq(method_result, R0, L_no_such_interface);
  4761     nop();
  4762     daddiu(scan_temp, scan_temp, scan_step);
  4763   }
  4765   bind(found_method);
  4767   // Got a hit.
  4768   lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  4769   //ld(method_result, Address(recv_klass, scan_temp, Address::times_1));
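// [Editorial note] At this point scan_temp holds the matching interface's
// offset and recv_klass was pre-biased by itable_index * wordSize plus
// itentry_off, so the load below amounts to
//   method_result = *(recv_klass + scan_temp)
// gsldx fuses the add and the load on Loongson; the generic path goes
// through AT.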
  4770   if (UseLoongsonISA) {
  4771     gsldx(method_result, recv_klass, scan_temp, 0);
  4772   } else {
  4773     daddu(AT, recv_klass, scan_temp);
  4774     ld(method_result, AT);
  4775   }
  4776 }
  4779 // virtual method calling
  4780 void MacroAssembler::lookup_virtual_method(Register recv_klass,
  4781                                            RegisterOrConstant vtable_index,
  4782                                            Register method_result) {
  4783   Register tmp = GP;
  4784   push(tmp);
  4786   if (vtable_index.is_constant()) {
  4787     assert_different_registers(recv_klass, method_result, tmp);
  4788   } else {
  4789     assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  4790   }
  4791   const int base = InstanceKlass::vtable_start_offset() * wordSize;
  4792   assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  4793 /*
  4794   Address vtable_entry_addr(recv_klass,
  4795                             vtable_index, Address::times_ptr,
  4796                             base + vtableEntry::method_offset_in_bytes());
  4797 */
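// [Editorial note] The set64/dsll/daddu sequence below open-codes the scaled
// address from the commented-out form above; the entry loaded is
//   method_result = *(recv_klass + base + (vtable_index << times_ptr)
//                     + vtableEntry::method_offset_in_bytes())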
  4798   if (vtable_index.is_constant()) {
  4799     set64(AT, vtable_index.as_constant());
  4800     dsll(AT, AT, (int)Address::times_ptr);
  4801   } else {
  4802     dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  4803   }
  4804   set64(tmp, base + vtableEntry::method_offset_in_bytes());
  4805   daddu(tmp, tmp, AT);
  4806   daddu(tmp, tmp, recv_klass);
  4807   ld(method_result, tmp, 0);
  4809   pop(tmp);
  4810 }
