src/cpu/mips/vm/assembler_mips.cpp

author    fujie
date      Fri, 31 Mar 2017 12:43:02 -0400
changeset 391 (910b77f150c4), parent 389 (76857a2c3534), child 397 (1e8b8bc62356)

[C2] Optimize the oop/klass encoding and decoding (Follows a4946a9e94b0).

/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
intptr_t MacroAssembler::i[32] = {0};
float MacroAssembler::f[32] = {0.0};

void MacroAssembler::print(outputStream *s) {
  unsigned int k;
  for (k = 0; k < sizeof(i)/sizeof(i[0]); k++) {
    s->print_cr("i%d = 0x%.16lx", k, i[k]);
  }
  s->cr();

  for (k = 0; k < sizeof(f)/sizeof(f[0]); k++) {
    s->print_cr("f%d = %f", k, f[k]);
  }
  s->cr();
}

int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }

void MacroAssembler::save_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ sw(as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ swc1(as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}

void MacroAssembler::restore_registers(MacroAssembler *masm) {
#define __ masm->
  for (int k = 0; k < 32; k++) {
    __ lw(as_Register(k), A0, i_offset(k));
  }

  for (int k = 0; k < 32; k++) {
    __ lwc1(as_FloatRegister(k), A0, f_offset(k));
  }
#undef __
}
// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  _rspec = rspec_from_rtype(rtype, target);
}

// Implementation of Address

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}
// Implementation of Assembler
const char *Assembler::ops_name[] = {
  "special",  "regimm",   "j",      "jal",    "beq",      "bne",      "blez",   "bgtz",
  "addi",     "addiu",    "slti",   "sltiu",  "andi",     "ori",      "xori",   "lui",
  "cop0",     "cop1",     "cop2",   "cop3",   "beql",     "bnel",     "bleql",  "bgtzl",
  "daddi",    "daddiu",   "ldl",    "ldr",    "",         "",         "",       "",
  "lb",       "lh",       "lwl",    "lw",     "lbu",      "lhu",      "lwr",    "lwu",
  "sb",       "sh",       "swl",    "sw",     "sdl",      "sdr",      "swr",    "cache",
  "ll",       "lwc1",     "",       "",       "lld",      "ldc1",     "",       "ld",
  "sc",       "swc1",     "",       "",       "scd",      "sdc1",     "",       "sd"
};

const char* Assembler::special_name[] = {
  "sll",      "",         "srl",      "sra",      "sllv",     "",         "srlv",     "srav",
  "jr",       "jalr",     "movz",     "movn",     "syscall",  "break",    "",         "sync",
  "mfhi",     "mthi",     "mflo",     "mtlo",     "dsll",     "",         "dsrl",     "dsra",
  "mult",     "multu",    "div",      "divu",     "dmult",    "dmultu",   "ddiv",     "ddivu",
  "add",      "addu",     "sub",      "subu",     "and",      "or",       "xor",      "nor",
  "",         "",         "slt",      "sltu",     "dadd",     "daddu",    "dsub",     "dsubu",
  "tge",      "tgeu",     "tlt",      "tltu",     "teq",      "",         "tne",      "",
  "dsll",     "",         "dsrl",     "dsra",     "dsll32",   "",         "dsrl32",   "dsra32"
};

const char* Assembler::cop1_name[] = {
  "add",      "sub",      "mul",      "div",      "sqrt",     "abs",      "mov",      "neg",
  "round.l",  "trunc.l",  "ceil.l",   "floor.l",  "round.w",  "trunc.w",  "ceil.w",   "floor.w",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "c.f",      "c.un",     "c.eq",     "c.ueq",    "c.olt",    "c.ult",    "c.ole",    "c.ule",
  "c.sf",     "c.ngle",   "c.seq",    "c.ngl",    "c.lt",     "c.nge",    "c.le",     "c.ngt"
};

const char* Assembler::cop1x_name[] = {
  "lwxc1",    "ldxc1",    "",         "",         "",         "luxc1",    "",         "",
  "swxc1",    "sdxc1",    "",         "",         "",         "suxc1",    "",         "prefx",
  "",         "",         "",         "",         "",         "",         "alnv.ps",  "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "madd.s",   "madd.d",   "",         "",         "",         "",         "madd.ps",  "",
  "msub.s",   "msub.d",   "",         "",         "",         "",         "msub.ps",  "",
  "nmadd.s",  "nmadd.d",  "",         "",         "",         "",         "nmadd.ps", "",
  "nmsub.s",  "nmsub.d",  "",         "",         "",         "",         "nmsub.ps", ""
};

const char* Assembler::special2_name[] = {
  "madd",     "",         "mul",      "",         "msub",     "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "gsdmult",  "",         "",         "gsdiv",    "gsddiv",   "",         "",
  "",         "",         "",         "",         "gsmod",    "gsdmod",   "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         ""
};

const char* Assembler::special3_name[] = {
  "ext",      "",         "",         "",         "ins",      "dinsm",    "dinsu",    "dins",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "bshfl",    "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
};

const char* Assembler::regimm_name[] = {
  "bltz",     "bgez",     "bltzl",    "bgezl",    "",         "",         "",         "",
  "tgei",     "tgeiu",    "tlti",     "tltiu",    "teqi",     "",         "tnei",     "",
  "bltzal",   "bgezal",   "bltzall",  "bgezall"
};

const char* Assembler::gs_ldc2_name[] = {
  "gslbx",    "gslhx",    "gslwx",    "gsldx",    "",         "",         "gslwxc1",  "gsldxc1"
};

const char* Assembler::gs_lwc2_name[] = {
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "gslble",   "gslbgt",   "gslhle",   "gslhgt",   "gslwle",   "gslwgt",   "gsldle",   "gsldgt",
  "",         "",         "",         "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "", /*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/
  "gslq",     ""
};

const char* Assembler::gs_sdc2_name[] = {
  "gssbx",    "gsshx",    "gsswx",    "gssdx",    "",         "",         "gsswxc1",  "gssdxc1"
};

const char* Assembler::gs_swc2_name[] = {
  "",         "",         "",         "",         "",         "",         "",         "",
  "",         "",         "",         "",         "",         "",         "",         "",
  "gssble",   "gssbgt",   "gsshle",   "gsshgt",   "gsswle",   "gsswgt",   "gssdle",   "gssdgt",
  "",         "",         "",         "",         "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1",
  "gssq",     ""
};
// Misleading name: this prints only branch/jump instructions.
void Assembler::print_instruction(int inst) {
  const char *s;
  switch (opcode(inst)) {
  default:
    s = ops_name[opcode(inst)];
    break;
  case special_op:
    s = special_name[special(inst)];
    break;
  case regimm_op:
    s = regimm_name[rt(inst)];
    break;
  }

  ::tty->print("%s", s);
}
void MacroAssembler::pd_patch_instruction(address branch, address target) {
  jint& stub_inst = *(jint*) branch;

/* The far-branch stub emitted by b_far:
  move(AT, RA); // dadd
  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
  nop();
  lui(T9, 0); // to be patched
  ori(T9, T9, 0);
  daddu(T9, T9, RA);
  move(RA, AT);
  jr(T9);
 */
  if (special(stub_inst) == dadd_op) {
    jint *pc = (jint *)branch;

    assert(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op, "Not a branch label patch");
    if (!(opcode(pc[3]) == lui_op
          && opcode(pc[4]) == ori_op
          && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }

    int offset = target - branch;
    if (!is_simm16(offset)) {
      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    } else {
      /* The target turned out to be near: revert to "b + nop" */
      CodeBuffer cb(branch, 4 * 10);
      MacroAssembler masm(&cb);
#define __ masm.
      __ b(target);
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
      __ nop();
    }
    return;
  }

#ifndef PRODUCT
  if (!is_simm16((target - branch - 4) >> 2)) {
    tty->print_cr("Illegal patching: target=0x%lx", target);
    int *p = (int *)branch;
    for (int i = -10; i < 10; i++) {
      tty->print("0x%lx, ", p[i]);
    }
    tty->print_cr("");
  }
#endif

  stub_inst = patched_branch(target - branch, stub_inst, 0);
}
int Assembler::is_int_mask(int x) {
  int xx = x;
  int count = 0;

  while (x != 0) {
    x &= (x - 1);
    count++;
  }

  if ((1 << count) == (xx + 1)) {
    return count;
  } else {
    return -1;
  }
}

int Assembler::is_jlong_mask(jlong x) {
  jlong xx = x;
  int count = 0;

  while (x != 0) {
    x &= (x - 1);
    count++;
  }

  if (((jlong)1 << count) == (xx + 1)) {
    return count;
  } else {
    return -1;
  }
}
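
// Illustrative note (not in the original source): both helpers return the
// width of a contiguous low-bit mask, or -1 if the value is not such a mask.
// The loop clears the lowest set bit each iteration, so count ends up as the
// population count; e.g. is_int_mask(0x00ff) == 8 because 0x00ff + 1 == 1 << 8,
// while is_int_mask(0x00f0) == -1 since its set bits do not start at bit 0.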
// Without check, maybe fixed
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
  int v = (dest_pos - inst_pos - 4) >> 2;
  switch (opcode(inst)) {
  case j_op:
  case jal_op:
    assert(false, "should not use j/jal here");
    break;
  default:
    assert(is_simm16(v), "must be simm16");
#ifndef PRODUCT
    if (!is_simm16(v)) {
      tty->print_cr("must be simm16");
      tty->print_cr("Inst: %lx", inst);
    }
#endif

    v = low16(v);
    inst &= 0xffff0000;
    break;
  }

  return inst | v;
}
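
// Worked example (illustrative, not in the original source): a conditional
// branch at inst_pos 0x100 redirected to dest_pos 0x140 gets
// v = (0x140 - 0x100 - 4) >> 2 = 15; the opcode and register bits in the
// upper half-word are kept and 0x000f is written into the 16-bit offset
// field. The offset is word-scaled and relative to the delay slot, hence
// the "- 4" and ">> 2".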
int Assembler::branch_destination(int inst, int pos) {
  int off;

  switch (opcode(inst)) {
  case j_op:
  case jal_op:
    assert(false, "should not use j/jal here");
    break;
  default:
    off = expand(low16(inst), 15);
    break;
  }

  return off ? pos + 4 + (off << 2) : 0;
}

int AbstractAssembler::code_fill_byte() {
  return 0x00;                  // illegal instruction 0x00000000
}
// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::lb(Register rt, Address src) {
  lb(rt, src.base(), src.disp());
}

void Assembler::lbu(Register rt, Address src) {
  lbu(rt, src.base(), src.disp());
}

void Assembler::ld(Register rt, Address src) {
  ld(rt, src.base(), src.disp());
}

void Assembler::ldl(Register rt, Address src) {
  ldl(rt, src.base(), src.disp());
}

void Assembler::ldr(Register rt, Address src) {
  ldr(rt, src.base(), src.disp());
}

void Assembler::lh(Register rt, Address src) {
  lh(rt, src.base(), src.disp());
}

void Assembler::lhu(Register rt, Address src) {
  lhu(rt, src.base(), src.disp());
}

void Assembler::ll(Register rt, Address src) {
  ll(rt, src.base(), src.disp());
}

void Assembler::lld(Register rt, Address src) {
  lld(rt, src.base(), src.disp());
}

void Assembler::lw(Register rt, Address src) {
  lw(rt, src.base(), src.disp());
}

void Assembler::lea(Register rt, Address src) {
#ifdef _LP64
  daddi(rt, src.base(), src.disp());
#else
  addi(rt, src.base(), src.disp());
#endif
}

void Assembler::lwl(Register rt, Address src) {
  lwl(rt, src.base(), src.disp());
}

void Assembler::lwr(Register rt, Address src) {
  lwr(rt, src.base(), src.disp());
}

void Assembler::lwu(Register rt, Address src) {
  lwu(rt, src.base(), src.disp());
}

void Assembler::sb(Register rt, Address dst) {
  sb(rt, dst.base(), dst.disp());
}

void Assembler::sc(Register rt, Address dst) {
  sc(rt, dst.base(), dst.disp());
}

void Assembler::scd(Register rt, Address dst) {
  scd(rt, dst.base(), dst.disp());
}

void Assembler::sd(Register rt, Address dst) {
  sd(rt, dst.base(), dst.disp());
}

void Assembler::sdl(Register rt, Address dst) {
  sdl(rt, dst.base(), dst.disp());
}

void Assembler::sdr(Register rt, Address dst) {
  sdr(rt, dst.base(), dst.disp());
}

void Assembler::sh(Register rt, Address dst) {
  sh(rt, dst.base(), dst.disp());
}

void Assembler::sw(Register rt, Address dst) {
  sw(rt, dst.base(), dst.disp());
}

void Assembler::swl(Register rt, Address dst) {
  swl(rt, dst.base(), dst.disp());
}

void Assembler::swr(Register rt, Address dst) {
  swr(rt, dst.base(), dst.disp());
}

void Assembler::lwc1(FloatRegister rt, Address src) {
  lwc1(rt, src.base(), src.disp());
}

void Assembler::ldc1(FloatRegister rt, Address src) {
  ldc1(rt, src.base(), src.disp());
}

void Assembler::swc1(FloatRegister rt, Address dst) {
  swc1(rt, dst.base(), dst.disp());
}

void Assembler::sdc1(FloatRegister rt, Address dst) {
  sdc1(rt, dst.base(), dst.disp());
}
void Assembler::j(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000)) >> 2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000)) >> 2;
#endif
  emit_long((j_op << 26) | dest);
  has_delay_slot();
}

void Assembler::jal(address entry) {
#ifdef MIPS64
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xfffffffff0000000)) >> 2;
#else
  int dest = ((intptr_t)entry - (((intptr_t)pc() + 4) & 0xf0000000)) >> 2;
#endif
  emit_long((jal_op << 26) | dest);
  has_delay_slot();
}
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}

static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

int MacroAssembler::call_size(address target, bool far, bool patchable) {
  if (patchable) return 6 << Assembler::LogInstructionSize;
  if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop
  return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize;
}

// Can we reach target using jal/j from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::reachable_from_cache(address target) {
  address cl = first_cache_address();
  address ch = last_cache_address();

  return fit_in_jal(target, cl) && fit_in_jal(target, ch);
}
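
// Illustrative note (not in the original source): J/JAL encode a 26-bit
// instruction index, so a jump can only reach targets inside the same
// naturally aligned 256MB segment as its delay slot (see j()/jal() above,
// which mask PC + 4 with 0x...f0000000). Checking the target against both
// ends of the code cache is therefore sufficient: if it fits from both
// bounds, it fits from any address the code could be relocated to between
// them.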
void MacroAssembler::general_jump(address target) {
  if (reachable_from_cache(target)) {
    j(target);
    nop();
  } else {
    set64(T9, (long)target);
    jr(T9);
    nop();
  }
}

void MacroAssembler::patchable_jump(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    j(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jr(T9);
    nop();
  }
}

void MacroAssembler::general_call(address target) {
  if (reachable_from_cache(target)) {
    jal(target);
    nop();
  } else {
    set64(T9, (long)target);
    jalr(T9);
    nop();
  }
}

void MacroAssembler::patchable_call(address target) {
  if (reachable_from_cache(target)) {
    nop();
    nop();
    nop();
    nop();
    jal(target);
    nop();
  } else {
    patchable_set48(T9, (long)target);
    jalr(T9);
    nop();
  }
}
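
// Illustrative note (not in the original source): both arms of the patchable
// forms are meant to occupy the six-instruction footprint that call_size()
// reports for a patchable site, so the near form (4 nops + jal + nop) and
// the far form (patchable_set48 + jalr + nop, assuming patchable_set48
// expands to a fixed four-instruction sequence) can be patched over each
// other in place.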
void MacroAssembler::beq_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::beq(rs, rt, offset(entry));
  } else {
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::beq_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    beq_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    bne(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    Assembler::bne(rs, rt, offset(entry));
  } else {
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(entry);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}

void MacroAssembler::bne_far(Register rs, Register rt, Label& L) {
  if (L.is_bound()) {
    bne_far(rs, rt, target(L));
  } else {
    u_char * cur_pc = pc();
    Label not_jump;
    beq(rs, rt, not_jump);
    delayed()->nop();

    b_far(L);
    delayed()->nop();

    bind(not_jump);
    has_delay_slot();
  }
}
void MacroAssembler::b_far(Label& L) {
  if (L.is_bound()) {
    b_far(target(L));
  } else {
    volatile address dest = target(L);
/*
MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
   0x00000055651ed514: dadd at, ra, zero
   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520

   0x00000055651ed51c: sll zero, zero, 0
   0x00000055651ed520: lui t9, 0x0
   0x00000055651ed524: ori t9, t9, 0x21b8
   0x00000055651ed528: daddu t9, t9, ra
   0x00000055651ed52c: dadd ra, at, zero
   0x00000055651ed530: jr t9
   0x00000055651ed534: sll zero, zero, 0
*/
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    lui(T9, 0); // to be patched
    ori(T9, T9, 0);
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}

void MacroAssembler::b_far(address entry) {
  u_char * cur_pc = pc();

  /* Jin: Near/Far jump */
  if (is_simm16((entry - pc() - 4) / 4)) {
    b(offset(entry));
  } else {
    /* address must be bounded */
    move(AT, RA);
    emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    nop();
    li32(T9, entry - pc());
    daddu(T9, T9, RA);
    move(RA, AT);
    jr(T9);
  }
}
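
// Illustrative note (not in the original source): the emitted
// "bgezal zero, 1" is an always-taken branch-and-link used purely to capture
// the address of the lui into RA. Adding the patched lui/ori displacement to
// RA then produces an arbitrarily far target for jr, while AT preserves the
// caller's real return address around the trick (see the disassembly in
// b_far(Label&) above, and the offset - 12 adjustment in
// pd_patch_instruction, which accounts for RA pointing 12 bytes past the
// stub start).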
// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

//#ifndef _LP64

// 32bit versions

void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_ptr(rt, 0, AT);
}

void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_ptr(rt, 0, AT);
}

void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  ld_long(rt, 0, AT);
}

void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  if (!Use3A2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}
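
// Illustrative note (not in the original source): this is the classic MIPS
// ll/sc retry loop. sc stores only if no other CPU has written the line
// since the ll, and leaves 0 in tmp_reg2 on failure, in which case the beq
// branches back and the increment is retried. It is used below, e.g. for
// the BiasedLocking statistics counters.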
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.

  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.

  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT, swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}
// NOTE: we don't increment the SP after the call like the x86 version, maybe this is a problem, FIXME.
// by yjl 6/27/2005
// The stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf
// by yjl 7/11/2005
// This method handles the stack alignment itself, so you need not preserve stack space for the arguments.
// by yjl 8/1/2005
void MacroAssembler::call_VM_leaf_base(address entry_point,
    int number_of_arguments) {
  //call(RuntimeAddress(entry_point));
  //increment(rsp, number_of_arguments * wordSize);
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);

  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);

  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  bind(E);
}
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        patchable_set48(T9, (long)entry);
        jr(T9);
      }
      break;
  }
}

void MacroAssembler::call(address entry) {
// C/C++ code assumes T9 is the entry point, so we always move entry into T9.
// Maybe there is a more graceful way to handle this. FIXME
// by yjl 6/27/2005
// For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rtype);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::call(address entry, RelocationHolder& rh) {
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
        InstructionMark im(this);
        relocate(rh);
        call(entry);
      }
      break;
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}
void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}
#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}
void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if (ShowMessageBoxOnError) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks to work, we need to fake an in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }
      // if (os::message_box(msg, "Execution stopped, print registers?"))
      //   regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // Reserve space for the argument. added by yjl 7/10/2005
  addiu(SP, SP, -1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // Restore the space for the argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}
void MacroAssembler::print_reg(Register reg) {
/*
  char *s = getenv("PRINT_REG");
  if (s == NULL)
    return;
  if (strcmp(s, "1") != 0)
    return;
*/
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); // 23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); // A0 has been modified by li(A0, (long)reg->name()). Ugly code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

/*
  pushad();
#ifdef _LP64
  if (reg == SP)
    addiu(A0, SP, wordSize * 23); // 23 registers saved in pushad()
  else
    move(A0, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
#else
  push(FP);
  move(A0, reg);
  dsrl32(A1, reg, 0);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int), relocInfo::runtime_call_type);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long), relocInfo::runtime_call_type);
  delayed()->nop();
  pop(FP);
#endif
  popad();

  pushad();
  NOT_LP64(push(FP);)
  char b[50];
  sprintf((char *)b, " pc: %p\n", cur_pc);
  li(A0, (long)(char *)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
*/
}
void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP, SP, AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float), relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str), relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2);
  assert(arg_2 != A1, "smashed argument");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  if (arg_1 != A1) move(A1, arg_1);
  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {

  address before_call_pc;
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");

  // set last Java frame before call
  before_call_pc = (address)pc();
  set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);

  // do the call
  move(A0, java_thread);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
#ifndef OPT_THREAD
  if (java_thread >= S0 && java_thread <= S7) {
#ifdef ASSERT
    { Label L;
      get_thread(AT);
      beq(java_thread, AT, L);
      delayed()->nop();
      stop("MacroAssembler::call_VM_base: java_thread not callee saved?");
      bind(L);
    }
#endif
  } else {
    get_thread(java_thread);
  }
#endif

  // discard thread and arguments
  ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // reset last Java frame
  reset_last_Java_frame(java_thread, false, true);

  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    Label L;
#ifdef _LP64
    ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#else
    lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
#endif
    beq(AT, R0, L);
    delayed()->nop();
    li(AT, before_call_pc);
    push(AT);
    jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    delayed()->nop();
    bind(L);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
#ifdef _LP64
    ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#else
    lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
    sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset()));
#endif
    verify_oop(oop_result);
  }
}
  1597 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  1599   move(V0, SP);
  1600   //we also reserve space for java_thread here
  1601 #ifndef _LP64
  1602   daddi(SP, SP, (1 + number_of_arguments) * (- wordSize));
  1603 #endif
  1604   move(AT, -(StackAlignmentInBytes));
  1605   andr(SP, SP, AT);
  1606   call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
  1607 }
  1610 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  1611   call_VM_leaf_base(entry_point, number_of_arguments);
  1612 }
  1614 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  1615   if (arg_0 != A0) move(A0, arg_0);
  1616   call_VM_leaf(entry_point, 1);
  1617 }
  1619 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  1620   if (arg_0 != A0) move(A0, arg_0);
  1621   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1622   call_VM_leaf(entry_point, 2);
  1623 }
  1625 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  1626   if (arg_0 != A0) move(A0, arg_0);
  1627   if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument");
  1628   if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
  1629   call_VM_leaf(entry_point, 3);
  1630 }
  1631 void MacroAssembler::super_call_VM_leaf(address entry_point) {
  1632   MacroAssembler::call_VM_leaf_base(entry_point, 0);
  1633 }
  1636 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1637                                                    Register arg_1) {
  1638   if (arg_1 != A0) move(A0, arg_1);
  1639   MacroAssembler::call_VM_leaf_base(entry_point, 1);
  1640 }
  1643 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1644                                                    Register arg_1,
  1645                                                    Register arg_2) {
  1646   if (arg_1 != A0) move(A0, arg_1);
  1647   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1648   MacroAssembler::call_VM_leaf_base(entry_point, 2);
  1649 }
  1650 void MacroAssembler::super_call_VM_leaf(address entry_point,
  1651                                                    Register arg_1,
  1652                                                    Register arg_2,
  1653                                                    Register arg_3) {
  1654   if (arg_1 != A0) move(A0, arg_1);
  1655   if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
  1656   if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
  1657   MacroAssembler::call_VM_leaf_base(entry_point, 3);
  1658 }
  1660 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
  1661 }
  1663 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
  1664 }
  1666 void MacroAssembler::null_check(Register reg, int offset) {
  1667   if (needs_explicit_null_check(offset)) {
  1668     // provoke OS NULL exception if reg = NULL by
  1669     // accessing M[reg] w/o changing any (non-CC) registers
  1670     // NOTE: a plain load is enough here to provoke a SEGV
  1671     lw(AT, reg, 0);
  1677     // Note: should probably use testl(rax, Address(reg, 0));
  1678     //       may be shorter code (however, this version of
  1679     //       testl needs to be implemented first)
  1680   } else {
  1681     // nothing to do, (later) access of M[reg + offset]
  1682     // will provoke OS NULL exception if reg = NULL
  1683   }
  1684 }
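// Illustrative note (the offset value is hypothetical): for a later access
// at M[reg + 8], needs_explicit_null_check(8) is false because a NULL base
// would fault inside the OS guard page, so the access itself raises the
// implicit null exception and no probe needs to be emitted here.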
  1686 void MacroAssembler::enter() {
  1687   push2(RA, FP);
  1688   move(FP, SP);
  1689 }
  1691 void MacroAssembler::leave() {
  1692 #ifndef _LP64
  1693   //move(SP, FP);
  1694   //pop2(FP, RA);
  1695   addi(SP, FP, 2 * wordSize);
  1696   lw(RA, SP, - 1 * wordSize);
  1697   lw(FP, SP, - 2 * wordSize);
  1698 #else
  1699   daddi(SP, FP, 2 * wordSize);
  1700   ld(RA, SP, - 1 * wordSize);
  1701   ld(FP, SP, - 2 * wordSize);
  1702 #endif
  1703 }
  1704 /*
  1705 void MacroAssembler::os_breakpoint() {
  1706   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  1707   // (e.g., MSVC can't call ps() otherwise)
  1708   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
  1710 */
  1711 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  1712   // determine java_thread register
  1713   if (!java_thread->is_valid()) {
  1714 #ifndef OPT_THREAD
  1715     java_thread = T1;
  1716     get_thread(java_thread);
  1717 #else
  1718     java_thread = TREG;
  1719 #endif
  1720   }
  1721   // we must set sp to zero to clear frame
  1722   st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1723   // must clear fp, so that compiled frames are not confused; it is possible
  1724   // that we need it only for debugging
  1725   if (clear_fp)
  1726     st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1728   if (clear_pc)
  1729     st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1730 }
  1732 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
  1733                                            bool clear_pc) {
  1734   Register thread = TREG;
  1735 #ifndef OPT_THREAD
  1736   get_thread(thread);
  1737 #endif
  1738   // we must set sp to zero to clear frame
  1739   sd(R0, Address(thread, JavaThread::last_Java_sp_offset()));
  1740   // must clear fp, so that compiled frames are not confused; it is
  1741   // possible that we need it only for debugging
  1742   if (clear_fp) {
  1743     sd(R0, Address(thread, JavaThread::last_Java_fp_offset()));
  1744   }
  1746   if (clear_pc) {
  1747     sd(R0, Address(thread, JavaThread::last_Java_pc_offset()));
  1748   }
  1749 }
  1751 // Write serialization page so VM thread can do a pseudo remote membar.
  1752 // We use the current thread pointer to calculate a thread specific
  1753 // offset to write to within the page. This minimizes bus traffic
  1754 // due to cache line collision.
  1755 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  1756   move(tmp, thread);
  1757   srl(tmp, tmp, os::get_serialize_page_shift_count());
  1758   move(AT, (os::vm_page_size() - sizeof(int)));
  1759   andr(tmp, tmp, AT);
  1760   sw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));
  1761 }
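// Sketch of the offset computed above (the page size is illustrative):
//   offset = (thread >> serialize_page_shift) & (vm_page_size() - sizeof(int))
// With a 4K page, distinct threads hash to distinct word offsets within the
// serialize page, which keeps cache-line collisions rare.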
  1763 // Calls to C land
  1764 //
  1765 // When entering C land, the fp & sp of the last Java frame have to be recorded
  1766 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
  1767 // has to be reset to 0. This is required to allow proper stack traversal.
  1768 void MacroAssembler::set_last_Java_frame(Register java_thread,
  1769                                          Register last_java_sp,
  1770                                          Register last_java_fp,
  1771                                          address  last_java_pc) {
  1772   // determine java_thread register
  1773   if (!java_thread->is_valid()) {
  1774 #ifndef OPT_THREAD
  1775     java_thread = T2;
  1776     get_thread(java_thread);
  1777 #else
  1778     java_thread = TREG;
  1779 #endif
  1780   }
  1781   // determine last_java_sp register
  1782   if (!last_java_sp->is_valid()) {
  1783     last_java_sp = SP;
  1784   }
  1786   // last_java_fp is optional
  1788   if (last_java_fp->is_valid()) {
  1789     st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  1790   }
  1792   // last_java_pc is optional
  1794   if (last_java_pc != NULL) {
  1795     relocate(relocInfo::internal_pc_type);
  1796     patchable_set48(AT, (long)last_java_pc);
  1797     st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  1798   }
  1799   st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
  1800 }
  1802 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  1803                                          Register last_java_fp,
  1804                                          address  last_java_pc) {
  1805   // determine last_java_sp register
  1806   if (!last_java_sp->is_valid()) {
  1807     last_java_sp = SP;
  1808   }
  1810   Register thread = TREG;
  1811 #ifndef OPT_THREAD
  1812   get_thread(thread);
  1813 #endif
  1814   // last_java_fp is optional
  1815   if (last_java_fp->is_valid()) {
  1816     sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  1817   }
  1819   // last_java_pc is optional
  1820   if (last_java_pc != NULL) {
  1821     Address java_pc(thread,
  1822                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  1823     li(AT, (intptr_t)(last_java_pc));
  1824     sd(AT, java_pc);
  1825   }
  1827   sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
  1828 }
  1830 //////////////////////////////////////////////////////////////////////////////////
  1831 #ifndef SERIALGC
  1833 void MacroAssembler::g1_write_barrier_pre(Register obj,
  1834 #ifndef _LP64
  1835                                           Register thread,
  1836 #endif
  1837                                           Register tmp,
  1838                                           Register tmp2,
  1839                                           bool tosca_live) {
  1840 /*  LP64_ONLY(Register thread = r15_thread;)
  1841   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1842                                        PtrQueue::byte_offset_of_active()));
  1844   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1845                                        PtrQueue::byte_offset_of_index()));
  1846   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  1847                                        PtrQueue::byte_offset_of_buf()));
  1850   Label done;
  1851   Label runtime;
  1853   // if (!marking_in_progress) goto done;
  1854   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
  1855     cmpl(in_progress, 0);
  1856   } else {
  1857     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
  1858     cmpb(in_progress, 0);
  1860   jcc(Assembler::equal, done);
  1862   // if (x.f == NULL) goto done;
  1863   cmpptr(Address(obj, 0), NULL_WORD);
  1864   jcc(Assembler::equal, done);
  1866   // Can we store original value in the thread's buffer?
  1868   LP64_ONLY(movslq(tmp, index);)
  1869   movptr(tmp2, Address(obj, 0));
  1870 #ifdef _LP64
  1871   cmpq(tmp, 0);
  1872 #else
  1873   cmpl(index, 0);
  1874 #endif
  1875   jcc(Assembler::equal, runtime);
  1876 #ifdef _LP64
  1877   subq(tmp, wordSize);
  1878   movl(index, tmp);
  1879   addq(tmp, buffer);
  1880 #else
  1881   subl(index, wordSize);
  1882   movl(tmp, buffer);
  1883   addl(tmp, index);
  1884 #endif
  1885   movptr(Address(tmp, 0), tmp2);
  1886   jmp(done);
  1887   bind(runtime);
  1888   // save the live input values
  1889   if(tosca_live) push(rax);
  1890   push(obj);
  1891 #ifdef _LP64
  1892   movq(c_rarg0, Address(obj, 0));
  1893   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
  1894 #else
  1895   push(thread);
  1896   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  1897   pop(thread);
  1898 #endif
  1899   pop(obj);
  1900   if(tosca_live) pop(rax);
  1901   bind(done);
  1902 */
  1903 }
  1905 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  1906                                            Register new_val,
  1907 #ifndef _LP64
  1908                                            Register thread,
  1909 #endif
  1910                                            Register tmp,
  1911                                            Register tmp2) {
  1913   /*LP64_ONLY(Register thread = r15_thread;)
  1914   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1915                                        PtrQueue::byte_offset_of_index()));
  1916   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  1917                                        PtrQueue::byte_offset_of_buf()));
  1918   BarrierSet* bs = Universe::heap()->barrier_set();
  1919   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  1920   Label done;
  1921   Label runtime;
  1923   // Does store cross heap regions?
  1925   movptr(tmp, store_addr);
  1926   xorptr(tmp, new_val);
  1927   shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  1928   jcc(Assembler::equal, done);
  1930   // crosses regions, storing NULL?
  1932   cmpptr(new_val, (int32_t) NULL_WORD);
  1933   jcc(Assembler::equal, done);
  1935   // storing region crossing non-NULL, is card already dirty?
  1937   ExternalAddress cardtable((address) ct->byte_map_base);
  1938   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  1939 #ifdef _LP64
  1940   const Register card_addr = tmp;
  1942   movq(card_addr, store_addr);
  1943   shrq(card_addr, CardTableModRefBS::card_shift);
  1945   lea(tmp2, cardtable);
  1947   // get the address of the card
  1948   addq(card_addr, tmp2);
  1949 #else
  1950   const Register card_index = tmp;
  1952   movl(card_index, store_addr);
  1953   shrl(card_index, CardTableModRefBS::card_shift);
  1955   Address index(noreg, card_index, Address::times_1);
  1956   const Register card_addr = tmp;
  1957   lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
  1958 #endif
  1959   cmpb(Address(card_addr, 0), 0);
  1960   jcc(Assembler::equal, done);
  1962   // storing a region crossing, non-NULL oop, card is clean.
  1963   // dirty card and log.
  1965   movb(Address(card_addr, 0), 0);
  1967   cmpl(queue_index, 0);
  1968   jcc(Assembler::equal, runtime);
  1969   subl(queue_index, wordSize);
  1970   movptr(tmp2, buffer);
  1971 #ifdef _LP64
  1972   movslq(rscratch1, queue_index);
  1973   addq(tmp2, rscratch1);
  1974   movq(Address(tmp2, 0), card_addr);
  1975 #else
  1976   addl(tmp2, queue_index);
  1977   movl(Address(tmp2, 0), card_index);
  1978 #endif
  1979   jmp(done);
  1981   bind(runtime);
  1982   // save the live input values
  1983   push(store_addr);
  1984   push(new_val);
  1985 #ifdef _LP64
  1986   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
  1987 #else
  1988   push(thread);
  1989   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  1990   pop(thread);
  1991 #endif
  1992   pop(new_val);
  1993   pop(store_addr);
  1995   bind(done);
  1996 */
  1997 }
  1999 #endif // SERIALGC
  2000 //////////////////////////////////////////////////////////////////////////////////
  2003 void MacroAssembler::store_check(Register obj) {
  2004   // Does a store check for the oop in register obj. The content of
  2005   // register obj is destroyed afterwards.
  2006   store_check_part_1(obj);
  2007   store_check_part_2(obj);
  2008 }
  2010 void MacroAssembler::store_check(Register obj, Address dst) {
  2011   store_check(obj);
  2012 }
  2015 // split the store check operation so that other instructions can be scheduled in between
  2016 void MacroAssembler::store_check_part_1(Register obj) {
  2017   BarrierSet* bs = Universe::heap()->barrier_set();
  2018   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  2019 #ifdef _LP64
  2020   dsrl(obj, obj, CardTableModRefBS::card_shift);
  2021 #else
  2022   shr(obj, CardTableModRefBS::card_shift);
  2023 #endif
  2024 }
  2026 void MacroAssembler::store_check_part_2(Register obj) {
  2027   BarrierSet* bs = Universe::heap()->barrier_set();
  2028   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  2029   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  2030   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  2032   li(AT, (long)ct->byte_map_base);
  2033 #ifdef _LP64
  2034   dadd(AT, AT, obj);
  2035 #else
  2036   add(AT, AT, obj);
  2037 #endif
  2038   sb(R0, AT, 0);
  2039   sync();
  2040 }
  2042 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  2043 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  2044                                    Register t1, Register t2, Label& slow_case) {
  2045   assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
  2047   Register end = t2;
  2048 #ifndef OPT_THREAD
  2049   Register thread = t1;
  2050   get_thread(thread);
  2051 #else
  2052   Register thread = TREG;
  2053 #endif
  2054   verify_tlab(t1, t2); // blows t1 & t2
  2056   ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
  2058   if (var_size_in_bytes == NOREG) {
  2059     // con_size_in_bytes does not need to be moved into a register first,
  2060     // as long as it fits in a simm16 immediate.
  2061     assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  2062     addi(end, obj, con_size_in_bytes);
  2063   } else {
  2064     add(end, obj, var_size_in_bytes);
  2065   }
  2067   ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  2068   sltu(AT, AT, end);
  2069   bne_far(AT, R0, slow_case);
  2070   delayed()->nop();
  2073   // update the tlab top pointer
  2074   st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));
  2076   // recover var_size_in_bytes if necessary
  2077   /*if (var_size_in_bytes == end) {
  2078     sub(var_size_in_bytes, end, obj);
  2079     }*/
  2081   verify_tlab(t1, t2);
  2082 }
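// The fast path above is plain bump-the-pointer allocation, roughly:
//   obj = tlab_top;  end = obj + size;
//   if (tlab_end < end) goto slow_case;  else tlab_top = end;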
  2084 // Defines obj, preserves var_size_in_bytes
  2085 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
  2086 		Register t1, Register t2, Label& slow_case) {
  2087   assert_different_registers(obj, var_size_in_bytes, t1, AT);
  2088   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  2089     // No allocation in the shared eden.
  2090     b_far(slow_case);
  2091     delayed()->nop();
  2092   } else {
  2094 #ifndef _LP64
  2095     Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
  2096     lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
  2097 #else
  2098     Address heap_top(t1);
  2099     li(t1, (long)Universe::heap()->top_addr());
  2100 #endif
  2101     ld_ptr(obj, heap_top);
  2103     Register end = t2;
  2104     Label retry;
  2106     bind(retry);
  2107     if (var_size_in_bytes == NOREG) {
  2108       // con_size_in_bytes does not need to be moved into a register first,
  2109       // as long as it fits in a simm16 immediate.
  2110       assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
  2111       addi(end, obj, con_size_in_bytes);
  2112     } else {
  2113       add(end, obj, var_size_in_bytes);
  2114     }
  2115     // if end < obj then we wrapped around => object too long => slow case
  2116     sltu(AT, end, obj);
  2117     bne_far(AT, R0, slow_case);
  2118     delayed()->nop();
  2120     //lui(AT, split_high((int)Universe::heap()->end_addr()));
  2121     //lw(AT, AT, split_low((int)Universe::heap()->end_addr()));
  2122     li(AT, (long)Universe::heap()->end_addr());
  2123     sltu(AT, AT, end);
  2124     bne_far(AT, R0, slow_case);
  2125     delayed()->nop();
  2126     // Compare obj with the top addr, and if still equal, store the new top addr in
  2127     // end at the address of the top addr pointer. Sets ZF if was equal, and clears
  2128     // it otherwise. Use lock prefix for atomicity on MPs.
  2129     if (os::is_MP()) {
  2130       // lock();  // no lock prefix needed: cmpxchg below is LL/SC-based
  2131     }
  2133     // if someone beat us on the allocation, try again, otherwise continue
  2134     cmpxchg(end, heap_top, obj);
  2135     beq_far(AT, R0, retry);    //by yyq
  2136     delayed()->nop();
  2137   }
  2138 }
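// Note on eden_allocate's retry loop: cmpxchg(end, heap_top, obj) installs
// 'end' only while the heap top still equals 'obj'. On failure it leaves
// AT == 0 and refreshes 'obj' with the top value it observed, so the branch
// back to 'retry' recomputes 'end' against the updated top.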
  2141 // C2 doesn't invoke this one.
  2142 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  2143 	Register top = T0;
  2144 	Register t1  = T1;
  2145 /* Jin: tlab_refill() is called in 
  2147      [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);
  2149   In generate_code_for(), T2 has been assigned as a register (length), which is
  2150   still used after calling tlab_refill();
  2151   therefore, tlab_refill() should not use T2.
  2153  Source:
  2155 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
  2156         at java.lang.System.arraycopy(Native Method)
  2157         at java.util.Arrays.copyOf(Arrays.java:2799)	<-- alloc_array
  2158         at sun.misc.Resource.getBytes(Resource.java:117)
  2159         at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
  2160         at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
  2161         at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
  2162  */
  2163 	Register t2  = T9;
  2164 	Register t3  = T3;
  2165 	Register thread_reg = T8;
  2166 	Label do_refill, discard_tlab;
  2167 	if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
  2168 		// No allocation in the shared eden.
  2169 		b(slow_case);
  2170 		delayed()->nop();
  2171 	}
  2173 	get_thread(thread_reg);
  2175 	ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  2176 	ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  2178 	// calculate amount of free space
  2179 	sub(t1, t1, top);
  2180 	shr(t1, LogHeapWordSize);
  2182 	// Retain tlab and allocate object in shared space if
  2183 	// the amount free in the tlab is too large to discard.
  2184 	ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  2185 	slt(AT, t2, t1);
  2186 	beq(AT, R0, discard_tlab);
  2187 	delayed()->nop();
  2189 	// Retain
  2191 #ifndef _LP64
  2192 	move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  2193 #else
  2194 	li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  2195 #endif
  2196 	add(t2, t2, AT);
  2197 	st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  2199 	if (TLABStats) {
  2200 		// increment number of slow_allocations
  2201 		lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  2202 		addiu(AT, AT, 1);
  2203 		sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  2204 	}
  2205 	b(try_eden);
  2206 	delayed()->nop();
  2208   bind(discard_tlab);
  2209 	if (TLABStats) {
  2210 		// increment number of refills
  2211 		lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  2212 		addi(AT, AT, 1);
  2213 		sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
  2214 		// accumulate wastage -- t1 is amount free in tlab
  2215 		lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  2216 		add(AT, AT, t1);
  2217 		sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  2218 	}
  2220 	// if tlab is currently allocated (top or end != null) then
  2221 	// fill [top, end + alignment_reserve) with array object
  2222 	beq(top, R0, do_refill);
  2223 	delayed()->nop();
  2225 	// set up the mark word
  2226 	li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  2227 	st_ptr(AT, top, oopDesc::mark_offset_in_bytes());
  2229 	// set the length to the remaining space
  2230 	addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  2231 	addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  2232 	shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  2233 	sw(t1, top, arrayOopDesc::length_offset_in_bytes());
  2235 	// set klass to intArrayKlass
  2236 #ifndef _LP64
  2237 	lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  2238 	lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
  2239 #else
  2240 	li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  2241 	ld_ptr(t1, AT, 0);
  2242 #endif
  2243 	//st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  2244 	store_klass(top, t1);
  2246 	// refill the tlab with an eden allocation
  2247 	bind(do_refill);
  2248 	ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  2249 	shl(t1, LogHeapWordSize);
  2250 	// add object_size ??
  2251 	eden_allocate(top, t1, 0, t2, t3, slow_case);
  2253 	// Check that t1 was preserved in eden_allocate.
  2254 #ifdef ASSERT
  2255 	if (UseTLAB) {
  2256 		Label ok;
  2257 		assert_different_registers(thread_reg, t1);
  2258 		ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  2259 		shl(AT, LogHeapWordSize);
  2260 		beq(AT, t1, ok);
  2261 		delayed()->nop();
  2262 		stop("assert(t1 != tlab size)");
  2263 		should_not_reach_here();
  2265 		bind(ok);
  2266 	}
  2267 #endif
  2268 	st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  2269 	st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  2270 	add(top, top, t1);	
  2271 	addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  2272 	st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  2273 	verify_tlab(t1, t2);
  2274 	b(retry);
  2275 	delayed()->nop();
  2276 }
  2278 static const double     pi_4 =  0.7853981633974483;
  2280 // The x86 version is too clumsy; we don't need that fuss here. FIXME if this turns out to be wrong.
  2281 // must get argument(a double) in F12/F13
  2282 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
  2283 // We need to preserve the registers which may be modified during the call. @Jerome
  2284 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  2285 // save all modified registers here
  2286 //	if (preserve_cpu_regs) {
  2287 //	}
  2288 // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP and RA, so we only save V0, V1 and T9.
  2289 	pushad();
  2290 //we should preserve the stack space before we call
  2291 	addi(SP, SP, -wordSize * 2);
  2292 	switch (trig) {
  2293 		case 's':
  2294 			call(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type);
  2295 			delayed()->nop();
  2296 			break;
  2297 		case 'c':
  2298 			call(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type);
  2299 			delayed()->nop();
  2300 			break;
  2301 		case 't':
  2302 			call(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type);
  2303 			delayed()->nop();
  2304 			break;
  2305 		default: assert(false, "bad intrinsic");
  2306 			break;
  2307 	}
  2310 	addi(SP, SP, wordSize * 2);
  2311 	popad();
  2312 //	if (preserve_cpu_regs) {
  2313 //	}
  2314 }
  2316 #ifdef _LP64
  2317 void MacroAssembler::li(Register rd, long imm) {
  2318   if (imm <= max_jint && imm >= min_jint) {
  2319     li32(rd, (int)imm);
  2320   } else if (julong(imm) <= 0xFFFFFFFF) {
  2321     assert_not_delayed();
  2322     // lui sign-extends, so we can't use that.
  2323     ori(rd, R0, julong(imm) >> 16);
  2324     dsll(rd, rd, 16);
  2325     ori(rd, rd, split_low(imm));
  2328   } else if ((imm > 0) && is_simm16(imm >> 32)) {
  2329     /* A 48-bit address */
  2330     li48(rd, imm);
  2331   } else {
  2332     li64(rd, imm);
  2333   }
  2334 }
  2335 #else
  2336 void MacroAssembler::li(Register rd, long imm) {
  2337   li32(rd, (int)imm);
  2338 }
  2339 #endif
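// Summary of the immediate-materialization strategies above (LP64 case):
//   simm16                      -> addiu/addi        (1 inst)
//   other simm32                -> lui [+ ori]       (1-2 insts)
//   zero-extended 32-bit value  -> ori; dsll; ori    (2-3 insts)
//   48-bit with simm16 upper    -> li48              (4 insts)
//   anything else               -> li64              (6 insts)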
  2341 void MacroAssembler::li32(Register reg, int imm) {
  2342   if (is_simm16(imm)) {
  2343     /* Jin: for imm < 0, we should use addi instead of addiu.
  2345      *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
  2347      *  78 move [int:-1|I] [a0|I]
  2348      *    : daddi a0, zero, 0xffffffff  (correct)
  2349      *    : daddiu a0, zero, 0xffffffff (incorrect)
  2350      */
  2351     if (imm >= 0)
  2352       addiu(reg, R0, imm);
  2353     else
  2354       addi(reg, R0, imm);
  2355   } else {
  2356     lui(reg, split_low(imm >> 16));
  2357     if (split_low(imm))
  2358       ori(reg, reg, split_low(imm));
  2359   }
  2360 }
  2362 #ifdef _LP64
  2363 void MacroAssembler::set64(Register d, jlong value) {
  2364   assert_not_delayed();
  2366   int hi = (int)(value >> 32);
  2367   int lo = (int)(value & ~0);
  2369   if (value == lo) {  // 32-bit integer
  2370     if (is_simm16(value)) {
  2371       daddiu(d, R0, value);
  2372     } else {
  2373       lui(d, split_low(value >> 16));
  2374       if (split_low(value)) {
  2375         ori(d, d, split_low(value));
  2376       }
  2377     }
  2378   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2379       ori(d, R0, julong(value) >> 16);
  2380       dsll(d, d, 16);
  2381       if (split_low(value)) {
  2382         ori(d, d, split_low(value));
  2383       }
  2384   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2385     // 4 insts
  2386     li48(d, value);
  2387   } else {  // li64
  2388     // 6 insts
  2389     li64(d, value);
  2390   }
  2391 }
  2394 int MacroAssembler::insts_for_set64(jlong value) {
  2395   int hi = (int)(value >> 32);
  2396   int lo = (int)(value & ~0);
  2398   int count = 0;
  2400   if (value == lo) {  // 32-bit integer
  2401     if (is_simm16(value)) {
  2402       //daddiu(d, R0, value);
  2403       count++;
  2404     } else {
  2405       //lui(d, split_low(value >> 16));
  2406       count++;
  2407       if (split_low(value)) {
  2408         //ori(d, d, split_low(value));
  2409         count++;
  2410       }
  2411     }
  2412   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2413       //ori(d, R0, julong(value) >> 16);
  2414       //dsll(d, d, 16);
  2415       count += 2;
  2416       if (split_low(value)) {
  2417         //ori(d, d, split_low(value));
  2418         count++;
  2419       }
  2420   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2421     // 4 insts
  2422     //li48(d, value);
  2423     count += 4;
  2424   } else {  // li64
  2425     // 6 insts
  2426     //li64(d, value);
  2427     count += 6;
  2428   }
  2430   return count;
  2431 }
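// Keep insts_for_set64() in sync with set64() above: callers rely on the
// returned count to predict the size of the emitted sequence, so the two
// case analyses must match exactly.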
  2433 void MacroAssembler::patchable_set48(Register d, jlong value) {
  2434   assert_not_delayed();
  2436   int hi = (int)(value >> 32);
  2437   int lo = (int)(value & ~0);
  2439   int count = 0;
  2441   if (value == lo) {  // 32-bit integer
  2442     if (is_simm16(value)) {
  2443       daddiu(d, R0, value);
  2444       count += 1;
  2445     } else {
  2446       lui(d, split_low(value >> 16));
  2447       count += 1;
  2448       if (split_low(value)) {
  2449         ori(d, d, split_low(value));
  2450         count += 1;
  2451       }
  2452     }
  2453   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2454       ori(d, R0, julong(value) >> 16);
  2455       dsll(d, d, 16);
  2456       count += 2;
  2457       if (split_low(value)) {
  2458         ori(d, d, split_low(value));
  2459         count += 1;
  2460       }
  2461   } else if ((value > 0) && is_simm16(value >> 32)) {  // li48
  2462     // 4 insts
  2463     li48(d, value);
  2464     count += 4;
  2465   } else {  // li64
  2466     tty->print_cr("value = 0x%lx", value);
  2467     guarantee(false, "Not supported yet !");
  2468   }
  2470   for (; count < 4; count++) {
  2471     nop();
  2472   }
  2473 }
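// patchable_set48() always pads with nops to a fixed 4-instruction window,
// so the constant can later be patched in place without moving any code.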
  2475 void MacroAssembler::patchable_set32(Register d, jlong value) {
  2476   assert_not_delayed();
  2478   int hi = (int)(value >> 32);
  2479   int lo = (int)(value & ~0);
  2481   int count = 0;
  2483   if (value == lo) {  // 32-bit integer
  2484     if (is_simm16(value)) {
  2485       daddiu(d, R0, value);
  2486       count += 1;
  2487     } else {
  2488       lui(d, split_low(value >> 16));
  2489       count += 1;
  2490       if (split_low(value)) {
  2491         ori(d, d, split_low(value));
  2492         count += 1;
  2493       }
  2494     }
  2495   } else if (hi == 0) {  // hardware zero-extends to upper 32
  2496       ori(d, R0, julong(value) >> 16);
  2497       dsll(d, d, 16);
  2498       count += 2;
  2499       if (split_low(value)) {
  2500         ori(d, d, split_low(value));
  2501         count += 1;
  2502       }
  2503   } else {
  2504     tty->print_cr("value = 0x%lx", value);
  2505     guarantee(false, "Not supported yet !");
  2506   }
  2508   for (; count < 3; count++) {
  2509     nop();
  2510   }
  2511 }
  2513 void MacroAssembler::patchable_call32(Register d, jlong value) {
  2514   assert_not_delayed();
  2516   int hi = (int)(value >> 32);
  2517   int lo = (int)(value & ~0);
  2519   int count = 0;
  2521   if (value == lo) {  // 32-bit integer
  2522     if (is_simm16(value)) {
  2523       daddiu(d, R0, value);
  2524       count += 1;
  2525     } else {
  2526       lui(d, split_low(value >> 16));
  2527       count += 1;
  2528       if (split_low(value)) {
  2529         ori(d, d, split_low(value));
  2530         count += 1;
  2531       }
  2532     }
  2533   } else {
  2534     tty->print_cr("value = 0x%lx", value);
  2535     guarantee(false, "Not supported yet !");
  2536   }
  2538   for (; count < 2; count++) {
  2539     nop();
  2540   }
  2541 }
  2543 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  2544   assert(UseCompressedClassPointers, "should only be used for compressed header");
  2545   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2547   int klass_index = oop_recorder()->find_index(k);
  2548   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2549   long narrowKlass = (long)Klass::encode_klass(k);
  2551   relocate(rspec, Assembler::narrow_oop_operand);
  2552   patchable_set48(dst, narrowKlass);
  2553 }
  2556 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  2557   assert(UseCompressedOops, "should only be used for compressed header");
  2558   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2560   int oop_index = oop_recorder()->find_index(obj);
  2561   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2563   relocate(rspec, Assembler::narrow_oop_operand);
  2564   patchable_set48(dst, oop_index);
  2565 }
  2567 void MacroAssembler::li64(Register rd, long imm) {
  2568   assert_not_delayed();
  2569   lui(rd, imm >> 48);
  2570   ori(rd, rd, split_low(imm >> 32));
  2571   dsll(rd, rd, 16);
  2572   ori(rd, rd, split_low(imm >> 16));
  2573   dsll(rd, rd, 16);
  2574   ori(rd, rd, split_low(imm));
  2575 }
  2577 void MacroAssembler::li48(Register rd, long imm) {
  2578   assert_not_delayed();
  2579   assert(is_simm16(imm >> 32), "Not a 48-bit address");
  2580   lui(rd, imm >> 32);
  2581   ori(rd, rd, split_low(imm >> 16));
  2582   dsll(rd, rd, 16);
  2583   ori(rd, rd, split_low(imm));
  2584 }
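// Worked example for li48 (the address value is illustrative):
//   li48(rd, 0x55647f3c5c) emits
//     lui  rd, 0x0055        ; rd = 0x0000000000550000
//     ori  rd, rd, 0x647f    ; rd = 0x000000000055647f
//     dsll rd, rd, 16        ; rd = 0x00000055647f0000
//     ori  rd, rd, 0x3c5c    ; rd = 0x00000055647f3c5c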
  2585 #endif
  2586 // NOTE: unlike i486, we do not push eax here.
  2587 // The x86 code saves eax because it uses eax as the jump register.
  2588 void MacroAssembler::verify_oop(Register reg, const char* s) {
  2589   /*
  2590      if (!VerifyOops) return;
  2592   // Pass register number to verify_oop_subroutine
  2593   char* b = new char[strlen(s) + 50];
  2594   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  2595   push(rax);                          // save rax,
  2596   push(reg);                          // pass register argument
  2597   ExternalAddress buffer((address) b);
  2598   // avoid using pushptr, as it modifies scratch registers
  2599   // and our contract is not to modify anything
  2600   movptr(rax, buffer.addr());
  2601   push(rax);
  2602   // call indirectly to solve generation ordering problem
  2603   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  2604   call(rax);
  2605    */
  2606   if (!VerifyOops) return;
  2607   const char * b = NULL; 
  2608   stringStream ss;
  2609   ss.print("verify_oop: %s: %s", reg->name(), s);
  2610   b = code_string(ss.as_string());
  2611 #ifdef _LP64
  2612   pushad();
  2613   move(A1, reg);
  2614   li(A0, (long)b);
  2615   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  2616   ld(T9, AT, 0);
  2617   jalr(T9);
  2618   delayed()->nop();
  2619   popad();
  2620 #else
  2621   // Pass register number to verify_oop_subroutine
  2622   sw(T0, SP, - wordSize);
  2623   sw(T1, SP, - 2*wordSize);
  2624   sw(RA, SP, - 3*wordSize);
  2625   sw(A0, SP, - 4*wordSize);
  2626   sw(A1, SP, - 5*wordSize);
  2627   sw(AT, SP, - 6*wordSize);
  2628   sw(T9, SP, - 7*wordSize);
  2629   addiu(SP, SP, - 7 * wordSize);
  2630   move(A1, reg);
  2631   li(A0, (long)b);
  2632   // call indirectly to solve generation ordering problem
  2633   li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());        	
  2634   lw(T9, AT, 0);
  2635   jalr(T9);
  2636   delayed()->nop();
  2637   lw(T0, SP, 6* wordSize);
  2638   lw(T1, SP, 5* wordSize);
  2639   lw(RA, SP, 4* wordSize);
  2640   lw(A0, SP, 3* wordSize);
  2641   lw(A1, SP, 2* wordSize);
  2642   lw(AT, SP, 1* wordSize);
  2643   lw(T9, SP, 0* wordSize);
  2644   addiu(SP, SP, 7 * wordSize);
  2645 #endif
  2646 }
  2649 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  2650 	if (!VerifyOops) {
  2651 		nop();
  2652 		return;
  2653 	}
  2654 	// Pass register number to verify_oop_subroutine
  2655 	const char * b = NULL;
  2656 	stringStream ss;
  2657 	ss.print("verify_oop_addr: %s",  s);
  2658 	b = code_string(ss.as_string());
  2660 	st_ptr(T0, SP, - wordSize);
  2661 	st_ptr(T1, SP, - 2*wordSize);
  2662 	st_ptr(RA, SP, - 3*wordSize);
  2663 	st_ptr(A0, SP, - 4*wordSize);	
  2664 	st_ptr(A1, SP, - 5*wordSize);	
  2665 	st_ptr(AT, SP, - 6*wordSize);	
  2666 	st_ptr(T9, SP, - 7*wordSize);	
  2667 	ld_ptr(A1, addr);   // addr may use SP, so load from it before change SP
  2668 	addiu(SP, SP, - 7 * wordSize);
  2670 	li(A0, (long)b);
  2671 	// call indirectly to solve generation ordering problem
  2672 	li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());        	
  2673 	ld_ptr(T9, AT, 0);
  2674 	jalr(T9);
  2675 	delayed()->nop();
  2676 	ld_ptr(T0, SP, 6* wordSize);
  2677 	ld_ptr(T1, SP, 5* wordSize);
  2678 	ld_ptr(RA, SP, 4* wordSize);
  2679 	ld_ptr(A0, SP, 3* wordSize);
  2680 	ld_ptr(A1, SP, 2* wordSize);
  2681 	ld_ptr(AT, SP, 1* wordSize);
  2682 	ld_ptr(T9, SP, 0* wordSize);
  2683 	addiu(SP, SP, 7 * wordSize);
  2684 }
  2686 // used registers :  T0, T1
  2687 void MacroAssembler::verify_oop_subroutine() {
  2688   // RA: ra
  2689   // A0: char* error message    
  2690   // A1: oop   object to verify 
  2692   Label exit, error;
  2693   // increment counter
  2694   li(T0, (long)StubRoutines::verify_oop_count_addr());
  2695   lw(AT, T0, 0);
  2696 #ifdef _LP64
  2697 //FIXME, aoqi: rewrite addi, addu, etc in 64bits mode.
  2698   daddi(AT, AT, 1);
  2699 #else
  2700   addi(AT, AT, 1);
  2701 #endif
  2702   sw(AT, T0, 0);
  2704   // make sure object is 'reasonable'
  2705   beq(A1, R0, exit);         // if obj is NULL it is ok
  2706   delayed()->nop();
  2708   // Check if the oop is in the right area of memory
  2709   //const int oop_mask = Universe::verify_oop_mask();
  2710   //const int oop_bits = Universe::verify_oop_bits();
  2711   const uintptr_t oop_mask = Universe::verify_oop_mask();
  2712   const uintptr_t oop_bits = Universe::verify_oop_bits();
  2713   li(AT, oop_mask);
  2714   andr(T0, A1, AT);
  2715   li(AT, oop_bits);
  2716   bne(T0, AT, error);
  2717   delayed()->nop();
  2719   // make sure klass is 'reasonable'
  2720   //add for compressedoops
  2721   reinit_heapbase();
  2722   //add for compressedoops
  2723   load_klass(T0, A1);
  2724   beq(T0, R0, error);                        // if klass is NULL it is broken
  2725   delayed()->nop();
  2726   #if 0
  2727   //FIXME:wuhui.
  2728   // Check if the klass is in the right area of memory
  2729   //const int klass_mask = Universe::verify_klass_mask();
  2730   //const int klass_bits = Universe::verify_klass_bits();
  2731   const uintptr_t klass_mask = Universe::verify_klass_mask();
  2732   const uintptr_t klass_bits = Universe::verify_klass_bits();
  2734   li(AT, klass_mask);
  2735   andr(T1, T0, AT);
  2736   li(AT, klass_bits);
  2737   bne(T1, AT, error);
  2738   delayed()->nop();
  2739   // make sure klass' klass is 'reasonable'
  2740   //add for compressedoops
  2741   load_klass(T0, T0);
  2742   beq(T0, R0, error);  // if klass' klass is NULL it is broken
  2743   delayed()->nop();
  2745   li(AT, klass_mask);
  2746   andr(T1, T0, AT);
  2747   li(AT, klass_bits);
  2748   bne(T1, AT, error);
  2749   delayed()->nop();     // if klass not in right area of memory it is broken too.
  2750 #endif
  2751   // return if everything seems ok
  2752   bind(exit);
  2754   jr(RA);
  2755   delayed()->nop();
  2757   // handle errors
  2758   bind(error);
  2759   pushad();
  2760 #ifndef _LP64
  2761   addi(SP, SP, (-1) * wordSize);
  2762 #endif
  2763   call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  2764   delayed()->nop();
  2765 #ifndef _LP64
  2766   addiu(SP, SP, 1 * wordSize);
  2767 #endif
  2768   popad();	
  2769   jr(RA);
  2770   delayed()->nop();
  2771 }
  2773 void MacroAssembler::verify_tlab(Register t1, Register t2) {
  2774 #ifdef ASSERT
  2775   assert_different_registers(t1, t2, AT);
  2776   if (UseTLAB && VerifyOops) {
  2777     Label next, ok;
  2779     get_thread(t1);
  2781     ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
  2782     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
  2783     sltu(AT, t2, AT);
  2784     beq(AT, R0, next);
  2785     delayed()->nop();
  2787     stop("assert(top >= start)");
  2789     bind(next);
  2790     ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
  2791     sltu(AT, AT, t2);
  2792     beq(AT, R0, ok);
  2793     delayed()->nop();
  2795     stop("assert(top <= end)");
  2797     bind(ok);
  2799     /*
  2800        Label next, ok;
  2801        Register t1 = rsi;
  2802        Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
  2804        push(t1);
  2805        NOT_LP64(push(thread_reg));
  2806        NOT_LP64(get_thread(thread_reg));
  2808        movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  2809        cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  2810        jcc(Assembler::aboveEqual, next);
  2811        stop("assert(top >= start)");
  2812        should_not_reach_here();
  2814        bind(next);
  2815        movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
  2816        cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  2817        jcc(Assembler::aboveEqual, ok);
  2818        stop("assert(top <= end)");
  2819        should_not_reach_here();
  2821        bind(ok);
  2822        NOT_LP64(pop(thread_reg));
  2823        pop(t1);
  2824      */
  2825   }
  2826 #endif
  2827 }
  2828  RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  2829                                                        Register tmp,
  2830                                                        int offset) {
  2831    intptr_t value = *delayed_value_addr;
  2832    if (value != 0)
  2833      return RegisterOrConstant(value + offset);
  2834    AddressLiteral a(delayed_value_addr);
  2835    // load indirectly to solve generation ordering problem
  2836    //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  2837    //ld(tmp, a);
  2838   /* #ifdef ASSERT
  2839    { Label L;
  2840      testptr(tmp, tmp);
  2841      if (WizardMode) {
  2842             jcc(Assembler::notZero, L);
  2843             char* buf = new char[40];
  2844             sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
  2845             STOP(buf);
  2846                       } else {
  2847             jccb(Assembler::notZero, L);
  2848             hlt();
  2850      bind(L);
  2852    #endif*/
  2853    if (offset != 0)
  2854      daddi(tmp,tmp, offset);
  2856    return RegisterOrConstant(tmp);
  2857 }
  2859 void MacroAssembler::hswap(Register reg) {
  2860   //short
  2861   //andi(reg, reg, 0xffff);
  2862   srl(AT, reg, 8);
  2863   sll(reg, reg, 24);
  2864   sra(reg, reg, 16);
  2865   orr(reg, reg, AT);
  2866 }
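// e.g. (illustrative) reg = 0x0000ABCD -> 0xFFFFCDAB: the low two bytes are
// swapped and the result is sign-extended from bit 15.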
  2868 void MacroAssembler::huswap(Register reg) {
  2869 #ifdef _LP64
  2870   dsrl(AT, reg, 8);
  2871   dsll(reg, reg, 24);
  2872   dsrl(reg, reg, 16);
  2873   orr(reg, reg, AT);
  2874   andi(reg, reg, 0xffff);
  2875 #else
  2876   //andi(reg, reg, 0xffff);
  2877   srl(AT, reg, 8);
  2878   sll(reg, reg, 24);
  2879   srl(reg, reg, 16);
  2880   orr(reg, reg, AT);
  2881 #endif
  2882 }
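// e.g. (illustrative) reg = 0x0000ABCD -> 0x0000CDAB: the low halfword is
// byte-swapped and zero-extended.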
  2884 // A slightly tricky byte-swap that needs only one extra register (AT).
  2885 // 32 bits
  2886 // by yjl 6/29/2005
  2887 void MacroAssembler::swap(Register reg) {
  2888 	srl(AT, reg, 8);
  2889 	sll(reg, reg, 24);
  2890 	orr(reg, reg, AT);
  2891 	//reg : 4 1 2 3
  2892 	srl(AT, AT, 16);
  2893 	xorr(AT, AT, reg);
  2894 	andi(AT, AT, 0xff);
  2895 	//AT : 0 0 0 1^3);
  2896 	xorr(reg, reg, AT);
  2897 	//reg : 4 1 2 1
  2898 	sll(AT, AT, 16);
  2899 	xorr(reg, reg, AT);
  2900 	//reg : 4 3 2 1
  2901 }
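// Worked example (illustrative): reg = 0x01020304
//   srl/sll/orr      -> reg = 0x04010203, AT = 0x00010203
//   srl/xorr/andi    -> AT  = byte1 ^ byte3 = 0x02
//   xorr, sll, xorr  -> reg = 0x04030201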
  2903 #ifdef _LP64
  2905 /* do 32-bit CAS using MIPS64 lld/scd
  2907   Jin: cas_int should only compare 32-bits of the memory value.
  2908        However, lld/scd will do 64-bit operation, which violates the intention of cas_int.
  2909        To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
  2910        two halves, and only the low 32 bits are compared. If equal, the low 32 bits of newval,
  2911        plus the high 32 bits of the memory value, are stored together with SCD.
  2913 Example:
  2915       double d = 3.1415926;
  2916       System.err.println("hello" + d);
  2918   sun.misc.FloatingDecimal$1.<init>()
  2920    `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
  2922   38 cas_int [a7a7|J] [a0|I] [a6|I]   
  2923 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
  2924 // a6: 0x4ab325aa
  2926 again:
  2927    0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
  2929    0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
  2930    0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
  2931    0x00000055647f3c68: dsll32 t8, t8, 0
  2932    0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
  2933    0x00000055647f3c70: sll zero, zero, 0
  2935    0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
  2936    0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
  2937    0x00000055647f3c7c: ori v1, v1, 0xffffffff
  2938    0x00000055647f3c80: and v1, a6, v1 
  2939    0x00000055647f3c84: or at, t8, v1 
  2940    0x00000055647f3c88: scd at, 0x0(a7)
  2941    0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
  2942    0x00000055647f3c90: sll zero, zero, 0
  2943    0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
  2944    0x00000055647f3c98: sll zero, zero, 0
  2945 nequal:
  2946    0x00000055647f45a4: dadd a0, t9, zero
  2947    0x00000055647f45a8: dadd at, zero, zero
  2948 done:
  2949 */
  2951 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  2952 #if 0
  2953   Label done, again, nequal;
  2954   bind(again);
  2956   sync();
  2957   lld(AT, dest);
  2959   /* T9:  32 bits, sign extended
  2960    * V1: low 32 bits, sign unextended
  2961    * T8: high 32 bits (may be another variables's space)
  2962    */
  2963   sll(T9, AT, 0);	// Use 32-bit sll to extend bit 31
  2964   dsrl32(T8, AT, 0);
  2965   dsll32(T8, T8, 0);
  2967   bne(T9, c_reg, nequal);
  2968   delayed()->nop(); 
  2970   ori(V1, R0, 0xFFFF);
  2971   dsll(V1, V1, 16);
  2972   ori(V1, V1, 0xFFFF);
  2973   andr(V1, x_reg, V1);
  2974   orr(AT, T8, V1);
  2975   scd(AT, dest);
  2976   beq(AT, R0, again);
  2977   delayed()->nop();
  2978   b(done);
  2979   delayed()->nop();
  2981   // not xchged
  2982   bind(nequal);
  2983   move(c_reg, T9);
  2984   move(AT, R0);
  2986   bind(done);
  2987 #else
  2989   /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */
  2990   Label done, again, nequal;
  2992   bind(again);
  2994   if(!Use3A2000) sync();
  2995   ll(AT, dest);
  2996   bne(AT, c_reg, nequal);
  2997   delayed()->nop(); 
  2999   move(AT, x_reg);
  3000   sc(AT, dest);
  3001   beq(AT, R0, again);
  3002   delayed()->nop();
  3003   b(done);
  3004   delayed()->nop();
  3006   // not xchged
  3007   bind(nequal);
  3008   sync();
  3009   move(c_reg, AT);
  3010   move(AT, R0);
  3012   bind(done);
  3013 #endif
  3014 }
  3015 #endif	// cmpxchg32
  3017 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  3018   Label done, again, nequal;
  3020   bind(again);
  3021 #ifdef _LP64
  3022   if(!Use3A2000) sync();
  3023   lld(AT, dest);
  3024 #else
  3025   if(!Use3A2000) sync();
  3026   ll(AT, dest);
  3027 #endif
  3028   bne(AT, c_reg, nequal);
  3029   delayed()->nop(); 
  3031   move(AT, x_reg);
  3032 #ifdef _LP64
  3033   scd(AT, dest);
  3034 #else
  3035   sc(AT, dest);
  3036 #endif
  3037   beq(AT, R0, again);
  3038   delayed()->nop();
  3039   b(done);
  3040   delayed()->nop();
  3042   // not xchged
  3043   bind(nequal);
  3044   sync();
  3045   move(c_reg, AT);
  3046   move(AT, R0);
  3048   bind(done);
  3049 }
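// Contract of cmpxchg as implemented above: on success AT == 1 and [dest]
// holds x_reg; on failure AT == 0 and c_reg is updated with the value that
// was observed in [dest].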
  3051 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  3052 	Label done, again, nequal;
  3054 	Register x_reg = x_regLo;
  3055 	dsll32(x_regHi, x_regHi, 0);
  3056 	dsll32(x_regLo, x_regLo, 0);
  3057 	dsrl32(x_regLo, x_regLo, 0);
  3058 	orr(x_reg, x_regLo, x_regHi);
  3060 	Register c_reg = c_regLo;
  3061 	dsll32(c_regHi, c_regHi, 0);
  3062 	dsll32(c_regLo, c_regLo, 0);
  3063 	dsrl32(c_regLo, c_regLo, 0);
  3064 	orr(c_reg, c_regLo, c_regHi);
  3066 	bind(again);
  3068         if(!Use3A2000) sync();
  3069 	lld(AT, dest);
  3070 	bne(AT, c_reg, nequal);
  3071 	delayed()->nop(); 
  3073 	//move(AT, x_reg);
  3074 	dadd(AT, x_reg, R0);
  3075 	scd(AT, dest);
  3076 	beq(AT, R0, again);
  3077 	delayed()->nop();
  3078 	b(done);
  3079 	delayed()->nop();
  3081 	// not xchged
  3082 	bind(nequal);
  3083 	sync();
  3084 	//move(c_reg, AT);
  3085 	//move(AT, R0);
  3086 	dadd(c_reg, AT, R0);
  3087 	dadd(AT, R0, R0);
  3088 	bind(done);
  3089 }
  3091 // be sure the three registers are different
  3092 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {    
  3093   assert_different_registers(tmp, fs, ft); 
  3094 	div_s(tmp, fs, ft); 
  3095 	trunc_l_s(tmp, tmp); 
  3096 	cvt_s_l(tmp, tmp); 
  3097 	mul_s(tmp, tmp, ft); 
  3098 	sub_s(fd, fs, tmp);
  3099 }
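// rem_s above and rem_d below compute an fmod-style remainder with the
// quotient truncated toward zero: fd = fs - trunc(fs / ft) * ft.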
  3101 // be sure the three registers are different
  3102 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {    
  3103 	assert_different_registers(tmp, fs, ft); 
  3104 	div_d(tmp, fs, ft); 
  3105 	trunc_l_d(tmp, tmp); 
  3106 	cvt_d_l(tmp, tmp); 
  3107 	mul_d(tmp, tmp, ft); 
  3108 	sub_d(fd, fs, tmp);
  3109 }
  3111 // Fast_Lock and Fast_Unlock used by C2
  3113 // Because the transitions from emitted code to the runtime
  3114 // monitorenter/exit helper stubs are so slow it's critical that
  3115 // we inline both the stack-locking fast-path and the inflated fast path.
  3116 //
  3117 // See also: cmpFastLock and cmpFastUnlock.
  3118 //
  3119 // What follows is a specialized inline transliteration of the code
  3120 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  3121 // another option would be to emit TrySlowEnter and TrySlowExit methods
  3122 // at startup-time.  These methods would accept arguments as
  3123 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  3124 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  3125 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  3126 // In practice, however, the # of lock sites is bounded and is usually small.
  3127 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  3128 // if the processor uses simple bimodal branch predictors keyed by EIP,
  3129 // since the helper routines would be called from multiple synchronization
  3130 // sites.
  3131 //
  3132 // An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
  3133 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  3134 // to those specialized methods.  That'd give us a mostly platform-independent
  3135 // implementation that the JITs could optimize and inline at their pleasure.
  3136 // Done correctly, the only time we'd need to cross to native code would be
  3137 // to park() or unpark() threads.  We'd also need a few more unsafe operators
  3138 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  3139 // (b) explicit barriers or fence operations.
  3140 //
  3141 // TODO:
  3142 //
  3143 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  3144 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  3145 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  3146 //    the lock operators would typically be faster than reifying Self.
  3147 //
  3148 // *  Ideally I'd define the primitives as:
  3149 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  3150 //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  3151 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  3152 //    Instead, we're stuck with a rather awkward and brittle register assignments below.
  3153 //    Furthermore the register assignments are overconstrained, possibly resulting in
  3154 //    sub-optimal code near the synchronization site.
  3155 //
  3156 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  3157 //    Alternately, use a better sp-proximity test.
  3158 //
  3159 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  3160 //    Either one is sufficient to uniquely identify a thread.
  3161 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  3162 //
  3163 // *  Intrinsify notify() and notifyAll() for the common cases where the
  3164 //    object is locked by the calling thread but the waitlist is empty.
  3165 //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  3166 //
  3167 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  3168 //    But beware of excessive branch density on AMD Opterons.
  3169 //
  3170 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  3171 //    or failure of the fast-path.  If the fast-path fails then we pass
  3172 //    control to the slow-path, typically in C.  In Fast_Lock and
  3173 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  3174 //    will emit a conditional branch immediately after the node.
  3175 //    So we have branches to branches and lots of ICC.ZF games.
  3176 //    Instead, it might be better to have C2 pass a "FailureLabel"
  3177 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  3178 //    will drop through the node.  ICC.ZF is undefined at exit.
  3179 //    In the case of failure, the node will branch directly to the
  3180 //    FailureLabel
  3183 // obj: object to lock
  3184 // box: on-stack box address (displaced header location) - KILLED
  3185 // rax,: tmp -- KILLED
  3186 // scr: tmp -- KILLED
  3187 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
  3189   // Ensure the register assignments are disjoint
  3190   guarantee (objReg != boxReg, "") ;
  3191   guarantee (objReg != tmpReg, "") ;
  3192   guarantee (objReg != scrReg, "") ;
  3193   guarantee (boxReg != tmpReg, "") ;
  3194   guarantee (boxReg != scrReg, "") ;
  3197   block_comment("FastLock");
  3198   /*
  3199      move(AT, 0x0);
  3200      return;
  3201      */
  3202   if (PrintBiasedLockingStatistics) {
  3203     push(tmpReg);
  3204     atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
  3205     pop(tmpReg);
  3206   }
  3208   if (EmitSync & 1) {
  3209     // set box->dhw = unused_mark (3)
  3210     // Force all sync thru slow-path: slow_enter() and slow_exit()
  3211     move (AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  3212     sd(AT, Address(boxReg, 0));
  3213     move (AT, (int32_t)0) ;	// Eflags.ZF = 0
  3214   } else
  3215     if (EmitSync & 2) {
  3216       Label DONE_LABEL ;
  3217       if (UseBiasedLocking) {
  3218         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
  3219         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  3220       }
  3222       ld(tmpReg, Address(objReg, 0)) ;          // fetch markword
  3223       ori(tmpReg, tmpReg, 0x1);
  3224       sd(tmpReg, Address(boxReg, 0));           // Anticipate successful CAS
  3226       cmpxchg(boxReg, Address(objReg, 0), tmpReg);          // Updates tmpReg
  3227       bne(AT, R0, DONE_LABEL);
  3228       delayed()->nop();
  3230       // Recursive locking
  3231       dsubu(tmpReg, tmpReg, SP);
  3232       li(AT, (7 - os::vm_page_size() ));
  3233       andr(tmpReg, tmpReg, AT);
  3234       sd(tmpReg, Address(boxReg, 0));
  3235       bind(DONE_LABEL) ;
  3236     } else {
  3237       // Possible cases that we'll encounter in fast_lock
  3238       // ------------------------------------------------
  3239       // * Inflated
  3240       //    -- unlocked
  3241       //    -- Locked
  3242       //       = by self
  3243       //       = by other
  3244       // * biased
  3245       //    -- by Self
  3246       //    -- by other
  3247       // * neutral
  3248       // * stack-locked
  3249       //    -- by self
  3250       //       = sp-proximity test hits
  3251       //       = sp-proximity test generates false-negative
  3252       //    -- by other
  3253       //
  3255       Label IsInflated, DONE_LABEL, PopDone ;
  3257       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  3258       // order to reduce the number of conditional branches in the most common cases.
  3259       // Beware -- there's a subtle invariant that fetch of the markword
  3260       // at [FETCH], below, will never observe a biased encoding (*101b).
  3261       // If this invariant is not held we risk exclusion (safety) failure.
  3262       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3263         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
  3264       }
  3266       ld(tmpReg, Address(objReg, 0)) ;         //Fetch the markword of the object.
  3267       andi(AT, tmpReg, markOopDesc::monitor_value);
  3268       bne(AT, R0, IsInflated);                      // inflated vs stack-locked|neutral|bias
  3269       delayed()->nop();
  3271       // Attempt stack-locking ...
  3272       ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
  3273       sd(tmpReg, Address(boxReg, 0));          // Anticipate successful CAS
  3275       cmpxchg(boxReg, Address(objReg, 0), tmpReg);           // Updates tmpReg
  3277       if (PrintBiasedLockingStatistics) {
  3278         Label L;
  3279         beq(AT, R0, L);
  3280         delayed()->nop();
  3281         push(T0);
  3282         push(T1);
  3283         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  3284         pop(T1);
  3285         pop(T0);
  3286         bind(L);
  3287       }
  3288       bne(AT, R0, DONE_LABEL);
  3289       delayed()->nop();
  3291       // Recursive locking
  3292       // The object is stack-locked: markword contains stack pointer to BasicLock.
  3293       // Locked by current thread if difference with current SP is less than one page.
  3294       dsubu(tmpReg, tmpReg, SP);
  3295       li(AT, 7 - os::vm_page_size() );
  3296       andr(tmpReg, tmpReg, AT);
  3297       sd(tmpReg, Address(boxReg, 0));
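      // Worked example of the test above (assuming a 4K page): the mask is
      // 7 - 4096 = 0x...f007, so for (mark - SP) == 0x40 (same page, 8-byte
      // aligned) the AND yields 0 and a zero displaced header records a
      // recursive stack-lock; for (mark - SP) == 0x5000 the AND keeps the
      // high bits and a non-zero value is stored instead.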
  3298       if (PrintBiasedLockingStatistics) {
  3299         Label L;
  3300         // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
  3301         bne(tmpReg, R0, L);
  3302         delayed()->nop();
  3303         push(T0);
  3304         push(T1);
  3305         atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
  3306         pop(T1);
  3307         pop(T0);
  3308         bind(L);
  3309       }
  3310       sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
  3312       b(DONE_LABEL) ;
  3313       delayed()->nop();
  3315       bind(IsInflated) ;
  3317       // TODO: someday avoid the ST-before-CAS penalty by
  3318       // relocating (deferring) the following ST.
  3319       // We should also think about trying a CAS without having
  3320       // fetched _owner.  If the CAS is successful we may
  3321       // avoid an RTO->RTS upgrade on the $line.
  3322       // Without cast to int32_t a movptr will destroy r10 which is typically obj
  3323       li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
  3324       sd(AT, Address(boxReg, 0));
  3326       move(boxReg, tmpReg) ;
  3327       ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3328       // AT = (tmpReg == 0) ? 1:0
  3329       sltiu(AT, tmpReg, 1);  /* Jin: AT = !tmpReg; */
  3330       bne(tmpReg, R0, DONE_LABEL);
  3331       delayed()->nop();
  3333       // It's inflated and appears unlocked
  3334       if (os::is_MP()) {
  3335         //lock();
  3336       }
  3337       cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
  3338       // Intentional fall-through into DONE_LABEL ...
  3341       // DONE_LABEL is a hot target - we'd really like to place it at the
  3342       // start of cache line by padding with NOPs.
  3343       // See the AMD and Intel software optimization manuals for the
  3344       // most efficient "long" NOP encodings.
  3345       // Unfortunately none of our alignment mechanisms suffice.
  3346       bind(DONE_LABEL);
  3348       // Avoid branch-to-branch on AMD processors
  3349       // This appears to be superstition.
  3350       if (EmitSync & 32) nop() ;
  3353       // At DONE_LABEL the icc ZFlag is set as follows ...
  3354       // Fast_Unlock uses the same protocol.
  3355       // ZFlag == 1 -> Success
  3356       // ZFlag == 0 -> Failure - force control through the slow-path
  3357     }
  3358 }
  3360 // obj: object to unlock
  3361 // box: box address (displaced header location) -- KILLED
  3362 // tmp: killed tmp; cannot be obj nor box.
  3363 //
  3364 // Some commentary on balanced locking:
  3365 //
  3366 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  3367 // Methods that don't have provably balanced locking are forced to run in the
  3368 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  3369 // The interpreter provides two properties:
  3370 // I1:  At return-time the interpreter automatically and quietly unlocks any
  3371 //      objects acquired by the current activation (frame).  Recall that the
  3372 //      interpreter maintains an on-stack list of locks currently held by
  3373 //      a frame.
  3374 // I2:  If a method attempts to unlock an object that is not held by
  3375 //      the frame the interpreter throws IMSX.
  3376 //
  3377 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
  3378 // B() doesn't have provably balanced locking so it runs in the interpreter.
  3379 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
  3380 // is still locked by A().
  3381 //
  3382 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  3383 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  3384 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  3385 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  3387 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  3389   guarantee (objReg != boxReg, "") ;
  3390   guarantee (objReg != tmpReg, "") ;
  3391   guarantee (boxReg != tmpReg, "") ;
  3395   block_comment("FastUnlock");
  3397   /*
  3398      move(AT, 0x0);
  3399      return;
  3400      */
  3402   if (EmitSync & 4) {
  3403     // Disable - inhibit all inlining.  Force control through the slow-path
  3404     move(AT, R0);
  3405   } else
  3406     if (EmitSync & 8) {
  3407       Label DONE_LABEL ;
  3408       if (UseBiasedLocking) {
  3409         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3410       }
  3411       // classic stack-locking code ...
  3412       ld(tmpReg, Address(boxReg, 0)) ;
  3413       beq(tmpReg, R0, DONE_LABEL) ;
  3414       move(AT, 0x1);  // delay slot
  3416       cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
  3417       bind(DONE_LABEL);
  3418     } else {
  3419       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
  3421       // Critically, the biased locking test must have precedence over
  3422       // and appear before the (box->dhw == 0) recursive stack-lock test.
  3423       if (UseBiasedLocking && !UseOptoBiasInlining) {
  3424         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  3425       }
  3427       ld(AT, Address(boxReg, 0)) ;            // Examine the displaced header
  3428       beq(AT, R0, DONE_LABEL) ;      // 0 indicates recursive stack-lock
  3429       delayed()->daddiu(AT, R0, 0x1);
  3431       ld(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
  3432       andi(AT, tmpReg, markOopDesc::monitor_value) ;                     // Inflated?
  3433       beq(AT, R0, Stacked) ;                     // zero => stack-locked
  3434       delayed()->nop();
  3436       bind(Inflated) ;
  3437       // It's inflated.
  3438       // Despite our balanced locking property we still check that m->_owner == Self
  3439       // as java routines or native JNI code called by this thread might
  3440       // have released the lock.
  3441       // Refer to the comments in synchronizer.cpp for how we might encode extra
  3442       // state in _succ so we can avoid fetching EntryList|cxq.
  3443       //
  3444       // I'd like to add more cases in fast_lock() and fast_unlock() --
  3445       // such as recursive enter and exit -- but we have to be wary of
  3446       // I$ bloat, T$ effects and BP$ effects.
  3447       //
  3448       // If there's no contention try a 1-0 exit.  That is, exit without
  3449       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  3450       // we detect and recover from the race that the 1-0 exit admits.
  3451       //
  3452       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  3453       // before it STs null into _owner, releasing the lock.  Updates
  3454       // to data protected by the critical section must be visible before
  3455       // we drop the lock (and thus before any other thread could acquire
  3456       // the lock and observe the fields protected by the lock).
  3457       // IA32's memory-model is SPO, so STs are ordered with respect to
  3458       // each other and there's no need for an explicit barrier (fence).
  3459       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  3460 #ifndef OPT_THREAD
  3461       get_thread (TREG) ;
  3462 #endif
  3464       // It's inflated
  3465       ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3466       xorr(boxReg, boxReg, TREG);
  3468       ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
  3469       orr(boxReg, boxReg, AT);
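      // boxReg now holds (_owner ^ Self) | _recursions: it is zero exactly
      // when this thread owns the monitor (owner ^ TREG == 0) and the
      // recursion count is zero, the only state the fast path may release.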
  3471       move(AT, R0);
  3472       bne(boxReg, R0, DONE_LABEL);
  3473       delayed()->nop();
  3475       ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
  3476       ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
  3477       orr(boxReg, boxReg, AT);
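      // Here boxReg == (_cxq | _EntryList); if both lists are empty no
      // successor needs waking, so the 1-0 exit below (sync, then store of
      // R0 into _owner) suffices; otherwise fall into CheckSucc.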
  3479       move(AT, R0);
  3480       bne(boxReg, R0, CheckSucc);
  3481       delayed()->nop();
  3483       sync();
  3484       sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3485       move(AT, 0x1);
  3486       b(DONE_LABEL);
  3487       delayed()->nop();
  3490       if ((EmitSync & 65536) == 0) {
  3491         Label LSuccess, LGoSlowPath ;
  3492         bind (CheckSucc);
  3493         ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
  3494         beq(AT, R0, LGoSlowPath);
  3495         delayed()->nop();
  3497         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
  3498         // explicit ST;MEMBAR combination, but masm doesn't currently support
  3499         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
  3500         // are all faster when the write buffer is populated.
  3501         xorr(boxReg, boxReg, boxReg);
  3502         sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
  3503         if (os::is_MP()) {
  3504           // lock ();
  3505         }
  3506         ld(AT, Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2)) ;
  3507         bne(AT, R0, LSuccess);
  3508         delayed()->nop();
  3510 #ifndef OPT_THREAD
  3511         get_thread (TREG) ;
  3512 #endif
  3513         move(boxReg, R0) ;                  // box <- 0: the expected (unowned) _owner value for the CAS
  3514         //if (os::is_MP()) { lock(); }
  3515         cmpxchg(TREG, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
  3516         beq(AT, R0, LSuccess);
  3517         delayed()->nop();
  3518         // Intentional fall-through into slow-path
  3520         bind  (LGoSlowPath);
  3521         move(AT, R0);
  3522         b(DONE_LABEL) ;
  3523         delayed()->nop();
  3526         bind  (LSuccess);
  3527         move(AT, 0);
  3528         sltiu(AT, boxReg, 1) ;                 // set ICC.ZF=1 to indicate success
  3529         b(DONE_LABEL) ;
  3530         delayed()->nop();
  3531       }
  3533       bind  (Stacked);
  3534       ld(tmpReg, Address(boxReg, 0)) ;
  3535       //if (os::is_MP()) { lock(); }
  3536       cmpxchg(tmpReg, Address(objReg, 0), boxReg);
  3538       if (EmitSync & 65536) {
  3539         bind (CheckSucc);
  3540       }
  3542       bind(DONE_LABEL);
  3544       // Avoid branch to branch on AMD processors
  3545       if (EmitSync & 32768) { nop() ; }
  3546     }
  3547 }
  3549 class ControlWord {
  3550  public:
  3551   int32_t _value;
  3553   int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  3554   int  precision_control() const       { return  (_value >>  8) & 3      ; }
  3555   bool precision() const               { return ((_value >>  5) & 1) != 0; }
  3556   bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  3557   bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  3558   bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  3559   bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  3560   bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  3562   void print() const {
  3563     // rounding control
  3564     const char* rc;
  3565     switch (rounding_control()) {
  3566       case 0: rc = "round near"; break;
  3567       case 1: rc = "round down"; break;
  3568       case 2: rc = "round up  "; break;
  3569       case 3: rc = "chop      "; break;
  3570     };
  3571     // precision control
  3572     const char* pc;
  3573     switch (precision_control()) {
  3574       case 0: pc = "24 bits "; break;
  3575       case 1: pc = "reserved"; break;
  3576       case 2: pc = "53 bits "; break;
  3577       case 3: pc = "64 bits "; break;
  3578     };
  3579     // flags
  3580     char f[9];
  3581     f[0] = ' ';
  3582     f[1] = ' ';
  3583     f[2] = (precision   ()) ? 'P' : 'p';
  3584     f[3] = (underflow   ()) ? 'U' : 'u';
  3585     f[4] = (overflow    ()) ? 'O' : 'o';
  3586     f[5] = (zero_divide ()) ? 'Z' : 'z';
  3587     f[6] = (denormalized()) ? 'D' : 'd';
  3588     f[7] = (invalid     ()) ? 'I' : 'i';
  3589     f[8] = '\x0';
  3590     // output
  3591     printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  3592   }
  3594 };
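// A minimal usage sketch for the dump class above (0x037F is the x87
// power-on default control word, chosen purely for illustration):
//   ControlWord cw;
//   cw._value = 0x037F;
//   cw.print();   // prints: 037f  masks =   PUOZDI, round near, 64 bits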
  3596 class StatusWord {
  3597  public:
  3598   int32_t _value;
  3600   bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  3601   bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  3602   bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  3603   bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  3604   bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  3605   int  top() const                     { return  (_value >> 11) & 7      ; }
  3606   bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  3607   bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  3608   bool precision() const               { return ((_value >>  5) & 1) != 0; }
  3609   bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  3610   bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  3611   bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  3612   bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  3613   bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  3615   void print() const {
  3616     // condition codes
  3617     char c[5];
  3618     c[0] = (C3()) ? '3' : '-';
  3619     c[1] = (C2()) ? '2' : '-';
  3620     c[2] = (C1()) ? '1' : '-';
  3621     c[3] = (C0()) ? '0' : '-';
  3622     c[4] = '\x0';
  3623     // flags
  3624     char f[9];
  3625     f[0] = (error_status()) ? 'E' : '-';
  3626     f[1] = (stack_fault ()) ? 'S' : '-';
  3627     f[2] = (precision   ()) ? 'P' : '-';
  3628     f[3] = (underflow   ()) ? 'U' : '-';
  3629     f[4] = (overflow    ()) ? 'O' : '-';
  3630     f[5] = (zero_divide ()) ? 'Z' : '-';
  3631     f[6] = (denormalized()) ? 'D' : '-';
  3632     f[7] = (invalid     ()) ? 'I' : '-';
  3633     f[8] = '\x0';
  3634     // output
  3635     printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  3636   }
  3638 };
  3640 class TagWord {
  3641  public:
  3642   int32_t _value;
  3644   int tag_at(int i) const              { return (_value >> (i*2)) & 3; }
  3646   void print() const {
  3647     printf("%04x", _value & 0xFFFF);
  3648   }
  3650 };
  3652 class FPU_Register {
  3653  public:
  3654   int32_t _m0;
  3655   int32_t _m1;
  3656   int16_t _ex;
  3658   bool is_indefinite() const           {
  3659     return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  3660   }
  3662   void print() const {
  3663     char  sign = (_ex < 0) ? '-' : '+';
  3664     const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
  3665     printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  3666   };
  3668 };
  3670 class FPU_State {
  3671  public:
  3672   enum {
  3673     register_size       = 10,
  3674     number_of_registers =  8,
  3675     register_mask       =  7
  3676   };
  3678   ControlWord  _control_word;
  3679   StatusWord   _status_word;
  3680   TagWord      _tag_word;
  3681   int32_t      _error_offset;
  3682   int32_t      _error_selector;
  3683   int32_t      _data_offset;
  3684   int32_t      _data_selector;
  3685   int8_t       _register[register_size * number_of_registers];
  3687   int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  3688   FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
  3690   const char* tag_as_string(int tag) const {
  3691     switch (tag) {
  3692       case 0: return "valid";
  3693       case 1: return "zero";
  3694       case 2: return "special";
  3695       case 3: return "empty";
  3696     }
  3697     ShouldNotReachHere();
  3698     return NULL;
  3699   }
  3701   void print() const {
  3702     // print computation registers
  3703     { int t = _status_word.top();
  3704       for (int i = 0; i < number_of_registers; i++) {
  3705         int j = (i - t) & register_mask;
  3706         printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
  3707         st(j)->print();
  3708         printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
  3709       }
  3710     }
  3711     printf("\n");
  3712     // print control registers
  3713     printf("ctrl = "); _control_word.print(); printf("\n");
  3714     printf("stat = "); _status_word .print(); printf("\n");
  3715     printf("tags = "); _tag_word    .print(); printf("\n");
  3716   }
  3718 };
  3720 class Flag_Register {
  3721  public:
  3722   int32_t _value;
  3724   bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  3725   bool direction() const               { return ((_value >> 10) & 1) != 0; }
  3726   bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  3727   bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  3728   bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  3729   bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  3730   bool carry() const                   { return ((_value >>  0) & 1) != 0; }
  3732   void print() const {
  3733     // flags
  3734     char f[8];
  3735     f[0] = (overflow       ()) ? 'O' : '-';
  3736     f[1] = (direction      ()) ? 'D' : '-';
  3737     f[2] = (sign           ()) ? 'S' : '-';
  3738     f[3] = (zero           ()) ? 'Z' : '-';
  3739     f[4] = (auxiliary_carry()) ? 'A' : '-';
  3740     f[5] = (parity         ()) ? 'P' : '-';
  3741     f[6] = (carry          ()) ? 'C' : '-';
  3742     f[7] = '\x0';
  3743     // output
  3744     printf("%08x  flags = %s", _value, f);
  3745   }
  3747 };
  3749 class IU_Register {
  3750  public:
  3751   int32_t _value;
  3753   void print() const {
  3754     printf("%08x  %11d", _value, _value);
  3755   }
  3757 };
  3759 class IU_State {
  3760  public:
  3761   Flag_Register _eflags;
  3762   IU_Register   _rdi;
  3763   IU_Register   _rsi;
  3764   IU_Register   _rbp;
  3765   IU_Register   _rsp;
  3766   IU_Register   _rbx;
  3767   IU_Register   _rdx;
  3768   IU_Register   _rcx;
  3769   IU_Register   _rax;
  3771   void print() const {
  3772     // computation registers
  3773     printf("rax  = "); _rax.print(); printf("\n");
  3774     printf("rbx  = "); _rbx.print(); printf("\n");
  3775     printf("rcx  = "); _rcx.print(); printf("\n");
  3776     printf("rdx  = "); _rdx.print(); printf("\n");
  3777     printf("rdi  = "); _rdi.print(); printf("\n");
  3778     printf("rsi  = "); _rsi.print(); printf("\n");
  3779     printf("rbp  = "); _rbp.print(); printf("\n");
  3780     printf("rsp  = "); _rsp.print(); printf("\n");
  3781     printf("\n");
  3782     // control registers
  3783     printf("flgs = "); _eflags.print(); printf("\n");
  3784   }
  3785 };
  3788 class CPU_State {
  3789  public:
  3790   FPU_State _fpu_state;
  3791   IU_State  _iu_state;
  3793   void print() const {
  3794     printf("--------------------------------------------------\n");
  3795     _iu_state .print();
  3796     printf("\n");
  3797     _fpu_state.print();
  3798     printf("--------------------------------------------------\n");
  3799   }
  3801 };
  3804 /*
  3805 static void _print_CPU_state(CPU_State* state) {
  3806   state->print();
  3807 };
  3809 void MacroAssembler::print_CPU_state() {
  3810   push_CPU_state();
  3811   push(rsp);                // pass CPU state
  3812   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  3813   addptr(rsp, wordSize);       // discard argument
  3814   pop_CPU_state();
  3816 */
  3818 void MacroAssembler::align(int modulus) {
  3819   while (offset() % modulus != 0) nop();
  3820 }
  3823 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  3824 	//FIXME aoqi
  3825 	// %%%%% need to implement this
  3826 	//Unimplemented();
  3827 	/*
  3828 	if (!VerifyFPU) return;
  3829   push_CPU_state();
  3830   push(rsp);                // pass CPU state
  3831   ExternalAddress msg((address) s);
  3832   // pass message string s
  3833   pushptr(msg.addr());
  3834   push(stack_depth);        // pass stack depth
  3835   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  3836   addptr(rsp, 3 * wordSize);   // discard arguments
  3837   // check for error
  3838   { Label L;
  3839     testl(rax, rax);
  3840     jcc(Assembler::notZero, L);
  3841     int3();                  // break if error condition
  3842     bind(L);
  3844   pop_CPU_state();
  3845   */
  3846 }
  3848 #ifdef _LP64
  3849 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3851 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
  3852 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
  3853 #else
  3854 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};
  3856 FloatRegister caller_saved_fpu_registers[] = {};
  3857 #endif
  3859 // We preserve all caller-saved registers
  3860 void  MacroAssembler::pushad(){
  3861   int i;
  3863   /* Fixed-point registers */
  3864   int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3865   daddi(SP, SP, -1 * len * wordSize);
  3866   for (i = 0; i < len; i++)
  3867   {
  3868 #ifdef _LP64
  3869     sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3870 #else
  3871     sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3872 #endif
  3873   }
  3875   /* Floating-point registers */
  3876   len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3877   daddi(SP, SP, -1 * len * wordSize);
  3878   for (i = 0; i < len; i++)
  3879   {
  3880 #ifdef _LP64
  3881     sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3882 #else
  3883     swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3884 #endif
  3885   }
  3886 };
  3888 void  MacroAssembler::popad(){
  3889   int i;
  3891   /* Floating-point registers */
  3892   int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  3893   for (i = 0; i < len; i++)
  3894   {
  3895 #ifdef _LP64
  3896     ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3897 #else
  3898     lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
  3899 #endif
  3900   }
  3901   daddi(SP, SP, len * wordSize);
  3903   /* Fixed-point registers */
  3904   len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  3905   for (i = 0; i < len; i++)
  3906   {
  3907 #ifdef _LP64
  3908     ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3909 #else
  3910     lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
  3911 #endif
  3912   }
  3913   daddi(SP, SP, len * wordSize);
  3914 };
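// pushad()/popad() are exact mirrors (GPRs are saved first and FPRs second,
// so FPRs are restored first) and must be paired LIFO. A typical use around
// a runtime call (a sketch only; the callee shown is illustrative):
//   pushad();                      // spill every caller-saved GPR/FPR
//   call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
//        relocInfo::runtime_call_type);
//   delayed()->nop();
//   popad();                       // restore them in reverse order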
  3916 void MacroAssembler::push2(Register reg1, Register reg2) {
  3917 #ifdef _LP64
  3918   daddi(SP, SP, -16);
  3919   sd(reg2, SP, 0);
  3920   sd(reg1, SP, 8);
  3921 #else
  3922   addi(SP, SP, -8);
  3923   sw(reg2, SP, 0);
  3924   sw(reg1, SP, 4);
  3925 #endif
  3926 }
  3928 void MacroAssembler::pop2(Register reg1, Register reg2) {
  3929 #ifdef _LP64
  3930   ld(reg1, SP, 0);
  3931   ld(reg2, SP, 8);
  3932   daddi(SP, SP, 16);
  3933 #else
  3934   lw(reg1, SP, 0);
  3935   lw(reg2, SP, 4);
  3936   addi(SP, SP, 8);
  3937 #endif
  3938 }
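// Note the slot layout of the pair above: push2(a, b) stores b at SP+0 and
// a at SP+wordSize, while pop2(x, y) loads x from SP+0 and y from
// SP+wordSize, so a push is undone with the operands swapped:
//   push2(T0, T1);   // T1 at SP+0, T0 at SP+8 (LP64)
//   ...
//   pop2(T1, T0);    // restores both registers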
  3940 //for UseCompressedOops Option
  3941 void MacroAssembler::load_klass(Register dst, Register src) {
  3942 #ifdef _LP64
  3943     if(UseCompressedClassPointers){
  3944         lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  3945         decode_klass_not_null(dst);
  3946     } else
  3947 #endif
  3948         ld(dst, src, oopDesc::klass_offset_in_bytes());
  3949 }
  3951 void MacroAssembler::store_klass(Register dst, Register src) {
  3952 #ifdef _LP64
  3953     if(UseCompressedClassPointers){
  3954         encode_klass_not_null(src);
  3955         sw(src, dst, oopDesc::klass_offset_in_bytes());
  3956     } else {
  3957 #endif
  3958         sd(src, dst, oopDesc::klass_offset_in_bytes());
  3959     }
  3960 }
  3962 void MacroAssembler::load_prototype_header(Register dst, Register src) {
  3963   load_klass(dst, src);
  3964   ld(dst, Address(dst, Klass::prototype_header_offset()));
  3965 }
  3967 #ifdef _LP64
  3968 void MacroAssembler::store_klass_gap(Register dst, Register src) {
  3969   if (UseCompressedClassPointers) {
  3970     sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  3971   }
  3972 }
  3974 void MacroAssembler::load_heap_oop(Register dst, Address src) {
  3975     if (UseCompressedOops) {
  3976         lwu(dst, src);
  3977         decode_heap_oop(dst);
  3978     } else {
  3979         ld(dst, src);
  3980     }
  3981 }
  3983 void MacroAssembler::store_heap_oop(Address dst, Register src){
  3984     if (UseCompressedOops) {
  3985         assert(!dst.uses(src), "not enough registers");
  3986         encode_heap_oop(src);
  3987         sw(src, dst);
  3988     } else {
  3989         sd(src, dst);
  3990     }
  3991 }
  3993 #ifdef ASSERT
  3994 void MacroAssembler::verify_heapbase(const char* msg) {
  3995   assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  3996   assert (Universe::heap() != NULL, "java heap should be initialized");
  3997 /*  if (CheckCompressedOops) {
  3998     Label ok;
  3999     push(rscratch1); // cmpptr trashes rscratch1
  4000     cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
  4001     jcc(Assembler::equal, ok);
  4002     STOP(msg);
  4003     bind(ok);
  4004     pop(rscratch1);
  4005   }*/
  4006 }
  4007 #endif
  4010 // Algorithm must match oop.inline.hpp encode_heap_oop.
  4011 void MacroAssembler::encode_heap_oop(Register r) {
  4012 #ifdef ASSERT
  4013   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  4014 #endif
  4015   verify_oop(r, "broken oop in encode_heap_oop");
  4016   if (Universe::narrow_oop_base() == NULL) {
  4017     if (Universe::narrow_oop_shift() != 0) { 
  4018       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4019       shr(r, LogMinObjAlignmentInBytes);
  4020     }
  4021     return;
  4022   }
  4024     movz(r, S5_heapbase, r);
  4025     dsub(r, r, S5_heapbase);
  4026     if (Universe::narrow_oop_shift() != 0) {
  4027       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4028       shr(r, LogMinObjAlignmentInBytes);
  4029     }
  4030 }
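// Worked example of the encoding above (base/oop values are illustrative
// only): with narrow_oop_base == 0x0000000700000000 and narrow_oop_shift
// == 3, an oop 0x0000000700000040 encodes as (oop - base) >> 3 == 0x8.
// A NULL oop takes the movz path (r == 0 is first replaced by the heap
// base), so it still encodes as 0; decode_heap_oop() below inverts this
// the same way.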
  4032 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  4033 #ifdef ASSERT
  4034   verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
  4035 #endif
  4036   verify_oop(src, "broken oop in encode_heap_oop");
  4037   if (Universe::narrow_oop_base() == NULL) {
  4038     if (Universe::narrow_oop_shift() != 0) { 
  4039       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4040       dsrl(dst, src, LogMinObjAlignmentInBytes);
  4041     } else {
  4042       if (dst != src) move(dst, src);
  4043     }
  4044   } else {
  4045     if (dst == src) {
  4046       movz(dst, S5_heapbase, dst);
  4047       dsub(dst, dst, S5_heapbase);
  4048       if (Universe::narrow_oop_shift() != 0) {
  4049         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4050         shr(dst, LogMinObjAlignmentInBytes);
  4051       }
  4052     } else {
  4053       dsub(dst, src, S5_heapbase);
  4054       if (Universe::narrow_oop_shift() != 0) {
  4055         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4056         shr(dst, LogMinObjAlignmentInBytes);
  4057       }
  4058       movz(dst, R0, src);
  4059     }
  4060   }
  4061 }
  4063 void MacroAssembler::encode_heap_oop_not_null(Register r) {
  4064     assert (UseCompressedOops, "should be compressed");
  4065 #ifdef ASSERT
  4066     if (CheckCompressedOops) {
  4067       Label ok;
  4068       bne(r, R0, ok);
  4069       delayed()->nop();
  4070       stop("null oop passed to encode_heap_oop_not_null");
  4071       bind(ok);
  4072     }
  4073 #endif
  4074   verify_oop(r, "broken oop in encode_heap_oop_not_null");
  4075   if (Universe::narrow_oop_base() != NULL) {
  4076     dsub(r, r, S5_heapbase);
  4077   }
  4078   if (Universe::narrow_oop_shift() != 0) {
  4079     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4080     shr(r, LogMinObjAlignmentInBytes);
  4081   }
  4082 }
  4085 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  4086     assert (UseCompressedOops, "should be compressed");
  4087 #ifdef ASSERT
  4088     if (CheckCompressedOops) {
  4089       Label ok;
  4090       bne(src, R0, ok);
  4091       delayed()->nop();
  4092       stop("null oop passed to encode_heap_oop_not_null2");
  4093       bind(ok);
  4094     }
  4095 #endif
  4096     verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  4098     if (Universe::narrow_oop_base() != NULL) {
  4099       dsub(dst, src, S5_heapbase);
  4100       if (Universe::narrow_oop_shift() != 0) {
  4101         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4102         shr(dst, LogMinObjAlignmentInBytes);
  4103       }
  4104     } else {
  4105       if (Universe::narrow_oop_shift() != 0) {
  4106         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4107         dsrl(dst, src, LogMinObjAlignmentInBytes);
  4108       } else {
  4109         if (dst != src) move(dst, src);
  4110       }
  4111     }
  4112 }
  4114 void  MacroAssembler::decode_heap_oop(Register r) {
  4115 #ifdef ASSERT
  4116   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  4117 #endif
  4118   if (Universe::narrow_oop_base() == NULL) {
  4119     if (Universe::narrow_oop_shift() != 0) {
  4120       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4121       shl(r, LogMinObjAlignmentInBytes);
  4122     }
  4123   } else {
  4124     move(AT, r);
  4125     if (Universe::narrow_oop_shift() != 0) {
  4126       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4127       shl(r, LogMinObjAlignmentInBytes);
  4128     }
  4129     dadd(r, r, S5_heapbase);
  4130     movz(r, R0, AT);
  4131   }
  4132   verify_oop(r, "broken oop in decode_heap_oop");
  4133 }
  4135 void  MacroAssembler::decode_heap_oop(Register dst, Register src) {
  4136 #ifdef ASSERT
  4137   verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
  4138 #endif
  4139   if (Universe::narrow_oop_base() == NULL) {
  4140     if (Universe::narrow_oop_shift() != 0) {
  4141       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4142       dsll(dst, src, LogMinObjAlignmentInBytes);
  4143     } else {
  4144       if (dst != src) move(dst, src);
  4145     }
  4146   } else {
  4147     if (dst == src) {
  4148       move(AT, dst);
  4149       if (Universe::narrow_oop_shift() != 0) {
  4150         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4151         shl(dst, LogMinObjAlignmentInBytes);
  4152       }
  4153       dadd(dst, dst, S5_heapbase);
  4154       movz(dst, R0, AT);
  4155     } else {
  4156       if (Universe::narrow_oop_shift() != 0) {
  4157         assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4158         dsll(dst, src, LogMinObjAlignmentInBytes);
  4159         daddu(dst, dst, S5_heapbase);
  4160       } else {
  4161         daddu(dst, src, S5_heapbase);
  4162       }
  4163       movz(dst, R0, src);
  4164     }
  4165   }
  4166   verify_oop(dst, "broken oop in decode_heap_oop");
  4167 }
  4169 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  4170   // Note: it will change flags
  4171   assert (UseCompressedOops, "should only be used for compressed headers");
  4172   assert (Universe::heap() != NULL, "java heap should be initialized");
  4173   // Cannot assert, unverified entry point counts instructions (see .ad file)
  4174   // vtableStubs also counts instructions in pd_code_size_limit.
  4175   // Also do not verify_oop as this is called by verify_oop.
  4176   if (Universe::narrow_oop_shift() != 0) {
  4177     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4178     shl(r, LogMinObjAlignmentInBytes);
  4179     if (Universe::narrow_oop_base() != NULL) {
  4180       daddu(r, r, S5_heapbase);
  4181     }
  4182   } else {
  4183     assert (Universe::narrow_oop_base() == NULL, "sanity");
  4184   }
  4185 }
  4187 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  4188   assert (UseCompressedOops, "should only be used for compressed headers");
  4189   assert (Universe::heap() != NULL, "java heap should be initialized");
  4191   // Cannot assert, unverified entry point counts instructions (see .ad file)
  4192   // vtableStubs also counts instructions in pd_code_size_limit.
  4193   // Also do not verify_oop as this is called by verify_oop.
  4194   //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  4195   if (Universe::narrow_oop_shift() != 0) {
  4196     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  4197     if (LogMinObjAlignmentInBytes == Address::times_8) {
  4198       dsll(dst, src, LogMinObjAlignmentInBytes);
  4199       daddu(dst, dst, S5_heapbase);
  4200     } else {
  4201       dsll(dst, src, LogMinObjAlignmentInBytes);
  4202       if (Universe::narrow_oop_base() != NULL) {
  4203         daddu(dst, dst, S5_heapbase);
  4204       }
  4205     }
  4206   } else {
  4207     assert (Universe::narrow_oop_base() == NULL, "sanity");
  4208     if (dst != src) {
  4209       move(dst, src);
  4210     }
  4211   }
  4212 }
  4214 void MacroAssembler::encode_klass_not_null(Register r) {
  4215   if (Universe::narrow_klass_base() != NULL) {
  4216     assert(r != AT, "Encoding a klass in AT");
  4217     set64(AT, (int64_t)Universe::narrow_klass_base());
  4218     dsub(r, r, AT);
  4219   }
  4220   if (Universe::narrow_klass_shift() != 0) {
  4221     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4222     shr(r, LogKlassAlignmentInBytes);
  4223   }
  4224   // Not necessary for MIPS at all.
  4225   //if (Universe::narrow_klass_base() != NULL) {
  4226   //  reinit_heapbase();
  4227   //}
  4228 }
  4230 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  4231   if (dst == src) {
  4232     encode_klass_not_null(src);
  4233   } else {
  4234     if (Universe::narrow_klass_base() != NULL) {
  4235       set64(dst, (int64_t)Universe::narrow_klass_base());
  4236       dsub(dst, src, dst);
  4237       if (Universe::narrow_klass_shift() != 0) {
  4238         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4239         shr(dst, LogKlassAlignmentInBytes);
  4240       }
  4241     } else {
  4242       if (Universe::narrow_klass_shift() != 0) {
  4243         assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4244         dsrl(dst, src, LogKlassAlignmentInBytes);
  4245       } else {
  4246         move(dst, src);
  4247       }
  4248     }
  4249   }
  4250 }
  4252 // Function instr_size_for_decode_klass_not_null() counts the instructions
  4253 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
  4254 // when (Universe::heap() != NULL).  Hence, if the instructions they
  4255 // generate change, then this method needs to be updated.
  4256 int MacroAssembler::instr_size_for_decode_klass_not_null() {
  4257   assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  4258   if (Universe::narrow_klass_base() != NULL) {
  4259     // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
  4260     return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  4261   } else {
  4262     // longest load decode klass function, mov64, leaq
  4263     return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  4264   }
  4265 }
  4267 void  MacroAssembler::decode_klass_not_null(Register r) { 
  4268   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  4269   assert(r != AT, "Decoding a klass in AT");
  4270   // Cannot assert, unverified entry point counts instructions (see .ad file)
  4271   // vtableStubs also counts instructions in pd_code_size_limit.
  4272   // Also do not verify_oop as this is called by verify_oop.
  4273   if (Universe::narrow_klass_shift() != 0) { 
  4274     assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4275     shl(r, LogKlassAlignmentInBytes);
  4276   }
  4277   if (Universe::narrow_klass_base() != NULL) {
  4278     set64(AT, (int64_t)Universe::narrow_klass_base());
  4279     daddu(r, r, AT);
  4280     // Not necessary for MIPS at all.
  4281     //reinit_heapbase();
  4282   }
  4283 }
  4285 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  4286   assert (UseCompressedClassPointers, "should only be used for compressed headers");
  4288   if (dst == src) {
  4289     decode_klass_not_null(dst);
  4290   } else {
  4291     // Cannot assert, unverified entry point counts instructions (see .ad file)
  4292     // vtableStubs also counts instructions in pd_code_size_limit.
  4293     // Also do not verify_oop as this is called by verify_oop.
  4294     set64(dst, (int64_t)Universe::narrow_klass_base());
  4295     if (Universe::narrow_klass_shift() != 0) {
  4296       assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
  4297       assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
  4298       dsll(AT, src, Address::times_8);
  4299       daddu(dst, dst, AT);
  4300     } else {
  4301       daddu(dst, src, dst);
  4302     }
  4303   }
  4304 }
  4306 void MacroAssembler::incrementl(Register reg, int value) {
  4307   if (value == min_jint) {
  4308      move(AT, value);
  4309      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  4310      return; 
  4312   if (value <  0) { decrementl(reg, -value); return; }
  4313   if (value == 0) {                        ; return; }
  4315   if(Assembler::is_simm16(value)) {
  4316      NOT_LP64(addiu(reg, reg, value));
  4317      LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  4318   } else {
  4319      move(AT, value);
  4320      LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  4321   }
  4322 }
  4324 void MacroAssembler::decrementl(Register reg, int value) {
  4325   if (value == min_jint) {
  4326      move(AT, value);
  4327      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  4328      return;
  4329   }
  4330   if (value <  0) { incrementl(reg, -value); return; }
  4331   if (value == 0) {                        ; return; }
  4333   if(Assembler::is_simm16(value)) {
  4334      NOT_LP64(addiu(reg, reg, -value));
  4335      LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  4336   } else {
  4337      move(AT, value);
  4338      LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  4339   }
  4340 }
  4342 void MacroAssembler::reinit_heapbase() {
  4343   if (UseCompressedOops || UseCompressedClassPointers) {
  4344     if (Universe::heap() != NULL) {
  4345       if (Universe::narrow_oop_base() == NULL) {
  4346         move(S5_heapbase, R0);
  4347       } else {
  4348         set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
  4349       }
  4350     } else {
  4351       set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
  4352       ld(S5_heapbase, S5_heapbase, 0);
  4353     }
  4354   }
  4355 }
  4356 #endif // _LP64
  4358 void MacroAssembler::check_klass_subtype(Register sub_klass,
  4359                            Register super_klass,
  4360                            Register temp_reg,
  4361                            Label& L_success) {
  4362 // cf. the interpreter's gen_subtype_check
  4363   Label L_failure;
  4364   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  4365   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  4366   bind(L_failure);
  4367 }
  4369 SkipIfEqual::SkipIfEqual(
  4370     MacroAssembler* masm, const bool* flag_addr, bool value) {
  4371   _masm = masm;
  4372   _masm->li(AT, (address)flag_addr);
  4373   _masm->lb(AT, AT, 0);
  4374   _masm->addi(AT, AT, -value);
  4375   _masm->beq(AT, R0, _label);
  4376   _masm->delayed()->nop();
  4377 }
  4378 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  4379                                                    Register super_klass,
  4380                                                    Register temp_reg,
  4381                                                    Label* L_success,
  4382                                                    Label* L_failure,
  4383                                                    Label* L_slow_path,
  4384                                         RegisterOrConstant super_check_offset) {
  4385   assert_different_registers(sub_klass, super_klass, temp_reg);
  4386   bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  4387   if (super_check_offset.is_register()) {
  4388     assert_different_registers(sub_klass, super_klass,
  4389                                super_check_offset.as_register());
  4390   } else if (must_load_sco) {
  4391     assert(temp_reg != noreg, "supply either a temp or a register offset");
  4392   }
  4394   Label L_fallthrough;
  4395   int label_nulls = 0;
  4396   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  4397   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  4398   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  4399   assert(label_nulls <= 1, "at most one NULL in the batch");
  4401   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  4402   int sco_offset = in_bytes(Klass::super_check_offset_offset());
  4403   // If the pointers are equal, we are done (e.g., String[] elements).
  4404   // This self-check enables sharing of secondary supertype arrays among
  4405   // non-primary types such as array-of-interface.  Otherwise, each such
  4406   // type would need its own customized SSA.
  4407   // We move this check to the front of the fast path because many
  4408   // type checks are in fact trivially successful in this manner,
  4409   // so we get a nicely predicted branch right at the start of the check.
  4410   //cmpptr(sub_klass, super_klass);
  4411   //local_jcc(Assembler::equal, *L_success);
  4412   beq(sub_klass, super_klass, *L_success);
  4413   delayed()->nop();
  4414   // Check the supertype display:
  4415   if (must_load_sco) {
  4416     // Positive movl does right thing on LP64.
  4417     lwu(temp_reg, super_klass, sco_offset);
  4418     super_check_offset = RegisterOrConstant(temp_reg);
  4419   }
  4420   dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  4421   daddu(AT, sub_klass, AT);
  4422   ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
  4424   // This check has worked decisively for primary supers.
  4425   // Secondary supers are sought in the super_cache ('super_cache_addr').
  4426   // (Secondary supers are interfaces and very deeply nested subtypes.)
  4427   // This works in the same check above because of a tricky aliasing
  4428   // between the super_cache and the primary super display elements.
  4429   // (The 'super_check_addr' can address either, as the case requires.)
  4430   // Note that the cache is updated below if it does not help us find
  4431   // what we need immediately.
  4432   // So if it was a primary super, we can just fail immediately.
  4433   // Otherwise, it's the slow path for us (no success at this point).
  4435   if (super_check_offset.is_register()) {
  4436     beq(super_klass, AT, *L_success);
  4437     delayed()->nop();
  4438     addi(AT, super_check_offset.as_register(), -sc_offset);
  4439     if (L_failure == &L_fallthrough) {
  4440       beq(AT, R0, *L_slow_path);
  4441       delayed()->nop();
  4442     } else {
  4443       bne(AT, R0, *L_failure);
  4444       delayed()->nop();
  4445       b(*L_slow_path);
  4446       delayed()->nop();
  4447     }
  4448   } else if (super_check_offset.as_constant() == sc_offset) {
  4449     // Need a slow path; fast failure is impossible.
  4450     if (L_slow_path == &L_fallthrough) {
  4451       beq(super_klass, AT, *L_success);
  4452       delayed()->nop();
  4453     } else {
  4454       bne(super_klass, AT, *L_slow_path);
  4455       delayed()->nop();
  4456       b(*L_success);
  4457       delayed()->nop();
  4458     }
  4459   } else {
  4460     // No slow path; it's a fast decision.
  4461     if (L_failure == &L_fallthrough) {
  4462       beq(super_klass, AT, *L_success);
  4463       delayed()->nop();
  4464     } else {
  4465       bne(super_klass, AT, *L_failure);
  4466       delayed()->nop();
  4467       b(*L_success);
  4468       delayed()->nop();
  4469     }
  4470   }
  4472   bind(L_fallthrough);
  4473 }
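// The fast path above is, in pseudo-code (a sketch mirroring the x86
// version this port follows):
//   if (sub_klass == super_klass)                          goto *L_success;
//   if (*(sub_klass + super_check_offset) == super_klass)  goto *L_success;
//   if (super_check_offset == sc_offset)                   goto *L_slow_path;
//   else                                                   goto *L_failure;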
  4477 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  4478                                                    Register super_klass,
  4479                                                    Register temp_reg,
  4480                                                    Register temp2_reg,
  4481                                                    Label* L_success,
  4482                                                    Label* L_failure,
  4483                                                    bool set_cond_codes) {
  4484   assert_different_registers(sub_klass, super_klass, temp_reg);
  4485   if (temp2_reg != noreg)
  4486     assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  4487   else
  4488     temp2_reg = T9;
  4489 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  4491   Label L_fallthrough;
  4492   int label_nulls = 0;
  4493   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  4494   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  4495   assert(label_nulls <= 1, "at most one NULL in the batch");
  4497   // a couple of useful fields in sub_klass:
  4498   int ss_offset = in_bytes(Klass::secondary_supers_offset());
  4499   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  4500   Address secondary_supers_addr(sub_klass, ss_offset);
  4501   Address super_cache_addr(     sub_klass, sc_offset);
  4503   // Do a linear scan of the secondary super-klass chain.
  4504   // This code is rarely used, so simplicity is a virtue here.
  4505   // The repne_scan instruction uses fixed registers, which we must spill.
  4506   // Don't worry too much about pre-existing connections with the input regs.
  4508 #if 0
  4509   assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
  4510   assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
  4511 #endif
  4513   // Get super_klass value into rax (even if it was in rdi or rcx).
  4514 /*
  4515   bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  4516   if (super_klass != rax || UseCompressedOops) {
  4517     if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
  4518     mov(rax, super_klass);
  4520   if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  4521   if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
  4522 */
  4523 #ifndef PRODUCT
  4524   int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  4525   ExternalAddress pst_counter_addr((address) pst_counter);
  4526   NOT_LP64(  incrementl(pst_counter_addr) );
  4527   //LP64_ONLY( lea(rcx, pst_counter_addr) );
  4528   //LP64_ONLY( incrementl(Address(rcx, 0)) );
  4529 #endif //PRODUCT
  4531   // We will consult the secondary-super array.
  4532   ld(temp_reg, secondary_supers_addr);
  4533   // Load the array length.  (Positive movl does right thing on LP64.)
  4534   lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  4535   // Skip to start of data.
  4536   daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
  4538   // Scan RCX words at [RDI] for an occurrence of RAX.
  4539   // Set NZ/Z based on last compare.
  4540   // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  4541   // not change flags (only scas instruction which is repeated sets flags).
  4542   // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
  4544   /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
  4545   Label Loop, subtype;
  4546   bind(Loop);
  4547   beq(temp2_reg, R0, *L_failure);
  4548   delayed()->nop();
  4549   ld(AT, temp_reg, 0);
  4550   beq(AT, super_klass, subtype);
  4551   delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  4552   b(Loop);
  4553   delayed()->daddi(temp2_reg, temp2_reg, -1); 
  4555   bind(subtype);
  4556   sd(super_klass, super_cache_addr);
  4557   if (L_success != &L_fallthrough) {
  4558     b(*L_success);
  4559     delayed()->nop();
  4560   }
  4562 /*
  4563   if (set_cond_codes) {
  4564     // Special hack for the AD files:  rdi is guaranteed non-zero.
  4565     assert(!pushed_rdi, "rdi must be left non-NULL");
  4566     // Also, the condition codes are properly set Z/NZ on succeed/failure.
  4568 */
  4569   // Success.  Cache the super we found and proceed in triumph.
  4570 #undef IS_A_TEMP
  4572   bind(L_fallthrough);
  4573 }
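// The scan above amounts to (sketch):
//   for (n = secondary_supers->length(); n != 0; n--, scan += wordSize) {
//     if (*scan == super_klass) {
//       sub_klass->_secondary_super_cache = super_klass;
//       goto *L_success;
//     }
//   }
//   goto *L_failure;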
  4574 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  4575   ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  4576   sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  4577   verify_oop(oop_result, "broken oop in call_VM_base");
  4578 }
  4580 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  4581   ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  4582   sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
  4583 }
  4585 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  4586                                          int extra_slot_offset) {
  4587   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  4588   int stackElementSize = Interpreter::stackElementSize;
  4589   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  4590 #ifdef ASSERT
  4591   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  4592   assert(offset1 - offset == stackElementSize, "correct arithmetic");
  4593 #endif
  4594   Register             scale_reg    = NOREG;
  4595   Address::ScaleFactor scale_factor = Address::no_scale;
  4596   if (arg_slot.is_constant()) {
  4597     offset += arg_slot.as_constant() * stackElementSize;
  4598   } else {
  4599     scale_reg    = arg_slot.as_register();
  4600     scale_factor = Address::times_8;
  4601   }
  4602   // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke.
  4603   //  offset += wordSize;           // return PC is on stack
  4604   if (scale_reg == NOREG) return Address(SP, offset);
  4605   else {
  4606     dsll(scale_reg, scale_reg, scale_factor);
  4607     daddu(scale_reg, SP, scale_reg);
  4608     return Address(scale_reg, offset);
  4609   }
  4610 }
  4612 SkipIfEqual::~SkipIfEqual() {
  4613   _masm->bind(_label);
  4614 }
  4616 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  4617   switch (size_in_bytes) {
  4618 #ifndef _LP64
  4619   case  8:
  4620     assert(dst2 != noreg, "second dest register required");
  4621     lw(dst,  src);
  4622     lw(dst2, src.plus_disp(BytesPerInt));
  4623     break;
  4624 #else
  4625   case  8:  ld(dst, src); break;
  4626 #endif
  4627   case  4:  lw(dst, src); break;
  4628   case  2:  is_signed ? lh(dst, src) : lhu(dst, src); break;
  4629   case  1:  is_signed ? lb( dst, src) : lbu( dst, src); break;
  4630   default:  ShouldNotReachHere();
  4631   }
  4632 }
  4634 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  4635   switch (size_in_bytes) {
  4636 #ifndef _LP64
  4637   case  8:
  4638     assert(src2 != noreg, "second source register required");
  4639     sw(src, dst);
  4640     sw(src2, dst.plus_disp(BytesPerInt));
  4641     break;
  4642 #else
  4643   case  8:  sd(src, dst); break;
  4644 #endif
  4645   case  4:  sw(src, dst); break;
  4646   case  2:  sh(src, dst); break;
  4647   case  1:  sb(src, dst); break;
  4648   default:  ShouldNotReachHere();
  4649   }
  4650 }
  4652 // Look up the method for a megamorphic invokeinterface call.
  4653 // The target method is determined by <intf_klass, itable_index>.
  4654 // The receiver klass is in recv_klass.
  4655 // On success, the result will be in method_result, and execution falls through.
  4656 // On failure, execution transfers to the given label.
  4657 void MacroAssembler::lookup_interface_method(Register recv_klass,
  4658                                              Register intf_klass,
  4659                                              RegisterOrConstant itable_index,
  4660                                              Register method_result,
  4661                                              Register scan_temp,
  4662                                              Label& L_no_such_interface) {
  4663   assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  4664   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
  4665          "caller must use same register for non-constant itable index as for method");
  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  dsll(scan_temp, scan_temp, times_vte_scale);
  daddu(scan_temp, recv_klass, scan_temp);
  daddiu(scan_temp, scan_temp, vtable_base);
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }
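  // scan_temp now points at the first itableOffsetEntry, i.e.
  // recv_klass + vtable_base + vtable_length * wordSize, rounded up to an
  // object-alignment boundary when needed.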
  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
  if (itable_index.is_constant()) {
    set64(AT, itable_index.as_constant());
    dsll(AT, AT, (int)Address::times_ptr);
  } else {
    dsll(AT, itable_index.as_register(), (int)Address::times_ptr);
  }
  daddu(AT, AT, recv_klass);
  daddiu(recv_klass, AT, itentry_off);
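  // recv_klass now holds klass + itable_index * wordSize + itentry_off, so
  // adding the offset() of a matching itableOffsetEntry to it addresses the
  // target itableMethodEntry::method() slot directly.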
  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;
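  // The scan loop below is peeled once: the first copy (peel == 1) branches
  // to found_method on a hit, while the second copy inverts the test and
  // branches back to search on a miss, so a hit falls straight through to
  // found_method without a taken branch.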
  for (int peel = 1; peel >= 0; peel--) {
    ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));

    if (peel) {
      beq(intf_klass, method_result, found_method);
      nop();
    } else {
      bne(intf_klass, method_result, search);
      nop();
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    beq(method_result, R0, L_no_such_interface);
    nop();
    daddiu(scan_temp, scan_temp, scan_step);
  }
  bind(found_method);

  // Got a hit.
  lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  // ld(method_result, Address(recv_klass, scan_temp, Address::times_1));
  if (UseLoongsonISA) {
    gsldx(method_result, recv_klass, scan_temp, 0);
  } else {
    daddu(AT, recv_klass, scan_temp);
    ld(method_result, AT, 0);
  }
}
// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  Register tmp = GP;
  push(tmp);

  if (vtable_index.is_constant()) {
    assert_different_registers(recv_klass, method_result, tmp);
  } else {
    assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp);
  }
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  /*
  Address vtable_entry_addr(recv_klass,
                            vtable_index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
  */
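  // The manual sequence below computes exactly what the commented-out
  // Address form above expresses:
  //   method_result = *(recv_klass + base
  //                     + vtable_index * wordSize
  //                     + vtableEntry::method_offset_in_bytes())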
  if (vtable_index.is_constant()) {
    set64(AT, vtable_index.as_constant());
    dsll(AT, AT, (int)Address::times_ptr);
  } else {
    dsll(AT, vtable_index.as_register(), (int)Address::times_ptr);
  }
  set64(tmp, base + vtableEntry::method_offset_in_bytes());
  daddu(tmp, tmp, AT);
  daddu(tmp, tmp, recv_klass);
  ld(method_result, tmp, 0);

  pop(tmp);
}
