1.1 --- a/src/cpu/mips/vm/macroAssembler_mips.cpp Thu Feb 21 10:14:02 2019 +0800 1.2 +++ b/src/cpu/mips/vm/macroAssembler_mips.cpp Tue Mar 05 17:00:17 2019 +0800 1.3 @@ -1,6 +1,6 @@ 1.4 /* 1.5 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 1.6 - * Copyright (c) 2017, 2018, Loongson Technology. All rights reserved. 1.7 + * Copyright (c) 2017, 2019, Loongson Technology. All rights reserved. 1.8 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.9 * 1.10 * This code is free software; you can redistribute it and/or modify it 1.11 @@ -120,7 +120,7 @@ 1.12 pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); 1.13 pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); 1.14 } else { 1.15 - /* revert to "beq + nop" */ 1.16 + // revert to "beq + nop" 1.17 CodeBuffer cb(branch, 4 * 10); 1.18 MacroAssembler masm(&cb); 1.19 #define __ masm. 1.20 @@ -390,19 +390,19 @@ 1.21 b_far(target(L)); 1.22 } else { 1.23 volatile address dest = target(L); 1.24 -/* 1.25 -MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 1.26 - 0x00000055651ed514: dadd at, ra, zero 1.27 - 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 1.28 - 1.29 - 0x00000055651ed51c: sll zero, zero, 0 1.30 - 0x00000055651ed520: lui t9, 0x0 1.31 - 0x00000055651ed524: ori t9, t9, 0x21b8 1.32 - 0x00000055651ed528: daddu t9, t9, ra 1.33 - 0x00000055651ed52c: dadd ra, at, zero 1.34 - 0x00000055651ed530: jr t9 1.35 - 0x00000055651ed534: sll zero, zero, 0 1.36 -*/ 1.37 +// 1.38 +// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 1.39 +// 0x00000055651ed514: dadd at, ra, zero 1.40 +// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 1.41 +// 1.42 +// 0x00000055651ed51c: sll zero, zero, 0 1.43 +// 0x00000055651ed520: lui t9, 0x0 1.44 +// 0x00000055651ed524: ori t9, t9, 0x21b8 1.45 +// 0x00000055651ed528: daddu t9, t9, ra 1.46 +// 0x00000055651ed52c: dadd ra, at, zero 1.47 +// 0x00000055651ed530: jr t9 1.48 +// 0x00000055651ed534: sll zero, zero, 0 1.49 +// 1.50 move(AT, RA); 1.51 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); 1.52 nop(); 1.53 @@ -908,47 +908,6 @@ 1.54 extern "C" void findpc(intptr_t x); 1.55 #endif 1.56 1.57 -void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 1.58 - // In order to get locks to work, we need to fake a in_VM state 1.59 - JavaThread* thread = JavaThread::current(); 1.60 - JavaThreadState saved_state = thread->thread_state(); 1.61 - thread->set_thread_state(_thread_in_vm); 1.62 - if (ShowMessageBoxOnError) { 1.63 - JavaThread* thread = JavaThread::current(); 1.64 - JavaThreadState saved_state = thread->thread_state(); 1.65 - thread->set_thread_state(_thread_in_vm); 1.66 - if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 1.67 - ttyLocker ttyl; 1.68 - BytecodeCounter::print(); 1.69 - } 1.70 - // To see where a verify_oop failed, get $ebx+40/X for this frame. 1.71 - // This is the value of eip which points to where verify_oop will return. 1.72 - if (os::message_box(msg, "Execution stopped, print registers?")) { 1.73 - ttyLocker ttyl; 1.74 - tty->print_cr("eip = 0x%08x", eip); 1.75 -#ifndef PRODUCT 1.76 - tty->cr(); 1.77 - findpc(eip); 1.78 - tty->cr(); 1.79 -#endif 1.80 - tty->print_cr("rax, = 0x%08x", rax); 1.81 - tty->print_cr("rbx, = 0x%08x", rbx); 1.82 - tty->print_cr("rcx = 0x%08x", rcx); 1.83 - tty->print_cr("rdx = 0x%08x", rdx); 1.84 - tty->print_cr("rdi = 0x%08x", rdi); 1.85 - tty->print_cr("rsi = 0x%08x", rsi); 1.86 - tty->print_cr("rbp, = 0x%08x", rbp); 1.87 - tty->print_cr("rsp = 0x%08x", rsp); 1.88 - BREAKPOINT; 1.89 - } 1.90 - } else { 1.91 - ttyLocker ttyl; 1.92 - ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 1.93 - assert(false, "DEBUG MESSAGE"); 1.94 - } 1.95 - ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 1.96 -} 1.97 - 1.98 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { 1.99 if ( ShowMessageBoxOnError ) { 1.100 JavaThreadState saved_state = JavaThread::current()->thread_state(); 1.101 @@ -961,8 +920,6 @@ 1.102 BytecodeCounter::print(); 1.103 } 1.104 1.105 - // if (os::message_box(msg, "Execution stopped, print registers?")) 1.106 - // regs->print(::tty); 1.107 } 1.108 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); 1.109 } 1.110 @@ -1015,13 +972,6 @@ 1.111 } 1.112 1.113 void MacroAssembler::print_reg(Register reg) { 1.114 -/* 1.115 -char *s = getenv("PRINT_REG"); 1.116 -if (s == NULL) 1.117 - return; 1.118 -if (strcmp(s, "1") != 0) 1.119 - return; 1.120 -*/ 1.121 void * cur_pc = pc(); 1.122 pushad(); 1.123 NOT_LP64(push(FP);) 1.124 @@ -1045,35 +995,6 @@ 1.125 NOT_LP64(pop(FP);) 1.126 popad(); 1.127 1.128 -/* 1.129 - pushad(); 1.130 -#ifdef _LP64 1.131 - if (reg == SP) 1.132 - addiu(A0, SP, wordSize * 23); //23 registers saved in pushad() 1.133 - else 1.134 - move(A0, reg); 1.135 - call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type); 1.136 - delayed()->nop(); 1.137 -#else 1.138 - push(FP); 1.139 - move(A0, reg); 1.140 - dsrl32(A1, reg, 0); 1.141 - //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type); 1.142 - call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type); 1.143 - delayed()->nop(); 1.144 - pop(FP); 1.145 -#endif 1.146 - popad(); 1.147 - pushad(); 1.148 - NOT_LP64(push(FP);) 1.149 - char b[50]; 1.150 - sprintf((char *)b, " pc: %p\n",cur_pc); 1.151 - li(A0, (long)(char *)b); 1.152 - call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); 1.153 - delayed()->nop(); 1.154 - NOT_LP64(pop(FP);) 1.155 - popad(); 1.156 -*/ 1.157 } 1.158 1.159 void MacroAssembler::print_reg(FloatRegister reg) { 1.160 @@ -1094,7 +1015,6 @@ 1.161 1.162 pushad(); 1.163 NOT_LP64(push(FP);) 1.164 -#if 1 1.165 move(FP, SP); 1.166 move(AT, -(StackAlignmentInBytes)); 1.167 andr(SP , SP , AT); 1.168 @@ -1102,25 +1022,9 @@ 1.169 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type); 1.170 delayed()->nop(); 1.171 move(SP, FP); 1.172 -#else 1.173 - mov_s(F12, reg); 1.174 - //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type); 1.175 - //delayed()->nop(); 1.176 -#endif 1.177 NOT_LP64(pop(FP);) 1.178 popad(); 1.179 1.180 -#if 0 1.181 - pushad(); 1.182 - NOT_LP64(push(FP);) 1.183 - char* b = new char[50]; 1.184 - sprintf(b, " pc: %p\n", cur_pc); 1.185 - li(A0, (long)b); 1.186 - call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); 1.187 - delayed()->nop(); 1.188 - NOT_LP64(pop(FP);) 1.189 - popad(); 1.190 -#endif 1.191 } 1.192 1.193 void MacroAssembler::increment(Register reg, int imm) { 1.194 @@ -1251,7 +1155,7 @@ 1.195 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 1.196 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 1.197 1.198 - assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp"); 1.199 + assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); 1.200 1.201 // set last Java frame before call 1.202 before_call_pc = (address)pc(); 1.203 @@ -1392,9 +1296,6 @@ 1.204 // accessing M[reg] w/o changing any (non-CC) registers 1.205 // NOTE: cmpl is plenty here to provoke a segv 1.206 lw(AT, reg, 0); 1.207 - // Note: should probably use testl(rax, Address(reg, 0)); 1.208 - // may be shorter code (however, this version of 1.209 - // testl needs to be implemented first) 1.210 } else { 1.211 // nothing to do, (later) access of M[reg + offset] 1.212 // will provoke OS NULL exception if reg = NULL 1.213 @@ -1408,8 +1309,6 @@ 1.214 1.215 void MacroAssembler::leave() { 1.216 #ifndef _LP64 1.217 - //move(SP, FP); 1.218 - //pop2(FP, RA); 1.219 addi(SP, FP, 2 * wordSize); 1.220 lw(RA, SP, - 1 * wordSize); 1.221 lw(FP, SP, - 2 * wordSize); 1.222 @@ -1419,13 +1318,7 @@ 1.223 ld(FP, SP, - 2 * wordSize); 1.224 #endif 1.225 } 1.226 -/* 1.227 -void MacroAssembler::os_breakpoint() { 1.228 - // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 1.229 - // (e.g., MSVC can't call ps() otherwise) 1.230 - call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 1.231 -} 1.232 -*/ 1.233 + 1.234 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { 1.235 // determine java_thread register 1.236 if (!java_thread->is_valid()) { 1.237 @@ -1479,7 +1372,7 @@ 1.238 1.239 // Calls to C land 1.240 // 1.241 -// When entering C land, the rbp, & rsp of the last Java frame have to be recorded 1.242 +// When entering C land, the fp, & sp of the last Java frame have to be recorded 1.243 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 1.244 // has to be reset to 0. This is required to allow proper stack traversal. 1.245 void MacroAssembler::set_last_Java_frame(Register java_thread, 1.246 @@ -1624,10 +1517,10 @@ 1.247 1.248 // Calling the runtime using the regular call_VM_leaf mechanism generates 1.249 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 1.250 - // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 1.251 + // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. 1.252 // 1.253 // If we care generating the pre-barrier without a frame (e.g. in the 1.254 - // intrinsified Reference.get() routine) then ebp might be pointing to 1.255 + // intrinsified Reference.get() routine) then fp might be pointing to 1.256 // the caller frame and so this check will most likely fail at runtime. 1.257 // 1.258 // Expanding the call directly bypasses the generation of the check. 1.259 @@ -2036,19 +1929,18 @@ 1.260 1.261 static const double pi_4 = 0.7853981633974483; 1.262 1.263 -// the x86 version is to clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME 1.264 // must get argument(a double) in F12/F13 1.265 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { 1.266 //We need to preseve the register which maybe modified during the Call 1.267 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 1.268 -//save all modified register here 1.269 -//FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9 1.270 + // save all modified register here 1.271 + // FIXME, in the disassembly of tirgfunc, only used V0, V1, T9, SP, RA, so we ony save V0, V1, T9 1.272 pushad(); 1.273 -//we should preserve the stack space before we call 1.274 + // we should preserve the stack space before we call 1.275 addi(SP, SP, -wordSize * 2); 1.276 - switch (trig){ 1.277 + switch (trig){ 1.278 case 's' : 1.279 - call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); 1.280 + call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); 1.281 delayed()->nop(); 1.282 break; 1.283 case 'c': 1.284 @@ -2079,7 +1971,7 @@ 1.285 dsll(rd, rd, 16); 1.286 ori(rd, rd, split_low(imm)); 1.287 } else if ((imm > 0) && is_simm16(imm >> 32)) { 1.288 - /* A 48-bit address */ 1.289 + // A 48-bit address 1.290 li48(rd, imm); 1.291 } else { 1.292 li64(rd, imm); 1.293 @@ -2093,14 +1985,14 @@ 1.294 1.295 void MacroAssembler::li32(Register reg, int imm) { 1.296 if (is_simm16(imm)) { 1.297 - /* for imm < 0, we should use addi instead of addiu. 1.298 - * 1.299 - * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint) 1.300 - * 1.301 - * 78 move [int:-1|I] [a0|I] 1.302 - * : daddi a0, zero, 0xffffffff (correct) 1.303 - * : daddiu a0, zero, 0xffffffff (incorrect) 1.304 - */ 1.305 + // for imm < 0, we should use addi instead of addiu. 1.306 + // 1.307 + // java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint) 1.308 + // 1.309 + // 78 move [int:-1|I] [a0|I] 1.310 + // : daddi a0, zero, 0xffffffff (correct) 1.311 + // : daddiu a0, zero, 0xffffffff (incorrect) 1.312 + // 1.313 if (imm >= 0) 1.314 addiu(reg, R0, imm); 1.315 else 1.316 @@ -2339,26 +2231,8 @@ 1.317 ori(rd, rd, split_low(imm)); 1.318 } 1.319 #endif 1.320 -// NOTE: i dont push eax as i486. 1.321 -// the x86 save eax for it use eax as the jump register 1.322 + 1.323 void MacroAssembler::verify_oop(Register reg, const char* s) { 1.324 - /* 1.325 - if (!VerifyOops) return; 1.326 - 1.327 - // Pass register number to verify_oop_subroutine 1.328 - char* b = new char[strlen(s) + 50]; 1.329 - sprintf(b, "verify_oop: %s: %s", reg->name(), s); 1.330 - push(rax); // save rax, 1.331 - push(reg); // pass register argument 1.332 - ExternalAddress buffer((address) b); 1.333 - // avoid using pushptr, as it modifies scratch registers 1.334 - // and our contract is not to modify anything 1.335 - movptr(rax, buffer.addr()); 1.336 - push(rax); 1.337 - // call indirectly to solve generation ordering problem 1.338 - movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 1.339 - call(rax); 1.340 - */ 1.341 if (!VerifyOops) return; 1.342 const char * b = NULL; 1.343 stringStream ss; 1.344 @@ -2461,8 +2335,8 @@ 1.345 delayed()->nop(); 1.346 1.347 // Check if the oop is in the right area of memory 1.348 - //const int oop_mask = Universe::verify_oop_mask(); 1.349 - //const int oop_bits = Universe::verify_oop_bits(); 1.350 + // const int oop_mask = Universe::verify_oop_mask(); 1.351 + // const int oop_bits = Universe::verify_oop_bits(); 1.352 const uintptr_t oop_mask = Universe::verify_oop_mask(); 1.353 const uintptr_t oop_bits = Universe::verify_oop_bits(); 1.354 li(AT, oop_mask); 1.355 @@ -2472,37 +2346,12 @@ 1.356 delayed()->nop(); 1.357 1.358 // make sure klass is 'reasonable' 1.359 - //add for compressedoops 1.360 + // add for compressedoops 1.361 reinit_heapbase(); 1.362 - //add for compressedoops 1.363 + // add for compressedoops 1.364 load_klass(T0, A1); 1.365 beq(T0, R0, error); // if klass is NULL it is broken 1.366 delayed()->nop(); 1.367 - #if 0 1.368 - //FIXME:wuhui. 1.369 - // Check if the klass is in the right area of memory 1.370 - //const int klass_mask = Universe::verify_klass_mask(); 1.371 - //const int klass_bits = Universe::verify_klass_bits(); 1.372 - const uintptr_t klass_mask = Universe::verify_klass_mask(); 1.373 - const uintptr_t klass_bits = Universe::verify_klass_bits(); 1.374 - 1.375 - li(AT, klass_mask); 1.376 - andr(T1, T0, AT); 1.377 - li(AT, klass_bits); 1.378 - bne(T1, AT, error); 1.379 - delayed()->nop(); 1.380 - // make sure klass' klass is 'reasonable' 1.381 - //add for compressedoops 1.382 - load_klass(T0, T0); 1.383 - beq(T0, R0, error); // if klass' klass is NULL it is broken 1.384 - delayed()->nop(); 1.385 - 1.386 - li(AT, klass_mask); 1.387 - andr(T1, T0, AT); 1.388 - li(AT, klass_bits); 1.389 - bne(T1, AT, error); 1.390 - delayed()->nop(); // if klass not in right area of memory it is broken too. 1.391 -#endif 1.392 // return if everything seems ok 1.393 bind(exit); 1.394 1.395 @@ -2554,21 +2403,22 @@ 1.396 } 1.397 #endif 1.398 } 1.399 - RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 1.400 + 1.401 +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 1.402 Register tmp, 1.403 int offset) { 1.404 - intptr_t value = *delayed_value_addr; 1.405 - if (value != 0) 1.406 - return RegisterOrConstant(value + offset); 1.407 - AddressLiteral a(delayed_value_addr); 1.408 - // load indirectly to solve generation ordering problem 1.409 - //movptr(tmp, ExternalAddress((address) delayed_value_addr)); 1.410 - //ld(tmp, a); 1.411 - if (offset != 0) 1.412 - daddi(tmp,tmp, offset); 1.413 - 1.414 - return RegisterOrConstant(tmp); 1.415 - } 1.416 + intptr_t value = *delayed_value_addr; 1.417 + if (value != 0) 1.418 + return RegisterOrConstant(value + offset); 1.419 + AddressLiteral a(delayed_value_addr); 1.420 + // load indirectly to solve generation ordering problem 1.421 + //movptr(tmp, ExternalAddress((address) delayed_value_addr)); 1.422 + //ld(tmp, a); 1.423 + if (offset != 0) 1.424 + daddi(tmp,tmp, offset); 1.425 + 1.426 + return RegisterOrConstant(tmp); 1.427 +} 1.428 1.429 void MacroAssembler::hswap(Register reg) { 1.430 //short 1.431 @@ -2615,54 +2465,54 @@ 1.432 1.433 #ifdef _LP64 1.434 1.435 -/* do 32-bit CAS using MIPS64 lld/scd 1.436 - 1.437 - cas_int should only compare 32-bits of the memory value. 1.438 - However, lld/scd will do 64-bit operation, which violates the intention of cas_int. 1.439 - To simulate a 32-bit atomic operation, the value loaded with LLD should be split into 1.440 - tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval, 1.441 - plus the high-32 bits or memory value, are stored togethor with SCD. 1.442 - 1.443 -Example: 1.444 - 1.445 - double d = 3.1415926; 1.446 - System.err.println("hello" + d); 1.447 - 1.448 - sun.misc.FloatingDecimal$1.<init>() 1.449 - | 1.450 - `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() 1.451 - 1.452 - 38 cas_int [a7a7|J] [a0|I] [a6|I] 1.453 -// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 1.454 -// a6: 0x4ab325aa 1.455 - 1.456 -again: 1.457 - 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" 1.458 - 1.459 - 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) 1.460 - 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits 1.461 - 0x00000055647f3c68: dsll32 t8, t8, 0 1.462 - 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal 1.463 - 0x00000055647f3c70: sll zero, zero, 0 1.464 - 1.465 - 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) 1.466 - 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; 1.467 - 0x00000055647f3c7c: ori v1, v1, 0xffffffff 1.468 - 0x00000055647f3c80: and v1, a6, v1 1.469 - 0x00000055647f3c84: or at, t8, v1 1.470 - 0x00000055647f3c88: scd at, 0x0(a7) 1.471 - 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again 1.472 - 0x00000055647f3c90: sll zero, zero, 0 1.473 - 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done 1.474 - 0x00000055647f3c98: sll zero, zero, 0 1.475 -nequal: 1.476 - 0x00000055647f45a4: dadd a0, t9, zero 1.477 - 0x00000055647f45a8: dadd at, zero, zero 1.478 -done: 1.479 -*/ 1.480 +// do 32-bit CAS using MIPS64 lld/scd 1.481 +// 1.482 +// cas_int should only compare 32-bits of the memory value. 1.483 +// However, lld/scd will do 64-bit operation, which violates the intention of cas_int. 1.484 +// To simulate a 32-bit atomic operation, the value loaded with LLD should be split into 1.485 +// tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval, 1.486 +// plus the high-32 bits or memory value, are stored togethor with SCD. 1.487 +// 1.488 +//Example: 1.489 +// 1.490 +// double d = 3.1415926; 1.491 +// System.err.println("hello" + d); 1.492 +// 1.493 +// sun.misc.FloatingDecimal$1.<init>() 1.494 +// | 1.495 +// `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() 1.496 +// 1.497 +// 38 cas_int [a7a7|J] [a0|I] [a6|I] 1.498 +// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 1.499 +// a6: 0x4ab325aa 1.500 +// 1.501 +//again: 1.502 +// 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" 1.503 +// 1.504 +// 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) 1.505 +// 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits 1.506 +// 0x00000055647f3c68: dsll32 t8, t8, 0 1.507 +// 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal 1.508 +// 0x00000055647f3c70: sll zero, zero, 0 1.509 +// 1.510 +// 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) 1.511 +// 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; 1.512 +// 0x00000055647f3c7c: ori v1, v1, 0xffffffff 1.513 +// 0x00000055647f3c80: and v1, a6, v1 1.514 +// 0x00000055647f3c84: or at, t8, v1 1.515 +// 0x00000055647f3c88: scd at, 0x0(a7) 1.516 +// 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again 1.517 +// 0x00000055647f3c90: sll zero, zero, 0 1.518 +// 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done 1.519 +// 0x00000055647f3c98: sll zero, zero, 0 1.520 +//nequal: 1.521 +// 0x00000055647f45a4: dadd a0, t9, zero 1.522 +// 0x00000055647f45a8: dadd at, zero, zero 1.523 +//done: 1.524 +// 1.525 1.526 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { 1.527 - /* MIPS64 can use ll/sc for 32-bit atomic memory access */ 1.528 + // MIPS64 can use ll/sc for 32-bit atomic memory access 1.529 Label done, again, nequal; 1.530 1.531 bind(again); 1.532 @@ -2794,7 +2644,7 @@ 1.533 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat 1.534 // another option would be to emit TrySlowEnter and TrySlowExit methods 1.535 // at startup-time. These methods would accept arguments as 1.536 -// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure 1.537 +// (Obj, Self, box, Scratch) and return success-failure 1.538 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply 1.539 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. 1.540 // In practice, however, the # of lock sites is bounded and is usually small. 1.541 @@ -2820,8 +2670,8 @@ 1.542 // the lock operators would typically be faster than reifying Self. 1.543 // 1.544 // * Ideally I'd define the primitives as: 1.545 -// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. 1.546 -// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED 1.547 +// fast_lock (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED. 1.548 +// fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED 1.549 // Unfortunately ADLC bugs prevent us from expressing the ideal form. 1.550 // Instead, we're stuck with a rather awkward and brittle register assignments below. 1.551 // Furthermore the register assignments are overconstrained, possibly resulting in 1.552 @@ -2856,7 +2706,7 @@ 1.553 1.554 // obj: object to lock 1.555 // box: on-stack box address (displaced header location) - KILLED 1.556 -// rax,: tmp -- KILLED 1.557 +// tmp: tmp -- KILLED 1.558 // scr: tmp -- KILLED 1.559 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) { 1.560 1.561 @@ -2869,10 +2719,6 @@ 1.562 1.563 1.564 block_comment("FastLock"); 1.565 - /* 1.566 - move(AT, 0x0); 1.567 - return; 1.568 - */ 1.569 if (PrintBiasedLockingStatistics) { 1.570 push(tmpReg); 1.571 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg); 1.572 @@ -2982,7 +2828,7 @@ 1.573 pop(T0); 1.574 bind(L); 1.575 } 1.576 - sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */ 1.577 + sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0 1.578 1.579 b(DONE_LABEL) ; 1.580 delayed()->nop(); 1.581 @@ -3038,8 +2884,8 @@ 1.582 } 1.583 1.584 // obj: object to unlock 1.585 -// box: box address (displaced header location), killed. Must be EAX. 1.586 -// rbx,: killed tmp; cannot be obj nor box. 1.587 +// box: box address (displaced header location), killed. 1.588 +// tmp: killed tmp; cannot be obj nor box. 1.589 // 1.590 // Some commentary on balanced locking: 1.591 // 1.592 @@ -3070,8 +2916,6 @@ 1.593 guarantee (objReg != tmpReg, "") ; 1.594 guarantee (boxReg != tmpReg, "") ; 1.595 1.596 - 1.597 - 1.598 block_comment("FastUnlock"); 1.599 1.600 1.601 @@ -3090,7 +2934,7 @@ 1.602 beq(tmpReg, R0, DONE_LABEL) ; 1.603 move(AT, 0x1); // delay slot 1.604 1.605 - cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box 1.606 + cmpxchg(tmpReg, Address(objReg, 0), boxReg); 1.607 bind(DONE_LABEL); 1.608 } else { 1.609 Label DONE_LABEL, Stacked, CheckSucc, Inflated ; 1.610 @@ -3201,11 +3045,11 @@ 1.611 Register caller_saved_fpu_registers[] = {}; 1.612 #endif 1.613 1.614 -//We preserve all caller-saved register 1.615 +// We preserve all caller-saved register 1.616 void MacroAssembler::pushad(){ 1.617 int i; 1.618 1.619 - /* Fixed-point registers */ 1.620 + // Fixed-point registers 1.621 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); 1.622 daddi(SP, SP, -1 * len * wordSize); 1.623 for (i = 0; i < len; i++) 1.624 @@ -3217,7 +3061,7 @@ 1.625 #endif 1.626 } 1.627 1.628 - /* Floating-point registers */ 1.629 + // Floating-point registers 1.630 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); 1.631 daddi(SP, SP, -1 * len * wordSize); 1.632 for (i = 0; i < len; i++) 1.633 @@ -3233,7 +3077,7 @@ 1.634 void MacroAssembler::popad(){ 1.635 int i; 1.636 1.637 - /* Floating-point registers */ 1.638 + // Floating-point registers 1.639 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); 1.640 for (i = 0; i < len; i++) 1.641 { 1.642 @@ -3245,7 +3089,7 @@ 1.643 } 1.644 daddi(SP, SP, len * wordSize); 1.645 1.646 - /* Fixed-point registers */ 1.647 + // Fixed-point registers 1.648 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); 1.649 for (i = 0; i < len; i++) 1.650 { 1.651 @@ -3262,7 +3106,7 @@ 1.652 void MacroAssembler::pushad_except_v0() { 1.653 int i; 1.654 1.655 - /* Fixed-point registers */ 1.656 + // Fixed-point registers 1.657 int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); 1.658 daddi(SP, SP, -1 * len * wordSize); 1.659 for (i = 0; i < len; i++) { 1.660 @@ -3273,7 +3117,7 @@ 1.661 #endif 1.662 } 1.663 1.664 - /* Floating-point registers */ 1.665 + // Floating-point registers 1.666 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); 1.667 daddi(SP, SP, -1 * len * wordSize); 1.668 for (i = 0; i < len; i++) { 1.669 @@ -3288,7 +3132,7 @@ 1.670 void MacroAssembler::popad_except_v0() { 1.671 int i; 1.672 1.673 - /* Floating-point registers */ 1.674 + // Floating-point registers 1.675 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); 1.676 for (i = 0; i < len; i++) { 1.677 #ifdef _LP64 1.678 @@ -3299,7 +3143,7 @@ 1.679 } 1.680 daddi(SP, SP, len * wordSize); 1.681 1.682 - /* Fixed-point registers */ 1.683 + // Fixed-point registers 1.684 len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); 1.685 for (i = 0; i < len; i++) { 1.686 #ifdef _LP64 1.687 @@ -3335,7 +3179,7 @@ 1.688 #endif 1.689 } 1.690 1.691 -//for UseCompressedOops Option 1.692 +// for UseCompressedOops Option 1.693 void MacroAssembler::load_klass(Register dst, Register src) { 1.694 #ifdef _LP64 1.695 if(UseCompressedClassPointers){ 1.696 @@ -3897,7 +3741,6 @@ 1.697 // The repne_scan instruction uses fixed registers, which we must spill. 1.698 // Don't worry too much about pre-existing connections with the input regs. 1.699 1.700 - // Get super_klass value into rax (even if it was in rdi or rcx). 1.701 #ifndef PRODUCT 1.702 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 1.703 ExternalAddress pst_counter_addr((address) pst_counter); 1.704 @@ -3911,12 +3754,6 @@ 1.705 // Skip to start of data. 1.706 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes()); 1.707 1.708 - // Scan RCX words at [RDI] for an occurrence of RAX. 1.709 - // Set NZ/Z based on last compare. 1.710 - // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 1.711 - // not change flags (only scas instruction which is repeated sets flags). 1.712 - // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 1.713 - 1.714 // OpenJDK8 never compresses klass pointers in secondary-super array. 1.715 Label Loop, subtype; 1.716 bind(Loop); 1.717 @@ -4126,11 +3963,6 @@ 1.718 } 1.719 const int base = InstanceKlass::vtable_start_offset() * wordSize; 1.720 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); 1.721 -/* 1.722 - Address vtable_entry_addr(recv_klass, 1.723 - vtable_index, Address::times_ptr, 1.724 - base + vtableEntry::method_offset_in_bytes()); 1.725 -*/ 1.726 if (vtable_index.is_constant()) { 1.727 set64(AT, vtable_index.as_constant()); 1.728 dsll(AT, AT, (int)Address::times_ptr);