src/cpu/mips/vm/macroAssembler_mips.cpp

changeset 9459
814e9e335067
parent 9267
d75acfefab6a
child 9461
ec49047577ae
     1.1 --- a/src/cpu/mips/vm/macroAssembler_mips.cpp	Thu Feb 21 10:14:02 2019 +0800
     1.2 +++ b/src/cpu/mips/vm/macroAssembler_mips.cpp	Tue Mar 05 17:00:17 2019 +0800
     1.3 @@ -1,6 +1,6 @@
     1.4  /*
     1.5   * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     1.6 - * Copyright (c) 2017, 2018, Loongson Technology. All rights reserved.
     1.7 + * Copyright (c) 2017, 2019, Loongson Technology. All rights reserved.
     1.8   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.9   *
    1.10   * This code is free software; you can redistribute it and/or modify it
    1.11 @@ -120,7 +120,7 @@
    1.12        pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
    1.13        pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
    1.14      } else {
    1.15 -      /* revert to "beq + nop" */
    1.16 +      // revert to "beq + nop"
    1.17        CodeBuffer cb(branch, 4 * 10);
    1.18        MacroAssembler masm(&cb);
    1.19  #define __ masm.
    1.20 @@ -390,19 +390,19 @@
    1.21      b_far(target(L));
    1.22    } else {
    1.23      volatile address dest = target(L);
    1.24 -/*
    1.25 -MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
    1.26 -   0x00000055651ed514: dadd at, ra, zero
    1.27 -   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
    1.28 -
    1.29 -   0x00000055651ed51c: sll zero, zero, 0
    1.30 -   0x00000055651ed520: lui t9, 0x0
    1.31 -   0x00000055651ed524: ori t9, t9, 0x21b8
    1.32 -   0x00000055651ed528: daddu t9, t9, ra
    1.33 -   0x00000055651ed52c: dadd ra, at, zero
    1.34 -   0x00000055651ed530: jr t9
    1.35 -   0x00000055651ed534: sll zero, zero, 0
    1.36 -*/
    1.37 +//
    1.38 +// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8
    1.39 +//   0x00000055651ed514: dadd at, ra, zero
    1.40 +//   0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520
    1.41 +//
    1.42 +//   0x00000055651ed51c: sll zero, zero, 0
    1.43 +//   0x00000055651ed520: lui t9, 0x0
    1.44 +//   0x00000055651ed524: ori t9, t9, 0x21b8
    1.45 +//   0x00000055651ed528: daddu t9, t9, ra
    1.46 +//   0x00000055651ed52c: dadd ra, at, zero
    1.47 +//   0x00000055651ed530: jr t9
    1.48 +//   0x00000055651ed534: sll zero, zero, 0
    1.49 +//
    1.50      move(AT, RA);
    1.51      emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
    1.52      nop();
    1.53 @@ -908,47 +908,6 @@
    1.54  extern "C" void findpc(intptr_t x);
    1.55  #endif
    1.56  
    1.57 -void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
    1.58 -  // In order to get locks to work, we need to fake a in_VM state
    1.59 -  JavaThread* thread = JavaThread::current();
    1.60 -  JavaThreadState saved_state = thread->thread_state();
    1.61 -  thread->set_thread_state(_thread_in_vm);
    1.62 -  if (ShowMessageBoxOnError) {
    1.63 -    JavaThread* thread = JavaThread::current();
    1.64 -    JavaThreadState saved_state = thread->thread_state();
    1.65 -    thread->set_thread_state(_thread_in_vm);
    1.66 -    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
    1.67 -      ttyLocker ttyl;
    1.68 -      BytecodeCounter::print();
    1.69 -    }
    1.70 -    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    1.71 -    // This is the value of eip which points to where verify_oop will return.
    1.72 -    if (os::message_box(msg, "Execution stopped, print registers?")) {
    1.73 -      ttyLocker ttyl;
    1.74 -      tty->print_cr("eip = 0x%08x", eip);
    1.75 -#ifndef PRODUCT
    1.76 -      tty->cr();
    1.77 -      findpc(eip);
    1.78 -      tty->cr();
    1.79 -#endif
    1.80 -      tty->print_cr("rax, = 0x%08x", rax);
    1.81 -      tty->print_cr("rbx, = 0x%08x", rbx);
    1.82 -      tty->print_cr("rcx = 0x%08x", rcx);
    1.83 -      tty->print_cr("rdx = 0x%08x", rdx);
    1.84 -      tty->print_cr("rdi = 0x%08x", rdi);
    1.85 -      tty->print_cr("rsi = 0x%08x", rsi);
    1.86 -      tty->print_cr("rbp, = 0x%08x", rbp);
    1.87 -      tty->print_cr("rsp = 0x%08x", rsp);
    1.88 -      BREAKPOINT;
    1.89 -    }
    1.90 -  } else {
    1.91 -    ttyLocker ttyl;
    1.92 -    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    1.93 -    assert(false, "DEBUG MESSAGE");
    1.94 -  }
    1.95 -  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
    1.96 -}
    1.97 -
    1.98  void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
    1.99    if ( ShowMessageBoxOnError ) {
   1.100      JavaThreadState saved_state = JavaThread::current()->thread_state();
   1.101 @@ -961,8 +920,6 @@
   1.102    BytecodeCounter::print();
   1.103        }
   1.104  
   1.105 -      //      if (os::message_box(msg, "Execution stopped, print registers?"))
   1.106 -      //        regs->print(::tty);
   1.107      }
   1.108      ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
   1.109    }
   1.110 @@ -1015,13 +972,6 @@
   1.111  }
   1.112  
   1.113  void MacroAssembler::print_reg(Register reg) {
   1.114 -/*
   1.115 -char *s = getenv("PRINT_REG");
   1.116 -if (s == NULL)
   1.117 -  return;
   1.118 -if (strcmp(s, "1") != 0)
   1.119 -  return;
   1.120 -*/
   1.121    void * cur_pc = pc();
   1.122    pushad();
   1.123    NOT_LP64(push(FP);)
   1.124 @@ -1045,35 +995,6 @@
   1.125    NOT_LP64(pop(FP);)
   1.126    popad();
   1.127  
   1.128 -/*
   1.129 -  pushad();
   1.130 -#ifdef _LP64
   1.131 -  if (reg == SP)
   1.132 -    addiu(A0, SP, wordSize * 23); //23 registers saved in pushad()
   1.133 -  else
   1.134 -    move(A0, reg);
   1.135 -  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
   1.136 -  delayed()->nop();
   1.137 -#else
   1.138 -  push(FP);
   1.139 -  move(A0, reg);
   1.140 -  dsrl32(A1, reg, 0);
   1.141 -  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type);
   1.142 -  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type);
   1.143 -  delayed()->nop();
   1.144 -  pop(FP);
   1.145 -#endif
   1.146 -  popad();
   1.147 -  pushad();
   1.148 -  NOT_LP64(push(FP);)
   1.149 -  char b[50];
   1.150 -  sprintf((char *)b, " pc: %p\n",cur_pc);
   1.151 -  li(A0, (long)(char *)b);
   1.152 -  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
   1.153 -  delayed()->nop();
   1.154 -  NOT_LP64(pop(FP);)
   1.155 -  popad();
   1.156 -*/
   1.157  }
   1.158  
   1.159  void MacroAssembler::print_reg(FloatRegister reg) {
   1.160 @@ -1094,7 +1015,6 @@
   1.161  
   1.162    pushad();
   1.163    NOT_LP64(push(FP);)
   1.164 -#if 1
   1.165    move(FP, SP);
   1.166    move(AT, -(StackAlignmentInBytes));
   1.167    andr(SP , SP , AT);
   1.168 @@ -1102,25 +1022,9 @@
   1.169    call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
   1.170    delayed()->nop();
   1.171    move(SP, FP);
   1.172 -#else
   1.173 -  mov_s(F12, reg);
   1.174 -  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
   1.175 -  //delayed()->nop();
   1.176 -#endif
   1.177    NOT_LP64(pop(FP);)
   1.178    popad();
   1.179  
   1.180 -#if 0
   1.181 -  pushad();
   1.182 -  NOT_LP64(push(FP);)
   1.183 -  char* b = new char[50];
   1.184 -  sprintf(b, " pc: %p\n", cur_pc);
   1.185 -  li(A0, (long)b);
   1.186 -  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
   1.187 -  delayed()->nop();
   1.188 -  NOT_LP64(pop(FP);)
   1.189 -  popad();
   1.190 -#endif
   1.191  }
   1.192  
   1.193  void MacroAssembler::increment(Register reg, int imm) {
   1.194 @@ -1251,7 +1155,7 @@
   1.195    assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
   1.196    assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
   1.197  
   1.198 -  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
   1.199 +  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp");
   1.200  
   1.201    // set last Java frame before call
   1.202    before_call_pc = (address)pc();
   1.203 @@ -1392,9 +1296,6 @@
   1.204      // accessing M[reg] w/o changing any (non-CC) registers
   1.205      // NOTE: cmpl is plenty here to provoke a segv
   1.206      lw(AT, reg, 0);
   1.207 -    // Note: should probably use testl(rax, Address(reg, 0));
   1.208 -    //       may be shorter code (however, this version of
   1.209 -    //       testl needs to be implemented first)
   1.210    } else {
   1.211      // nothing to do, (later) access of M[reg + offset]
   1.212      // will provoke OS NULL exception if reg = NULL
   1.213 @@ -1408,8 +1309,6 @@
   1.214  
   1.215  void MacroAssembler::leave() {
   1.216  #ifndef _LP64
   1.217 -  //move(SP, FP);
   1.218 -  //pop2(FP, RA);
   1.219    addi(SP, FP, 2 * wordSize);
   1.220    lw(RA, SP, - 1 * wordSize);
   1.221    lw(FP, SP, - 2 * wordSize);
   1.222 @@ -1419,13 +1318,7 @@
   1.223    ld(FP, SP, - 2 * wordSize);
   1.224  #endif
   1.225  }
   1.226 -/*
   1.227 -void MacroAssembler::os_breakpoint() {
   1.228 -  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
   1.229 -  // (e.g., MSVC can't call ps() otherwise)
   1.230 -  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
   1.231 -}
   1.232 -*/
   1.233 +
   1.234  void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) {
   1.235    // determine java_thread register
   1.236    if (!java_thread->is_valid()) {
   1.237 @@ -1479,7 +1372,7 @@
   1.238  
   1.239  // Calls to C land
   1.240  //
   1.241 -// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
   1.242 +// When entering C land, the fp, & sp of the last Java frame have to be recorded
   1.243  // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
   1.244  // has to be reset to 0. This is required to allow proper stack traversal.
   1.245  void MacroAssembler::set_last_Java_frame(Register java_thread,
   1.246 @@ -1624,10 +1517,10 @@
   1.247  
   1.248    // Calling the runtime using the regular call_VM_leaf mechanism generates
   1.249    // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
   1.250 -  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
   1.251 +  // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL.
   1.252    //
   1.253    // If we care generating the pre-barrier without a frame (e.g. in the
   1.254 -  // intrinsified Reference.get() routine) then ebp might be pointing to
   1.255 +  // intrinsified Reference.get() routine) then fp might be pointing to
   1.256    // the caller frame and so this check will most likely fail at runtime.
   1.257    //
   1.258    // Expanding the call directly bypasses the generation of the check.
   1.259 @@ -2036,19 +1929,18 @@
   1.260  
   1.261  static const double     pi_4 =  0.7853981633974483;
   1.262  
   1.263 -// the x86 version is to clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME
   1.264  // must get argument(a double) in F12/F13
   1.265  //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
   1.266  //We need to preseve the register which maybe modified during the Call
   1.267  void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
   1.268 -//save all modified register here
   1.269 -//FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9
   1.270 +  // save all modified registers here
   1.271 +  // FIXME, in the disassembly of trigfunc, only V0, V1, T9, SP, RA are used, so we only save V0, V1, T9
   1.272    pushad();
   1.273 -//we should preserve the stack space before we call
   1.274 +  // we should preserve the stack space before we call
   1.275    addi(SP, SP, -wordSize * 2);
   1.276 -        switch (trig){
   1.277 +  switch (trig){
   1.278      case 's' :
   1.279 -                  call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
   1.280 +      call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
   1.281        delayed()->nop();
   1.282        break;
   1.283      case 'c':
   1.284 @@ -2079,7 +1971,7 @@
   1.285      dsll(rd, rd, 16);
   1.286      ori(rd, rd, split_low(imm));
   1.287    } else if ((imm > 0) && is_simm16(imm >> 32)) {
   1.288 -    /* A 48-bit address */
   1.289 +    // A 48-bit address
   1.290      li48(rd, imm);
   1.291    } else {
   1.292      li64(rd, imm);
   1.293 @@ -2093,14 +1985,14 @@
   1.294  
   1.295  void MacroAssembler::li32(Register reg, int imm) {
   1.296    if (is_simm16(imm)) {
   1.297 -    /* for imm < 0, we should use addi instead of addiu.
   1.298 -     *
   1.299 -     *  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
   1.300 -     *
   1.301 -     *  78 move [int:-1|I] [a0|I]
   1.302 -     *    : daddi a0, zero, 0xffffffff  (correct)
   1.303 -     *    : daddiu a0, zero, 0xffffffff (incorrect)
   1.304 -     */
   1.305 +    // for imm < 0, we should use addi instead of addiu.
   1.306 +    //
   1.307 +    //  java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint)
   1.308 +    //
   1.309 +    //  78 move [int:-1|I] [a0|I]
   1.310 +    //    : daddi a0, zero, 0xffffffff  (correct)
   1.311 +    //    : daddiu a0, zero, 0xffffffff (incorrect)
   1.312 +    //
   1.313      if (imm >= 0)
   1.314        addiu(reg, R0, imm);
   1.315      else
   1.316 @@ -2339,26 +2231,8 @@
   1.317    ori(rd, rd, split_low(imm));
   1.318  }
   1.319  #endif
   1.320 -// NOTE: i dont push eax as i486.
   1.321 -// the x86 save eax for it use eax as the jump register
   1.322 +
   1.323  void MacroAssembler::verify_oop(Register reg, const char* s) {
   1.324 -  /*
   1.325 -     if (!VerifyOops) return;
   1.326 -
   1.327 -  // Pass register number to verify_oop_subroutine
   1.328 -  char* b = new char[strlen(s) + 50];
   1.329 -  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
   1.330 -  push(rax);                          // save rax,
   1.331 -  push(reg);                          // pass register argument
   1.332 -  ExternalAddress buffer((address) b);
   1.333 -  // avoid using pushptr, as it modifies scratch registers
   1.334 -  // and our contract is not to modify anything
   1.335 -  movptr(rax, buffer.addr());
   1.336 -  push(rax);
   1.337 -  // call indirectly to solve generation ordering problem
   1.338 -  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
   1.339 -  call(rax);
   1.340 -   */
   1.341    if (!VerifyOops) return;
   1.342    const char * b = NULL;
   1.343    stringStream ss;
   1.344 @@ -2461,8 +2335,8 @@
   1.345    delayed()->nop();
   1.346  
   1.347    // Check if the oop is in the right area of memory
   1.348 -  //const int oop_mask = Universe::verify_oop_mask();
   1.349 -  //const int oop_bits = Universe::verify_oop_bits();
   1.350 +  // const int oop_mask = Universe::verify_oop_mask();
   1.351 +  // const int oop_bits = Universe::verify_oop_bits();
   1.352    const uintptr_t oop_mask = Universe::verify_oop_mask();
   1.353    const uintptr_t oop_bits = Universe::verify_oop_bits();
   1.354    li(AT, oop_mask);
   1.355 @@ -2472,37 +2346,12 @@
   1.356    delayed()->nop();
   1.357  
   1.358    // make sure klass is 'reasonable'
   1.359 -  //add for compressedoops
   1.360 +  // add for compressedoops
   1.361    reinit_heapbase();
   1.362 -  //add for compressedoops
   1.363 +  // add for compressedoops
   1.364    load_klass(T0, A1);
   1.365    beq(T0, R0, error);                        // if klass is NULL it is broken
   1.366    delayed()->nop();
   1.367 -  #if 0
   1.368 -  //FIXME:wuhui.
   1.369 -  // Check if the klass is in the right area of memory
   1.370 -  //const int klass_mask = Universe::verify_klass_mask();
   1.371 -  //const int klass_bits = Universe::verify_klass_bits();
   1.372 -  const uintptr_t klass_mask = Universe::verify_klass_mask();
   1.373 -  const uintptr_t klass_bits = Universe::verify_klass_bits();
   1.374 -
   1.375 -  li(AT, klass_mask);
   1.376 -  andr(T1, T0, AT);
   1.377 -  li(AT, klass_bits);
   1.378 -  bne(T1, AT, error);
   1.379 -  delayed()->nop();
   1.380 -  // make sure klass' klass is 'reasonable'
   1.381 -  //add for compressedoops
   1.382 -  load_klass(T0, T0);
   1.383 -  beq(T0, R0, error);  // if klass' klass is NULL it is broken
   1.384 -  delayed()->nop();
   1.385 -
   1.386 -  li(AT, klass_mask);
   1.387 -  andr(T1, T0, AT);
   1.388 -  li(AT, klass_bits);
   1.389 -  bne(T1, AT, error);
   1.390 -  delayed()->nop();     // if klass not in right area of memory it is broken too.
   1.391 -#endif
   1.392    // return if everything seems ok
   1.393    bind(exit);
   1.394  
   1.395 @@ -2554,21 +2403,22 @@
   1.396    }
   1.397  #endif
   1.398  }
   1.399 - RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
   1.400 +
   1.401 +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
   1.402                                                         Register tmp,
   1.403                                                         int offset) {
   1.404 -   intptr_t value = *delayed_value_addr;
   1.405 -   if (value != 0)
   1.406 -   return RegisterOrConstant(value + offset);
   1.407 -   AddressLiteral a(delayed_value_addr);
   1.408 -   // load indirectly to solve generation ordering problem
   1.409 -   //movptr(tmp, ExternalAddress((address) delayed_value_addr));
   1.410 -   //ld(tmp, a);
   1.411 -   if (offset != 0)
   1.412 -     daddi(tmp,tmp, offset);
   1.413 -
   1.414 -   return RegisterOrConstant(tmp);
   1.415 - }
   1.416 +  intptr_t value = *delayed_value_addr;
   1.417 +  if (value != 0)
   1.418 +  return RegisterOrConstant(value + offset);
   1.419 +  AddressLiteral a(delayed_value_addr);
   1.420 +  // load indirectly to solve generation ordering problem
   1.421 +  //movptr(tmp, ExternalAddress((address) delayed_value_addr));
   1.422 +  //ld(tmp, a);
   1.423 +  if (offset != 0)
   1.424 +    daddi(tmp,tmp, offset);
   1.425 +
   1.426 +  return RegisterOrConstant(tmp);
   1.427 +}
   1.428  
   1.429  void MacroAssembler::hswap(Register reg) {
   1.430    //short
   1.431 @@ -2615,54 +2465,54 @@
   1.432  
   1.433  #ifdef _LP64
   1.434  
   1.435 -/* do 32-bit CAS using MIPS64 lld/scd
   1.436 -
   1.437 -  cas_int should only compare 32-bits of the memory value.
   1.438 -  However, lld/scd will do 64-bit operation, which violates the intention of cas_int.
   1.439 -  To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
   1.440 -  tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval,
   1.441 -  plus the high-32 bits or memory value, are stored togethor with SCD.
   1.442 -
   1.443 -Example:
   1.444 -
   1.445 -      double d = 3.1415926;
   1.446 -      System.err.println("hello" + d);
   1.447 -
   1.448 -  sun.misc.FloatingDecimal$1.<init>()
   1.449 -   |
   1.450 -   `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
   1.451 -
   1.452 -  38 cas_int [a7a7|J] [a0|I] [a6|I]
   1.453 -// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
   1.454 -// a6: 0x4ab325aa
   1.455 -
   1.456 -again:
   1.457 -   0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
   1.458 -
   1.459 -   0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
   1.460 -   0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
   1.461 -   0x00000055647f3c68: dsll32 t8, t8, 0
   1.462 -   0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
   1.463 -   0x00000055647f3c70: sll zero, zero, 0
   1.464 -
   1.465 -   0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
   1.466 -   0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
   1.467 -   0x00000055647f3c7c: ori v1, v1, 0xffffffff
   1.468 -   0x00000055647f3c80: and v1, a6, v1
   1.469 -   0x00000055647f3c84: or at, t8, v1
   1.470 -   0x00000055647f3c88: scd at, 0x0(a7)
   1.471 -   0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
   1.472 -   0x00000055647f3c90: sll zero, zero, 0
   1.473 -   0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
   1.474 -   0x00000055647f3c98: sll zero, zero, 0
   1.475 -nequal:
   1.476 -   0x00000055647f45a4: dadd a0, t9, zero
   1.477 -   0x00000055647f45a8: dadd at, zero, zero
   1.478 -done:
   1.479 -*/
   1.480 +// do 32-bit CAS using MIPS64 lld/scd
   1.481 +//
   1.482 +//  cas_int should only compare 32 bits of the memory value.
   1.483 +//  However, lld/scd perform a 64-bit operation, which violates the intention of cas_int.
   1.484 +//  To simulate a 32-bit atomic operation, the value loaded with LLD should be split into
   1.485 +//  two halves, and only the low 32 bits are compared. If they are equal, the low 32 bits of newval,
   1.486 +//  plus the high 32 bits of the memory value, are stored together with SCD.
   1.487 +//
   1.488 +//Example:
   1.489 +//
   1.490 +//      double d = 3.1415926;
   1.491 +//      System.err.println("hello" + d);
   1.492 +//
   1.493 +//  sun.misc.FloatingDecimal$1.<init>()
   1.494 +//   |
   1.495 +//   `- java.util.concurrent.atomic.AtomicInteger::compareAndSet()
   1.496 +//
   1.497 +//  38 cas_int [a7a7|J] [a0|I] [a6|I]
   1.498 +//   a0: 0xffffffffe8ea9f63 pc: 0x55647f3354
   1.499 +//   a6: 0x4ab325aa
   1.500 +//
   1.501 +//again:
   1.502 +//   0x00000055647f3c5c: lld at, 0x0(a7)                          ; 64-bit load, "0xe8ea9f63"
   1.503 +//
   1.504 +//   0x00000055647f3c60: sll t9, at, 0                            ; t9: low-32 bits (sign extended)
   1.505 +//   0x00000055647f3c64: dsrl32 t8, at, 0                         ; t8: high-32 bits
   1.506 +//   0x00000055647f3c68: dsll32 t8, t8, 0
   1.507 +//   0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c           ; goto nequal
   1.508 +//   0x00000055647f3c70: sll zero, zero, 0
   1.509 +//
   1.510 +//   0x00000055647f3c74: ori v1, zero, 0xffffffff                 ; v1: low-32 bits of newval (sign unextended)
   1.511 +//   0x00000055647f3c78: dsll v1, v1, 16                          ; v1 = a6 & 0xFFFFFFFF;
   1.512 +//   0x00000055647f3c7c: ori v1, v1, 0xffffffff
   1.513 +//   0x00000055647f3c80: and v1, a6, v1
   1.514 +//   0x00000055647f3c84: or at, t8, v1
   1.515 +//   0x00000055647f3c88: scd at, 0x0(a7)
   1.516 +//   0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c         ; goto again
   1.517 +//   0x00000055647f3c90: sll zero, zero, 0
   1.518 +//   0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac       ; goto done
   1.519 +//   0x00000055647f3c98: sll zero, zero, 0
   1.520 +//nequal:
   1.521 +//   0x00000055647f45a4: dadd a0, t9, zero
   1.522 +//   0x00000055647f45a8: dadd at, zero, zero
   1.523 +//done:
   1.524 +//
   1.525  
   1.526  void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
   1.527 -  /* MIPS64 can use ll/sc for 32-bit atomic memory access */
   1.528 +  // MIPS64 can use ll/sc for 32-bit atomic memory access
   1.529    Label done, again, nequal;
   1.530  
   1.531    bind(again);
   1.532 @@ -2794,7 +2644,7 @@
   1.533  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
   1.534  // another option would be to emit TrySlowEnter and TrySlowExit methods
   1.535  // at startup-time.  These methods would accept arguments as
   1.536 -// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
   1.537 +// (Obj, Self, box, Scratch) and return success-failure
   1.538  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
   1.539  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
   1.540  // In practice, however, the # of lock sites is bounded and is usually small.
   1.541 @@ -2820,8 +2670,8 @@
   1.542  //    the lock operators would typically be faster than reifying Self.
   1.543  //
   1.544  // *  Ideally I'd define the primitives as:
   1.545 -//       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
   1.546 -//       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
   1.547 +//       fast_lock   (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED.
   1.548 +//       fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED
   1.549  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
   1.550  //    Instead, we're stuck with a rather awkward and brittle register assignments below.
   1.551  //    Furthermore the register assignments are overconstrained, possibly resulting in
   1.552 @@ -2856,7 +2706,7 @@
   1.553  
   1.554  // obj: object to lock
   1.555  // box: on-stack box address (displaced header location) - KILLED
   1.556 -// rax,: tmp -- KILLED
   1.557 +// tmp: tmp -- KILLED
   1.558  // scr: tmp -- KILLED
   1.559  void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
   1.560  
   1.561 @@ -2869,10 +2719,6 @@
   1.562  
   1.563  
   1.564    block_comment("FastLock");
   1.565 -  /*
   1.566 -     move(AT, 0x0);
   1.567 -     return;
   1.568 -     */
   1.569    if (PrintBiasedLockingStatistics) {
   1.570      push(tmpReg);
   1.571      atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
   1.572 @@ -2982,7 +2828,7 @@
   1.573          pop(T0);
   1.574          bind(L);
   1.575        }
   1.576 -      sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */
   1.577 +      sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0
   1.578  
   1.579        b(DONE_LABEL) ;
   1.580        delayed()->nop();
   1.581 @@ -3038,8 +2884,8 @@
   1.582  }
   1.583  
   1.584  // obj: object to unlock
   1.585 -// box: box address (displaced header location), killed.  Must be EAX.
   1.586 -// rbx,: killed tmp; cannot be obj nor box.
   1.587 +// box: box address (displaced header location), killed.
   1.588 +// tmp: killed tmp; cannot be obj nor box.
   1.589  //
   1.590  // Some commentary on balanced locking:
   1.591  //
   1.592 @@ -3070,8 +2916,6 @@
   1.593    guarantee (objReg != tmpReg, "") ;
   1.594    guarantee (boxReg != tmpReg, "") ;
   1.595  
   1.596 -
   1.597 -
   1.598    block_comment("FastUnlock");
   1.599  
   1.600  
   1.601 @@ -3090,7 +2934,7 @@
   1.602        beq(tmpReg, R0, DONE_LABEL) ;
   1.603        move(AT, 0x1);  // delay slot
   1.604  
   1.605 -      cmpxchg(tmpReg, Address(objReg, 0), boxReg);          // Uses EAX which is box
   1.606 +      cmpxchg(tmpReg, Address(objReg, 0), boxReg);
   1.607        bind(DONE_LABEL);
   1.608      } else {
   1.609        Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
   1.610 @@ -3201,11 +3045,11 @@
   1.611  Register caller_saved_fpu_registers[] = {};
   1.612  #endif
   1.613  
   1.614 -//We preserve all caller-saved register
   1.615 +// We preserve all caller-saved registers
   1.616  void  MacroAssembler::pushad(){
   1.617    int i;
   1.618  
   1.619 -  /* Fixed-point registers */
   1.620 +  // Fixed-point registers
   1.621    int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
   1.622    daddi(SP, SP, -1 * len * wordSize);
   1.623    for (i = 0; i < len; i++)
   1.624 @@ -3217,7 +3061,7 @@
   1.625  #endif
   1.626    }
   1.627  
   1.628 -  /* Floating-point registers */
   1.629 +  // Floating-point registers
   1.630    len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
   1.631    daddi(SP, SP, -1 * len * wordSize);
   1.632    for (i = 0; i < len; i++)
   1.633 @@ -3233,7 +3077,7 @@
   1.634  void  MacroAssembler::popad(){
   1.635    int i;
   1.636  
   1.637 -  /* Floating-point registers */
   1.638 +  // Floating-point registers
   1.639    int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
   1.640    for (i = 0; i < len; i++)
   1.641    {
   1.642 @@ -3245,7 +3089,7 @@
   1.643    }
   1.644    daddi(SP, SP, len * wordSize);
   1.645  
   1.646 -  /* Fixed-point registers */
   1.647 +  // Fixed-point registers
   1.648    len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
   1.649    for (i = 0; i < len; i++)
   1.650    {
   1.651 @@ -3262,7 +3106,7 @@
   1.652  void MacroAssembler::pushad_except_v0() {
   1.653    int i;
   1.654  
   1.655 -  /* Fixed-point registers */
   1.656 +  // Fixed-point registers
   1.657    int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
   1.658    daddi(SP, SP, -1 * len * wordSize);
   1.659    for (i = 0; i < len; i++) {
   1.660 @@ -3273,7 +3117,7 @@
   1.661  #endif
   1.662    }
   1.663  
   1.664 -  /* Floating-point registers */
   1.665 +  // Floating-point registers
   1.666    len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
   1.667    daddi(SP, SP, -1 * len * wordSize);
   1.668    for (i = 0; i < len; i++) {
   1.669 @@ -3288,7 +3132,7 @@
   1.670  void MacroAssembler::popad_except_v0() {
   1.671    int i;
   1.672  
   1.673 -  /* Floating-point registers */
   1.674 +  // Floating-point registers
   1.675    int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
   1.676    for (i = 0; i < len; i++) {
   1.677  #ifdef _LP64
   1.678 @@ -3299,7 +3143,7 @@
   1.679    }
   1.680    daddi(SP, SP, len * wordSize);
   1.681  
   1.682 -  /* Fixed-point registers */
   1.683 +  // Fixed-point registers
   1.684    len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]);
   1.685    for (i = 0; i < len; i++) {
   1.686  #ifdef _LP64
   1.687 @@ -3335,7 +3179,7 @@
   1.688  #endif
   1.689  }
   1.690  
   1.691 -//for UseCompressedOops Option
   1.692 +// for UseCompressedOops Option
   1.693  void MacroAssembler::load_klass(Register dst, Register src) {
   1.694  #ifdef _LP64
   1.695    if(UseCompressedClassPointers){
   1.696 @@ -3897,7 +3741,6 @@
   1.697    // The repne_scan instruction uses fixed registers, which we must spill.
   1.698    // Don't worry too much about pre-existing connections with the input regs.
   1.699  
   1.700 -  // Get super_klass value into rax (even if it was in rdi or rcx).
   1.701  #ifndef PRODUCT
   1.702    int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
   1.703    ExternalAddress pst_counter_addr((address) pst_counter);
   1.704 @@ -3911,12 +3754,6 @@
   1.705    // Skip to start of data.
   1.706    daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
   1.707  
   1.708 -  // Scan RCX words at [RDI] for an occurrence of RAX.
   1.709 -  // Set NZ/Z based on last compare.
   1.710 -  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
   1.711 -  // not change flags (only scas instruction which is repeated sets flags).
   1.712 -  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
   1.713 -
   1.714    // OpenJDK8 never compresses klass pointers in secondary-super array.
   1.715    Label Loop, subtype;
   1.716    bind(Loop);
   1.717 @@ -4126,11 +3963,6 @@
   1.718    }
   1.719    const int base = InstanceKlass::vtable_start_offset() * wordSize;
   1.720    assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
   1.721 -/*
   1.722 -  Address vtable_entry_addr(recv_klass,
   1.723 -                            vtable_index, Address::times_ptr,
   1.724 -                            base + vtableEntry::method_offset_in_bytes());
   1.725 -*/
   1.726    if (vtable_index.is_constant()) {
   1.727      set64(AT, vtable_index.as_constant());
   1.728      dsll(AT, AT, (int)Address::times_ptr);

mercurial