src/cpu/ppc/vm/macroAssembler_ppc.cpp

changeset 6495
67fa91961822
parent 6486
b0133e4187d3
child 6501
c668f307a4c0
     1.1 --- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Tue Dec 10 14:29:43 2013 +0100
     1.2 +++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Dec 11 00:06:11 2013 +0100
     1.3 @@ -97,8 +97,10 @@
     1.4    }
     1.5  }
     1.6  
     1.7 -void MacroAssembler::align(int modulus) {
     1.8 -  while (offset() % modulus != 0) nop();
     1.9 +void MacroAssembler::align(int modulus, int max, int rem) { // Pad with nops until offset() is congruent to rem modulo modulus; emits nothing if that would need more than max padding.
    1.10 +  int padding = (rem + modulus - (offset() % modulus)) % modulus; // Distance from offset() to the next position p with p % modulus == rem % modulus (presumably in bytes - TODO confirm).
    1.11 +  if (padding > max) return; // Caller's budget exceeded: leave the position unaligned rather than emit that many nops.
    1.12 +  for (int c = (padding >> 2); c > 0; --c) { nop(); } // One nop per 4 units of padding (assumes each nop advances offset() by 4 - TODO confirm).
    1.13  }
    1.14  
    1.15  // Issue instructions that calculate given TOC from global TOC.
    1.16 @@ -186,16 +188,25 @@
    1.17  
    1.18  #ifdef _LP64
    1.19  // Patch compressed oops or klass constants.
    1.20 +// Assembler sequence is
    1.21 +// 1) compressed oops:
    1.22 +//    lis  rx = const.hi
    1.23 +//    ori rx = rx | const.lo
    1.24 +// 2) compressed klass:
    1.25 +//    lis  rx = const.hi
    1.26 +//    clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
    1.27 +//    ori rx = rx | const.lo
    1.28 +// The clrldi, if present, will be skipped over.
    1.29  int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
    1.30    assert(UseCompressedOops, "Should only patch compressed oops");
    1.31  
    1.32    const address inst2_addr = a;
    1.33    const int inst2 = *(int *)inst2_addr;
    1.34  
    1.35 -  // The relocation points to the second instruction, the addi,
    1.36 -  // and the addi reads and writes the same register dst.
    1.37 -  const int dst = inv_rt_field(inst2);
    1.38 -  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
    1.39 +  // The relocation points to the second instruction, the ori,
    1.40 +  // and the ori reads and writes the same register dst.
    1.41 +  const int dst = inv_rta_field(inst2);
    1.42 +  assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
    1.43    // Now, find the preceding addis which writes to dst.
    1.44    int inst1 = 0;
    1.45    address inst1_addr = inst2_addr - BytesPerInstWord;
    1.46 @@ -210,8 +221,9 @@
    1.47    int xc = (data >> 16) & 0xffff;
    1.48    int xd = (data >>  0) & 0xffff;
    1.49  
    1.50 -  set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2
    1.51 +  set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
    1.52    set_imm((int *)inst2_addr, (short)(xd));
    1.53 +
    1.54    return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
    1.55  }
    1.56  
    1.57 @@ -222,10 +234,10 @@
    1.58    const address inst2_addr = a;
    1.59    const int inst2 = *(int *)inst2_addr;
    1.60  
    1.61 -  // The relocation points to the second instruction, the addi,
    1.62 -  // and the addi reads and writes the same register dst.
    1.63 -  const int dst = inv_rt_field(inst2);
    1.64 -  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
    1.65 +  // The relocation points to the second instruction, the ori,
    1.66 +  // and the ori reads and writes the same register dst.
    1.67 +  const int dst = inv_rta_field(inst2);
    1.68 +  assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
    1.69    // Now, find the preceding lis which writes to dst.
    1.70    int inst1 = 0;
    1.71    address inst1_addr = inst2_addr - BytesPerInstWord;
    1.72 @@ -238,8 +250,9 @@
    1.73    }
    1.74    assert(inst1_found, "inst is not lis");
    1.75  
    1.76 -  uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff));
    1.77 -  uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16);
    1.78 +  uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
    1.79 +  uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
    1.80 +
    1.81    return (int) (xl | xh);
    1.82  }
    1.83  #endif // _LP64
    1.84 @@ -252,13 +265,10 @@
    1.85    // FIXME: We should insert relocation information for oops at the constant
    1.86    // pool entries instead of inserting it at the loads; patching of a constant
    1.87    // pool entry should be less expensive.
    1.88 -  Unimplemented();
    1.89 -  if (false) {
    1.90 -    address oop_address = address_constant((address)a.value(), RelocationHolder::none);
    1.91 -    // Relocate at the pc of the load.
    1.92 -    relocate(a.rspec());
    1.93 -    toc_offset = (int)(oop_address - code()->consts()->start());
    1.94 -  }
    1.95 +  address oop_address = address_constant((address)a.value(), RelocationHolder::none);
    1.96 +  // Relocate at the pc of the load.
    1.97 +  relocate(a.rspec());
    1.98 +  toc_offset = (int)(oop_address - code()->consts()->start());
    1.99    ld_largeoffset_unchecked(dst, toc_offset, toc, true);
   1.100  }
   1.101  
   1.102 @@ -532,7 +542,7 @@
   1.103        masm.b(dest);
   1.104      }
   1.105    }
   1.106 -  ICache::invalidate_range(instruction_addr, code_size);
   1.107 +  ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
   1.108  }
   1.109  
   1.110  // Emit a NOT mt-safe patchable 64 bit absolute call/jump.
   1.111 @@ -673,7 +683,7 @@
   1.112    CodeBuffer buf(instruction_addr, code_size);
   1.113    MacroAssembler masm(&buf);
   1.114    masm.bxx64_patchable(dest, relocInfo::none, link);
   1.115 -  ICache::invalidate_range(instruction_addr, code_size);
   1.116 +  ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
   1.117  }
   1.118  
   1.119  // Get dest address of a bxx64_patchable instruction.
   1.120 @@ -964,6 +974,14 @@
   1.121                         /*load env=*/true);
   1.122  }
   1.123  
   1.124 +address MacroAssembler::call_c_and_return_to_caller(Register fd) { // Branches to the function described by fd without linking; returns whatever branch_to returns. Presumably the callee then returns directly to our caller - TODO confirm.
   1.125 +  return branch_to(fd, /*and_link=*/false,
   1.126 +                       /*save toc=*/false,
   1.127 +                       /*restore toc=*/false,
   1.128 +                       /*load toc=*/true,
   1.129 +                       /*load env=*/true);
   1.130 +}
   1.131 +
   1.132  address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
   1.133    if (rt != relocInfo::none) {
   1.134      // this call needs to be relocatable
   1.135 @@ -2315,7 +2333,7 @@
   1.136    if (last_Java_pc != noreg)
   1.137      std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
   1.138  
   1.139 -  // set last_Java_sp last
   1.140 +  // Set last_Java_sp last.
   1.141    std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
   1.142  }
   1.143  
   1.144 @@ -2454,6 +2472,57 @@
   1.145    }
   1.146  }
   1.147  
   1.148 +// Clear Array: emits code that zeroes cnt_dwords doublewords (8 bytes each) starting at base_ptr. Large ranges are cleared one cache line at a time with dcbz; the head up to the first cache line boundary and the tail are cleared with 8-byte stores.
   1.149 +// Kills both input registers; the emitted code also uses CTR, CCR0 and CCR1. tmp == R0 is allowed.
   1.150 +void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
   1.151 +  // Procedure for large arrays (uses data cache block zero instruction).
   1.152 +    Label startloop, fast, fastloop, small_rest, restloop, done;
   1.153 +    const int cl_size         = VM_Version::get_cache_line_size(), // Cache line size (used below as a byte increment for base_ptr).
   1.154 +              cl_dwords       = cl_size>>3,                        // Doublewords per cache line.
   1.155 +              cl_dw_addr_bits = exact_log2(cl_dwords),             // log2(cl_dwords), used as shift count / mask width.
   1.156 +              dcbz_min        = 1;                     // Min count of dcbz executions, needs to be >0.
   1.157 +
   1.158 +//2: (the //N: markers are running indices of the emitted instructions)
   1.159 +    cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? Even after aligning (at most cl_dwords-1 dwords), >=dcbz_min full lines must remain.
   1.160 +    blt(CCR1, small_rest);                                      // Too small: clear everything with the dword loop.
   1.161 +    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits);           // Extract dword offset within first cache line (the beq below tests CCR0 for a zero result).
   1.162 +    beq(CCR0, fast);                                            // Already cache line aligned.
   1.163 +
   1.164 +    subfic(tmp, tmp, cl_dwords);       // tmp = cl_dwords - offset = dwords up to the next cache line boundary.
   1.165 +    mtctr(tmp);                        // Set ctr to hit cache line boundary (0<ctr<cl_dwords).
   1.166 +    subf(cnt_dwords, tmp, cnt_dwords); // Rest after the alignment loop.
   1.167 +    li(tmp, 0);                        // tmp now holds the zero value to store.
   1.168 +//10:
   1.169 +  bind(startloop);                     // Clear at the beginning to reach cache line boundary.
   1.170 +    std(tmp, 0, base_ptr);             // Clear one 8-byte doubleword.
   1.171 +    addi(base_ptr, base_ptr, 8);
   1.172 +    bdnz(startloop);
   1.173 +//13:
   1.174 +  bind(fast);                                  // Clear whole cache lines.
   1.175 +    srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for cache line loop (>0).
   1.176 +    andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
   1.177 +    mtctr(tmp);                                // Load counter.
   1.178 +//16:
   1.179 +  bind(fastloop);
   1.180 +    dcbz(base_ptr);                    // Clear one aligned cache line.
   1.181 +    addi(base_ptr, base_ptr, cl_size);
   1.182 +    bdnz(fastloop);
   1.183 +    if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
   1.184 +//20:
   1.185 +  bind(small_rest);
   1.186 +    cmpdi(CCR0, cnt_dwords, 0);        // size 0?
   1.187 +    beq(CCR0, done);                   // rest == 0
   1.188 +    li(tmp, 0);                        // Re-zero tmp: it is not zero when arriving here via the blt above or from the cache line loop.
   1.189 +    mtctr(cnt_dwords);                 // Load counter.
   1.190 +//24:
   1.191 +  bind(restloop);                      // Clear rest.
   1.192 +    std(tmp, 0, base_ptr);             // Clear one 8-byte doubleword.
   1.193 +    addi(base_ptr, base_ptr, 8);
   1.194 +    bdnz(restloop);
   1.195 +//27:
   1.196 +  bind(done);
   1.197 +}
   1.198 +
   1.199  /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
   1.200  
   1.201  // Search for a single jchar in an jchar[].
   1.202 @@ -2926,12 +2995,11 @@
   1.203    if (!VerifyOops) {
   1.204      return;
   1.205    }
   1.206 -  // will be preserved.
   1.207 +  // Will be preserved.
   1.208    Register tmp = R11;
   1.209    assert(oop != tmp, "precondition");
   1.210    unsigned int nbytes_save = 10*8; // 10 volatile gprs
   1.211 -  address/* FunctionDescriptor** */fd =
   1.212 -    StubRoutines::verify_oop_subroutine_entry_address();
   1.213 +  address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
   1.214    // save tmp
   1.215    mr(R0, tmp);
   1.216    // kill tmp

mercurial