src/cpu/x86/vm/assembler_x86.cpp

changeset 3574
fd8114661503
parent 3399
1cb50d7a9d95
child 3687
fd09f2d8283e
     1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Feb 15 16:29:40 2012 -0800
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Feb 15 21:37:49 2012 -0800
     1.3 @@ -236,6 +236,16 @@
     1.4    }
     1.5  }
     1.6  
     1.7 +// Register/immediate arithmetic form that forces generation of a 4 byte immediate value even if it fits into 8 bits: emits op1, then op2 | encode(dst), then imm32.
     1.8 +void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
     1.9 +  assert(isByte(op1) && isByte(op2), "wrong opcode");
    1.10 +  assert((op1 & 0x01) == 1, "should be 32bit operation");
    1.11 +  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");  // callers must pass the non-sign-extending opcode, since the immediate is always emitted in full
    1.12 +  emit_byte(op1);                  // opcode byte
    1.13 +  emit_byte(op2 | encode(dst));    // second byte: op2 combined with the encoding of dst
    1.14 +  emit_long(imm32);                // immediate, never shortened
    1.15 +}
    1.16 +
    1.17  // immediate-to-memory forms
    1.18  void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
    1.19    assert((op1 & 0x01) == 1, "should be 32bit operation");
    1.20 @@ -939,6 +949,7 @@
    1.21  }
    1.22  
    1.23  void Assembler::addr_nop_4() {
    1.24 +  assert(UseAddressNop, "no CPU support");
    1.25    // 4 bytes: NOP DWORD PTR [EAX+0]
    1.26    emit_byte(0x0F);
    1.27    emit_byte(0x1F);
    1.28 @@ -947,6 +958,7 @@
    1.29  }
    1.30  
    1.31  void Assembler::addr_nop_5() {
    1.32 +  assert(UseAddressNop, "no CPU support");
    1.33    // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
    1.34    emit_byte(0x0F);
    1.35    emit_byte(0x1F);
    1.36 @@ -956,6 +968,7 @@
    1.37  }
    1.38  
    1.39  void Assembler::addr_nop_7() {
    1.40 +  assert(UseAddressNop, "no CPU support");
    1.41    // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
    1.42    emit_byte(0x0F);
    1.43    emit_byte(0x1F);
    1.44 @@ -964,6 +977,7 @@
    1.45  }
    1.46  
    1.47  void Assembler::addr_nop_8() {
    1.48 +  assert(UseAddressNop, "no CPU support");
    1.49    // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
    1.50    emit_byte(0x0F);
    1.51    emit_byte(0x1F);
    1.52 @@ -2769,6 +2783,12 @@
    1.53    emit_arith(0x81, 0xE8, dst, imm32);
    1.54  }
    1.55  
    1.56 +// 32-bit subtract of an immediate from dst; forces generation of a 4 byte immediate value even if it fits into 8 bits, so the instruction length does not depend on imm32.
    1.57 +void Assembler::subl_imm32(Register dst, int32_t imm32) {
    1.58 +  prefix(dst);  // emitted before the opcode bytes
    1.59 +  emit_arith_imm32(0x81, 0xE8, dst, imm32);  // 0x81 with ModRM base 0xE8 (x86 opcode map: 0x81 /5 = SUB r/m32, imm32)
    1.60 +}
    1.61 +
    1.62  void Assembler::subl(Register dst, Address src) {
    1.63    InstructionMark im(this);
    1.64    prefix(src, dst);
    1.65 @@ -4760,6 +4780,12 @@
    1.66    emit_arith(0x81, 0xE8, dst, imm32);
    1.67  }
    1.68  
    1.69 +// 64-bit counterpart of subl_imm32: subtract an immediate from dst, forcing generation of a 4 byte immediate value even if it fits into 8 bits.
    1.70 +void Assembler::subq_imm32(Register dst, int32_t imm32) {
    1.71 +  (void) prefixq_and_encode(dst->encoding());  // result deliberately discarded: emit_arith_imm32 encodes dst itself; presumably called here only to emit the prefix
    1.72 +  emit_arith_imm32(0x81, 0xE8, dst, imm32);  // same opcode pair as the 32-bit form
    1.73 +}
    1.74 +
    1.75  void Assembler::subq(Register dst, Address src) {
    1.76    InstructionMark im(this);
    1.77    prefixq(src, dst);
    1.78 @@ -5101,15 +5127,6 @@
    1.79    }
    1.80  }
    1.81  
    1.82 -void MacroAssembler::fat_nop() {
    1.83 -  // A 5 byte nop that is safe for patching (see patch_verified_entry)
    1.84 -  emit_byte(0x26); // es:
    1.85 -  emit_byte(0x2e); // cs:
    1.86 -  emit_byte(0x64); // fs:
    1.87 -  emit_byte(0x65); // gs:
    1.88 -  emit_byte(0x90);
    1.89 -}
    1.90 -
    1.91  void MacroAssembler::jC2(Register tmp, Label& L) {
    1.92    // set parity bit if FPU flag C2 is set (via rax)
    1.93    save_rax(tmp);
    1.94 @@ -5704,17 +5721,6 @@
    1.95    /* else */      { subq(dst, value)       ; return; }
    1.96  }
    1.97  
    1.98 -void MacroAssembler::fat_nop() {
    1.99 -  // A 5 byte nop that is safe for patching (see patch_verified_entry)
   1.100 -  // Recommened sequence from 'Software Optimization Guide for the AMD
   1.101 -  // Hammer Processor'
   1.102 -  emit_byte(0x66);
   1.103 -  emit_byte(0x66);
   1.104 -  emit_byte(0x90);
   1.105 -  emit_byte(0x66);
   1.106 -  emit_byte(0x90);
   1.107 -}
   1.108 -
   1.109  void MacroAssembler::incrementq(Register reg, int value) {
   1.110    if (value == min_jint) { addq(reg, value); return; }
   1.111    if (value <  0) { decrementq(reg, -value); return; }
   1.112 @@ -6766,6 +6772,19 @@
   1.113    mov(rbp, rsp);
   1.114  }
   1.115  
   1.116 +// A 5 byte nop that is safe for patching (see patch_verified_entry). Both branches below emit exactly 5 bytes.
   1.117 +void MacroAssembler::fat_nop() {
   1.118 +  if (UseAddressNop) {
   1.119 +    addr_nop_5();  // single 5 byte address nop; addr_nop_5 itself asserts UseAddressNop
   1.120 +  } else {
   1.121 +    emit_byte(0x26); // es:
   1.122 +    emit_byte(0x2e); // cs:
   1.123 +    emit_byte(0x64); // fs:
   1.124 +    emit_byte(0x65); // gs:
   1.125 +    emit_byte(0x90); // one-byte nop carrying the four segment prefixes above
   1.126 +  }
   1.127 +}
   1.128 +
   1.129  void MacroAssembler::fcmp(Register tmp) {
   1.130    fcmp(tmp, 1, true, true);
   1.131  }
   1.132 @@ -7825,6 +7844,11 @@
   1.133    LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
   1.134  }
   1.135  
   1.136 +// Pointer-sized subtract of an immediate that forces generation of a 4 byte immediate value even if it fits into 8 bits; dispatches to subq_imm32 on LP64 builds and to subl_imm32 otherwise.
   1.137 +void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
   1.138 +  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
   1.139 +}
   1.140 +
   1.141  void MacroAssembler::subptr(Register dst, Register src) {
   1.142    LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
   1.143  }
   1.144 @@ -9292,6 +9316,80 @@
   1.145  }
   1.146  #endif // _LP64
   1.147  
   1.148 +
   1.149 +// C2 compiled method's prolog code: optional stack bang, frame allocation with rbp saved, optional stack-depth cookie and (32-bit only) FPU control word setup. framesize must be a multiple of StackAlignmentInBytes and includes the return address word.
   1.150 +void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
   1.151 +
   1.152 +  // WARNING: Initial instruction MUST be 5 bytes or longer so that
   1.153 +  // NativeJump::patch_verified_entry will be able to patch out the entry
   1.154 +  // code safely. The push to verify stack depth is ok at 5 bytes,
   1.155 +  // the frame allocation can be either 3 or 6 bytes. So if we don't do
   1.156 +  // stack bang then we must use the 6 byte frame allocation even if
   1.157 +  // we have no frame. :-(
   1.158 +
   1.159 +  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   1.160 +  // Remove word for return addr
   1.161 +  framesize -= wordSize;
   1.162 +
   1.163 +  // Calls to C2R adapters often do not accept exceptional returns.
   1.164 +  // We require that their callers must bang for them.  But be careful, because
   1.165 +  // some VM calls (such as call site linkage) can use several kilobytes of
   1.166 +  // stack.  But the stack safety zone should account for that.
   1.167 +  // See bugs 4446381, 4468289, 4497237.
   1.168 +  if (stack_bang) {
   1.169 +    generate_stack_overflow_check(framesize);
   1.170 +
   1.171 +    // We always push rbp so that on return to interpreter rbp will be
   1.172 +    // restored correctly and we can correct the stack.
   1.173 +    push(rbp);
   1.174 +    // Remove word for ebp
   1.175 +    framesize -= wordSize;
   1.176 +
   1.177 +    // Create frame
   1.178 +    if (framesize) {
   1.179 +      subptr(rsp, framesize);  // skipped entirely when nothing remains to allocate
   1.180 +    }
   1.181 +  } else {
   1.182 +    // Create frame (force generation of a 4 byte immediate value)
   1.183 +    subptr_imm32(rsp, framesize);  // emitted even when framesize is 0, see WARNING above
   1.184 +
   1.185 +    // Save RBP register now.
   1.186 +    framesize -= wordSize;
   1.187 +    movptr(Address(rsp, framesize), rbp);  // stored in the highest word of the frame just allocated
   1.188 +  }
   1.189 +
   1.190 +  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
   1.191 +    framesize -= wordSize;
   1.192 +    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);  // cookie goes in the word just below the saved rbp
   1.193 +  }
   1.194 +
   1.195 +#ifndef _LP64
   1.196 +  // If method sets FPU control word do it now
   1.197 +  if (fp_mode_24b) {
   1.198 +    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
   1.199 +  }
   1.200 +  if (UseSSE >= 2 && VerifyFPU) {
   1.201 +    verify_FPU(0, "FPU stack must be clean on entry");
   1.202 +  }
   1.203 +#endif
   1.204 +
   1.205 +#ifdef ASSERT
   1.206 +  if (VerifyStackAtCalls) {  // debug builds only: emit a runtime check of rsp alignment
   1.207 +    Label L;
   1.208 +    push(rax);  // rax is used as scratch and restored below
   1.209 +    mov(rax, rsp);
   1.210 +    andptr(rax, StackAlignmentInBytes-1);
   1.211 +    cmpptr(rax, StackAlignmentInBytes-wordSize);  // presumably offset by one word because of the push(rax) above
   1.212 +    pop(rax);  // x86 pop leaves the flags from cmpptr intact for the jcc below
   1.213 +    jcc(Assembler::equal, L);
   1.214 +    stop("Stack is not properly aligned!");
   1.215 +    bind(L);
   1.216 +  }
   1.217 +#endif
   1.218 +
   1.219 +}
   1.220 +
   1.221 +
   1.222  // IndexOf for constant substrings with size >= 8 chars
   1.223  // which don't need to be loaded through stack.
   1.224  void MacroAssembler::string_indexofC8(Register str1, Register str2,

mercurial