1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/assembler_x86_32.cpp Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,4979 @@ 1.4 +/* 1.5 + * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "incls/_precompiled.incl" 1.29 +#include "incls/_assembler_x86_32.cpp.incl" 1.30 + 1.31 +// Implementation of AddressLiteral 1.32 + 1.33 +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 1.34 + _is_lval = false; 1.35 + _target = target; 1.36 + switch (rtype) { 1.37 + case relocInfo::oop_type: 1.38 + // Oops are a special case. Normally they would be their own section 1.39 + // but in cases like icBuffer they are literals in the code stream that 1.40 + // we don't have a section for. We use none so that we get a literal address 1.41 + // which is always patchable. 
1.42 + break; 1.43 + case relocInfo::external_word_type: 1.44 + _rspec = external_word_Relocation::spec(target); 1.45 + break; 1.46 + case relocInfo::internal_word_type: 1.47 + _rspec = internal_word_Relocation::spec(target); 1.48 + break; 1.49 + case relocInfo::opt_virtual_call_type: 1.50 + _rspec = opt_virtual_call_Relocation::spec(); 1.51 + break; 1.52 + case relocInfo::static_call_type: 1.53 + _rspec = static_call_Relocation::spec(); 1.54 + break; 1.55 + case relocInfo::runtime_call_type: 1.56 + _rspec = runtime_call_Relocation::spec(); 1.57 + break; 1.58 + case relocInfo::poll_type: 1.59 + case relocInfo::poll_return_type: 1.60 + _rspec = Relocation::spec_simple(rtype); 1.61 + break; 1.62 + case relocInfo::none: 1.63 + break; 1.64 + default: 1.65 + ShouldNotReachHere(); 1.66 + break; 1.67 + } 1.68 +} 1.69 + 1.70 +// Implementation of Address 1.71 + 1.72 +Address Address::make_array(ArrayAddress adr) { 1.73 +#ifdef _LP64 1.74 + // Not implementable on 64bit machines 1.75 + // Should have been handled higher up the call chain. 1.76 + ShouldNotReachHere(); 1.77 +#else 1.78 + AddressLiteral base = adr.base(); 1.79 + Address index = adr.index(); 1.80 + assert(index._disp == 0, "must not have disp"); // maybe it can? 1.81 + Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 1.82 + array._rspec = base._rspec; 1.83 + return array; 1.84 +#endif // _LP64 1.85 +} 1.86 + 1.87 +#ifndef _LP64 1.88 + 1.89 +// exceedingly dangerous constructor 1.90 +Address::Address(address loc, RelocationHolder spec) { 1.91 + _base = noreg; 1.92 + _index = noreg; 1.93 + _scale = no_scale; 1.94 + _disp = (intptr_t) loc; 1.95 + _rspec = spec; 1.96 +} 1.97 +#endif // _LP64 1.98 + 1.99 +// Convert the raw encoding form into the form expected by the constructor for 1.100 +// Address. An index of 4 (rsp) corresponds to having no index, so convert 1.101 +// that to noreg for the Address constructor. 
1.102 +Address Address::make_raw(int base, int index, int scale, int disp) { 1.103 + bool valid_index = index != rsp->encoding(); 1.104 + if (valid_index) { 1.105 + Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 1.106 + return madr; 1.107 + } else { 1.108 + Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 1.109 + return madr; 1.110 + } 1.111 +} 1.112 + 1.113 +// Implementation of Assembler 1.114 + 1.115 +int AbstractAssembler::code_fill_byte() { 1.116 + return (u_char)'\xF4'; // hlt 1.117 +} 1.118 + 1.119 +// make this go away someday 1.120 +void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 1.121 + if (rtype == relocInfo::none) 1.122 + emit_long(data); 1.123 + else emit_data(data, Relocation::spec_simple(rtype), format); 1.124 +} 1.125 + 1.126 + 1.127 +void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 1.128 + assert(imm32_operand == 0, "default format must be imm32 in this file"); 1.129 + assert(inst_mark() != NULL, "must be inside InstructionMark"); 1.130 + if (rspec.type() != relocInfo::none) { 1.131 + #ifdef ASSERT 1.132 + check_relocation(rspec, format); 1.133 + #endif 1.134 + // Do not use AbstractAssembler::relocate, which is not intended for 1.135 + // embedded words. Instead, relocate to the enclosing instruction. 1.136 + 1.137 + // hack. 
call32 is too wide for mask so use disp32 1.138 + if (format == call32_operand) 1.139 + code_section()->relocate(inst_mark(), rspec, disp32_operand); 1.140 + else 1.141 + code_section()->relocate(inst_mark(), rspec, format); 1.142 + } 1.143 + emit_long(data); 1.144 +} 1.145 + 1.146 + 1.147 +void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 1.148 + assert(dst->has_byte_register(), "must have byte register"); 1.149 + assert(isByte(op1) && isByte(op2), "wrong opcode"); 1.150 + assert(isByte(imm8), "not a byte"); 1.151 + assert((op1 & 0x01) == 0, "should be 8bit operation"); 1.152 + emit_byte(op1); 1.153 + emit_byte(op2 | dst->encoding()); 1.154 + emit_byte(imm8); 1.155 +} 1.156 + 1.157 + 1.158 +void Assembler::emit_arith(int op1, int op2, Register dst, int imm32) { 1.159 + assert(isByte(op1) && isByte(op2), "wrong opcode"); 1.160 + assert((op1 & 0x01) == 1, "should be 32bit operation"); 1.161 + assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 1.162 + if (is8bit(imm32)) { 1.163 + emit_byte(op1 | 0x02); // set sign bit 1.164 + emit_byte(op2 | dst->encoding()); 1.165 + emit_byte(imm32 & 0xFF); 1.166 + } else { 1.167 + emit_byte(op1); 1.168 + emit_byte(op2 | dst->encoding()); 1.169 + emit_long(imm32); 1.170 + } 1.171 +} 1.172 + 1.173 +// immediate-to-memory forms 1.174 +void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int imm32) { 1.175 + assert((op1 & 0x01) == 1, "should be 32bit operation"); 1.176 + assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 1.177 + if (is8bit(imm32)) { 1.178 + emit_byte(op1 | 0x02); // set sign bit 1.179 + emit_operand(rm,adr); 1.180 + emit_byte(imm32 & 0xFF); 1.181 + } else { 1.182 + emit_byte(op1); 1.183 + emit_operand(rm,adr); 1.184 + emit_long(imm32); 1.185 + } 1.186 +} 1.187 + 1.188 +void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) { 1.189 + assert(isByte(op1) && isByte(op2), "wrong opcode"); 1.190 + assert((op1 & 0x01) == 1, "should be 
32bit operation"); 1.191 + assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 1.192 + InstructionMark im(this); 1.193 + emit_byte(op1); 1.194 + emit_byte(op2 | dst->encoding()); 1.195 + emit_data((int)obj, relocInfo::oop_type, 0); 1.196 +} 1.197 + 1.198 + 1.199 +void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 1.200 + assert(isByte(op1) && isByte(op2), "wrong opcode"); 1.201 + emit_byte(op1); 1.202 + emit_byte(op2 | dst->encoding() << 3 | src->encoding()); 1.203 +} 1.204 + 1.205 + 1.206 +void Assembler::emit_operand(Register reg, 1.207 + Register base, 1.208 + Register index, 1.209 + Address::ScaleFactor scale, 1.210 + int disp, 1.211 + RelocationHolder const& rspec) { 1.212 + 1.213 + relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 1.214 + if (base->is_valid()) { 1.215 + if (index->is_valid()) { 1.216 + assert(scale != Address::no_scale, "inconsistent address"); 1.217 + // [base + index*scale + disp] 1.218 + if (disp == 0 && rtype == relocInfo::none && base != rbp) { 1.219 + // [base + index*scale] 1.220 + // [00 reg 100][ss index base] 1.221 + assert(index != rsp, "illegal addressing mode"); 1.222 + emit_byte(0x04 | reg->encoding() << 3); 1.223 + emit_byte(scale << 6 | index->encoding() << 3 | base->encoding()); 1.224 + } else if (is8bit(disp) && rtype == relocInfo::none) { 1.225 + // [base + index*scale + imm8] 1.226 + // [01 reg 100][ss index base] imm8 1.227 + assert(index != rsp, "illegal addressing mode"); 1.228 + emit_byte(0x44 | reg->encoding() << 3); 1.229 + emit_byte(scale << 6 | index->encoding() << 3 | base->encoding()); 1.230 + emit_byte(disp & 0xFF); 1.231 + } else { 1.232 + // [base + index*scale + imm32] 1.233 + // [10 reg 100][ss index base] imm32 1.234 + assert(index != rsp, "illegal addressing mode"); 1.235 + emit_byte(0x84 | reg->encoding() << 3); 1.236 + emit_byte(scale << 6 | index->encoding() << 3 | base->encoding()); 1.237 + emit_data(disp, rspec, disp32_operand); 1.238 + } 1.239 + 
} else if (base == rsp) { 1.240 + // [esp + disp] 1.241 + if (disp == 0 && rtype == relocInfo::none) { 1.242 + // [esp] 1.243 + // [00 reg 100][00 100 100] 1.244 + emit_byte(0x04 | reg->encoding() << 3); 1.245 + emit_byte(0x24); 1.246 + } else if (is8bit(disp) && rtype == relocInfo::none) { 1.247 + // [esp + imm8] 1.248 + // [01 reg 100][00 100 100] imm8 1.249 + emit_byte(0x44 | reg->encoding() << 3); 1.250 + emit_byte(0x24); 1.251 + emit_byte(disp & 0xFF); 1.252 + } else { 1.253 + // [esp + imm32] 1.254 + // [10 reg 100][00 100 100] imm32 1.255 + emit_byte(0x84 | reg->encoding() << 3); 1.256 + emit_byte(0x24); 1.257 + emit_data(disp, rspec, disp32_operand); 1.258 + } 1.259 + } else { 1.260 + // [base + disp] 1.261 + assert(base != rsp, "illegal addressing mode"); 1.262 + if (disp == 0 && rtype == relocInfo::none && base != rbp) { 1.263 + // [base] 1.264 + // [00 reg base] 1.265 + assert(base != rbp, "illegal addressing mode"); 1.266 + emit_byte(0x00 | reg->encoding() << 3 | base->encoding()); 1.267 + } else if (is8bit(disp) && rtype == relocInfo::none) { 1.268 + // [base + imm8] 1.269 + // [01 reg base] imm8 1.270 + emit_byte(0x40 | reg->encoding() << 3 | base->encoding()); 1.271 + emit_byte(disp & 0xFF); 1.272 + } else { 1.273 + // [base + imm32] 1.274 + // [10 reg base] imm32 1.275 + emit_byte(0x80 | reg->encoding() << 3 | base->encoding()); 1.276 + emit_data(disp, rspec, disp32_operand); 1.277 + } 1.278 + } 1.279 + } else { 1.280 + if (index->is_valid()) { 1.281 + assert(scale != Address::no_scale, "inconsistent address"); 1.282 + // [index*scale + disp] 1.283 + // [00 reg 100][ss index 101] imm32 1.284 + assert(index != rsp, "illegal addressing mode"); 1.285 + emit_byte(0x04 | reg->encoding() << 3); 1.286 + emit_byte(scale << 6 | index->encoding() << 3 | 0x05); 1.287 + emit_data(disp, rspec, disp32_operand); 1.288 + } else { 1.289 + // [disp] 1.290 + // [00 reg 101] imm32 1.291 + emit_byte(0x05 | reg->encoding() << 3); 1.292 + emit_data(disp, rspec, 
disp32_operand); 1.293 + } 1.294 + } 1.295 +} 1.296 + 1.297 +// Secret local extension to Assembler::WhichOperand: 1.298 +#define end_pc_operand (_WhichOperand_limit) 1.299 + 1.300 +address Assembler::locate_operand(address inst, WhichOperand which) { 1.301 + // Decode the given instruction, and return the address of 1.302 + // an embedded 32-bit operand word. 1.303 + 1.304 + // If "which" is disp32_operand, selects the displacement portion 1.305 + // of an effective address specifier. 1.306 + // If "which" is imm32_operand, selects the trailing immediate constant. 1.307 + // If "which" is call32_operand, selects the displacement of a call or jump. 1.308 + // Caller is responsible for ensuring that there is such an operand, 1.309 + // and that it is 32 bits wide. 1.310 + 1.311 + // If "which" is end_pc_operand, find the end of the instruction. 1.312 + 1.313 + address ip = inst; 1.314 + 1.315 + debug_only(bool has_imm32 = false); 1.316 + int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn 1.317 + 1.318 + again_after_prefix: 1.319 + switch (0xFF & *ip++) { 1.320 + 1.321 + // These convenience macros generate groups of "case" labels for the switch. 1.322 + #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3 1.323 + #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \ 1.324 + case (x)+4: case (x)+5: case (x)+6: case (x)+7 1.325 + #define REP16(x) REP8((x)+0): \ 1.326 + case REP8((x)+8) 1.327 + 1.328 + case CS_segment: 1.329 + case SS_segment: 1.330 + case DS_segment: 1.331 + case ES_segment: 1.332 + case FS_segment: 1.333 + case GS_segment: 1.334 + assert(ip == inst+1, "only one prefix allowed"); 1.335 + goto again_after_prefix; 1.336 + 1.337 + case 0xFF: // pushl a; decl a; incl a; call a; jmp a 1.338 + case 0x88: // movb a, r 1.339 + case 0x89: // movl a, r 1.340 + case 0x8A: // movb r, a 1.341 + case 0x8B: // movl r, a 1.342 + case 0x8F: // popl a 1.343 + break; 1.344 + 1.345 + case 0x68: // pushl #32(oop?) 
1.346 + if (which == end_pc_operand) return ip + 4; 1.347 + assert(which == imm32_operand, "pushl has no disp32"); 1.348 + return ip; // not produced by emit_operand 1.349 + 1.350 + case 0x66: // movw ... (size prefix) 1.351 + switch (0xFF & *ip++) { 1.352 + case 0x8B: // movw r, a 1.353 + case 0x89: // movw a, r 1.354 + break; 1.355 + case 0xC7: // movw a, #16 1.356 + tail_size = 2; // the imm16 1.357 + break; 1.358 + case 0x0F: // several SSE/SSE2 variants 1.359 + ip--; // reparse the 0x0F 1.360 + goto again_after_prefix; 1.361 + default: 1.362 + ShouldNotReachHere(); 1.363 + } 1.364 + break; 1.365 + 1.366 + case REP8(0xB8): // movl r, #32(oop?) 1.367 + if (which == end_pc_operand) return ip + 4; 1.368 + assert(which == imm32_operand || which == disp32_operand, ""); 1.369 + return ip; 1.370 + 1.371 + case 0x69: // imul r, a, #32 1.372 + case 0xC7: // movl a, #32(oop?) 1.373 + tail_size = 4; 1.374 + debug_only(has_imm32 = true); // has both kinds of operands! 1.375 + break; 1.376 + 1.377 + case 0x0F: // movx..., etc. 1.378 + switch (0xFF & *ip++) { 1.379 + case 0x12: // movlps 1.380 + case 0x28: // movaps 1.381 + case 0x2E: // ucomiss 1.382 + case 0x2F: // comiss 1.383 + case 0x54: // andps 1.384 + case 0x55: // andnps 1.385 + case 0x56: // orps 1.386 + case 0x57: // xorps 1.387 + case 0x6E: // movd 1.388 + case 0x7E: // movd 1.389 + case 0xAE: // ldmxcsr a 1.390 + // amd side says it these have both operands but that doesn't 1.391 + // appear to be true. 1.392 + // debug_only(has_imm32 = true); // has both kinds of operands! 
1.393 + break; 1.394 + 1.395 + case 0xAD: // shrd r, a, %cl 1.396 + case 0xAF: // imul r, a 1.397 + case 0xBE: // movsxb r, a 1.398 + case 0xBF: // movsxw r, a 1.399 + case 0xB6: // movzxb r, a 1.400 + case 0xB7: // movzxw r, a 1.401 + case REP16(0x40): // cmovl cc, r, a 1.402 + case 0xB0: // cmpxchgb 1.403 + case 0xB1: // cmpxchg 1.404 + case 0xC1: // xaddl 1.405 + case 0xC7: // cmpxchg8 1.406 + case REP16(0x90): // setcc a 1.407 + // fall out of the switch to decode the address 1.408 + break; 1.409 + case 0xAC: // shrd r, a, #8 1.410 + tail_size = 1; // the imm8 1.411 + break; 1.412 + case REP16(0x80): // jcc rdisp32 1.413 + if (which == end_pc_operand) return ip + 4; 1.414 + assert(which == call32_operand, "jcc has no disp32 or imm32"); 1.415 + return ip; 1.416 + default: 1.417 + ShouldNotReachHere(); 1.418 + } 1.419 + break; 1.420 + 1.421 + case 0x81: // addl a, #32; addl r, #32 1.422 + // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 1.423 + // in the case of cmpl, the imm32 might be an oop 1.424 + tail_size = 4; 1.425 + debug_only(has_imm32 = true); // has both kinds of operands! 1.426 + break; 1.427 + 1.428 + case 0x85: // test r/m, r 1.429 + break; 1.430 + 1.431 + case 0x83: // addl a, #8; addl r, #8 1.432 + // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 1.433 + tail_size = 1; 1.434 + break; 1.435 + 1.436 + case 0x9B: 1.437 + switch (0xFF & *ip++) { 1.438 + case 0xD9: // fnstcw a 1.439 + break; 1.440 + default: 1.441 + ShouldNotReachHere(); 1.442 + } 1.443 + break; 1.444 + 1.445 + case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a 1.446 + case REP4(0x10): // adc... 1.447 + case REP4(0x20): // and... 1.448 + case REP4(0x30): // xor... 1.449 + case REP4(0x08): // or... 1.450 + case REP4(0x18): // sbb... 1.451 + case REP4(0x28): // sub... 1.452 + case REP4(0x38): // cmp... 
1.453 + case 0xF7: // mull a 1.454 + case 0x8D: // leal r, a 1.455 + case 0x87: // xchg r, a 1.456 + break; 1.457 + 1.458 + case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8 1.459 + case 0xC6: // movb a, #8 1.460 + case 0x80: // cmpb a, #8 1.461 + case 0x6B: // imul r, a, #8 1.462 + tail_size = 1; // the imm8 1.463 + break; 1.464 + 1.465 + case 0xE8: // call rdisp32 1.466 + case 0xE9: // jmp rdisp32 1.467 + if (which == end_pc_operand) return ip + 4; 1.468 + assert(which == call32_operand, "call has no disp32 or imm32"); 1.469 + return ip; 1.470 + 1.471 + case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 1.472 + case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 1.473 + case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 1.474 + case 0xDD: // fld_d a; fst_d a; fstp_d a 1.475 + case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 1.476 + case 0xDF: // fild_d a; fistp_d a 1.477 + case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 1.478 + case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 1.479 + case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 1.480 + break; 1.481 + 1.482 + case 0xF3: // For SSE 1.483 + case 0xF2: // For SSE2 1.484 + ip++; ip++; 1.485 + break; 1.486 + 1.487 + default: 1.488 + ShouldNotReachHere(); 1.489 + 1.490 + #undef REP8 1.491 + #undef REP16 1.492 + } 1.493 + 1.494 + assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); 1.495 + assert(which != imm32_operand || has_imm32, "instruction has no imm32 field"); 1.496 + 1.497 + // parse the output of emit_operand 1.498 + int op2 = 0xFF & *ip++; 1.499 + int base = op2 & 0x07; 1.500 + int op3 = -1; 1.501 + const int b100 = 4; 1.502 + const int b101 = 5; 1.503 + if (base == b100 && (op2 >> 6) != 3) { 1.504 + op3 = 0xFF & *ip++; 1.505 + base = op3 & 0x07; // refetch the base 1.506 + } 1.507 + // now ip points at the disp (if any) 1.508 + 1.509 + switch (op2 >> 6) { 1.510 + case 0: 1.511 + // [00 reg 100][ss index 
base] 1.512 + // [00 reg 100][00 100 rsp] 1.513 + // [00 reg base] 1.514 + // [00 reg 100][ss index 101][disp32] 1.515 + // [00 reg 101] [disp32] 1.516 + 1.517 + if (base == b101) { 1.518 + if (which == disp32_operand) 1.519 + return ip; // caller wants the disp32 1.520 + ip += 4; // skip the disp32 1.521 + } 1.522 + break; 1.523 + 1.524 + case 1: 1.525 + // [01 reg 100][ss index base][disp8] 1.526 + // [01 reg 100][00 100 rsp][disp8] 1.527 + // [01 reg base] [disp8] 1.528 + ip += 1; // skip the disp8 1.529 + break; 1.530 + 1.531 + case 2: 1.532 + // [10 reg 100][ss index base][disp32] 1.533 + // [10 reg 100][00 100 rsp][disp32] 1.534 + // [10 reg base] [disp32] 1.535 + if (which == disp32_operand) 1.536 + return ip; // caller wants the disp32 1.537 + ip += 4; // skip the disp32 1.538 + break; 1.539 + 1.540 + case 3: 1.541 + // [11 reg base] (not a memory addressing mode) 1.542 + break; 1.543 + } 1.544 + 1.545 + if (which == end_pc_operand) { 1.546 + return ip + tail_size; 1.547 + } 1.548 + 1.549 + assert(which == imm32_operand, "instruction has only an imm32 field"); 1.550 + return ip; 1.551 +} 1.552 + 1.553 +address Assembler::locate_next_instruction(address inst) { 1.554 + // Secretly share code with locate_operand: 1.555 + return locate_operand(inst, end_pc_operand); 1.556 +} 1.557 + 1.558 + 1.559 +#ifdef ASSERT 1.560 +void Assembler::check_relocation(RelocationHolder const& rspec, int format) { 1.561 + address inst = inst_mark(); 1.562 + assert(inst != NULL && inst < pc(), "must point to beginning of instruction"); 1.563 + address opnd; 1.564 + 1.565 + Relocation* r = rspec.reloc(); 1.566 + if (r->type() == relocInfo::none) { 1.567 + return; 1.568 + } else if (r->is_call() || format == call32_operand) { 1.569 + // assert(format == imm32_operand, "cannot specify a nonzero format"); 1.570 + opnd = locate_operand(inst, call32_operand); 1.571 + } else if (r->is_data()) { 1.572 + assert(format == imm32_operand || format == disp32_operand, "format ok"); 1.573 + opnd 
= locate_operand(inst, (WhichOperand)format); 1.574 + } else { 1.575 + assert(format == imm32_operand, "cannot specify a format"); 1.576 + return; 1.577 + } 1.578 + assert(opnd == pc(), "must put operand where relocs can find it"); 1.579 +} 1.580 +#endif 1.581 + 1.582 + 1.583 + 1.584 +void Assembler::emit_operand(Register reg, Address adr) { 1.585 + emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 1.586 +} 1.587 + 1.588 + 1.589 +void Assembler::emit_farith(int b1, int b2, int i) { 1.590 + assert(isByte(b1) && isByte(b2), "wrong opcode"); 1.591 + assert(0 <= i && i < 8, "illegal stack offset"); 1.592 + emit_byte(b1); 1.593 + emit_byte(b2 + i); 1.594 +} 1.595 + 1.596 + 1.597 +void Assembler::pushad() { 1.598 + emit_byte(0x60); 1.599 +} 1.600 + 1.601 +void Assembler::popad() { 1.602 + emit_byte(0x61); 1.603 +} 1.604 + 1.605 +void Assembler::pushfd() { 1.606 + emit_byte(0x9C); 1.607 +} 1.608 + 1.609 +void Assembler::popfd() { 1.610 + emit_byte(0x9D); 1.611 +} 1.612 + 1.613 +void Assembler::pushl(int imm32) { 1.614 + emit_byte(0x68); 1.615 + emit_long(imm32); 1.616 +} 1.617 + 1.618 +#ifndef _LP64 1.619 +void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 1.620 + InstructionMark im(this); 1.621 + emit_byte(0x68); 1.622 + emit_data(imm32, rspec, 0); 1.623 +} 1.624 +#endif // _LP64 1.625 + 1.626 +void Assembler::pushl(Register src) { 1.627 + emit_byte(0x50 | src->encoding()); 1.628 +} 1.629 + 1.630 + 1.631 +void Assembler::pushl(Address src) { 1.632 + InstructionMark im(this); 1.633 + emit_byte(0xFF); 1.634 + emit_operand(rsi, src); 1.635 +} 1.636 + 1.637 +void Assembler::popl(Register dst) { 1.638 + emit_byte(0x58 | dst->encoding()); 1.639 +} 1.640 + 1.641 + 1.642 +void Assembler::popl(Address dst) { 1.643 + InstructionMark im(this); 1.644 + emit_byte(0x8F); 1.645 + emit_operand(rax, dst); 1.646 +} 1.647 + 1.648 + 1.649 +void Assembler::prefix(Prefix p) { 1.650 + a_byte(p); 1.651 +} 1.652 + 1.653 + 1.654 +void 
Assembler::movb(Register dst, Address src) { 1.655 + assert(dst->has_byte_register(), "must have byte register"); 1.656 + InstructionMark im(this); 1.657 + emit_byte(0x8A); 1.658 + emit_operand(dst, src); 1.659 +} 1.660 + 1.661 + 1.662 +void Assembler::movb(Address dst, int imm8) { 1.663 + InstructionMark im(this); 1.664 + emit_byte(0xC6); 1.665 + emit_operand(rax, dst); 1.666 + emit_byte(imm8); 1.667 +} 1.668 + 1.669 + 1.670 +void Assembler::movb(Address dst, Register src) { 1.671 + assert(src->has_byte_register(), "must have byte register"); 1.672 + InstructionMark im(this); 1.673 + emit_byte(0x88); 1.674 + emit_operand(src, dst); 1.675 +} 1.676 + 1.677 + 1.678 +void Assembler::movw(Address dst, int imm16) { 1.679 + InstructionMark im(this); 1.680 + 1.681 + emit_byte(0x66); // switch to 16-bit mode 1.682 + emit_byte(0xC7); 1.683 + emit_operand(rax, dst); 1.684 + emit_word(imm16); 1.685 +} 1.686 + 1.687 + 1.688 +void Assembler::movw(Register dst, Address src) { 1.689 + InstructionMark im(this); 1.690 + emit_byte(0x66); 1.691 + emit_byte(0x8B); 1.692 + emit_operand(dst, src); 1.693 +} 1.694 + 1.695 + 1.696 +void Assembler::movw(Address dst, Register src) { 1.697 + InstructionMark im(this); 1.698 + emit_byte(0x66); 1.699 + emit_byte(0x89); 1.700 + emit_operand(src, dst); 1.701 +} 1.702 + 1.703 + 1.704 +void Assembler::movl(Register dst, int imm32) { 1.705 + emit_byte(0xB8 | dst->encoding()); 1.706 + emit_long(imm32); 1.707 +} 1.708 + 1.709 +#ifndef _LP64 1.710 +void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 1.711 + 1.712 + InstructionMark im(this); 1.713 + emit_byte(0xB8 | dst->encoding()); 1.714 + emit_data((int)imm32, rspec, 0); 1.715 +} 1.716 +#endif // _LP64 1.717 + 1.718 +void Assembler::movl(Register dst, Register src) { 1.719 + emit_byte(0x8B); 1.720 + emit_byte(0xC0 | (dst->encoding() << 3) | src->encoding()); 1.721 +} 1.722 + 1.723 + 1.724 +void Assembler::movl(Register dst, Address src) { 1.725 + InstructionMark 
im(this); 1.726 + emit_byte(0x8B); 1.727 + emit_operand(dst, src); 1.728 +} 1.729 + 1.730 + 1.731 +void Assembler::movl(Address dst, int imm32) { 1.732 + InstructionMark im(this); 1.733 + emit_byte(0xC7); 1.734 + emit_operand(rax, dst); 1.735 + emit_long(imm32); 1.736 +} 1.737 + 1.738 +#ifndef _LP64 1.739 +void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 1.740 + InstructionMark im(this); 1.741 + emit_byte(0xC7); 1.742 + emit_operand(rax, dst); 1.743 + emit_data((int)imm32, rspec, 0); 1.744 +} 1.745 +#endif // _LP64 1.746 + 1.747 +void Assembler::movl(Address dst, Register src) { 1.748 + InstructionMark im(this); 1.749 + emit_byte(0x89); 1.750 + emit_operand(src, dst); 1.751 +} 1.752 + 1.753 +void Assembler::movsxb(Register dst, Address src) { 1.754 + InstructionMark im(this); 1.755 + emit_byte(0x0F); 1.756 + emit_byte(0xBE); 1.757 + emit_operand(dst, src); 1.758 +} 1.759 + 1.760 +void Assembler::movsxb(Register dst, Register src) { 1.761 + assert(src->has_byte_register(), "must have byte register"); 1.762 + emit_byte(0x0F); 1.763 + emit_byte(0xBE); 1.764 + emit_byte(0xC0 | (dst->encoding() << 3) | src->encoding()); 1.765 +} 1.766 + 1.767 + 1.768 +void Assembler::movsxw(Register dst, Address src) { 1.769 + InstructionMark im(this); 1.770 + emit_byte(0x0F); 1.771 + emit_byte(0xBF); 1.772 + emit_operand(dst, src); 1.773 +} 1.774 + 1.775 + 1.776 +void Assembler::movsxw(Register dst, Register src) { 1.777 + emit_byte(0x0F); 1.778 + emit_byte(0xBF); 1.779 + emit_byte(0xC0 | (dst->encoding() << 3) | src->encoding()); 1.780 +} 1.781 + 1.782 + 1.783 +void Assembler::movzxb(Register dst, Address src) { 1.784 + InstructionMark im(this); 1.785 + emit_byte(0x0F); 1.786 + emit_byte(0xB6); 1.787 + emit_operand(dst, src); 1.788 +} 1.789 + 1.790 + 1.791 +void Assembler::movzxb(Register dst, Register src) { 1.792 + assert(src->has_byte_register(), "must have byte register"); 1.793 + emit_byte(0x0F); 1.794 + emit_byte(0xB6); 1.795 + 
emit_byte(0xC0 | (dst->encoding() << 3) | src->encoding()); 1.796 +} 1.797 + 1.798 + 1.799 +void Assembler::movzxw(Register dst, Address src) { 1.800 + InstructionMark im(this); 1.801 + emit_byte(0x0F); 1.802 + emit_byte(0xB7); 1.803 + emit_operand(dst, src); 1.804 +} 1.805 + 1.806 + 1.807 +void Assembler::movzxw(Register dst, Register src) { 1.808 + emit_byte(0x0F); 1.809 + emit_byte(0xB7); 1.810 + emit_byte(0xC0 | (dst->encoding() << 3) | src->encoding()); 1.811 +} 1.812 + 1.813 + 1.814 +void Assembler::cmovl(Condition cc, Register dst, Register src) { 1.815 + guarantee(VM_Version::supports_cmov(), "illegal instruction"); 1.816 + emit_byte(0x0F); 1.817 + emit_byte(0x40 | cc); 1.818 + emit_byte(0xC0 | (dst->encoding() << 3) | src->encoding()); 1.819 +} 1.820 + 1.821 + 1.822 +void Assembler::cmovl(Condition cc, Register dst, Address src) { 1.823 + guarantee(VM_Version::supports_cmov(), "illegal instruction"); 1.824 + // The code below seems to be wrong - however the manual is inconclusive 1.825 + // do not use for now (remember to enable all callers when fixing this) 1.826 + Unimplemented(); 1.827 + // wrong bytes? 
1.828 + InstructionMark im(this); 1.829 + emit_byte(0x0F); 1.830 + emit_byte(0x40 | cc); 1.831 + emit_operand(dst, src); 1.832 +} 1.833 + 1.834 + 1.835 +void Assembler::prefetcht0(Address src) { 1.836 + assert(VM_Version::supports_sse(), "must support"); 1.837 + InstructionMark im(this); 1.838 + emit_byte(0x0F); 1.839 + emit_byte(0x18); 1.840 + emit_operand(rcx, src); // 1, src 1.841 +} 1.842 + 1.843 + 1.844 +void Assembler::prefetcht1(Address src) { 1.845 + assert(VM_Version::supports_sse(), "must support"); 1.846 + InstructionMark im(this); 1.847 + emit_byte(0x0F); 1.848 + emit_byte(0x18); 1.849 + emit_operand(rdx, src); // 2, src 1.850 +} 1.851 + 1.852 + 1.853 +void Assembler::prefetcht2(Address src) { 1.854 + assert(VM_Version::supports_sse(), "must support"); 1.855 + InstructionMark im(this); 1.856 + emit_byte(0x0F); 1.857 + emit_byte(0x18); 1.858 + emit_operand(rbx, src); // 3, src 1.859 +} 1.860 + 1.861 + 1.862 +void Assembler::prefetchnta(Address src) { 1.863 + assert(VM_Version::supports_sse2(), "must support"); 1.864 + InstructionMark im(this); 1.865 + emit_byte(0x0F); 1.866 + emit_byte(0x18); 1.867 + emit_operand(rax, src); // 0, src 1.868 +} 1.869 + 1.870 + 1.871 +void Assembler::prefetchw(Address src) { 1.872 + assert(VM_Version::supports_3dnow(), "must support"); 1.873 + InstructionMark im(this); 1.874 + emit_byte(0x0F); 1.875 + emit_byte(0x0D); 1.876 + emit_operand(rcx, src); // 1, src 1.877 +} 1.878 + 1.879 + 1.880 +void Assembler::prefetchr(Address src) { 1.881 + assert(VM_Version::supports_3dnow(), "must support"); 1.882 + InstructionMark im(this); 1.883 + emit_byte(0x0F); 1.884 + emit_byte(0x0D); 1.885 + emit_operand(rax, src); // 0, src 1.886 +} 1.887 + 1.888 + 1.889 +void Assembler::adcl(Register dst, int imm32) { 1.890 + emit_arith(0x81, 0xD0, dst, imm32); 1.891 +} 1.892 + 1.893 + 1.894 +void Assembler::adcl(Register dst, Address src) { 1.895 + InstructionMark im(this); 1.896 + emit_byte(0x13); 1.897 + emit_operand(dst, src); 1.898 +} 1.899 + 
1.900 + 1.901 +void Assembler::adcl(Register dst, Register src) { 1.902 + emit_arith(0x13, 0xC0, dst, src); 1.903 +} 1.904 + 1.905 + 1.906 +void Assembler::addl(Address dst, int imm32) { 1.907 + InstructionMark im(this); 1.908 + emit_arith_operand(0x81,rax,dst,imm32); 1.909 +} 1.910 + 1.911 + 1.912 +void Assembler::addl(Address dst, Register src) { 1.913 + InstructionMark im(this); 1.914 + emit_byte(0x01); 1.915 + emit_operand(src, dst); 1.916 +} 1.917 + 1.918 + 1.919 +void Assembler::addl(Register dst, int imm32) { 1.920 + emit_arith(0x81, 0xC0, dst, imm32); 1.921 +} 1.922 + 1.923 + 1.924 +void Assembler::addl(Register dst, Address src) { 1.925 + InstructionMark im(this); 1.926 + emit_byte(0x03); 1.927 + emit_operand(dst, src); 1.928 +} 1.929 + 1.930 + 1.931 +void Assembler::addl(Register dst, Register src) { 1.932 + emit_arith(0x03, 0xC0, dst, src); 1.933 +} 1.934 + 1.935 + 1.936 +void Assembler::andl(Register dst, int imm32) { 1.937 + emit_arith(0x81, 0xE0, dst, imm32); 1.938 +} 1.939 + 1.940 + 1.941 +void Assembler::andl(Register dst, Address src) { 1.942 + InstructionMark im(this); 1.943 + emit_byte(0x23); 1.944 + emit_operand(dst, src); 1.945 +} 1.946 + 1.947 + 1.948 +void Assembler::andl(Register dst, Register src) { 1.949 + emit_arith(0x23, 0xC0, dst, src); 1.950 +} 1.951 + 1.952 + 1.953 +void Assembler::cmpb(Address dst, int imm8) { 1.954 + InstructionMark im(this); 1.955 + emit_byte(0x80); 1.956 + emit_operand(rdi, dst); 1.957 + emit_byte(imm8); 1.958 +} 1.959 + 1.960 +void Assembler::cmpw(Address dst, int imm16) { 1.961 + InstructionMark im(this); 1.962 + emit_byte(0x66); 1.963 + emit_byte(0x81); 1.964 + emit_operand(rdi, dst); 1.965 + emit_word(imm16); 1.966 +} 1.967 + 1.968 +void Assembler::cmpl(Address dst, int imm32) { 1.969 + InstructionMark im(this); 1.970 + emit_byte(0x81); 1.971 + emit_operand(rdi, dst); 1.972 + emit_long(imm32); 1.973 +} 1.974 + 1.975 +#ifndef _LP64 1.976 +void Assembler::cmp_literal32(Register src1, int32_t imm32, 
RelocationHolder const& rspec) { 1.977 + InstructionMark im(this); 1.978 + emit_byte(0x81); 1.979 + emit_byte(0xF8 | src1->encoding()); 1.980 + emit_data(imm32, rspec, 0); 1.981 +} 1.982 + 1.983 +void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 1.984 + InstructionMark im(this); 1.985 + emit_byte(0x81); 1.986 + emit_operand(rdi, src1); 1.987 + emit_data(imm32, rspec, 0); 1.988 +} 1.989 +#endif // _LP64 1.990 + 1.991 + 1.992 +void Assembler::cmpl(Register dst, int imm32) { 1.993 + emit_arith(0x81, 0xF8, dst, imm32); 1.994 +} 1.995 + 1.996 + 1.997 +void Assembler::cmpl(Register dst, Register src) { 1.998 + emit_arith(0x3B, 0xC0, dst, src); 1.999 +} 1.1000 + 1.1001 + 1.1002 +void Assembler::cmpl(Register dst, Address src) { 1.1003 + InstructionMark im(this); 1.1004 + emit_byte(0x3B); 1.1005 + emit_operand(dst, src); 1.1006 +} 1.1007 + 1.1008 + 1.1009 +void Assembler::decl(Register dst) { 1.1010 + // Don't use it directly. Use MacroAssembler::decrement() instead. 1.1011 + emit_byte(0x48 | dst->encoding()); 1.1012 +} 1.1013 + 1.1014 + 1.1015 +void Assembler::decl(Address dst) { 1.1016 + // Don't use it directly. Use MacroAssembler::decrement() instead. 
1.1017 + InstructionMark im(this); 1.1018 + emit_byte(0xFF); 1.1019 + emit_operand(rcx, dst); 1.1020 +} 1.1021 + 1.1022 + 1.1023 +void Assembler::idivl(Register src) { 1.1024 + emit_byte(0xF7); 1.1025 + emit_byte(0xF8 | src->encoding()); 1.1026 +} 1.1027 + 1.1028 + 1.1029 +void Assembler::cdql() { 1.1030 + emit_byte(0x99); 1.1031 +} 1.1032 + 1.1033 + 1.1034 +void Assembler::imull(Register dst, Register src) { 1.1035 + emit_byte(0x0F); 1.1036 + emit_byte(0xAF); 1.1037 + emit_byte(0xC0 | dst->encoding() << 3 | src->encoding()); 1.1038 +} 1.1039 + 1.1040 + 1.1041 +void Assembler::imull(Register dst, Register src, int value) { 1.1042 + if (is8bit(value)) { 1.1043 + emit_byte(0x6B); 1.1044 + emit_byte(0xC0 | dst->encoding() << 3 | src->encoding()); 1.1045 + emit_byte(value); 1.1046 + } else { 1.1047 + emit_byte(0x69); 1.1048 + emit_byte(0xC0 | dst->encoding() << 3 | src->encoding()); 1.1049 + emit_long(value); 1.1050 + } 1.1051 +} 1.1052 + 1.1053 + 1.1054 +void Assembler::incl(Register dst) { 1.1055 + // Don't use it directly. Use MacroAssembler::increment() instead. 1.1056 + emit_byte(0x40 | dst->encoding()); 1.1057 +} 1.1058 + 1.1059 + 1.1060 +void Assembler::incl(Address dst) { 1.1061 + // Don't use it directly. Use MacroAssembler::increment() instead. 
1.1062 + InstructionMark im(this); 1.1063 + emit_byte(0xFF); 1.1064 + emit_operand(rax, dst); 1.1065 +} 1.1066 + 1.1067 + 1.1068 +void Assembler::leal(Register dst, Address src) { 1.1069 + InstructionMark im(this); 1.1070 + emit_byte(0x8D); 1.1071 + emit_operand(dst, src); 1.1072 +} 1.1073 + 1.1074 +void Assembler::mull(Address src) { 1.1075 + InstructionMark im(this); 1.1076 + emit_byte(0xF7); 1.1077 + emit_operand(rsp, src); 1.1078 +} 1.1079 + 1.1080 + 1.1081 +void Assembler::mull(Register src) { 1.1082 + emit_byte(0xF7); 1.1083 + emit_byte(0xE0 | src->encoding()); 1.1084 +} 1.1085 + 1.1086 + 1.1087 +void Assembler::negl(Register dst) { 1.1088 + emit_byte(0xF7); 1.1089 + emit_byte(0xD8 | dst->encoding()); 1.1090 +} 1.1091 + 1.1092 + 1.1093 +void Assembler::notl(Register dst) { 1.1094 + emit_byte(0xF7); 1.1095 + emit_byte(0xD0 | dst->encoding()); 1.1096 +} 1.1097 + 1.1098 + 1.1099 +void Assembler::orl(Address dst, int imm32) { 1.1100 + InstructionMark im(this); 1.1101 + emit_byte(0x81); 1.1102 + emit_operand(rcx, dst); 1.1103 + emit_long(imm32); 1.1104 +} 1.1105 + 1.1106 +void Assembler::orl(Register dst, int imm32) { 1.1107 + emit_arith(0x81, 0xC8, dst, imm32); 1.1108 +} 1.1109 + 1.1110 + 1.1111 +void Assembler::orl(Register dst, Address src) { 1.1112 + InstructionMark im(this); 1.1113 + emit_byte(0x0B); 1.1114 + emit_operand(dst, src); 1.1115 +} 1.1116 + 1.1117 + 1.1118 +void Assembler::orl(Register dst, Register src) { 1.1119 + emit_arith(0x0B, 0xC0, dst, src); 1.1120 +} 1.1121 + 1.1122 + 1.1123 +void Assembler::rcll(Register dst, int imm8) { 1.1124 + assert(isShiftCount(imm8), "illegal shift count"); 1.1125 + if (imm8 == 1) { 1.1126 + emit_byte(0xD1); 1.1127 + emit_byte(0xD0 | dst->encoding()); 1.1128 + } else { 1.1129 + emit_byte(0xC1); 1.1130 + emit_byte(0xD0 | dst->encoding()); 1.1131 + emit_byte(imm8); 1.1132 + } 1.1133 +} 1.1134 + 1.1135 + 1.1136 +void Assembler::sarl(Register dst, int imm8) { 1.1137 + assert(isShiftCount(imm8), "illegal shift count"); 
1.1138 + if (imm8 == 1) { 1.1139 + emit_byte(0xD1); 1.1140 + emit_byte(0xF8 | dst->encoding()); 1.1141 + } else { 1.1142 + emit_byte(0xC1); 1.1143 + emit_byte(0xF8 | dst->encoding()); 1.1144 + emit_byte(imm8); 1.1145 + } 1.1146 +} 1.1147 + 1.1148 + 1.1149 +void Assembler::sarl(Register dst) { 1.1150 + emit_byte(0xD3); 1.1151 + emit_byte(0xF8 | dst->encoding()); 1.1152 +} 1.1153 + 1.1154 + 1.1155 +void Assembler::sbbl(Address dst, int imm32) { 1.1156 + InstructionMark im(this); 1.1157 + emit_arith_operand(0x81,rbx,dst,imm32); 1.1158 +} 1.1159 + 1.1160 + 1.1161 +void Assembler::sbbl(Register dst, int imm32) { 1.1162 + emit_arith(0x81, 0xD8, dst, imm32); 1.1163 +} 1.1164 + 1.1165 + 1.1166 +void Assembler::sbbl(Register dst, Address src) { 1.1167 + InstructionMark im(this); 1.1168 + emit_byte(0x1B); 1.1169 + emit_operand(dst, src); 1.1170 +} 1.1171 + 1.1172 + 1.1173 +void Assembler::sbbl(Register dst, Register src) { 1.1174 + emit_arith(0x1B, 0xC0, dst, src); 1.1175 +} 1.1176 + 1.1177 + 1.1178 +void Assembler::shldl(Register dst, Register src) { 1.1179 + emit_byte(0x0F); 1.1180 + emit_byte(0xA5); 1.1181 + emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 1.1182 +} 1.1183 + 1.1184 + 1.1185 +void Assembler::shll(Register dst, int imm8) { 1.1186 + assert(isShiftCount(imm8), "illegal shift count"); 1.1187 + if (imm8 == 1 ) { 1.1188 + emit_byte(0xD1); 1.1189 + emit_byte(0xE0 | dst->encoding()); 1.1190 + } else { 1.1191 + emit_byte(0xC1); 1.1192 + emit_byte(0xE0 | dst->encoding()); 1.1193 + emit_byte(imm8); 1.1194 + } 1.1195 +} 1.1196 + 1.1197 + 1.1198 +void Assembler::shll(Register dst) { 1.1199 + emit_byte(0xD3); 1.1200 + emit_byte(0xE0 | dst->encoding()); 1.1201 +} 1.1202 + 1.1203 + 1.1204 +void Assembler::shrdl(Register dst, Register src) { 1.1205 + emit_byte(0x0F); 1.1206 + emit_byte(0xAD); 1.1207 + emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 1.1208 +} 1.1209 + 1.1210 + 1.1211 +void Assembler::shrl(Register dst, int imm8) { 1.1212 + 
assert(isShiftCount(imm8), "illegal shift count"); 1.1213 + emit_byte(0xC1); 1.1214 + emit_byte(0xE8 | dst->encoding()); 1.1215 + emit_byte(imm8); 1.1216 +} 1.1217 + 1.1218 + 1.1219 +void Assembler::shrl(Register dst) { 1.1220 + emit_byte(0xD3); 1.1221 + emit_byte(0xE8 | dst->encoding()); 1.1222 +} 1.1223 + 1.1224 + 1.1225 +void Assembler::subl(Address dst, int imm32) { 1.1226 + if (is8bit(imm32)) { 1.1227 + InstructionMark im(this); 1.1228 + emit_byte(0x83); 1.1229 + emit_operand(rbp, dst); 1.1230 + emit_byte(imm32 & 0xFF); 1.1231 + } else { 1.1232 + InstructionMark im(this); 1.1233 + emit_byte(0x81); 1.1234 + emit_operand(rbp, dst); 1.1235 + emit_long(imm32); 1.1236 + } 1.1237 +} 1.1238 + 1.1239 + 1.1240 +void Assembler::subl(Register dst, int imm32) { 1.1241 + emit_arith(0x81, 0xE8, dst, imm32); 1.1242 +} 1.1243 + 1.1244 + 1.1245 +void Assembler::subl(Address dst, Register src) { 1.1246 + InstructionMark im(this); 1.1247 + emit_byte(0x29); 1.1248 + emit_operand(src, dst); 1.1249 +} 1.1250 + 1.1251 + 1.1252 +void Assembler::subl(Register dst, Address src) { 1.1253 + InstructionMark im(this); 1.1254 + emit_byte(0x2B); 1.1255 + emit_operand(dst, src); 1.1256 +} 1.1257 + 1.1258 + 1.1259 +void Assembler::subl(Register dst, Register src) { 1.1260 + emit_arith(0x2B, 0xC0, dst, src); 1.1261 +} 1.1262 + 1.1263 + 1.1264 +void Assembler::testb(Register dst, int imm8) { 1.1265 + assert(dst->has_byte_register(), "must have byte register"); 1.1266 + emit_arith_b(0xF6, 0xC0, dst, imm8); 1.1267 +} 1.1268 + 1.1269 + 1.1270 +void Assembler::testl(Register dst, int imm32) { 1.1271 + // not using emit_arith because test 1.1272 + // doesn't support sign-extension of 1.1273 + // 8bit operands 1.1274 + if (dst->encoding() == 0) { 1.1275 + emit_byte(0xA9); 1.1276 + } else { 1.1277 + emit_byte(0xF7); 1.1278 + emit_byte(0xC0 | dst->encoding()); 1.1279 + } 1.1280 + emit_long(imm32); 1.1281 +} 1.1282 + 1.1283 + 1.1284 +void Assembler::testl(Register dst, Register src) { 1.1285 + 
emit_arith(0x85, 0xC0, dst, src); 1.1286 +} 1.1287 + 1.1288 +void Assembler::testl(Register dst, Address src) { 1.1289 + InstructionMark im(this); 1.1290 + emit_byte(0x85); 1.1291 + emit_operand(dst, src); 1.1292 +} 1.1293 + 1.1294 +void Assembler::xaddl(Address dst, Register src) { 1.1295 + InstructionMark im(this); 1.1296 + emit_byte(0x0F); 1.1297 + emit_byte(0xC1); 1.1298 + emit_operand(src, dst); 1.1299 +} 1.1300 + 1.1301 +void Assembler::xorl(Register dst, int imm32) { 1.1302 + emit_arith(0x81, 0xF0, dst, imm32); 1.1303 +} 1.1304 + 1.1305 + 1.1306 +void Assembler::xorl(Register dst, Address src) { 1.1307 + InstructionMark im(this); 1.1308 + emit_byte(0x33); 1.1309 + emit_operand(dst, src); 1.1310 +} 1.1311 + 1.1312 + 1.1313 +void Assembler::xorl(Register dst, Register src) { 1.1314 + emit_arith(0x33, 0xC0, dst, src); 1.1315 +} 1.1316 + 1.1317 + 1.1318 +void Assembler::bswap(Register reg) { 1.1319 + emit_byte(0x0F); 1.1320 + emit_byte(0xC8 | reg->encoding()); 1.1321 +} 1.1322 + 1.1323 + 1.1324 +void Assembler::lock() { 1.1325 + if (Atomics & 1) { 1.1326 + // Emit either nothing, a NOP, or a NOP: prefix 1.1327 + emit_byte(0x90) ; 1.1328 + } else { 1.1329 + emit_byte(0xF0); 1.1330 + } 1.1331 +} 1.1332 + 1.1333 + 1.1334 +void Assembler::xchg(Register reg, Address adr) { 1.1335 + InstructionMark im(this); 1.1336 + emit_byte(0x87); 1.1337 + emit_operand(reg, adr); 1.1338 +} 1.1339 + 1.1340 + 1.1341 +void Assembler::xchgl(Register dst, Register src) { 1.1342 + emit_byte(0x87); 1.1343 + emit_byte(0xc0 | dst->encoding() << 3 | src->encoding()); 1.1344 +} 1.1345 + 1.1346 + 1.1347 +// The 32-bit cmpxchg compares the value at adr with the contents of rax, 1.1348 +// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. 1.1349 +// The ZF is set if the compared values were equal, and cleared otherwise. 
1.1350 +void Assembler::cmpxchg(Register reg, Address adr) { 1.1351 + if (Atomics & 2) { 1.1352 + // caveat: no instructionmark, so this isn't relocatable. 1.1353 + // Emit a synthetic, non-atomic, CAS equivalent. 1.1354 + // Beware. The synthetic form sets all ICCs, not just ZF. 1.1355 + // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r) 1.1356 + cmpl (rax, adr) ; 1.1357 + movl (rax, adr) ; 1.1358 + if (reg != rax) { 1.1359 + Label L ; 1.1360 + jcc (Assembler::notEqual, L) ; 1.1361 + movl (adr, reg) ; 1.1362 + bind (L) ; 1.1363 + } 1.1364 + } else { 1.1365 + InstructionMark im(this); 1.1366 + emit_byte(0x0F); 1.1367 + emit_byte(0xB1); 1.1368 + emit_operand(reg, adr); 1.1369 + } 1.1370 +} 1.1371 + 1.1372 +// The 64-bit cmpxchg compares the value at adr with the contents of rdx:rax, 1.1373 +// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 1.1374 +// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 1.1375 +void Assembler::cmpxchg8(Address adr) { 1.1376 + InstructionMark im(this); 1.1377 + emit_byte(0x0F); 1.1378 + emit_byte(0xc7); 1.1379 + emit_operand(rcx, adr); 1.1380 +} 1.1381 + 1.1382 +void Assembler::hlt() { 1.1383 + emit_byte(0xF4); 1.1384 +} 1.1385 + 1.1386 + 1.1387 +void Assembler::addr_nop_4() { 1.1388 + // 4 bytes: NOP DWORD PTR [EAX+0] 1.1389 + emit_byte(0x0F); 1.1390 + emit_byte(0x1F); 1.1391 + emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 1.1392 + emit_byte(0); // 8-bits offset (1 byte) 1.1393 +} 1.1394 + 1.1395 +void Assembler::addr_nop_5() { 1.1396 + // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 1.1397 + emit_byte(0x0F); 1.1398 + emit_byte(0x1F); 1.1399 + emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 1.1400 + emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 1.1401 + emit_byte(0); // 8-bits offset (1 byte) 1.1402 +} 1.1403 + 1.1404 +void Assembler::addr_nop_7() { 1.1405 + // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 1.1406 + emit_byte(0x0F); 
1.1407 + emit_byte(0x1F); 1.1408 + emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 1.1409 + emit_long(0); // 32-bits offset (4 bytes) 1.1410 +} 1.1411 + 1.1412 +void Assembler::addr_nop_8() { 1.1413 + // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 1.1414 + emit_byte(0x0F); 1.1415 + emit_byte(0x1F); 1.1416 + emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 1.1417 + emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 1.1418 + emit_long(0); // 32-bits offset (4 bytes) 1.1419 +} 1.1420 + 1.1421 +void Assembler::nop(int i) { 1.1422 + assert(i > 0, " "); 1.1423 + if (UseAddressNop && VM_Version::is_intel()) { 1.1424 + // 1.1425 + // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1.1426 + // 1: 0x90 1.1427 + // 2: 0x66 0x90 1.1428 + // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1.1429 + // 4: 0x0F 0x1F 0x40 0x00 1.1430 + // 5: 0x0F 0x1F 0x44 0x00 0x00 1.1431 + // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1.1432 + // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1.1433 + // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1434 + // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1435 + // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1436 + // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1437 + 1.1438 + // The rest coding is Intel specific - don't use consecutive address nops 1.1439 + 1.1440 + // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1.1441 + // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1.1442 + // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1.1443 + // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1.1444 + 1.1445 + while(i >= 15) { 1.1446 + // For Intel don't generate consecutive addess nops (mix with regular nops) 1.1447 + i -= 15; 1.1448 + emit_byte(0x66); // size prefix 1.1449 + emit_byte(0x66); // size prefix 1.1450 + emit_byte(0x66); // size prefix 1.1451 + addr_nop_8(); 1.1452 
+ emit_byte(0x66); // size prefix 1.1453 + emit_byte(0x66); // size prefix 1.1454 + emit_byte(0x66); // size prefix 1.1455 + emit_byte(0x90); // nop 1.1456 + } 1.1457 + switch (i) { 1.1458 + case 14: 1.1459 + emit_byte(0x66); // size prefix 1.1460 + case 13: 1.1461 + emit_byte(0x66); // size prefix 1.1462 + case 12: 1.1463 + addr_nop_8(); 1.1464 + emit_byte(0x66); // size prefix 1.1465 + emit_byte(0x66); // size prefix 1.1466 + emit_byte(0x66); // size prefix 1.1467 + emit_byte(0x90); // nop 1.1468 + break; 1.1469 + case 11: 1.1470 + emit_byte(0x66); // size prefix 1.1471 + case 10: 1.1472 + emit_byte(0x66); // size prefix 1.1473 + case 9: 1.1474 + emit_byte(0x66); // size prefix 1.1475 + case 8: 1.1476 + addr_nop_8(); 1.1477 + break; 1.1478 + case 7: 1.1479 + addr_nop_7(); 1.1480 + break; 1.1481 + case 6: 1.1482 + emit_byte(0x66); // size prefix 1.1483 + case 5: 1.1484 + addr_nop_5(); 1.1485 + break; 1.1486 + case 4: 1.1487 + addr_nop_4(); 1.1488 + break; 1.1489 + case 3: 1.1490 + // Don't use "0x0F 0x1F 0x00" - need patching safe padding 1.1491 + emit_byte(0x66); // size prefix 1.1492 + case 2: 1.1493 + emit_byte(0x66); // size prefix 1.1494 + case 1: 1.1495 + emit_byte(0x90); // nop 1.1496 + break; 1.1497 + default: 1.1498 + assert(i == 0, " "); 1.1499 + } 1.1500 + return; 1.1501 + } 1.1502 + if (UseAddressNop && VM_Version::is_amd()) { 1.1503 + // 1.1504 + // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 
1.1505 + // 1: 0x90 1.1506 + // 2: 0x66 0x90 1.1507 + // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1.1508 + // 4: 0x0F 0x1F 0x40 0x00 1.1509 + // 5: 0x0F 0x1F 0x44 0x00 0x00 1.1510 + // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1.1511 + // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1.1512 + // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1513 + // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1514 + // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1515 + // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1516 + 1.1517 + // The rest coding is AMD specific - use consecutive address nops 1.1518 + 1.1519 + // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 1.1520 + // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 1.1521 + // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1.1522 + // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1.1523 + // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1.1524 + // Size prefixes (0x66) are added for larger sizes 1.1525 + 1.1526 + while(i >= 22) { 1.1527 + i -= 11; 1.1528 + emit_byte(0x66); // size prefix 1.1529 + emit_byte(0x66); // size prefix 1.1530 + emit_byte(0x66); // size prefix 1.1531 + addr_nop_8(); 1.1532 + } 1.1533 + // Generate first nop for size between 21-12 1.1534 + switch (i) { 1.1535 + case 21: 1.1536 + i -= 1; 1.1537 + emit_byte(0x66); // size prefix 1.1538 + case 20: 1.1539 + case 19: 1.1540 + i -= 1; 1.1541 + emit_byte(0x66); // size prefix 1.1542 + case 18: 1.1543 + case 17: 1.1544 + i -= 1; 1.1545 + emit_byte(0x66); // size prefix 1.1546 + case 16: 1.1547 + case 15: 1.1548 + i -= 8; 1.1549 + addr_nop_8(); 1.1550 + break; 1.1551 + case 14: 1.1552 + case 13: 1.1553 + i -= 7; 1.1554 + addr_nop_7(); 1.1555 + break; 1.1556 + case 12: 1.1557 + i -= 6; 1.1558 + emit_byte(0x66); // size prefix 1.1559 + addr_nop_5(); 1.1560 + break; 1.1561 + 
default: 1.1562 + assert(i < 12, " "); 1.1563 + } 1.1564 + 1.1565 + // Generate second nop for size between 11-1 1.1566 + switch (i) { 1.1567 + case 11: 1.1568 + emit_byte(0x66); // size prefix 1.1569 + case 10: 1.1570 + emit_byte(0x66); // size prefix 1.1571 + case 9: 1.1572 + emit_byte(0x66); // size prefix 1.1573 + case 8: 1.1574 + addr_nop_8(); 1.1575 + break; 1.1576 + case 7: 1.1577 + addr_nop_7(); 1.1578 + break; 1.1579 + case 6: 1.1580 + emit_byte(0x66); // size prefix 1.1581 + case 5: 1.1582 + addr_nop_5(); 1.1583 + break; 1.1584 + case 4: 1.1585 + addr_nop_4(); 1.1586 + break; 1.1587 + case 3: 1.1588 + // Don't use "0x0F 0x1F 0x00" - need patching safe padding 1.1589 + emit_byte(0x66); // size prefix 1.1590 + case 2: 1.1591 + emit_byte(0x66); // size prefix 1.1592 + case 1: 1.1593 + emit_byte(0x90); // nop 1.1594 + break; 1.1595 + default: 1.1596 + assert(i == 0, " "); 1.1597 + } 1.1598 + return; 1.1599 + } 1.1600 + 1.1601 + // Using nops with size prefixes "0x66 0x90". 1.1602 + // From AMD Optimization Guide: 1.1603 + // 1: 0x90 1.1604 + // 2: 0x66 0x90 1.1605 + // 3: 0x66 0x66 0x90 1.1606 + // 4: 0x66 0x66 0x66 0x90 1.1607 + // 5: 0x66 0x66 0x90 0x66 0x90 1.1608 + // 6: 0x66 0x66 0x90 0x66 0x66 0x90 1.1609 + // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 1.1610 + // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 1.1611 + // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 1.1612 + // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 1.1613 + // 1.1614 + while(i > 12) { 1.1615 + i -= 4; 1.1616 + emit_byte(0x66); // size prefix 1.1617 + emit_byte(0x66); 1.1618 + emit_byte(0x66); 1.1619 + emit_byte(0x90); // nop 1.1620 + } 1.1621 + // 1 - 12 nops 1.1622 + if(i > 8) { 1.1623 + if(i > 9) { 1.1624 + i -= 1; 1.1625 + emit_byte(0x66); 1.1626 + } 1.1627 + i -= 3; 1.1628 + emit_byte(0x66); 1.1629 + emit_byte(0x66); 1.1630 + emit_byte(0x90); 1.1631 + } 1.1632 + // 1 - 8 nops 1.1633 + if(i > 4) { 1.1634 + if(i > 6) { 1.1635 + i -= 1; 1.1636 + emit_byte(0x66); 1.1637 + } 
1.1638 + i -= 3; 1.1639 + emit_byte(0x66); 1.1640 + emit_byte(0x66); 1.1641 + emit_byte(0x90); 1.1642 + } 1.1643 + switch (i) { 1.1644 + case 4: 1.1645 + emit_byte(0x66); 1.1646 + case 3: 1.1647 + emit_byte(0x66); 1.1648 + case 2: 1.1649 + emit_byte(0x66); 1.1650 + case 1: 1.1651 + emit_byte(0x90); 1.1652 + break; 1.1653 + default: 1.1654 + assert(i == 0, " "); 1.1655 + } 1.1656 +} 1.1657 + 1.1658 +void Assembler::ret(int imm16) { 1.1659 + if (imm16 == 0) { 1.1660 + emit_byte(0xC3); 1.1661 + } else { 1.1662 + emit_byte(0xC2); 1.1663 + emit_word(imm16); 1.1664 + } 1.1665 +} 1.1666 + 1.1667 + 1.1668 +void Assembler::set_byte_if_not_zero(Register dst) { 1.1669 + emit_byte(0x0F); 1.1670 + emit_byte(0x95); 1.1671 + emit_byte(0xE0 | dst->encoding()); 1.1672 +} 1.1673 + 1.1674 + 1.1675 +// copies a single word from [esi] to [edi] 1.1676 +void Assembler::smovl() { 1.1677 + emit_byte(0xA5); 1.1678 +} 1.1679 + 1.1680 +// copies data from [esi] to [edi] using rcx double words (m32) 1.1681 +void Assembler::rep_movl() { 1.1682 + emit_byte(0xF3); 1.1683 + emit_byte(0xA5); 1.1684 +} 1.1685 + 1.1686 + 1.1687 +// sets rcx double words (m32) with rax, value at [edi] 1.1688 +void Assembler::rep_set() { 1.1689 + emit_byte(0xF3); 1.1690 + emit_byte(0xAB); 1.1691 +} 1.1692 + 1.1693 +// scans rcx double words (m32) at [edi] for occurance of rax, 1.1694 +void Assembler::repne_scan() { 1.1695 + emit_byte(0xF2); 1.1696 + emit_byte(0xAF); 1.1697 +} 1.1698 + 1.1699 + 1.1700 +void Assembler::setb(Condition cc, Register dst) { 1.1701 + assert(0 <= cc && cc < 16, "illegal cc"); 1.1702 + emit_byte(0x0F); 1.1703 + emit_byte(0x90 | cc); 1.1704 + emit_byte(0xC0 | dst->encoding()); 1.1705 +} 1.1706 + 1.1707 +void Assembler::cld() { 1.1708 + emit_byte(0xfc); 1.1709 +} 1.1710 + 1.1711 +void Assembler::std() { 1.1712 + emit_byte(0xfd); 1.1713 +} 1.1714 + 1.1715 +void Assembler::emit_raw (unsigned char b) { 1.1716 + emit_byte (b) ; 1.1717 +} 1.1718 + 1.1719 +// Serializes memory. 
1.1720 +void Assembler::membar() { 1.1721 + // Memory barriers are only needed on multiprocessors 1.1722 + if (os::is_MP()) { 1.1723 + if( VM_Version::supports_sse2() ) { 1.1724 + emit_byte( 0x0F ); // MFENCE; faster blows no regs 1.1725 + emit_byte( 0xAE ); 1.1726 + emit_byte( 0xF0 ); 1.1727 + } else { 1.1728 + // All usable chips support "locked" instructions which suffice 1.1729 + // as barriers, and are much faster than the alternative of 1.1730 + // using cpuid instruction. We use here a locked add [esp],0. 1.1731 + // This is conveniently otherwise a no-op except for blowing 1.1732 + // flags (which we save and restore.) 1.1733 + pushfd(); // Save eflags register 1.1734 + lock(); 1.1735 + addl(Address(rsp, 0), 0);// Assert the lock# signal here 1.1736 + popfd(); // Restore eflags register 1.1737 + } 1.1738 + } 1.1739 +} 1.1740 + 1.1741 +// Identify processor type and features 1.1742 +void Assembler::cpuid() { 1.1743 + // Note: we can't assert VM_Version::supports_cpuid() here 1.1744 + // because this instruction is used in the processor 1.1745 + // identification code. 
1.1746 + emit_byte( 0x0F ); 1.1747 + emit_byte( 0xA2 ); 1.1748 +} 1.1749 + 1.1750 +void Assembler::call(Label& L, relocInfo::relocType rtype) { 1.1751 + if (L.is_bound()) { 1.1752 + const int long_size = 5; 1.1753 + int offs = target(L) - pc(); 1.1754 + assert(offs <= 0, "assembler error"); 1.1755 + InstructionMark im(this); 1.1756 + // 1110 1000 #32-bit disp 1.1757 + emit_byte(0xE8); 1.1758 + emit_data(offs - long_size, rtype, 0); 1.1759 + } else { 1.1760 + InstructionMark im(this); 1.1761 + // 1110 1000 #32-bit disp 1.1762 + L.add_patch_at(code(), locator()); 1.1763 + emit_byte(0xE8); 1.1764 + emit_data(int(0), rtype, 0); 1.1765 + } 1.1766 +} 1.1767 + 1.1768 +void Assembler::call(Register dst) { 1.1769 + emit_byte(0xFF); 1.1770 + emit_byte(0xD0 | dst->encoding()); 1.1771 +} 1.1772 + 1.1773 + 1.1774 +void Assembler::call(Address adr) { 1.1775 + InstructionMark im(this); 1.1776 + relocInfo::relocType rtype = adr.reloc(); 1.1777 + if (rtype != relocInfo::runtime_call_type) { 1.1778 + emit_byte(0xFF); 1.1779 + emit_operand(rdx, adr); 1.1780 + } else { 1.1781 + assert(false, "ack"); 1.1782 + } 1.1783 + 1.1784 +} 1.1785 + 1.1786 +void Assembler::call_literal(address dest, RelocationHolder const& rspec) { 1.1787 + InstructionMark im(this); 1.1788 + emit_byte(0xE8); 1.1789 + intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1.1790 + assert(dest != NULL, "must have a target"); 1.1791 + emit_data(disp, rspec, call32_operand); 1.1792 + 1.1793 +} 1.1794 + 1.1795 +void Assembler::jmp(Register entry) { 1.1796 + emit_byte(0xFF); 1.1797 + emit_byte(0xE0 | entry->encoding()); 1.1798 +} 1.1799 + 1.1800 + 1.1801 +void Assembler::jmp(Address adr) { 1.1802 + InstructionMark im(this); 1.1803 + emit_byte(0xFF); 1.1804 + emit_operand(rsp, adr); 1.1805 +} 1.1806 + 1.1807 +void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1.1808 + InstructionMark im(this); 1.1809 + emit_byte(0xE9); 1.1810 + assert(dest != NULL, "must have a target"); 1.1811 + intptr_t disp = 
dest - (_code_pos + sizeof(int32_t)); 1.1812 + emit_data(disp, rspec.reloc(), call32_operand); 1.1813 +} 1.1814 + 1.1815 +void Assembler::jmp(Label& L, relocInfo::relocType rtype) { 1.1816 + if (L.is_bound()) { 1.1817 + address entry = target(L); 1.1818 + assert(entry != NULL, "jmp most probably wrong"); 1.1819 + InstructionMark im(this); 1.1820 + const int short_size = 2; 1.1821 + const int long_size = 5; 1.1822 + intptr_t offs = entry - _code_pos; 1.1823 + if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1.1824 + emit_byte(0xEB); 1.1825 + emit_byte((offs - short_size) & 0xFF); 1.1826 + } else { 1.1827 + emit_byte(0xE9); 1.1828 + emit_long(offs - long_size); 1.1829 + } 1.1830 + } else { 1.1831 + // By default, forward jumps are always 32-bit displacements, since 1.1832 + // we can't yet know where the label will be bound. If you're sure that 1.1833 + // the forward jump will not run beyond 256 bytes, use jmpb to 1.1834 + // force an 8-bit displacement. 1.1835 + InstructionMark im(this); 1.1836 + relocate(rtype); 1.1837 + L.add_patch_at(code(), locator()); 1.1838 + emit_byte(0xE9); 1.1839 + emit_long(0); 1.1840 + } 1.1841 +} 1.1842 + 1.1843 +void Assembler::jmpb(Label& L) { 1.1844 + if (L.is_bound()) { 1.1845 + const int short_size = 2; 1.1846 + address entry = target(L); 1.1847 + assert(is8bit((entry - _code_pos) + short_size), 1.1848 + "Dispacement too large for a short jmp"); 1.1849 + assert(entry != NULL, "jmp most probably wrong"); 1.1850 + intptr_t offs = entry - _code_pos; 1.1851 + emit_byte(0xEB); 1.1852 + emit_byte((offs - short_size) & 0xFF); 1.1853 + } else { 1.1854 + InstructionMark im(this); 1.1855 + L.add_patch_at(code(), locator()); 1.1856 + emit_byte(0xEB); 1.1857 + emit_byte(0); 1.1858 + } 1.1859 +} 1.1860 + 1.1861 +void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) { 1.1862 + InstructionMark im(this); 1.1863 + relocate(rtype); 1.1864 + assert((0 <= cc) && (cc < 16), "illegal cc"); 1.1865 + if (L.is_bound()) { 
1.1866 + address dst = target(L); 1.1867 + assert(dst != NULL, "jcc most probably wrong"); 1.1868 + 1.1869 + const int short_size = 2; 1.1870 + const int long_size = 6; 1.1871 + int offs = (int)dst - ((int)_code_pos); 1.1872 + if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1.1873 + // 0111 tttn #8-bit disp 1.1874 + emit_byte(0x70 | cc); 1.1875 + emit_byte((offs - short_size) & 0xFF); 1.1876 + } else { 1.1877 + // 0000 1111 1000 tttn #32-bit disp 1.1878 + emit_byte(0x0F); 1.1879 + emit_byte(0x80 | cc); 1.1880 + emit_long(offs - long_size); 1.1881 + } 1.1882 + } else { 1.1883 + // Note: could eliminate cond. jumps to this jump if condition 1.1884 + // is the same however, seems to be rather unlikely case. 1.1885 + // Note: use jccb() if label to be bound is very close to get 1.1886 + // an 8-bit displacement 1.1887 + L.add_patch_at(code(), locator()); 1.1888 + emit_byte(0x0F); 1.1889 + emit_byte(0x80 | cc); 1.1890 + emit_long(0); 1.1891 + } 1.1892 +} 1.1893 + 1.1894 +void Assembler::jccb(Condition cc, Label& L) { 1.1895 + if (L.is_bound()) { 1.1896 + const int short_size = 2; 1.1897 + address entry = target(L); 1.1898 + assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), 1.1899 + "Dispacement too large for a short jmp"); 1.1900 + intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1.1901 + // 0111 tttn #8-bit disp 1.1902 + emit_byte(0x70 | cc); 1.1903 + emit_byte((offs - short_size) & 0xFF); 1.1904 + jcc(cc, L); 1.1905 + } else { 1.1906 + InstructionMark im(this); 1.1907 + L.add_patch_at(code(), locator()); 1.1908 + emit_byte(0x70 | cc); 1.1909 + emit_byte(0); 1.1910 + } 1.1911 +} 1.1912 + 1.1913 +// FPU instructions 1.1914 + 1.1915 +void Assembler::fld1() { 1.1916 + emit_byte(0xD9); 1.1917 + emit_byte(0xE8); 1.1918 +} 1.1919 + 1.1920 + 1.1921 +void Assembler::fldz() { 1.1922 + emit_byte(0xD9); 1.1923 + emit_byte(0xEE); 1.1924 +} 1.1925 + 1.1926 + 1.1927 +void Assembler::fld_s(Address adr) { 1.1928 + InstructionMark im(this); 1.1929 
+ emit_byte(0xD9); 1.1930 + emit_operand(rax, adr); 1.1931 +} 1.1932 + 1.1933 + 1.1934 +void Assembler::fld_s (int index) { 1.1935 + emit_farith(0xD9, 0xC0, index); 1.1936 +} 1.1937 + 1.1938 + 1.1939 +void Assembler::fld_d(Address adr) { 1.1940 + InstructionMark im(this); 1.1941 + emit_byte(0xDD); 1.1942 + emit_operand(rax, adr); 1.1943 +} 1.1944 + 1.1945 + 1.1946 +void Assembler::fld_x(Address adr) { 1.1947 + InstructionMark im(this); 1.1948 + emit_byte(0xDB); 1.1949 + emit_operand(rbp, adr); 1.1950 +} 1.1951 + 1.1952 + 1.1953 +void Assembler::fst_s(Address adr) { 1.1954 + InstructionMark im(this); 1.1955 + emit_byte(0xD9); 1.1956 + emit_operand(rdx, adr); 1.1957 +} 1.1958 + 1.1959 + 1.1960 +void Assembler::fst_d(Address adr) { 1.1961 + InstructionMark im(this); 1.1962 + emit_byte(0xDD); 1.1963 + emit_operand(rdx, adr); 1.1964 +} 1.1965 + 1.1966 + 1.1967 +void Assembler::fstp_s(Address adr) { 1.1968 + InstructionMark im(this); 1.1969 + emit_byte(0xD9); 1.1970 + emit_operand(rbx, adr); 1.1971 +} 1.1972 + 1.1973 + 1.1974 +void Assembler::fstp_d(Address adr) { 1.1975 + InstructionMark im(this); 1.1976 + emit_byte(0xDD); 1.1977 + emit_operand(rbx, adr); 1.1978 +} 1.1979 + 1.1980 + 1.1981 +void Assembler::fstp_x(Address adr) { 1.1982 + InstructionMark im(this); 1.1983 + emit_byte(0xDB); 1.1984 + emit_operand(rdi, adr); 1.1985 +} 1.1986 + 1.1987 + 1.1988 +void Assembler::fstp_d(int index) { 1.1989 + emit_farith(0xDD, 0xD8, index); 1.1990 +} 1.1991 + 1.1992 + 1.1993 +void Assembler::fild_s(Address adr) { 1.1994 + InstructionMark im(this); 1.1995 + emit_byte(0xDB); 1.1996 + emit_operand(rax, adr); 1.1997 +} 1.1998 + 1.1999 + 1.2000 +void Assembler::fild_d(Address adr) { 1.2001 + InstructionMark im(this); 1.2002 + emit_byte(0xDF); 1.2003 + emit_operand(rbp, adr); 1.2004 +} 1.2005 + 1.2006 + 1.2007 +void Assembler::fistp_s(Address adr) { 1.2008 + InstructionMark im(this); 1.2009 + emit_byte(0xDB); 1.2010 + emit_operand(rbx, adr); 1.2011 +} 1.2012 + 1.2013 + 1.2014 +void 
Assembler::fistp_d(Address adr) { 1.2015 + InstructionMark im(this); 1.2016 + emit_byte(0xDF); 1.2017 + emit_operand(rdi, adr); 1.2018 +} 1.2019 + 1.2020 + 1.2021 +void Assembler::fist_s(Address adr) { 1.2022 + InstructionMark im(this); 1.2023 + emit_byte(0xDB); 1.2024 + emit_operand(rdx, adr); 1.2025 +} 1.2026 + 1.2027 + 1.2028 +void Assembler::fabs() { 1.2029 + emit_byte(0xD9); 1.2030 + emit_byte(0xE1); 1.2031 +} 1.2032 + 1.2033 + 1.2034 +void Assembler::fldln2() { 1.2035 + emit_byte(0xD9); 1.2036 + emit_byte(0xED); 1.2037 +} 1.2038 + 1.2039 +void Assembler::fyl2x() { 1.2040 + emit_byte(0xD9); 1.2041 + emit_byte(0xF1); 1.2042 +} 1.2043 + 1.2044 + 1.2045 +void Assembler::fldlg2() { 1.2046 + emit_byte(0xD9); 1.2047 + emit_byte(0xEC); 1.2048 +} 1.2049 + 1.2050 + 1.2051 +void Assembler::flog() { 1.2052 + fldln2(); 1.2053 + fxch(); 1.2054 + fyl2x(); 1.2055 +} 1.2056 + 1.2057 + 1.2058 +void Assembler::flog10() { 1.2059 + fldlg2(); 1.2060 + fxch(); 1.2061 + fyl2x(); 1.2062 +} 1.2063 + 1.2064 + 1.2065 +void Assembler::fsin() { 1.2066 + emit_byte(0xD9); 1.2067 + emit_byte(0xFE); 1.2068 +} 1.2069 + 1.2070 + 1.2071 +void Assembler::fcos() { 1.2072 + emit_byte(0xD9); 1.2073 + emit_byte(0xFF); 1.2074 +} 1.2075 + 1.2076 +void Assembler::ftan() { 1.2077 + emit_byte(0xD9); 1.2078 + emit_byte(0xF2); 1.2079 + emit_byte(0xDD); 1.2080 + emit_byte(0xD8); 1.2081 +} 1.2082 + 1.2083 +void Assembler::fsqrt() { 1.2084 + emit_byte(0xD9); 1.2085 + emit_byte(0xFA); 1.2086 +} 1.2087 + 1.2088 + 1.2089 +void Assembler::fchs() { 1.2090 + emit_byte(0xD9); 1.2091 + emit_byte(0xE0); 1.2092 +} 1.2093 + 1.2094 + 1.2095 +void Assembler::fadd_s(Address src) { 1.2096 + InstructionMark im(this); 1.2097 + emit_byte(0xD8); 1.2098 + emit_operand(rax, src); 1.2099 +} 1.2100 + 1.2101 + 1.2102 +void Assembler::fadd_d(Address src) { 1.2103 + InstructionMark im(this); 1.2104 + emit_byte(0xDC); 1.2105 + emit_operand(rax, src); 1.2106 +} 1.2107 + 1.2108 + 1.2109 +void Assembler::fadd(int i) { 1.2110 + 
emit_farith(0xD8, 0xC0, i); 1.2111 +} 1.2112 + 1.2113 + 1.2114 +void Assembler::fadda(int i) { 1.2115 + emit_farith(0xDC, 0xC0, i); 1.2116 +} 1.2117 + 1.2118 + 1.2119 +void Assembler::fsub_d(Address src) { 1.2120 + InstructionMark im(this); 1.2121 + emit_byte(0xDC); 1.2122 + emit_operand(rsp, src); 1.2123 +} 1.2124 + 1.2125 + 1.2126 +void Assembler::fsub_s(Address src) { 1.2127 + InstructionMark im(this); 1.2128 + emit_byte(0xD8); 1.2129 + emit_operand(rsp, src); 1.2130 +} 1.2131 + 1.2132 + 1.2133 +void Assembler::fsubr_s(Address src) { 1.2134 + InstructionMark im(this); 1.2135 + emit_byte(0xD8); 1.2136 + emit_operand(rbp, src); 1.2137 +} 1.2138 + 1.2139 + 1.2140 +void Assembler::fsubr_d(Address src) { 1.2141 + InstructionMark im(this); 1.2142 + emit_byte(0xDC); 1.2143 + emit_operand(rbp, src); 1.2144 +} 1.2145 + 1.2146 + 1.2147 +void Assembler::fmul_s(Address src) { 1.2148 + InstructionMark im(this); 1.2149 + emit_byte(0xD8); 1.2150 + emit_operand(rcx, src); 1.2151 +} 1.2152 + 1.2153 + 1.2154 +void Assembler::fmul_d(Address src) { 1.2155 + InstructionMark im(this); 1.2156 + emit_byte(0xDC); 1.2157 + emit_operand(rcx, src); 1.2158 +} 1.2159 + 1.2160 + 1.2161 +void Assembler::fmul(int i) { 1.2162 + emit_farith(0xD8, 0xC8, i); 1.2163 +} 1.2164 + 1.2165 + 1.2166 +void Assembler::fmula(int i) { 1.2167 + emit_farith(0xDC, 0xC8, i); 1.2168 +} 1.2169 + 1.2170 + 1.2171 +void Assembler::fdiv_s(Address src) { 1.2172 + InstructionMark im(this); 1.2173 + emit_byte(0xD8); 1.2174 + emit_operand(rsi, src); 1.2175 +} 1.2176 + 1.2177 + 1.2178 +void Assembler::fdiv_d(Address src) { 1.2179 + InstructionMark im(this); 1.2180 + emit_byte(0xDC); 1.2181 + emit_operand(rsi, src); 1.2182 +} 1.2183 + 1.2184 + 1.2185 +void Assembler::fdivr_s(Address src) { 1.2186 + InstructionMark im(this); 1.2187 + emit_byte(0xD8); 1.2188 + emit_operand(rdi, src); 1.2189 +} 1.2190 + 1.2191 + 1.2192 +void Assembler::fdivr_d(Address src) { 1.2193 + InstructionMark im(this); 1.2194 + emit_byte(0xDC); 1.2195 + 
emit_operand(rdi, src); 1.2196 +} 1.2197 + 1.2198 + 1.2199 +void Assembler::fsub(int i) { 1.2200 + emit_farith(0xD8, 0xE0, i); 1.2201 +} 1.2202 + 1.2203 + 1.2204 +void Assembler::fsuba(int i) { 1.2205 + emit_farith(0xDC, 0xE8, i); 1.2206 +} 1.2207 + 1.2208 + 1.2209 +void Assembler::fsubr(int i) { 1.2210 + emit_farith(0xD8, 0xE8, i); 1.2211 +} 1.2212 + 1.2213 + 1.2214 +void Assembler::fsubra(int i) { 1.2215 + emit_farith(0xDC, 0xE0, i); 1.2216 +} 1.2217 + 1.2218 + 1.2219 +void Assembler::fdiv(int i) { 1.2220 + emit_farith(0xD8, 0xF0, i); 1.2221 +} 1.2222 + 1.2223 + 1.2224 +void Assembler::fdiva(int i) { 1.2225 + emit_farith(0xDC, 0xF8, i); 1.2226 +} 1.2227 + 1.2228 + 1.2229 +void Assembler::fdivr(int i) { 1.2230 + emit_farith(0xD8, 0xF8, i); 1.2231 +} 1.2232 + 1.2233 + 1.2234 +void Assembler::fdivra(int i) { 1.2235 + emit_farith(0xDC, 0xF0, i); 1.2236 +} 1.2237 + 1.2238 + 1.2239 +// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 1.2240 +// is erroneous for some of the floating-point instructions below. 
1.2241 + 1.2242 +void Assembler::fdivp(int i) { 1.2243 + emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 1.2244 +} 1.2245 + 1.2246 + 1.2247 +void Assembler::fdivrp(int i) { 1.2248 + emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 1.2249 +} 1.2250 + 1.2251 + 1.2252 +void Assembler::fsubp(int i) { 1.2253 + emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 1.2254 +} 1.2255 + 1.2256 + 1.2257 +void Assembler::fsubrp(int i) { 1.2258 + emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 1.2259 +} 1.2260 + 1.2261 + 1.2262 +void Assembler::faddp(int i) { 1.2263 + emit_farith(0xDE, 0xC0, i); 1.2264 +} 1.2265 + 1.2266 + 1.2267 +void Assembler::fmulp(int i) { 1.2268 + emit_farith(0xDE, 0xC8, i); 1.2269 +} 1.2270 + 1.2271 + 1.2272 +void Assembler::fprem() { 1.2273 + emit_byte(0xD9); 1.2274 + emit_byte(0xF8); 1.2275 +} 1.2276 + 1.2277 + 1.2278 +void Assembler::fprem1() { 1.2279 + emit_byte(0xD9); 1.2280 + emit_byte(0xF5); 1.2281 +} 1.2282 + 1.2283 + 1.2284 +void Assembler::fxch(int i) { 1.2285 + emit_farith(0xD9, 0xC8, i); 1.2286 +} 1.2287 + 1.2288 + 1.2289 +void Assembler::fincstp() { 1.2290 + emit_byte(0xD9); 1.2291 + emit_byte(0xF7); 1.2292 +} 1.2293 + 1.2294 + 1.2295 +void Assembler::fdecstp() { 1.2296 + emit_byte(0xD9); 1.2297 + emit_byte(0xF6); 1.2298 +} 1.2299 + 1.2300 + 1.2301 +void Assembler::ffree(int i) { 1.2302 + emit_farith(0xDD, 0xC0, i); 1.2303 +} 1.2304 + 1.2305 + 1.2306 +void Assembler::fcomp_s(Address src) { 1.2307 + InstructionMark im(this); 1.2308 + emit_byte(0xD8); 1.2309 + emit_operand(rbx, src); 1.2310 +} 1.2311 + 1.2312 + 1.2313 +void Assembler::fcomp_d(Address src) { 1.2314 + InstructionMark im(this); 1.2315 + emit_byte(0xDC); 1.2316 + emit_operand(rbx, src); 1.2317 +} 1.2318 + 1.2319 + 1.2320 +void Assembler::fcom(int i) { 1.2321 + emit_farith(0xD8, 0xD0, i); 1.2322 +} 1.2323 + 1.2324 + 1.2325 +void Assembler::fcomp(int 
i) { 1.2326 + emit_farith(0xD8, 0xD8, i); 1.2327 +} 1.2328 + 1.2329 + 1.2330 +void Assembler::fcompp() { 1.2331 + emit_byte(0xDE); 1.2332 + emit_byte(0xD9); 1.2333 +} 1.2334 + 1.2335 + 1.2336 +void Assembler::fucomi(int i) { 1.2337 + // make sure the instruction is supported (introduced for P6, together with cmov) 1.2338 + guarantee(VM_Version::supports_cmov(), "illegal instruction"); 1.2339 + emit_farith(0xDB, 0xE8, i); 1.2340 +} 1.2341 + 1.2342 + 1.2343 +void Assembler::fucomip(int i) { 1.2344 + // make sure the instruction is supported (introduced for P6, together with cmov) 1.2345 + guarantee(VM_Version::supports_cmov(), "illegal instruction"); 1.2346 + emit_farith(0xDF, 0xE8, i); 1.2347 +} 1.2348 + 1.2349 + 1.2350 +void Assembler::ftst() { 1.2351 + emit_byte(0xD9); 1.2352 + emit_byte(0xE4); 1.2353 +} 1.2354 + 1.2355 + 1.2356 +void Assembler::fnstsw_ax() { 1.2357 + emit_byte(0xdF); 1.2358 + emit_byte(0xE0); 1.2359 +} 1.2360 + 1.2361 + 1.2362 +void Assembler::fwait() { 1.2363 + emit_byte(0x9B); 1.2364 +} 1.2365 + 1.2366 + 1.2367 +void Assembler::finit() { 1.2368 + emit_byte(0x9B); 1.2369 + emit_byte(0xDB); 1.2370 + emit_byte(0xE3); 1.2371 +} 1.2372 + 1.2373 + 1.2374 +void Assembler::fldcw(Address src) { 1.2375 + InstructionMark im(this); 1.2376 + emit_byte(0xd9); 1.2377 + emit_operand(rbp, src); 1.2378 +} 1.2379 + 1.2380 + 1.2381 +void Assembler::fnstcw(Address src) { 1.2382 + InstructionMark im(this); 1.2383 + emit_byte(0x9B); 1.2384 + emit_byte(0xD9); 1.2385 + emit_operand(rdi, src); 1.2386 +} 1.2387 + 1.2388 +void Assembler::fnsave(Address dst) { 1.2389 + InstructionMark im(this); 1.2390 + emit_byte(0xDD); 1.2391 + emit_operand(rsi, dst); 1.2392 +} 1.2393 + 1.2394 + 1.2395 +void Assembler::frstor(Address src) { 1.2396 + InstructionMark im(this); 1.2397 + emit_byte(0xDD); 1.2398 + emit_operand(rsp, src); 1.2399 +} 1.2400 + 1.2401 + 1.2402 +void Assembler::fldenv(Address src) { 1.2403 + InstructionMark im(this); 1.2404 + emit_byte(0xD9); 1.2405 + 
emit_operand(rsp, src); 1.2406 +} 1.2407 + 1.2408 + 1.2409 +void Assembler::sahf() { 1.2410 + emit_byte(0x9E); 1.2411 +} 1.2412 + 1.2413 +// MMX operations 1.2414 +void Assembler::emit_operand(MMXRegister reg, Address adr) { 1.2415 + emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 1.2416 +} 1.2417 + 1.2418 +void Assembler::movq( MMXRegister dst, Address src ) { 1.2419 + assert( VM_Version::supports_mmx(), "" ); 1.2420 + emit_byte(0x0F); 1.2421 + emit_byte(0x6F); 1.2422 + emit_operand(dst,src); 1.2423 +} 1.2424 + 1.2425 +void Assembler::movq( Address dst, MMXRegister src ) { 1.2426 + assert( VM_Version::supports_mmx(), "" ); 1.2427 + emit_byte(0x0F); 1.2428 + emit_byte(0x7F); 1.2429 + emit_operand(src,dst); 1.2430 +} 1.2431 + 1.2432 +void Assembler::emms() { 1.2433 + emit_byte(0x0F); 1.2434 + emit_byte(0x77); 1.2435 +} 1.2436 + 1.2437 + 1.2438 + 1.2439 + 1.2440 +// SSE and SSE2 instructions 1.2441 +inline void Assembler::emit_sse_operand(XMMRegister reg, Address adr) { 1.2442 + assert(((Register)reg)->encoding() == reg->encoding(), "otherwise typecast is invalid"); 1.2443 + emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 1.2444 +} 1.2445 +inline void Assembler::emit_sse_operand(Register reg, Address adr) { 1.2446 + emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 1.2447 +} 1.2448 + 1.2449 +inline void Assembler::emit_sse_operand(XMMRegister dst, XMMRegister src) { 1.2450 + emit_byte(0xC0 | dst->encoding() << 3 | src->encoding()); 1.2451 +} 1.2452 +inline void Assembler::emit_sse_operand(XMMRegister dst, Register src) { 1.2453 + emit_byte(0xC0 | dst->encoding() << 3 | src->encoding()); 1.2454 +} 1.2455 +inline void Assembler::emit_sse_operand(Register dst, XMMRegister src) { 1.2456 + emit_byte(0xC0 | dst->encoding() << 3 | src->encoding()); 1.2457 +} 1.2458 + 1.2459 + 1.2460 +// Macro for creation of SSE2 instructions 1.2461 +// The SSE2 instricution set is 
highly regular, so this macro saves 1.2462 +// a lot of cut&paste 1.2463 +// Each macro expansion creates two methods (same name with different 1.2464 +// parameter list) 1.2465 +// 1.2466 +// Macro parameters: 1.2467 +// * name: name of the created methods 1.2468 +// * sse_version: either sse or sse2 for the assertion if instruction supported by processor 1.2469 +// * prefix: first opcode byte of the instruction (or 0 if no prefix byte) 1.2470 +// * opcode: last opcode byte of the instruction 1.2471 +// * conversion instruction have parameters of type Register instead of XMMRegister, 1.2472 +// so this can also configured with macro parameters 1.2473 +#define emit_sse_instruction(name, sse_version, prefix, opcode, dst_register_type, src_register_type) \ 1.2474 + \ 1.2475 + void Assembler:: name (dst_register_type dst, Address src) { \ 1.2476 + assert(VM_Version::supports_##sse_version(), ""); \ 1.2477 + \ 1.2478 + InstructionMark im(this); \ 1.2479 + if (prefix != 0) emit_byte(prefix); \ 1.2480 + emit_byte(0x0F); \ 1.2481 + emit_byte(opcode); \ 1.2482 + emit_sse_operand(dst, src); \ 1.2483 + } \ 1.2484 + \ 1.2485 + void Assembler:: name (dst_register_type dst, src_register_type src) { \ 1.2486 + assert(VM_Version::supports_##sse_version(), ""); \ 1.2487 + \ 1.2488 + if (prefix != 0) emit_byte(prefix); \ 1.2489 + emit_byte(0x0F); \ 1.2490 + emit_byte(opcode); \ 1.2491 + emit_sse_operand(dst, src); \ 1.2492 + } \ 1.2493 + 1.2494 +emit_sse_instruction(addss, sse, 0xF3, 0x58, XMMRegister, XMMRegister); 1.2495 +emit_sse_instruction(addsd, sse2, 0xF2, 0x58, XMMRegister, XMMRegister) 1.2496 +emit_sse_instruction(subss, sse, 0xF3, 0x5C, XMMRegister, XMMRegister) 1.2497 +emit_sse_instruction(subsd, sse2, 0xF2, 0x5C, XMMRegister, XMMRegister) 1.2498 +emit_sse_instruction(mulss, sse, 0xF3, 0x59, XMMRegister, XMMRegister) 1.2499 +emit_sse_instruction(mulsd, sse2, 0xF2, 0x59, XMMRegister, XMMRegister) 1.2500 +emit_sse_instruction(divss, sse, 0xF3, 0x5E, XMMRegister, 
XMMRegister) 1.2501 +emit_sse_instruction(divsd, sse2, 0xF2, 0x5E, XMMRegister, XMMRegister) 1.2502 +emit_sse_instruction(sqrtss, sse, 0xF3, 0x51, XMMRegister, XMMRegister) 1.2503 +emit_sse_instruction(sqrtsd, sse2, 0xF2, 0x51, XMMRegister, XMMRegister) 1.2504 + 1.2505 +emit_sse_instruction(pxor, sse2, 0x66, 0xEF, XMMRegister, XMMRegister) 1.2506 + 1.2507 +emit_sse_instruction(comiss, sse, 0, 0x2F, XMMRegister, XMMRegister) 1.2508 +emit_sse_instruction(comisd, sse2, 0x66, 0x2F, XMMRegister, XMMRegister) 1.2509 +emit_sse_instruction(ucomiss, sse, 0, 0x2E, XMMRegister, XMMRegister) 1.2510 +emit_sse_instruction(ucomisd, sse2, 0x66, 0x2E, XMMRegister, XMMRegister) 1.2511 + 1.2512 +emit_sse_instruction(cvtss2sd, sse2, 0xF3, 0x5A, XMMRegister, XMMRegister); 1.2513 +emit_sse_instruction(cvtsd2ss, sse2, 0xF2, 0x5A, XMMRegister, XMMRegister) 1.2514 +emit_sse_instruction(cvtsi2ss, sse, 0xF3, 0x2A, XMMRegister, Register); 1.2515 +emit_sse_instruction(cvtsi2sd, sse2, 0xF2, 0x2A, XMMRegister, Register) 1.2516 +emit_sse_instruction(cvtss2si, sse, 0xF3, 0x2D, Register, XMMRegister); 1.2517 +emit_sse_instruction(cvtsd2si, sse2, 0xF2, 0x2D, Register, XMMRegister) 1.2518 +emit_sse_instruction(cvttss2si, sse, 0xF3, 0x2C, Register, XMMRegister); 1.2519 +emit_sse_instruction(cvttsd2si, sse2, 0xF2, 0x2C, Register, XMMRegister) 1.2520 + 1.2521 +emit_sse_instruction(movss, sse, 0xF3, 0x10, XMMRegister, XMMRegister) 1.2522 +emit_sse_instruction(movsd, sse2, 0xF2, 0x10, XMMRegister, XMMRegister) 1.2523 + 1.2524 +emit_sse_instruction(movq, sse2, 0xF3, 0x7E, XMMRegister, XMMRegister); 1.2525 +emit_sse_instruction(movd, sse2, 0x66, 0x6E, XMMRegister, Register); 1.2526 +emit_sse_instruction(movdqa, sse2, 0x66, 0x6F, XMMRegister, XMMRegister); 1.2527 + 1.2528 +emit_sse_instruction(punpcklbw, sse2, 0x66, 0x60, XMMRegister, XMMRegister); 1.2529 + 1.2530 + 1.2531 +// Instruction not covered by macro 1.2532 +void Assembler::movq(Address dst, XMMRegister src) { 1.2533 + 
assert(VM_Version::supports_sse2(), ""); 1.2534 + 1.2535 + InstructionMark im(this); 1.2536 + emit_byte(0x66); 1.2537 + emit_byte(0x0F); 1.2538 + emit_byte(0xD6); 1.2539 + emit_sse_operand(src, dst); 1.2540 +} 1.2541 + 1.2542 +void Assembler::movd(Address dst, XMMRegister src) { 1.2543 + assert(VM_Version::supports_sse2(), ""); 1.2544 + 1.2545 + InstructionMark im(this); 1.2546 + emit_byte(0x66); 1.2547 + emit_byte(0x0F); 1.2548 + emit_byte(0x7E); 1.2549 + emit_sse_operand(src, dst); 1.2550 +} 1.2551 + 1.2552 +void Assembler::movd(Register dst, XMMRegister src) { 1.2553 + assert(VM_Version::supports_sse2(), ""); 1.2554 + 1.2555 + emit_byte(0x66); 1.2556 + emit_byte(0x0F); 1.2557 + emit_byte(0x7E); 1.2558 + emit_sse_operand(src, dst); 1.2559 +} 1.2560 + 1.2561 +void Assembler::movdqa(Address dst, XMMRegister src) { 1.2562 + assert(VM_Version::supports_sse2(), ""); 1.2563 + 1.2564 + InstructionMark im(this); 1.2565 + emit_byte(0x66); 1.2566 + emit_byte(0x0F); 1.2567 + emit_byte(0x7F); 1.2568 + emit_sse_operand(src, dst); 1.2569 +} 1.2570 + 1.2571 +void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 1.2572 + assert(isByte(mode), "invalid value"); 1.2573 + assert(VM_Version::supports_sse2(), ""); 1.2574 + 1.2575 + emit_byte(0x66); 1.2576 + emit_byte(0x0F); 1.2577 + emit_byte(0x70); 1.2578 + emit_sse_operand(dst, src); 1.2579 + emit_byte(mode & 0xFF); 1.2580 +} 1.2581 + 1.2582 +void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 1.2583 + assert(isByte(mode), "invalid value"); 1.2584 + assert(VM_Version::supports_sse2(), ""); 1.2585 + 1.2586 + InstructionMark im(this); 1.2587 + emit_byte(0x66); 1.2588 + emit_byte(0x0F); 1.2589 + emit_byte(0x70); 1.2590 + emit_sse_operand(dst, src); 1.2591 + emit_byte(mode & 0xFF); 1.2592 +} 1.2593 + 1.2594 +void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 1.2595 + assert(isByte(mode), "invalid value"); 1.2596 + assert(VM_Version::supports_sse2(), ""); 1.2597 + 1.2598 + emit_byte(0xF2); 
1.2599 + emit_byte(0x0F); 1.2600 + emit_byte(0x70); 1.2601 + emit_sse_operand(dst, src); 1.2602 + emit_byte(mode & 0xFF); 1.2603 +} 1.2604 + 1.2605 +void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 1.2606 + assert(isByte(mode), "invalid value"); 1.2607 + assert(VM_Version::supports_sse2(), ""); 1.2608 + 1.2609 + InstructionMark im(this); 1.2610 + emit_byte(0xF2); 1.2611 + emit_byte(0x0F); 1.2612 + emit_byte(0x70); 1.2613 + emit_sse_operand(dst, src); 1.2614 + emit_byte(mode & 0xFF); 1.2615 +} 1.2616 + 1.2617 +void Assembler::psrlq(XMMRegister dst, int shift) { 1.2618 + assert(VM_Version::supports_sse2(), ""); 1.2619 + 1.2620 + emit_byte(0x66); 1.2621 + emit_byte(0x0F); 1.2622 + emit_byte(0x73); 1.2623 + emit_sse_operand(xmm2, dst); 1.2624 + emit_byte(shift); 1.2625 +} 1.2626 + 1.2627 +void Assembler::movss( Address dst, XMMRegister src ) { 1.2628 + assert(VM_Version::supports_sse(), ""); 1.2629 + 1.2630 + InstructionMark im(this); 1.2631 + emit_byte(0xF3); // single 1.2632 + emit_byte(0x0F); 1.2633 + emit_byte(0x11); // store 1.2634 + emit_sse_operand(src, dst); 1.2635 +} 1.2636 + 1.2637 +void Assembler::movsd( Address dst, XMMRegister src ) { 1.2638 + assert(VM_Version::supports_sse2(), ""); 1.2639 + 1.2640 + InstructionMark im(this); 1.2641 + emit_byte(0xF2); // double 1.2642 + emit_byte(0x0F); 1.2643 + emit_byte(0x11); // store 1.2644 + emit_sse_operand(src,dst); 1.2645 +} 1.2646 + 1.2647 +// New cpus require to use movaps and movapd to avoid partial register stall 1.2648 +// when moving between registers. 
1.2649 +void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1.2650 + assert(VM_Version::supports_sse(), ""); 1.2651 + 1.2652 + emit_byte(0x0F); 1.2653 + emit_byte(0x28); 1.2654 + emit_sse_operand(dst, src); 1.2655 +} 1.2656 +void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1.2657 + assert(VM_Version::supports_sse2(), ""); 1.2658 + 1.2659 + emit_byte(0x66); 1.2660 + emit_byte(0x0F); 1.2661 + emit_byte(0x28); 1.2662 + emit_sse_operand(dst, src); 1.2663 +} 1.2664 + 1.2665 +// New cpus require to use movsd and movss to avoid partial register stall 1.2666 +// when loading from memory. But for old Opteron use movlpd instead of movsd. 1.2667 +// The selection is done in MacroAssembler::movdbl() and movflt(). 1.2668 +void Assembler::movlpd(XMMRegister dst, Address src) { 1.2669 + assert(VM_Version::supports_sse(), ""); 1.2670 + 1.2671 + InstructionMark im(this); 1.2672 + emit_byte(0x66); 1.2673 + emit_byte(0x0F); 1.2674 + emit_byte(0x12); 1.2675 + emit_sse_operand(dst, src); 1.2676 +} 1.2677 + 1.2678 + 1.2679 +emit_sse_instruction(andps, sse, 0, 0x54, XMMRegister, XMMRegister); 1.2680 +emit_sse_instruction(andpd, sse2, 0x66, 0x54, XMMRegister, XMMRegister); 1.2681 +emit_sse_instruction(andnps, sse, 0, 0x55, XMMRegister, XMMRegister); 1.2682 +emit_sse_instruction(andnpd, sse2, 0x66, 0x55, XMMRegister, XMMRegister); 1.2683 +emit_sse_instruction(orps, sse, 0, 0x56, XMMRegister, XMMRegister); 1.2684 +emit_sse_instruction(orpd, sse2, 0x66, 0x56, XMMRegister, XMMRegister); 1.2685 +emit_sse_instruction(xorps, sse, 0, 0x57, XMMRegister, XMMRegister); 1.2686 +emit_sse_instruction(xorpd, sse2, 0x66, 0x57, XMMRegister, XMMRegister); 1.2687 + 1.2688 + 1.2689 +void Assembler::ldmxcsr( Address src) { 1.2690 + InstructionMark im(this); 1.2691 + emit_byte(0x0F); 1.2692 + emit_byte(0xAE); 1.2693 + emit_operand(rdx /* 2 */, src); 1.2694 +} 1.2695 + 1.2696 +void Assembler::stmxcsr( Address dst) { 1.2697 + InstructionMark im(this); 1.2698 + emit_byte(0x0F); 1.2699 + 
emit_byte(0xAE); 1.2700 + emit_operand(rbx /* 3 */, dst); 1.2701 +} 1.2702 + 1.2703 +// Implementation of MacroAssembler 1.2704 + 1.2705 +Address MacroAssembler::as_Address(AddressLiteral adr) { 1.2706 + // amd64 always does this as a pc-rel 1.2707 + // we can be absolute or disp based on the instruction type 1.2708 + // jmp/call are displacements others are absolute 1.2709 + assert(!adr.is_lval(), "must be rval"); 1.2710 + 1.2711 + return Address(adr.target(), adr.rspec()); 1.2712 +} 1.2713 + 1.2714 +Address MacroAssembler::as_Address(ArrayAddress adr) { 1.2715 + return Address::make_array(adr); 1.2716 +} 1.2717 + 1.2718 +void MacroAssembler::fat_nop() { 1.2719 + // A 5 byte nop that is safe for patching (see patch_verified_entry) 1.2720 + emit_byte(0x26); // es: 1.2721 + emit_byte(0x2e); // cs: 1.2722 + emit_byte(0x64); // fs: 1.2723 + emit_byte(0x65); // gs: 1.2724 + emit_byte(0x90); 1.2725 +} 1.2726 + 1.2727 +// 32bit can do a case table jump in one instruction but we no longer allow the base 1.2728 +// to be installed in the Address class 1.2729 +void MacroAssembler::jump(ArrayAddress entry) { 1.2730 + jmp(as_Address(entry)); 1.2731 +} 1.2732 + 1.2733 +void MacroAssembler::jump(AddressLiteral dst) { 1.2734 + jmp_literal(dst.target(), dst.rspec()); 1.2735 +} 1.2736 + 1.2737 +void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 1.2738 + assert((0 <= cc) && (cc < 16), "illegal cc"); 1.2739 + 1.2740 + InstructionMark im(this); 1.2741 + 1.2742 + relocInfo::relocType rtype = dst.reloc(); 1.2743 + relocate(rtype); 1.2744 + const int short_size = 2; 1.2745 + const int long_size = 6; 1.2746 + int offs = (int)dst.target() - ((int)_code_pos); 1.2747 + if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1.2748 + // 0111 tttn #8-bit disp 1.2749 + emit_byte(0x70 | cc); 1.2750 + emit_byte((offs - short_size) & 0xFF); 1.2751 + } else { 1.2752 + // 0000 1111 1000 tttn #32-bit disp 1.2753 + emit_byte(0x0F); 1.2754 + emit_byte(0x80 | cc); 1.2755 + 
emit_long(offs - long_size); 1.2756 + } 1.2757 +} 1.2758 + 1.2759 +// Calls 1.2760 +void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 1.2761 + Assembler::call(L, rtype); 1.2762 +} 1.2763 + 1.2764 +void MacroAssembler::call(Register entry) { 1.2765 + Assembler::call(entry); 1.2766 +} 1.2767 + 1.2768 +void MacroAssembler::call(AddressLiteral entry) { 1.2769 + Assembler::call_literal(entry.target(), entry.rspec()); 1.2770 +} 1.2771 + 1.2772 + 1.2773 +void MacroAssembler::cmp8(AddressLiteral src1, int8_t imm) { 1.2774 + Assembler::cmpb(as_Address(src1), imm); 1.2775 +} 1.2776 + 1.2777 +void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 1.2778 + Assembler::cmpl(as_Address(src1), imm); 1.2779 +} 1.2780 + 1.2781 +void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 1.2782 + if (src2.is_lval()) { 1.2783 + cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1.2784 + } else { 1.2785 + Assembler::cmpl(src1, as_Address(src2)); 1.2786 + } 1.2787 +} 1.2788 + 1.2789 +void MacroAssembler::cmp32(Register src1, int32_t imm) { 1.2790 + Assembler::cmpl(src1, imm); 1.2791 +} 1.2792 + 1.2793 +void MacroAssembler::cmp32(Register src1, Address src2) { 1.2794 + Assembler::cmpl(src1, src2); 1.2795 +} 1.2796 + 1.2797 +void MacroAssembler::cmpoop(Address src1, jobject obj) { 1.2798 + cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 1.2799 +} 1.2800 + 1.2801 +void MacroAssembler::cmpoop(Register src1, jobject obj) { 1.2802 + cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 1.2803 +} 1.2804 + 1.2805 +void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 1.2806 + if (src2.is_lval()) { 1.2807 + // compare the effect address of src2 to src1 1.2808 + cmp_literal32(src1, (int32_t)src2.target(), src2.rspec()); 1.2809 + } else { 1.2810 + Assembler::cmpl(src1, as_Address(src2)); 1.2811 + } 1.2812 +} 1.2813 + 1.2814 +void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 1.2815 + 
assert(src2.is_lval(), "not a mem-mem compare"); 1.2816 + cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 1.2817 +} 1.2818 + 1.2819 + 1.2820 +void MacroAssembler::cmpxchgptr(Register reg, AddressLiteral adr) { 1.2821 + cmpxchg(reg, as_Address(adr)); 1.2822 +} 1.2823 + 1.2824 +void MacroAssembler::increment(AddressLiteral dst) { 1.2825 + increment(as_Address(dst)); 1.2826 +} 1.2827 + 1.2828 +void MacroAssembler::increment(ArrayAddress dst) { 1.2829 + increment(as_Address(dst)); 1.2830 +} 1.2831 + 1.2832 +void MacroAssembler::lea(Register dst, AddressLiteral adr) { 1.2833 + // leal(dst, as_Address(adr)); 1.2834 + // see note in movl as to why we musr use a move 1.2835 + mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 1.2836 +} 1.2837 + 1.2838 +void MacroAssembler::lea(Address dst, AddressLiteral adr) { 1.2839 + // leal(dst, as_Address(adr)); 1.2840 + // see note in movl as to why we musr use a move 1.2841 + mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 1.2842 +} 1.2843 + 1.2844 +void MacroAssembler::mov32(AddressLiteral dst, Register src) { 1.2845 + Assembler::movl(as_Address(dst), src); 1.2846 +} 1.2847 + 1.2848 +void MacroAssembler::mov32(Register dst, AddressLiteral src) { 1.2849 + Assembler::movl(dst, as_Address(src)); 1.2850 +} 1.2851 + 1.2852 +void MacroAssembler::movbyte(ArrayAddress dst, int src) { 1.2853 + movb(as_Address(dst), src); 1.2854 +} 1.2855 + 1.2856 +void MacroAssembler::movoop(Address dst, jobject obj) { 1.2857 + mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 1.2858 +} 1.2859 + 1.2860 +void MacroAssembler::movoop(Register dst, jobject obj) { 1.2861 + mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 1.2862 +} 1.2863 + 1.2864 +void MacroAssembler::movptr(Register dst, AddressLiteral src) { 1.2865 + if (src.is_lval()) { 1.2866 + // essentially an lea 1.2867 + mov_literal32(dst, (int32_t) src.target(), src.rspec()); 1.2868 + } else { 1.2869 + // mov 32bits from an absolute 
address 1.2870 + movl(dst, as_Address(src)); 1.2871 + } 1.2872 +} 1.2873 + 1.2874 +void MacroAssembler::movptr(ArrayAddress dst, Register src) { 1.2875 + movl(as_Address(dst), src); 1.2876 +} 1.2877 + 1.2878 +void MacroAssembler::movptr(Register dst, ArrayAddress src) { 1.2879 + movl(dst, as_Address(src)); 1.2880 +} 1.2881 + 1.2882 +void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 1.2883 + movss(dst, as_Address(src)); 1.2884 +} 1.2885 + 1.2886 +void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 1.2887 + if (UseXmmLoadAndClearUpper) { movsd (dst, as_Address(src)); return; } 1.2888 + else { movlpd(dst, as_Address(src)); return; } 1.2889 +} 1.2890 + 1.2891 +void Assembler::pushoop(jobject obj) { 1.2892 + push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 1.2893 +} 1.2894 + 1.2895 + 1.2896 +void MacroAssembler::pushptr(AddressLiteral src) { 1.2897 + if (src.is_lval()) { 1.2898 + push_literal32((int32_t)src.target(), src.rspec()); 1.2899 + } else { 1.2900 + pushl(as_Address(src)); 1.2901 + } 1.2902 +} 1.2903 + 1.2904 +void MacroAssembler::test32(Register src1, AddressLiteral src2) { 1.2905 + // src2 must be rval 1.2906 + testl(src1, as_Address(src2)); 1.2907 +} 1.2908 + 1.2909 +// FPU 1.2910 + 1.2911 +void MacroAssembler::fld_x(AddressLiteral src) { 1.2912 + Assembler::fld_x(as_Address(src)); 1.2913 +} 1.2914 + 1.2915 +void MacroAssembler::fld_d(AddressLiteral src) { 1.2916 + fld_d(as_Address(src)); 1.2917 +} 1.2918 + 1.2919 +void MacroAssembler::fld_s(AddressLiteral src) { 1.2920 + fld_s(as_Address(src)); 1.2921 +} 1.2922 + 1.2923 +void MacroAssembler::fldcw(AddressLiteral src) { 1.2924 + Assembler::fldcw(as_Address(src)); 1.2925 +} 1.2926 + 1.2927 +void MacroAssembler::ldmxcsr(AddressLiteral src) { 1.2928 + Assembler::ldmxcsr(as_Address(src)); 1.2929 +} 1.2930 + 1.2931 +// SSE 1.2932 + 1.2933 +void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 1.2934 + andpd(dst, as_Address(src)); 1.2935 +} 1.2936 + 
1.2937 +void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 1.2938 + comisd(dst, as_Address(src)); 1.2939 +} 1.2940 + 1.2941 +void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 1.2942 + comiss(dst, as_Address(src)); 1.2943 +} 1.2944 + 1.2945 +void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 1.2946 + movsd(dst, as_Address(src)); 1.2947 +} 1.2948 + 1.2949 +void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 1.2950 + movss(dst, as_Address(src)); 1.2951 +} 1.2952 + 1.2953 +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 1.2954 + xorpd(dst, as_Address(src)); 1.2955 +} 1.2956 + 1.2957 +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 1.2958 + xorps(dst, as_Address(src)); 1.2959 +} 1.2960 + 1.2961 +void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 1.2962 + ucomisd(dst, as_Address(src)); 1.2963 +} 1.2964 + 1.2965 +void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 1.2966 + ucomiss(dst, as_Address(src)); 1.2967 +} 1.2968 + 1.2969 +void MacroAssembler::null_check(Register reg, int offset) { 1.2970 + if (needs_explicit_null_check(offset)) { 1.2971 + // provoke OS NULL exception if reg = NULL by 1.2972 + // accessing M[reg] w/o changing any (non-CC) registers 1.2973 + cmpl(rax, Address(reg, 0)); 1.2974 + // Note: should probably use testl(rax, Address(reg, 0)); 1.2975 + // may be shorter code (however, this version of 1.2976 + // testl needs to be implemented first) 1.2977 + } else { 1.2978 + // nothing to do, (later) access of M[reg + offset] 1.2979 + // will provoke OS NULL exception if reg = NULL 1.2980 + } 1.2981 +} 1.2982 + 1.2983 + 1.2984 +int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1.2985 + // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 1.2986 + // and "3.9 Partial Register Penalties", p. 22). 
1.2987 + int off; 1.2988 + if (VM_Version::is_P6() || src.uses(dst)) { 1.2989 + off = offset(); 1.2990 + movzxb(dst, src); 1.2991 + } else { 1.2992 + xorl(dst, dst); 1.2993 + off = offset(); 1.2994 + movb(dst, src); 1.2995 + } 1.2996 + return off; 1.2997 +} 1.2998 + 1.2999 + 1.3000 +int MacroAssembler::load_unsigned_word(Register dst, Address src) { 1.3001 + // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 1.3002 + // and "3.9 Partial Register Penalties", p. 22). 1.3003 + int off; 1.3004 + if (VM_Version::is_P6() || src.uses(dst)) { 1.3005 + off = offset(); 1.3006 + movzxw(dst, src); 1.3007 + } else { 1.3008 + xorl(dst, dst); 1.3009 + off = offset(); 1.3010 + movw(dst, src); 1.3011 + } 1.3012 + return off; 1.3013 +} 1.3014 + 1.3015 + 1.3016 +int MacroAssembler::load_signed_byte(Register dst, Address src) { 1.3017 + int off; 1.3018 + if (VM_Version::is_P6()) { 1.3019 + off = offset(); 1.3020 + movsxb(dst, src); 1.3021 + } else { 1.3022 + off = load_unsigned_byte(dst, src); 1.3023 + shll(dst, 24); 1.3024 + sarl(dst, 24); 1.3025 + } 1.3026 + return off; 1.3027 +} 1.3028 + 1.3029 + 1.3030 +int MacroAssembler::load_signed_word(Register dst, Address src) { 1.3031 + int off; 1.3032 + if (VM_Version::is_P6()) { 1.3033 + off = offset(); 1.3034 + movsxw(dst, src); 1.3035 + } else { 1.3036 + off = load_unsigned_word(dst, src); 1.3037 + shll(dst, 16); 1.3038 + sarl(dst, 16); 1.3039 + } 1.3040 + return off; 1.3041 +} 1.3042 + 1.3043 + 1.3044 +void MacroAssembler::extend_sign(Register hi, Register lo) { 1.3045 + // According to Intel Doc. AP-526, "Integer Divide", p.18. 
1.3046 + if (VM_Version::is_P6() && hi == rdx && lo == rax) { 1.3047 + cdql(); 1.3048 + } else { 1.3049 + movl(hi, lo); 1.3050 + sarl(hi, 31); 1.3051 + } 1.3052 +} 1.3053 + 1.3054 + 1.3055 +void MacroAssembler::increment(Register reg, int value) { 1.3056 + if (value == min_jint) {addl(reg, value); return; } 1.3057 + if (value < 0) { decrement(reg, -value); return; } 1.3058 + if (value == 0) { ; return; } 1.3059 + if (value == 1 && UseIncDec) { incl(reg); return; } 1.3060 + /* else */ { addl(reg, value) ; return; } 1.3061 +} 1.3062 + 1.3063 +void MacroAssembler::increment(Address dst, int value) { 1.3064 + if (value == min_jint) {addl(dst, value); return; } 1.3065 + if (value < 0) { decrement(dst, -value); return; } 1.3066 + if (value == 0) { ; return; } 1.3067 + if (value == 1 && UseIncDec) { incl(dst); return; } 1.3068 + /* else */ { addl(dst, value) ; return; } 1.3069 +} 1.3070 + 1.3071 +void MacroAssembler::decrement(Register reg, int value) { 1.3072 + if (value == min_jint) {subl(reg, value); return; } 1.3073 + if (value < 0) { increment(reg, -value); return; } 1.3074 + if (value == 0) { ; return; } 1.3075 + if (value == 1 && UseIncDec) { decl(reg); return; } 1.3076 + /* else */ { subl(reg, value) ; return; } 1.3077 +} 1.3078 + 1.3079 +void MacroAssembler::decrement(Address dst, int value) { 1.3080 + if (value == min_jint) {subl(dst, value); return; } 1.3081 + if (value < 0) { increment(dst, -value); return; } 1.3082 + if (value == 0) { ; return; } 1.3083 + if (value == 1 && UseIncDec) { decl(dst); return; } 1.3084 + /* else */ { subl(dst, value) ; return; } 1.3085 +} 1.3086 + 1.3087 +void MacroAssembler::align(int modulus) { 1.3088 + if (offset() % modulus != 0) nop(modulus - (offset() % modulus)); 1.3089 +} 1.3090 + 1.3091 + 1.3092 +void MacroAssembler::enter() { 1.3093 + pushl(rbp); 1.3094 + movl(rbp, rsp); 1.3095 +} 1.3096 + 1.3097 + 1.3098 +void MacroAssembler::leave() { 1.3099 + movl(rsp, rbp); 1.3100 + popl(rbp); 1.3101 +} 1.3102 + 1.3103 +void 
MacroAssembler::set_last_Java_frame(Register java_thread, 1.3104 + Register last_java_sp, 1.3105 + Register last_java_fp, 1.3106 + address last_java_pc) { 1.3107 + // determine java_thread register 1.3108 + if (!java_thread->is_valid()) { 1.3109 + java_thread = rdi; 1.3110 + get_thread(java_thread); 1.3111 + } 1.3112 + // determine last_java_sp register 1.3113 + if (!last_java_sp->is_valid()) { 1.3114 + last_java_sp = rsp; 1.3115 + } 1.3116 + 1.3117 + // last_java_fp is optional 1.3118 + 1.3119 + if (last_java_fp->is_valid()) { 1.3120 + movl(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 1.3121 + } 1.3122 + 1.3123 + // last_java_pc is optional 1.3124 + 1.3125 + if (last_java_pc != NULL) { 1.3126 + lea(Address(java_thread, 1.3127 + JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 1.3128 + InternalAddress(last_java_pc)); 1.3129 + 1.3130 + } 1.3131 + movl(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 1.3132 +} 1.3133 + 1.3134 +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 1.3135 + // determine java_thread register 1.3136 + if (!java_thread->is_valid()) { 1.3137 + java_thread = rdi; 1.3138 + get_thread(java_thread); 1.3139 + } 1.3140 + // we must set sp to zero to clear frame 1.3141 + movl(Address(java_thread, JavaThread::last_Java_sp_offset()), 0); 1.3142 + if (clear_fp) { 1.3143 + movl(Address(java_thread, JavaThread::last_Java_fp_offset()), 0); 1.3144 + } 1.3145 + 1.3146 + if (clear_pc) 1.3147 + movl(Address(java_thread, JavaThread::last_Java_pc_offset()), 0); 1.3148 + 1.3149 +} 1.3150 + 1.3151 + 1.3152 + 1.3153 +// Implementation of call_VM versions 1.3154 + 1.3155 +void MacroAssembler::call_VM_leaf_base( 1.3156 + address entry_point, 1.3157 + int number_of_arguments 1.3158 +) { 1.3159 + call(RuntimeAddress(entry_point)); 1.3160 + increment(rsp, number_of_arguments * wordSize); 1.3161 +} 1.3162 + 1.3163 + 1.3164 +void 
MacroAssembler::call_VM_base(
  Register oop_result,
  Register java_thread,
  Register last_java_sp,
  address  entry_point,
  int      number_of_arguments,
  bool     check_exceptions
) {
  // Emits the common call sequence into the VM: pushes the thread as an
  // additional argument, records the last Java frame, calls entry_point,
  // re-establishes the thread register, resets the last Java frame, pops
  // thread + arguments, optionally forwards a pending exception and
  // optionally fetches (and clears) the oop result from the thread.
  //
  // oop_result          - if valid, receives the thread's vm_result
  // java_thread         - thread register; if not valid, rdi is used
  // last_java_sp        - sp to record; if not valid, rsp is used
  // entry_point         - address to call
  // number_of_arguments - arguments already pushed by the caller
  // check_exceptions    - emit the pending-exception check after the call

  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
  assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
  // push java thread (becomes first argument of C function)
  pushl(java_thread);
  // set last Java frame before call
  assert(last_java_sp != rbp, "this code doesn't work for last_java_sp == rbp, which currently can't portably work anyway since C2 doesn't save rbp,");
  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
  // do the call
  call(RuntimeAddress(entry_point));
  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (java_thread == rdi || java_thread == rsi) {
    // rdi & rsi are callee saved -> nothing to do
#ifdef ASSERT
    // Debug builds emit a check that the register still equals the
    // current thread; rax is used as scratch and preserved around it.
    guarantee(java_thread != rax, "change this code");
    pushl(rax);
    { Label L;
      get_thread(rax);
      cmpl(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    popl(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);
  // discard thread and arguments
  addl(rsp, (1 + number_of_arguments)*wordSize);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpl(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD);
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movl(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movl(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result);
  }
}


// Emits nothing in this class; called from call_VM_base when CC_INTERP is
// not defined.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// Emits nothing in this class; called from call_VM_base when CC_INTERP is
// not defined.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

// Computes rax = rsp + (1 + number_of_arguments) * wordSize and passes it to
// call_VM_base as last_java_sp (thread register left unspecified).
// NOTE(review): the "1 +" presumably skips the return address pushed by the
// call(C) in the call_VM wrappers below - confirm.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  leal(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
}


// call_VM variants without an explicit last_java_sp.
// Each emits:  call C; jmp E; C: <push args>; <call_VM_helper>; ret; E:
// i.e. the VM call sequence is entered via a local call and left via ret(0).
void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}


// As above, with one register argument pushed before the helper call.
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pushl(arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}


// As above, with two register arguments (pushed last-to-first).
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pushl(arg_2);
  pushl(arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}


// As above, with three register arguments (pushed last-to-first).
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pushl(arg_3);
  pushl(arg_2);
  pushl(arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}


// call_VM variants with an explicit last_java_sp register: forward directly
// to call_VM_base with the thread register left unspecified (noreg).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}


// One register argument: pushed, then the call is emitted.
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  pushl(arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}


// Two register arguments (pushed last-to-first).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  pushl(arg_2);
  pushl(arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}


// Three register arguments (pushed last-to-first).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  pushl(arg_3);
  pushl(arg_2);
  pushl(arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}


// call_VM_leaf variants: push the register arguments (last-to-first) and
// delegate to call_VM_leaf_base.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}


void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  pushl(arg_1);
  call_VM_leaf(entry_point, 1);
}


void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  pushl(arg_2);
  pushl(arg_1);
  call_VM_leaf(entry_point, 2);
}


void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  pushl(arg_3);
  pushl(arg_2);
  pushl(arg_1);
  call_VM_leaf(entry_point, 3);
}


// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.

void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}


// Same as store_check(obj); the dst argument is not used by this
// implementation.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: shifts obj right by the card shift, turning it into a card index.
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrl(obj, CardTableModRefBS::card_shift);
}


// Part 2: stores a zero byte at byte_map_base[obj], where obj is the card
// index produced by part 1.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  ExternalAddress cardtable((address)ct->byte_map_base);
  Address index(noreg, obj, Address::times_1);

  movb(as_Address(ArrayAddress(cardtable, index)), 0);
}


void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}


int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  // remember where the idivl itself is emitted; this is the return value
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}


// Emits a 64-bit negate of the register pair hi:lo
// (negl lo; adcl hi, 0; negl hi).
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}


void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] .               .
  //           ....    | y_rsp_offset  |
  //          [ y_lo ] '  (in bytes)   | x_rsp_offset
  //          [ y_hi ]                 | (in bytes)
  //           ....                    |
  //          [ x_lo ]                 '
  //          [ x_hi ]
  //           ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  //
  // Uses rbx and rcx as scratch in addition to the rdx:rax result pair.
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}


void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  // The shift count is taken from rcx; sign_extension selects an arithmetic
  // (sarl) rather than a logical (shrl / zero fill) shift of the high word.
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}


// Note: y_lo will be destroyed
// NOTE(review): the sequence below only writes x_hi (the result register);
// no instruction visible here writes y_lo - confirm whether the note above
// is still accurate.
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  // Leaves -1, 0 or 1 in x_hi. The high words are compared signed
  // (less/greater), the low words unsigned (below).
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrement(x_hi);

  bind(done);
}


// Preserves rax: pushed on the stack if tmp is noreg, otherwise copied into
// tmp (nothing is emitted if tmp is rax itself).
void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) pushl(rax);
  else if (tmp != rax) movl(tmp, rax);
}


// Counterpart of save_rax: restores rax from the stack or from tmp.
void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) popl(rax);
  else if (tmp != rax) movl(rax, tmp);
}


// Emits a loop that repeats fprem while the parity flag is set after
// transferring the FPU status word to eflags (fnstsw_ax + sahf); rax is
// preserved via tmp (see save_rax).
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
    sahf();
    jcc(Assembler::parity, L);
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


// Threshold loaded by trigfunc to choose between the inline FPU instruction
// and the runtime call.
static const double pi_4 = 0.7853981633974483;

// Emits code for sin ('s'), cos ('c') or tan ('t') of the value on top of the
// FPU stack. If |x| is not above pi/4 the FPU instruction is used directly;
// otherwise SharedRuntime::dsin/dcos/dtan is called, saving and restoring
// the other FPU registers when num_fpu_regs_in_use > 1.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    pushl(tmp);
  }

  Label slow_case, done;

  // x ?<= pi/4
  fld_d(ExternalAddress((address)&pi_4));
  fld_s(1);                // Stack:  X  PI/4  X
  fabs();                  // Stack: |X| PI/4  X
  fcmp(tmp);
  jcc(Assembler::above, slow_case);

  // fastest case: -pi/4 <= x <= pi/4
  switch(trig) {
  case 's':
    fsin();
    break;
  case 'c':
    fcos();
    break;
  case 't':
    ftan();
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
  jmp(done);

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pushad();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subl(rsp, wordSize*2);
      fstp_d(Address(rsp, 0));
    }
    // the argument was spilled first, so it sits in the highest slot
    incoming_argument_and_return_value_offset = 2*wordSize*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  // pass the argument to the runtime routine as a double on the stack
  subl(rsp, wordSize*2);
  fstp_d(Address(rsp, 0));
  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
    }
    break;
  case 'c':
    {
      call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
    }
    break;
  case 't':
    {
      call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
  // discard the outgoing argument slot
  addl(rsp, wordSize * 2);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addl(rsp, wordSize*2);
    }
  }
  popad();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    popl(tmp);
  }
}

// Branches to L if the parity flag is set after copying the FPU status word
// into eflags; rax is preserved via tmp (see save_rax).
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}


// Like jC2, but branches to L if the parity flag is NOT set.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}


// Compares the two topmost FPU stack entries and pops both.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}


// Compares ST0 with ST(index) and leaves the outcome in eflags.
// pop_left / pop_right select how many operands are popped (pop_right
// requires pop_left). With cmov support fucomi(p) is used and tmp must be
// noreg; otherwise fcom/fcomp/fcompp is used and the FPU condition codes are
// moved to eflags through rax, which is preserved via tmp.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}


// Compares the two topmost FPU stack entries, pops both, and materializes
// the result in dst.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}


// Compares ST0 with ST(index) via fcmp and sets dst to -1, 0 or 1.
// An unordered result yields -1 if unordered_is_less, otherwise 1.
// dst doubles as the fcmp temporary when cmov is not supported.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrement(dst);
  }
  bind(L);
}

// Single-precision SSE compare (ucomiss) of opr1 with opr2; sets dst to
// -1, 0 or 1 using the same flag-to-int sequence as fcmp2int.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrement(dst);
  }
  bind(L);
}

// Double-precision SSE compare (ucomisd) of opr1 with opr2; sets dst to
// -1, 0 or 1 using the same flag-to-int sequence as fcmp2int.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrement(dst);
  }
  bind(L);
}



// Pops the FPU stack without storing: ffree followed by fincstp.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}


void
MacroAssembler::sign_extend_short(Register reg) { 1.3849 + if (VM_Version::is_P6()) { 1.3850 + movsxw(reg, reg); 1.3851 + } else { 1.3852 + shll(reg, 16); 1.3853 + sarl(reg, 16); 1.3854 + } 1.3855 +} 1.3856 + 1.3857 + 1.3858 +void MacroAssembler::sign_extend_byte(Register reg) { 1.3859 + if (VM_Version::is_P6() && reg->has_byte_register()) { 1.3860 + movsxb(reg, reg); 1.3861 + } else { 1.3862 + shll(reg, 24); 1.3863 + sarl(reg, 24); 1.3864 + } 1.3865 +} 1.3866 + 1.3867 + 1.3868 +void MacroAssembler::division_with_shift (Register reg, int shift_value) { 1.3869 + assert (shift_value > 0, "illegal shift value"); 1.3870 + Label _is_positive; 1.3871 + testl (reg, reg); 1.3872 + jcc (Assembler::positive, _is_positive); 1.3873 + int offset = (1 << shift_value) - 1 ; 1.3874 + 1.3875 + increment(reg, offset); 1.3876 + 1.3877 + bind (_is_positive); 1.3878 + sarl(reg, shift_value); 1.3879 +} 1.3880 + 1.3881 + 1.3882 +void MacroAssembler::round_to(Register reg, int modulus) { 1.3883 + addl(reg, modulus - 1); 1.3884 + andl(reg, -modulus); 1.3885 +} 1.3886 + 1.3887 +// C++ bool manipulation 1.3888 + 1.3889 +void MacroAssembler::movbool(Register dst, Address src) { 1.3890 + if(sizeof(bool) == 1) 1.3891 + movb(dst, src); 1.3892 + else if(sizeof(bool) == 2) 1.3893 + movw(dst, src); 1.3894 + else if(sizeof(bool) == 4) 1.3895 + movl(dst, src); 1.3896 + else 1.3897 + // unsupported 1.3898 + ShouldNotReachHere(); 1.3899 +} 1.3900 + 1.3901 +void MacroAssembler::movbool(Address dst, bool boolconst) { 1.3902 + if(sizeof(bool) == 1) 1.3903 + movb(dst, (int) boolconst); 1.3904 + else if(sizeof(bool) == 2) 1.3905 + movw(dst, (int) boolconst); 1.3906 + else if(sizeof(bool) == 4) 1.3907 + movl(dst, (int) boolconst); 1.3908 + else 1.3909 + // unsupported 1.3910 + ShouldNotReachHere(); 1.3911 +} 1.3912 + 1.3913 +void MacroAssembler::movbool(Address dst, Register src) { 1.3914 + if(sizeof(bool) == 1) 1.3915 + movb(dst, src); 1.3916 + else if(sizeof(bool) == 2) 1.3917 + movw(dst, src); 1.3918 + 
else if(sizeof(bool) == 4) 1.3919 + movl(dst, src); 1.3920 + else 1.3921 + // unsupported 1.3922 + ShouldNotReachHere(); 1.3923 +} 1.3924 + 1.3925 +void MacroAssembler::testbool(Register dst) { 1.3926 + if(sizeof(bool) == 1) 1.3927 + testb(dst, (int) 0xff); 1.3928 + else if(sizeof(bool) == 2) { 1.3929 + // testw implementation needed for two byte bools 1.3930 + ShouldNotReachHere(); 1.3931 + } else if(sizeof(bool) == 4) 1.3932 + testl(dst, dst); 1.3933 + else 1.3934 + // unsupported 1.3935 + ShouldNotReachHere(); 1.3936 +} 1.3937 + 1.3938 +void MacroAssembler::verify_oop(Register reg, const char* s) { 1.3939 + if (!VerifyOops) return; 1.3940 + // Pass register number to verify_oop_subroutine 1.3941 + char* b = new char[strlen(s) + 50]; 1.3942 + sprintf(b, "verify_oop: %s: %s", reg->name(), s); 1.3943 + pushl(rax); // save rax, 1.3944 + pushl(reg); // pass register argument 1.3945 + ExternalAddress buffer((address) b); 1.3946 + pushptr(buffer.addr()); 1.3947 + // call indirectly to solve generation ordering problem 1.3948 + movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 1.3949 + call(rax); 1.3950 +} 1.3951 + 1.3952 + 1.3953 +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 1.3954 + if (!VerifyOops) return; 1.3955 + // QQQ fix this 1.3956 + // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 1.3957 + // Pass register number to verify_oop_subroutine 1.3958 + char* b = new char[strlen(s) + 50]; 1.3959 + sprintf(b, "verify_oop_addr: %s", s); 1.3960 + pushl(rax); // save rax, 1.3961 + // addr may contain rsp so we will have to adjust it based on the push 1.3962 + // we just did 1.3963 + if (addr.uses(rsp)) { 1.3964 + leal(rax, addr); 1.3965 + pushl(Address(rax, BytesPerWord)); 1.3966 + } else { 1.3967 + pushl(addr); 1.3968 + } 1.3969 + ExternalAddress buffer((address) b); 1.3970 + // pass msg argument 1.3971 + pushptr(buffer.addr()); 1.3972 + // call indirectly to solve generation 
ordering problem 1.3973 + movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 1.3974 + call(rax); 1.3975 + // Caller pops the arguments and restores rax, from the stack 1.3976 +} 1.3977 + 1.3978 + 1.3979 +void MacroAssembler::stop(const char* msg) { 1.3980 + ExternalAddress message((address)msg); 1.3981 + // push address of message 1.3982 + pushptr(message.addr()); 1.3983 + { Label L; call(L, relocInfo::none); bind(L); } // push eip 1.3984 + pushad(); // push registers 1.3985 + call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug))); 1.3986 + hlt(); 1.3987 +} 1.3988 + 1.3989 + 1.3990 +void MacroAssembler::warn(const char* msg) { 1.3991 + push_CPU_state(); 1.3992 + 1.3993 + ExternalAddress message((address) msg); 1.3994 + // push address of message 1.3995 + pushptr(message.addr()); 1.3996 + 1.3997 + call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 1.3998 + addl(rsp, wordSize); // discard argument 1.3999 + pop_CPU_state(); 1.4000 +} 1.4001 + 1.4002 + 1.4003 +void MacroAssembler::debug(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 1.4004 + // In order to get locks to work, we need to fake a in_VM state 1.4005 + JavaThread* thread = JavaThread::current(); 1.4006 + JavaThreadState saved_state = thread->thread_state(); 1.4007 + thread->set_thread_state(_thread_in_vm); 1.4008 + if (ShowMessageBoxOnError) { 1.4009 + JavaThread* thread = JavaThread::current(); 1.4010 + JavaThreadState saved_state = thread->thread_state(); 1.4011 + thread->set_thread_state(_thread_in_vm); 1.4012 + ttyLocker ttyl; 1.4013 + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 1.4014 + BytecodeCounter::print(); 1.4015 + } 1.4016 + // To see where a verify_oop failed, get $ebx+40/X for this frame. 1.4017 + // This is the value of eip which points to where verify_oop will return. 
1.4018 + if (os::message_box(msg, "Execution stopped, print registers?")) { 1.4019 + tty->print_cr("eip = 0x%08x", eip); 1.4020 + tty->print_cr("rax, = 0x%08x", rax); 1.4021 + tty->print_cr("rbx, = 0x%08x", rbx); 1.4022 + tty->print_cr("rcx = 0x%08x", rcx); 1.4023 + tty->print_cr("rdx = 0x%08x", rdx); 1.4024 + tty->print_cr("rdi = 0x%08x", rdi); 1.4025 + tty->print_cr("rsi = 0x%08x", rsi); 1.4026 + tty->print_cr("rbp, = 0x%08x", rbp); 1.4027 + tty->print_cr("rsp = 0x%08x", rsp); 1.4028 + BREAKPOINT; 1.4029 + } 1.4030 + } else { 1.4031 + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 1.4032 + assert(false, "DEBUG MESSAGE"); 1.4033 + } 1.4034 + ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 1.4035 +} 1.4036 + 1.4037 + 1.4038 + 1.4039 +void MacroAssembler::os_breakpoint() { 1.4040 + // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 1.4041 + // (e.g., MSVC can't call ps() otherwise) 1.4042 + call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 1.4043 +} 1.4044 + 1.4045 + 1.4046 +void MacroAssembler::push_fTOS() { 1.4047 + subl(rsp, 2 * wordSize); 1.4048 + fstp_d(Address(rsp, 0)); 1.4049 +} 1.4050 + 1.4051 + 1.4052 +void MacroAssembler::pop_fTOS() { 1.4053 + fld_d(Address(rsp, 0)); 1.4054 + addl(rsp, 2 * wordSize); 1.4055 +} 1.4056 + 1.4057 + 1.4058 +void MacroAssembler::empty_FPU_stack() { 1.4059 + if (VM_Version::supports_mmx()) { 1.4060 + emms(); 1.4061 + } else { 1.4062 + for (int i = 8; i-- > 0; ) ffree(i); 1.4063 + } 1.4064 +} 1.4065 + 1.4066 + 1.4067 +class ControlWord { 1.4068 + public: 1.4069 + int32_t _value; 1.4070 + 1.4071 + int rounding_control() const { return (_value >> 10) & 3 ; } 1.4072 + int precision_control() const { return (_value >> 8) & 3 ; } 1.4073 + bool precision() const { return ((_value >> 5) & 1) != 0; } 1.4074 + bool underflow() const { return ((_value >> 4) & 1) != 0; } 1.4075 + bool overflow() const { return ((_value >> 3) & 1) != 0; 
} 1.4076 + bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 1.4077 + bool denormalized() const { return ((_value >> 1) & 1) != 0; } 1.4078 + bool invalid() const { return ((_value >> 0) & 1) != 0; } 1.4079 + 1.4080 + void print() const { 1.4081 + // rounding control 1.4082 + const char* rc; 1.4083 + switch (rounding_control()) { 1.4084 + case 0: rc = "round near"; break; 1.4085 + case 1: rc = "round down"; break; 1.4086 + case 2: rc = "round up "; break; 1.4087 + case 3: rc = "chop "; break; 1.4088 + }; 1.4089 + // precision control 1.4090 + const char* pc; 1.4091 + switch (precision_control()) { 1.4092 + case 0: pc = "24 bits "; break; 1.4093 + case 1: pc = "reserved"; break; 1.4094 + case 2: pc = "53 bits "; break; 1.4095 + case 3: pc = "64 bits "; break; 1.4096 + }; 1.4097 + // flags 1.4098 + char f[9]; 1.4099 + f[0] = ' '; 1.4100 + f[1] = ' '; 1.4101 + f[2] = (precision ()) ? 'P' : 'p'; 1.4102 + f[3] = (underflow ()) ? 'U' : 'u'; 1.4103 + f[4] = (overflow ()) ? 'O' : 'o'; 1.4104 + f[5] = (zero_divide ()) ? 'Z' : 'z'; 1.4105 + f[6] = (denormalized()) ? 'D' : 'd'; 1.4106 + f[7] = (invalid ()) ? 
'I' : 'i'; 1.4107 + f[8] = '\x0'; 1.4108 + // output 1.4109 + printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 1.4110 + } 1.4111 + 1.4112 +}; 1.4113 + 1.4114 + 1.4115 +class StatusWord { 1.4116 + public: 1.4117 + int32_t _value; 1.4118 + 1.4119 + bool busy() const { return ((_value >> 15) & 1) != 0; } 1.4120 + bool C3() const { return ((_value >> 14) & 1) != 0; } 1.4121 + bool C2() const { return ((_value >> 10) & 1) != 0; } 1.4122 + bool C1() const { return ((_value >> 9) & 1) != 0; } 1.4123 + bool C0() const { return ((_value >> 8) & 1) != 0; } 1.4124 + int top() const { return (_value >> 11) & 7 ; } 1.4125 + bool error_status() const { return ((_value >> 7) & 1) != 0; } 1.4126 + bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 1.4127 + bool precision() const { return ((_value >> 5) & 1) != 0; } 1.4128 + bool underflow() const { return ((_value >> 4) & 1) != 0; } 1.4129 + bool overflow() const { return ((_value >> 3) & 1) != 0; } 1.4130 + bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 1.4131 + bool denormalized() const { return ((_value >> 1) & 1) != 0; } 1.4132 + bool invalid() const { return ((_value >> 0) & 1) != 0; } 1.4133 + 1.4134 + void print() const { 1.4135 + // condition codes 1.4136 + char c[5]; 1.4137 + c[0] = (C3()) ? '3' : '-'; 1.4138 + c[1] = (C2()) ? '2' : '-'; 1.4139 + c[2] = (C1()) ? '1' : '-'; 1.4140 + c[3] = (C0()) ? '0' : '-'; 1.4141 + c[4] = '\x0'; 1.4142 + // flags 1.4143 + char f[9]; 1.4144 + f[0] = (error_status()) ? 'E' : '-'; 1.4145 + f[1] = (stack_fault ()) ? 'S' : '-'; 1.4146 + f[2] = (precision ()) ? 'P' : '-'; 1.4147 + f[3] = (underflow ()) ? 'U' : '-'; 1.4148 + f[4] = (overflow ()) ? 'O' : '-'; 1.4149 + f[5] = (zero_divide ()) ? 'Z' : '-'; 1.4150 + f[6] = (denormalized()) ? 'D' : '-'; 1.4151 + f[7] = (invalid ()) ? 
'I' : '-'; 1.4152 + f[8] = '\x0'; 1.4153 + // output 1.4154 + printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 1.4155 + } 1.4156 + 1.4157 +}; 1.4158 + 1.4159 + 1.4160 +class TagWord { 1.4161 + public: 1.4162 + int32_t _value; 1.4163 + 1.4164 + int tag_at(int i) const { return (_value >> (i*2)) & 3; } 1.4165 + 1.4166 + void print() const { 1.4167 + printf("%04x", _value & 0xFFFF); 1.4168 + } 1.4169 + 1.4170 +}; 1.4171 + 1.4172 + 1.4173 +class FPU_Register { 1.4174 + public: 1.4175 + int32_t _m0; 1.4176 + int32_t _m1; 1.4177 + int16_t _ex; 1.4178 + 1.4179 + bool is_indefinite() const { 1.4180 + return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 1.4181 + } 1.4182 + 1.4183 + void print() const { 1.4184 + char sign = (_ex < 0) ? '-' : '+'; 1.4185 + const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; 1.4186 + printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 1.4187 + }; 1.4188 + 1.4189 +}; 1.4190 + 1.4191 + 1.4192 +class FPU_State { 1.4193 + public: 1.4194 + enum { 1.4195 + register_size = 10, 1.4196 + number_of_registers = 8, 1.4197 + register_mask = 7 1.4198 + }; 1.4199 + 1.4200 + ControlWord _control_word; 1.4201 + StatusWord _status_word; 1.4202 + TagWord _tag_word; 1.4203 + int32_t _error_offset; 1.4204 + int32_t _error_selector; 1.4205 + int32_t _data_offset; 1.4206 + int32_t _data_selector; 1.4207 + int8_t _register[register_size * number_of_registers]; 1.4208 + 1.4209 + int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 1.4210 + FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 1.4211 + 1.4212 + const char* tag_as_string(int tag) const { 1.4213 + switch (tag) { 1.4214 + case 0: return "valid"; 1.4215 + case 1: return "zero"; 1.4216 + case 2: return "special"; 1.4217 + case 3: return "empty"; 1.4218 + } 1.4219 + ShouldNotReachHere() 1.4220 + return NULL; 1.4221 + } 1.4222 + 1.4223 + void print() const { 1.4224 + // 
print computation registers 1.4225 + { int t = _status_word.top(); 1.4226 + for (int i = 0; i < number_of_registers; i++) { 1.4227 + int j = (i - t) & register_mask; 1.4228 + printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); 1.4229 + st(j)->print(); 1.4230 + printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 1.4231 + } 1.4232 + } 1.4233 + printf("\n"); 1.4234 + // print control registers 1.4235 + printf("ctrl = "); _control_word.print(); printf("\n"); 1.4236 + printf("stat = "); _status_word .print(); printf("\n"); 1.4237 + printf("tags = "); _tag_word .print(); printf("\n"); 1.4238 + } 1.4239 + 1.4240 +}; 1.4241 + 1.4242 + 1.4243 +class Flag_Register { 1.4244 + public: 1.4245 + int32_t _value; 1.4246 + 1.4247 + bool overflow() const { return ((_value >> 11) & 1) != 0; } 1.4248 + bool direction() const { return ((_value >> 10) & 1) != 0; } 1.4249 + bool sign() const { return ((_value >> 7) & 1) != 0; } 1.4250 + bool zero() const { return ((_value >> 6) & 1) != 0; } 1.4251 + bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 1.4252 + bool parity() const { return ((_value >> 2) & 1) != 0; } 1.4253 + bool carry() const { return ((_value >> 0) & 1) != 0; } 1.4254 + 1.4255 + void print() const { 1.4256 + // flags 1.4257 + char f[8]; 1.4258 + f[0] = (overflow ()) ? 'O' : '-'; 1.4259 + f[1] = (direction ()) ? 'D' : '-'; 1.4260 + f[2] = (sign ()) ? 'S' : '-'; 1.4261 + f[3] = (zero ()) ? 'Z' : '-'; 1.4262 + f[4] = (auxiliary_carry()) ? 'A' : '-'; 1.4263 + f[5] = (parity ()) ? 'P' : '-'; 1.4264 + f[6] = (carry ()) ? 
'C' : '-'; 1.4265 + f[7] = '\x0'; 1.4266 + // output 1.4267 + printf("%08x flags = %s", _value, f); 1.4268 + } 1.4269 + 1.4270 +}; 1.4271 + 1.4272 + 1.4273 +class IU_Register { 1.4274 + public: 1.4275 + int32_t _value; 1.4276 + 1.4277 + void print() const { 1.4278 + printf("%08x %11d", _value, _value); 1.4279 + } 1.4280 + 1.4281 +}; 1.4282 + 1.4283 + 1.4284 +class IU_State { 1.4285 + public: 1.4286 + Flag_Register _eflags; 1.4287 + IU_Register _rdi; 1.4288 + IU_Register _rsi; 1.4289 + IU_Register _rbp; 1.4290 + IU_Register _rsp; 1.4291 + IU_Register _rbx; 1.4292 + IU_Register _rdx; 1.4293 + IU_Register _rcx; 1.4294 + IU_Register _rax; 1.4295 + 1.4296 + void print() const { 1.4297 + // computation registers 1.4298 + printf("rax, = "); _rax.print(); printf("\n"); 1.4299 + printf("rbx, = "); _rbx.print(); printf("\n"); 1.4300 + printf("rcx = "); _rcx.print(); printf("\n"); 1.4301 + printf("rdx = "); _rdx.print(); printf("\n"); 1.4302 + printf("rdi = "); _rdi.print(); printf("\n"); 1.4303 + printf("rsi = "); _rsi.print(); printf("\n"); 1.4304 + printf("rbp, = "); _rbp.print(); printf("\n"); 1.4305 + printf("rsp = "); _rsp.print(); printf("\n"); 1.4306 + printf("\n"); 1.4307 + // control registers 1.4308 + printf("flgs = "); _eflags.print(); printf("\n"); 1.4309 + } 1.4310 +}; 1.4311 + 1.4312 + 1.4313 +class CPU_State { 1.4314 + public: 1.4315 + FPU_State _fpu_state; 1.4316 + IU_State _iu_state; 1.4317 + 1.4318 + void print() const { 1.4319 + printf("--------------------------------------------------\n"); 1.4320 + _iu_state .print(); 1.4321 + printf("\n"); 1.4322 + _fpu_state.print(); 1.4323 + printf("--------------------------------------------------\n"); 1.4324 + } 1.4325 + 1.4326 +}; 1.4327 + 1.4328 + 1.4329 +static void _print_CPU_state(CPU_State* state) { 1.4330 + state->print(); 1.4331 +}; 1.4332 + 1.4333 + 1.4334 +void MacroAssembler::print_CPU_state() { 1.4335 + push_CPU_state(); 1.4336 + pushl(rsp); // pass CPU state 1.4337 + 
call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 1.4338 + addl(rsp, wordSize); // discard argument 1.4339 + pop_CPU_state(); 1.4340 +} 1.4341 + 1.4342 + 1.4343 +static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 1.4344 + static int counter = 0; 1.4345 + FPU_State* fs = &state->_fpu_state; 1.4346 + counter++; 1.4347 + // For leaf calls, only verify that the top few elements remain empty. 1.4348 + // We only need 1 empty at the top for C2 code. 1.4349 + if( stack_depth < 0 ) { 1.4350 + if( fs->tag_for_st(7) != 3 ) { 1.4351 + printf("FPR7 not empty\n"); 1.4352 + state->print(); 1.4353 + assert(false, "error"); 1.4354 + return false; 1.4355 + } 1.4356 + return true; // All other stack states do not matter 1.4357 + } 1.4358 + 1.4359 + assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 1.4360 + "bad FPU control word"); 1.4361 + 1.4362 + // compute stack depth 1.4363 + int i = 0; 1.4364 + while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 1.4365 + int d = i; 1.4366 + while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 1.4367 + // verify findings 1.4368 + if (i != FPU_State::number_of_registers) { 1.4369 + // stack not contiguous 1.4370 + printf("%s: stack not contiguous at ST%d\n", s, i); 1.4371 + state->print(); 1.4372 + assert(false, "error"); 1.4373 + return false; 1.4374 + } 1.4375 + // check if computed stack depth corresponds to expected stack depth 1.4376 + if (stack_depth < 0) { 1.4377 + // expected stack depth is -stack_depth or less 1.4378 + if (d > -stack_depth) { 1.4379 + // too many elements on the stack 1.4380 + printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 1.4381 + state->print(); 1.4382 + assert(false, "error"); 1.4383 + return false; 1.4384 + } 1.4385 + } else { 1.4386 + // expected stack depth is stack_depth 1.4387 + if (d != stack_depth) { 1.4388 + // wrong stack depth 1.4389 + printf("%s: %d stack elements 
expected but found %d\n", s, stack_depth, d); 1.4390 + state->print(); 1.4391 + assert(false, "error"); 1.4392 + return false; 1.4393 + } 1.4394 + } 1.4395 + // everything is cool 1.4396 + return true; 1.4397 +} 1.4398 + 1.4399 + 1.4400 +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 1.4401 + if (!VerifyFPU) return; 1.4402 + push_CPU_state(); 1.4403 + pushl(rsp); // pass CPU state 1.4404 + ExternalAddress msg((address) s); 1.4405 + // pass message string s 1.4406 + pushptr(msg.addr()); 1.4407 + pushl(stack_depth); // pass stack depth 1.4408 + call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 1.4409 + addl(rsp, 3 * wordSize); // discard arguments 1.4410 + // check for error 1.4411 + { Label L; 1.4412 + testl(rax, rax); 1.4413 + jcc(Assembler::notZero, L); 1.4414 + int3(); // break if error condition 1.4415 + bind(L); 1.4416 + } 1.4417 + pop_CPU_state(); 1.4418 +} 1.4419 + 1.4420 + 1.4421 +void MacroAssembler::push_IU_state() { 1.4422 + pushad(); 1.4423 + pushfd(); 1.4424 +} 1.4425 + 1.4426 + 1.4427 +void MacroAssembler::pop_IU_state() { 1.4428 + popfd(); 1.4429 + popad(); 1.4430 +} 1.4431 + 1.4432 + 1.4433 +void MacroAssembler::push_FPU_state() { 1.4434 + subl(rsp, FPUStateSizeInWords * wordSize); 1.4435 + fnsave(Address(rsp, 0)); 1.4436 + fwait(); 1.4437 +} 1.4438 + 1.4439 + 1.4440 +void MacroAssembler::pop_FPU_state() { 1.4441 + frstor(Address(rsp, 0)); 1.4442 + addl(rsp, FPUStateSizeInWords * wordSize); 1.4443 +} 1.4444 + 1.4445 + 1.4446 +void MacroAssembler::push_CPU_state() { 1.4447 + push_IU_state(); 1.4448 + push_FPU_state(); 1.4449 +} 1.4450 + 1.4451 + 1.4452 +void MacroAssembler::pop_CPU_state() { 1.4453 + pop_FPU_state(); 1.4454 + pop_IU_state(); 1.4455 +} 1.4456 + 1.4457 + 1.4458 +void MacroAssembler::push_callee_saved_registers() { 1.4459 + pushl(rsi); 1.4460 + pushl(rdi); 1.4461 + pushl(rdx); 1.4462 + pushl(rcx); 1.4463 +} 1.4464 + 1.4465 + 1.4466 +void MacroAssembler::pop_callee_saved_registers() { 1.4467 + popl(rcx); 
1.4468 + popl(rdx); 1.4469 + popl(rdi); 1.4470 + popl(rsi); 1.4471 +} 1.4472 + 1.4473 + 1.4474 +void MacroAssembler::set_word_if_not_zero(Register dst) { 1.4475 + xorl(dst, dst); 1.4476 + set_byte_if_not_zero(dst); 1.4477 +} 1.4478 + 1.4479 +// Write serialization page so VM thread can do a pseudo remote membar. 1.4480 +// We use the current thread pointer to calculate a thread specific 1.4481 +// offset to write to within the page. This minimizes bus traffic 1.4482 +// due to cache line collision. 1.4483 +void MacroAssembler::serialize_memory(Register thread, Register tmp) { 1.4484 + movl(tmp, thread); 1.4485 + shrl(tmp, os::get_serialize_page_shift_count()); 1.4486 + andl(tmp, (os::vm_page_size() - sizeof(int))); 1.4487 + 1.4488 + Address index(noreg, tmp, Address::times_1); 1.4489 + ExternalAddress page(os::get_memory_serialize_page()); 1.4490 + 1.4491 + movptr(ArrayAddress(page, index), tmp); 1.4492 +} 1.4493 + 1.4494 + 1.4495 +void MacroAssembler::verify_tlab() { 1.4496 +#ifdef ASSERT 1.4497 + if (UseTLAB && VerifyOops) { 1.4498 + Label next, ok; 1.4499 + Register t1 = rsi; 1.4500 + Register thread_reg = rbx; 1.4501 + 1.4502 + pushl(t1); 1.4503 + pushl(thread_reg); 1.4504 + get_thread(thread_reg); 1.4505 + 1.4506 + movl(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 1.4507 + cmpl(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 1.4508 + jcc(Assembler::aboveEqual, next); 1.4509 + stop("assert(top >= start)"); 1.4510 + should_not_reach_here(); 1.4511 + 1.4512 + bind(next); 1.4513 + movl(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 1.4514 + cmpl(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 1.4515 + jcc(Assembler::aboveEqual, ok); 1.4516 + stop("assert(top <= end)"); 1.4517 + should_not_reach_here(); 1.4518 + 1.4519 + bind(ok); 1.4520 + popl(thread_reg); 1.4521 + popl(t1); 1.4522 + } 1.4523 +#endif 1.4524 +} 1.4525 + 1.4526 + 1.4527 +// Defines obj, preserves var_size_in_bytes 
1.4528 +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, 1.4529 + Register t1, Label& slow_case) { 1.4530 + assert(obj == rax, "obj must be in rax, for cmpxchg"); 1.4531 + assert_different_registers(obj, var_size_in_bytes, t1); 1.4532 + Register end = t1; 1.4533 + Label retry; 1.4534 + bind(retry); 1.4535 + ExternalAddress heap_top((address) Universe::heap()->top_addr()); 1.4536 + movptr(obj, heap_top); 1.4537 + if (var_size_in_bytes == noreg) { 1.4538 + leal(end, Address(obj, con_size_in_bytes)); 1.4539 + } else { 1.4540 + leal(end, Address(obj, var_size_in_bytes, Address::times_1)); 1.4541 + } 1.4542 + // if end < obj then we wrapped around => object too long => slow case 1.4543 + cmpl(end, obj); 1.4544 + jcc(Assembler::below, slow_case); 1.4545 + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 1.4546 + jcc(Assembler::above, slow_case); 1.4547 + // Compare obj with the top addr, and if still equal, store the new top addr in 1.4548 + // end at the address of the top addr pointer. Sets ZF if was equal, and clears 1.4549 + // it otherwise. Use lock prefix for atomicity on MPs. 1.4550 + if (os::is_MP()) { 1.4551 + lock(); 1.4552 + } 1.4553 + cmpxchgptr(end, heap_top); 1.4554 + jcc(Assembler::notEqual, retry); 1.4555 +} 1.4556 + 1.4557 + 1.4558 +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
1.4559 +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, 1.4560 + Register t1, Register t2, Label& slow_case) { 1.4561 + assert_different_registers(obj, t1, t2); 1.4562 + assert_different_registers(obj, var_size_in_bytes, t1); 1.4563 + Register end = t2; 1.4564 + Register thread = t1; 1.4565 + 1.4566 + verify_tlab(); 1.4567 + 1.4568 + get_thread(thread); 1.4569 + 1.4570 + movl(obj, Address(thread, JavaThread::tlab_top_offset())); 1.4571 + if (var_size_in_bytes == noreg) { 1.4572 + leal(end, Address(obj, con_size_in_bytes)); 1.4573 + } else { 1.4574 + leal(end, Address(obj, var_size_in_bytes, Address::times_1)); 1.4575 + } 1.4576 + cmpl(end, Address(thread, JavaThread::tlab_end_offset())); 1.4577 + jcc(Assembler::above, slow_case); 1.4578 + 1.4579 + // update the tlab top pointer 1.4580 + movl(Address(thread, JavaThread::tlab_top_offset()), end); 1.4581 + 1.4582 + // recover var_size_in_bytes if necessary 1.4583 + if (var_size_in_bytes == end) { 1.4584 + subl(var_size_in_bytes, obj); 1.4585 + } 1.4586 + verify_tlab(); 1.4587 +} 1.4588 + 1.4589 +// Preserves rbx, and rdx. 1.4590 +void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { 1.4591 + Register top = rax; 1.4592 + Register t1 = rcx; 1.4593 + Register t2 = rsi; 1.4594 + Register thread_reg = rdi; 1.4595 + assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 1.4596 + Label do_refill, discard_tlab; 1.4597 + 1.4598 + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 1.4599 + // No allocation in the shared eden. 
1.4600 + jmp(slow_case); 1.4601 + } 1.4602 + 1.4603 + get_thread(thread_reg); 1.4604 + 1.4605 + movl(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 1.4606 + movl(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 1.4607 + 1.4608 + // calculate amount of free space 1.4609 + subl(t1, top); 1.4610 + shrl(t1, LogHeapWordSize); 1.4611 + 1.4612 + // Retain tlab and allocate object in shared space if 1.4613 + // the amount free in the tlab is too large to discard. 1.4614 + cmpl(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 1.4615 + jcc(Assembler::lessEqual, discard_tlab); 1.4616 + 1.4617 + // Retain 1.4618 + movl(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment()); 1.4619 + addl(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 1.4620 + if (TLABStats) { 1.4621 + // increment number of slow_allocations 1.4622 + addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 1.4623 + } 1.4624 + jmp(try_eden); 1.4625 + 1.4626 + bind(discard_tlab); 1.4627 + if (TLABStats) { 1.4628 + // increment number of refills 1.4629 + addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 1.4630 + // accumulate wastage -- t1 is amount free in tlab 1.4631 + addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 1.4632 + } 1.4633 + 1.4634 + // if tlab is currently allocated (top or end != null) then 1.4635 + // fill [top, end + alignment_reserve) with array object 1.4636 + testl (top, top); 1.4637 + jcc(Assembler::zero, do_refill); 1.4638 + 1.4639 + // set up the mark word 1.4640 + movl(Address(top, oopDesc::mark_offset_in_bytes()), (int)markOopDesc::prototype()->copy_set_hash(0x2)); 1.4641 + // set the length to the remaining space 1.4642 + subl(t1, typeArrayOopDesc::header_size(T_INT)); 1.4643 + addl(t1, ThreadLocalAllocBuffer::alignment_reserve()); 1.4644 + shll(t1, 
log2_intptr(HeapWordSize/sizeof(jint))); 1.4645 + movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 1.4646 + // set klass to intArrayKlass 1.4647 + // dubious reloc why not an oop reloc? 1.4648 + movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr())); 1.4649 + movl(Address(top, oopDesc::klass_offset_in_bytes()), t1); 1.4650 + 1.4651 + // refill the tlab with an eden allocation 1.4652 + bind(do_refill); 1.4653 + movl(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 1.4654 + shll(t1, LogHeapWordSize); 1.4655 + // add object_size ?? 1.4656 + eden_allocate(top, t1, 0, t2, slow_case); 1.4657 + 1.4658 + // Check that t1 was preserved in eden_allocate. 1.4659 +#ifdef ASSERT 1.4660 + if (UseTLAB) { 1.4661 + Label ok; 1.4662 + Register tsize = rsi; 1.4663 + assert_different_registers(tsize, thread_reg, t1); 1.4664 + pushl(tsize); 1.4665 + movl(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 1.4666 + shll(tsize, LogHeapWordSize); 1.4667 + cmpl(t1, tsize); 1.4668 + jcc(Assembler::equal, ok); 1.4669 + stop("assert(t1 != tlab size)"); 1.4670 + should_not_reach_here(); 1.4671 + 1.4672 + bind(ok); 1.4673 + popl(tsize); 1.4674 + } 1.4675 +#endif 1.4676 + movl(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 1.4677 + movl(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 1.4678 + addl(top, t1); 1.4679 + subl(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 1.4680 + movl(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 1.4681 + verify_tlab(); 1.4682 + jmp(retry); 1.4683 +} 1.4684 + 1.4685 + 1.4686 +int MacroAssembler::biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg, 1.4687 + bool swap_reg_contains_mark, 1.4688 + Label& done, Label* slow_case, 1.4689 + BiasedLockingCounters* counters) { 1.4690 + assert(UseBiasedLocking, "why call this otherwise?"); 1.4691 + assert(swap_reg == rax, "swap_reg must be 
rax, for cmpxchg"); 1.4692 + assert_different_registers(lock_reg, obj_reg, swap_reg); 1.4693 + 1.4694 + if (PrintBiasedLockingStatistics && counters == NULL) 1.4695 + counters = BiasedLocking::counters(); 1.4696 + 1.4697 + bool need_tmp_reg = false; 1.4698 + if (tmp_reg == noreg) { 1.4699 + need_tmp_reg = true; 1.4700 + tmp_reg = lock_reg; 1.4701 + } else { 1.4702 + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 1.4703 + } 1.4704 + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 1.4705 + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 1.4706 + Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 1.4707 + Address saved_mark_addr(lock_reg, 0); 1.4708 + 1.4709 + // Biased locking 1.4710 + // See whether the lock is currently biased toward our thread and 1.4711 + // whether the epoch is still valid 1.4712 + // Note that the runtime guarantees sufficient alignment of JavaThread 1.4713 + // pointers to allow age to be placed into low bits 1.4714 + // First check to see whether biasing is even enabled for this object 1.4715 + Label cas_label; 1.4716 + int null_check_offset = -1; 1.4717 + if (!swap_reg_contains_mark) { 1.4718 + null_check_offset = offset(); 1.4719 + movl(swap_reg, mark_addr); 1.4720 + } 1.4721 + if (need_tmp_reg) { 1.4722 + pushl(tmp_reg); 1.4723 + } 1.4724 + movl(tmp_reg, swap_reg); 1.4725 + andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); 1.4726 + cmpl(tmp_reg, markOopDesc::biased_lock_pattern); 1.4727 + if (need_tmp_reg) { 1.4728 + popl(tmp_reg); 1.4729 + } 1.4730 + jcc(Assembler::notEqual, cas_label); 1.4731 + // The bias pattern is present in the object's header. Need to check 1.4732 + // whether the bias owner and the epoch are both still current. 
1.4733 + // Note that because there is no current thread register on x86 we 1.4734 + // need to store off the mark word we read out of the object to 1.4735 + // avoid reloading it and needing to recheck invariants below. This 1.4736 + // store is unfortunate but it makes the overall code shorter and 1.4737 + // simpler. 1.4738 + movl(saved_mark_addr, swap_reg); 1.4739 + if (need_tmp_reg) { 1.4740 + pushl(tmp_reg); 1.4741 + } 1.4742 + get_thread(tmp_reg); 1.4743 + xorl(swap_reg, tmp_reg); 1.4744 + if (swap_reg_contains_mark) { 1.4745 + null_check_offset = offset(); 1.4746 + } 1.4747 + movl(tmp_reg, klass_addr); 1.4748 + xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 1.4749 + andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 1.4750 + if (need_tmp_reg) { 1.4751 + popl(tmp_reg); 1.4752 + } 1.4753 + if (counters != NULL) { 1.4754 + cond_inc32(Assembler::zero, 1.4755 + ExternalAddress((address)counters->biased_lock_entry_count_addr())); 1.4756 + } 1.4757 + jcc(Assembler::equal, done); 1.4758 + 1.4759 + Label try_revoke_bias; 1.4760 + Label try_rebias; 1.4761 + 1.4762 + // At this point we know that the header has the bias pattern and 1.4763 + // that we are not the bias owner in the current epoch. We need to 1.4764 + // figure out more details about the state of the header in order to 1.4765 + // know what operations can be legally performed on the object's 1.4766 + // header. 1.4767 + 1.4768 + // If the low three bits in the xor result aren't clear, that means 1.4769 + // the prototype header is no longer biased and we have to revoke 1.4770 + // the bias on this object. 1.4771 + testl(swap_reg, markOopDesc::biased_lock_mask_in_place); 1.4772 + jcc(Assembler::notZero, try_revoke_bias); 1.4773 + 1.4774 + // Biasing is still enabled for this data type. 
See whether the 1.4775 + // epoch of the current bias is still valid, meaning that the epoch 1.4776 + // bits of the mark word are equal to the epoch bits of the 1.4777 + // prototype header. (Note that the prototype header's epoch bits 1.4778 + // only change at a safepoint.) If not, attempt to rebias the object 1.4779 + // toward the current thread. Note that we must be absolutely sure 1.4780 + // that the current epoch is invalid in order to do this because 1.4781 + // otherwise the manipulations it performs on the mark word are 1.4782 + // illegal. 1.4783 + testl(swap_reg, markOopDesc::epoch_mask_in_place); 1.4784 + jcc(Assembler::notZero, try_rebias); 1.4785 + 1.4786 + // The epoch of the current bias is still valid but we know nothing 1.4787 + // about the owner; it might be set or it might be clear. Try to 1.4788 + // acquire the bias of the object using an atomic operation. If this 1.4789 + // fails we will go in to the runtime to revoke the object's bias. 1.4790 + // Note that we first construct the presumed unbiased header so we 1.4791 + // don't accidentally blow away another thread's valid bias. 1.4792 + movl(swap_reg, saved_mark_addr); 1.4793 + andl(swap_reg, 1.4794 + markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 1.4795 + if (need_tmp_reg) { 1.4796 + pushl(tmp_reg); 1.4797 + } 1.4798 + get_thread(tmp_reg); 1.4799 + orl(tmp_reg, swap_reg); 1.4800 + if (os::is_MP()) { 1.4801 + lock(); 1.4802 + } 1.4803 + cmpxchg(tmp_reg, Address(obj_reg, 0)); 1.4804 + if (need_tmp_reg) { 1.4805 + popl(tmp_reg); 1.4806 + } 1.4807 + // If the biasing toward our thread failed, this means that 1.4808 + // another thread succeeded in biasing it toward itself and we 1.4809 + // need to revoke that bias. The revocation will occur in the 1.4810 + // interpreter runtime in the slow case. 
1.4811 + if (counters != NULL) { 1.4812 + cond_inc32(Assembler::zero, 1.4813 + ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 1.4814 + } 1.4815 + if (slow_case != NULL) { 1.4816 + jcc(Assembler::notZero, *slow_case); 1.4817 + } 1.4818 + jmp(done); 1.4819 + 1.4820 + bind(try_rebias); 1.4821 + // At this point we know the epoch has expired, meaning that the 1.4822 + // current "bias owner", if any, is actually invalid. Under these 1.4823 + // circumstances _only_, we are allowed to use the current header's 1.4824 + // value as the comparison value when doing the cas to acquire the 1.4825 + // bias in the current epoch. In other words, we allow transfer of 1.4826 + // the bias from one thread to another directly in this situation. 1.4827 + // 1.4828 + // FIXME: due to a lack of registers we currently blow away the age 1.4829 + // bits in this situation. Should attempt to preserve them. 1.4830 + if (need_tmp_reg) { 1.4831 + pushl(tmp_reg); 1.4832 + } 1.4833 + get_thread(tmp_reg); 1.4834 + movl(swap_reg, klass_addr); 1.4835 + orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 1.4836 + movl(swap_reg, saved_mark_addr); 1.4837 + if (os::is_MP()) { 1.4838 + lock(); 1.4839 + } 1.4840 + cmpxchg(tmp_reg, Address(obj_reg, 0)); 1.4841 + if (need_tmp_reg) { 1.4842 + popl(tmp_reg); 1.4843 + } 1.4844 + // If the biasing toward our thread failed, then another thread 1.4845 + // succeeded in biasing it toward itself and we need to revoke that 1.4846 + // bias. The revocation will occur in the runtime in the slow case. 
1.4847 + if (counters != NULL) { 1.4848 + cond_inc32(Assembler::zero, 1.4849 + ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 1.4850 + } 1.4851 + if (slow_case != NULL) { 1.4852 + jcc(Assembler::notZero, *slow_case); 1.4853 + } 1.4854 + jmp(done); 1.4855 + 1.4856 + bind(try_revoke_bias); 1.4857 + // The prototype mark in the klass doesn't have the bias bit set any 1.4858 + // more, indicating that objects of this data type are not supposed 1.4859 + // to be biased any more. We are going to try to reset the mark of 1.4860 + // this object to the prototype value and fall through to the 1.4861 + // CAS-based locking scheme. Note that if our CAS fails, it means 1.4862 + // that another thread raced us for the privilege of revoking the 1.4863 + // bias of this particular object, so it's okay to continue in the 1.4864 + // normal locking code. 1.4865 + // 1.4866 + // FIXME: due to a lack of registers we currently blow away the age 1.4867 + // bits in this situation. Should attempt to preserve them. 1.4868 + movl(swap_reg, saved_mark_addr); 1.4869 + if (need_tmp_reg) { 1.4870 + pushl(tmp_reg); 1.4871 + } 1.4872 + movl(tmp_reg, klass_addr); 1.4873 + movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 1.4874 + if (os::is_MP()) { 1.4875 + lock(); 1.4876 + } 1.4877 + cmpxchg(tmp_reg, Address(obj_reg, 0)); 1.4878 + if (need_tmp_reg) { 1.4879 + popl(tmp_reg); 1.4880 + } 1.4881 + // Fall through to the normal CAS-based lock, because no matter what 1.4882 + // the result of the above CAS, some thread must have succeeded in 1.4883 + // removing the bias bit from the object's header. 
1.4884 + if (counters != NULL) { 1.4885 + cond_inc32(Assembler::zero, 1.4886 + ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 1.4887 + } 1.4888 + 1.4889 + bind(cas_label); 1.4890 + 1.4891 + return null_check_offset; 1.4892 +} 1.4893 + 1.4894 + 1.4895 +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 1.4896 + assert(UseBiasedLocking, "why call this otherwise?"); 1.4897 + 1.4898 + // Check for biased locking unlock case, which is a no-op 1.4899 + // Note: we do not have to check the thread ID for two reasons. 1.4900 + // First, the interpreter checks for IllegalMonitorStateException at 1.4901 + // a higher level. Second, if the bias was revoked while we held the 1.4902 + // lock, the object could not be rebiased toward another thread, so 1.4903 + // the bias bit would be clear. 1.4904 + movl(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 1.4905 + andl(temp_reg, markOopDesc::biased_lock_mask_in_place); 1.4906 + cmpl(temp_reg, markOopDesc::biased_lock_pattern); 1.4907 + jcc(Assembler::equal, done); 1.4908 +} 1.4909 + 1.4910 + 1.4911 +Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { 1.4912 + switch (cond) { 1.4913 + // Note some conditions are synonyms for others 1.4914 + case Assembler::zero: return Assembler::notZero; 1.4915 + case Assembler::notZero: return Assembler::zero; 1.4916 + case Assembler::less: return Assembler::greaterEqual; 1.4917 + case Assembler::lessEqual: return Assembler::greater; 1.4918 + case Assembler::greater: return Assembler::lessEqual; 1.4919 + case Assembler::greaterEqual: return Assembler::less; 1.4920 + case Assembler::below: return Assembler::aboveEqual; 1.4921 + case Assembler::belowEqual: return Assembler::above; 1.4922 + case Assembler::above: return Assembler::belowEqual; 1.4923 + case Assembler::aboveEqual: return Assembler::below; 1.4924 + case Assembler::overflow: return Assembler::noOverflow; 1.4925 + case 
Assembler::noOverflow: return Assembler::overflow; 1.4926 + case Assembler::negative: return Assembler::positive; 1.4927 + case Assembler::positive: return Assembler::negative; 1.4928 + case Assembler::parity: return Assembler::noParity; 1.4929 + case Assembler::noParity: return Assembler::parity; 1.4930 + } 1.4931 + ShouldNotReachHere(); return Assembler::overflow; 1.4932 +} 1.4933 + 1.4934 + 1.4935 +void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 1.4936 + Condition negated_cond = negate_condition(cond); 1.4937 + Label L; 1.4938 + jcc(negated_cond, L); 1.4939 + atomic_incl(counter_addr); 1.4940 + bind(L); 1.4941 +} 1.4942 + 1.4943 +void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 1.4944 + pushfd(); 1.4945 + if (os::is_MP()) 1.4946 + lock(); 1.4947 + increment(counter_addr); 1.4948 + popfd(); 1.4949 +} 1.4950 + 1.4951 +SkipIfEqual::SkipIfEqual( 1.4952 + MacroAssembler* masm, const bool* flag_addr, bool value) { 1.4953 + _masm = masm; 1.4954 + _masm->cmp8(ExternalAddress((address)flag_addr), value); 1.4955 + _masm->jcc(Assembler::equal, _label); 1.4956 +} 1.4957 + 1.4958 +SkipIfEqual::~SkipIfEqual() { 1.4959 + _masm->bind(_label); 1.4960 +} 1.4961 + 1.4962 + 1.4963 +// Writes to stack successive pages until offset reached to check for 1.4964 +// stack overflow + shadow pages. This clobbers tmp. 1.4965 +void MacroAssembler::bang_stack_size(Register size, Register tmp) { 1.4966 + movl(tmp, rsp); 1.4967 + // Bang stack for total size given plus shadow page size. 1.4968 + // Bang one page at a time because large size can bang beyond yellow and 1.4969 + // red zones. 1.4970 + Label loop; 1.4971 + bind(loop); 1.4972 + movl(Address(tmp, (-os::vm_page_size())), size ); 1.4973 + subl(tmp, os::vm_page_size()); 1.4974 + subl(size, os::vm_page_size()); 1.4975 + jcc(Assembler::greater, loop); 1.4976 + 1.4977 + // Bang down shadow pages too. 1.4978 + // The -1 because we already subtracted 1 page. 
1.4979 + for (int i = 0; i< StackShadowPages-1; i++) { 1.4980 + movl(Address(tmp, (-i*os::vm_page_size())), size ); 1.4981 + } 1.4982 +}