Tue, 14 Jul 2009 15:40:39 -0700
6700789: G1: Enable use of compressed oops with G1 heaps
Summary: Modifications to G1 so as to allow the use of compressed oops.
Reviewed-by: apetrusenko, coleenp, jmasa, kvn, never, phh, tonyp
1 /*
2 * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_assembler_x86.cpp.incl"
28 // Implementation of AddressLiteral
// Build an AddressLiteral for 'target', attaching the relocation record
// implied by 'rtype' so the runtime can later locate and patch the
// embedded address in the generated code.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    // No relocation: plain literal address, nothing to record.
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
67 // Implementation of Address
#ifdef _LP64

// On 64-bit, array addressing via an absolute base is not encodable
// (no abs32 addressing) — callers must handle this higher up.
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
// Builds an Address from a raw displacement plus a location used only to
// compute the relocation spec; no base/index register is involved.
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

// 32-bit: fold the literal base address into the displacement and keep
// the index register/scale from the ArrayAddress.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
// 32-bit only: treat an absolute address as a displacement with the
// caller-supplied relocation spec.
Address::Address(address loc, RelocationHolder spec) {
  _base = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64
129 // Convert the raw encoding form into the form expected by the constructor for
130 // Address. An index of 4 (rsp) corresponds to having no index, so convert
131 // that to noreg for the Address constructor.
132 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
133 RelocationHolder rspec;
134 if (disp_is_oop) {
135 rspec = Relocation::spec_simple(relocInfo::oop_type);
136 }
137 bool valid_index = index != rsp->encoding();
138 if (valid_index) {
139 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
140 madr._rspec = rspec;
141 return madr;
142 } else {
143 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
144 madr._rspec = rspec;
145 return madr;
146 }
147 }
149 // Implementation of Assembler
151 int AbstractAssembler::code_fill_byte() {
152 return (u_char)'\xF4'; // hlt
153 }
155 // make this go away someday
156 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
157 if (rtype == relocInfo::none)
158 emit_long(data);
159 else emit_data(data, Relocation::spec_simple(rtype), format);
160 }
// Emit a 32-bit data word together with its relocation.  The relocation is
// recorded at the enclosing instruction's start (inst_mark), not at the data
// word itself, because that is where the patching machinery looks.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words. Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}
181 static int encode(Register r) {
182 int enc = r->encoding();
183 if (enc >= 8) {
184 enc -= 8;
185 }
186 return enc;
187 }
189 static int encode(XMMRegister r) {
190 int enc = r->encoding();
191 if (enc >= 8) {
192 enc -= 8;
193 }
194 return enc;
195 }
// 8-bit register/immediate arithmetic: op1 = opcode, op2 = base ModRM byte
// (the register number is OR-ed into its low 3 bits), imm8 = immediate.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}

// 32-bit register/immediate arithmetic.  Uses the short sign-extended imm8
// form (opcode | 0x02) when the immediate fits in a byte.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// immediate-to-memory forms
// Same imm8/imm32 selection as above, but the destination is a memory
// operand; the trailing argument to emit_operand is the immediate size so
// RIP-relative displacements are corrected for the bytes that follow.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}

// Arithmetic with an oop immediate (32-bit VM only); the oop word gets an
// oop_type relocation so the GC can update it.
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}

// Register-register arithmetic: dst goes in the reg field (bits 5..3),
// src in the r/m field (bits 2..0) of the ModRM byte.
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}
// Emit the ModRM byte, optional SIB byte, and displacement for a memory
// operand [base + index*scale + disp].  Picks the shortest encoding
// (no disp / disp8 / disp32) unless a relocation forces the disp32 form.
// rip_relative_correction accounts for immediate bytes that follow the
// displacement when computing a RIP-relative offset (64-bit only).
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // NOTE: rbp/r13 as base cannot use the no-displacement form (mod=00,
      // base=101 means disp32-only), hence the extra exclusions below.
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      // rsp/r12 as base always requires a SIB byte (base=100 selects SIB).
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
// XMM destination variant: XMM registers share the GPR encoding space in
// the ModRM reg field, so simply forward with a register-typed view.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
381 // Secret local extension to Assembler::WhichOperand:
382 #define end_pc_operand (_WhichOperand_limit)
// Decode the instruction starting at 'inst' and return the address of the
// operand selected by 'which'.  This is a hand-written partial x86 decoder
// covering exactly the instruction forms this assembler emits; it is used
// by relocation checking and by locate_next_instruction.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    // REX.W widens the operand to 64 bits (matters for the mov-imm case).
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr a
      // 64bit side says it these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      // NOTE(review): deliberately falls through to the default ip++ below,
      // skipping both the REX byte consumed above and the opcode byte.
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    // mod != 11 with base 100 means a SIB byte follows; refetch the base.
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
// Return the address just past the instruction at 'inst', by asking the
// decoder for the pseudo-operand marking the end of the instruction.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
#ifdef ASSERT
// Debug-only sanity check: verify that the relocation about to be recorded
// actually points at the operand the decoder finds in the just-emitted
// instruction (i.e. the operand ends exactly at the current pc).
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT
// 32-bit-only memory operand: asserts that no REX-extended registers are
// involved, then forwards to the general emitter.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// General memory operand for a GPR destination; rip_relative_correction is
// the number of immediate bytes that follow the displacement.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

// Memory operand for an XMM destination (shares the GPR encoding space).
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
// Same as above with the argument order swapped to dodge an overload bug.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
// Emit a two-byte x87 floating-point instruction; 'i' is the FPU stack
// slot (ST(i)), folded into the second opcode byte.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}
807 // Now the Assembler instruction (identical for 32/64 bits)
// adcl dst, imm32 -- add with carry, immediate (0x81 /2).
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

// adcl dst, [src] -- add with carry from memory (0x13 /r).
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

// adcl dst, src -- add with carry, register-register.
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

// addl [dst], imm32 -- add immediate to memory (0x81 /0).
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

// addl [dst], src -- add register into memory (0x01 /r).
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

// addl dst, imm32 -- add immediate to register (0x81 /0).
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

// addl dst, [src] -- add memory into register (0x03 /r).
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

// addl dst, src -- add register-register (0x03 /r).
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
// Multi-byte no-ops using the 0F 1F "recommended NOP" encodings; the byte
// sequences are fixed patterns from the processor optimization manuals.
void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
// addsd dst, src -- scalar double add (F2 0F 58); SSE2 required on 32-bit.
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

// addsd dst, [src] -- scalar double add from memory.
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}

// addss dst, src -- scalar float add (F3 0F 58); SSE required on 32-bit.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

// addss dst, [src] -- scalar float add from memory.
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
// andl dst, imm32 -- bitwise AND with immediate (0x81 /4).
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

// andl dst, [src] -- bitwise AND from memory (0x23 /r).
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

// andl dst, src -- bitwise AND register-register.
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

// andpd dst, [src] -- packed-double AND (66 0F 54); SSE2 required on 32-bit.
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x54);
  emit_operand(dst, src);
}
// bsfl dst, src -- bit scan forward (0F BC), register form.
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// bsrl dst, src -- bit scan reverse (0F BD).  On CPUs with LZCNT this
// encoding (with an F3 prefix) means something else, hence the guard.
void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// bswapl reg -- byte-swap a 32-bit register (0F C8+r).
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
// call to a Label: emit a rel32 call.  If the label is bound, the offset
// is known (and must be backward); otherwise emit a zero displacement and
// register a patch site for when the label is bound.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5; // opcode byte + rel32
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}
// Indirect call through a register (FF /2).  In 64-bit mode the default
// operand size for this form is already 64 bits, so only the REX.B bit
// (for r8..r15) is ever needed.
void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}
// Indirect call through memory (FF /2; rdx here only supplies /2 in the
// ModRM reg field, it is not a real operand).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

// Direct rel32 call to an absolute entry point, with the caller-supplied
// relocation attached to the displacement.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1037 void Assembler::cdql() {
1038 emit_byte(0x99);
1039 }
// cmovcc dst, src (register form): 0F 4x /r, x = condition code 'cc'.
// CMOV must be checked on 32-bit CPUs; it is architectural on 64-bit.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}

// cmovcc dst, mem form.
// NOTE(review): no InstructionMark here, so this form is presumably not
// intended to be relocatable — confirm against callers.
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
// cmpb mem, imm8: 80 /7 ib (rdi encodes the /7 opcode extension).
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

// cmpl mem, imm32: 81 /7 id.
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// cmpl reg, imm32: encoded via emit_arith with opcode 0x81, modrm base 0xF8 (/7).
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

// cmpl reg, reg: 3B /r.
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

// cmpl reg, mem: 3B /r.
void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// cmpw mem, imm16: 66 81 /7 iw.  Extended (REX) registers are disallowed
// because the 0x66 operand-size prefix is emitted without a REX prefix.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
// Encoding: 0F B1 /r.  Bit 1 of the Atomics flag selects a non-atomic
// emulation used for testing.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware. The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L ;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    // Real cmpxchg; note the caller is responsible for any lock() prefix.
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}
// comisd xmm, mem: 66 0F 2F /r — reuses comiss() after the 0x66 prefix.
// NOTE(review): the InstructionMark inside comiss() begins after the 0x66
// byte already emitted here — confirm no caller relies on the mark
// covering the whole instruction.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}

// comiss xmm, mem: 0F 2F /r — ordered compare of scalar singles, sets EFLAGS.
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}
// cvtdq2pd xmm, xmm: F3 0F E6 /r — packed dword ints to packed doubles.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

// cvtdq2ps xmm, xmm: 0F 5B /r — packed dword ints to packed singles.
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

// cvtsd2ss xmm, xmm: F2 0F 5A /r — scalar double to scalar single.
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// cvtsi2sd xmm, r32: F2 0F 2A /r — signed dword int to scalar double.
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtsi2ss xmm, r32: F3 0F 2A /r — signed dword int to scalar single.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtss2sd xmm, xmm: F3 0F 5A /r — scalar single to scalar double.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// cvttsd2si r32, xmm: F2 0F 2C /r — truncating scalar double to dword int.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// cvttss2si r32, xmm: F3 0F 2C /r — truncating scalar single to dword int.
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
// dec dword [mem]: FF /1 (rcx supplies the /1 opcode extension).
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
// divsd xmm, mem: F2 0F 5E /r — scalar double divide.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// divsd xmm, xmm (register form).
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// divss xmm, mem: F3 0F 5E /r — scalar single divide.
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// divss xmm, xmm (register form).
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}
// EMMS: 0F 77 — empty MMX state (clear FP tag word after MMX use).
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

// HLT: 0xF4 — halt; used here as a guaranteed-faulting filler byte.
void Assembler::hlt() {
  emit_byte(0xF4);
}

// idiv r32: F7 /7 — signed divide of EDX:EAX by src.
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}
// imul dst, src: 0F AF /r — two-operand signed multiply.
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}

// imul dst, src, imm — three-operand form.  Uses the short 6B /r ib
// encoding when the immediate fits in 8 bits, else 69 /r id.
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
// inc dword [mem]: FF /0 (rax supplies the /0 opcode extension).
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
// Conditional jump to label L.  Bound backward targets use the short
// 2-byte form (7x cb) when the displacement fits in 8 bits and no
// relocation is requested, else the 6-byte form (0F 8x cd).  Unbound
// (forward) targets always get the long form, patched when L is bound.
void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    // Displacements are relative to the end of the instruction, hence
    // the "- short_size" / "- long_size" corrections below.
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    // is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    // an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}
1342 void Assembler::jccb(Condition cc, Label& L) {
1343 if (L.is_bound()) {
1344 const int short_size = 2;
1345 address entry = target(L);
1346 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1347 "Dispacement too large for a short jmp");
1348 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1349 // 0111 tttn #8-bit disp
1350 emit_byte(0x70 | cc);
1351 emit_byte((offs - short_size) & 0xFF);
1352 } else {
1353 InstructionMark im(this);
1354 L.add_patch_at(code(), locator());
1355 emit_byte(0x70 | cc);
1356 emit_byte(0);
1357 }
1358 }
// jmp [mem]: FF /4 (rsp supplies the /4 opcode extension).
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}

// jmp to label L: short EB cb when bound, close, and unrelocated;
// otherwise long E9 cd.  Unbound labels always get the long form.
void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound. If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    relocate(rtype);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}

// jmp through a register: FF /4 (modrm 0xE0 | reg).
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

// Direct jmp to an absolute address with relocation: E9 rel32.
// Target must be within 32-bit reach of the end of this instruction.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
1410 void Assembler::jmpb(Label& L) {
1411 if (L.is_bound()) {
1412 const int short_size = 2;
1413 address entry = target(L);
1414 assert(is8bit((entry - _code_pos) + short_size),
1415 "Dispacement too large for a short jmp");
1416 assert(entry != NULL, "jmp most probably wrong");
1417 intptr_t offs = entry - _code_pos;
1418 emit_byte(0xEB);
1419 emit_byte((offs - short_size) & 0xFF);
1420 } else {
1421 InstructionMark im(this);
1422 L.add_patch_at(code(), locator());
1423 emit_byte(0xEB);
1424 emit_byte(0);
1425 }
1426 }
// ldmxcsr [mem]: 0F AE /2 — load the MXCSR control/status register.
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);  // /2 opcode extension
}

// lea r32, [mem]: 8D /r.  On 64-bit an addr32 (0x67) prefix is emitted
// so the effective-address computation is done in 32 bits.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}
// LOCK prefix (0xF0).  Bit 0 of the Atomics flag substitutes a NOP for
// testing without bus locking.
void Assembler::lock() {
  if (Atomics & 1) {
    // Emit either nothing, a NOP, or a NOP: prefix
    emit_byte(0x90) ;
  } else {
    emit_byte(0xF0);
  }
}

// lzcnt dst, src: F3 0F BD /r.  Without LZCNT support the CPU decodes
// these bytes as BSR, which has different semantics — hence the assert.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction (0F AE F0) — full memory barrier.
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}
// Pointer-sized register move: movq on 64-bit, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
// movapd xmm, xmm: 66 0F 28 /r — aligned packed-double move.
// The REX prefix is computed by hand here (instead of prefix_and_encode)
// because the 0x66 prefix must precede any REX byte; the encodings are
// reduced mod 8 once the appropriate REX bits are chosen.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  emit_byte(0x66);
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);       // extend the r/m field only
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);       // extend the reg field only
    } else {
      prefix(REX_RB);      // extend both fields
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);
}

// movaps xmm, xmm: 0F 28 /r — aligned packed-single move.
// Same manual REX computation as movapd, minus the 0x66 prefix.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);
}
// movb reg, mem: 8A /r — byte load; the 'true' argument requests the
// byte-register (REX-forcing) prefix handling.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}

// movb mem, imm8: C6 /0 ib.
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}

// movb mem, reg: 88 /r — byte store.
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}
// movd xmm, r32: 66 0F 6E /r.
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// movd r32, xmm: 66 0F 7E /r.  In this encoding the XMM register lives
// in the modrm reg field, so the operands are swapped for the prefix.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefix_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
// movdqa xmm, mem: 66 0F 6F /r — aligned 128-bit load.
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movdqa xmm, xmm (register form).
// NOTE(review): this uses prefixq_and_encode, which emits a REX.W
// prefix; REX.W has no effect on movdqa, so the extra byte appears
// harmless but inconsistent with the memory forms — confirm intended.
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// movdqa mem, xmm: 66 0F 7F /r — aligned 128-bit store.
void Assembler::movdqa(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// movdqu xmm, mem: F3 0F 6F /r — unaligned 128-bit load.
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movdqu xmm, xmm (register form).
// NOTE(review): same prefixq_and_encode oddity as movdqa above.
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// movdqu mem, xmm: F3 0F 7F /r — unaligned 128-bit store.
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
// Uses zero extension on 64bit

// movl reg, imm32: B8+r id — the 32-bit write zero-extends to 64 bits.
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}

// movl reg, reg: 8B /r.
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// movl reg, mem: 8B /r.
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// movl mem, imm32: C7 /0 id.
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// movl mem, reg: 89 /r.
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().

// movlpd xmm, mem: 66 0F 12 /r — load 64 bits into the low qword.
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}
// movq mmx, mem: 0F 6F /r.
// NOTE(review): no InstructionMark or address prefix is emitted here —
// presumably MMX forms are never used with relocatable or REX-requiring
// addresses; confirm against callers.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movq mem, mmx: 0F 7F /r.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// movq xmm, mem: F3 0F 7E /r — load 64 bits, zeroing the high qword.
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// movq mem, xmm: 66 0F D6 /r — store the low 64 bits.
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
// movsx r32, byte [mem]: 0F BE /r — sign-extending byte load.
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

// movsx r32, r8 (register form); src must be byte-addressable on 32-bit.
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
// movsd xmm, xmm: F2 0F 10 /r — scalar double move (register form).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// movsd xmm, mem: F2 0F 10 /r — scalar double load.
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// movsd mem, xmm: F2 0F 11 /r — scalar double store.
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
// movss xmm, xmm: F3 0F 10 /r — scalar single move (register form).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// movss xmm, mem: F3 0F 10 /r — scalar single load.
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// movss mem, xmm: F3 0F 11 /r — scalar single store.
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
// movsx r32, word [mem]: 0F BF /r — sign-extending word load.
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

// movsx r32, r16 (register form).
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
// movw mem, imm16: 66 C7 /0 iw.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}

// movw reg, mem: 66 8B /r — 16-bit load.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// movw mem, reg: 66 89 /r — 16-bit store.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
// movzx r32, byte [mem]: 0F B6 /r — zero-extending byte load.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

// movzx r32, r8 (register form); src must be byte-addressable on 32-bit.
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

// movzx r32, word [mem]: 0F B7 /r — zero-extending word load.
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

// movzx r32, r16 (register form).
void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
// mul dword [mem]: F7 /4 — unsigned multiply EDX:EAX = EAX * [mem]
// (rsp supplies the /4 opcode extension).
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);
}

// mul r32: F7 /4 (register form).
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);
}
// mulsd xmm, mem: F2 0F 59 /r — scalar double multiply.
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

// mulsd xmm, xmm (register form).
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// mulss xmm, mem: F3 0F 59 /r — scalar single multiply.
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

// mulss xmm, xmm (register form).
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}
// neg r32: F7 /3 — two's-complement negate.
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}
// Emit exactly 'i' bytes of padding.  Uses multi-byte address-form nops
// (0F 1F /0) when UseAddressNop is on, choosing the sequences each CPU
// vendor recommends; otherwise falls back to 0x66-prefixed 0x90 nops.
// The case fall-throughs in every switch below are intentional: each
// case adds one 0x66 prefix byte before the shared tail.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    // 1: 0x90
    // 2: 0x66 0x90
    // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    // 4: 0x0F 0x1F 0x40 0x00
    // 5: 0x0F 0x1F 0x44 0x00 0x00
    // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix — fall through
      case 13:
        emit_byte(0x66); // size prefix — fall through
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix — fall through
      case 10:
        emit_byte(0x66); // size prefix — fall through
      case 9:
        emit_byte(0x66); // size prefix — fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix — fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix — fall through
      case 2:
        emit_byte(0x66); // size prefix — fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    // 1: 0x90
    // 2: 0x66 0x90
    // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    // 4: 0x0F 0x1F 0x40 0x00
    // 5: 0x0F 0x1F 0x44 0x00 0x00
    // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix — fall through
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix — fall through
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix — fall through
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix — fall through
      case 10:
        emit_byte(0x66); // size prefix — fall through
      case 9:
        emit_byte(0x66); // size prefix — fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix — fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix — fall through
      case 2:
        emit_byte(0x66); // size prefix — fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  // 1: 0x90
  // 2: 0x66 0x90
  // 3: 0x66 0x66 0x90
  // 4: 0x66 0x66 0x66 0x90
  // 5: 0x66 0x66 0x90 0x66 0x90
  // 6: 0x66 0x66 0x90 0x66 0x66 0x90
  // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66); // fall through
    case 3:
      emit_byte(0x66); // fall through
    case 2:
      emit_byte(0x66); // fall through
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
// NOT r32: one's complement of dst (F7 /2).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

// OR m32, imm32 (81 /1 id).
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);  // rcx (encoding 1) supplies the /1 opcode extension
  emit_long(imm32);
}

// OR r32, imm32 (via emit_arith, which may use the short 83 /1 ib form).
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}

// OR r32, m32 (0B /r).
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}

// OR r32, r32 (0B /r, register-direct form).
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
// PCMPESTRI xmm, m128, imm8 (SSE4.2: 66 0F 3A 61 /r ib):
// packed compare explicit-length strings, result index in ECX.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_operand(dst, src);
  emit_byte(imm8);
}

// PCMPESTRI xmm, xmm, imm8 (register-direct form).
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  emit_byte(0x66);
  // NOTE(review): prefixq_and_encode emits REX.W here; this instruction
  // ignores REX.W, so prefix_and_encode would appear sufficient — verify.
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_byte(0xC0 | encode);
  emit_byte(imm8);
}
// generic
// POP r (58+rd); pops 8 bytes on 64-bit, 4 on 32-bit.
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}

// POPCNT r32, m32 (F3 0F B8 /r): population count.
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);   // legacy prefix, correctly emitted before any REX
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}

// POPCNT r32, r32 (register-direct form).
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}

// POPF/POPFQ: pop flags register from the stack.
void Assembler::popf() {
  emit_byte(0x9D);
}

// POP m (8F /0).
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);  // rax (encoding 0) supplies the /0 opcode extension
}
// Shared address-size/REX prefix plus the 0F escape for the prefetch family.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}

// PREFETCHNTA m8 (0F 18 /0): non-temporal prefetch.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}

// 3DNow! PREFETCH m8 (0F 0D /0).
void Assembler::prefetchr(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}

// PREFETCHT0 m8 (0F 18 /1): prefetch into all cache levels.
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}

// PREFETCHT1 m8 (0F 18 /2).
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}

// PREFETCHT2 m8 (0F 18 /3).
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}

// 3DNow! PREFETCHW m8 (0F 0D /1): prefetch with intent to write.
void Assembler::prefetchw(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}
// Emit a raw prefix byte into the code stream.
void Assembler::prefix(Prefix p) {
  a_byte(p);
}
// PSHUFD xmm, xmm, imm8 (66 0F 70 /r ib): shuffle packed doublewords.
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);

}

// PSHUFD xmm, m128, imm8 (memory-source form).
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}

// PSHUFLW xmm, xmm, imm8 (F2 0F 70 /r ib): shuffle low packed words.
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);
}

// PSHUFLW xmm, m128, imm8 (memory-source form).
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst); // QQ new
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2370 void Assembler::psrlq(XMMRegister dst, int shift) {
2371 // HMM Table D-1 says sse2 or mmx
2372 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2374 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2375 emit_byte(0x66);
2376 emit_byte(0x0F);
2377 emit_byte(0x73);
2378 emit_byte(0xC0 | encode);
2379 emit_byte(shift);
2380 }
// PTEST xmm, m128 (SSE4.1: 66 0F 38 17 /r): logical compare setting ZF/CF.
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_operand(dst, src);
}

// PTEST xmm, xmm (register-direct form).
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");

  emit_byte(0x66);
  // NOTE(review): REX.W from prefixq_and_encode is ignored by PTEST;
  // prefix_and_encode would appear sufficient — verify intent.
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_byte(0xC0 | encode);
}

// PUNPCKLBW xmm, xmm (66 0F 60 /r): interleave low-order bytes.
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x60);
  emit_byte(0xC0 | encode);
}
// PUSH imm32 (68 id).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}

// PUSH r (50+rd).
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}

// PUSHF/PUSHFQ: push flags register onto the stack.
void Assembler::pushf() {
  emit_byte(0x9C);
}

// PUSH m (FF /6).
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);  // rsi (encoding 6) supplies the /6 opcode extension
}
// PXOR xmm, m128 (66 0F EF /r): bitwise xor of packed data.
void Assembler::pxor(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xEF);
  emit_operand(dst, src);
}

// PXOR xmm, xmm (register-direct form). The InstructionMark is not
// needed for a register-register encoding but is harmless.
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xEF);
  emit_byte(0xC0 | encode);
}
// RCL r32, imm8: rotate left through carry.
// Uses the one-byte-shorter D1 /2 form for a shift of 1, else C1 /2 ib.
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
void Assembler::rep_mov() {
  emit_byte(0xF3);  // REP prefix
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}

// sets rcx pointer sized words with rax, value at [edi]
// generic
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);  // REP prefix
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer sized words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);  // REPNE prefix
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}

#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);  // REPNE prefix
  // SCASL (no REX.W: 32-bit element size)
  emit_byte(0xAF);
}
#endif
// RET (C3), or RET imm16 (C2 iw) which also pops imm16 bytes of arguments.
void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}

// SAHF: store AH into the low byte of the flags register.
// Guarded out in 64-bit builds: early x86-64 processors dropped SAHF.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}
// SAR r32, imm8: arithmetic right shift.
// Uses the one-byte-shorter D1 /7 form for a shift of 1, else C1 /7 ib.
void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// SAR r32, CL (D3 /7).
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
// SBB m32, imm32: subtract with borrow (81 /3, or short 83 /3 ib form).
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);  // rbx (encoding 3) supplies /3
}

// SBB r32, imm32.
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}

// SBB r32, m32 (1B /r).
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

// SBB r32, r32 (register-direct form).
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

// SETcc r8 (0F 90+cc): set byte on condition. The 'true' argument forces
// a REX prefix where needed so the low bytes of rsp/rbp/rsi/rdi encode
// as SPL/BPL/SIL/DIL rather than AH/CH/DH/BH.
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}
// SHL r32, imm8: logical left shift.
// Uses the one-byte-shorter D1 /4 form for a shift of 1, else C1 /4 ib.
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// SHL r32, CL (D3 /4).
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
2596 void Assembler::shrl(Register dst, int imm8) {
2597 assert(isShiftCount(imm8), "illegal shift count");
2598 int encode = prefix_and_encode(dst->encoding());
2599 emit_byte(0xC1);
2600 emit_byte(0xE8 | encode);
2601 emit_byte(imm8);
2602 }
// SHR r32, CL (D3 /5).
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}

// copies a single word from [esi] to [edi]
void Assembler::smovl() {
  emit_byte(0xA5);
}
// SQRTSD xmm, xmm (F2 0F 51 /r): scalar double-precision square root.
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  // HMM Table D-1 says sse2
  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_byte(0xC0 | encode);
}

// STMXCSR m32 (0F AE /3): store the MXCSR control/status register.
void Assembler::stmxcsr( Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(3), dst);  // encoding 3 supplies the /3 extension
}
// SUB m32, imm32: short 83 /5 ib form for sign-extendable 8-bit
// immediates, else the full 81 /5 id form.
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  if (is8bit(imm32)) {
    emit_byte(0x83);
    emit_operand(rbp, dst, 1);  // rbp (encoding 5) supplies the /5 extension
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(0x81);
    emit_operand(rbp, dst, 4);
    emit_long(imm32);
  }
}

// SUB r32, imm32.
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

// SUB m32, r32 (29 /r).
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

// SUB r32, m32 (2B /r).
void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

// SUB r32, r32 (register-direct form).
void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
// SUBSD xmm, xmm (F2 0F 5C /r): scalar double-precision subtract.
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

// SUBSD xmm, m64 (memory-source form).
void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}

// SUBSS xmm, xmm (F3 0F 5C /r): scalar single-precision subtract.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

// SUBSS xmm, m32 (memory-source form).
void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}
// TEST r8, imm8 (F6 /0 ib). The 'true' argument forces a REX prefix
// where needed so byte registers beyond AL..BL are addressable.
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

// TEST r32, imm32: short A9 id accumulator form for eax (encoding 0),
// else F7 /0 id.
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_byte(0xA9);
  } else {
    encode = prefix_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

// TEST r32, r32 (85 /r, register-direct form).
void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// TEST r32, m32.
void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}
// UCOMISD xmm, m64 (66 0F 2E /r): unordered scalar double compare.
// Implemented as the UCOMISS encoding with a leading 66 prefix.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

// UCOMISD xmm, xmm (register-direct form).
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

// UCOMISS xmm, m32 (0F 2E /r): unordered scalar float compare, sets EFLAGS.
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_operand(dst, src);
}

// UCOMISS xmm, xmm (register-direct form).
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_byte(0xC0 | encode);
}
// XADD m32, r32 (0F C1 /r): exchange-and-add; used with a LOCK prefix
// for atomic increment.
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// XCHG r32, m32 (87 /r).
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// XCHG r32, r32 (register-direct form).
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}
// XOR r32, imm32 (via emit_arith, which may use the short 83 /6 ib form).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

// XOR r32, m32 (33 /r).
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

// XOR r32, r32 (register-direct form).
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
// XORPD xmm, xmm (66 0F 57 /r): bitwise xor of packed doubles.
// Implemented as the XORPS encoding with a leading 66 prefix.
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  xorps(dst, src);
}

// XORPD xmm, m128 (memory-source form).
void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}

// XORPS xmm, xmm (0F 57 /r): bitwise xor of packed floats.
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_byte(0xC0 | encode);
}

// XORPS xmm, m128 (memory-source form).
void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}
#ifndef _LP64
// 32bit only pieces of the assembler

// CMP r32, imm32 with a relocated immediate (81 /7 id).
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding());  // ModRM: mod=11, /7, r/m=src1
  emit_data(imm32, rspec, 0);
}

// CMP m32, imm32 with a relocated immediate (81 /7 id).
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);  // rdi (encoding 7) supplies the /7 extension
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);  // rcx (encoding 1) supplies the /1 extension (CMPXCHG8B)
}

// DEC r32, one-byte 48+rd form. 32-bit only: 0x48..0x4F are REX prefixes
// in 64-bit mode.
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_byte(0x48 | dst->encoding());
}

#endif // _LP64
// 64bit typically doesn't use the x87 but needs to for the trig funcs

// FABS: ST(0) = |ST(0)|.
void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}

// FADD ST(0), ST(i).
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}

// FADD m64 (DC /0).
void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);
}

// FADD m32 (D8 /0).
void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);
}

// FADD with destination ST(i) (DC C0+i).
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}

// FADDP ST(i), ST(0), then pop.
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}

// FCHS: negate ST(0).
void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}

// FCOM ST(i): compare ST(0) with ST(i).
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}

// FCOMP ST(i): compare and pop.
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}

// FCOMP m64 (DC /3).
void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);
}

// FCOMP m32 (D8 /3).
void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);
}

// FCOMPP: compare ST(0) with ST(1) and pop both.
void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}

// FCOS: ST(0) = cos(ST(0)).
void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}

// FDECSTP: decrement the x87 register stack pointer.
void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}

// FDIV ST(0), ST(i).
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}

// FDIV m64 (DC /6).
void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);
}

// FDIV m32 (D8 /6).
void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);
}

// Divide with destination ST(i) (DC F8+i); see the Intel-manual caveat
// below regarding reversed div/sub register encodings.
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

// FDIVP, pop after divide.
void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

// FDIVR ST(0), ST(i): reversed divide.
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

// FDIVR m64 (DC /7).
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

// FDIVR m32 (D8 /7).
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}

// Reversed divide with destination ST(i) (DC F0+i).
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

// FDIVRP, pop after reversed divide.
void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

// FFREE ST(i): mark the register as empty.
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

// FILD m64int (DF /5): load 64-bit integer.
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

// FILD m32int (DB /0): load 32-bit integer.
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

// FINCSTP: increment the x87 register stack pointer.
void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}
// FINIT (waiting form): initialize the x87 FPU.
void Assembler::finit() {
  emit_byte(0x9B);   // FWAIT
  emit_byte(0xDB);
  emit_byte(0xE3);
}

// FIST m32int (DB /2): store ST(0) as a 32-bit integer.
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

// FISTP m64int (DF /7): store ST(0) as a 64-bit integer and pop.
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

// FISTP m32int (DB /3): store ST(0) as a 32-bit integer and pop.
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

// FLD1: push +1.0.
void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

// FLD m64 (DD /0).
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

// FLD m32 (D9 /0).
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}

// FLD ST(i): push a copy of ST(i).
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}

// FLD m80 (DB /5): load 80-bit extended double.
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

// FLDCW m16 (D9 /5): load the x87 control word.
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

// FLDENV m (D9 /4): load the x87 environment.
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

// FLDLG2: push log10(2).
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

// FLDLN2: push ln(2).
void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

// FLDZ: push +0.0.
void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// ST(0) = ln(ST(0)), computed as ln(2) * log2(x) via FYL2X.
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// ST(0) = log10(ST(0)), computed as log10(2) * log2(x) via FYL2X.
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}
// FMUL ST(0), ST(i).
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

// FMUL m64 (DC /1).
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

// FMUL m32 (D8 /1).
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

// FMUL with destination ST(i) (DC C8+i).
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

// FMULP ST(i), ST(0), then pop.
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

// FNSAVE m (DD /6): save x87 state, non-waiting form.
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}

// Store the x87 control word (D9 /7). Note the leading 0x9B FWAIT:
// despite the fnstcw name this emits the waiting FSTCW form.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

// FNSTSW AX: store the x87 status word into AX.
void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

// FPREM: partial remainder (truncating, pre-IEEE semantics).
void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

// FPREM1: IEEE partial remainder.
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

// FRSTOR m (DD /4): restore x87 state saved by fnsave.
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

// FSIN: ST(0) = sin(ST(0)).
void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

// FSQRT: ST(0) = sqrt(ST(0)).
void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}
// FST m64 (DD /2): store ST(0).
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

// FST m32 (D9 /2): store ST(0).
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

// FSTP m64 (DD /3): store ST(0) and pop.
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

// FSTP ST(i): store ST(0) into ST(i) and pop.
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}

// FSTP m32 (D9 /3): store ST(0) and pop.
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

// FSTP m80 (DB /7): store ST(0) as 80-bit extended double and pop.
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}

// FSUB ST(0), ST(i).
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

// FSUB m64 (DC /4).
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

// FSUB m32 (D8 /4).
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

// Subtract with destination ST(i) (DC E8+i).
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

// FSUBP, pop after subtract.
void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

// FSUBR ST(0), ST(i): reversed subtract.
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

// FSUBR m64 (DC /5).
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

// FSUBR m32 (D8 /5).
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

// Reversed subtract with destination ST(i) (DC E0+i).
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

// FSUBRP, pop after reversed subtract.
void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// tan(ST(0)): FPTAN pushes an extra 1.0, discarded with FSTP ST(0).
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}
// FTST: compare ST(0) with +0.0.
void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

// FUCOMI ST(0), ST(i): unordered compare, setting EFLAGS directly.
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

// FUCOMIP: as fucomi, then pop.
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

// FWAIT: wait for pending x87 exceptions.
void Assembler::fwait() {
  emit_byte(0x9B);
}

// FXCH ST(i): exchange ST(0) with ST(i).
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

// FYL2X: ST(1) = ST(1) * log2(ST(0)), then pop.
void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}
#ifndef _LP64

// INC r32, one-byte 40+rd form. 32-bit only: 0x40..0x47 are REX prefixes
// in 64-bit mode.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());
}

// LEA r32, m: delegates to the 32-bit form.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// MOV m32, imm32 with a relocated immediate (C7 /0 id).
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);  // rax (encoding 0) supplies the /0 extension
  emit_data((int)imm32, rspec, 0);
}

// MOV r32, imm32 with a relocated immediate (B8+rd id).
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

// POPAD (61): pop all general-purpose registers.
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

// PUSH imm32 with a relocated immediate (68 id).
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

// PUSHAD (60): push all general-purpose registers.
void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// SETNZ on dst's byte register (0F 95).
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// SHLD dst, src, CL (0F A5 /r): double-precision shift left.
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// SHRD dst, src, CL (0F AD /r): double-precision shift right.
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
#else // LP64

// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value.

// Returns true if 'adr' can be addressed with a 32-bit (rip-relative)
// displacement from any possible emission point in the code cache;
// false forces the caller to materialize a 64-bit literal instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.

  // Worst-case checks from both ends of the code cache; sizeof(int)
  // accounts for the 4-byte displacement field itself.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
// Emit a 64-bit literal, attaching a simple relocation of 'rtype' when one
// is requested; relocInfo::none emits the raw quadword with no reloc entry.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}
// Emit a 64-bit literal with an explicit relocation spec. Must be called
// inside an InstructionMark so the reloc can anchor at the instruction start.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}
// Emit any REX prefix a single-register operand needs and return its low
// three encoding bits.  Byte instructions touching encodings 4-7
// (spl/bpl/sil/dil) need a bare REX to avoid selecting ah/ch/dh/bh.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}
3470 int Assembler::prefixq_and_encode(int reg_enc) {
3471 if (reg_enc < 8) {
3472 prefix(REX_W);
3473 } else {
3474 prefix(REX_WB);
3475 reg_enc -= 8;
3476 }
3477 return reg_enc;
3478 }
// Emit the REX prefix needed by a two-register operand pair (R bit for dst,
// B bit for src) and return the packed ModRM register fields (reg=dst, rm=src).
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      // Byte access to spl/bpl/sil/dil requires a bare REX prefix.
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
3500 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
3501 if (dst_enc < 8) {
3502 if (src_enc < 8) {
3503 prefix(REX_W);
3504 } else {
3505 prefix(REX_WB);
3506 src_enc -= 8;
3507 }
3508 } else {
3509 if (src_enc < 8) {
3510 prefix(REX_WR);
3511 } else {
3512 prefix(REX_WRB);
3513 src_enc -= 8;
3514 }
3515 dst_enc -= 8;
3516 }
3517 return dst_enc << 3 | src_enc;
3518 }
// Emit REX.B when the sole register operand lives in r8-r15.
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}
// Emit the REX prefix (B for an extended base, X for an extended index)
// required by a memory operand; nothing is emitted when neither applies.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}
3540 void Assembler::prefixq(Address adr) {
3541 if (adr.base_needs_rex()) {
3542 if (adr.index_needs_rex()) {
3543 prefix(REX_WXB);
3544 } else {
3545 prefix(REX_WB);
3546 }
3547 } else {
3548 if (adr.index_needs_rex()) {
3549 prefix(REX_WX);
3550 } else {
3551 prefix(REX_W);
3552 }
3553 }
3554 }
3557 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
3558 if (reg->encoding() < 8) {
3559 if (adr.base_needs_rex()) {
3560 if (adr.index_needs_rex()) {
3561 prefix(REX_XB);
3562 } else {
3563 prefix(REX_B);
3564 }
3565 } else {
3566 if (adr.index_needs_rex()) {
3567 prefix(REX_X);
3568 } else if (reg->encoding() >= 4 ) {
3569 prefix(REX);
3570 }
3571 }
3572 } else {
3573 if (adr.base_needs_rex()) {
3574 if (adr.index_needs_rex()) {
3575 prefix(REX_RXB);
3576 } else {
3577 prefix(REX_RB);
3578 }
3579 } else {
3580 if (adr.index_needs_rex()) {
3581 prefix(REX_RX);
3582 } else {
3583 prefix(REX_R);
3584 }
3585 }
3586 }
3587 }
3589 void Assembler::prefixq(Address adr, Register src) {
3590 if (src->encoding() < 8) {
3591 if (adr.base_needs_rex()) {
3592 if (adr.index_needs_rex()) {
3593 prefix(REX_WXB);
3594 } else {
3595 prefix(REX_WB);
3596 }
3597 } else {
3598 if (adr.index_needs_rex()) {
3599 prefix(REX_WX);
3600 } else {
3601 prefix(REX_W);
3602 }
3603 }
3604 } else {
3605 if (adr.base_needs_rex()) {
3606 if (adr.index_needs_rex()) {
3607 prefix(REX_WRXB);
3608 } else {
3609 prefix(REX_WRB);
3610 }
3611 } else {
3612 if (adr.index_needs_rex()) {
3613 prefix(REX_WRX);
3614 } else {
3615 prefix(REX_WR);
3616 }
3617 }
3618 }
3619 }
// Emit the REX prefix for an XMM reg + memory operand pair: R bit for
// xmm8-xmm15, B/X bits for an extended base/index in the address.
// XMM registers have no byte-register quirk, so no bare-REX case exists.
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
// ADC r64, imm32 (REX.W 81 /2 id; 83 /2 ib via emit_arith for 8-bit imm).
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}
// ADC r64, r/m64 (REX.W 13 /r), memory source.
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
3663 void Assembler::adcq(Register dst, Register src) {
3664 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3665 emit_arith(0x13, 0xC0, dst, src);
3666 }
// ADD r/m64, imm (REX.W 81 /0 id, or 83 /0 ib chosen by emit_arith_operand).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}
// ADD r/m64, r64 (REX.W 01 /r), memory destination.
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}
// ADD r64, imm32 (REX.W 81 /0 id; short 83 /0 form via emit_arith).
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}
// ADD r64, r/m64 (REX.W 03 /r), memory source.
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}
// ADD r64, r/m64 (REX.W 03 /r), register-register form.
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
// AND r64, imm32 (REX.W 81 /4 id; short 83 /4 form via emit_arith).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}
// AND r64, r/m64 (REX.W 23 /r), memory source.
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
3710 void Assembler::andq(Register dst, Register src) {
3711 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3712 emit_arith(0x23, 0xC0, dst, src);
3713 }
// BSF r64, r64 (REX.W 0F BC /r): bit scan forward.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}
// BSR r64, r64 (REX.W 0F BD /r): bit scan reverse.  On LZCNT-capable CPUs
// the same opcode with an F3 prefix decodes as LZCNT, hence the assert.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
// BSWAP r64 (REX.W 0F C8+r): byte-swap the full 64-bit register.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
// CQO (REX.W 99): sign-extend rax into rdx:rax.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}
// CLFLUSH m8 (0F AE /7): flush the cache line containing adr.
// rdi is passed only for its encoding 7, the /7 opcode extension.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);
}
// CMOVcc r64, r64 (REX.W 0F 40+cc /r): conditional move, register source.
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}
// CMOVcc r64, m64 (REX.W 0F 40+cc /r): conditional move, memory source.
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
// CMP r/m64, imm32 (REX.W 81 /7 id); rdi supplies the /7 opcode extension,
// and the trailing 4 tells emit_operand an imm32 follows the operand bytes.
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}
// CMP r64, imm32 (REX.W 81 /7 id; short 83 /7 form via emit_arith).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
3776 void Assembler::cmpq(Address dst, Register src) {
3777 InstructionMark im(this);
3778 prefixq(dst, src);
3779 emit_byte(0x3B);
3780 emit_operand(src, dst);
3781 }
// CMP r64, r/m64 (REX.W 3B /r), register-register form: flags = dst - src.
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
// CMP r64, m64 (REX.W 3B /r): flags = dst - [src].
void Assembler::cmpq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
// CMPXCHG m64, r64 (REX.W 0F B1 /r): compare rax with [adr], and if equal
// store reg.  Callers add a lock prefix separately for atomicity.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
// CVTSI2SD xmm, r64 (F2 REX.W 0F 2A /r): signed 64-bit int to double.
// The F2 prefix must precede the REX prefix, hence the emission order.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
// CVTSI2SS xmm, r64 (F3 REX.W 0F 2A /r): signed 64-bit int to float.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
// CVTTSD2SI r64, xmm (F2 REX.W 0F 2C /r): truncating double to 64-bit int.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
// CVTTSS2SI r64, xmm (F3 REX.W 0F 2C /r): truncating float to 64-bit int.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
// DEC r32 via the two-byte FF /1 form.
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
// DEC r64 via the two-byte REX.W FF /1 form.
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
// DEC m64 (REX.W FF /1); rcx supplies the /1 opcode extension.
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
// FXRSTOR m512byte (0F AE /1): restore FP/SSE state from memory.
// as_Register(1) supplies only the /1 opcode extension.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}
// FXSAVE m512byte (0F AE /0): save FP/SSE state to memory.
// as_Register(0) supplies only the /0 opcode extension.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}
// IDIV r64 (REX.W F7 /7): signed divide rdx:rax by src.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}
// IMUL r64, r64 (REX.W 0F AF /r): two-operand signed multiply.
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}
// IMUL r64, r64, imm: dst = src * value.  Uses the short 6B /r ib form
// when the immediate fits in a signed byte, else 69 /r id.
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
// INC r32 via the two-byte FF /0 form.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
// INC r64 via the two-byte REX.W FF /0 form.
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
// INC m64 (REX.W FF /0); rax supplies the /0 opcode extension.
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
// Generic lea: on 64-bit this is always the quadword form.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}
// LEA r64, m (REX.W 8D /r): load effective address.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}
// MOV r64, imm64 (REX.W B8+r io): full 64-bit immediate load, no relocation.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}
// MOV r64, imm64 with a relocation record for the embedded literal,
// so the 8-byte immediate can be patched later (e.g. oop addresses).
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}
// MOV r32, imm32 (B8+r id) carrying a narrow (compressed) oop immediate;
// the reloc uses narrow_oop_operand so the GC can patch the 32-bit value.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
// MOV m32, imm32 (C7 /0 id) storing a narrow (compressed) oop immediate.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
// CMP r32, imm32 (81 /7 id) against a narrow (compressed) oop immediate.
// 0xF8 | encode is the ModRM byte: mod=11, /7 extension, rm=src1.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
// CMP m32, imm32 (81 /7 id) against a narrow (compressed) oop immediate.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_byte(0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
// LZCNT r64, r64 (F3 REX.W 0F BD /r): count leading zeros.  Without LZCNT
// support the CPU decodes this encoding as BSR, hence the assert.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
// MOVQ xmm, r64 (66 REX.W 0F 6E /r): GPR to XMM.
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}
// MOVQ r64, xmm (66 REX.W 0F 7E /r): XMM to GPR.
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix (0F 7E puts the XMM reg in the
  // ModRM reg field and the GPR in the rm field)
  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
// MOV r64, r64 (REX.W 8B /r).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}
// MOV r64, m64 (REX.W 8B /r): load.
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
// MOV m64, r64 (REX.W 89 /r): store.
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
// MOVSX r64, m8 (REX.W 0F BE /r): sign-extend byte load.
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
// MOVSX r64, r8 (REX.W 0F BE /r): sign-extend byte register.
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
// Intended: load sign-extended imm32 into r64.  The encoding below is
// broken (0xC7 | encode clobbers opcode bits instead of forming a ModRM
// byte), so this is intentionally disabled until fixed and tested.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);
  emit_long(imm32);
}
// Store a sign-extended imm32 into m64 (REX.W C7 /0 id) -- the hardware
// sign-extends the 32-bit immediate to 64 bits on the store.
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}
// MOVSXD r64, m32 (REX.W 63 /r): sign-extend dword load.
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}
// MOVSXD r64, r32 (REX.W 63 /r): sign-extend dword register.
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}
// MOVSX r64, m16 (REX.W 0F BF /r): sign-extend word load.
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
// MOVSX r64, r16 (REX.W 0F BF /r): sign-extend word register.
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
// MOVZX r64, m8 (REX.W 0F B6 /r): zero-extend byte load.
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
// MOVZX r64, r8 (REX.W 0F B6 /r): zero-extend byte register.
void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}
// MOVZX r64, m16 (REX.W 0F B7 /r): zero-extend word load.
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}
// MOVZX r64, r16 (REX.W 0F B7 /r): zero-extend word register.
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
// NEG r64 (REX.W F7 /3): two's-complement negate.
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}
// NOT r64 (REX.W F7 /2): one's-complement negate.
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}
// OR r/m64, imm32 (REX.W 81 /1 id); rcx supplies the /1 opcode extension.
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}
// OR r64, imm32 (REX.W 81 /1 id; short 83 /1 form via emit_arith).
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}
// OR r64, m64 (REX.W 0B /r): memory source.
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}
// OR r64, r/m64 (REX.W 0B /r), register-register form.
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
// Restore all 15 general registers from the frame laid out by pusha()
// (slot 11 was rsp's saved value and is deliberately skipped), then pop
// the 16-slot frame.  Must mirror pusha()'s layout exactly.
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}
// POPCNT r64, m64 (F3 REX.W 0F B8 /r): population count, memory source.
// F3 is emitted before the REX prefix, as mandatory prefixes must be.
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
// POPCNT r64, r64 (F3 REX.W 0F B8 /r): population count, register source.
void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}
// POP m64 (8F /0); rax supplies the /0 opcode extension.
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
// Save all 15 general registers in a 16-slot frame (rsp's own slot, index
// 11, is skipped; its pre-push value is stashed in the red zone first).
// Layout must mirror popa() exactly.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}
// PUSH m64 (FF /6); rsi supplies the /6 opcode extension.
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
// RCL r64, imm8: rotate through carry.  Uses the one-byte-shorter D1 /2
// form for a shift of 1, else C1 /2 ib.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
// SAR r64, imm8: arithmetic shift right.  Short D1 /7 form for a shift
// of 1, else C1 /7 ib.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}
// SAR r64, CL (REX.W D3 /7): arithmetic shift right by CL.
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
// SBB r/m64, imm (REX.W 81/83 /3); rbx supplies the /3 opcode extension.
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
// SBB r64, imm32 (REX.W 81 /3 id; short 83 /3 form via emit_arith).
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}
// SBB r64, m64 (REX.W 1B /r): subtract with borrow, memory source.
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}
// SBB r64, r/m64 (REX.W 1B /r), register-register form.
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
// SHL r64, imm8: shift left.  Short D1 /4 form for a shift of 1,
// else C1 /4 ib.
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}
// SHL r64, CL (REX.W D3 /4): shift left by CL.
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
4314 void Assembler::shrq(Register dst, int imm8) {
4315 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4316 int encode = prefixq_and_encode(dst->encoding());
4317 emit_byte(0xC1);
4318 emit_byte(0xE8 | encode);
4319 emit_byte(imm8);
4320 }
// SHR r64, CL (REX.W D3 /5): logical shift right by CL.
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
// SQRTSD xmm, m64 (F2 0F 51 /r): scalar double square root, memory source.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}
4338 void Assembler::subq(Address dst, int32_t imm32) {
4339 InstructionMark im(this);
4340 prefixq(dst);
4341 if (is8bit(imm32)) {
4342 emit_byte(0x83);
4343 emit_operand(rbp, dst, 1);
4344 emit_byte(imm32 & 0xFF);
4345 } else {
4346 emit_byte(0x81);
4347 emit_operand(rbp, dst, 4);
4348 emit_long(imm32);
4349 }
4350 }
// SUB r64, imm32 (REX.W 81 /5 id; short 83 /5 form via emit_arith).
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}
// SUB m64, r64 (REX.W 29 /r): memory destination.
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}
// SUB r64, m64 (REX.W 2B /r): memory source.
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}
// SUB r64, r/m64 (REX.W 2B /r), register-register form.
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
// TEST r64, imm32.  Uses the short rax-only form (REX.W A9 id) when dst is
// rax, else REX.W F7 /0 id.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}
// TEST r64, r64 (REX.W 85 /r): AND operands, set flags, discard result.
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
// XADD m64, r64 (REX.W 0F C1 /r): exchange and add.  Callers add a lock
// prefix separately when atomicity is required.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}
// XCHG r64, m64 (REX.W 87 /r): implicitly locked by the hardware.
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}
// XCHG r64, r64 (REX.W 87 /r).
void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}
// XOR r64, r/m64 (REX.W 33 /r), register-register form.
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
// XOR r64, m64 (REX.W 33 /r): memory source.
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
4430 #endif // !LP64
// Maps each x86 condition code (indexed by its 4-bit cc encoding) to its
// logical negation, used to invert branch/cmov conditions.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};
4453 // Implementation of MacroAssembler
4455 // First all the versions that have distinct versions depending on 32/64 bit
4456 // Unless the difference is trivial (1 line or so).
4458 #ifndef _LP64
4460 // 32bit versions
// 32-bit: an AddressLiteral is always directly addressable as an absolute
// Address (no rip-relative reachability concerns).
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}
// 32-bit: build a base+index*scale Address straight from the ArrayAddress.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
4470 int MacroAssembler::biased_locking_enter(Register lock_reg,
4471 Register obj_reg,
4472 Register swap_reg,
4473 Register tmp_reg,
4474 bool swap_reg_contains_mark,
4475 Label& done,
4476 Label* slow_case,
4477 BiasedLockingCounters* counters) {
4478 assert(UseBiasedLocking, "why call this otherwise?");
4479 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4480 assert_different_registers(lock_reg, obj_reg, swap_reg);
4482 if (PrintBiasedLockingStatistics && counters == NULL)
4483 counters = BiasedLocking::counters();
4485 bool need_tmp_reg = false;
4486 if (tmp_reg == noreg) {
4487 need_tmp_reg = true;
4488 tmp_reg = lock_reg;
4489 } else {
4490 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4491 }
4492 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4493 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
4494 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
4495 Address saved_mark_addr(lock_reg, 0);
4497 // Biased locking
4498 // See whether the lock is currently biased toward our thread and
4499 // whether the epoch is still valid
4500 // Note that the runtime guarantees sufficient alignment of JavaThread
4501 // pointers to allow age to be placed into low bits
4502 // First check to see whether biasing is even enabled for this object
4503 Label cas_label;
4504 int null_check_offset = -1;
4505 if (!swap_reg_contains_mark) {
4506 null_check_offset = offset();
4507 movl(swap_reg, mark_addr);
4508 }
4509 if (need_tmp_reg) {
4510 push(tmp_reg);
4511 }
4512 movl(tmp_reg, swap_reg);
4513 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4514 cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4515 if (need_tmp_reg) {
4516 pop(tmp_reg);
4517 }
4518 jcc(Assembler::notEqual, cas_label);
4519 // The bias pattern is present in the object's header. Need to check
4520 // whether the bias owner and the epoch are both still current.
4521 // Note that because there is no current thread register on x86 we
4522 // need to store off the mark word we read out of the object to
4523 // avoid reloading it and needing to recheck invariants below. This
4524 // store is unfortunate but it makes the overall code shorter and
4525 // simpler.
4526 movl(saved_mark_addr, swap_reg);
4527 if (need_tmp_reg) {
4528 push(tmp_reg);
4529 }
4530 get_thread(tmp_reg);
4531 xorl(swap_reg, tmp_reg);
4532 if (swap_reg_contains_mark) {
4533 null_check_offset = offset();
4534 }
4535 movl(tmp_reg, klass_addr);
4536 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4537 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4538 if (need_tmp_reg) {
4539 pop(tmp_reg);
4540 }
4541 if (counters != NULL) {
4542 cond_inc32(Assembler::zero,
4543 ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4544 }
4545 jcc(Assembler::equal, done);
4547 Label try_revoke_bias;
4548 Label try_rebias;
4550 // At this point we know that the header has the bias pattern and
4551 // that we are not the bias owner in the current epoch. We need to
4552 // figure out more details about the state of the header in order to
4553 // know what operations can be legally performed on the object's
4554 // header.
4556 // If the low three bits in the xor result aren't clear, that means
4557 // the prototype header is no longer biased and we have to revoke
4558 // the bias on this object.
4559 testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4560 jcc(Assembler::notZero, try_revoke_bias);
4562 // Biasing is still enabled for this data type. See whether the
4563 // epoch of the current bias is still valid, meaning that the epoch
4564 // bits of the mark word are equal to the epoch bits of the
4565 // prototype header. (Note that the prototype header's epoch bits
4566 // only change at a safepoint.) If not, attempt to rebias the object
4567 // toward the current thread. Note that we must be absolutely sure
4568 // that the current epoch is invalid in order to do this because
4569 // otherwise the manipulations it performs on the mark word are
4570 // illegal.
4571 testl(swap_reg, markOopDesc::epoch_mask_in_place);
4572 jcc(Assembler::notZero, try_rebias);
4574 // The epoch of the current bias is still valid but we know nothing
4575 // about the owner; it might be set or it might be clear. Try to
4576 // acquire the bias of the object using an atomic operation. If this
4577 // fails we will go in to the runtime to revoke the object's bias.
4578 // Note that we first construct the presumed unbiased header so we
4579 // don't accidentally blow away another thread's valid bias.
4580 movl(swap_reg, saved_mark_addr);
4581 andl(swap_reg,
4582 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4583 if (need_tmp_reg) {
4584 push(tmp_reg);
4585 }
4586 get_thread(tmp_reg);
4587 orl(tmp_reg, swap_reg);
4588 if (os::is_MP()) {
4589 lock();
4590 }
4591 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4592 if (need_tmp_reg) {
4593 pop(tmp_reg);
4594 }
4595 // If the biasing toward our thread failed, this means that
4596 // another thread succeeded in biasing it toward itself and we
4597 // need to revoke that bias. The revocation will occur in the
4598 // interpreter runtime in the slow case.
4599 if (counters != NULL) {
4600 cond_inc32(Assembler::zero,
4601 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4602 }
4603 if (slow_case != NULL) {
4604 jcc(Assembler::notZero, *slow_case);
4605 }
4606 jmp(done);
4608 bind(try_rebias);
4609 // At this point we know the epoch has expired, meaning that the
4610 // current "bias owner", if any, is actually invalid. Under these
4611 // circumstances _only_, we are allowed to use the current header's
4612 // value as the comparison value when doing the cas to acquire the
4613 // bias in the current epoch. In other words, we allow transfer of
4614 // the bias from one thread to another directly in this situation.
4615 //
4616 // FIXME: due to a lack of registers we currently blow away the age
4617 // bits in this situation. Should attempt to preserve them.
4618 if (need_tmp_reg) {
4619 push(tmp_reg);
4620 }
4621 get_thread(tmp_reg);
4622 movl(swap_reg, klass_addr);
4623 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4624 movl(swap_reg, saved_mark_addr);
4625 if (os::is_MP()) {
4626 lock();
4627 }
4628 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4629 if (need_tmp_reg) {
4630 pop(tmp_reg);
4631 }
4632 // If the biasing toward our thread failed, then another thread
4633 // succeeded in biasing it toward itself and we need to revoke that
4634 // bias. The revocation will occur in the runtime in the slow case.
4635 if (counters != NULL) {
4636 cond_inc32(Assembler::zero,
4637 ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4638 }
4639 if (slow_case != NULL) {
4640 jcc(Assembler::notZero, *slow_case);
4641 }
4642 jmp(done);
4644 bind(try_revoke_bias);
4645 // The prototype mark in the klass doesn't have the bias bit set any
4646 // more, indicating that objects of this data type are not supposed
4647 // to be biased any more. We are going to try to reset the mark of
4648 // this object to the prototype value and fall through to the
4649 // CAS-based locking scheme. Note that if our CAS fails, it means
4650 // that another thread raced us for the privilege of revoking the
4651 // bias of this particular object, so it's okay to continue in the
4652 // normal locking code.
4653 //
4654 // FIXME: due to a lack of registers we currently blow away the age
4655 // bits in this situation. Should attempt to preserve them.
4656 movl(swap_reg, saved_mark_addr);
4657 if (need_tmp_reg) {
4658 push(tmp_reg);
4659 }
4660 movl(tmp_reg, klass_addr);
4661 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4662 if (os::is_MP()) {
4663 lock();
4664 }
4665 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4666 if (need_tmp_reg) {
4667 pop(tmp_reg);
4668 }
4669 // Fall through to the normal CAS-based lock, because no matter what
4670 // the result of the above CAS, some thread must have succeeded in
4671 // removing the bias bit from the object's header.
4672 if (counters != NULL) {
4673 cond_inc32(Assembler::zero,
4674 ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4675 }
4677 bind(cas_label);
4679 return null_check_offset;
4680 }
// Call a leaf runtime routine (no VM-state transition, no last_Java_frame).
// 32-bit cdecl: the caller pushed the arguments, and the callee does not
// pop them, so we clean the stack ourselves after the call returns.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  // Pop the arguments pushed by pass_arg0..pass_arg3 (one word each).
  increment(rsp, number_of_arguments * wordSize);
}
// Compare a memory operand against an oop constant embedded as a 32-bit
// immediate in the code stream; the relocation lets the GC find and
// update the embedded oop.
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Register flavor of the oop-immediate compare above.
void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4695 void MacroAssembler::extend_sign(Register hi, Register lo) {
4696 // According to Intel Doc. AP-526, "Integer Divide", p.18.
4697 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4698 cdql();
4699 } else {
4700 movl(hi, lo);
4701 sarl(hi, 31);
4702 }
4703 }
// Emit a single 5-byte no-op instruction (segment-override prefixes on a
// NOP) so the method entry can later be atomically overwritten with a
// 5-byte jump by patch_verified_entry.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}
// Branch to L if x87 condition flag C2 is set. fnstsw stores the FPU
// status word into ax; sahf moves ah into EFLAGS, mapping C2 onto the
// parity flag. tmp preserves the caller's rax across the sequence.
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}
// Branch to L if x87 condition flag C2 is clear — the complement of jC2;
// see jC2 for how C2 reaches the parity flag.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
// Indirect jump through a jump-table entry (used for switch dispatch).
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}
// Note: y_lo will be destroyed
// Three-way compare of two longs held in register pairs; leaves -1/0/+1
// in x_hi (lcmp semantics). High words compare signed, low words unsigned.
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);   // x > y: result = +1
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);    // x < y: result = -1
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);   // equal path falls in with x_hi already zero
}
// Load the literal address itself (not its contents) into dst, recording
// the relocation so the constant can be patched.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

// Store the literal address into a memory location.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}
// Tear down the current frame: restore rsp from rbp, then pop the saved rbp.
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}
// 64x64->64 multiply of two longs on the stack; result in rdx:rax.
// Clobbers rbx and rcx. Offsets locate the operands relative to rsp.
void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}
// Two's-complement negate of the long in hi:lo. neg lo sets carry when
// lo != 0; adc then neg on hi completes -(hi:lo) across the borrow.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}
// Shift the long in hi:lo left by the count in rcx (lshl semantics).
void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}
// Shift the long in hi:lo right by the count in rcx; sign_extension
// selects arithmetic (lshr) vs. logical (lushr) semantics.
void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}
// Load an oop constant into a register as a relocated 32-bit immediate
// so the GC can update it if the object moves.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Store an oop constant directly into memory (same relocation scheme).
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4877 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4878 if (src.is_lval()) {
4879 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4880 } else {
4881 movl(dst, as_Address(src));
4882 }
4883 }
// Store a register-sized word to an array-table slot.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

// Load a register-sized word from an array-table slot.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}
// Load a scalar double from an absolute address; on 32-bit every literal
// address is directly encodable, so no reachability check is needed.
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  movsd(dst, as_Address(src));
}
// Restore the registers saved by push_callee_saved_registers, in the
// exact reverse order of the pushes.
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

// Pop the 8-byte value on top of the stack onto the x87 stack (FPU TOS).
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}
// Save registers that must survive a call; order mirrors
// pop_callee_saved_registers.
void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

// Spill the x87 top-of-stack double onto the machine stack (8 bytes).
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}
// Push an oop constant as a relocated 32-bit immediate (see movoop).
void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}
4933 void MacroAssembler::pushptr(AddressLiteral src) {
4934 if (src.is_lval()) {
4935 push_literal32((int32_t)src.target(), src.rspec());
4936 } else {
4937 pushl(as_Address(src));
4938 }
4939 }
// Materialize 0/1 in dst from the current ZF: clear the whole word, then
// set the low byte if ZF is clear (setnz).
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}
// 32-bit calling-convention helpers: all outgoing arguments go on the
// stack, so each pass_argN is just a push. The caller is responsible for
// pushing in reverse argument order (cdecl).
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4962 #ifndef PRODUCT
4963 extern "C" void findpc(intptr_t x);
4964 #endif
4966 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4967 // In order to get locks to work, we need to fake a in_VM state
4968 JavaThread* thread = JavaThread::current();
4969 JavaThreadState saved_state = thread->thread_state();
4970 thread->set_thread_state(_thread_in_vm);
4971 if (ShowMessageBoxOnError) {
4972 JavaThread* thread = JavaThread::current();
4973 JavaThreadState saved_state = thread->thread_state();
4974 thread->set_thread_state(_thread_in_vm);
4975 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4976 ttyLocker ttyl;
4977 BytecodeCounter::print();
4978 }
4979 // To see where a verify_oop failed, get $ebx+40/X for this frame.
4980 // This is the value of eip which points to where verify_oop will return.
4981 if (os::message_box(msg, "Execution stopped, print registers?")) {
4982 ttyLocker ttyl;
4983 tty->print_cr("eip = 0x%08x", eip);
4984 #ifndef PRODUCT
4985 tty->cr();
4986 findpc(eip);
4987 tty->cr();
4988 #endif
4989 tty->print_cr("rax, = 0x%08x", rax);
4990 tty->print_cr("rbx, = 0x%08x", rbx);
4991 tty->print_cr("rcx = 0x%08x", rcx);
4992 tty->print_cr("rdx = 0x%08x", rdx);
4993 tty->print_cr("rdi = 0x%08x", rdi);
4994 tty->print_cr("rsi = 0x%08x", rsi);
4995 tty->print_cr("rbp, = 0x%08x", rbp);
4996 tty->print_cr("rsp = 0x%08x", rsp);
4997 BREAKPOINT;
4998 }
4999 } else {
5000 ttyLocker ttyl;
5001 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5002 assert(false, "DEBUG MESSAGE");
5003 }
5004 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5005 }
// Emit code that halts the VM with a message: pushes the message address
// and the current eip (via a call-to-next-instruction), saves all GP
// registers, and traps into debug32 above.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}
// Emit code that prints a warning via the runtime warning() routine and
// continues execution; full CPU state is preserved around the call.
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}
5029 #else // _LP64
5031 // 64 bit versions
// Convert an AddressLiteral to a pc-relative Address for the current
// emission point; caller must have verified reachability (rip-relative
// displacements are limited to +/-2GB).
Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}
// Convert an ArrayAddress to an addressable form: the 64-bit base cannot
// be encoded directly, so it is materialized into rscratch1 (clobbered!)
// and the index/scale are applied on top of it.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}
5052 int MacroAssembler::biased_locking_enter(Register lock_reg,
5053 Register obj_reg,
5054 Register swap_reg,
5055 Register tmp_reg,
5056 bool swap_reg_contains_mark,
5057 Label& done,
5058 Label* slow_case,
5059 BiasedLockingCounters* counters) {
5060 assert(UseBiasedLocking, "why call this otherwise?");
5061 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
5062 assert(tmp_reg != noreg, "tmp_reg must be supplied");
5063 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
5064 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
5065 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
5066 Address saved_mark_addr(lock_reg, 0);
5068 if (PrintBiasedLockingStatistics && counters == NULL)
5069 counters = BiasedLocking::counters();
5071 // Biased locking
5072 // See whether the lock is currently biased toward our thread and
5073 // whether the epoch is still valid
5074 // Note that the runtime guarantees sufficient alignment of JavaThread
5075 // pointers to allow age to be placed into low bits
5076 // First check to see whether biasing is even enabled for this object
5077 Label cas_label;
5078 int null_check_offset = -1;
5079 if (!swap_reg_contains_mark) {
5080 null_check_offset = offset();
5081 movq(swap_reg, mark_addr);
5082 }
5083 movq(tmp_reg, swap_reg);
5084 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5085 cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5086 jcc(Assembler::notEqual, cas_label);
5087 // The bias pattern is present in the object's header. Need to check
5088 // whether the bias owner and the epoch are both still current.
5089 load_prototype_header(tmp_reg, obj_reg);
5090 orq(tmp_reg, r15_thread);
5091 xorq(tmp_reg, swap_reg);
5092 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5093 if (counters != NULL) {
5094 cond_inc32(Assembler::zero,
5095 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5096 }
5097 jcc(Assembler::equal, done);
5099 Label try_revoke_bias;
5100 Label try_rebias;
5102 // At this point we know that the header has the bias pattern and
5103 // that we are not the bias owner in the current epoch. We need to
5104 // figure out more details about the state of the header in order to
5105 // know what operations can be legally performed on the object's
5106 // header.
5108 // If the low three bits in the xor result aren't clear, that means
5109 // the prototype header is no longer biased and we have to revoke
5110 // the bias on this object.
5111 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5112 jcc(Assembler::notZero, try_revoke_bias);
5114 // Biasing is still enabled for this data type. See whether the
5115 // epoch of the current bias is still valid, meaning that the epoch
5116 // bits of the mark word are equal to the epoch bits of the
5117 // prototype header. (Note that the prototype header's epoch bits
5118 // only change at a safepoint.) If not, attempt to rebias the object
5119 // toward the current thread. Note that we must be absolutely sure
5120 // that the current epoch is invalid in order to do this because
5121 // otherwise the manipulations it performs on the mark word are
5122 // illegal.
5123 testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5124 jcc(Assembler::notZero, try_rebias);
5126 // The epoch of the current bias is still valid but we know nothing
5127 // about the owner; it might be set or it might be clear. Try to
5128 // acquire the bias of the object using an atomic operation. If this
5129 // fails we will go in to the runtime to revoke the object's bias.
5130 // Note that we first construct the presumed unbiased header so we
5131 // don't accidentally blow away another thread's valid bias.
5132 andq(swap_reg,
5133 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5134 movq(tmp_reg, swap_reg);
5135 orq(tmp_reg, r15_thread);
5136 if (os::is_MP()) {
5137 lock();
5138 }
5139 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5140 // If the biasing toward our thread failed, this means that
5141 // another thread succeeded in biasing it toward itself and we
5142 // need to revoke that bias. The revocation will occur in the
5143 // interpreter runtime in the slow case.
5144 if (counters != NULL) {
5145 cond_inc32(Assembler::zero,
5146 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5147 }
5148 if (slow_case != NULL) {
5149 jcc(Assembler::notZero, *slow_case);
5150 }
5151 jmp(done);
5153 bind(try_rebias);
5154 // At this point we know the epoch has expired, meaning that the
5155 // current "bias owner", if any, is actually invalid. Under these
5156 // circumstances _only_, we are allowed to use the current header's
5157 // value as the comparison value when doing the cas to acquire the
5158 // bias in the current epoch. In other words, we allow transfer of
5159 // the bias from one thread to another directly in this situation.
5160 //
5161 // FIXME: due to a lack of registers we currently blow away the age
5162 // bits in this situation. Should attempt to preserve them.
5163 load_prototype_header(tmp_reg, obj_reg);
5164 orq(tmp_reg, r15_thread);
5165 if (os::is_MP()) {
5166 lock();
5167 }
5168 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5169 // If the biasing toward our thread failed, then another thread
5170 // succeeded in biasing it toward itself and we need to revoke that
5171 // bias. The revocation will occur in the runtime in the slow case.
5172 if (counters != NULL) {
5173 cond_inc32(Assembler::zero,
5174 ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5175 }
5176 if (slow_case != NULL) {
5177 jcc(Assembler::notZero, *slow_case);
5178 }
5179 jmp(done);
5181 bind(try_revoke_bias);
5182 // The prototype mark in the klass doesn't have the bias bit set any
5183 // more, indicating that objects of this data type are not supposed
5184 // to be biased any more. We are going to try to reset the mark of
5185 // this object to the prototype value and fall through to the
5186 // CAS-based locking scheme. Note that if our CAS fails, it means
5187 // that another thread raced us for the privilege of revoking the
5188 // bias of this particular object, so it's okay to continue in the
5189 // normal locking code.
5190 //
5191 // FIXME: due to a lack of registers we currently blow away the age
5192 // bits in this situation. Should attempt to preserve them.
5193 load_prototype_header(tmp_reg, obj_reg);
5194 if (os::is_MP()) {
5195 lock();
5196 }
5197 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5198 // Fall through to the normal CAS-based lock, because no matter what
5199 // the result of the above CAS, some thread must have succeeded in
5200 // removing the bias bit from the object's header.
5201 if (counters != NULL) {
5202 cond_inc32(Assembler::zero,
5203 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5204 }
5206 bind(cas_label);
5208 return null_check_offset;
5209 }
// Call a leaf runtime routine (no VM-state transition) with arguments
// already in registers. Ensures the 16-byte stack alignment the native
// ABI requires at the call site, padding with an extra 8 bytes if rsp is
// currently misaligned.
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for it's register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp,  frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}
5245 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5246 assert(!src2.is_lval(), "should use cmpptr");
5248 if (reachable(src2)) {
5249 cmpq(src1, as_Address(src2));
5250 } else {
5251 lea(rscratch1, src2);
5252 Assembler::cmpq(src1, Address(rscratch1, 0));
5253 }
5254 }
// Emit a Java-correct 64-bit divide: idivq traps on min_long / -1, so
// that case is detected up front and answered with (min_long, 0) per the
// JVM spec. Returns the offset of the idivq for implicit-exception info.
int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();                     // sign-extend rax into rdx:rax for idiv
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
5293 void MacroAssembler::decrementq(Register reg, int value) {
5294 if (value == min_jint) { subq(reg, value); return; }
5295 if (value < 0) { incrementq(reg, -value); return; }
5296 if (value == 0) { ; return; }
5297 if (value == 1 && UseIncDec) { decq(reg) ; return; }
5298 /* else */ { subq(reg, value) ; return; }
5299 }
5301 void MacroAssembler::decrementq(Address dst, int value) {
5302 if (value == min_jint) { subq(dst, value); return; }
5303 if (value < 0) { incrementq(dst, -value); return; }
5304 if (value == 0) { ; return; }
5305 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5306 /* else */ { subq(dst, value) ; return; }
5307 }
// Emit a 5-byte nop sequence (patchable as a unit by patch_verified_entry);
// 64-bit uses operand-size-prefixed NOPs instead of segment overrides.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommened sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}
5320 void MacroAssembler::incrementq(Register reg, int value) {
5321 if (value == min_jint) { addq(reg, value); return; }
5322 if (value < 0) { decrementq(reg, -value); return; }
5323 if (value == 0) { ; return; }
5324 if (value == 1 && UseIncDec) { incq(reg) ; return; }
5325 /* else */ { addq(reg, value) ; return; }
5326 }
5328 void MacroAssembler::incrementq(Address dst, int value) {
5329 if (value == min_jint) { addq(dst, value); return; }
5330 if (value < 0) { decrementq(dst, -value); return; }
5331 if (value == 0) { ; return; }
5332 if (value == 1 && UseIncDec) { incq(dst) ; return; }
5333 /* else */ { addq(dst, value) ; return; }
5334 }
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
// 64-bit jump-table dispatch: the table base must be materialized in
// rscratch1 first because a 64-bit base cannot be encoded in the jmp.
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}
// Not used on 64-bit: longs live in single registers, so the register-pair
// compare is meaningless here (guarded by ShouldNotReachHere).
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}
// Load the (relocated) 64-bit literal address itself into dst.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

// Store a 64-bit literal address into memory; goes through rscratch1
// (clobbered) because there is no 64-bit immediate-to-memory move.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}
// Tear down the current frame with the single-byte LEAVE instruction
// (equivalent to mov rsp,rbp; pop rbp).
void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}
// Not used on 64-bit: longs occupy one register, so there is no pair to
// negate (guarded by ShouldNotReachHere).
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}
// Load an oop constant as a relocated 64-bit immediate (GC-visible).
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

// Store an oop constant to memory; staged through rscratch1 (clobbered)
// since x86-64 has no 64-bit immediate store.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}
// Load either the literal address itself (lval) or the word it points at
// (rval). An unreachable rval goes through rscratch1 (clobbered) because
// rip-relative addressing only covers +/-2GB.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}
// Store a register-sized word to an array-table slot (as_Address
// clobbers rscratch1 for the table base).
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

// Load a register-sized word from an array-table slot.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  // 64-bit immediates cannot be stored directly; stage through rscratch1.
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}
// These are mostly for initializing NULL
// Store a sign-extended 32-bit immediate into a pointer-sized slot.
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

// Load a sign-extended 32-bit immediate into a 64-bit register.
void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}
// Push an oop constant: materialize it in rscratch1 (clobbered) via
// movoop, then push, since a 64-bit immediate cannot be pushed directly.
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}
// Push either a literal address (lval) or the word it points at (rval).
// The address is always materialized in rscratch1 (clobbered) first;
// for an rval the push then dereferences it.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}
// Clear the current thread's last-Java-frame anchor. sp is always
// cleared (that is what marks the frame invalid); fp and pc are cleared
// on request.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}
// Record the last Java frame in the thread anchor so the stack can be
// walked across a native call. fp and pc are optional; sp defaults to
// the current rsp. sp is stored last so the anchor never looks valid
// while only partially filled in.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
// Helpers that shuffle a Java-side argument register into the C
// calling-convention register c_rargN, skipping the move when the
// value is already in place.

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}
// Emit code that halts execution with an error message: save all
// registers, then call debug64(msg, rip-at-stop, pointer-to-regs).
void MacroAssembler::stop(const char* msg) {
  address rip = pc();      // code address of this stop site
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();  // not expected to return from debug64
}
// Emit code that prints a warning via the VM's warning() routine and
// continues. The full CPU state is saved/restored around the call;
// r12 preserves the original rsp across the re-alignment.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);
  pop(r12);
}
#ifndef PRODUCT
// Non-product helper (defined elsewhere) that describes a code address.
extern "C" void findpc(intptr_t x);
#endif

// Runtime entry reached from MacroAssembler::stop(): optionally shows a
// message box and dumps the saved register state. regs[] is the stack
// image produced by pusha(); regs[0] is at the lowest address (the
// register pushed last), regs[15] the one pushed first — hence the
// reversed indices in the printout below.
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
5571 #endif // _LP64
5573 // Now versions that are common to 32/64 bit
// Pointer-sized add: addq on 64-bit, addl on 32-bit.
void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
5587 void MacroAssembler::align(int modulus) {
5588 if (offset() % modulus != 0) {
5589 nop(modulus - (offset() % modulus));
5590 }
5591 }
// Packed-double AND with a literal-addressed operand.
// NOTE(review): no far-address fallback here — assumes src is
// reachable; confirm callers only use in-range constants.
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  andpd(dst, as_Address(src));
}

// Pointer-sized AND with a 32-bit immediate.
void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}
// Atomically increment the 32-bit counter at counter_addr.
// The caller's flags are preserved via pushf/popf; the lock prefix is
// only emitted on multiprocessor systems.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
// Fast path for unlocking a possibly-biased object: if the mark word's
// lock bits show the biased pattern, unlocking is a no-op and we jump
// to done.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}
// Normalize a C-style boolean in x to exactly 0 or 1.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}
// Wouldn't need if AddressLiteral version had new name
// (these exist only to keep the base-class overloads visible).
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

// Indirect call through a register.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}
5665 void MacroAssembler::call(AddressLiteral entry) {
5666 if (reachable(entry)) {
5667 Assembler::call_literal(entry.target(), entry.rspec());
5668 } else {
5669 lea(rscratch1, entry);
5670 Assembler::call(rscratch1);
5671 }
5672 }
// Implementation of call_VM versions
//
// The call(C)/jmp(E) trampoline deposits a return address on the stack
// just below rsp; call_VM_helper later recovers last_Java_pc from it
// (see the comment in call_VM_helper). The out-of-line stub at C does
// the actual VM call and returns past E.

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

// One-argument variant; arg_1 is shuffled into c_rarg1.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

// Two-argument variant. Higher-numbered args are placed first so an
// earlier move cannot clobber a later source; the asserts guard this.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

// Three-argument variant; same clobber discipline as above.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}
// call_VM variants that take an explicit last_java_sp. These delegate
// straight to call_VM_base (no trampoline: the caller supplies the sp
// to record in the frame anchor).
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

// Args are placed highest-numbered first so an earlier move cannot
// clobber a later source (asserts guard this).
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
// The workhorse behind all call_VM variants: records the last Java
// frame, calls the VM entry point, restores the thread register,
// clears the frame anchor, checks for pending exceptions, and fetches
// the oop result (if any) from the thread.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  // (On 64-bit the "true ||" makes this branch unconditional: r15 is
  // callee saved.)
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // Verify the cached thread register really still holds the thread.
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
// Computes last_Java_sp for the trampoline-based call_VM variants,
// then delegates to call_VM_base.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
// call_VM_leaf: call a C function that needs no Java frame anchor or
// exception check. Args are placed highest-numbered first so earlier
// moves cannot clobber later sources (asserts guard this).
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
// Empty hooks invoked from call_VM_base; presumably given real bodies
// by the interpreter's MacroAssembler subclass — verify against
// InterpreterMacroAssembler.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
// 32-bit compares. The AddressLiteral forms fall back to rscratch1
// when the literal address is out of RIP-relative range.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");  // comparing against an address wants cmpptr
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
// Compare two doubles and materialize -1/0/+1 in dst (Java fcmpl/fcmpg
// style three-way compare). After ucomisd, PF=1 signals an unordered
// (NaN) operand; unordered_is_less selects whether NaN compares as
// less (-1) or greater (+1).
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);   // unordered -> -1
    jcc(Assembler::below , L);   // opr1 < opr2 -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // equal -> 0
    increment(dst);              // otherwise above -> +1
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);   // unordered -> +1
    jcc(Assembler::above , L);   // opr1 > opr2 -> +1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // equal -> 0
    decrementl(dst);             // otherwise below -> -1
  }
  bind(L);
}

// Single-precision version of the three-way compare above.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
// Byte compare against a literal-addressed location, with far fallback.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

// Pointer compare of a register against an AddressLiteral. An lval
// compares against the literal address itself; otherwise against the
// value stored at that address.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    // 64-bit literal addresses can't be compare immediates; load first.
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

// Compare a memory operand against a literal address (lval only).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}
// Atomic pointer compare-and-exchange at a literal address. The lock
// prefix (MP only) must immediately precede the cmpxchg, hence the
// duplication across both branches.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

// Pointer-sized cmpxchg: cmpxchgq on 64-bit, cmpxchgl on 32-bit.
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
// Scalar double/float compare against a literal-addressed operand.
// NOTE(review): unlike most AddressLiteral helpers here there is no
// far-address fallback — assumes src is reachable; confirm callers.
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  comisd(dst, as_Address(src));
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  comiss(dst, as_Address(src));
}
// Atomically increment a 32-bit counter, but only when cond holds:
// branch around the increment on the negated condition.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}
int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  // (min_int / -1 would trap with #DE on x86, so it is special-cased
  //  to return min_int with remainder 0.)
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();           // sign-extend rax into rdx:rax for idiv
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}
// Subtract value, using decl for 1 when UseIncDec permits. min_jint is
// handled first because negating it (for the value < 0 redirect to
// incrementl) would overflow.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}
// Signed division by 2^shift_value via arithmetic shift. A plain sar
// rounds toward negative infinity, so negative dividends are first
// biased by (2^shift - 1) to get round-toward-zero semantics.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}
// !defined(COMPILER2) is because of stupid core builds
// Clear the x87 FPU register stack: emms resets it in one shot when
// MMX is available, otherwise each slot is freed individually.
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2
// Defines obj, preserves var_size_in_bytes
// Inline eden allocation: CAS-bump the shared heap top pointer and
// retry on contention. Size comes from var_size_in_bytes when valid,
// otherwise from con_size_in_bytes.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No contiguous eden to bump-allocate from: always take the slow path.
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}
// Standard frame prologue: save caller's frame pointer, establish ours.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}
// x87 floating-point compare of ST0 against ST(index), popping both
// operands.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// x87 compare with explicit pop control. On CPUs with cmov the
// fucomi(p) forms set eflags directly; otherwise the FPU status word
// is routed through rax (via fnstsw/sahf), so a temp is required to
// preserve the caller's rax.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

// Three-way x87 compare producing -1/0/+1 in dst; pops both operands.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// Three-way x87 compare with pop control. PF (unordered) maps to -1 or
// +1 depending on unordered_is_less, mirroring cmpsd2int above.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
// x87 loads from literal addresses.
// NOTE(review): these assume the literal is reachable (no rscratch1
// fallback) — confirm callers only pass in-range constants.
void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

// Load the x87 control word.
void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

// Discard ST0: free the slot and advance the FPU stack top.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// IEEE remainder of ST0 by ST1. fprem is iterative and may return a
// partial remainder, so loop until the status word reports completion
// (C2 clear); the completion bit is read via rax, preserved in tmp.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);          // test C2 directly (no sahf on some 64-bit CPUs)
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);  // C2 lands in PF after sahf
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}
6328 void MacroAssembler::incrementl(AddressLiteral dst) {
6329 if (reachable(dst)) {
6330 incrementl(as_Address(dst));
6331 } else {
6332 lea(rscratch1, dst);
6333 incrementl(Address(rscratch1, 0));
6334 }
6335 }
// Increment the 32-bit value at an array-relative address.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Add value, using incl for 1 when UseIncDec permits. min_jint is
// handled first because negating it (for the value < 0 redirect to
// decrementl) would overflow.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}
// Unconditional jump to a literal target: pc-relative when reachable,
// otherwise indirect through rscratch1.
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}
// Conditional jump to a literal target. Reachable targets get a
// hand-encoded jcc (short 0x70|cc form when the displacement fits in
// 8 bits, else the long 0x0F 0x80|cc form). Unreachable targets can't
// be encoded as a jcc at all, so the condition is reversed and a short
// branch skips an indirect jmp through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}
// Load the MXCSR (SSE control/status) register from a literal address,
// with the usual far-address fallback through rscratch1.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}
// Sub-word loads. Each returns the code offset of the actual load
// instruction (needed when the load can take an implicit null check).
// On 64-bit the LP64_ONLY(true ||) makes the movsx/movzx path
// unconditional; the shift-based fallbacks exist for pre-P6 32-bit.

int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    // pre-P6: zero-extend then shift to replicate the sign bit
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    // xor first to avoid a partial-register stall on the byte move
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}
// Load a value of the given byte size, sign- or zero-extending.
// The switch key XORs the size with -1 when signed, so signed sizes
// select the ~N cases and unsigned sizes the plain N cases.
void MacroAssembler::load_sized_value(Register dst, Address src,
                                      int size_in_bytes, bool is_signed) {
  switch (size_in_bytes ^ (is_signed ? -1 : 0)) {
#ifndef _LP64
  // For case 8, caller is responsible for manually loading
  // the second word into another register.
  case ~8:  // fall through:
  case  8:  movl(                dst, src ); break;
#else
  case ~8:  // fall through:
  case  8:  movq(                dst, src ); break;
#endif
  case ~4:  // fall through:
  case  4:  movl(                dst, src ); break;
  case ~2:  load_signed_short(   dst, src ); break;
  case  2:  load_unsigned_short( dst, src ); break;
  case ~1:  load_signed_byte(    dst, src ); break;
  case  1:  load_unsigned_byte(  dst, src ); break;
  default:  ShouldNotReachHere();
  }
}
// 32-bit store to / load from a literal address, with the usual
// far-address fallback through rscratch1.
void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation
// Moves sized to match the compiler's sizeof(bool), which is not
// specified by the language; unexpected sizes are a hard error.

void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

// Store a byte constant to an array-relative address.
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}
6550 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6551 if (reachable(src)) {
6552 if (UseXmmLoadAndClearUpper) {
6553 movsd (dst, as_Address(src));
6554 } else {
6555 movlpd(dst, as_Address(src));
6556 }
6557 } else {
6558 lea(rscratch1, src);
6559 if (UseXmmLoadAndClearUpper) {
6560 movsd (dst, Address(rscratch1, 0));
6561 } else {
6562 movlpd(dst, Address(rscratch1, 0));
6563 }
6564 }
6565 }
6567 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6568 if (reachable(src)) {
6569 movss(dst, as_Address(src));
6570 } else {
6571 lea(rscratch1, src);
6572 movss(dst, Address(rscratch1, 0));
6573 }
6574 }
6576 void MacroAssembler::movptr(Register dst, Register src) {
6577 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6578 }
6580 void MacroAssembler::movptr(Register dst, Address src) {
6581 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6582 }
6584 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
6585 void MacroAssembler::movptr(Register dst, intptr_t src) {
6586 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6587 }
6589 void MacroAssembler::movptr(Address dst, Register src) {
6590 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6591 }
6593 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6594 if (reachable(src)) {
6595 movss(dst, as_Address(src));
6596 } else {
6597 lea(rscratch1, src);
6598 movss(dst, Address(rscratch1, 0));
6599 }
6600 }
6602 void MacroAssembler::null_check(Register reg, int offset) {
6603 if (needs_explicit_null_check(offset)) {
6604 // provoke OS NULL exception if reg = NULL by
6605 // accessing M[reg] w/o changing any (non-CC) registers
6606 // NOTE: cmpl is plenty here to provoke a segv
6607 cmpptr(rax, Address(reg, 0));
6608 // Note: should probably use testl(rax, Address(reg, 0));
6609 // may be shorter code (however, this version of
6610 // testl needs to be implemented first)
6611 } else {
6612 // nothing to do, (later) access of M[reg + offset]
6613 // will provoke OS NULL exception if reg = NULL
6614 }
6615 }
6617 void MacroAssembler::os_breakpoint() {
6618 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
6619 // (e.g., MSVC can't call ps() otherwise)
6620 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
6621 }
// Restore the CPU state saved by push_CPU_state.  Pops in the reverse
// order of the push: FPU state was pushed last, so it comes off first.
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}
6628 void MacroAssembler::pop_FPU_state() {
6629 NOT_LP64(frstor(Address(rsp, 0));)
6630 LP64_ONLY(fxrstor(Address(rsp, 0));)
6631 addptr(rsp, FPUStateSizeInWords * wordSize);
6632 }
6634 void MacroAssembler::pop_IU_state() {
6635 popa();
6636 LP64_ONLY(addq(rsp, 8));
6637 popf();
6638 }
// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
// Counterpart of pop_CPU_state; integer state is pushed first so that
// the pops (FPU first) mirror the pushes.
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}
// Save the FPU state (and on 64 bit also SSE state, via fxsave) into a
// freshly reserved stack area of FPUStateSizeInWords words.
void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));  // fnsave also reinitializes the FPU
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}
6657 void MacroAssembler::push_IU_state() {
6658 // Push flags first because pusha kills them
6659 pushf();
6660 // Make sure rsp stays 16-byte aligned
6661 LP64_ONLY(subq(rsp, 8));
6662 pusha();
6663 }
6665 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
6666 // determine java_thread register
6667 if (!java_thread->is_valid()) {
6668 java_thread = rdi;
6669 get_thread(java_thread);
6670 }
6671 // we must set sp to zero to clear frame
6672 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
6673 if (clear_fp) {
6674 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
6675 }
6677 if (clear_pc)
6678 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
6680 }
6682 void MacroAssembler::restore_rax(Register tmp) {
6683 if (tmp == noreg) pop(rax);
6684 else if (tmp != rax) mov(rax, tmp);
6685 }
// Round the value in reg up to the next multiple of modulus using the
// usual (x + m-1) & -m trick; modulus must be a power of two for the
// mask to be correct.
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}
6692 void MacroAssembler::save_rax(Register tmp) {
6693 if (tmp == noreg) push(rax);
6694 else if (tmp != rax) mov(tmp, rax);
6695 }
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // Derive a per-thread, int-aligned offset within the page from the
  // thread pointer itself.
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}
// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  // sp is stored last; reset_last_Java_frame clears the frame by
  // zeroing sp, so presumably a non-zero sp is what publishes the
  // anchor -- keep this ordering.
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
6749 void MacroAssembler::shlptr(Register dst, int imm8) {
6750 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6751 }
6753 void MacroAssembler::shrptr(Register dst, int imm8) {
6754 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6755 }
6757 void MacroAssembler::sign_extend_byte(Register reg) {
6758 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
6759 movsbl(reg, reg); // movsxb
6760 } else {
6761 shll(reg, 24);
6762 sarl(reg, 24);
6763 }
6764 }
6766 void MacroAssembler::sign_extend_short(Register reg) {
6767 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6768 movswl(reg, reg); // movsxw
6769 } else {
6770 shll(reg, 16);
6771 sarl(reg, 16);
6772 }
6773 }
6775 //////////////////////////////////////////////////////////////////////////////////
6776 #ifndef SERIALGC
// G1 SATB pre-barrier.  If concurrent marking is in progress, records
// the previous value of the field at [obj + 0] into the thread's SATB
// mark queue (so the snapshot-at-the-beginning invariant holds); when
// the local queue is full it falls back to the g1_wb_pre runtime stub.
// On 32 bit the caller supplies the thread register; on 64 bit
// r15_thread is used.  tmp and tmp2 are scratch; tosca_live indicates
// rax holds a live value that must be preserved across the stub call.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  // Per-thread SATB queue fields.
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
#ifdef _LP64
  // On 64 bit the field may hold a compressed oop; load_heap_oop
  // decodes it into tmp2.
  load_heap_oop(tmp2, Address(obj, 0));
#else
  movptr(tmp2, Address(obj, 0));
#endif
  cmpptr(tmp2, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

#ifdef _LP64
  movslq(tmp, index);
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  // index == 0 means the buffer is full: take the runtime path.
  jcc(Assembler::equal, runtime);
#ifdef _LP64
  // Bump the index down one word and store at buffer + new index.
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}
// G1 post-barrier.  After storing new_val at store_addr: if the store
// crosses heap regions, is non-NULL, and the corresponding card is not
// already dirty, dirty the card and enqueue its address on the thread's
// dirty card queue; when the queue is full, call the g1_wb_post stub.
// On 32 bit the caller supplies the thread register; on 64 bit
// r15_thread is used.  tmp and tmp2 are scratch.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  // Per-thread dirty card queue fields.
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?
  // (Addresses in the same region agree in all bits above
  // LogOfHRGrainBytes, so xor + shift yields zero for same-region.)

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  // queue_index == 0 means the buffer is full: take the runtime path.
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  // card_index aliases card_addr (same register, tmp) and holds the
  // card address after the lea above.
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}
6947 #endif // SERIALGC
6948 //////////////////////////////////////////////////////////////////////////////////
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  // (part_1 computes the card index in obj, part_2 dirties the card;
  // split so other instructions can be scheduled between them.)
  store_check_part_1(obj);
  store_check_part_2(obj);
}
// Overload taking the destination field address; dst is ignored here --
// the card is computed from obj alone.  Clobbers obj.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
// split the store check operation so that other instructions can be scheduled inbetween
void MacroAssembler::store_check_part_1(Register obj) {
  // Convert the object address in obj into its card-table index
  // (obj is clobbered).
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}
// Second half of the store check: obj already holds the card index
// (from store_check_part_1); dirty the corresponding card-table byte.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    // Fits in a 32-bit displacement: one movb does it.
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}
7002 void MacroAssembler::subptr(Register dst, int32_t imm32) {
7003 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
7004 }
7006 void MacroAssembler::subptr(Register dst, Register src) {
7007 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
7008 }
7010 void MacroAssembler::test32(Register src1, AddressLiteral src2) {
7011 // src2 must be rval
7013 if (reachable(src2)) {
7014 testl(src1, as_Address(src2));
7015 } else {
7016 lea(rscratch1, src2);
7017 testl(src1, Address(rscratch1, 0));
7018 }
7019 }
7021 // C++ bool manipulation
7022 void MacroAssembler::testbool(Register dst) {
7023 if(sizeof(bool) == 1)
7024 testb(dst, 0xff);
7025 else if(sizeof(bool) == 2) {
7026 // testw implementation needed for two byte bools
7027 ShouldNotReachHere();
7028 } else if(sizeof(bool) == 4)
7029 testl(dst, dst);
7030 else
7031 // unsupported
7032 ShouldNotReachHere();
7033 }
7035 void MacroAssembler::testptr(Register dst, Register src) {
7036 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
7037 }
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Fast-path TLAB bump allocation:
//   obj = tlab.top; end = obj + size;
//   if (end > tlab.end) goto slow_case;  else tlab.top = end;
// Size is either the register var_size_in_bytes, or, when that is
// noreg, the constant con_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  // (caller was allowed to pass t2 == var_size_in_bytes, in which case
  // the lea above overwrote it with end; reconstruct it as end - obj)
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
// Preserves rbx, and rdx.
// Refill the current thread's TLAB from eden, or branch to try_eden /
// slow_case.  If the old TLAB still has too much free space to waste,
// it is retained (waste limit bumped) and allocation continues in the
// shared space; otherwise the remainder of the old TLAB is filled with
// a dummy int array (so the heap stays parsable) and a new TLAB is
// allocated from eden.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr (top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  // (t1 words free, minus the array header, plus the alignment reserve,
  // converted to jint element count)
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB [top, top + t1), keeping back the alignment
  // reserve at the end.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}
// pi/4 -- used by trigfunc below to test whether the argument lies in
// the (-pi/4, pi/4) range where fsin/fcos/ftan can be used directly.
static const double pi_4 = 0.7853981633974483;
// Compute sin ('s'), cos ('c'), or tan ('t') of the value on the FPU
// stack top, leaving the result in F-TOS.  Arguments within
// (-pi/4, pi/4) use the hardware instruction directly; anything else
// goes through a runtime call to SharedRuntime::dsin/dcos/dtan.
// num_fpu_regs_in_use tells us how many FPU stack slots are live and
// must be preserved across the runtime call.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    // Reload the argument (it was the deepest slot pushed above).
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  // Pass the argument in memory (and, on 64 bit, in xmm0 per the ABI).
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  // Result came back in xmm0; route it onto the FPU stack.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The loop is peeled once (peel == 1 is the first iteration): the
  // first probe often hits, and peeling lets it skip the null check.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}
// Full subtype check: jumps to L_success if sub_klass is a subtype of
// super_klass, otherwise falls through past the L_failure bound here.
// Combines the fast path (cache/display probe) with the slow path
// (secondary supers scan).
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}
// Fast path of the subtype check: identity test plus a probe of the
// supertype display / secondary-super cache.  Each of L_success,
// L_failure, L_slow_path may be NULL, meaning "fall through"; at most
// one may be NULL.  super_check_offset, when given, avoids reloading
// the offset from super_klass.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}
// Slow path of the subtype check: linearly scan sub_klass's secondary-supers
// array for super_klass using repne scas, and on success record super_klass
// in sub_klass's secondary-super cache so the next fast-path check hits.
// Either L_success or L_failure (but not both) may be NULL, meaning "fall
// through".  When set_cond_codes is true, callers may additionally rely on
// the Z/NZ condition codes (Z = success) and on rdi being non-zero.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  // (With compressed oops rax is always needed, since it is encoded in place
  // before the scan and is dead afterwards.)
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Count slow-path entries for diagnostics.
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne.  Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax);
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here.  Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    repne_scanl();
  } else
#endif // _LP64
    repne_scan();

  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  // Short jcc suffices when the failure target is the nearby fallthrough label.
  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}
7593 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7594 ucomisd(dst, as_Address(src));
7595 }
7597 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7598 ucomiss(dst, as_Address(src));
7599 }
7601 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7602 if (reachable(src)) {
7603 xorpd(dst, as_Address(src));
7604 } else {
7605 lea(rscratch1, src);
7606 xorpd(dst, Address(rscratch1, 0));
7607 }
7608 }
7610 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7611 if (reachable(src)) {
7612 xorps(dst, as_Address(src));
7613 } else {
7614 lea(rscratch1, src);
7615 xorps(dst, Address(rscratch1, 0));
7616 }
7617 }
// Emit a run-time check that 'reg' holds a well-formed oop; no-op unless
// -XX:+VerifyOops.  The register contents and flags are preserved by the
// called subroutine; the message string identifies the failing site.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  // The buffer is intentionally never freed: its address is embedded in the
  // generated code below and must stay valid for the code's lifetime.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
}
// Return *delayed_value_addr + offset as a RegisterOrConstant.  If the
// delayed value has already been computed (non-zero) it becomes an
// immediate constant; otherwise code is emitted to load it at run time
// into 'tmp', which solves code-generation ordering problems.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  // Guard against using the value before it has been computed.
  // NOTE(review): testl only checks the low 32 bits of tmp on LP64; a value
  // whose low half happens to be zero would falsely hlt -- confirm whether
  // testptr was intended here.
  Label L;
  testl(tmp, tmp);
  jccb(Assembler::notZero, L);
  hlt();
  bind(L);
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}
// Verify a method handle invocation: compare the required MethodType in
// mtype_reg with the type stored in the method handle, branching to
// wrong_method_type on mismatch.
// registers on entry:
//  - rax ('check' register): required MethodType
//  - rcx: method handle
//  - rdx, rsi, or ?: killable temp
void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  // Compressed-oop support not yet implemented for method handle fields.
  if (UseCompressedOops)  unimplemented();  // field accesses must decode
  // compare method type against that of the receiver
  cmpptr(mtype_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
  jcc(Assembler::notEqual, wrong_method_type);
}
// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots.  This field is either located directly
// in every method handle, or else is indirectly accessed through the
// method handle's MethodType.  This macro hides the distinction.
void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
                                                Register temp_reg) {
  // Compressed-oop support not yet implemented for method handle fields.
  if (UseCompressedOops)  unimplemented();  // field accesses must decode
  // load mh.type.form.vmslots
  if (java_dyn_MethodHandle::vmslots_offset_in_bytes() != 0) {
    // hoist vmslots into every mh to avoid dependent load chain
    movl(vmslots_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
  } else {
    // walk mh -> type -> form -> vmslots; vmslots_reg doubles as the temp
    Register temp2_reg = vmslots_reg;
    movptr(temp2_reg, Address(mh_reg,    delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
    movptr(temp2_reg, Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg)));
    movl(vmslots_reg, Address(temp2_reg, delayed_value(java_dyn_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
  }
}
// Tail-jump into the interpreted entry point of a method handle.
// registers on entry:
//  - rcx: method handle
//  - rdx: killable temp (interpreted only)
//  - rax: killable temp (compiled only)
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // Compressed-oop support not yet implemented for method handle fields.
  if (UseCompressedOops)  unimplemented();  // field accesses must decode

  // pick out the interpreted side of the handler
  movptr(temp_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}
7718 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
7719 int extra_slot_offset) {
7720 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
7721 int stackElementSize = Interpreter::stackElementSize();
7722 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
7723 #ifdef ASSERT
7724 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
7725 assert(offset1 - offset == stackElementSize, "correct arithmetic");
7726 #endif
7727 Register scale_reg = noreg;
7728 Address::ScaleFactor scale_factor = Address::no_scale;
7729 if (arg_slot.is_constant()) {
7730 offset += arg_slot.as_constant() * stackElementSize;
7731 } else {
7732 scale_reg = arg_slot.as_register();
7733 scale_factor = Address::times(stackElementSize);
7734 }
7735 offset += wordSize; // return PC is on stack
7736 return Address(rsp, scale_reg, scale_factor, offset);
7737 }
// Emit a run-time check that the oop stored at 'addr' is well-formed;
// no-op unless -XX:+VerifyOops.  Like verify_oop(Register), but the oop is
// loaded from memory, which requires care when 'addr' is rsp-relative.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  // Intentionally leaked: the buffer address is embedded in the code below.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    // Compensate for the push(rax) above: the target word is now one slot up.
    lea(rax, addr);
    pushptr(Address(rax, BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments and restores rax, from the stack
}
// Debug-only sanity check of the current thread's TLAB invariants:
// start <= top <= end.  Compiled away unless ASSERT, and a no-op unless
// both UseTLAB and VerifyOops are enabled.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    // On LP64 the thread lives permanently in r15; on 32-bit we must fetch it.
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}
// Debug view of the x87 FPU control word (only the low 16 bits are
// meaningful).  Field accessors decode the architectural bit positions.
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control()  const { return (_value >> 10) & 3; }
  int  precision_control() const { return (_value >>  8) & 3; }
  bool precision()         const { return ((_value >> 5) & 1) != 0; }
  bool underflow()         const { return ((_value >> 4) & 1) != 0; }
  bool overflow()          const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide()       const { return ((_value >> 2) & 1) != 0; }
  bool denormalized()      const { return ((_value >> 1) & 1) != 0; }
  bool invalid()           const { return ((_value >> 0) & 1) != 0; }

  void print() const {
    // rounding control (2-bit field, so the tables are exhaustive)
    static const char* round_names[4] = {
      "round near", "round down", "round up ", "chop "
    };
    const char* rc = round_names[rounding_control()];
    // precision control
    static const char* prec_names[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char* pc = prec_names[precision_control()];
    // exception-mask flags: capital letter = bit set
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = precision()    ? 'P' : 'p';
    f[3] = underflow()    ? 'U' : 'u';
    f[4] = overflow()     ? 'O' : 'o';
    f[5] = zero_divide()  ? 'Z' : 'z';
    f[6] = denormalized() ? 'D' : 'd';
    f[7] = invalid()      ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
// Debug view of the x87 FPU status word (low 16 bits): busy flag,
// condition codes C0-C3, the top-of-stack pointer, and exception flags.
class StatusWord {
 public:
  int32_t _value;

  bool busy()         const { return ((_value >> 15) & 1) != 0; }
  bool C3()           const { return ((_value >> 14) & 1) != 0; }
  int  top()          const { return (_value >> 11) & 7; }
  bool C2()           const { return ((_value >> 10) & 1) != 0; }
  bool C1()           const { return ((_value >>  9) & 1) != 0; }
  bool C0()           const { return ((_value >>  8) & 1) != 0; }
  bool error_status() const { return ((_value >> 7) & 1) != 0; }
  bool stack_fault()  const { return ((_value >> 6) & 1) != 0; }
  bool precision()    const { return ((_value >> 5) & 1) != 0; }
  bool underflow()    const { return ((_value >> 4) & 1) != 0; }
  bool overflow()     const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide()  const { return ((_value >> 2) & 1) != 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid()      const { return ((_value >> 0) & 1) != 0; }

  void print() const {
    // condition codes, most significant first
    char cc[5];
    cc[0] = C3() ? '3' : '-';
    cc[1] = C2() ? '2' : '-';
    cc[2] = C1() ? '1' : '-';
    cc[3] = C0() ? '0' : '-';
    cc[4] = '\x0';
    // exception flags: letter = bit set, dash = clear
    char fl[9];
    fl[0] = error_status() ? 'E' : '-';
    fl[1] = stack_fault()  ? 'S' : '-';
    fl[2] = precision()    ? 'P' : '-';
    fl[3] = underflow()    ? 'U' : '-';
    fl[4] = overflow()     ? 'O' : '-';
    fl[5] = zero_divide()  ? 'Z' : '-';
    fl[6] = denormalized() ? 'D' : '-';
    fl[7] = invalid()      ? 'I' : '-';
    fl[8] = '\x0';
    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, fl, cc, top());
  }

};
// Debug view of the x87 FPU tag word: two tag bits per physical register
// slot (0 = valid, 1 = zero, 2 = special, 3 = empty).
class TagWord {
 public:
  int32_t _value;

  // Extract the 2-bit tag for physical register slot i.
  int tag_at(int i) const { return (_value >> (i << 1)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
// Debug view of one 80-bit x87 register: 64-bit mantissa split into two
// 32-bit halves plus a 16-bit sign/exponent word.  The field order matches
// the in-memory layout of fnsave state and must not be changed.
class FPU_Register {
 public:
  int32_t _m0;   // mantissa, low half
  int32_t _m1;   // mantissa, high half
  int16_t _ex;   // sign bit + 15-bit exponent

  // True for the x87 "indefinite" QNaN bit pattern.
  bool is_indefinite() const {
    if (_ex != -1)                        return false;
    if (_m1 != (int32_t)0xC0000000)       return false;
    return _m0 == 0;
  }

  void print() const {
    char sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  }

};
7925 class FPU_State {
7926 public:
7927 enum {
7928 register_size = 10,
7929 number_of_registers = 8,
7930 register_mask = 7
7931 };
7933 ControlWord _control_word;
7934 StatusWord _status_word;
7935 TagWord _tag_word;
7936 int32_t _error_offset;
7937 int32_t _error_selector;
7938 int32_t _data_offset;
7939 int32_t _data_selector;
7940 int8_t _register[register_size * number_of_registers];
7942 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
7943 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
7945 const char* tag_as_string(int tag) const {
7946 switch (tag) {
7947 case 0: return "valid";
7948 case 1: return "zero";
7949 case 2: return "special";
7950 case 3: return "empty";
7951 }
7952 ShouldNotReachHere()
7953 return NULL;
7954 }
7956 void print() const {
7957 // print computation registers
7958 { int t = _status_word.top();
7959 for (int i = 0; i < number_of_registers; i++) {
7960 int j = (i - t) & register_mask;
7961 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
7962 st(j)->print();
7963 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
7964 }
7965 }
7966 printf("\n");
7967 // print control registers
7968 printf("ctrl = "); _control_word.print(); printf("\n");
7969 printf("stat = "); _status_word .print(); printf("\n");
7970 printf("tags = "); _tag_word .print(); printf("\n");
7971 }
7973 };
// Debug view of the x86 EFLAGS register; accessors decode the
// architectural bit positions of the arithmetic/control flags.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow()        const { return ((_value >> 11) & 1) != 0; }
  bool direction()       const { return ((_value >> 10) & 1) != 0; }
  bool sign()            const { return ((_value >>  7) & 1) != 0; }
  bool zero()            const { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const { return ((_value >>  4) & 1) != 0; }
  bool parity()          const { return ((_value >>  2) & 1) != 0; }
  bool carry()           const { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // one letter per flag: letter = set, dash = clear
    char fl[8];
    fl[0] = overflow()        ? 'O' : '-';
    fl[1] = direction()       ? 'D' : '-';
    fl[2] = sign()            ? 'S' : '-';
    fl[3] = zero()            ? 'Z' : '-';
    fl[4] = auxiliary_carry() ? 'A' : '-';
    fl[5] = parity()          ? 'P' : '-';
    fl[6] = carry()           ? 'C' : '-';
    fl[7] = '\x0';
    // output
    printf("%08x  flags = %s", _value, fl);
  }

};
// Debug view of one 32-bit integer-unit register; prints both the hex and
// signed-decimal renderings of the value.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x  %11d", _value, _value);
  }

};
8014 class IU_State {
8015 public:
8016 Flag_Register _eflags;
8017 IU_Register _rdi;
8018 IU_Register _rsi;
8019 IU_Register _rbp;
8020 IU_Register _rsp;
8021 IU_Register _rbx;
8022 IU_Register _rdx;
8023 IU_Register _rcx;
8024 IU_Register _rax;
8026 void print() const {
8027 // computation registers
8028 printf("rax, = "); _rax.print(); printf("\n");
8029 printf("rbx, = "); _rbx.print(); printf("\n");
8030 printf("rcx = "); _rcx.print(); printf("\n");
8031 printf("rdx = "); _rdx.print(); printf("\n");
8032 printf("rdi = "); _rdi.print(); printf("\n");
8033 printf("rsi = "); _rsi.print(); printf("\n");
8034 printf("rbp, = "); _rbp.print(); printf("\n");
8035 printf("rsp = "); _rsp.print(); printf("\n");
8036 printf("\n");
8037 // control registers
8038 printf("flgs = "); _eflags.print(); printf("\n");
8039 }
8040 };
8043 class CPU_State {
8044 public:
8045 FPU_State _fpu_state;
8046 IU_State _iu_state;
8048 void print() const {
8049 printf("--------------------------------------------------\n");
8050 _iu_state .print();
8051 printf("\n");
8052 _fpu_state.print();
8053 printf("--------------------------------------------------\n");
8054 }
8056 };
8059 static void _print_CPU_state(CPU_State* state) {
8060 state->print();
8061 };
// Emit code that dumps the complete CPU state to stdout at run time:
// saves all registers, calls the _print_CPU_state thunk with a pointer to
// the saved snapshot, then restores everything.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}
// Run-time helper called by MacroAssembler::verify_FPU: checks that the
// saved x87 state has the expected control word and that exactly
// 'stack_depth' contiguous registers are in use (negative depth = leaf
// call, only the top slot must be empty).  Returns false (after asserting)
// on any violation.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true;                // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  // NOTE(review): this branch looks unreachable -- all stack_depth < 0 cases
  // already returned in the leaf-call check at the top of the function.
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}
// Emit a run-time FPU stack-depth check (no-op unless -XX:+VerifyFPU):
// saves the CPU state, calls _verify_FPU with (stack_depth, msg, state),
// breaks into the debugger on failure, then restores the state.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
// Load the klass of the object in 'src' into 'dst'.  With compressed oops
// the klass word is a 32-bit narrow oop and must be decoded (the klass is
// never null, hence the not_null variant).
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
// Load the prototype mark word from the klass of the object in 'src' into
// 'dst' (used by biased locking).  With compressed oops the klass load and
// decode are fused into a single scaled-index addressing mode off the
// narrow-oop heap base.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
      // Decode-and-load in one instruction: base + (narrow_klass << 3) + offset.
      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    } else {
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}
// Store the klass in 'src' into the klass field of the object in 'dst'.
// NOTE: with compressed oops 'src' is encoded in place and is therefore
// clobbered by this call.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
8190 #ifdef _LP64
8191 void MacroAssembler::store_klass_gap(Register dst, Register src) {
8192 if (UseCompressedOops) {
8193 // Store to klass gap in destination
8194 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
8195 }
8196 }
8198 void MacroAssembler::load_heap_oop(Register dst, Address src) {
8199 if (UseCompressedOops) {
8200 movl(dst, src);
8201 decode_heap_oop(dst);
8202 } else {
8203 movq(dst, src);
8204 }
8205 }
8207 void MacroAssembler::store_heap_oop(Address dst, Register src) {
8208 if (UseCompressedOops) {
8209 assert(!dst.uses(src), "not enough registers");
8210 encode_heap_oop(src);
8211 movl(dst, src);
8212 } else {
8213 movq(dst, src);
8214 }
8215 }
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compress the (possibly NULL) oop in 'r' in place:
//   r = (r == NULL) ? 0 : (r - narrow_oop_base) >> narrow_oop_shift
// NULL is preserved by cmov'ing the heap base over a NULL value first, so
// the subtraction yields 0.
void MacroAssembler::encode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: no base to subtract, just shift (if any).
    verify_oop(r, "broken oop in encode_heap_oop");
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Verify that r12 still holds the current narrow-oop base.
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  testq(r, r);
  // NULL -> heap base, so that (base - base) >> shift encodes NULL as 0.
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
// Compress the known-non-NULL oop in 'r' in place (no NULL cmov needed):
//   r = (r - narrow_oop_base) >> narrow_oop_shift
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Trap a NULL oop slipping into the not-null fast path.
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}
// Two-register variant: compress the known-non-NULL oop in 'src' into
// 'dst', leaving 'src' unchanged when the registers differ.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Trap a NULL oop slipping into the not-null fast path.
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to  encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}
// Decompress the (possibly NULL) narrow oop in 'r' in place:
//   r = (r == 0) ? NULL : (r << narrow_oop_shift) + narrow_oop_base
// The NULL check relies on shlq setting ZF for a zero result, so the
// base-add is skipped and a compressed NULL stays NULL.
void MacroAssembler::decode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: just reverse the shift (if any).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
    verify_oop(r, "broken oop in decode_heap_oop");
    return;
  }
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Verify that r12 still holds the current narrow-oop base.
    Label ok;
    push(rscratch1);
    cmpptr(r12_heapbase,
           ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif

  Label done;
  shlq(r, LogMinObjAlignmentInBytes);
  jccb(Assembler::equal, done);   // ZF set by shlq: value was 0 (NULL)
  addq(r, r12_heapbase);
#if 0
   // alternate decoding probably a wash.
   testq(r, r);
   jccb(Assembler::equal, done);
   leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
#endif
  bind(done);
  verify_oop(r, "broken oop in decode_heap_oop");
}
// Decompress the known-non-NULL narrow oop in 'r' in place, using a fixed
// instruction sequence (lea with scaled index) so that the code size stays
// predictable for callers that count instructions.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: just reverse the shift (if any).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    // Shift and add base in one lea: r = base + (r << 3).
    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
  }
}
// Two-register variant: decompress the known-non-NULL narrow oop in 'src'
// into 'dst' with a single lea, leaving 'src' unchanged.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    // Shift and add base in one lea: dst = base + (src << 3).
    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  } else if (dst != src) {
    movq(dst, src);
  }
}
8365 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
8366 assert (UseCompressedOops, "should only be used for compressed headers");
8367 assert (Universe::heap() != NULL, "java heap should be initialized");
8368 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8369 int oop_index = oop_recorder()->find_index(obj);
8370 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8371 mov_narrow_oop(dst, oop_index, rspec);
8372 }
8374 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
8375 assert (UseCompressedOops, "should only be used for compressed headers");
8376 assert (Universe::heap() != NULL, "java heap should be initialized");
8377 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8378 int oop_index = oop_recorder()->find_index(obj);
8379 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8380 mov_narrow_oop(dst, oop_index, rspec);
8381 }
8383 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
8384 assert (UseCompressedOops, "should only be used for compressed headers");
8385 assert (Universe::heap() != NULL, "java heap should be initialized");
8386 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8387 int oop_index = oop_recorder()->find_index(obj);
8388 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8389 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8390 }
8392 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
8393 assert (UseCompressedOops, "should only be used for compressed headers");
8394 assert (Universe::heap() != NULL, "java heap should be initialized");
8395 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8396 int oop_index = oop_recorder()->find_index(obj);
8397 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8398 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8399 }
// Re-establish the dedicated compressed-oop base register: reload
// r12_heapbase from the VM global at Universe::narrow_oop_base_addr().
// No-op when compressed oops are disabled.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  }
}
8406 #endif // _LP64
8408 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8409 switch (cond) {
8410 // Note some conditions are synonyms for others
8411 case Assembler::zero: return Assembler::notZero;
8412 case Assembler::notZero: return Assembler::zero;
8413 case Assembler::less: return Assembler::greaterEqual;
8414 case Assembler::lessEqual: return Assembler::greater;
8415 case Assembler::greater: return Assembler::lessEqual;
8416 case Assembler::greaterEqual: return Assembler::less;
8417 case Assembler::below: return Assembler::aboveEqual;
8418 case Assembler::belowEqual: return Assembler::above;
8419 case Assembler::above: return Assembler::belowEqual;
8420 case Assembler::aboveEqual: return Assembler::below;
8421 case Assembler::overflow: return Assembler::noOverflow;
8422 case Assembler::noOverflow: return Assembler::overflow;
8423 case Assembler::negative: return Assembler::positive;
8424 case Assembler::positive: return Assembler::negative;
8425 case Assembler::parity: return Assembler::noParity;
8426 case Assembler::noParity: return Assembler::parity;
8427 }
8428 ShouldNotReachHere(); return Assembler::overflow;
8429 }
8431 SkipIfEqual::SkipIfEqual(
8432 MacroAssembler* masm, const bool* flag_addr, bool value) {
8433 _masm = masm;
8434 _masm->cmp8(ExternalAddress((address)flag_addr), value);
8435 _masm->jcc(Assembler::equal, _label);
8436 }
// Bind the skip label here, marking the end of the code region that the
// constructor's conditional jump skips over.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}