Tue, 12 Jun 2012 10:02:36 +0200
7174532: jdk/test/java/lang/Math/WorstCaseTests.java failing on x86
Summary: increase precision on x86 for the steps of the computation of exp and pow.
Reviewed-by: kvn
1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "assembler_x86.inline.hpp"
27 #include "gc_interface/collectedHeap.inline.hpp"
28 #include "interpreter/interpreter.hpp"
29 #include "memory/cardTableModRefBS.hpp"
30 #include "memory/resourceArea.hpp"
31 #include "prims/methodHandles.hpp"
32 #include "runtime/biasedLocking.hpp"
33 #include "runtime/interfaceSupport.hpp"
34 #include "runtime/objectMonitor.hpp"
35 #include "runtime/os.hpp"
36 #include "runtime/sharedRuntime.hpp"
37 #include "runtime/stubRoutines.hpp"
38 #ifndef SERIALGC
39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
41 #include "gc_implementation/g1/heapRegion.hpp"
42 #endif
44 // Implementation of AddressLiteral
46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
47 _is_lval = false;
48 _target = target;
49 switch (rtype) {
50 case relocInfo::oop_type:
51 // Oops are a special case. Normally they would be their own section
52 // but in cases like icBuffer they are literals in the code stream that
53 // we don't have a section for. We use none so that we get a literal address
54 // which is always patchable.
55 break;
56 case relocInfo::external_word_type:
57 _rspec = external_word_Relocation::spec(target);
58 break;
59 case relocInfo::internal_word_type:
60 _rspec = internal_word_Relocation::spec(target);
61 break;
62 case relocInfo::opt_virtual_call_type:
63 _rspec = opt_virtual_call_Relocation::spec();
64 break;
65 case relocInfo::static_call_type:
66 _rspec = static_call_Relocation::spec();
67 break;
68 case relocInfo::runtime_call_type:
69 _rspec = runtime_call_Relocation::spec();
70 break;
71 case relocInfo::poll_type:
72 case relocInfo::poll_return_type:
73 _rspec = Relocation::spec_simple(rtype);
74 break;
75 case relocInfo::none:
76 break;
77 default:
78 ShouldNotReachHere();
79 break;
80 }
81 }
83 // Implementation of Address
85 #ifdef _LP64
87 Address Address::make_array(ArrayAddress adr) {
88 // Not implementable on 64bit machines
89 // Should have been handled higher up the call chain.
90 ShouldNotReachHere();
91 return Address();
92 }
94 // exceedingly dangerous constructor
95 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
96 _base = noreg;
97 _index = noreg;
98 _scale = no_scale;
99 _disp = disp;
100 switch (rtype) {
101 case relocInfo::external_word_type:
102 _rspec = external_word_Relocation::spec(loc);
103 break;
104 case relocInfo::internal_word_type:
105 _rspec = internal_word_Relocation::spec(loc);
106 break;
107 case relocInfo::runtime_call_type:
108 // HMM
109 _rspec = runtime_call_Relocation::spec();
110 break;
111 case relocInfo::poll_type:
112 case relocInfo::poll_return_type:
113 _rspec = Relocation::spec_simple(rtype);
114 break;
115 case relocInfo::none:
116 break;
117 default:
118 ShouldNotReachHere();
119 }
120 }
121 #else // LP64
123 Address Address::make_array(ArrayAddress adr) {
124 AddressLiteral base = adr.base();
125 Address index = adr.index();
126 assert(index._disp == 0, "must not have disp"); // maybe it can?
127 Address array(index._base, index._index, index._scale, (intptr_t) base.target());
128 array._rspec = base._rspec;
129 return array;
130 }
132 // exceedingly dangerous constructor
133 Address::Address(address loc, RelocationHolder spec) {
134 _base = noreg;
135 _index = noreg;
136 _scale = no_scale;
137 _disp = (intptr_t) loc;
138 _rspec = spec;
139 }
141 #endif // _LP64
145 // Convert the raw encoding form into the form expected by the constructor for
146 // Address. An index of 4 (rsp) corresponds to having no index, so convert
147 // that to noreg for the Address constructor.
148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
149 RelocationHolder rspec;
150 if (disp_is_oop) {
151 rspec = Relocation::spec_simple(relocInfo::oop_type);
152 }
153 bool valid_index = index != rsp->encoding();
154 if (valid_index) {
155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
156 madr._rspec = rspec;
157 return madr;
158 } else {
159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
160 madr._rspec = rspec;
161 return madr;
162 }
163 }
165 // Implementation of Assembler
167 int AbstractAssembler::code_fill_byte() {
168 return (u_char)'\xF4'; // hlt
169 }
171 // make this go away someday
172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
173 if (rtype == relocInfo::none)
174 emit_long(data);
175 else emit_data(data, Relocation::spec_simple(rtype), format);
176 }
178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
179 assert(imm_operand == 0, "default format must be immediate in this file");
180 assert(inst_mark() != NULL, "must be inside InstructionMark");
181 if (rspec.type() != relocInfo::none) {
182 #ifdef ASSERT
183 check_relocation(rspec, format);
184 #endif
185 // Do not use AbstractAssembler::relocate, which is not intended for
186 // embedded words. Instead, relocate to the enclosing instruction.
188 // hack. call32 is too wide for mask so use disp32
189 if (format == call32_operand)
190 code_section()->relocate(inst_mark(), rspec, disp32_operand);
191 else
192 code_section()->relocate(inst_mark(), rspec, format);
193 }
194 emit_long(data);
195 }
197 static int encode(Register r) {
198 int enc = r->encoding();
199 if (enc >= 8) {
200 enc -= 8;
201 }
202 return enc;
203 }
205 static int encode(XMMRegister r) {
206 int enc = r->encoding();
207 if (enc >= 8) {
208 enc -= 8;
209 }
210 return enc;
211 }
213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
214 assert(dst->has_byte_register(), "must have byte register");
215 assert(isByte(op1) && isByte(op2), "wrong opcode");
216 assert(isByte(imm8), "not a byte");
217 assert((op1 & 0x01) == 0, "should be 8bit operation");
218 emit_byte(op1);
219 emit_byte(op2 | encode(dst));
220 emit_byte(imm8);
221 }
224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
225 assert(isByte(op1) && isByte(op2), "wrong opcode");
226 assert((op1 & 0x01) == 1, "should be 32bit operation");
227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
228 if (is8bit(imm32)) {
229 emit_byte(op1 | 0x02); // set sign bit
230 emit_byte(op2 | encode(dst));
231 emit_byte(imm32 & 0xFF);
232 } else {
233 emit_byte(op1);
234 emit_byte(op2 | encode(dst));
235 emit_long(imm32);
236 }
237 }
239 // Force generation of a 4 byte immediate value even if it fits into 8bit
240 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
241 assert(isByte(op1) && isByte(op2), "wrong opcode");
242 assert((op1 & 0x01) == 1, "should be 32bit operation");
243 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
244 emit_byte(op1);
245 emit_byte(op2 | encode(dst));
246 emit_long(imm32);
247 }
249 // immediate-to-memory forms
250 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
251 assert((op1 & 0x01) == 1, "should be 32bit operation");
252 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
253 if (is8bit(imm32)) {
254 emit_byte(op1 | 0x02); // set sign bit
255 emit_operand(rm, adr, 1);
256 emit_byte(imm32 & 0xFF);
257 } else {
258 emit_byte(op1);
259 emit_operand(rm, adr, 4);
260 emit_long(imm32);
261 }
262 }
264 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
265 LP64_ONLY(ShouldNotReachHere());
266 assert(isByte(op1) && isByte(op2), "wrong opcode");
267 assert((op1 & 0x01) == 1, "should be 32bit operation");
268 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
269 InstructionMark im(this);
270 emit_byte(op1);
271 emit_byte(op2 | encode(dst));
272 emit_data((intptr_t)obj, relocInfo::oop_type, 0);
273 }
276 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
277 assert(isByte(op1) && isByte(op2), "wrong opcode");
278 emit_byte(op1);
279 emit_byte(op2 | encode(dst) << 3 | encode(src));
280 }
283 void Assembler::emit_operand(Register reg, Register base, Register index,
284 Address::ScaleFactor scale, int disp,
285 RelocationHolder const& rspec,
286 int rip_relative_correction) {
287 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
289 // Encode the registers as needed in the fields they are used in
291 int regenc = encode(reg) << 3;
292 int indexenc = index->is_valid() ? encode(index) << 3 : 0;
293 int baseenc = base->is_valid() ? encode(base) : 0;
295 if (base->is_valid()) {
296 if (index->is_valid()) {
297 assert(scale != Address::no_scale, "inconsistent address");
298 // [base + index*scale + disp]
299 if (disp == 0 && rtype == relocInfo::none &&
300 base != rbp LP64_ONLY(&& base != r13)) {
301 // [base + index*scale]
302 // [00 reg 100][ss index base]
303 assert(index != rsp, "illegal addressing mode");
304 emit_byte(0x04 | regenc);
305 emit_byte(scale << 6 | indexenc | baseenc);
306 } else if (is8bit(disp) && rtype == relocInfo::none) {
307 // [base + index*scale + imm8]
308 // [01 reg 100][ss index base] imm8
309 assert(index != rsp, "illegal addressing mode");
310 emit_byte(0x44 | regenc);
311 emit_byte(scale << 6 | indexenc | baseenc);
312 emit_byte(disp & 0xFF);
313 } else {
314 // [base + index*scale + disp32]
315 // [10 reg 100][ss index base] disp32
316 assert(index != rsp, "illegal addressing mode");
317 emit_byte(0x84 | regenc);
318 emit_byte(scale << 6 | indexenc | baseenc);
319 emit_data(disp, rspec, disp32_operand);
320 }
321 } else if (base == rsp LP64_ONLY(|| base == r12)) {
322 // [rsp + disp]
323 if (disp == 0 && rtype == relocInfo::none) {
324 // [rsp]
325 // [00 reg 100][00 100 100]
326 emit_byte(0x04 | regenc);
327 emit_byte(0x24);
328 } else if (is8bit(disp) && rtype == relocInfo::none) {
329 // [rsp + imm8]
330 // [01 reg 100][00 100 100] disp8
331 emit_byte(0x44 | regenc);
332 emit_byte(0x24);
333 emit_byte(disp & 0xFF);
334 } else {
335 // [rsp + imm32]
336 // [10 reg 100][00 100 100] disp32
337 emit_byte(0x84 | regenc);
338 emit_byte(0x24);
339 emit_data(disp, rspec, disp32_operand);
340 }
341 } else {
342 // [base + disp]
343 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
344 if (disp == 0 && rtype == relocInfo::none &&
345 base != rbp LP64_ONLY(&& base != r13)) {
346 // [base]
347 // [00 reg base]
348 emit_byte(0x00 | regenc | baseenc);
349 } else if (is8bit(disp) && rtype == relocInfo::none) {
350 // [base + disp8]
351 // [01 reg base] disp8
352 emit_byte(0x40 | regenc | baseenc);
353 emit_byte(disp & 0xFF);
354 } else {
355 // [base + disp32]
356 // [10 reg base] disp32
357 emit_byte(0x80 | regenc | baseenc);
358 emit_data(disp, rspec, disp32_operand);
359 }
360 }
361 } else {
362 if (index->is_valid()) {
363 assert(scale != Address::no_scale, "inconsistent address");
364 // [index*scale + disp]
365 // [00 reg 100][ss index 101] disp32
366 assert(index != rsp, "illegal addressing mode");
367 emit_byte(0x04 | regenc);
368 emit_byte(scale << 6 | indexenc | 0x05);
369 emit_data(disp, rspec, disp32_operand);
370 } else if (rtype != relocInfo::none ) {
371 // [disp] (64bit) RIP-RELATIVE (32bit) abs
372 // [00 000 101] disp32
374 emit_byte(0x05 | regenc);
375 // Note that the RIP-rel. correction applies to the generated
376 // disp field, but _not_ to the target address in the rspec.
378 // disp was created by converting the target address minus the pc
379 // at the start of the instruction. That needs more correction here.
380 // intptr_t disp = target - next_ip;
381 assert(inst_mark() != NULL, "must be inside InstructionMark");
382 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
383 int64_t adjusted = disp;
384 // Do rip-rel adjustment for 64bit
385 LP64_ONLY(adjusted -= (next_ip - inst_mark()));
386 assert(is_simm32(adjusted),
387 "must be 32bit offset (RIP relative address)");
388 emit_data((int32_t) adjusted, rspec, disp32_operand);
390 } else {
391 // 32bit never did this, did everything as the rip-rel/disp code above
392 // [disp] ABSOLUTE
393 // [00 reg 100][00 100 101] disp32
394 emit_byte(0x04 | regenc);
395 emit_byte(0x25);
396 emit_data(disp, rspec, disp32_operand);
397 }
398 }
399 }
401 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
402 Address::ScaleFactor scale, int disp,
403 RelocationHolder const& rspec) {
404 emit_operand((Register)reg, base, index, scale, disp, rspec);
405 }
407 // Secret local extension to Assembler::WhichOperand:
408 #define end_pc_operand (_WhichOperand_limit)
410 address Assembler::locate_operand(address inst, WhichOperand which) {
411 // Decode the given instruction, and return the address of
412 // an embedded 32-bit operand word.
414 // If "which" is disp32_operand, selects the displacement portion
415 // of an effective address specifier.
416 // If "which" is imm64_operand, selects the trailing immediate constant.
417 // If "which" is call32_operand, selects the displacement of a call or jump.
418 // Caller is responsible for ensuring that there is such an operand,
419 // and that it is 32/64 bits wide.
421 // If "which" is end_pc_operand, find the end of the instruction.
423 address ip = inst;
424 bool is_64bit = false;
426 debug_only(bool has_disp32 = false);
427 int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
429 again_after_prefix:
430 switch (0xFF & *ip++) {
432 // These convenience macros generate groups of "case" labels for the switch.
433 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
434 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
435 case (x)+4: case (x)+5: case (x)+6: case (x)+7
436 #define REP16(x) REP8((x)+0): \
437 case REP8((x)+8)
439 case CS_segment:
440 case SS_segment:
441 case DS_segment:
442 case ES_segment:
443 case FS_segment:
444 case GS_segment:
445 // Seems dubious
446 LP64_ONLY(assert(false, "shouldn't have that prefix"));
447 assert(ip == inst+1, "only one prefix allowed");
448 goto again_after_prefix;
450 case 0x67:
451 case REX:
452 case REX_B:
453 case REX_X:
454 case REX_XB:
455 case REX_R:
456 case REX_RB:
457 case REX_RX:
458 case REX_RXB:
459 NOT_LP64(assert(false, "64bit prefixes"));
460 goto again_after_prefix;
462 case REX_W:
463 case REX_WB:
464 case REX_WX:
465 case REX_WXB:
466 case REX_WR:
467 case REX_WRB:
468 case REX_WRX:
469 case REX_WRXB:
470 NOT_LP64(assert(false, "64bit prefixes"));
471 is_64bit = true;
472 goto again_after_prefix;
474 case 0xFF: // pushq a; decl a; incl a; call a; jmp a
475 case 0x88: // movb a, r
476 case 0x89: // movl a, r
477 case 0x8A: // movb r, a
478 case 0x8B: // movl r, a
479 case 0x8F: // popl a
480 debug_only(has_disp32 = true);
481 break;
483 case 0x68: // pushq #32
484 if (which == end_pc_operand) {
485 return ip + 4;
486 }
487 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
488 return ip; // not produced by emit_operand
490 case 0x66: // movw ... (size prefix)
491 again_after_size_prefix2:
492 switch (0xFF & *ip++) {
493 case REX:
494 case REX_B:
495 case REX_X:
496 case REX_XB:
497 case REX_R:
498 case REX_RB:
499 case REX_RX:
500 case REX_RXB:
501 case REX_W:
502 case REX_WB:
503 case REX_WX:
504 case REX_WXB:
505 case REX_WR:
506 case REX_WRB:
507 case REX_WRX:
508 case REX_WRXB:
509 NOT_LP64(assert(false, "64bit prefix found"));
510 goto again_after_size_prefix2;
511 case 0x8B: // movw r, a
512 case 0x89: // movw a, r
513 debug_only(has_disp32 = true);
514 break;
515 case 0xC7: // movw a, #16
516 debug_only(has_disp32 = true);
517 tail_size = 2; // the imm16
518 break;
519 case 0x0F: // several SSE/SSE2 variants
520 ip--; // reparse the 0x0F
521 goto again_after_prefix;
522 default:
523 ShouldNotReachHere();
524 }
525 break;
527 case REP8(0xB8): // movl/q r, #32/#64(oop?)
528 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4);
529 // these asserts are somewhat nonsensical
530 #ifndef _LP64
531 assert(which == imm_operand || which == disp32_operand,
532 err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
533 #else
534 assert((which == call32_operand || which == imm_operand) && is_64bit ||
535 which == narrow_oop_operand && !is_64bit,
536 err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
537 #endif // _LP64
538 return ip;
540 case 0x69: // imul r, a, #32
541 case 0xC7: // movl a, #32(oop?)
542 tail_size = 4;
543 debug_only(has_disp32 = true); // has both kinds of operands!
544 break;
546 case 0x0F: // movx..., etc.
547 switch (0xFF & *ip++) {
548 case 0x3A: // pcmpestri
549 tail_size = 1;
550 case 0x38: // ptest, pmovzxbw
551 ip++; // skip opcode
552 debug_only(has_disp32 = true); // has both kinds of operands!
553 break;
555 case 0x70: // pshufd r, r/a, #8
556 debug_only(has_disp32 = true); // has both kinds of operands!
557 case 0x73: // psrldq r, #8
558 tail_size = 1;
559 break;
561 case 0x12: // movlps
562 case 0x28: // movaps
563 case 0x2E: // ucomiss
564 case 0x2F: // comiss
565 case 0x54: // andps
566 case 0x55: // andnps
567 case 0x56: // orps
568 case 0x57: // xorps
569 case 0x6E: // movd
570 case 0x7E: // movd
571 case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
572 debug_only(has_disp32 = true);
573 break;
575 case 0xAD: // shrd r, a, %cl
576 case 0xAF: // imul r, a
577 case 0xBE: // movsbl r, a (movsxb)
578 case 0xBF: // movswl r, a (movsxw)
579 case 0xB6: // movzbl r, a (movzxb)
580 case 0xB7: // movzwl r, a (movzxw)
581 case REP16(0x40): // cmovl cc, r, a
582 case 0xB0: // cmpxchgb
583 case 0xB1: // cmpxchg
584 case 0xC1: // xaddl
585 case 0xC7: // cmpxchg8
586 case REP16(0x90): // setcc a
587 debug_only(has_disp32 = true);
588 // fall out of the switch to decode the address
589 break;
591 case 0xC4: // pinsrw r, a, #8
592 debug_only(has_disp32 = true);
593 case 0xC5: // pextrw r, r, #8
594 tail_size = 1; // the imm8
595 break;
597 case 0xAC: // shrd r, a, #8
598 debug_only(has_disp32 = true);
599 tail_size = 1; // the imm8
600 break;
602 case REP16(0x80): // jcc rdisp32
603 if (which == end_pc_operand) return ip + 4;
604 assert(which == call32_operand, "jcc has no disp32 or imm");
605 return ip;
606 default:
607 ShouldNotReachHere();
608 }
609 break;
611 case 0x81: // addl a, #32; addl r, #32
612 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
613 // on 32bit in the case of cmpl, the imm might be an oop
614 tail_size = 4;
615 debug_only(has_disp32 = true); // has both kinds of operands!
616 break;
618 case 0x83: // addl a, #8; addl r, #8
619 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
620 debug_only(has_disp32 = true); // has both kinds of operands!
621 tail_size = 1;
622 break;
624 case 0x9B:
625 switch (0xFF & *ip++) {
626 case 0xD9: // fnstcw a
627 debug_only(has_disp32 = true);
628 break;
629 default:
630 ShouldNotReachHere();
631 }
632 break;
634 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
635 case REP4(0x10): // adc...
636 case REP4(0x20): // and...
637 case REP4(0x30): // xor...
638 case REP4(0x08): // or...
639 case REP4(0x18): // sbb...
640 case REP4(0x28): // sub...
641 case 0xF7: // mull a
642 case 0x8D: // lea r, a
643 case 0x87: // xchg r, a
644 case REP4(0x38): // cmp...
645 case 0x85: // test r, a
646 debug_only(has_disp32 = true); // has both kinds of operands!
647 break;
649 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
650 case 0xC6: // movb a, #8
651 case 0x80: // cmpb a, #8
652 case 0x6B: // imul r, a, #8
653 debug_only(has_disp32 = true); // has both kinds of operands!
654 tail_size = 1; // the imm8
655 break;
657 case 0xC4: // VEX_3bytes
658 case 0xC5: // VEX_2bytes
659 assert((UseAVX > 0), "shouldn't have VEX prefix");
660 assert(ip == inst+1, "no prefixes allowed");
661 // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
662 // but they have prefix 0x0F and processed when 0x0F processed above.
663 //
664 // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
665 // instructions (these instructions are not supported in 64-bit mode).
666 // To distinguish them bits [7:6] are set in the VEX second byte since
667 // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
668 // those VEX bits REX and vvvv bits are inverted.
669 //
670 // Fortunately C2 doesn't generate these instructions so we don't need
671 // to check for them in product version.
673 // Check second byte
674 NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
676 // First byte
677 if ((0xFF & *inst) == VEX_3bytes) {
678 ip++; // third byte
679 is_64bit = ((VEX_W & *ip) == VEX_W);
680 }
681 ip++; // opcode
682 // To find the end of instruction (which == end_pc_operand).
683 switch (0xFF & *ip) {
684 case 0x61: // pcmpestri r, r/a, #8
685 case 0x70: // pshufd r, r/a, #8
686 case 0x73: // psrldq r, #8
687 tail_size = 1; // the imm8
688 break;
689 default:
690 break;
691 }
692 ip++; // skip opcode
693 debug_only(has_disp32 = true); // has both kinds of operands!
694 break;
696 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
697 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
698 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
699 case 0xDD: // fld_d a; fst_d a; fstp_d a
700 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
701 case 0xDF: // fild_d a; fistp_d a
702 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
703 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
704 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
705 debug_only(has_disp32 = true);
706 break;
708 case 0xE8: // call rdisp32
709 case 0xE9: // jmp rdisp32
710 if (which == end_pc_operand) return ip + 4;
711 assert(which == call32_operand, "call has no disp32 or imm");
712 return ip;
714 case 0xF0: // Lock
715 assert(os::is_MP(), "only on MP");
716 goto again_after_prefix;
718 case 0xF3: // For SSE
719 case 0xF2: // For SSE2
720 switch (0xFF & *ip++) {
721 case REX:
722 case REX_B:
723 case REX_X:
724 case REX_XB:
725 case REX_R:
726 case REX_RB:
727 case REX_RX:
728 case REX_RXB:
729 case REX_W:
730 case REX_WB:
731 case REX_WX:
732 case REX_WXB:
733 case REX_WR:
734 case REX_WRB:
735 case REX_WRX:
736 case REX_WRXB:
737 NOT_LP64(assert(false, "found 64bit prefix"));
738 ip++;
739 default:
740 ip++;
741 }
742 debug_only(has_disp32 = true); // has both kinds of operands!
743 break;
745 default:
746 ShouldNotReachHere();
748 #undef REP8
749 #undef REP16
750 }
752 assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
753 #ifdef _LP64
754 assert(which != imm_operand, "instruction is not a movq reg, imm64");
755 #else
756 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
757 assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
758 #endif // LP64
759 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
761 // parse the output of emit_operand
762 int op2 = 0xFF & *ip++;
763 int base = op2 & 0x07;
764 int op3 = -1;
765 const int b100 = 4;
766 const int b101 = 5;
767 if (base == b100 && (op2 >> 6) != 3) {
768 op3 = 0xFF & *ip++;
769 base = op3 & 0x07; // refetch the base
770 }
771 // now ip points at the disp (if any)
773 switch (op2 >> 6) {
774 case 0:
775 // [00 reg 100][ss index base]
776 // [00 reg 100][00 100 esp]
777 // [00 reg base]
778 // [00 reg 100][ss index 101][disp32]
779 // [00 reg 101] [disp32]
781 if (base == b101) {
782 if (which == disp32_operand)
783 return ip; // caller wants the disp32
784 ip += 4; // skip the disp32
785 }
786 break;
788 case 1:
789 // [01 reg 100][ss index base][disp8]
790 // [01 reg 100][00 100 esp][disp8]
791 // [01 reg base] [disp8]
792 ip += 1; // skip the disp8
793 break;
795 case 2:
796 // [10 reg 100][ss index base][disp32]
797 // [10 reg 100][00 100 esp][disp32]
798 // [10 reg base] [disp32]
799 if (which == disp32_operand)
800 return ip; // caller wants the disp32
801 ip += 4; // skip the disp32
802 break;
804 case 3:
805 // [11 reg base] (not a memory addressing mode)
806 break;
807 }
809 if (which == end_pc_operand) {
810 return ip + tail_size;
811 }
813 #ifdef _LP64
814 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
815 #else
816 assert(which == imm_operand, "instruction has only an imm field");
817 #endif // LP64
818 return ip;
819 }
821 address Assembler::locate_next_instruction(address inst) {
822 // Secretly share code with locate_operand:
823 return locate_operand(inst, end_pc_operand);
824 }
827 #ifdef ASSERT
828 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
829 address inst = inst_mark();
830 assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
831 address opnd;
833 Relocation* r = rspec.reloc();
834 if (r->type() == relocInfo::none) {
835 return;
836 } else if (r->is_call() || format == call32_operand) {
837 // assert(format == imm32_operand, "cannot specify a nonzero format");
838 opnd = locate_operand(inst, call32_operand);
839 } else if (r->is_data()) {
840 assert(format == imm_operand || format == disp32_operand
841 LP64_ONLY(|| format == narrow_oop_operand), "format ok");
842 opnd = locate_operand(inst, (WhichOperand)format);
843 } else {
844 assert(format == imm_operand, "cannot specify a format");
845 return;
846 }
847 assert(opnd == pc(), "must put operand where relocs can find it");
848 }
849 #endif // ASSERT
851 void Assembler::emit_operand32(Register reg, Address adr) {
852 assert(reg->encoding() < 8, "no extended registers");
853 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
854 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
855 adr._rspec);
856 }
858 void Assembler::emit_operand(Register reg, Address adr,
859 int rip_relative_correction) {
860 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
861 adr._rspec,
862 rip_relative_correction);
863 }
865 void Assembler::emit_operand(XMMRegister reg, Address adr) {
866 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
867 adr._rspec);
868 }
870 // MMX operations
871 void Assembler::emit_operand(MMXRegister reg, Address adr) {
872 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
873 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
874 }
876 // work around gcc (3.2.1-7a) bug
877 void Assembler::emit_operand(Address adr, MMXRegister reg) {
878 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
879 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
880 }
883 void Assembler::emit_farith(int b1, int b2, int i) {
884 assert(isByte(b1) && isByte(b2), "wrong opcode");
885 assert(0 <= i && i < 8, "illegal stack offset");
886 emit_byte(b1);
887 emit_byte(b2 + i);
888 }
891 // Now the Assembler instructions (identical for 32/64 bits)
893 void Assembler::adcl(Address dst, int32_t imm32) {
894 InstructionMark im(this);
895 prefix(dst);
896 emit_arith_operand(0x81, rdx, dst, imm32);
897 }
899 void Assembler::adcl(Address dst, Register src) {
900 InstructionMark im(this);
901 prefix(dst, src);
902 emit_byte(0x11);
903 emit_operand(src, dst);
904 }
906 void Assembler::adcl(Register dst, int32_t imm32) {
907 prefix(dst);
908 emit_arith(0x81, 0xD0, dst, imm32);
909 }
911 void Assembler::adcl(Register dst, Address src) {
912 InstructionMark im(this);
913 prefix(src, dst);
914 emit_byte(0x13);
915 emit_operand(dst, src);
916 }
918 void Assembler::adcl(Register dst, Register src) {
919 (void) prefix_and_encode(dst->encoding(), src->encoding());
920 emit_arith(0x13, 0xC0, dst, src);
921 }
923 void Assembler::addl(Address dst, int32_t imm32) {
924 InstructionMark im(this);
925 prefix(dst);
926 emit_arith_operand(0x81, rax, dst, imm32);
927 }
929 void Assembler::addl(Address dst, Register src) {
930 InstructionMark im(this);
931 prefix(dst, src);
932 emit_byte(0x01);
933 emit_operand(src, dst);
934 }
936 void Assembler::addl(Register dst, int32_t imm32) {
937 prefix(dst);
938 emit_arith(0x81, 0xC0, dst, imm32);
939 }
941 void Assembler::addl(Register dst, Address src) {
942 InstructionMark im(this);
943 prefix(src, dst);
944 emit_byte(0x03);
945 emit_operand(dst, src);
946 }
948 void Assembler::addl(Register dst, Register src) {
949 (void) prefix_and_encode(dst->encoding(), src->encoding());
950 emit_arith(0x03, 0xC0, dst, src);
951 }
953 void Assembler::addr_nop_4() {
954 assert(UseAddressNop, "no CPU support");
955 // 4 bytes: NOP DWORD PTR [EAX+0]
956 emit_byte(0x0F);
957 emit_byte(0x1F);
958 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
959 emit_byte(0); // 8-bits offset (1 byte)
960 }
962 void Assembler::addr_nop_5() {
963 assert(UseAddressNop, "no CPU support");
964 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
965 emit_byte(0x0F);
966 emit_byte(0x1F);
967 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
968 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
969 emit_byte(0); // 8-bits offset (1 byte)
970 }
972 void Assembler::addr_nop_7() {
973 assert(UseAddressNop, "no CPU support");
974 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
975 emit_byte(0x0F);
976 emit_byte(0x1F);
977 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
978 emit_long(0); // 32-bits offset (4 bytes)
979 }
981 void Assembler::addr_nop_8() {
982 assert(UseAddressNop, "no CPU support");
983 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
984 emit_byte(0x0F);
985 emit_byte(0x1F);
986 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
987 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
988 emit_long(0); // 32-bits offset (4 bytes)
989 }
991 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
992 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
993 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
994 emit_byte(0x58);
995 emit_byte(0xC0 | encode);
996 }
998 void Assembler::addsd(XMMRegister dst, Address src) {
999 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1000 InstructionMark im(this);
1001 simd_prefix(dst, dst, src, VEX_SIMD_F2);
1002 emit_byte(0x58);
1003 emit_operand(dst, src);
1004 }
1006 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1007 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1008 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1009 emit_byte(0x58);
1010 emit_byte(0xC0 | encode);
1011 }
1013 void Assembler::addss(XMMRegister dst, Address src) {
1014 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1015 InstructionMark im(this);
1016 simd_prefix(dst, dst, src, VEX_SIMD_F3);
1017 emit_byte(0x58);
1018 emit_operand(dst, src);
1019 }
1021 void Assembler::andl(Address dst, int32_t imm32) {
1022 InstructionMark im(this);
1023 prefix(dst);
1024 emit_byte(0x81);
1025 emit_operand(rsp, dst, 4);
1026 emit_long(imm32);
1027 }
1029 void Assembler::andl(Register dst, int32_t imm32) {
1030 prefix(dst);
1031 emit_arith(0x81, 0xE0, dst, imm32);
1032 }
1034 void Assembler::andl(Register dst, Address src) {
1035 InstructionMark im(this);
1036 prefix(src, dst);
1037 emit_byte(0x23);
1038 emit_operand(dst, src);
1039 }
1041 void Assembler::andl(Register dst, Register src) {
1042 (void) prefix_and_encode(dst->encoding(), src->encoding());
1043 emit_arith(0x23, 0xC0, dst, src);
1044 }
1046 void Assembler::andpd(XMMRegister dst, Address src) {
1047 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1048 InstructionMark im(this);
1049 simd_prefix(dst, dst, src, VEX_SIMD_66);
1050 emit_byte(0x54);
1051 emit_operand(dst, src);
1052 }
1054 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
1055 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1056 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
1057 emit_byte(0x54);
1058 emit_byte(0xC0 | encode);
1059 }
1061 void Assembler::andps(XMMRegister dst, Address src) {
1062 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1063 InstructionMark im(this);
1064 simd_prefix(dst, dst, src, VEX_SIMD_NONE);
1065 emit_byte(0x54);
1066 emit_operand(dst, src);
1067 }
1069 void Assembler::andps(XMMRegister dst, XMMRegister src) {
1070 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1071 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
1072 emit_byte(0x54);
1073 emit_byte(0xC0 | encode);
1074 }
1076 void Assembler::bsfl(Register dst, Register src) {
1077 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1078 emit_byte(0x0F);
1079 emit_byte(0xBC);
1080 emit_byte(0xC0 | encode);
1081 }
1083 void Assembler::bsrl(Register dst, Register src) {
1084 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
1085 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1086 emit_byte(0x0F);
1087 emit_byte(0xBD);
1088 emit_byte(0xC0 | encode);
1089 }
1091 void Assembler::bswapl(Register reg) { // bswap
1092 int encode = prefix_and_encode(reg->encoding());
1093 emit_byte(0x0F);
1094 emit_byte(0xC8 | encode);
1095 }
1097 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1098 // suspect disp32 is always good
1099 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1101 if (L.is_bound()) {
1102 const int long_size = 5;
1103 int offs = (int)( target(L) - pc() );
1104 assert(offs <= 0, "assembler error");
1105 InstructionMark im(this);
1106 // 1110 1000 #32-bit disp
1107 emit_byte(0xE8);
1108 emit_data(offs - long_size, rtype, operand);
1109 } else {
1110 InstructionMark im(this);
1111 // 1110 1000 #32-bit disp
1112 L.add_patch_at(code(), locator());
1114 emit_byte(0xE8);
1115 emit_data(int(0), rtype, operand);
1116 }
1117 }
1119 void Assembler::call(Register dst) {
1120 int encode = prefix_and_encode(dst->encoding());
1121 emit_byte(0xFF);
1122 emit_byte(0xD0 | encode);
1123 }
1126 void Assembler::call(Address adr) {
1127 InstructionMark im(this);
1128 prefix(adr);
1129 emit_byte(0xFF);
1130 emit_operand(rdx, adr);
1131 }
1133 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1134 assert(entry != NULL, "call most probably wrong");
1135 InstructionMark im(this);
1136 emit_byte(0xE8);
1137 intptr_t disp = entry - (_code_pos + sizeof(int32_t));
1138 assert(is_simm32(disp), "must be 32bit offset (call2)");
1139 // Technically, should use call32_operand, but this format is
1140 // implied by the fact that we're emitting a call instruction.
1142 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1143 emit_data((int) disp, rspec, operand);
1144 }
1146 void Assembler::cdql() {
1147 emit_byte(0x99);
1148 }
1150 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1151 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1152 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1153 emit_byte(0x0F);
1154 emit_byte(0x40 | cc);
1155 emit_byte(0xC0 | encode);
1156 }
1159 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1160 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1161 prefix(src, dst);
1162 emit_byte(0x0F);
1163 emit_byte(0x40 | cc);
1164 emit_operand(dst, src);
1165 }
1167 void Assembler::cmpb(Address dst, int imm8) {
1168 InstructionMark im(this);
1169 prefix(dst);
1170 emit_byte(0x80);
1171 emit_operand(rdi, dst, 1);
1172 emit_byte(imm8);
1173 }
1175 void Assembler::cmpl(Address dst, int32_t imm32) {
1176 InstructionMark im(this);
1177 prefix(dst);
1178 emit_byte(0x81);
1179 emit_operand(rdi, dst, 4);
1180 emit_long(imm32);
1181 }
1183 void Assembler::cmpl(Register dst, int32_t imm32) {
1184 prefix(dst);
1185 emit_arith(0x81, 0xF8, dst, imm32);
1186 }
1188 void Assembler::cmpl(Register dst, Register src) {
1189 (void) prefix_and_encode(dst->encoding(), src->encoding());
1190 emit_arith(0x3B, 0xC0, dst, src);
1191 }
1194 void Assembler::cmpl(Register dst, Address src) {
1195 InstructionMark im(this);
1196 prefix(src, dst);
1197 emit_byte(0x3B);
1198 emit_operand(dst, src);
1199 }
1201 void Assembler::cmpw(Address dst, int imm16) {
1202 InstructionMark im(this);
1203 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1204 emit_byte(0x66);
1205 emit_byte(0x81);
1206 emit_operand(rdi, dst, 2);
1207 emit_word(imm16);
1208 }
1210 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1211 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1212 // The ZF is set if the compared values were equal, and cleared otherwise.
1213 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1214 if (Atomics & 2) {
1215 // caveat: no instructionmark, so this isn't relocatable.
1216 // Emit a synthetic, non-atomic, CAS equivalent.
1217 // Beware. The synthetic form sets all ICCs, not just ZF.
1218 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
1219 cmpl(rax, adr);
1220 movl(rax, adr);
1221 if (reg != rax) {
1222 Label L ;
1223 jcc(Assembler::notEqual, L);
1224 movl(adr, reg);
1225 bind(L);
1226 }
1227 } else {
1228 InstructionMark im(this);
1229 prefix(adr, reg);
1230 emit_byte(0x0F);
1231 emit_byte(0xB1);
1232 emit_operand(reg, adr);
1233 }
1234 }
1236 void Assembler::comisd(XMMRegister dst, Address src) {
1237 // NOTE: dbx seems to decode this as comiss even though the
1238 // 0x66 is there. Strangly ucomisd comes out correct
1239 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1240 InstructionMark im(this);
1241 simd_prefix(dst, src, VEX_SIMD_66);
1242 emit_byte(0x2F);
1243 emit_operand(dst, src);
1244 }
1246 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1247 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1248 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1249 emit_byte(0x2F);
1250 emit_byte(0xC0 | encode);
1251 }
1253 void Assembler::comiss(XMMRegister dst, Address src) {
1254 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1255 InstructionMark im(this);
1256 simd_prefix(dst, src, VEX_SIMD_NONE);
1257 emit_byte(0x2F);
1258 emit_operand(dst, src);
1259 }
1261 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1262 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1263 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
1264 emit_byte(0x2F);
1265 emit_byte(0xC0 | encode);
1266 }
1268 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1269 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1270 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
1271 emit_byte(0xE6);
1272 emit_byte(0xC0 | encode);
1273 }
1275 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1276 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1277 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
1278 emit_byte(0x5B);
1279 emit_byte(0xC0 | encode);
1280 }
1282 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1283 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1284 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1285 emit_byte(0x5A);
1286 emit_byte(0xC0 | encode);
1287 }
1289 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1290 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1291 InstructionMark im(this);
1292 simd_prefix(dst, dst, src, VEX_SIMD_F2);
1293 emit_byte(0x5A);
1294 emit_operand(dst, src);
1295 }
1297 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1298 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1299 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1300 emit_byte(0x2A);
1301 emit_byte(0xC0 | encode);
1302 }
1304 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1305 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1306 InstructionMark im(this);
1307 simd_prefix(dst, dst, src, VEX_SIMD_F2);
1308 emit_byte(0x2A);
1309 emit_operand(dst, src);
1310 }
1312 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1313 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1314 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1315 emit_byte(0x2A);
1316 emit_byte(0xC0 | encode);
1317 }
1319 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1320 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1321 InstructionMark im(this);
1322 simd_prefix(dst, dst, src, VEX_SIMD_F3);
1323 emit_byte(0x2A);
1324 emit_operand(dst, src);
1325 }
1327 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1328 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1329 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1330 emit_byte(0x5A);
1331 emit_byte(0xC0 | encode);
1332 }
1334 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1335 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1336 InstructionMark im(this);
1337 simd_prefix(dst, dst, src, VEX_SIMD_F3);
1338 emit_byte(0x5A);
1339 emit_operand(dst, src);
1340 }
1343 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1344 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1345 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
1346 emit_byte(0x2C);
1347 emit_byte(0xC0 | encode);
1348 }
1350 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1351 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1352 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
1353 emit_byte(0x2C);
1354 emit_byte(0xC0 | encode);
1355 }
1357 void Assembler::decl(Address dst) {
1358 // Don't use it directly. Use MacroAssembler::decrement() instead.
1359 InstructionMark im(this);
1360 prefix(dst);
1361 emit_byte(0xFF);
1362 emit_operand(rcx, dst);
1363 }
1365 void Assembler::divsd(XMMRegister dst, Address src) {
1366 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1367 InstructionMark im(this);
1368 simd_prefix(dst, dst, src, VEX_SIMD_F2);
1369 emit_byte(0x5E);
1370 emit_operand(dst, src);
1371 }
1373 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1374 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1375 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1376 emit_byte(0x5E);
1377 emit_byte(0xC0 | encode);
1378 }
1380 void Assembler::divss(XMMRegister dst, Address src) {
1381 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1382 InstructionMark im(this);
1383 simd_prefix(dst, dst, src, VEX_SIMD_F3);
1384 emit_byte(0x5E);
1385 emit_operand(dst, src);
1386 }
1388 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1389 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1390 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1391 emit_byte(0x5E);
1392 emit_byte(0xC0 | encode);
1393 }
1395 void Assembler::emms() {
1396 NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1397 emit_byte(0x0F);
1398 emit_byte(0x77);
1399 }
1401 void Assembler::hlt() {
1402 emit_byte(0xF4);
1403 }
1405 void Assembler::idivl(Register src) {
1406 int encode = prefix_and_encode(src->encoding());
1407 emit_byte(0xF7);
1408 emit_byte(0xF8 | encode);
1409 }
1411 void Assembler::divl(Register src) { // Unsigned
1412 int encode = prefix_and_encode(src->encoding());
1413 emit_byte(0xF7);
1414 emit_byte(0xF0 | encode);
1415 }
1417 void Assembler::imull(Register dst, Register src) {
1418 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1419 emit_byte(0x0F);
1420 emit_byte(0xAF);
1421 emit_byte(0xC0 | encode);
1422 }
1425 void Assembler::imull(Register dst, Register src, int value) {
1426 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1427 if (is8bit(value)) {
1428 emit_byte(0x6B);
1429 emit_byte(0xC0 | encode);
1430 emit_byte(value & 0xFF);
1431 } else {
1432 emit_byte(0x69);
1433 emit_byte(0xC0 | encode);
1434 emit_long(value);
1435 }
1436 }
1438 void Assembler::incl(Address dst) {
1439 // Don't use it directly. Use MacroAssembler::increment() instead.
1440 InstructionMark im(this);
1441 prefix(dst);
1442 emit_byte(0xFF);
1443 emit_operand(rax, dst);
1444 }
1446 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1447 InstructionMark im(this);
1448 assert((0 <= cc) && (cc < 16), "illegal cc");
1449 if (L.is_bound()) {
1450 address dst = target(L);
1451 assert(dst != NULL, "jcc most probably wrong");
1453 const int short_size = 2;
1454 const int long_size = 6;
1455 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
1456 if (maybe_short && is8bit(offs - short_size)) {
1457 // 0111 tttn #8-bit disp
1458 emit_byte(0x70 | cc);
1459 emit_byte((offs - short_size) & 0xFF);
1460 } else {
1461 // 0000 1111 1000 tttn #32-bit disp
1462 assert(is_simm32(offs - long_size),
1463 "must be 32bit offset (call4)");
1464 emit_byte(0x0F);
1465 emit_byte(0x80 | cc);
1466 emit_long(offs - long_size);
1467 }
1468 } else {
1469 // Note: could eliminate cond. jumps to this jump if condition
1470 // is the same however, seems to be rather unlikely case.
1471 // Note: use jccb() if label to be bound is very close to get
1472 // an 8-bit displacement
1473 L.add_patch_at(code(), locator());
1474 emit_byte(0x0F);
1475 emit_byte(0x80 | cc);
1476 emit_long(0);
1477 }
1478 }
1480 void Assembler::jccb(Condition cc, Label& L) {
1481 if (L.is_bound()) {
1482 const int short_size = 2;
1483 address entry = target(L);
1484 #ifdef ASSERT
1485 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
1486 intptr_t delta = short_branch_delta();
1487 if (delta != 0) {
1488 dist += (dist < 0 ? (-delta) :delta);
1489 }
1490 assert(is8bit(dist), "Dispacement too large for a short jmp");
1491 #endif
1492 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1493 // 0111 tttn #8-bit disp
1494 emit_byte(0x70 | cc);
1495 emit_byte((offs - short_size) & 0xFF);
1496 } else {
1497 InstructionMark im(this);
1498 L.add_patch_at(code(), locator());
1499 emit_byte(0x70 | cc);
1500 emit_byte(0);
1501 }
1502 }
1504 void Assembler::jmp(Address adr) {
1505 InstructionMark im(this);
1506 prefix(adr);
1507 emit_byte(0xFF);
1508 emit_operand(rsp, adr);
1509 }
1511 void Assembler::jmp(Label& L, bool maybe_short) {
1512 if (L.is_bound()) {
1513 address entry = target(L);
1514 assert(entry != NULL, "jmp most probably wrong");
1515 InstructionMark im(this);
1516 const int short_size = 2;
1517 const int long_size = 5;
1518 intptr_t offs = entry - _code_pos;
1519 if (maybe_short && is8bit(offs - short_size)) {
1520 emit_byte(0xEB);
1521 emit_byte((offs - short_size) & 0xFF);
1522 } else {
1523 emit_byte(0xE9);
1524 emit_long(offs - long_size);
1525 }
1526 } else {
1527 // By default, forward jumps are always 32-bit displacements, since
1528 // we can't yet know where the label will be bound. If you're sure that
1529 // the forward jump will not run beyond 256 bytes, use jmpb to
1530 // force an 8-bit displacement.
1531 InstructionMark im(this);
1532 L.add_patch_at(code(), locator());
1533 emit_byte(0xE9);
1534 emit_long(0);
1535 }
1536 }
1538 void Assembler::jmp(Register entry) {
1539 int encode = prefix_and_encode(entry->encoding());
1540 emit_byte(0xFF);
1541 emit_byte(0xE0 | encode);
1542 }
1544 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1545 InstructionMark im(this);
1546 emit_byte(0xE9);
1547 assert(dest != NULL, "must have a target");
1548 intptr_t disp = dest - (_code_pos + sizeof(int32_t));
1549 assert(is_simm32(disp), "must be 32bit offset (jmp)");
1550 emit_data(disp, rspec.reloc(), call32_operand);
1551 }
1553 void Assembler::jmpb(Label& L) {
1554 if (L.is_bound()) {
1555 const int short_size = 2;
1556 address entry = target(L);
1557 assert(entry != NULL, "jmp most probably wrong");
1558 #ifdef ASSERT
1559 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
1560 intptr_t delta = short_branch_delta();
1561 if (delta != 0) {
1562 dist += (dist < 0 ? (-delta) :delta);
1563 }
1564 assert(is8bit(dist), "Dispacement too large for a short jmp");
1565 #endif
1566 intptr_t offs = entry - _code_pos;
1567 emit_byte(0xEB);
1568 emit_byte((offs - short_size) & 0xFF);
1569 } else {
1570 InstructionMark im(this);
1571 L.add_patch_at(code(), locator());
1572 emit_byte(0xEB);
1573 emit_byte(0);
1574 }
1575 }
1577 void Assembler::ldmxcsr( Address src) {
1578 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1579 InstructionMark im(this);
1580 prefix(src);
1581 emit_byte(0x0F);
1582 emit_byte(0xAE);
1583 emit_operand(as_Register(2), src);
1584 }
1586 void Assembler::leal(Register dst, Address src) {
1587 InstructionMark im(this);
1588 #ifdef _LP64
1589 emit_byte(0x67); // addr32
1590 prefix(src, dst);
1591 #endif // LP64
1592 emit_byte(0x8D);
1593 emit_operand(dst, src);
1594 }
1596 void Assembler::lock() {
1597 if (Atomics & 1) {
1598 // Emit either nothing, a NOP, or a NOP: prefix
1599 emit_byte(0x90) ;
1600 } else {
1601 emit_byte(0xF0);
1602 }
1603 }
1605 void Assembler::lzcntl(Register dst, Register src) {
1606 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
1607 emit_byte(0xF3);
1608 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1609 emit_byte(0x0F);
1610 emit_byte(0xBD);
1611 emit_byte(0xC0 | encode);
1612 }
1614 // Emit mfence instruction
1615 void Assembler::mfence() {
1616 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1617 emit_byte( 0x0F );
1618 emit_byte( 0xAE );
1619 emit_byte( 0xF0 );
1620 }
1622 void Assembler::mov(Register dst, Register src) {
1623 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1624 }
1626 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1627 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1628 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1629 emit_byte(0x28);
1630 emit_byte(0xC0 | encode);
1631 }
1633 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1634 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1635 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
1636 emit_byte(0x28);
1637 emit_byte(0xC0 | encode);
1638 }
1640 void Assembler::movb(Register dst, Address src) {
1641 NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1642 InstructionMark im(this);
1643 prefix(src, dst, true);
1644 emit_byte(0x8A);
1645 emit_operand(dst, src);
1646 }
1649 void Assembler::movb(Address dst, int imm8) {
1650 InstructionMark im(this);
1651 prefix(dst);
1652 emit_byte(0xC6);
1653 emit_operand(rax, dst, 1);
1654 emit_byte(imm8);
1655 }
1658 void Assembler::movb(Address dst, Register src) {
1659 assert(src->has_byte_register(), "must have byte register");
1660 InstructionMark im(this);
1661 prefix(dst, src, true);
1662 emit_byte(0x88);
1663 emit_operand(src, dst);
1664 }
1666 void Assembler::movdl(XMMRegister dst, Register src) {
1667 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1668 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1669 emit_byte(0x6E);
1670 emit_byte(0xC0 | encode);
1671 }
1673 void Assembler::movdl(Register dst, XMMRegister src) {
1674 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1675 // swap src/dst to get correct prefix
1676 int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
1677 emit_byte(0x7E);
1678 emit_byte(0xC0 | encode);
1679 }
1681 void Assembler::movdl(XMMRegister dst, Address src) {
1682 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1683 InstructionMark im(this);
1684 simd_prefix(dst, src, VEX_SIMD_66);
1685 emit_byte(0x6E);
1686 emit_operand(dst, src);
1687 }
1689 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1690 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1691 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1692 emit_byte(0x6F);
1693 emit_byte(0xC0 | encode);
1694 }
1696 void Assembler::movdqu(XMMRegister dst, Address src) {
1697 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1698 InstructionMark im(this);
1699 simd_prefix(dst, src, VEX_SIMD_F3);
1700 emit_byte(0x6F);
1701 emit_operand(dst, src);
1702 }
1704 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1705 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1706 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
1707 emit_byte(0x6F);
1708 emit_byte(0xC0 | encode);
1709 }
1711 void Assembler::movdqu(Address dst, XMMRegister src) {
1712 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1713 InstructionMark im(this);
1714 simd_prefix(dst, src, VEX_SIMD_F3);
1715 emit_byte(0x7F);
1716 emit_operand(src, dst);
1717 }
1719 // Uses zero extension on 64bit
1721 void Assembler::movl(Register dst, int32_t imm32) {
1722 int encode = prefix_and_encode(dst->encoding());
1723 emit_byte(0xB8 | encode);
1724 emit_long(imm32);
1725 }
1727 void Assembler::movl(Register dst, Register src) {
1728 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1729 emit_byte(0x8B);
1730 emit_byte(0xC0 | encode);
1731 }
1733 void Assembler::movl(Register dst, Address src) {
1734 InstructionMark im(this);
1735 prefix(src, dst);
1736 emit_byte(0x8B);
1737 emit_operand(dst, src);
1738 }
1740 void Assembler::movl(Address dst, int32_t imm32) {
1741 InstructionMark im(this);
1742 prefix(dst);
1743 emit_byte(0xC7);
1744 emit_operand(rax, dst, 4);
1745 emit_long(imm32);
1746 }
1748 void Assembler::movl(Address dst, Register src) {
1749 InstructionMark im(this);
1750 prefix(dst, src);
1751 emit_byte(0x89);
1752 emit_operand(src, dst);
1753 }
1755 // New cpus require to use movsd and movss to avoid partial register stall
1756 // when loading from memory. But for old Opteron use movlpd instead of movsd.
1757 // The selection is done in MacroAssembler::movdbl() and movflt().
1758 void Assembler::movlpd(XMMRegister dst, Address src) {
1759 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1760 InstructionMark im(this);
1761 simd_prefix(dst, dst, src, VEX_SIMD_66);
1762 emit_byte(0x12);
1763 emit_operand(dst, src);
1764 }
1766 void Assembler::movq( MMXRegister dst, Address src ) {
1767 assert( VM_Version::supports_mmx(), "" );
1768 emit_byte(0x0F);
1769 emit_byte(0x6F);
1770 emit_operand(dst, src);
1771 }
1773 void Assembler::movq( Address dst, MMXRegister src ) {
1774 assert( VM_Version::supports_mmx(), "" );
1775 emit_byte(0x0F);
1776 emit_byte(0x7F);
1777 // workaround gcc (3.2.1-7a) bug
1778 // In that version of gcc with only an emit_operand(MMX, Address)
1779 // gcc will tail jump and try and reverse the parameters completely
1780 // obliterating dst in the process. By having a version available
1781 // that doesn't need to swap the args at the tail jump the bug is
1782 // avoided.
1783 emit_operand(dst, src);
1784 }
1786 void Assembler::movq(XMMRegister dst, Address src) {
1787 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1788 InstructionMark im(this);
1789 simd_prefix(dst, src, VEX_SIMD_F3);
1790 emit_byte(0x7E);
1791 emit_operand(dst, src);
1792 }
1794 void Assembler::movq(Address dst, XMMRegister src) {
1795 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1796 InstructionMark im(this);
1797 simd_prefix(dst, src, VEX_SIMD_66);
1798 emit_byte(0xD6);
1799 emit_operand(src, dst);
1800 }
1802 void Assembler::movsbl(Register dst, Address src) { // movsxb
1803 InstructionMark im(this);
1804 prefix(src, dst);
1805 emit_byte(0x0F);
1806 emit_byte(0xBE);
1807 emit_operand(dst, src);
1808 }
1810 void Assembler::movsbl(Register dst, Register src) { // movsxb
1811 NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1812 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1813 emit_byte(0x0F);
1814 emit_byte(0xBE);
1815 emit_byte(0xC0 | encode);
1816 }
1818 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1819 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1820 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1821 emit_byte(0x10);
1822 emit_byte(0xC0 | encode);
1823 }
1825 void Assembler::movsd(XMMRegister dst, Address src) {
1826 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1827 InstructionMark im(this);
1828 simd_prefix(dst, src, VEX_SIMD_F2);
1829 emit_byte(0x10);
1830 emit_operand(dst, src);
1831 }
1833 void Assembler::movsd(Address dst, XMMRegister src) {
1834 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1835 InstructionMark im(this);
1836 simd_prefix(dst, src, VEX_SIMD_F2);
1837 emit_byte(0x11);
1838 emit_operand(src, dst);
1839 }
1841 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1842 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1843 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1844 emit_byte(0x10);
1845 emit_byte(0xC0 | encode);
1846 }
1848 void Assembler::movss(XMMRegister dst, Address src) {
1849 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1850 InstructionMark im(this);
1851 simd_prefix(dst, src, VEX_SIMD_F3);
1852 emit_byte(0x10);
1853 emit_operand(dst, src);
1854 }
1856 void Assembler::movss(Address dst, XMMRegister src) {
1857 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1858 InstructionMark im(this);
1859 simd_prefix(dst, src, VEX_SIMD_F3);
1860 emit_byte(0x11);
1861 emit_operand(src, dst);
1862 }
1864 void Assembler::movswl(Register dst, Address src) { // movsxw
1865 InstructionMark im(this);
1866 prefix(src, dst);
1867 emit_byte(0x0F);
1868 emit_byte(0xBF);
1869 emit_operand(dst, src);
1870 }
1872 void Assembler::movswl(Register dst, Register src) { // movsxw
1873 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1874 emit_byte(0x0F);
1875 emit_byte(0xBF);
1876 emit_byte(0xC0 | encode);
1877 }
1879 void Assembler::movw(Address dst, int imm16) {
1880 InstructionMark im(this);
1882 emit_byte(0x66); // switch to 16-bit mode
1883 prefix(dst);
1884 emit_byte(0xC7);
1885 emit_operand(rax, dst, 2);
1886 emit_word(imm16);
1887 }
1889 void Assembler::movw(Register dst, Address src) {
1890 InstructionMark im(this);
1891 emit_byte(0x66);
1892 prefix(src, dst);
1893 emit_byte(0x8B);
1894 emit_operand(dst, src);
1895 }
1897 void Assembler::movw(Address dst, Register src) {
1898 InstructionMark im(this);
1899 emit_byte(0x66);
1900 prefix(dst, src);
1901 emit_byte(0x89);
1902 emit_operand(src, dst);
1903 }
1905 void Assembler::movzbl(Register dst, Address src) { // movzxb
1906 InstructionMark im(this);
1907 prefix(src, dst);
1908 emit_byte(0x0F);
1909 emit_byte(0xB6);
1910 emit_operand(dst, src);
1911 }
1913 void Assembler::movzbl(Register dst, Register src) { // movzxb
1914 NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1915 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1916 emit_byte(0x0F);
1917 emit_byte(0xB6);
1918 emit_byte(0xC0 | encode);
1919 }
1921 void Assembler::movzwl(Register dst, Address src) { // movzxw
1922 InstructionMark im(this);
1923 prefix(src, dst);
1924 emit_byte(0x0F);
1925 emit_byte(0xB7);
1926 emit_operand(dst, src);
1927 }
1929 void Assembler::movzwl(Register dst, Register src) { // movzxw
1930 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1931 emit_byte(0x0F);
1932 emit_byte(0xB7);
1933 emit_byte(0xC0 | encode);
1934 }
1936 void Assembler::mull(Address src) {
1937 InstructionMark im(this);
1938 prefix(src);
1939 emit_byte(0xF7);
1940 emit_operand(rsp, src);
1941 }
1943 void Assembler::mull(Register src) {
1944 int encode = prefix_and_encode(src->encoding());
1945 emit_byte(0xF7);
1946 emit_byte(0xE0 | encode);
1947 }
1949 void Assembler::mulsd(XMMRegister dst, Address src) {
1950 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1951 InstructionMark im(this);
1952 simd_prefix(dst, dst, src, VEX_SIMD_F2);
1953 emit_byte(0x59);
1954 emit_operand(dst, src);
1955 }
1957 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1958 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1959 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1960 emit_byte(0x59);
1961 emit_byte(0xC0 | encode);
1962 }
1964 void Assembler::mulss(XMMRegister dst, Address src) {
1965 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1966 InstructionMark im(this);
1967 simd_prefix(dst, dst, src, VEX_SIMD_F3);
1968 emit_byte(0x59);
1969 emit_operand(dst, src);
1970 }
1972 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1973 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1974 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1975 emit_byte(0x59);
1976 emit_byte(0xC0 | encode);
1977 }
1979 void Assembler::negl(Register dst) {
1980 int encode = prefix_and_encode(dst->encoding());
1981 emit_byte(0xF7);
1982 emit_byte(0xD8 | encode);
1983 }
1985 void Assembler::nop(int i) {
1986 #ifdef ASSERT
1987 assert(i > 0, " ");
1988 // The fancy nops aren't currently recognized by debuggers making it a
1989 // pain to disassemble code while debugging. If asserts are on clearly
1990 // speed is not an issue so simply use the single byte traditional nop
1991 // to do alignment.
1993 for (; i > 0 ; i--) emit_byte(0x90);
1994 return;
1996 #endif // ASSERT
1998 if (UseAddressNop && VM_Version::is_intel()) {
1999 //
2000 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
2001 // 1: 0x90
2002 // 2: 0x66 0x90
2003 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2004 // 4: 0x0F 0x1F 0x40 0x00
2005 // 5: 0x0F 0x1F 0x44 0x00 0x00
2006 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2007 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2008 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2009 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2010 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2011 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2013 // The rest coding is Intel specific - don't use consecutive address nops
2015 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2016 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2017 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2018 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2020 while(i >= 15) {
2021 // For Intel don't generate consecutive addess nops (mix with regular nops)
2022 i -= 15;
2023 emit_byte(0x66); // size prefix
2024 emit_byte(0x66); // size prefix
2025 emit_byte(0x66); // size prefix
2026 addr_nop_8();
2027 emit_byte(0x66); // size prefix
2028 emit_byte(0x66); // size prefix
2029 emit_byte(0x66); // size prefix
2030 emit_byte(0x90); // nop
2031 }
2032 switch (i) {
2033 case 14:
2034 emit_byte(0x66); // size prefix
2035 case 13:
2036 emit_byte(0x66); // size prefix
2037 case 12:
2038 addr_nop_8();
2039 emit_byte(0x66); // size prefix
2040 emit_byte(0x66); // size prefix
2041 emit_byte(0x66); // size prefix
2042 emit_byte(0x90); // nop
2043 break;
2044 case 11:
2045 emit_byte(0x66); // size prefix
2046 case 10:
2047 emit_byte(0x66); // size prefix
2048 case 9:
2049 emit_byte(0x66); // size prefix
2050 case 8:
2051 addr_nop_8();
2052 break;
2053 case 7:
2054 addr_nop_7();
2055 break;
2056 case 6:
2057 emit_byte(0x66); // size prefix
2058 case 5:
2059 addr_nop_5();
2060 break;
2061 case 4:
2062 addr_nop_4();
2063 break;
2064 case 3:
2065 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2066 emit_byte(0x66); // size prefix
2067 case 2:
2068 emit_byte(0x66); // size prefix
2069 case 1:
2070 emit_byte(0x90); // nop
2071 break;
2072 default:
2073 assert(i == 0, " ");
2074 }
2075 return;
2076 }
2077 if (UseAddressNop && VM_Version::is_amd()) {
2078 //
2079 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
2080 // 1: 0x90
2081 // 2: 0x66 0x90
2082 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2083 // 4: 0x0F 0x1F 0x40 0x00
2084 // 5: 0x0F 0x1F 0x44 0x00 0x00
2085 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2086 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2087 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2088 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2089 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2090 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2092 // The rest coding is AMD specific - use consecutive address nops
2094 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2095 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2096 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2097 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2098 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2099 // Size prefixes (0x66) are added for larger sizes
2101 while(i >= 22) {
2102 i -= 11;
2103 emit_byte(0x66); // size prefix
2104 emit_byte(0x66); // size prefix
2105 emit_byte(0x66); // size prefix
2106 addr_nop_8();
2107 }
2108 // Generate first nop for size between 21-12
2109 switch (i) {
2110 case 21:
2111 i -= 1;
2112 emit_byte(0x66); // size prefix
2113 case 20:
2114 case 19:
2115 i -= 1;
2116 emit_byte(0x66); // size prefix
2117 case 18:
2118 case 17:
2119 i -= 1;
2120 emit_byte(0x66); // size prefix
2121 case 16:
2122 case 15:
2123 i -= 8;
2124 addr_nop_8();
2125 break;
2126 case 14:
2127 case 13:
2128 i -= 7;
2129 addr_nop_7();
2130 break;
2131 case 12:
2132 i -= 6;
2133 emit_byte(0x66); // size prefix
2134 addr_nop_5();
2135 break;
2136 default:
2137 assert(i < 12, " ");
2138 }
2140 // Generate second nop for size between 11-1
2141 switch (i) {
2142 case 11:
2143 emit_byte(0x66); // size prefix
2144 case 10:
2145 emit_byte(0x66); // size prefix
2146 case 9:
2147 emit_byte(0x66); // size prefix
2148 case 8:
2149 addr_nop_8();
2150 break;
2151 case 7:
2152 addr_nop_7();
2153 break;
2154 case 6:
2155 emit_byte(0x66); // size prefix
2156 case 5:
2157 addr_nop_5();
2158 break;
2159 case 4:
2160 addr_nop_4();
2161 break;
2162 case 3:
2163 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2164 emit_byte(0x66); // size prefix
2165 case 2:
2166 emit_byte(0x66); // size prefix
2167 case 1:
2168 emit_byte(0x90); // nop
2169 break;
2170 default:
2171 assert(i == 0, " ");
2172 }
2173 return;
2174 }
2176 // Using nops with size prefixes "0x66 0x90".
2177 // From AMD Optimization Guide:
2178 // 1: 0x90
2179 // 2: 0x66 0x90
2180 // 3: 0x66 0x66 0x90
2181 // 4: 0x66 0x66 0x66 0x90
2182 // 5: 0x66 0x66 0x90 0x66 0x90
2183 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2184 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2185 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2186 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2187 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2188 //
2189 while(i > 12) {
2190 i -= 4;
2191 emit_byte(0x66); // size prefix
2192 emit_byte(0x66);
2193 emit_byte(0x66);
2194 emit_byte(0x90); // nop
2195 }
2196 // 1 - 12 nops
2197 if(i > 8) {
2198 if(i > 9) {
2199 i -= 1;
2200 emit_byte(0x66);
2201 }
2202 i -= 3;
2203 emit_byte(0x66);
2204 emit_byte(0x66);
2205 emit_byte(0x90);
2206 }
2207 // 1 - 8 nops
2208 if(i > 4) {
2209 if(i > 6) {
2210 i -= 1;
2211 emit_byte(0x66);
2212 }
2213 i -= 3;
2214 emit_byte(0x66);
2215 emit_byte(0x66);
2216 emit_byte(0x90);
2217 }
2218 switch (i) {
2219 case 4:
2220 emit_byte(0x66);
2221 case 3:
2222 emit_byte(0x66);
2223 case 2:
2224 emit_byte(0x66);
2225 case 1:
2226 emit_byte(0x90);
2227 break;
2228 default:
2229 assert(i == 0, " ");
2230 }
2231 }
2233 void Assembler::notl(Register dst) {
2234 int encode = prefix_and_encode(dst->encoding());
2235 emit_byte(0xF7);
2236 emit_byte(0xD0 | encode );
2237 }
2239 void Assembler::orl(Address dst, int32_t imm32) {
2240 InstructionMark im(this);
2241 prefix(dst);
2242 emit_arith_operand(0x81, rcx, dst, imm32);
2243 }
2245 void Assembler::orl(Register dst, int32_t imm32) {
2246 prefix(dst);
2247 emit_arith(0x81, 0xC8, dst, imm32);
2248 }
2250 void Assembler::orl(Register dst, Address src) {
2251 InstructionMark im(this);
2252 prefix(src, dst);
2253 emit_byte(0x0B);
2254 emit_operand(dst, src);
2255 }
2257 void Assembler::orl(Register dst, Register src) {
2258 (void) prefix_and_encode(dst->encoding(), src->encoding());
2259 emit_arith(0x0B, 0xC0, dst, src);
2260 }
2262 void Assembler::packuswb(XMMRegister dst, Address src) {
2263 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2264 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2265 InstructionMark im(this);
2266 simd_prefix(dst, dst, src, VEX_SIMD_66);
2267 emit_byte(0x67);
2268 emit_operand(dst, src);
2269 }
2271 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2272 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2273 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
2274 emit_byte(0x67);
2275 emit_byte(0xC0 | encode);
2276 }
2278 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2279 assert(VM_Version::supports_sse4_2(), "");
2280 InstructionMark im(this);
2281 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2282 emit_byte(0x61);
2283 emit_operand(dst, src);
2284 emit_byte(imm8);
2285 }
2287 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2288 assert(VM_Version::supports_sse4_2(), "");
2289 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2290 emit_byte(0x61);
2291 emit_byte(0xC0 | encode);
2292 emit_byte(imm8);
2293 }
2295 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2296 assert(VM_Version::supports_sse4_1(), "");
2297 InstructionMark im(this);
2298 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2299 emit_byte(0x30);
2300 emit_operand(dst, src);
2301 }
2303 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2304 assert(VM_Version::supports_sse4_1(), "");
2305 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2306 emit_byte(0x30);
2307 emit_byte(0xC0 | encode);
2308 }
2310 // generic
2311 void Assembler::pop(Register dst) {
2312 int encode = prefix_and_encode(dst->encoding());
2313 emit_byte(0x58 | encode);
2314 }
2316 void Assembler::popcntl(Register dst, Address src) {
2317 assert(VM_Version::supports_popcnt(), "must support");
2318 InstructionMark im(this);
2319 emit_byte(0xF3);
2320 prefix(src, dst);
2321 emit_byte(0x0F);
2322 emit_byte(0xB8);
2323 emit_operand(dst, src);
2324 }
2326 void Assembler::popcntl(Register dst, Register src) {
2327 assert(VM_Version::supports_popcnt(), "must support");
2328 emit_byte(0xF3);
2329 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2330 emit_byte(0x0F);
2331 emit_byte(0xB8);
2332 emit_byte(0xC0 | encode);
2333 }
2335 void Assembler::popf() {
2336 emit_byte(0x9D);
2337 }
2339 #ifndef _LP64 // no 32bit push/pop on amd64
2340 void Assembler::popl(Address dst) {
2341 // NOTE: this will adjust stack by 8byte on 64bits
2342 InstructionMark im(this);
2343 prefix(dst);
2344 emit_byte(0x8F);
2345 emit_operand(rax, dst);
2346 }
2347 #endif
2349 void Assembler::prefetch_prefix(Address src) {
2350 prefix(src);
2351 emit_byte(0x0F);
2352 }
2354 void Assembler::prefetchnta(Address src) {
2355 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2356 InstructionMark im(this);
2357 prefetch_prefix(src);
2358 emit_byte(0x18);
2359 emit_operand(rax, src); // 0, src
2360 }
2362 void Assembler::prefetchr(Address src) {
2363 assert(VM_Version::supports_3dnow_prefetch(), "must support");
2364 InstructionMark im(this);
2365 prefetch_prefix(src);
2366 emit_byte(0x0D);
2367 emit_operand(rax, src); // 0, src
2368 }
2370 void Assembler::prefetcht0(Address src) {
2371 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2372 InstructionMark im(this);
2373 prefetch_prefix(src);
2374 emit_byte(0x18);
2375 emit_operand(rcx, src); // 1, src
2376 }
2378 void Assembler::prefetcht1(Address src) {
2379 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2380 InstructionMark im(this);
2381 prefetch_prefix(src);
2382 emit_byte(0x18);
2383 emit_operand(rdx, src); // 2, src
2384 }
2386 void Assembler::prefetcht2(Address src) {
2387 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2388 InstructionMark im(this);
2389 prefetch_prefix(src);
2390 emit_byte(0x18);
2391 emit_operand(rbx, src); // 3, src
2392 }
2394 void Assembler::prefetchw(Address src) {
2395 assert(VM_Version::supports_3dnow_prefetch(), "must support");
2396 InstructionMark im(this);
2397 prefetch_prefix(src);
2398 emit_byte(0x0D);
2399 emit_operand(rcx, src); // 1, src
2400 }
2402 void Assembler::prefix(Prefix p) {
2403 a_byte(p);
2404 }
2406 void Assembler::por(XMMRegister dst, XMMRegister src) {
2407 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2408 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
2409 emit_byte(0xEB);
2410 emit_byte(0xC0 | encode);
2411 }
2413 void Assembler::por(XMMRegister dst, Address src) {
2414 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2415 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2416 InstructionMark im(this);
2417 simd_prefix(dst, dst, src, VEX_SIMD_66);
2418 emit_byte(0xEB);
2419 emit_operand(dst, src);
2420 }
2422 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2423 assert(isByte(mode), "invalid value");
2424 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2425 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
2426 emit_byte(0x70);
2427 emit_byte(0xC0 | encode);
2428 emit_byte(mode & 0xFF);
2430 }
2432 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2433 assert(isByte(mode), "invalid value");
2434 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2435 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2436 InstructionMark im(this);
2437 simd_prefix(dst, src, VEX_SIMD_66);
2438 emit_byte(0x70);
2439 emit_operand(dst, src);
2440 emit_byte(mode & 0xFF);
2441 }
2443 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2444 assert(isByte(mode), "invalid value");
2445 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2446 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
2447 emit_byte(0x70);
2448 emit_byte(0xC0 | encode);
2449 emit_byte(mode & 0xFF);
2450 }
2452 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2453 assert(isByte(mode), "invalid value");
2454 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2455 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2456 InstructionMark im(this);
2457 simd_prefix(dst, src, VEX_SIMD_F2);
2458 emit_byte(0x70);
2459 emit_operand(dst, src);
2460 emit_byte(mode & 0xFF);
2461 }
2463 void Assembler::psrlq(XMMRegister dst, int shift) {
2464 // Shift 64 bit value logically right by specified number of bits.
2465 // HMM Table D-1 says sse2 or mmx.
2466 // Do not confuse it with psrldq SSE2 instruction which
2467 // shifts 128 bit value in xmm register by number of bytes.
2468 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2469 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
2470 emit_byte(0x73);
2471 emit_byte(0xC0 | encode);
2472 emit_byte(shift);
2473 }
2475 void Assembler::psrldq(XMMRegister dst, int shift) {
2476 // Shift 128 bit value in xmm register by number of bytes.
2477 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2478 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
2479 emit_byte(0x73);
2480 emit_byte(0xC0 | encode);
2481 emit_byte(shift);
2482 }
2484 void Assembler::ptest(XMMRegister dst, Address src) {
2485 assert(VM_Version::supports_sse4_1(), "");
2486 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2487 InstructionMark im(this);
2488 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2489 emit_byte(0x17);
2490 emit_operand(dst, src);
2491 }
2493 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2494 assert(VM_Version::supports_sse4_1(), "");
2495 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2496 emit_byte(0x17);
2497 emit_byte(0xC0 | encode);
2498 }
2500 void Assembler::punpcklbw(XMMRegister dst, Address src) {
2501 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2502 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2503 InstructionMark im(this);
2504 simd_prefix(dst, dst, src, VEX_SIMD_66);
2505 emit_byte(0x60);
2506 emit_operand(dst, src);
2507 }
2509 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2510 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2511 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
2512 emit_byte(0x60);
2513 emit_byte(0xC0 | encode);
2514 }
2516 void Assembler::punpckldq(XMMRegister dst, Address src) {
2517 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2518 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2519 InstructionMark im(this);
2520 simd_prefix(dst, dst, src, VEX_SIMD_66);
2521 emit_byte(0x62);
2522 emit_operand(dst, src);
2523 }
2525 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
2526 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2527 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
2528 emit_byte(0x62);
2529 emit_byte(0xC0 | encode);
2530 }
2532 void Assembler::push(int32_t imm32) {
2533 // in 64bits we push 64bits onto the stack but only
2534 // take a 32bit immediate
2535 emit_byte(0x68);
2536 emit_long(imm32);
2537 }
2539 void Assembler::push(Register src) {
2540 int encode = prefix_and_encode(src->encoding());
2542 emit_byte(0x50 | encode);
2543 }
2545 void Assembler::pushf() {
2546 emit_byte(0x9C);
2547 }
2549 #ifndef _LP64 // no 32bit push/pop on amd64
2550 void Assembler::pushl(Address src) {
2551 // Note this will push 64bit on 64bit
2552 InstructionMark im(this);
2553 prefix(src);
2554 emit_byte(0xFF);
2555 emit_operand(rsi, src);
2556 }
2557 #endif
2559 void Assembler::pxor(XMMRegister dst, Address src) {
2560 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2561 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2562 InstructionMark im(this);
2563 simd_prefix(dst, dst, src, VEX_SIMD_66);
2564 emit_byte(0xEF);
2565 emit_operand(dst, src);
2566 }
2568 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2569 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2570 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
2571 emit_byte(0xEF);
2572 emit_byte(0xC0 | encode);
2573 }
2575 void Assembler::rcll(Register dst, int imm8) {
2576 assert(isShiftCount(imm8), "illegal shift count");
2577 int encode = prefix_and_encode(dst->encoding());
2578 if (imm8 == 1) {
2579 emit_byte(0xD1);
2580 emit_byte(0xD0 | encode);
2581 } else {
2582 emit_byte(0xC1);
2583 emit_byte(0xD0 | encode);
2584 emit_byte(imm8);
2585 }
2586 }
2588 // copies data from [esi] to [edi] using rcx pointer sized words
2589 // generic
2590 void Assembler::rep_mov() {
2591 emit_byte(0xF3);
2592 // MOVSQ
2593 LP64_ONLY(prefix(REX_W));
2594 emit_byte(0xA5);
2595 }
2597 // sets rcx pointer sized words with rax, value at [edi]
2598 // generic
2599 void Assembler::rep_set() { // rep_set
2600 emit_byte(0xF3);
2601 // STOSQ
2602 LP64_ONLY(prefix(REX_W));
2603 emit_byte(0xAB);
2604 }
2606 // scans rcx pointer sized words at [edi] for occurrence of rax,
2607 // generic
2608 void Assembler::repne_scan() { // repne_scan
2609 emit_byte(0xF2);
2610 // SCASQ
2611 LP64_ONLY(prefix(REX_W));
2612 emit_byte(0xAF);
2613 }
2615 #ifdef _LP64
2616 // scans rcx 4 byte words at [edi] for occurrence of rax,
2617 // generic
2618 void Assembler::repne_scanl() { // repne_scan
2619 emit_byte(0xF2);
2620 // SCASL
2621 emit_byte(0xAF);
2622 }
2623 #endif
2625 void Assembler::ret(int imm16) {
2626 if (imm16 == 0) {
2627 emit_byte(0xC3);
2628 } else {
2629 emit_byte(0xC2);
2630 emit_word(imm16);
2631 }
2632 }
2634 void Assembler::sahf() {
2635 #ifdef _LP64
2636 // Not supported in 64bit mode
2637 ShouldNotReachHere();
2638 #endif
2639 emit_byte(0x9E);
2640 }
2642 void Assembler::sarl(Register dst, int imm8) {
2643 int encode = prefix_and_encode(dst->encoding());
2644 assert(isShiftCount(imm8), "illegal shift count");
2645 if (imm8 == 1) {
2646 emit_byte(0xD1);
2647 emit_byte(0xF8 | encode);
2648 } else {
2649 emit_byte(0xC1);
2650 emit_byte(0xF8 | encode);
2651 emit_byte(imm8);
2652 }
2653 }
2655 void Assembler::sarl(Register dst) {
2656 int encode = prefix_and_encode(dst->encoding());
2657 emit_byte(0xD3);
2658 emit_byte(0xF8 | encode);
2659 }
2661 void Assembler::sbbl(Address dst, int32_t imm32) {
2662 InstructionMark im(this);
2663 prefix(dst);
2664 emit_arith_operand(0x81, rbx, dst, imm32);
2665 }
2667 void Assembler::sbbl(Register dst, int32_t imm32) {
2668 prefix(dst);
2669 emit_arith(0x81, 0xD8, dst, imm32);
2670 }
2673 void Assembler::sbbl(Register dst, Address src) {
2674 InstructionMark im(this);
2675 prefix(src, dst);
2676 emit_byte(0x1B);
2677 emit_operand(dst, src);
2678 }
2680 void Assembler::sbbl(Register dst, Register src) {
2681 (void) prefix_and_encode(dst->encoding(), src->encoding());
2682 emit_arith(0x1B, 0xC0, dst, src);
2683 }
2685 void Assembler::setb(Condition cc, Register dst) {
2686 assert(0 <= cc && cc < 16, "illegal cc");
2687 int encode = prefix_and_encode(dst->encoding(), true);
2688 emit_byte(0x0F);
2689 emit_byte(0x90 | cc);
2690 emit_byte(0xC0 | encode);
2691 }
2693 void Assembler::shll(Register dst, int imm8) {
2694 assert(isShiftCount(imm8), "illegal shift count");
2695 int encode = prefix_and_encode(dst->encoding());
2696 if (imm8 == 1 ) {
2697 emit_byte(0xD1);
2698 emit_byte(0xE0 | encode);
2699 } else {
2700 emit_byte(0xC1);
2701 emit_byte(0xE0 | encode);
2702 emit_byte(imm8);
2703 }
2704 }
2706 void Assembler::shll(Register dst) {
2707 int encode = prefix_and_encode(dst->encoding());
2708 emit_byte(0xD3);
2709 emit_byte(0xE0 | encode);
2710 }
2712 void Assembler::shrl(Register dst, int imm8) {
2713 assert(isShiftCount(imm8), "illegal shift count");
2714 int encode = prefix_and_encode(dst->encoding());
2715 emit_byte(0xC1);
2716 emit_byte(0xE8 | encode);
2717 emit_byte(imm8);
2718 }
2720 void Assembler::shrl(Register dst) {
2721 int encode = prefix_and_encode(dst->encoding());
2722 emit_byte(0xD3);
2723 emit_byte(0xE8 | encode);
2724 }
2726 // copies a single word from [esi] to [edi]
2727 void Assembler::smovl() {
2728 emit_byte(0xA5);
2729 }
2731 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2732 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2733 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
2734 emit_byte(0x51);
2735 emit_byte(0xC0 | encode);
2736 }
2738 void Assembler::sqrtsd(XMMRegister dst, Address src) {
2739 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2740 InstructionMark im(this);
2741 simd_prefix(dst, dst, src, VEX_SIMD_F2);
2742 emit_byte(0x51);
2743 emit_operand(dst, src);
2744 }
2746 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2747 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2748 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
2749 emit_byte(0x51);
2750 emit_byte(0xC0 | encode);
2751 }
2753 void Assembler::sqrtss(XMMRegister dst, Address src) {
2754 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2755 InstructionMark im(this);
2756 simd_prefix(dst, dst, src, VEX_SIMD_F3);
2757 emit_byte(0x51);
2758 emit_operand(dst, src);
2759 }
2761 void Assembler::stmxcsr( Address dst) {
2762 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2763 InstructionMark im(this);
2764 prefix(dst);
2765 emit_byte(0x0F);
2766 emit_byte(0xAE);
2767 emit_operand(as_Register(3), dst);
2768 }
2770 void Assembler::subl(Address dst, int32_t imm32) {
2771 InstructionMark im(this);
2772 prefix(dst);
2773 emit_arith_operand(0x81, rbp, dst, imm32);
2774 }
2776 void Assembler::subl(Address dst, Register src) {
2777 InstructionMark im(this);
2778 prefix(dst, src);
2779 emit_byte(0x29);
2780 emit_operand(src, dst);
2781 }
2783 void Assembler::subl(Register dst, int32_t imm32) {
2784 prefix(dst);
2785 emit_arith(0x81, 0xE8, dst, imm32);
2786 }
2788 // Force generation of a 4 byte immediate value even if it fits into 8bit
2789 void Assembler::subl_imm32(Register dst, int32_t imm32) {
2790 prefix(dst);
2791 emit_arith_imm32(0x81, 0xE8, dst, imm32);
2792 }
2794 void Assembler::subl(Register dst, Address src) {
2795 InstructionMark im(this);
2796 prefix(src, dst);
2797 emit_byte(0x2B);
2798 emit_operand(dst, src);
2799 }
2801 void Assembler::subl(Register dst, Register src) {
2802 (void) prefix_and_encode(dst->encoding(), src->encoding());
2803 emit_arith(0x2B, 0xC0, dst, src);
2804 }
2806 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2807 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2808 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
2809 emit_byte(0x5C);
2810 emit_byte(0xC0 | encode);
2811 }
2813 void Assembler::subsd(XMMRegister dst, Address src) {
2814 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2815 InstructionMark im(this);
2816 simd_prefix(dst, dst, src, VEX_SIMD_F2);
2817 emit_byte(0x5C);
2818 emit_operand(dst, src);
2819 }
2821 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2822 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2823 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
2824 emit_byte(0x5C);
2825 emit_byte(0xC0 | encode);
2826 }
2828 void Assembler::subss(XMMRegister dst, Address src) {
2829 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2830 InstructionMark im(this);
2831 simd_prefix(dst, dst, src, VEX_SIMD_F3);
2832 emit_byte(0x5C);
2833 emit_operand(dst, src);
2834 }
2836 void Assembler::testb(Register dst, int imm8) {
2837 NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2838 (void) prefix_and_encode(dst->encoding(), true);
2839 emit_arith_b(0xF6, 0xC0, dst, imm8);
2840 }
2842 void Assembler::testl(Register dst, int32_t imm32) {
2843 // not using emit_arith because test
2844 // doesn't support sign-extension of
2845 // 8bit operands
2846 int encode = dst->encoding();
2847 if (encode == 0) {
2848 emit_byte(0xA9);
2849 } else {
2850 encode = prefix_and_encode(encode);
2851 emit_byte(0xF7);
2852 emit_byte(0xC0 | encode);
2853 }
2854 emit_long(imm32);
2855 }
2857 void Assembler::testl(Register dst, Register src) {
2858 (void) prefix_and_encode(dst->encoding(), src->encoding());
2859 emit_arith(0x85, 0xC0, dst, src);
2860 }
2862 void Assembler::testl(Register dst, Address src) {
2863 InstructionMark im(this);
2864 prefix(src, dst);
2865 emit_byte(0x85);
2866 emit_operand(dst, src);
2867 }
2869 void Assembler::ucomisd(XMMRegister dst, Address src) {
2870 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2871 InstructionMark im(this);
2872 simd_prefix(dst, src, VEX_SIMD_66);
2873 emit_byte(0x2E);
2874 emit_operand(dst, src);
2875 }
2877 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2878 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2879 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
2880 emit_byte(0x2E);
2881 emit_byte(0xC0 | encode);
2882 }
2884 void Assembler::ucomiss(XMMRegister dst, Address src) {
2885 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2886 InstructionMark im(this);
2887 simd_prefix(dst, src, VEX_SIMD_NONE);
2888 emit_byte(0x2E);
2889 emit_operand(dst, src);
2890 }
2892 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2893 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2894 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
2895 emit_byte(0x2E);
2896 emit_byte(0xC0 | encode);
2897 }
2900 void Assembler::xaddl(Address dst, Register src) {
2901 InstructionMark im(this);
2902 prefix(dst, src);
2903 emit_byte(0x0F);
2904 emit_byte(0xC1);
2905 emit_operand(src, dst);
2906 }
2908 void Assembler::xchgl(Register dst, Address src) { // xchg
2909 InstructionMark im(this);
2910 prefix(src, dst);
2911 emit_byte(0x87);
2912 emit_operand(dst, src);
2913 }
2915 void Assembler::xchgl(Register dst, Register src) {
2916 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2917 emit_byte(0x87);
2918 emit_byte(0xc0 | encode);
2919 }
2921 void Assembler::xorl(Register dst, int32_t imm32) {
2922 prefix(dst);
2923 emit_arith(0x81, 0xF0, dst, imm32);
2924 }
2926 void Assembler::xorl(Register dst, Address src) {
2927 InstructionMark im(this);
2928 prefix(src, dst);
2929 emit_byte(0x33);
2930 emit_operand(dst, src);
2931 }
2933 void Assembler::xorl(Register dst, Register src) {
2934 (void) prefix_and_encode(dst->encoding(), src->encoding());
2935 emit_arith(0x33, 0xC0, dst, src);
2936 }
2938 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
2939 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2940 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
2941 emit_byte(0x57);
2942 emit_byte(0xC0 | encode);
2943 }
2945 void Assembler::xorpd(XMMRegister dst, Address src) {
2946 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2947 InstructionMark im(this);
2948 simd_prefix(dst, dst, src, VEX_SIMD_66);
2949 emit_byte(0x57);
2950 emit_operand(dst, src);
2951 }
2954 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
2955 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2956 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
2957 emit_byte(0x57);
2958 emit_byte(0xC0 | encode);
2959 }
2961 void Assembler::xorps(XMMRegister dst, Address src) {
2962 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2963 InstructionMark im(this);
2964 simd_prefix(dst, dst, src, VEX_SIMD_NONE);
2965 emit_byte(0x57);
2966 emit_operand(dst, src);
2967 }
2969 // AVX 3-operands non destructive source instructions (encoded with VEX prefix)
2971 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
2972 assert(VM_Version::supports_avx(), "");
2973 InstructionMark im(this);
2974 vex_prefix(dst, nds, src, VEX_SIMD_F2);
2975 emit_byte(0x58);
2976 emit_operand(dst, src);
2977 }
2979 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2980 assert(VM_Version::supports_avx(), "");
2981 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
2982 emit_byte(0x58);
2983 emit_byte(0xC0 | encode);
2984 }
2986 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
2987 assert(VM_Version::supports_avx(), "");
2988 InstructionMark im(this);
2989 vex_prefix(dst, nds, src, VEX_SIMD_F3);
2990 emit_byte(0x58);
2991 emit_operand(dst, src);
2992 }
2994 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2995 assert(VM_Version::supports_avx(), "");
2996 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
2997 emit_byte(0x58);
2998 emit_byte(0xC0 | encode);
2999 }
3001 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
3002 assert(VM_Version::supports_avx(), "");
3003 InstructionMark im(this);
3004 vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
3005 emit_byte(0x54);
3006 emit_operand(dst, src);
3007 }
3009 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
3010 assert(VM_Version::supports_avx(), "");
3011 InstructionMark im(this);
3012 vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
3013 emit_byte(0x54);
3014 emit_operand(dst, src);
3015 }
3017 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3018 assert(VM_Version::supports_avx(), "");
3019 InstructionMark im(this);
3020 vex_prefix(dst, nds, src, VEX_SIMD_F2);
3021 emit_byte(0x5E);
3022 emit_operand(dst, src);
3023 }
3025 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3026 assert(VM_Version::supports_avx(), "");
3027 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
3028 emit_byte(0x5E);
3029 emit_byte(0xC0 | encode);
3030 }
3032 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3033 assert(VM_Version::supports_avx(), "");
3034 InstructionMark im(this);
3035 vex_prefix(dst, nds, src, VEX_SIMD_F3);
3036 emit_byte(0x5E);
3037 emit_operand(dst, src);
3038 }
3040 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3041 assert(VM_Version::supports_avx(), "");
3042 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
3043 emit_byte(0x5E);
3044 emit_byte(0xC0 | encode);
3045 }
3047 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3048 assert(VM_Version::supports_avx(), "");
3049 InstructionMark im(this);
3050 vex_prefix(dst, nds, src, VEX_SIMD_F2);
3051 emit_byte(0x59);
3052 emit_operand(dst, src);
3053 }
3055 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3056 assert(VM_Version::supports_avx(), "");
3057 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
3058 emit_byte(0x59);
3059 emit_byte(0xC0 | encode);
3060 }
3062 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3063 InstructionMark im(this);
3064 vex_prefix(dst, nds, src, VEX_SIMD_F3);
3065 emit_byte(0x59);
3066 emit_operand(dst, src);
3067 }
3069 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3070 assert(VM_Version::supports_avx(), "");
3071 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
3072 emit_byte(0x59);
3073 emit_byte(0xC0 | encode);
3074 }
3077 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3078 assert(VM_Version::supports_avx(), "");
3079 InstructionMark im(this);
3080 vex_prefix(dst, nds, src, VEX_SIMD_F2);
3081 emit_byte(0x5C);
3082 emit_operand(dst, src);
3083 }
3085 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3086 assert(VM_Version::supports_avx(), "");
3087 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
3088 emit_byte(0x5C);
3089 emit_byte(0xC0 | encode);
3090 }
3092 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3093 assert(VM_Version::supports_avx(), "");
3094 InstructionMark im(this);
3095 vex_prefix(dst, nds, src, VEX_SIMD_F3);
3096 emit_byte(0x5C);
3097 emit_operand(dst, src);
3098 }
3100 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3101 assert(VM_Version::supports_avx(), "");
3102 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
3103 emit_byte(0x5C);
3104 emit_byte(0xC0 | encode);
3105 }
3107 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
3108 assert(VM_Version::supports_avx(), "");
3109 InstructionMark im(this);
3110 vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
3111 emit_byte(0x57);
3112 emit_operand(dst, src);
3113 }
3115 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
3116 assert(VM_Version::supports_avx(), "");
3117 InstructionMark im(this);
3118 vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
3119 emit_byte(0x57);
3120 emit_operand(dst, src);
3121 }
3124 #ifndef _LP64
3125 // 32bit only pieces of the assembler
3127 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3128 // NO PREFIX AS NEVER 64BIT
3129 InstructionMark im(this);
3130 emit_byte(0x81);
3131 emit_byte(0xF8 | src1->encoding());
3132 emit_data(imm32, rspec, 0);
3133 }
3135 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3136 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
3137 InstructionMark im(this);
3138 emit_byte(0x81);
3139 emit_operand(rdi, src1);
3140 emit_data(imm32, rspec, 0);
3141 }
3143 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
3144 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
3145 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
3146 void Assembler::cmpxchg8(Address adr) {
3147 InstructionMark im(this);
3148 emit_byte(0x0F);
3149 emit_byte(0xc7);
3150 emit_operand(rcx, adr);
3151 }
3153 void Assembler::decl(Register dst) {
3154 // Don't use it directly. Use MacroAssembler::decrementl() instead.
3155 emit_byte(0x48 | dst->encoding());
3156 }
3158 #endif // _LP64
3160 // 64bit typically doesn't use the x87 but needs to for the trig funcs
3162 void Assembler::fabs() {
3163 emit_byte(0xD9);
3164 emit_byte(0xE1);
3165 }
3167 void Assembler::fadd(int i) {
3168 emit_farith(0xD8, 0xC0, i);
3169 }
3171 void Assembler::fadd_d(Address src) {
3172 InstructionMark im(this);
3173 emit_byte(0xDC);
3174 emit_operand32(rax, src);
3175 }
3177 void Assembler::fadd_s(Address src) {
3178 InstructionMark im(this);
3179 emit_byte(0xD8);
3180 emit_operand32(rax, src);
3181 }
3183 void Assembler::fadda(int i) {
3184 emit_farith(0xDC, 0xC0, i);
3185 }
3187 void Assembler::faddp(int i) {
3188 emit_farith(0xDE, 0xC0, i);
3189 }
3191 void Assembler::fchs() {
3192 emit_byte(0xD9);
3193 emit_byte(0xE0);
3194 }
3196 void Assembler::fcom(int i) {
3197 emit_farith(0xD8, 0xD0, i);
3198 }
3200 void Assembler::fcomp(int i) {
3201 emit_farith(0xD8, 0xD8, i);
3202 }
3204 void Assembler::fcomp_d(Address src) {
3205 InstructionMark im(this);
3206 emit_byte(0xDC);
3207 emit_operand32(rbx, src);
3208 }
3210 void Assembler::fcomp_s(Address src) {
3211 InstructionMark im(this);
3212 emit_byte(0xD8);
3213 emit_operand32(rbx, src);
3214 }
3216 void Assembler::fcompp() {
3217 emit_byte(0xDE);
3218 emit_byte(0xD9);
3219 }
3221 void Assembler::fcos() {
3222 emit_byte(0xD9);
3223 emit_byte(0xFF);
3224 }
3226 void Assembler::fdecstp() {
3227 emit_byte(0xD9);
3228 emit_byte(0xF6);
3229 }
3231 void Assembler::fdiv(int i) {
3232 emit_farith(0xD8, 0xF0, i);
3233 }
3235 void Assembler::fdiv_d(Address src) {
3236 InstructionMark im(this);
3237 emit_byte(0xDC);
3238 emit_operand32(rsi, src);
3239 }
3241 void Assembler::fdiv_s(Address src) {
3242 InstructionMark im(this);
3243 emit_byte(0xD8);
3244 emit_operand32(rsi, src);
3245 }
3247 void Assembler::fdiva(int i) {
3248 emit_farith(0xDC, 0xF8, i);
3249 }
3251 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
3252 // is erroneous for some of the floating-point instructions below.
3254 void Assembler::fdivp(int i) {
3255 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
3256 }
3258 void Assembler::fdivr(int i) {
3259 emit_farith(0xD8, 0xF8, i);
3260 }
3262 void Assembler::fdivr_d(Address src) {
3263 InstructionMark im(this);
3264 emit_byte(0xDC);
3265 emit_operand32(rdi, src);
3266 }
3268 void Assembler::fdivr_s(Address src) {
3269 InstructionMark im(this);
3270 emit_byte(0xD8);
3271 emit_operand32(rdi, src);
3272 }
3274 void Assembler::fdivra(int i) {
3275 emit_farith(0xDC, 0xF0, i);
3276 }
3278 void Assembler::fdivrp(int i) {
3279 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
3280 }
3282 void Assembler::ffree(int i) {
3283 emit_farith(0xDD, 0xC0, i);
3284 }
3286 void Assembler::fild_d(Address adr) {
3287 InstructionMark im(this);
3288 emit_byte(0xDF);
3289 emit_operand32(rbp, adr);
3290 }
3292 void Assembler::fild_s(Address adr) {
3293 InstructionMark im(this);
3294 emit_byte(0xDB);
3295 emit_operand32(rax, adr);
3296 }
3298 void Assembler::fincstp() {
3299 emit_byte(0xD9);
3300 emit_byte(0xF7);
3301 }
3303 void Assembler::finit() {
3304 emit_byte(0x9B);
3305 emit_byte(0xDB);
3306 emit_byte(0xE3);
3307 }
3309 void Assembler::fist_s(Address adr) {
3310 InstructionMark im(this);
3311 emit_byte(0xDB);
3312 emit_operand32(rdx, adr);
3313 }
3315 void Assembler::fistp_d(Address adr) {
3316 InstructionMark im(this);
3317 emit_byte(0xDF);
3318 emit_operand32(rdi, adr);
3319 }
3321 void Assembler::fistp_s(Address adr) {
3322 InstructionMark im(this);
3323 emit_byte(0xDB);
3324 emit_operand32(rbx, adr);
3325 }
3327 void Assembler::fld1() {
3328 emit_byte(0xD9);
3329 emit_byte(0xE8);
3330 }
3332 void Assembler::fld_d(Address adr) {
3333 InstructionMark im(this);
3334 emit_byte(0xDD);
3335 emit_operand32(rax, adr);
3336 }
3338 void Assembler::fld_s(Address adr) {
3339 InstructionMark im(this);
3340 emit_byte(0xD9);
3341 emit_operand32(rax, adr);
3342 }
3345 void Assembler::fld_s(int index) {
3346 emit_farith(0xD9, 0xC0, index);
3347 }
3349 void Assembler::fld_x(Address adr) {
3350 InstructionMark im(this);
3351 emit_byte(0xDB);
3352 emit_operand32(rbp, adr);
3353 }
3355 void Assembler::fldcw(Address src) {
3356 InstructionMark im(this);
3357 emit_byte(0xd9);
3358 emit_operand32(rbp, src);
3359 }
3361 void Assembler::fldenv(Address src) {
3362 InstructionMark im(this);
3363 emit_byte(0xD9);
3364 emit_operand32(rsp, src);
3365 }
3367 void Assembler::fldlg2() {
3368 emit_byte(0xD9);
3369 emit_byte(0xEC);
3370 }
3372 void Assembler::fldln2() {
3373 emit_byte(0xD9);
3374 emit_byte(0xED);
3375 }
3377 void Assembler::fldz() {
3378 emit_byte(0xD9);
3379 emit_byte(0xEE);
3380 }
3382 void Assembler::flog() {
3383 fldln2();
3384 fxch();
3385 fyl2x();
3386 }
3388 void Assembler::flog10() {
3389 fldlg2();
3390 fxch();
3391 fyl2x();
3392 }
3394 void Assembler::fmul(int i) {
3395 emit_farith(0xD8, 0xC8, i);
3396 }
3398 void Assembler::fmul_d(Address src) {
3399 InstructionMark im(this);
3400 emit_byte(0xDC);
3401 emit_operand32(rcx, src);
3402 }
3404 void Assembler::fmul_s(Address src) {
3405 InstructionMark im(this);
3406 emit_byte(0xD8);
3407 emit_operand32(rcx, src);
3408 }
3410 void Assembler::fmula(int i) {
3411 emit_farith(0xDC, 0xC8, i);
3412 }
3414 void Assembler::fmulp(int i) {
3415 emit_farith(0xDE, 0xC8, i);
3416 }
3418 void Assembler::fnsave(Address dst) {
3419 InstructionMark im(this);
3420 emit_byte(0xDD);
3421 emit_operand32(rsi, dst);
3422 }
3424 void Assembler::fnstcw(Address src) {
3425 InstructionMark im(this);
3426 emit_byte(0x9B);
3427 emit_byte(0xD9);
3428 emit_operand32(rdi, src);
3429 }
3431 void Assembler::fnstsw_ax() {
3432 emit_byte(0xdF);
3433 emit_byte(0xE0);
3434 }
3436 void Assembler::fprem() {
3437 emit_byte(0xD9);
3438 emit_byte(0xF8);
3439 }
3441 void Assembler::fprem1() {
3442 emit_byte(0xD9);
3443 emit_byte(0xF5);
3444 }
3446 void Assembler::frstor(Address src) {
3447 InstructionMark im(this);
3448 emit_byte(0xDD);
3449 emit_operand32(rsp, src);
3450 }
3452 void Assembler::fsin() {
3453 emit_byte(0xD9);
3454 emit_byte(0xFE);
3455 }
3457 void Assembler::fsqrt() {
3458 emit_byte(0xD9);
3459 emit_byte(0xFA);
3460 }
3462 void Assembler::fst_d(Address adr) {
3463 InstructionMark im(this);
3464 emit_byte(0xDD);
3465 emit_operand32(rdx, adr);
3466 }
3468 void Assembler::fst_s(Address adr) {
3469 InstructionMark im(this);
3470 emit_byte(0xD9);
3471 emit_operand32(rdx, adr);
3472 }
3474 void Assembler::fstp_d(Address adr) {
3475 InstructionMark im(this);
3476 emit_byte(0xDD);
3477 emit_operand32(rbx, adr);
3478 }
3480 void Assembler::fstp_d(int index) {
3481 emit_farith(0xDD, 0xD8, index);
3482 }
3484 void Assembler::fstp_s(Address adr) {
3485 InstructionMark im(this);
3486 emit_byte(0xD9);
3487 emit_operand32(rbx, adr);
3488 }
3490 void Assembler::fstp_x(Address adr) {
3491 InstructionMark im(this);
3492 emit_byte(0xDB);
3493 emit_operand32(rdi, adr);
3494 }
3496 void Assembler::fsub(int i) {
3497 emit_farith(0xD8, 0xE0, i);
3498 }
3500 void Assembler::fsub_d(Address src) {
3501 InstructionMark im(this);
3502 emit_byte(0xDC);
3503 emit_operand32(rsp, src);
3504 }
3506 void Assembler::fsub_s(Address src) {
3507 InstructionMark im(this);
3508 emit_byte(0xD8);
3509 emit_operand32(rsp, src);
3510 }
3512 void Assembler::fsuba(int i) {
3513 emit_farith(0xDC, 0xE8, i);
3514 }
3516 void Assembler::fsubp(int i) {
3517 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
3518 }
3520 void Assembler::fsubr(int i) {
3521 emit_farith(0xD8, 0xE8, i);
3522 }
3524 void Assembler::fsubr_d(Address src) {
3525 InstructionMark im(this);
3526 emit_byte(0xDC);
3527 emit_operand32(rbp, src);
3528 }
3530 void Assembler::fsubr_s(Address src) {
3531 InstructionMark im(this);
3532 emit_byte(0xD8);
3533 emit_operand32(rbp, src);
3534 }
3536 void Assembler::fsubra(int i) {
3537 emit_farith(0xDC, 0xE0, i);
3538 }
3540 void Assembler::fsubrp(int i) {
3541 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
3542 }
3544 void Assembler::ftan() {
3545 emit_byte(0xD9);
3546 emit_byte(0xF2);
3547 emit_byte(0xDD);
3548 emit_byte(0xD8);
3549 }
3551 void Assembler::ftst() {
3552 emit_byte(0xD9);
3553 emit_byte(0xE4);
3554 }
3556 void Assembler::fucomi(int i) {
3557 // make sure the instruction is supported (introduced for P6, together with cmov)
3558 guarantee(VM_Version::supports_cmov(), "illegal instruction");
3559 emit_farith(0xDB, 0xE8, i);
3560 }
3562 void Assembler::fucomip(int i) {
3563 // make sure the instruction is supported (introduced for P6, together with cmov)
3564 guarantee(VM_Version::supports_cmov(), "illegal instruction");
3565 emit_farith(0xDF, 0xE8, i);
3566 }
3568 void Assembler::fwait() {
3569 emit_byte(0x9B);
3570 }
3572 void Assembler::fxch(int i) {
3573 emit_farith(0xD9, 0xC8, i);
3574 }
3576 void Assembler::fyl2x() {
3577 emit_byte(0xD9);
3578 emit_byte(0xF1);
3579 }
3581 void Assembler::frndint() {
3582 emit_byte(0xD9);
3583 emit_byte(0xFC);
3584 }
3586 void Assembler::f2xm1() {
3587 emit_byte(0xD9);
3588 emit_byte(0xF0);
3589 }
3591 void Assembler::fldl2e() {
3592 emit_byte(0xD9);
3593 emit_byte(0xEA);
3594 }
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
// Index 0 means "no prefix byte"; the tables are read-only lookup data.
static const int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
// A zero entry means that no second opcode byte is emitted.
static const int simd_opc[4] = { 0, 0, 0x38, 0x3A };
3601 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
3602 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
3603 if (pre > 0) {
3604 emit_byte(simd_pre[pre]);
3605 }
3606 if (rex_w) {
3607 prefixq(adr, xreg);
3608 } else {
3609 prefix(adr, xreg);
3610 }
3611 if (opc > 0) {
3612 emit_byte(0x0F);
3613 int opc2 = simd_opc[opc];
3614 if (opc2 > 0) {
3615 emit_byte(opc2);
3616 }
3617 }
3618 }
3620 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
3621 if (pre > 0) {
3622 emit_byte(simd_pre[pre]);
3623 }
3624 int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
3625 prefix_and_encode(dst_enc, src_enc);
3626 if (opc > 0) {
3627 emit_byte(0x0F);
3628 int opc2 = simd_opc[opc];
3629 if (opc2 > 0) {
3630 emit_byte(opc2);
3631 }
3632 }
3633 return encode;
3634 }
3637 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
3638 if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
3639 prefix(VEX_3bytes);
3641 int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
3642 byte1 = (~byte1) & 0xE0;
3643 byte1 |= opc;
3644 a_byte(byte1);
3646 int byte2 = ((~nds_enc) & 0xf) << 3;
3647 byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
3648 emit_byte(byte2);
3649 } else {
3650 prefix(VEX_2bytes);
3652 int byte1 = vex_r ? VEX_R : 0;
3653 byte1 = (~byte1) & 0x80;
3654 byte1 |= ((~nds_enc) & 0xf) << 3;
3655 byte1 |= (vector256 ? 4 : 0) | pre;
3656 emit_byte(byte1);
3657 }
3658 }
3660 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
3661 bool vex_r = (xreg_enc >= 8);
3662 bool vex_b = adr.base_needs_rex();
3663 bool vex_x = adr.index_needs_rex();
3664 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
3665 }
3667 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
3668 bool vex_r = (dst_enc >= 8);
3669 bool vex_b = (src_enc >= 8);
3670 bool vex_x = false;
3671 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
3672 return (((dst_enc & 7) << 3) | (src_enc & 7));
3673 }
3676 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
3677 if (UseAVX > 0) {
3678 int xreg_enc = xreg->encoding();
3679 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3680 vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
3681 } else {
3682 assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
3683 rex_prefix(adr, xreg, pre, opc, rex_w);
3684 }
3685 }
3687 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
3688 int dst_enc = dst->encoding();
3689 int src_enc = src->encoding();
3690 if (UseAVX > 0) {
3691 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3692 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
3693 } else {
3694 assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
3695 return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
3696 }
3697 }
3699 #ifndef _LP64
3701 void Assembler::incl(Register dst) {
3702 // Don't use it directly. Use MacroAssembler::incrementl() instead.
3703 emit_byte(0x40 | dst->encoding());
3704 }
3706 void Assembler::lea(Register dst, Address src) {
3707 leal(dst, src);
3708 }
3710 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
3711 InstructionMark im(this);
3712 emit_byte(0xC7);
3713 emit_operand(rax, dst);
3714 emit_data((int)imm32, rspec, 0);
3715 }
3717 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
3718 InstructionMark im(this);
3719 int encode = prefix_and_encode(dst->encoding());
3720 emit_byte(0xB8 | encode);
3721 emit_data((int)imm32, rspec, 0);
3722 }
3724 void Assembler::popa() { // 32bit
3725 emit_byte(0x61);
3726 }
3728 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
3729 InstructionMark im(this);
3730 emit_byte(0x68);
3731 emit_data(imm32, rspec, 0);
3732 }
3734 void Assembler::pusha() { // 32bit
3735 emit_byte(0x60);
3736 }
3738 void Assembler::set_byte_if_not_zero(Register dst) {
3739 emit_byte(0x0F);
3740 emit_byte(0x95);
3741 emit_byte(0xE0 | dst->encoding());
3742 }
3744 void Assembler::shldl(Register dst, Register src) {
3745 emit_byte(0x0F);
3746 emit_byte(0xA5);
3747 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3748 }
3750 void Assembler::shrdl(Register dst, Register src) {
3751 emit_byte(0x0F);
3752 emit_byte(0xAD);
3753 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3754 }
3756 #else // LP64
3758 void Assembler::set_byte_if_not_zero(Register dst) {
3759 int enc = prefix_and_encode(dst->encoding(), true);
3760 emit_byte(0x0F);
3761 emit_byte(0x95);
3762 emit_byte(0xE0 | enc);
3763 }
3765 // 64bit only pieces of the assembler
3766 // This should only be used by 64bit instructions that can use rip-relative
3767 // it cannot be used by instructions that want an immediate value.
3769 bool Assembler::reachable(AddressLiteral adr) {
3770 int64_t disp;
3771 // None will force a 64bit literal to the code stream. Likely a placeholder
3772 // for something that will be patched later and we need to certain it will
3773 // always be reachable.
3774 if (adr.reloc() == relocInfo::none) {
3775 return false;
3776 }
3777 if (adr.reloc() == relocInfo::internal_word_type) {
3778 // This should be rip relative and easily reachable.
3779 return true;
3780 }
3781 if (adr.reloc() == relocInfo::virtual_call_type ||
3782 adr.reloc() == relocInfo::opt_virtual_call_type ||
3783 adr.reloc() == relocInfo::static_call_type ||
3784 adr.reloc() == relocInfo::static_stub_type ) {
3785 // This should be rip relative within the code cache and easily
3786 // reachable until we get huge code caches. (At which point
3787 // ic code is going to have issues).
3788 return true;
3789 }
3790 if (adr.reloc() != relocInfo::external_word_type &&
3791 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special
3792 adr.reloc() != relocInfo::poll_type && // relocs to identify them
3793 adr.reloc() != relocInfo::runtime_call_type ) {
3794 return false;
3795 }
3797 // Stress the correction code
3798 if (ForceUnreachable) {
3799 // Must be runtimecall reloc, see if it is in the codecache
3800 // Flipping stuff in the codecache to be unreachable causes issues
3801 // with things like inline caches where the additional instructions
3802 // are not handled.
3803 if (CodeCache::find_blob(adr._target) == NULL) {
3804 return false;
3805 }
3806 }
3807 // For external_word_type/runtime_call_type if it is reachable from where we
3808 // are now (possibly a temp buffer) and where we might end up
3809 // anywhere in the codeCache then we are always reachable.
3810 // This would have to change if we ever save/restore shared code
3811 // to be more pessimistic.
3812 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
3813 if (!is_simm32(disp)) return false;
3814 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
3815 if (!is_simm32(disp)) return false;
3817 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
3819 // Because rip relative is a disp + address_of_next_instruction and we
3820 // don't know the value of address_of_next_instruction we apply a fudge factor
3821 // to make sure we will be ok no matter the size of the instruction we get placed into.
3822 // We don't have to fudge the checks above here because they are already worst case.
3824 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
3825 // + 4 because better safe than sorry.
3826 const int fudge = 12 + 4;
3827 if (disp < 0) {
3828 disp -= fudge;
3829 } else {
3830 disp += fudge;
3831 }
3832 return is_simm32(disp);
3833 }
3835 // Check if the polling page is not reachable from the code cache using rip-relative
3836 // addressing.
3837 bool Assembler::is_polling_page_far() {
3838 intptr_t addr = (intptr_t)os::get_polling_page();
3839 return ForceUnreachable ||
3840 !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
3841 !is_simm32(addr - (intptr_t)CodeCache::high_bound());
3842 }
3844 void Assembler::emit_data64(jlong data,
3845 relocInfo::relocType rtype,
3846 int format) {
3847 if (rtype == relocInfo::none) {
3848 emit_long64(data);
3849 } else {
3850 emit_data64(data, Relocation::spec_simple(rtype), format);
3851 }
3852 }
3854 void Assembler::emit_data64(jlong data,
3855 RelocationHolder const& rspec,
3856 int format) {
3857 assert(imm_operand == 0, "default format must be immediate in this file");
3858 assert(imm_operand == format, "must be immediate");
3859 assert(inst_mark() != NULL, "must be inside InstructionMark");
3860 // Do not use AbstractAssembler::relocate, which is not intended for
3861 // embedded words. Instead, relocate to the enclosing instruction.
3862 code_section()->relocate(inst_mark(), rspec, format);
3863 #ifdef ASSERT
3864 check_relocation(rspec, format);
3865 #endif
3866 emit_long64(data);
3867 }
3869 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
3870 if (reg_enc >= 8) {
3871 prefix(REX_B);
3872 reg_enc -= 8;
3873 } else if (byteinst && reg_enc >= 4) {
3874 prefix(REX);
3875 }
3876 return reg_enc;
3877 }
3879 int Assembler::prefixq_and_encode(int reg_enc) {
3880 if (reg_enc < 8) {
3881 prefix(REX_W);
3882 } else {
3883 prefix(REX_WB);
3884 reg_enc -= 8;
3885 }
3886 return reg_enc;
3887 }
3889 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
3890 if (dst_enc < 8) {
3891 if (src_enc >= 8) {
3892 prefix(REX_B);
3893 src_enc -= 8;
3894 } else if (byteinst && src_enc >= 4) {
3895 prefix(REX);
3896 }
3897 } else {
3898 if (src_enc < 8) {
3899 prefix(REX_R);
3900 } else {
3901 prefix(REX_RB);
3902 src_enc -= 8;
3903 }
3904 dst_enc -= 8;
3905 }
3906 return dst_enc << 3 | src_enc;
3907 }
3909 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
3910 if (dst_enc < 8) {
3911 if (src_enc < 8) {
3912 prefix(REX_W);
3913 } else {
3914 prefix(REX_WB);
3915 src_enc -= 8;
3916 }
3917 } else {
3918 if (src_enc < 8) {
3919 prefix(REX_WR);
3920 } else {
3921 prefix(REX_WRB);
3922 src_enc -= 8;
3923 }
3924 dst_enc -= 8;
3925 }
3926 return dst_enc << 3 | src_enc;
3927 }
3929 void Assembler::prefix(Register reg) {
3930 if (reg->encoding() >= 8) {
3931 prefix(REX_B);
3932 }
3933 }
3935 void Assembler::prefix(Address adr) {
3936 if (adr.base_needs_rex()) {
3937 if (adr.index_needs_rex()) {
3938 prefix(REX_XB);
3939 } else {
3940 prefix(REX_B);
3941 }
3942 } else {
3943 if (adr.index_needs_rex()) {
3944 prefix(REX_X);
3945 }
3946 }
3947 }
3949 void Assembler::prefixq(Address adr) {
3950 if (adr.base_needs_rex()) {
3951 if (adr.index_needs_rex()) {
3952 prefix(REX_WXB);
3953 } else {
3954 prefix(REX_WB);
3955 }
3956 } else {
3957 if (adr.index_needs_rex()) {
3958 prefix(REX_WX);
3959 } else {
3960 prefix(REX_W);
3961 }
3962 }
3963 }
3966 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
3967 if (reg->encoding() < 8) {
3968 if (adr.base_needs_rex()) {
3969 if (adr.index_needs_rex()) {
3970 prefix(REX_XB);
3971 } else {
3972 prefix(REX_B);
3973 }
3974 } else {
3975 if (adr.index_needs_rex()) {
3976 prefix(REX_X);
3977 } else if (byteinst && reg->encoding() >= 4 ) {
3978 prefix(REX);
3979 }
3980 }
3981 } else {
3982 if (adr.base_needs_rex()) {
3983 if (adr.index_needs_rex()) {
3984 prefix(REX_RXB);
3985 } else {
3986 prefix(REX_RB);
3987 }
3988 } else {
3989 if (adr.index_needs_rex()) {
3990 prefix(REX_RX);
3991 } else {
3992 prefix(REX_R);
3993 }
3994 }
3995 }
3996 }
3998 void Assembler::prefixq(Address adr, Register src) {
3999 if (src->encoding() < 8) {
4000 if (adr.base_needs_rex()) {
4001 if (adr.index_needs_rex()) {
4002 prefix(REX_WXB);
4003 } else {
4004 prefix(REX_WB);
4005 }
4006 } else {
4007 if (adr.index_needs_rex()) {
4008 prefix(REX_WX);
4009 } else {
4010 prefix(REX_W);
4011 }
4012 }
4013 } else {
4014 if (adr.base_needs_rex()) {
4015 if (adr.index_needs_rex()) {
4016 prefix(REX_WRXB);
4017 } else {
4018 prefix(REX_WRB);
4019 }
4020 } else {
4021 if (adr.index_needs_rex()) {
4022 prefix(REX_WRX);
4023 } else {
4024 prefix(REX_WR);
4025 }
4026 }
4027 }
4028 }
4030 void Assembler::prefix(Address adr, XMMRegister reg) {
4031 if (reg->encoding() < 8) {
4032 if (adr.base_needs_rex()) {
4033 if (adr.index_needs_rex()) {
4034 prefix(REX_XB);
4035 } else {
4036 prefix(REX_B);
4037 }
4038 } else {
4039 if (adr.index_needs_rex()) {
4040 prefix(REX_X);
4041 }
4042 }
4043 } else {
4044 if (adr.base_needs_rex()) {
4045 if (adr.index_needs_rex()) {
4046 prefix(REX_RXB);
4047 } else {
4048 prefix(REX_RB);
4049 }
4050 } else {
4051 if (adr.index_needs_rex()) {
4052 prefix(REX_RX);
4053 } else {
4054 prefix(REX_R);
4055 }
4056 }
4057 }
4058 }
4060 void Assembler::prefixq(Address adr, XMMRegister src) {
4061 if (src->encoding() < 8) {
4062 if (adr.base_needs_rex()) {
4063 if (adr.index_needs_rex()) {
4064 prefix(REX_WXB);
4065 } else {
4066 prefix(REX_WB);
4067 }
4068 } else {
4069 if (adr.index_needs_rex()) {
4070 prefix(REX_WX);
4071 } else {
4072 prefix(REX_W);
4073 }
4074 }
4075 } else {
4076 if (adr.base_needs_rex()) {
4077 if (adr.index_needs_rex()) {
4078 prefix(REX_WRXB);
4079 } else {
4080 prefix(REX_WRB);
4081 }
4082 } else {
4083 if (adr.index_needs_rex()) {
4084 prefix(REX_WRX);
4085 } else {
4086 prefix(REX_WR);
4087 }
4088 }
4089 }
4090 }
4092 void Assembler::adcq(Register dst, int32_t imm32) {
4093 (void) prefixq_and_encode(dst->encoding());
4094 emit_arith(0x81, 0xD0, dst, imm32);
4095 }
4097 void Assembler::adcq(Register dst, Address src) {
4098 InstructionMark im(this);
4099 prefixq(src, dst);
4100 emit_byte(0x13);
4101 emit_operand(dst, src);
4102 }
4104 void Assembler::adcq(Register dst, Register src) {
4105 (int) prefixq_and_encode(dst->encoding(), src->encoding());
4106 emit_arith(0x13, 0xC0, dst, src);
4107 }
4109 void Assembler::addq(Address dst, int32_t imm32) {
4110 InstructionMark im(this);
4111 prefixq(dst);
4112 emit_arith_operand(0x81, rax, dst,imm32);
4113 }
4115 void Assembler::addq(Address dst, Register src) {
4116 InstructionMark im(this);
4117 prefixq(dst, src);
4118 emit_byte(0x01);
4119 emit_operand(src, dst);
4120 }
4122 void Assembler::addq(Register dst, int32_t imm32) {
4123 (void) prefixq_and_encode(dst->encoding());
4124 emit_arith(0x81, 0xC0, dst, imm32);
4125 }
4127 void Assembler::addq(Register dst, Address src) {
4128 InstructionMark im(this);
4129 prefixq(src, dst);
4130 emit_byte(0x03);
4131 emit_operand(dst, src);
4132 }
4134 void Assembler::addq(Register dst, Register src) {
4135 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4136 emit_arith(0x03, 0xC0, dst, src);
4137 }
4139 void Assembler::andq(Address dst, int32_t imm32) {
4140 InstructionMark im(this);
4141 prefixq(dst);
4142 emit_byte(0x81);
4143 emit_operand(rsp, dst, 4);
4144 emit_long(imm32);
4145 }
4147 void Assembler::andq(Register dst, int32_t imm32) {
4148 (void) prefixq_and_encode(dst->encoding());
4149 emit_arith(0x81, 0xE0, dst, imm32);
4150 }
4152 void Assembler::andq(Register dst, Address src) {
4153 InstructionMark im(this);
4154 prefixq(src, dst);
4155 emit_byte(0x23);
4156 emit_operand(dst, src);
4157 }
4159 void Assembler::andq(Register dst, Register src) {
4160 (int) prefixq_and_encode(dst->encoding(), src->encoding());
4161 emit_arith(0x23, 0xC0, dst, src);
4162 }
4164 void Assembler::bsfq(Register dst, Register src) {
4165 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4166 emit_byte(0x0F);
4167 emit_byte(0xBC);
4168 emit_byte(0xC0 | encode);
4169 }
4171 void Assembler::bsrq(Register dst, Register src) {
4172 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
4173 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4174 emit_byte(0x0F);
4175 emit_byte(0xBD);
4176 emit_byte(0xC0 | encode);
4177 }
4179 void Assembler::bswapq(Register reg) {
4180 int encode = prefixq_and_encode(reg->encoding());
4181 emit_byte(0x0F);
4182 emit_byte(0xC8 | encode);
4183 }
4185 void Assembler::cdqq() {
4186 prefix(REX_W);
4187 emit_byte(0x99);
4188 }
4190 void Assembler::clflush(Address adr) {
4191 prefix(adr);
4192 emit_byte(0x0F);
4193 emit_byte(0xAE);
4194 emit_operand(rdi, adr);
4195 }
4197 void Assembler::cmovq(Condition cc, Register dst, Register src) {
4198 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4199 emit_byte(0x0F);
4200 emit_byte(0x40 | cc);
4201 emit_byte(0xC0 | encode);
4202 }
4204 void Assembler::cmovq(Condition cc, Register dst, Address src) {
4205 InstructionMark im(this);
4206 prefixq(src, dst);
4207 emit_byte(0x0F);
4208 emit_byte(0x40 | cc);
4209 emit_operand(dst, src);
4210 }
4212 void Assembler::cmpq(Address dst, int32_t imm32) {
4213 InstructionMark im(this);
4214 prefixq(dst);
4215 emit_byte(0x81);
4216 emit_operand(rdi, dst, 4);
4217 emit_long(imm32);
4218 }
4220 void Assembler::cmpq(Register dst, int32_t imm32) {
4221 (void) prefixq_and_encode(dst->encoding());
4222 emit_arith(0x81, 0xF8, dst, imm32);
4223 }
4225 void Assembler::cmpq(Address dst, Register src) {
4226 InstructionMark im(this);
4227 prefixq(dst, src);
4228 emit_byte(0x3B);
4229 emit_operand(src, dst);
4230 }
4232 void Assembler::cmpq(Register dst, Register src) {
4233 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4234 emit_arith(0x3B, 0xC0, dst, src);
4235 }
4237 void Assembler::cmpq(Register dst, Address src) {
4238 InstructionMark im(this);
4239 prefixq(src, dst);
4240 emit_byte(0x3B);
4241 emit_operand(dst, src);
4242 }
4244 void Assembler::cmpxchgq(Register reg, Address adr) {
4245 InstructionMark im(this);
4246 prefixq(adr, reg);
4247 emit_byte(0x0F);
4248 emit_byte(0xB1);
4249 emit_operand(reg, adr);
4250 }
4252 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
4253 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4254 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
4255 emit_byte(0x2A);
4256 emit_byte(0xC0 | encode);
4257 }
4259 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
4260 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4261 InstructionMark im(this);
4262 simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
4263 emit_byte(0x2A);
4264 emit_operand(dst, src);
4265 }
4267 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
4268 NOT_LP64(assert(VM_Version::supports_sse(), ""));
4269 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
4270 emit_byte(0x2A);
4271 emit_byte(0xC0 | encode);
4272 }
4274 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
4275 NOT_LP64(assert(VM_Version::supports_sse(), ""));
4276 InstructionMark im(this);
4277 simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
4278 emit_byte(0x2A);
4279 emit_operand(dst, src);
4280 }
4282 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
4283 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4284 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
4285 emit_byte(0x2C);
4286 emit_byte(0xC0 | encode);
4287 }
4289 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
4290 NOT_LP64(assert(VM_Version::supports_sse(), ""));
4291 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
4292 emit_byte(0x2C);
4293 emit_byte(0xC0 | encode);
4294 }
4296 void Assembler::decl(Register dst) {
4297 // Don't use it directly. Use MacroAssembler::decrementl() instead.
4298 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
4299 int encode = prefix_and_encode(dst->encoding());
4300 emit_byte(0xFF);
4301 emit_byte(0xC8 | encode);
4302 }
4304 void Assembler::decq(Register dst) {
4305 // Don't use it directly. Use MacroAssembler::decrementq() instead.
4306 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
4307 int encode = prefixq_and_encode(dst->encoding());
4308 emit_byte(0xFF);
4309 emit_byte(0xC8 | encode);
4310 }
4312 void Assembler::decq(Address dst) {
4313 // Don't use it directly. Use MacroAssembler::decrementq() instead.
4314 InstructionMark im(this);
4315 prefixq(dst);
4316 emit_byte(0xFF);
4317 emit_operand(rcx, dst);
4318 }
4320 void Assembler::fxrstor(Address src) {
4321 prefixq(src);
4322 emit_byte(0x0F);
4323 emit_byte(0xAE);
4324 emit_operand(as_Register(1), src);
4325 }
4327 void Assembler::fxsave(Address dst) {
4328 prefixq(dst);
4329 emit_byte(0x0F);
4330 emit_byte(0xAE);
4331 emit_operand(as_Register(0), dst);
4332 }
4334 void Assembler::idivq(Register src) {
4335 int encode = prefixq_and_encode(src->encoding());
4336 emit_byte(0xF7);
4337 emit_byte(0xF8 | encode);
4338 }
4340 void Assembler::imulq(Register dst, Register src) {
4341 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4342 emit_byte(0x0F);
4343 emit_byte(0xAF);
4344 emit_byte(0xC0 | encode);
4345 }
4347 void Assembler::imulq(Register dst, Register src, int value) {
4348 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4349 if (is8bit(value)) {
4350 emit_byte(0x6B);
4351 emit_byte(0xC0 | encode);
4352 emit_byte(value & 0xFF);
4353 } else {
4354 emit_byte(0x69);
4355 emit_byte(0xC0 | encode);
4356 emit_long(value);
4357 }
4358 }
4360 void Assembler::incl(Register dst) {
4361 // Don't use it directly. Use MacroAssembler::incrementl() instead.
4362 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
4363 int encode = prefix_and_encode(dst->encoding());
4364 emit_byte(0xFF);
4365 emit_byte(0xC0 | encode);
4366 }
// Emits a 64-bit increment of register `dst` (prefixq_and_encode, opcode 0xFF,
// then 0xC0 | encode).
4368 void Assembler::incq(Register dst) {
4369 // Don't use it directly. Use MacroAssembler::incrementq() instead.
4370 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
4371 int encode = prefixq_and_encode(dst->encoding());
4372 emit_byte(0xFF);
4373 emit_byte(0xC0 | encode);
4374 }
// Emits a 64-bit increment of the memory operand `dst`: prefixq(dst), opcode
// 0xFF, then the operand encoding.
// NOTE(review): rax presumably supplies the /0 opcode-extension digit rather
// than naming a real operand -- confirm in emit_operand.
4376 void Assembler::incq(Address dst) {
4377 // Don't use it directly. Use MacroAssembler::incrementq() instead.
4378 InstructionMark im(this);
4379 prefixq(dst);
4380 emit_byte(0xFF);
4381 emit_operand(rax, dst);
4382 }
// Size-neutral LEA entry point; in this build variant it simply forwards to
// leaq(dst, src).
4384 void Assembler::lea(Register dst, Address src) {
4385 leaq(dst, src);
4386 }
// Emits a 64-bit LEA: prefixq(src, dst), opcode 0x8D, then the operand
// encoding pairing register `dst` with address `src`.
4388 void Assembler::leaq(Register dst, Address src) {
4389 InstructionMark im(this);
4390 prefixq(src, dst);
4391 emit_byte(0x8D);
4392 emit_operand(dst, src);
4393 }
// Loads the 64-bit immediate `imm64` into `dst`: prefix, opcode 0xB8 | encode
// (the register is folded into the opcode byte), then the immediate via
// emit_long64. No relocation is attached.
4395 void Assembler::mov64(Register dst, int64_t imm64) {
4396 InstructionMark im(this);
4397 int encode = prefixq_and_encode(dst->encoding());
4398 emit_byte(0xB8 | encode);
4399 emit_long64(imm64);
4400 }
// Loads a 64-bit literal into `dst` (opcode 0xB8 | encode); the immediate is
// emitted through emit_data64 together with the relocation `rspec`.
4402 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
4403 InstructionMark im(this);
4404 int encode = prefixq_and_encode(dst->encoding());
4405 emit_byte(0xB8 | encode);
4406 emit_data64(imm64, rspec);
4407 }
// Loads a 32-bit narrow-oop immediate into `dst`. Uses prefix_and_encode (not
// the "q" variant) and opcode 0xB8 | encode; the immediate is emitted with
// relocation `rspec` in narrow_oop_operand format.
4409 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
4410 InstructionMark im(this);
4411 int encode = prefix_and_encode(dst->encoding());
4412 emit_byte(0xB8 | encode);
4413 emit_data((int)imm32, rspec, narrow_oop_operand);
4414 }
// Stores a 32-bit narrow-oop immediate to the memory operand `dst`: prefix(dst),
// opcode 0xC7, the operand, then the immediate with relocation `rspec`.
// NOTE(review): rax presumably supplies the /0 opcode extension, and the
// trailing 4 presumably tells emit_operand that a 4-byte immediate follows the
// operand -- confirm against emit_operand.
4416 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
4417 InstructionMark im(this);
4418 prefix(dst);
4419 emit_byte(0xC7);
4420 emit_operand(rax, dst, 4);
4421 emit_data((int)imm32, rspec, narrow_oop_operand);
4422 }
// Compares register `src1` with a 32-bit narrow-oop immediate: prefix, opcode
// 0x81, the register-form byte 0xF8 | encode, then the immediate with
// relocation `rspec` in narrow_oop_operand format.
4424 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
4425 InstructionMark im(this);
4426 int encode = prefix_and_encode(src1->encoding());
4427 emit_byte(0x81);
4428 emit_byte(0xF8 | encode);
4429 emit_data((int)imm32, rspec, narrow_oop_operand);
4430 }
4432 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
4433 InstructionMark im(this);
4434 prefix(src1);
4435 emit_byte(0x81);
4436 emit_operand(rax, src1, 4);
4437 emit_data((int)imm32, rspec, narrow_oop_operand);
4438 }
// Emits 64-bit LZCNT dst, src: byte 0xF3, then the prefix from
// prefixq_and_encode, opcode bytes 0x0F 0xBD, and 0xC0 | encode.
// Asserts LZCNT support; per the assert message the same encoding is treated
// as BSR on CPUs without it.
4440 void Assembler::lzcntq(Register dst, Register src) {
4441 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
4442 emit_byte(0xF3);
4443 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4444 emit_byte(0x0F);
4445 emit_byte(0xBD);
4446 emit_byte(0xC0 | encode);
4447 }
// Moves a general register into an XMM register (opcode 0x6E, VEX_SIMD_66
// prefix class chosen through simd_prefix_and_encode_q).
4449 void Assembler::movdq(XMMRegister dst, Register src) {
4450 // table D-1 says MMX/SSE2
4451 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4452 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
4453 emit_byte(0x6E);
4454 emit_byte(0xC0 | encode);
4455 }
// Moves an XMM register into a general register (opcode 0x7E). The operands
// are passed to simd_prefix_and_encode_q in swapped order, as noted below.
4457 void Assembler::movdq(Register dst, XMMRegister src) {
4458 // table D-1 says MMX/SSE2
4459 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4460 // swap src/dst to get correct prefix
4461 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
4462 emit_byte(0x7E);
4463 emit_byte(0xC0 | encode);
4464 }
// Emits a 64-bit register-to-register move (opcode 0x8B, then 0xC0 | encode).
4466 void Assembler::movq(Register dst, Register src) {
4467 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4468 emit_byte(0x8B);
4469 emit_byte(0xC0 | encode);
4470 }
// Emits a 64-bit load of memory operand `src` into `dst` (opcode 0x8B).
4472 void Assembler::movq(Register dst, Address src) {
4473 InstructionMark im(this);
4474 prefixq(src, dst);
4475 emit_byte(0x8B);
4476 emit_operand(dst, src);
4477 }
// Emits a 64-bit store of register `src` to memory operand `dst` (opcode 0x89).
4479 void Assembler::movq(Address dst, Register src) {
4480 InstructionMark im(this);
4481 prefixq(dst, src);
4482 emit_byte(0x89);
4483 emit_operand(src, dst);
4484 }
// Emits MOVSX byte -> 64-bit from memory operand `src` into `dst`
// (opcode bytes 0x0F 0xBE).
4486 void Assembler::movsbq(Register dst, Address src) {
4487 InstructionMark im(this);
4488 prefixq(src, dst);
4489 emit_byte(0x0F);
4490 emit_byte(0xBE);
4491 emit_operand(dst, src);
4492 }
// Emits MOVSX byte -> 64-bit, register form (opcode bytes 0x0F 0xBE, then
// 0xC0 | encode).
4494 void Assembler::movsbq(Register dst, Register src) {
4495 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4496 emit_byte(0x0F);
4497 emit_byte(0xBE);
4498 emit_byte(0xC0 | encode);
4499 }
// Intended to move a sign-extended 32-bit immediate into `dst`, but currently
// disabled: ShouldNotReachHere() is called before anything is emitted. Per the
// note below, the encoding that follows was observed to disassemble
// incorrectly and must not be relied on until verified.
4501 void Assembler::movslq(Register dst, int32_t imm32) {
4502 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
4503 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
4504 // as a result we shouldn't use until tested at runtime...
4505 ShouldNotReachHere();
4506 InstructionMark im(this);
4507 int encode = prefixq_and_encode(dst->encoding());
4508 emit_byte(0xC7 | encode);
4509 emit_long(imm32);
4510 }
// Stores the 32-bit immediate `imm32` to the 64-bit memory operand `dst`:
// prefixq(dst), opcode 0xC7, the operand, then the 4-byte immediate.
// NOTE(review): rax presumably supplies the /0 opcode extension, and the
// trailing 4 presumably accounts for the immediate that follows the operand --
// confirm against emit_operand.
4512 void Assembler::movslq(Address dst, int32_t imm32) {
4513 assert(is_simm32(imm32), "lost bits");
4514 InstructionMark im(this);
4515 prefixq(dst);
4516 emit_byte(0xC7);
4517 emit_operand(rax, dst, 4);
4518 emit_long(imm32);
4519 }
// Emits MOVSXD (32-bit -> 64-bit sign extension) from memory operand `src`
// into `dst` (opcode 0x63).
4521 void Assembler::movslq(Register dst, Address src) {
4522 InstructionMark im(this);
4523 prefixq(src, dst);
4524 emit_byte(0x63);
4525 emit_operand(dst, src);
4526 }
// Emits MOVSXD (32-bit -> 64-bit sign extension), register form (opcode 0x63,
// then 0xC0 | encode).
4528 void Assembler::movslq(Register dst, Register src) {
4529 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4530 emit_byte(0x63);
4531 emit_byte(0xC0 | encode);
4532 }
// Emits MOVSX word -> 64-bit from memory operand `src` into `dst`
// (opcode bytes 0x0F 0xBF).
4534 void Assembler::movswq(Register dst, Address src) {
4535 InstructionMark im(this);
4536 prefixq(src, dst);
4537 emit_byte(0x0F);
4538 emit_byte(0xBF);
4539 emit_operand(dst, src);
4540 }
// Emits MOVSX word -> 64-bit, register form (opcode bytes 0x0F 0xBF, then
// 0xC0 | encode).
4542 void Assembler::movswq(Register dst, Register src) {
4543 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4544 emit_byte(0x0F);
4545 emit_byte(0xBF);
4546 emit_byte(0xC0 | encode);
4547 }
// Emits MOVZX byte -> 64-bit from memory operand `src` into `dst`
// (opcode bytes 0x0F 0xB6).
4549 void Assembler::movzbq(Register dst, Address src) {
4550 InstructionMark im(this);
4551 prefixq(src, dst);
4552 emit_byte(0x0F);
4553 emit_byte(0xB6);
4554 emit_operand(dst, src);
4555 }
// Emits MOVZX byte -> 64-bit, register form (opcode bytes 0x0F 0xB6, then
// 0xC0 | encode).
4557 void Assembler::movzbq(Register dst, Register src) {
4558 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4559 emit_byte(0x0F);
4560 emit_byte(0xB6);
4561 emit_byte(0xC0 | encode);
4562 }
// Emits MOVZX word -> 64-bit from memory operand `src` into `dst`
// (opcode bytes 0x0F 0xB7).
4564 void Assembler::movzwq(Register dst, Address src) {
4565 InstructionMark im(this);
4566 prefixq(src, dst);
4567 emit_byte(0x0F);
4568 emit_byte(0xB7);
4569 emit_operand(dst, src);
4570 }
// Emits MOVZX word -> 64-bit, register form (opcode bytes 0x0F 0xB7, then
// 0xC0 | encode).
4572 void Assembler::movzwq(Register dst, Register src) {
4573 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4574 emit_byte(0x0F);
4575 emit_byte(0xB7);
4576 emit_byte(0xC0 | encode);
4577 }
// Emits a 64-bit NEG of register `dst` (opcode 0xF7, then 0xD8 | encode).
4579 void Assembler::negq(Register dst) {
4580 int encode = prefixq_and_encode(dst->encoding());
4581 emit_byte(0xF7);
4582 emit_byte(0xD8 | encode);
4583 }
// Emits a 64-bit NOT of register `dst` (opcode 0xF7, then 0xD0 | encode).
4585 void Assembler::notq(Register dst) {
4586 int encode = prefixq_and_encode(dst->encoding());
4587 emit_byte(0xF7);
4588 emit_byte(0xD0 | encode);
4589 }
// Emits a 64-bit OR of the memory operand `dst` with a 32-bit immediate:
// prefixq(dst), opcode 0x81, the operand, then the 4-byte immediate.
// NOTE(review): rcx presumably supplies the /1 (OR) opcode extension and the
// trailing 4 the size of the immediate that follows -- confirm in emit_operand.
4591 void Assembler::orq(Address dst, int32_t imm32) {
4592 InstructionMark im(this);
4593 prefixq(dst);
4594 emit_byte(0x81);
4595 emit_operand(rcx, dst, 4);
4596 emit_long(imm32);
4597 }
// Emits a 64-bit OR of register `dst` with an immediate. prefixq_and_encode is
// called only for the prefix it emits (its result is discarded); emit_arith
// then writes the instruction from opcode 0x81 / 0xC8.
4599 void Assembler::orq(Register dst, int32_t imm32) {
4600 (void) prefixq_and_encode(dst->encoding());
4601 emit_arith(0x81, 0xC8, dst, imm32);
4602 }
// Emits a 64-bit OR of register `dst` with memory operand `src` (opcode 0x0B).
4604 void Assembler::orq(Register dst, Address src) {
4605 InstructionMark im(this);
4606 prefixq(src, dst);
4607 emit_byte(0x0B);
4608 emit_operand(dst, src);
4609 }
// Emits a 64-bit OR of register `dst` with register `src`. The prefix is
// emitted by prefixq_and_encode (result discarded); emit_arith writes the
// rest from opcode 0x0B / 0xC0.
4611 void Assembler::orq(Register dst, Register src) {
4612 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4613 emit_arith(0x0B, 0xC0, dst, src);
4614 }
// Reloads the general registers from the 16-word frame at rsp that pusha()
// lays out: r15 at offset 0 up to rax at 15 words. The slot at 11 words (rsp)
// is skipped. Finally releases the frame by adding 16 * wordSize to rsp.
4616 void Assembler::popa() { // 64bit
4617 movq(r15, Address(rsp, 0));
4618 movq(r14, Address(rsp, wordSize));
4619 movq(r13, Address(rsp, 2 * wordSize));
4620 movq(r12, Address(rsp, 3 * wordSize));
4621 movq(r11, Address(rsp, 4 * wordSize));
4622 movq(r10, Address(rsp, 5 * wordSize));
4623 movq(r9, Address(rsp, 6 * wordSize));
4624 movq(r8, Address(rsp, 7 * wordSize));
4625 movq(rdi, Address(rsp, 8 * wordSize));
4626 movq(rsi, Address(rsp, 9 * wordSize));
4627 movq(rbp, Address(rsp, 10 * wordSize));
4628 // skip rsp
4629 movq(rbx, Address(rsp, 12 * wordSize));
4630 movq(rdx, Address(rsp, 13 * wordSize));
4631 movq(rcx, Address(rsp, 14 * wordSize));
4632 movq(rax, Address(rsp, 15 * wordSize));
4634 addq(rsp, 16 * wordSize);
4635 }
// Emits 64-bit POPCNT of memory operand `src` into `dst`: byte 0xF3, then
// prefixq(src, dst), opcode bytes 0x0F 0xB8 and the operand. Asserts that the
// CPU supports POPCNT.
4637 void Assembler::popcntq(Register dst, Address src) {
4638 assert(VM_Version::supports_popcnt(), "must support");
4639 InstructionMark im(this);
4640 emit_byte(0xF3);
4641 prefixq(src, dst);
4642 emit_byte(0x0F);
4643 emit_byte(0xB8);
4644 emit_operand(dst, src);
4645 }
// Emits 64-bit POPCNT, register form: byte 0xF3, then the prefix from
// prefixq_and_encode, opcode bytes 0x0F 0xB8 and 0xC0 | encode. Asserts that
// the CPU supports POPCNT.
4647 void Assembler::popcntq(Register dst, Register src) {
4648 assert(VM_Version::supports_popcnt(), "must support");
4649 emit_byte(0xF3);
4650 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4651 emit_byte(0x0F);
4652 emit_byte(0xB8);
4653 emit_byte(0xC0 | encode);
4654 }
// Emits a POP into the memory operand `dst` (opcode 0x8F).
// NOTE(review): rax presumably supplies the /0 opcode extension -- confirm.
4656 void Assembler::popq(Address dst) {
4657 InstructionMark im(this);
4658 prefixq(dst);
4659 emit_byte(0x8F);
4660 emit_operand(rax, dst);
4661 }
// Saves the general registers into a new 16-word frame below rsp: rax at
// 15 words down to r15 at offset 0. The original rsp is written first, at
// -5 words from the old rsp, which is the slot at 11 words once rsp has been
// lowered by 16 words.
4663 void Assembler::pusha() { // 64bit
4664 // we have to store original rsp. ABI says that 128 bytes
4665 // below rsp are local scratch.
4666 movq(Address(rsp, -5 * wordSize), rsp);
4668 subq(rsp, 16 * wordSize);
4670 movq(Address(rsp, 15 * wordSize), rax);
4671 movq(Address(rsp, 14 * wordSize), rcx);
4672 movq(Address(rsp, 13 * wordSize), rdx);
4673 movq(Address(rsp, 12 * wordSize), rbx);
4674 // skip rsp
4675 movq(Address(rsp, 10 * wordSize), rbp);
4676 movq(Address(rsp, 9 * wordSize), rsi);
4677 movq(Address(rsp, 8 * wordSize), rdi);
4678 movq(Address(rsp, 7 * wordSize), r8);
4679 movq(Address(rsp, 6 * wordSize), r9);
4680 movq(Address(rsp, 5 * wordSize), r10);
4681 movq(Address(rsp, 4 * wordSize), r11);
4682 movq(Address(rsp, 3 * wordSize), r12);
4683 movq(Address(rsp, 2 * wordSize), r13);
4684 movq(Address(rsp, wordSize), r14);
4685 movq(Address(rsp, 0), r15);
4686 }
// Emits a PUSH of the memory operand `src` (opcode 0xFF).
// NOTE(review): rsi presumably supplies the /6 opcode extension -- confirm.
4688 void Assembler::pushq(Address src) {
4689 InstructionMark im(this);
4690 prefixq(src);
4691 emit_byte(0xFF);
4692 emit_operand(rsi, src);
4693 }
4695 void Assembler::rclq(Register dst, int imm8) {
4696 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4697 int encode = prefixq_and_encode(dst->encoding());
4698 if (imm8 == 1) {
4699 emit_byte(0xD1);
4700 emit_byte(0xD0 | encode);
4701 } else {
4702 emit_byte(0xC1);
4703 emit_byte(0xD0 | encode);
4704 emit_byte(imm8);
4705 }
4706 }
4707 void Assembler::sarq(Register dst, int imm8) {
4708 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4709 int encode = prefixq_and_encode(dst->encoding());
4710 if (imm8 == 1) {
4711 emit_byte(0xD1);
4712 emit_byte(0xF8 | encode);
4713 } else {
4714 emit_byte(0xC1);
4715 emit_byte(0xF8 | encode);
4716 emit_byte(imm8);
4717 }
4718 }
// Emits a 64-bit SAR of `dst` with no immediate count (opcode 0xD3, then
// 0xF8 | encode); on x86 this form takes the shift count from CL.
4720 void Assembler::sarq(Register dst) {
4721 int encode = prefixq_and_encode(dst->encoding());
4722 emit_byte(0xD3);
4723 emit_byte(0xF8 | encode);
4724 }
// Emits a 64-bit SBB of an immediate from the memory operand `dst` via
// emit_arith_operand with opcode 0x81.
// NOTE(review): rbx presumably supplies the /3 (SBB) opcode extension -- confirm.
4726 void Assembler::sbbq(Address dst, int32_t imm32) {
4727 InstructionMark im(this);
4728 prefixq(dst);
4729 emit_arith_operand(0x81, rbx, dst, imm32);
4730 }
// Emits a 64-bit SBB of an immediate from register `dst`. The prefix is
// emitted by prefixq_and_encode (result discarded); emit_arith writes the
// rest from opcode 0x81 / 0xD8.
4732 void Assembler::sbbq(Register dst, int32_t imm32) {
4733 (void) prefixq_and_encode(dst->encoding());
4734 emit_arith(0x81, 0xD8, dst, imm32);
4735 }
// Emits a 64-bit SBB of memory operand `src` from register `dst` (opcode 0x1B).
4737 void Assembler::sbbq(Register dst, Address src) {
4738 InstructionMark im(this);
4739 prefixq(src, dst);
4740 emit_byte(0x1B);
4741 emit_operand(dst, src);
4742 }
// Emits a 64-bit SBB of register `src` from register `dst`. The prefix is
// emitted by prefixq_and_encode (result discarded); emit_arith writes the
// rest from opcode 0x1B / 0xC0.
4744 void Assembler::sbbq(Register dst, Register src) {
4745 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4746 emit_arith(0x1B, 0xC0, dst, src);
4747 }
4749 void Assembler::shlq(Register dst, int imm8) {
4750 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4751 int encode = prefixq_and_encode(dst->encoding());
4752 if (imm8 == 1) {
4753 emit_byte(0xD1);
4754 emit_byte(0xE0 | encode);
4755 } else {
4756 emit_byte(0xC1);
4757 emit_byte(0xE0 | encode);
4758 emit_byte(imm8);
4759 }
4760 }
// Emits a 64-bit SHL of `dst` with no immediate count (opcode 0xD3, then
// 0xE0 | encode); on x86 this form takes the shift count from CL.
4762 void Assembler::shlq(Register dst) {
4763 int encode = prefixq_and_encode(dst->encoding());
4764 emit_byte(0xD3);
4765 emit_byte(0xE0 | encode);
4766 }
// Emits a 64-bit SHR (logical shift right) of `dst` by the immediate `imm8`.
// This overload always uses the opcode 0xC1 form with an explicit count byte;
// it has no special case for a count of 1.
4768 void Assembler::shrq(Register dst, int imm8) {
4769 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4770 int encode = prefixq_and_encode(dst->encoding());
4771 emit_byte(0xC1);
4772 emit_byte(0xE8 | encode);
4773 emit_byte(imm8);
4774 }
// Emits a 64-bit SHR of `dst` with no immediate count (opcode 0xD3, then
// 0xE8 | encode); on x86 this form takes the shift count from CL.
4776 void Assembler::shrq(Register dst) {
4777 int encode = prefixq_and_encode(dst->encoding());
4778 emit_byte(0xD3);
4779 emit_byte(0xE8 | encode);
4780 }
// Emits a 64-bit SUB of an immediate from the memory operand `dst` via
// emit_arith_operand with opcode 0x81.
// NOTE(review): rbp presumably supplies the /5 (SUB) opcode extension -- confirm.
4782 void Assembler::subq(Address dst, int32_t imm32) {
4783 InstructionMark im(this);
4784 prefixq(dst);
4785 emit_arith_operand(0x81, rbp, dst, imm32);
4786 }
// Emits a 64-bit SUB of register `src` from the memory operand `dst`
// (opcode 0x29).
4788 void Assembler::subq(Address dst, Register src) {
4789 InstructionMark im(this);
4790 prefixq(dst, src);
4791 emit_byte(0x29);
4792 emit_operand(src, dst);
4793 }
// Emits a 64-bit SUB of an immediate from register `dst`. The prefix is
// emitted by prefixq_and_encode (result discarded); emit_arith writes the
// rest from opcode 0x81 / 0xE8.
4795 void Assembler::subq(Register dst, int32_t imm32) {
4796 (void) prefixq_and_encode(dst->encoding());
4797 emit_arith(0x81, 0xE8, dst, imm32);
4798 }
// Same opcode 0x81 / 0xE8 as subq(Register, int32_t), but emitted through
// emit_arith_imm32 rather than emit_arith.
4800 // Force generation of a 4 byte immediate value even if it fits into 8bit
4801 void Assembler::subq_imm32(Register dst, int32_t imm32) {
4802 (void) prefixq_and_encode(dst->encoding());
4803 emit_arith_imm32(0x81, 0xE8, dst, imm32);
4804 }
// Emits a 64-bit SUB of memory operand `src` from register `dst` (opcode 0x2B).
4806 void Assembler::subq(Register dst, Address src) {
4807 InstructionMark im(this);
4808 prefixq(src, dst);
4809 emit_byte(0x2B);
4810 emit_operand(dst, src);
4811 }
// Emits a 64-bit SUB of register `src` from register `dst`. The prefix is
// emitted by prefixq_and_encode (result discarded); emit_arith writes the
// rest from opcode 0x2B / 0xC0.
4813 void Assembler::subq(Register dst, Register src) {
4814 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4815 emit_arith(0x2B, 0xC0, dst, src);
4816 }
4818 void Assembler::testq(Register dst, int32_t imm32) {
4819 // not using emit_arith because test
4820 // doesn't support sign-extension of
4821 // 8bit operands
4822 int encode = dst->encoding();
4823 if (encode == 0) {
4824 prefix(REX_W);
4825 emit_byte(0xA9);
4826 } else {
4827 encode = prefixq_and_encode(encode);
4828 emit_byte(0xF7);
4829 emit_byte(0xC0 | encode);
4830 }
4831 emit_long(imm32);
4832 }
// Emits a 64-bit TEST of register `dst` against register `src`. The prefix is
// emitted by prefixq_and_encode (result discarded); emit_arith writes the
// rest from opcode 0x85 / 0xC0.
4834 void Assembler::testq(Register dst, Register src) {
4835 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4836 emit_arith(0x85, 0xC0, dst, src);
4837 }
// Emits a 64-bit XADD between the memory operand `dst` and register `src`
// (opcode bytes 0x0F 0xC1). No LOCK prefix is emitted here.
4839 void Assembler::xaddq(Address dst, Register src) {
4840 InstructionMark im(this);
4841 prefixq(dst, src);
4842 emit_byte(0x0F);
4843 emit_byte(0xC1);
4844 emit_operand(src, dst);
4845 }
// Emits a 64-bit XCHG between register `dst` and memory operand `src`
// (opcode 0x87).
4847 void Assembler::xchgq(Register dst, Address src) {
4848 InstructionMark im(this);
4849 prefixq(src, dst);
4850 emit_byte(0x87);
4851 emit_operand(dst, src);
4852 }
// Emits a 64-bit XCHG between two registers (opcode 0x87, then 0xc0 | encode).
4854 void Assembler::xchgq(Register dst, Register src) {
4855 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4856 emit_byte(0x87);
4857 emit_byte(0xc0 | encode);
4858 }
// Emits a 64-bit XOR of register `dst` with register `src`. The prefix is
// emitted by prefixq_and_encode (result discarded); emit_arith writes the
// rest from opcode 0x33 / 0xC0.
4860 void Assembler::xorq(Register dst, Register src) {
4861 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4862 emit_arith(0x33, 0xC0, dst, src);
4863 }
// Emits a 64-bit XOR of register `dst` with memory operand `src` (opcode 0x33).
4865 void Assembler::xorq(Register dst, Address src) {
4866 InstructionMark im(this);
4867 prefixq(src, dst);
4868 emit_byte(0x33);
4869 emit_operand(dst, src);
4870 }
4872 #endif // !LP64
// Condition-negation table. Entry i holds the logical opposite of the
// condition whose code is i (codes 0x0..0xf); the trailing comment on each
// row names the condition(s) that index it. Conditions come in adjacent
// even/odd pairs, so every row maps to its pair partner.
// NOTE(review): the code that reads this table lies outside this chunk.
4874 static Assembler::Condition reverse[] = {
4875 Assembler::noOverflow /* overflow = 0x0 */ ,
4876 Assembler::overflow /* noOverflow = 0x1 */ ,
4877 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
4878 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
4879 Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
4880 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
4881 Assembler::above /* belowEqual = 0x6 */ ,
4882 Assembler::belowEqual /* above = 0x7 */ ,
4883 Assembler::positive /* negative = 0x8 */ ,
4884 Assembler::negative /* positive = 0x9 */ ,
4885 Assembler::noParity /* parity = 0xa */ ,
4886 Assembler::parity /* noParity = 0xb */ ,
4887 Assembler::greaterEqual /* less = 0xc */ ,
4888 Assembler::less /* greaterEqual = 0xd */ ,
4889 Assembler::greater /* lessEqual = 0xe */ ,
4890 Assembler::lessEqual /* greater = 0xf, */
4892 };
4895 // Implementation of MacroAssembler
4897 // First all the versions that have distinct versions depending on 32/64 bit
4898 // Unless the difference is trivial (1 line or so).
4900 #ifndef _LP64
4902 // 32bit versions
// 32-bit: converts an AddressLiteral into an Address built from the literal's
// target and its relocation spec.
4904 Address MacroAssembler::as_Address(AddressLiteral adr) {
4905 return Address(adr.target(), adr.rspec());
4906 }
// 32-bit: converts an ArrayAddress into an Address by delegating to
// Address::make_array.
4908 Address MacroAssembler::as_Address(ArrayAddress adr) {
4909 return Address::make_array(adr);
4910 }
// 32-bit code generator for the biased-locking fast path of monitor enter.
//
// Emitted code, in order:
//   1. If the mark word lacks the biased-lock pattern, jump to the local
//      cas_label, which is bound at the very end so the caller's ordinary
//      CAS-based locking code follows.
//   2. If the object is already biased to the current thread in the current
//      epoch, jump to `done`.
//   3. If the klass prototype header no longer has the bias bits, try to reset
//      the object's mark to the prototype (try_revoke_bias) and fall through
//      to cas_label.
//   4. If the epoch has expired, try to rebias to the current thread
//      (try_rebias); otherwise try to acquire an anonymous bias. In both
//      cases a failed CAS jumps to *slow_case (when non-NULL) and success
//      jumps to `done`.
//
// Parameters:
//   lock_reg  - base of the slot (offset 0) where the mark word is saved;
//               also borrowed as the temporary when tmp_reg == noreg, in
//               which case it is preserved with push/pop around each use
//   obj_reg   - the object being locked
//   swap_reg  - must be rax (required by cmpxchg); holds the mark word
//   tmp_reg   - optional scratch register, or noreg
//   swap_reg_contains_mark - true if the caller already loaded the mark word
//               into swap_reg
//   done      - label reached when the lock is held via bias
//   slow_case - optional label for the runtime slow path
//   counters  - optional statistics counters; defaults to
//               BiasedLocking::counters() when PrintBiasedLockingStatistics
//
// Returns the code offset recorded just before the first emitted instruction
// that dereferences obj_reg (the mark load, or the klass load when the mark is
// already in swap_reg), for use as an implicit null-check offset.
4912 int MacroAssembler::biased_locking_enter(Register lock_reg,
4913 Register obj_reg,
4914 Register swap_reg,
4915 Register tmp_reg,
4916 bool swap_reg_contains_mark,
4917 Label& done,
4918 Label* slow_case,
4919 BiasedLockingCounters* counters) {
4920 assert(UseBiasedLocking, "why call this otherwise?");
4921 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4922 assert_different_registers(lock_reg, obj_reg, swap_reg);
4924 if (PrintBiasedLockingStatistics && counters == NULL)
4925 counters = BiasedLocking::counters();
4927 bool need_tmp_reg = false;
4928 if (tmp_reg == noreg) {
4929 need_tmp_reg = true;
4930 tmp_reg = lock_reg;
4931 } else {
4932 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4933 }
4934 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4935 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
4936 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
4937 Address saved_mark_addr(lock_reg, 0);
4939 // Biased locking
4940 // See whether the lock is currently biased toward our thread and
4941 // whether the epoch is still valid
4942 // Note that the runtime guarantees sufficient alignment of JavaThread
4943 // pointers to allow age to be placed into low bits
4944 // First check to see whether biasing is even enabled for this object
4945 Label cas_label;
4946 int null_check_offset = -1;
4947 if (!swap_reg_contains_mark) {
4948 null_check_offset = offset();
4949 movl(swap_reg, mark_addr);
4950 }
4951 if (need_tmp_reg) {
4952 push(tmp_reg);
4953 }
4954 movl(tmp_reg, swap_reg);
4955 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4956 cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4957 if (need_tmp_reg) {
4958 pop(tmp_reg);
4959 }
4960 jcc(Assembler::notEqual, cas_label);
4961 // The bias pattern is present in the object's header. Need to check
4962 // whether the bias owner and the epoch are both still current.
4963 // Note that because there is no current thread register on x86 we
4964 // need to store off the mark word we read out of the object to
4965 // avoid reloading it and needing to recheck invariants below. This
4966 // store is unfortunate but it makes the overall code shorter and
4967 // simpler.
4968 movl(saved_mark_addr, swap_reg);
4969 if (need_tmp_reg) {
4970 push(tmp_reg);
4971 }
4972 get_thread(tmp_reg);
4973 xorl(swap_reg, tmp_reg);
4974 if (swap_reg_contains_mark) {
4975 null_check_offset = offset();
4976 }
4977 movl(tmp_reg, klass_addr);
4978 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
4979 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4980 if (need_tmp_reg) {
4981 pop(tmp_reg);
4982 }
4983 if (counters != NULL) {
4984 cond_inc32(Assembler::zero,
4985 ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4986 }
4987 jcc(Assembler::equal, done);
4989 Label try_revoke_bias;
4990 Label try_rebias;
4992 // At this point we know that the header has the bias pattern and
4993 // that we are not the bias owner in the current epoch. We need to
4994 // figure out more details about the state of the header in order to
4995 // know what operations can be legally performed on the object's
4996 // header.
4998 // If the low three bits in the xor result aren't clear, that means
4999 // the prototype header is no longer biased and we have to revoke
5000 // the bias on this object.
5001 testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
5002 jcc(Assembler::notZero, try_revoke_bias);
5004 // Biasing is still enabled for this data type. See whether the
5005 // epoch of the current bias is still valid, meaning that the epoch
5006 // bits of the mark word are equal to the epoch bits of the
5007 // prototype header. (Note that the prototype header's epoch bits
5008 // only change at a safepoint.) If not, attempt to rebias the object
5009 // toward the current thread. Note that we must be absolutely sure
5010 // that the current epoch is invalid in order to do this because
5011 // otherwise the manipulations it performs on the mark word are
5012 // illegal.
5013 testl(swap_reg, markOopDesc::epoch_mask_in_place);
5014 jcc(Assembler::notZero, try_rebias);
5016 // The epoch of the current bias is still valid but we know nothing
5017 // about the owner; it might be set or it might be clear. Try to
5018 // acquire the bias of the object using an atomic operation. If this
5019 // fails we will go in to the runtime to revoke the object's bias.
5020 // Note that we first construct the presumed unbiased header so we
5021 // don't accidentally blow away another thread's valid bias.
5022 movl(swap_reg, saved_mark_addr);
5023 andl(swap_reg,
5024 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5025 if (need_tmp_reg) {
5026 push(tmp_reg);
5027 }
5028 get_thread(tmp_reg);
5029 orl(tmp_reg, swap_reg);
5030 if (os::is_MP()) {
5031 lock();
5032 }
5033 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
5034 if (need_tmp_reg) {
5035 pop(tmp_reg);
5036 }
5037 // If the biasing toward our thread failed, this means that
5038 // another thread succeeded in biasing it toward itself and we
5039 // need to revoke that bias. The revocation will occur in the
5040 // interpreter runtime in the slow case.
5041 if (counters != NULL) {
5042 cond_inc32(Assembler::zero,
5043 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
5044 }
5045 if (slow_case != NULL) {
5046 jcc(Assembler::notZero, *slow_case);
5047 }
5048 jmp(done);
5050 bind(try_rebias);
5051 // At this point we know the epoch has expired, meaning that the
5052 // current "bias owner", if any, is actually invalid. Under these
5053 // circumstances _only_, we are allowed to use the current header's
5054 // value as the comparison value when doing the cas to acquire the
5055 // bias in the current epoch. In other words, we allow transfer of
5056 // the bias from one thread to another directly in this situation.
5057 //
5058 // FIXME: due to a lack of registers we currently blow away the age
5059 // bits in this situation. Should attempt to preserve them.
5060 if (need_tmp_reg) {
5061 push(tmp_reg);
5062 }
5063 get_thread(tmp_reg);
5064 movl(swap_reg, klass_addr);
5065 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
5066 movl(swap_reg, saved_mark_addr);
5067 if (os::is_MP()) {
5068 lock();
5069 }
5070 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
5071 if (need_tmp_reg) {
5072 pop(tmp_reg);
5073 }
5074 // If the biasing toward our thread failed, then another thread
5075 // succeeded in biasing it toward itself and we need to revoke that
5076 // bias. The revocation will occur in the runtime in the slow case.
5077 if (counters != NULL) {
5078 cond_inc32(Assembler::zero,
5079 ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
5080 }
5081 if (slow_case != NULL) {
5082 jcc(Assembler::notZero, *slow_case);
5083 }
5084 jmp(done);
5086 bind(try_revoke_bias);
5087 // The prototype mark in the klass doesn't have the bias bit set any
5088 // more, indicating that objects of this data type are not supposed
5089 // to be biased any more. We are going to try to reset the mark of
5090 // this object to the prototype value and fall through to the
5091 // CAS-based locking scheme. Note that if our CAS fails, it means
5092 // that another thread raced us for the privilege of revoking the
5093 // bias of this particular object, so it's okay to continue in the
5094 // normal locking code.
5095 //
5096 // FIXME: due to a lack of registers we currently blow away the age
5097 // bits in this situation. Should attempt to preserve them.
5098 movl(swap_reg, saved_mark_addr);
5099 if (need_tmp_reg) {
5100 push(tmp_reg);
5101 }
5102 movl(tmp_reg, klass_addr);
5103 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
5104 if (os::is_MP()) {
5105 lock();
5106 }
5107 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
5108 if (need_tmp_reg) {
5109 pop(tmp_reg);
5110 }
5111 // Fall through to the normal CAS-based lock, because no matter what
5112 // the result of the above CAS, some thread must have succeeded in
5113 // removing the bias bit from the object's header.
5114 if (counters != NULL) {
5115 cond_inc32(Assembler::zero,
5116 ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
5117 }
5119 bind(cas_label);
5121 return null_check_offset;
5122 }
// 32-bit: emits a call to `entry_point` (as a RuntimeAddress) and then removes
// `number_of_arguments` words from the stack by incrementing rsp.
5123 void MacroAssembler::call_VM_leaf_base(address entry_point,
5124 int number_of_arguments) {
5125 call(RuntimeAddress(entry_point));
5126 increment(rsp, number_of_arguments * wordSize);
5127 }
// 32-bit: compares the memory operand `src1` with the jobject `obj`, emitted
// as a 32-bit literal carrying an immediate-oop relocation.
5129 void MacroAssembler::cmpoop(Address src1, jobject obj) {
5130 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
5131 }
// 32-bit: compares register `src1` with the jobject `obj`, emitted as a
// 32-bit literal carrying an immediate-oop relocation.
5133 void MacroAssembler::cmpoop(Register src1, jobject obj) {
5134 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
5135 }
5137 void MacroAssembler::extend_sign(Register hi, Register lo) {
5138 // According to Intel Doc. AP-526, "Integer Divide", p.18.
5139 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
5140 cdql();
5141 } else {
5142 movl(hi, lo);
5143 sarl(hi, 31);
5144 }
5145 }
// Emits a branch to `L` taken when the FPU status flag C2 is set. The status
// word is read into ax (fnstsw_ax) and copied into the CPU flags (sahf);
// the sequence is bracketed by save_rax(tmp)/restore_rax(tmp), presumably to
// preserve rax in `tmp`.
5147 void MacroAssembler::jC2(Register tmp, Label& L) {
5148 // set parity bit if FPU flag C2 is set (via rax)
5149 save_rax(tmp);
5150 fwait(); fnstsw_ax();
5151 sahf();
5152 restore_rax(tmp);
5153 // branch
5154 jcc(Assembler::parity, L);
5155 }
// Emits a branch to `L` taken when the FPU status flag C2 is clear. The flag
// transfer sequence is the same as in jC2; only the branch condition
// (noParity) differs.
5157 void MacroAssembler::jnC2(Register tmp, Label& L) {
5158 // set parity bit if FPU flag C2 is set (via rax)
5159 save_rax(tmp);
5160 fwait(); fnstsw_ax();
5161 sahf();
5162 restore_rax(tmp);
5163 // branch
5164 jcc(Assembler::noParity, L);
5165 }
// Emits an indirect jump through the table entry described by `entry`.
5167 // 32bit can do a case table jump in one instruction but we no longer allow the base
5168 // to be installed in the Address class
5169 void MacroAssembler::jump(ArrayAddress entry) {
5170 jmp(as_Address(entry));
5171 }
// Emits a three-way compare of the 64-bit values x_hi:x_lo and y_hi:y_lo.
// The high halves are compared as signed values and, if equal, the low halves
// as unsigned values. The result is left in x_hi: -1 (x < y), 0 (x == y) or
// 1 (x > y).
5173 // Note: y_lo will be destroyed
5174 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
5175 // Long compare for Java (semantics as described in JVM spec.)
5176 Label high, low, done;
5178 cmpl(x_hi, y_hi);
5179 jcc(Assembler::less, low);
5180 jcc(Assembler::greater, high);
5181 // x_hi is the return register
5182 xorl(x_hi, x_hi);
5183 cmpl(x_lo, y_lo);
5184 jcc(Assembler::below, low);
5185 jcc(Assembler::equal, done);
5187 bind(high);
5188 xorl(x_hi, x_hi);
5189 increment(x_hi);
5190 jmp(done);
5192 bind(low);
5193 xorl(x_hi, x_hi);
5194 decrementl(x_hi);
5196 bind(done);
5197 }
// 32-bit: materializes the literal's target address in `dst` as a 32-bit
// immediate move that carries the literal's relocation spec.
5199 void MacroAssembler::lea(Register dst, AddressLiteral src) {
5200 mov_literal32(dst, (int32_t)src.target(), src.rspec());
5201 }
// 32-bit: stores the literal's target address to the memory operand `dst` as
// a 32-bit immediate move that carries the literal's relocation spec.
5203 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
5204 // leal(dst, as_Address(adr));
5205 // see note in movl as to why we must use a move
5206 mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
5207 }
// Emits the frame teardown sequence: rsp = rbp, then pop rbp.
5209 void MacroAssembler::leave() {
5210 mov(rsp, rbp);
5211 pop(rbp);
5212 }
// Emits a 64-bit multiply built from 32-bit mull instructions. Both operands
// are read from the stack at the given byte offsets from rsp (low word at the
// offset, high word one wordSize above it). rbx and rcx are overwritten as
// scratch; the product is left in rdx:rax.
5214 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
5215 // Multiplication of two Java long values stored on the stack
5216 // as illustrated below. Result is in rdx:rax.
5217 //
5218 // rsp ---> [ ?? ] \ \
5219 // .... | y_rsp_offset |
5220 // [ y_lo ] / (in bytes) | x_rsp_offset
5221 // [ y_hi ] | (in bytes)
5222 // .... |
5223 // [ x_lo ] /
5224 // [ x_hi ]
5225 // ....
5226 //
5227 // Basic idea: lo(result) = lo(x_lo * y_lo)
5228 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
5229 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
5230 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
5231 Label quick;
5232 // load x_hi, y_hi and check if quick
5233 // multiplication is possible
5234 movl(rbx, x_hi);
5235 movl(rcx, y_hi);
5236 movl(rax, rbx);
5237 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0
5238 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply
5239 // do full multiplication
5240 // 1st step
5241 mull(y_lo); // x_hi * y_lo
5242 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx,
5243 // 2nd step
5244 movl(rax, x_lo);
5245 mull(rcx); // x_lo * y_hi
5246 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx,
5247 // 3rd step
5248 bind(quick); // note: rbx, = 0 if quick multiply!
5249 movl(rax, x_lo);
5250 mull(y_lo); // x_lo * y_lo
5251 addl(rdx, rbx); // correct hi(x_lo * y_lo)
5252 }
// Emits a negation of the 64-bit value held in hi:lo: negate the low half,
// add the carry that produced into the high half, then negate the high half.
5254 void MacroAssembler::lneg(Register hi, Register lo) {
5255 negl(lo);
5256 adcl(hi, 0);
5257 negl(hi);
5258 }
// Emits a 64-bit left shift of hi:lo by the count in rcx. rcx is masked to
// 0..63 in place, so it is modified; neither `hi` nor `lo` may be rcx.
5260 void MacroAssembler::lshl(Register hi, Register lo) {
5261 // Java shift left long support (semantics as described in JVM spec., p.305)
5262 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
5263 // shift value is in rcx !
5264 assert(hi != rcx, "must not use rcx");
5265 assert(lo != rcx, "must not use rcx");
5266 const Register s = rcx; // shift count
5267 const int n = BitsPerWord;
5268 Label L;
5269 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
5270 cmpl(s, n); // if (s < n)
5271 jcc(Assembler::less, L); // else (s >= n)
5272 movl(hi, lo); // x := x << n
5273 xorl(lo, lo);
5274 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
5275 bind(L); // s (mod n) < n
5276 shldl(hi, lo); // x := x << s
5277 shll(lo);
5278 }
// Emits a 64-bit right shift of hi:lo by the count in rcx. rcx is masked to
// 0..63 in place, so it is modified; neither `hi` nor `lo` may be rcx.
// `sign_extension` selects an arithmetic shift (sarl, sign bits shifted in)
// over a logical one (shrl / zero fill) for the high half.
5281 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
5282 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
5283 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
5284 assert(hi != rcx, "must not use rcx");
5285 assert(lo != rcx, "must not use rcx");
5286 const Register s = rcx; // shift count
5287 const int n = BitsPerWord;
5288 Label L;
5289 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
5290 cmpl(s, n); // if (s < n)
5291 jcc(Assembler::less, L); // else (s >= n)
5292 movl(lo, hi); // x := x >> n
5293 if (sign_extension) sarl(hi, 31);
5294 else xorl(hi, hi);
5295 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
5296 bind(L); // s (mod n) < n
5297 shrdl(lo, hi); // x := x >> s
5298 if (sign_extension) sarl(hi);
5299 else shrl(hi);
5300 }
// 32-bit: loads the jobject `obj` into `dst` as a 32-bit literal carrying an
// immediate-oop relocation.
5302 void MacroAssembler::movoop(Register dst, jobject obj) {
5303 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
5304 }
// 32-bit: stores the jobject `obj` to the memory operand `dst` as a 32-bit
// literal carrying an immediate-oop relocation.
5306 void MacroAssembler::movoop(Address dst, jobject obj) {
5307 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
5308 }
5310 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5311 if (src.is_lval()) {
5312 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
5313 } else {
5314 movl(dst, as_Address(src));
5315 }
5316 }
// 32-bit: stores register `src` to the array element addressed by `dst`.
5318 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
5319 movl(as_Address(dst), src);
5320 }
// 32-bit: loads the array element addressed by `src` into register `dst`.
5322 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
5323 movl(dst, as_Address(src));
5324 }
// 32-bit: stores the plain integer `src` to the memory operand `dst` with
// movl; no relocation is attached, hence the restriction below.
5326 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
5327 void MacroAssembler::movptr(Address dst, intptr_t src) {
5328 movl(dst, src);
5329 }
// Pops rcx, rdx, rdi and rsi, in that order -- the exact reverse of the
// pushes in push_callee_saved_registers().
5332 void MacroAssembler::pop_callee_saved_registers() {
5333 pop(rcx);
5334 pop(rdx);
5335 pop(rdi);
5336 pop(rsi);
5337 }
// Loads the double at the top of the CPU stack onto the FPU stack (fld_d) and
// then releases its two stack words.
5339 void MacroAssembler::pop_fTOS() {
5340 fld_d(Address(rsp, 0));
5341 addl(rsp, 2 * wordSize);
5342 }
// Pushes rsi, rdi, rdx and rcx, in that order; undone by
// pop_callee_saved_registers().
5344 void MacroAssembler::push_callee_saved_registers() {
5345 push(rsi);
5346 push(rdi);
5347 push(rdx);
5348 push(rcx);
5349 }
// Reserves two words on the CPU stack and stores the FPU top-of-stack there
// as a double, popping it from the FPU stack (fstp_d).
5351 void MacroAssembler::push_fTOS() {
5352 subl(rsp, 2 * wordSize);
5353 fstp_d(Address(rsp, 0));
5354 }
// 32-bit: pushes the jobject `obj` as a 32-bit literal carrying an
// immediate-oop relocation.
5357 void MacroAssembler::pushoop(jobject obj) {
5358 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
5359 }
5362 void MacroAssembler::pushptr(AddressLiteral src) {
5363 if (src.is_lval()) {
5364 push_literal32((int32_t)src.target(), src.rspec());
5365 } else {
5366 pushl(as_Address(src));
5367 }
5368 }
// Clears `dst` with xorl and then calls set_byte_if_not_zero(dst), so that the
// whole word (not just its low byte) ends up holding the 0/1 result.
// NOTE(review): on x86, xorl rewrites the condition flags (a zero result sets
// ZF). If set_byte_if_not_zero reads ZF, as its name suggests, the flags from
// the caller's preceding compare are gone by then and the result may always be
// 0 -- verify against set_byte_if_not_zero and the callers.
5370 void MacroAssembler::set_word_if_not_zero(Register dst) {
5371 xorl(dst, dst);
5372 set_byte_if_not_zero(dst);
5373 }
// 32-bit: passes argument 0 by pushing `arg` on the stack.
5375 static void pass_arg0(MacroAssembler* masm, Register arg) {
5376 masm->push(arg);
5377 }
// 32-bit: passes argument 1 by pushing `arg` on the stack.
5379 static void pass_arg1(MacroAssembler* masm, Register arg) {
5380 masm->push(arg);
5381 }
5383 static void pass_arg2(MacroAssembler* masm, Register arg) {
5384 masm->push(arg);
5385 }
5387 static void pass_arg3(MacroAssembler* masm, Register arg) {
5388 masm->push(arg);
5389 }
5391 #ifndef PRODUCT
5392 extern "C" void findpc(intptr_t x);
5393 #endif
5395 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
5396 // In order to get locks to work, we need to fake a in_VM state
5397 JavaThread* thread = JavaThread::current();
5398 JavaThreadState saved_state = thread->thread_state();
5399 thread->set_thread_state(_thread_in_vm);
5400 if (ShowMessageBoxOnError) {
5401 JavaThread* thread = JavaThread::current();
5402 JavaThreadState saved_state = thread->thread_state();
5403 thread->set_thread_state(_thread_in_vm);
5404 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5405 ttyLocker ttyl;
5406 BytecodeCounter::print();
5407 }
5408 // To see where a verify_oop failed, get $ebx+40/X for this frame.
5409 // This is the value of eip which points to where verify_oop will return.
5410 if (os::message_box(msg, "Execution stopped, print registers?")) {
5411 ttyLocker ttyl;
5412 tty->print_cr("eip = 0x%08x", eip);
5413 #ifndef PRODUCT
5414 if ((WizardMode || Verbose) && PrintMiscellaneous) {
5415 tty->cr();
5416 findpc(eip);
5417 tty->cr();
5418 }
5419 #endif
5420 tty->print_cr("rax = 0x%08x", rax);
5421 tty->print_cr("rbx = 0x%08x", rbx);
5422 tty->print_cr("rcx = 0x%08x", rcx);
5423 tty->print_cr("rdx = 0x%08x", rdx);
5424 tty->print_cr("rdi = 0x%08x", rdi);
5425 tty->print_cr("rsi = 0x%08x", rsi);
5426 tty->print_cr("rbp = 0x%08x", rbp);
5427 tty->print_cr("rsp = 0x%08x", rsp);
5428 BREAKPOINT;
5429 assert(false, "start up GDB");
5430 }
5431 } else {
5432 ttyLocker ttyl;
5433 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5434 assert(false, err_msg("DEBUG MESSAGE: %s", msg));
5435 }
5436 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5437 }
5439 void MacroAssembler::stop(const char* msg) {
5440 ExternalAddress message((address)msg);
5441 // push address of message
5442 pushptr(message.addr());
5443 { Label L; call(L, relocInfo::none); bind(L); } // push eip
5444 pusha(); // push registers
5445 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
5446 hlt();
5447 }
5449 void MacroAssembler::warn(const char* msg) {
5450 push_CPU_state();
5452 ExternalAddress message((address) msg);
5453 // push address of message
5454 pushptr(message.addr());
5456 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
5457 addl(rsp, wordSize); // discard argument
5458 pop_CPU_state();
5459 }
5461 #else // _LP64
5463 // 64 bit versions
5465 Address MacroAssembler::as_Address(AddressLiteral adr) {
5466 // amd64 always does this as a pc-rel
5467 // we can be absolute or disp based on the instruction type
5468 // jmp/call are displacements others are absolute
5469 assert(!adr.is_lval(), "must be rval");
5470 assert(reachable(adr), "must be");
5471 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
5473 }
5475 Address MacroAssembler::as_Address(ArrayAddress adr) {
5476 AddressLiteral base = adr.base();
5477 lea(rscratch1, base);
5478 Address index = adr.index();
5479 assert(index._disp == 0, "must not have disp"); // maybe it can?
5480 Address array(rscratch1, index._index, index._scale, index._disp);
5481 return array;
5482 }
5484 int MacroAssembler::biased_locking_enter(Register lock_reg,
5485 Register obj_reg,
5486 Register swap_reg,
5487 Register tmp_reg,
5488 bool swap_reg_contains_mark,
5489 Label& done,
5490 Label* slow_case,
5491 BiasedLockingCounters* counters) {
5492 assert(UseBiasedLocking, "why call this otherwise?");
5493 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
5494 assert(tmp_reg != noreg, "tmp_reg must be supplied");
5495 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
5496 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
5497 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
5498 Address saved_mark_addr(lock_reg, 0);
5500 if (PrintBiasedLockingStatistics && counters == NULL)
5501 counters = BiasedLocking::counters();
5503 // Biased locking
5504 // See whether the lock is currently biased toward our thread and
5505 // whether the epoch is still valid
5506 // Note that the runtime guarantees sufficient alignment of JavaThread
5507 // pointers to allow age to be placed into low bits
5508 // First check to see whether biasing is even enabled for this object
5509 Label cas_label;
5510 int null_check_offset = -1;
5511 if (!swap_reg_contains_mark) {
5512 null_check_offset = offset();
5513 movq(swap_reg, mark_addr);
5514 }
5515 movq(tmp_reg, swap_reg);
5516 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5517 cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5518 jcc(Assembler::notEqual, cas_label);
5519 // The bias pattern is present in the object's header. Need to check
5520 // whether the bias owner and the epoch are both still current.
5521 load_prototype_header(tmp_reg, obj_reg);
5522 orq(tmp_reg, r15_thread);
5523 xorq(tmp_reg, swap_reg);
5524 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5525 if (counters != NULL) {
5526 cond_inc32(Assembler::zero,
5527 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5528 }
5529 jcc(Assembler::equal, done);
5531 Label try_revoke_bias;
5532 Label try_rebias;
5534 // At this point we know that the header has the bias pattern and
5535 // that we are not the bias owner in the current epoch. We need to
5536 // figure out more details about the state of the header in order to
5537 // know what operations can be legally performed on the object's
5538 // header.
5540 // If the low three bits in the xor result aren't clear, that means
5541 // the prototype header is no longer biased and we have to revoke
5542 // the bias on this object.
5543 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5544 jcc(Assembler::notZero, try_revoke_bias);
5546 // Biasing is still enabled for this data type. See whether the
5547 // epoch of the current bias is still valid, meaning that the epoch
5548 // bits of the mark word are equal to the epoch bits of the
5549 // prototype header. (Note that the prototype header's epoch bits
5550 // only change at a safepoint.) If not, attempt to rebias the object
5551 // toward the current thread. Note that we must be absolutely sure
5552 // that the current epoch is invalid in order to do this because
5553 // otherwise the manipulations it performs on the mark word are
5554 // illegal.
5555 testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5556 jcc(Assembler::notZero, try_rebias);
5558 // The epoch of the current bias is still valid but we know nothing
5559 // about the owner; it might be set or it might be clear. Try to
5560 // acquire the bias of the object using an atomic operation. If this
5561 // fails we will go in to the runtime to revoke the object's bias.
5562 // Note that we first construct the presumed unbiased header so we
5563 // don't accidentally blow away another thread's valid bias.
5564 andq(swap_reg,
5565 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5566 movq(tmp_reg, swap_reg);
5567 orq(tmp_reg, r15_thread);
5568 if (os::is_MP()) {
5569 lock();
5570 }
5571 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5572 // If the biasing toward our thread failed, this means that
5573 // another thread succeeded in biasing it toward itself and we
5574 // need to revoke that bias. The revocation will occur in the
5575 // interpreter runtime in the slow case.
5576 if (counters != NULL) {
5577 cond_inc32(Assembler::zero,
5578 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5579 }
5580 if (slow_case != NULL) {
5581 jcc(Assembler::notZero, *slow_case);
5582 }
5583 jmp(done);
5585 bind(try_rebias);
5586 // At this point we know the epoch has expired, meaning that the
5587 // current "bias owner", if any, is actually invalid. Under these
5588 // circumstances _only_, we are allowed to use the current header's
5589 // value as the comparison value when doing the cas to acquire the
5590 // bias in the current epoch. In other words, we allow transfer of
5591 // the bias from one thread to another directly in this situation.
5592 //
5593 // FIXME: due to a lack of registers we currently blow away the age
5594 // bits in this situation. Should attempt to preserve them.
5595 load_prototype_header(tmp_reg, obj_reg);
5596 orq(tmp_reg, r15_thread);
5597 if (os::is_MP()) {
5598 lock();
5599 }
5600 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5601 // If the biasing toward our thread failed, then another thread
5602 // succeeded in biasing it toward itself and we need to revoke that
5603 // bias. The revocation will occur in the runtime in the slow case.
5604 if (counters != NULL) {
5605 cond_inc32(Assembler::zero,
5606 ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5607 }
5608 if (slow_case != NULL) {
5609 jcc(Assembler::notZero, *slow_case);
5610 }
5611 jmp(done);
5613 bind(try_revoke_bias);
5614 // The prototype mark in the klass doesn't have the bias bit set any
5615 // more, indicating that objects of this data type are not supposed
5616 // to be biased any more. We are going to try to reset the mark of
5617 // this object to the prototype value and fall through to the
5618 // CAS-based locking scheme. Note that if our CAS fails, it means
5619 // that another thread raced us for the privilege of revoking the
5620 // bias of this particular object, so it's okay to continue in the
5621 // normal locking code.
5622 //
5623 // FIXME: due to a lack of registers we currently blow away the age
5624 // bits in this situation. Should attempt to preserve them.
5625 load_prototype_header(tmp_reg, obj_reg);
5626 if (os::is_MP()) {
5627 lock();
5628 }
5629 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5630 // Fall through to the normal CAS-based lock, because no matter what
5631 // the result of the above CAS, some thread must have succeeded in
5632 // removing the bias bit from the object's header.
5633 if (counters != NULL) {
5634 cond_inc32(Assembler::zero,
5635 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5636 }
5638 bind(cas_label);
5640 return null_check_offset;
5641 }
5643 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
5644 Label L, E;
5646 #ifdef _WIN64
5647 // Windows always allocates space for it's register args
5648 assert(num_args <= 4, "only register arguments supported");
5649 subq(rsp, frame::arg_reg_save_area_bytes);
5650 #endif
5652 // Align stack if necessary
5653 testl(rsp, 15);
5654 jcc(Assembler::zero, L);
5656 subq(rsp, 8);
5657 {
5658 call(RuntimeAddress(entry_point));
5659 }
5660 addq(rsp, 8);
5661 jmp(E);
5663 bind(L);
5664 {
5665 call(RuntimeAddress(entry_point));
5666 }
5668 bind(E);
5670 #ifdef _WIN64
5671 // restore stack pointer
5672 addq(rsp, frame::arg_reg_save_area_bytes);
5673 #endif
5675 }
5677 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5678 assert(!src2.is_lval(), "should use cmpptr");
5680 if (reachable(src2)) {
5681 cmpq(src1, as_Address(src2));
5682 } else {
5683 lea(rscratch1, src2);
5684 Assembler::cmpq(src1, Address(rscratch1, 0));
5685 }
5686 }
5688 int MacroAssembler::corrected_idivq(Register reg) {
5689 // Full implementation of Java ldiv and lrem; checks for special
5690 // case as described in JVM spec., p.243 & p.271. The function
5691 // returns the (pc) offset of the idivl instruction - may be needed
5692 // for implicit exceptions.
5693 //
5694 // normal case special case
5695 //
5696 // input : rax: dividend min_long
5697 // reg: divisor (may not be eax/edx) -1
5698 //
5699 // output: rax: quotient (= rax idiv reg) min_long
5700 // rdx: remainder (= rax irem reg) 0
5701 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
5702 static const int64_t min_long = 0x8000000000000000;
5703 Label normal_case, special_case;
5705 // check for special case
5706 cmp64(rax, ExternalAddress((address) &min_long));
5707 jcc(Assembler::notEqual, normal_case);
5708 xorl(rdx, rdx); // prepare rdx for possible special case (where
5709 // remainder = 0)
5710 cmpq(reg, -1);
5711 jcc(Assembler::equal, special_case);
5713 // handle normal case
5714 bind(normal_case);
5715 cdqq();
5716 int idivq_offset = offset();
5717 idivq(reg);
5719 // normal and special case exit
5720 bind(special_case);
5722 return idivq_offset;
5723 }
5725 void MacroAssembler::decrementq(Register reg, int value) {
5726 if (value == min_jint) { subq(reg, value); return; }
5727 if (value < 0) { incrementq(reg, -value); return; }
5728 if (value == 0) { ; return; }
5729 if (value == 1 && UseIncDec) { decq(reg) ; return; }
5730 /* else */ { subq(reg, value) ; return; }
5731 }
5733 void MacroAssembler::decrementq(Address dst, int value) {
5734 if (value == min_jint) { subq(dst, value); return; }
5735 if (value < 0) { incrementq(dst, -value); return; }
5736 if (value == 0) { ; return; }
5737 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5738 /* else */ { subq(dst, value) ; return; }
5739 }
5741 void MacroAssembler::incrementq(Register reg, int value) {
5742 if (value == min_jint) { addq(reg, value); return; }
5743 if (value < 0) { decrementq(reg, -value); return; }
5744 if (value == 0) { ; return; }
5745 if (value == 1 && UseIncDec) { incq(reg) ; return; }
5746 /* else */ { addq(reg, value) ; return; }
5747 }
5749 void MacroAssembler::incrementq(Address dst, int value) {
5750 if (value == min_jint) { addq(dst, value); return; }
5751 if (value < 0) { decrementq(dst, -value); return; }
5752 if (value == 0) { ; return; }
5753 if (value == 1 && UseIncDec) { incq(dst) ; return; }
5754 /* else */ { addq(dst, value) ; return; }
5755 }
5757 // 32bit can do a case table jump in one instruction but we no longer allow the base
5758 // to be installed in the Address class
5759 void MacroAssembler::jump(ArrayAddress entry) {
5760 lea(rscratch1, entry.base());
5761 Address dispatch = entry.index();
5762 assert(dispatch._base == noreg, "must be");
5763 dispatch._base = rscratch1;
5764 jmp(dispatch);
5765 }
5767 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
5768 ShouldNotReachHere(); // 64bit doesn't use two regs
5769 cmpq(x_lo, y_lo);
5770 }
5772 void MacroAssembler::lea(Register dst, AddressLiteral src) {
5773 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5774 }
5776 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
5777 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
5778 movptr(dst, rscratch1);
5779 }
5781 void MacroAssembler::leave() {
5782 // %%% is this really better? Why not on 32bit too?
5783 emit_byte(0xC9); // LEAVE
5784 }
5786 void MacroAssembler::lneg(Register hi, Register lo) {
5787 ShouldNotReachHere(); // 64bit doesn't use two regs
5788 negq(lo);
5789 }
5791 void MacroAssembler::movoop(Register dst, jobject obj) {
5792 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5793 }
5795 void MacroAssembler::movoop(Address dst, jobject obj) {
5796 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5797 movq(dst, rscratch1);
5798 }
5800 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5801 if (src.is_lval()) {
5802 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5803 } else {
5804 if (reachable(src)) {
5805 movq(dst, as_Address(src));
5806 } else {
5807 lea(rscratch1, src);
5808 movq(dst, Address(rscratch1,0));
5809 }
5810 }
5811 }
5813 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
5814 movq(as_Address(dst), src);
5815 }
5817 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
5818 movq(dst, as_Address(src));
5819 }
5821 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
5822 void MacroAssembler::movptr(Address dst, intptr_t src) {
5823 mov64(rscratch1, src);
5824 movq(dst, rscratch1);
5825 }
5827 // These are mostly for initializing NULL
5828 void MacroAssembler::movptr(Address dst, int32_t src) {
5829 movslq(dst, src);
5830 }
5832 void MacroAssembler::movptr(Register dst, int32_t src) {
5833 mov64(dst, (intptr_t)src);
5834 }
5836 void MacroAssembler::pushoop(jobject obj) {
5837 movoop(rscratch1, obj);
5838 push(rscratch1);
5839 }
5841 void MacroAssembler::pushptr(AddressLiteral src) {
5842 lea(rscratch1, src);
5843 if (src.is_lval()) {
5844 push(rscratch1);
5845 } else {
5846 pushq(Address(rscratch1, 0));
5847 }
5848 }
5850 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
5851 bool clear_pc) {
5852 // we must set sp to zero to clear frame
5853 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
5854 // must clear fp, so that compiled frames are not confused; it is
5855 // possible that we need it only for debugging
5856 if (clear_fp) {
5857 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
5858 }
5860 if (clear_pc) {
5861 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
5862 }
5863 }
5865 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
5866 Register last_java_fp,
5867 address last_java_pc) {
5868 // determine last_java_sp register
5869 if (!last_java_sp->is_valid()) {
5870 last_java_sp = rsp;
5871 }
5873 // last_java_fp is optional
5874 if (last_java_fp->is_valid()) {
5875 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
5876 last_java_fp);
5877 }
5879 // last_java_pc is optional
5880 if (last_java_pc != NULL) {
5881 Address java_pc(r15_thread,
5882 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
5883 lea(rscratch1, InternalAddress(last_java_pc));
5884 movptr(java_pc, rscratch1);
5885 }
5887 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
5888 }
5890 static void pass_arg0(MacroAssembler* masm, Register arg) {
5891 if (c_rarg0 != arg ) {
5892 masm->mov(c_rarg0, arg);
5893 }
5894 }
5896 static void pass_arg1(MacroAssembler* masm, Register arg) {
5897 if (c_rarg1 != arg ) {
5898 masm->mov(c_rarg1, arg);
5899 }
5900 }
5902 static void pass_arg2(MacroAssembler* masm, Register arg) {
5903 if (c_rarg2 != arg ) {
5904 masm->mov(c_rarg2, arg);
5905 }
5906 }
5908 static void pass_arg3(MacroAssembler* masm, Register arg) {
5909 if (c_rarg3 != arg ) {
5910 masm->mov(c_rarg3, arg);
5911 }
5912 }
5914 void MacroAssembler::stop(const char* msg) {
5915 address rip = pc();
5916 pusha(); // get regs on stack
5917 lea(c_rarg0, ExternalAddress((address) msg));
5918 lea(c_rarg1, InternalAddress(rip));
5919 movq(c_rarg2, rsp); // pass pointer to regs array
5920 andq(rsp, -16); // align stack as required by ABI
5921 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
5922 hlt();
5923 }
5925 void MacroAssembler::warn(const char* msg) {
5926 push(rsp);
5927 andq(rsp, -16); // align stack as required by push_CPU_state and call
5929 push_CPU_state(); // keeps alignment at 16 bytes
5930 lea(c_rarg0, ExternalAddress((address) msg));
5931 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
5932 pop_CPU_state();
5933 pop(rsp);
5934 }
5936 #ifndef PRODUCT
5937 extern "C" void findpc(intptr_t x);
5938 #endif
5940 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
5941 // In order to get locks to work, we need to fake a in_VM state
5942 if (ShowMessageBoxOnError ) {
5943 JavaThread* thread = JavaThread::current();
5944 JavaThreadState saved_state = thread->thread_state();
5945 thread->set_thread_state(_thread_in_vm);
5946 #ifndef PRODUCT
5947 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5948 ttyLocker ttyl;
5949 BytecodeCounter::print();
5950 }
5951 #endif
5952 // To see where a verify_oop failed, get $ebx+40/X for this frame.
5953 // XXX correct this offset for amd64
5954 // This is the value of eip which points to where verify_oop will return.
5955 if (os::message_box(msg, "Execution stopped, print registers?")) {
5956 ttyLocker ttyl;
5957 tty->print_cr("rip = 0x%016lx", pc);
5958 #ifndef PRODUCT
5959 tty->cr();
5960 findpc(pc);
5961 tty->cr();
5962 #endif
5963 tty->print_cr("rax = 0x%016lx", regs[15]);
5964 tty->print_cr("rbx = 0x%016lx", regs[12]);
5965 tty->print_cr("rcx = 0x%016lx", regs[14]);
5966 tty->print_cr("rdx = 0x%016lx", regs[13]);
5967 tty->print_cr("rdi = 0x%016lx", regs[8]);
5968 tty->print_cr("rsi = 0x%016lx", regs[9]);
5969 tty->print_cr("rbp = 0x%016lx", regs[10]);
5970 tty->print_cr("rsp = 0x%016lx", regs[11]);
5971 tty->print_cr("r8 = 0x%016lx", regs[7]);
5972 tty->print_cr("r9 = 0x%016lx", regs[6]);
5973 tty->print_cr("r10 = 0x%016lx", regs[5]);
5974 tty->print_cr("r11 = 0x%016lx", regs[4]);
5975 tty->print_cr("r12 = 0x%016lx", regs[3]);
5976 tty->print_cr("r13 = 0x%016lx", regs[2]);
5977 tty->print_cr("r14 = 0x%016lx", regs[1]);
5978 tty->print_cr("r15 = 0x%016lx", regs[0]);
5979 BREAKPOINT;
5980 }
5981 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5982 } else {
5983 ttyLocker ttyl;
5984 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
5985 msg);
5986 assert(false, err_msg("DEBUG MESSAGE: %s", msg));
5987 }
5988 }
5990 #endif // _LP64
5992 // Now versions that are common to 32/64 bit
5994 void MacroAssembler::addptr(Register dst, int32_t imm32) {
5995 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
5996 }
5998 void MacroAssembler::addptr(Register dst, Register src) {
5999 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
6000 }
6002 void MacroAssembler::addptr(Address dst, Register src) {
6003 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
6004 }
6006 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
6007 if (reachable(src)) {
6008 Assembler::addsd(dst, as_Address(src));
6009 } else {
6010 lea(rscratch1, src);
6011 Assembler::addsd(dst, Address(rscratch1, 0));
6012 }
6013 }
6015 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
6016 if (reachable(src)) {
6017 addss(dst, as_Address(src));
6018 } else {
6019 lea(rscratch1, src);
6020 addss(dst, Address(rscratch1, 0));
6021 }
6022 }
6024 void MacroAssembler::align(int modulus) {
6025 if (offset() % modulus != 0) {
6026 nop(modulus - (offset() % modulus));
6027 }
6028 }
6030 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
6031 // Used in sign-masking with aligned address.
6032 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
6033 if (reachable(src)) {
6034 Assembler::andpd(dst, as_Address(src));
6035 } else {
6036 lea(rscratch1, src);
6037 Assembler::andpd(dst, Address(rscratch1, 0));
6038 }
6039 }
6041 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
6042 // Used in sign-masking with aligned address.
6043 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
6044 if (reachable(src)) {
6045 Assembler::andps(dst, as_Address(src));
6046 } else {
6047 lea(rscratch1, src);
6048 Assembler::andps(dst, Address(rscratch1, 0));
6049 }
6050 }
6052 void MacroAssembler::andptr(Register dst, int32_t imm32) {
6053 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
6054 }
6056 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
6057 pushf();
6058 if (os::is_MP())
6059 lock();
6060 incrementl(counter_addr);
6061 popf();
6062 }
6064 // Writes to stack successive pages until offset reached to check for
6065 // stack overflow + shadow pages. This clobbers tmp.
6066 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
6067 movptr(tmp, rsp);
6068 // Bang stack for total size given plus shadow page size.
6069 // Bang one page at a time because large size can bang beyond yellow and
6070 // red zones.
6071 Label loop;
6072 bind(loop);
6073 movl(Address(tmp, (-os::vm_page_size())), size );
6074 subptr(tmp, os::vm_page_size());
6075 subl(size, os::vm_page_size());
6076 jcc(Assembler::greater, loop);
6078 // Bang down shadow pages too.
6079 // The -1 because we already subtracted 1 page.
6080 for (int i = 0; i< StackShadowPages-1; i++) {
6081 // this could be any sized move but this is can be a debugging crumb
6082 // so the bigger the better.
6083 movptr(Address(tmp, (-i*os::vm_page_size())), size );
6084 }
6085 }
6087 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
6088 assert(UseBiasedLocking, "why call this otherwise?");
6090 // Check for biased locking unlock case, which is a no-op
6091 // Note: we do not have to check the thread ID for two reasons.
6092 // First, the interpreter checks for IllegalMonitorStateException at
6093 // a higher level. Second, if the bias was revoked while we held the
6094 // lock, the object could not be rebiased toward another thread, so
6095 // the bias bit would be clear.
6096 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
6097 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
6098 cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
6099 jcc(Assembler::equal, done);
6100 }
6102 void MacroAssembler::c2bool(Register x) {
6103 // implements x == 0 ? 0 : 1
6104 // note: must only look at least-significant byte of x
6105 // since C-style booleans are stored in one byte
6106 // only! (was bug)
6107 andl(x, 0xFF);
6108 setb(Assembler::notZero, x);
6109 }
6111 // Wouldn't need if AddressLiteral version had new name
6112 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
6113 Assembler::call(L, rtype);
6114 }
6116 void MacroAssembler::call(Register entry) {
6117 Assembler::call(entry);
6118 }
6120 void MacroAssembler::call(AddressLiteral entry) {
6121 if (reachable(entry)) {
6122 Assembler::call_literal(entry.target(), entry.rspec());
6123 } else {
6124 lea(rscratch1, entry);
6125 Assembler::call(rscratch1);
6126 }
6127 }
6129 // Implementation of call_VM versions
6131 void MacroAssembler::call_VM(Register oop_result,
6132 address entry_point,
6133 bool check_exceptions) {
6134 Label C, E;
6135 call(C, relocInfo::none);
6136 jmp(E);
6138 bind(C);
6139 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
6140 ret(0);
6142 bind(E);
6143 }
6145 void MacroAssembler::call_VM(Register oop_result,
6146 address entry_point,
6147 Register arg_1,
6148 bool check_exceptions) {
6149 Label C, E;
6150 call(C, relocInfo::none);
6151 jmp(E);
6153 bind(C);
6154 pass_arg1(this, arg_1);
6155 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
6156 ret(0);
6158 bind(E);
6159 }
6161 void MacroAssembler::call_VM(Register oop_result,
6162 address entry_point,
6163 Register arg_1,
6164 Register arg_2,
6165 bool check_exceptions) {
6166 Label C, E;
6167 call(C, relocInfo::none);
6168 jmp(E);
6170 bind(C);
6172 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6174 pass_arg2(this, arg_2);
6175 pass_arg1(this, arg_1);
6176 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
6177 ret(0);
6179 bind(E);
6180 }
6182 void MacroAssembler::call_VM(Register oop_result,
6183 address entry_point,
6184 Register arg_1,
6185 Register arg_2,
6186 Register arg_3,
6187 bool check_exceptions) {
6188 Label C, E;
6189 call(C, relocInfo::none);
6190 jmp(E);
6192 bind(C);
6194 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
6195 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
6196 pass_arg3(this, arg_3);
6198 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6199 pass_arg2(this, arg_2);
6201 pass_arg1(this, arg_1);
6202 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
6203 ret(0);
6205 bind(E);
6206 }
6208 void MacroAssembler::call_VM(Register oop_result,
6209 Register last_java_sp,
6210 address entry_point,
6211 int number_of_arguments,
6212 bool check_exceptions) {
6213 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
6214 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
6215 }
6217 void MacroAssembler::call_VM(Register oop_result,
6218 Register last_java_sp,
6219 address entry_point,
6220 Register arg_1,
6221 bool check_exceptions) {
6222 pass_arg1(this, arg_1);
6223 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
6224 }
6226 void MacroAssembler::call_VM(Register oop_result,
6227 Register last_java_sp,
6228 address entry_point,
6229 Register arg_1,
6230 Register arg_2,
6231 bool check_exceptions) {
6233 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6234 pass_arg2(this, arg_2);
6235 pass_arg1(this, arg_1);
6236 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
6237 }
6239 void MacroAssembler::call_VM(Register oop_result,
6240 Register last_java_sp,
6241 address entry_point,
6242 Register arg_1,
6243 Register arg_2,
6244 Register arg_3,
6245 bool check_exceptions) {
6246 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
6247 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
6248 pass_arg3(this, arg_3);
6249 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6250 pass_arg2(this, arg_2);
6251 pass_arg1(this, arg_1);
6252 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
6253 }
6255 void MacroAssembler::super_call_VM(Register oop_result,
6256 Register last_java_sp,
6257 address entry_point,
6258 int number_of_arguments,
6259 bool check_exceptions) {
6260 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
6261 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
6262 }
6264 void MacroAssembler::super_call_VM(Register oop_result,
6265 Register last_java_sp,
6266 address entry_point,
6267 Register arg_1,
6268 bool check_exceptions) {
6269 pass_arg1(this, arg_1);
6270 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
6271 }
6273 void MacroAssembler::super_call_VM(Register oop_result,
6274 Register last_java_sp,
6275 address entry_point,
6276 Register arg_1,
6277 Register arg_2,
6278 bool check_exceptions) {
6280 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6281 pass_arg2(this, arg_2);
6282 pass_arg1(this, arg_1);
6283 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
6284 }
6286 void MacroAssembler::super_call_VM(Register oop_result,
6287 Register last_java_sp,
6288 address entry_point,
6289 Register arg_1,
6290 Register arg_2,
6291 Register arg_3,
6292 bool check_exceptions) {
6293 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
6294 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
6295 pass_arg3(this, arg_3);
6296 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6297 pass_arg2(this, arg_2);
6298 pass_arg1(this, arg_1);
6299 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
6300 }
6302 void MacroAssembler::call_VM_base(Register oop_result,
6303 Register java_thread,
6304 Register last_java_sp,
6305 address entry_point,
6306 int number_of_arguments,
6307 bool check_exceptions) {
6308 // determine java_thread register
6309 if (!java_thread->is_valid()) {
6310 #ifdef _LP64
6311 java_thread = r15_thread;
6312 #else
6313 java_thread = rdi;
6314 get_thread(java_thread);
6315 #endif // LP64
6316 }
6317 // determine last_java_sp register
6318 if (!last_java_sp->is_valid()) {
6319 last_java_sp = rsp;
6320 }
6321 // debugging support
6322 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
6323 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
6324 #ifdef ASSERT
6325 // TraceBytecodes does not use r12 but saves it over the call, so don't verify
6326 // r12 is the heapbase.
6327 LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base");)
6328 #endif // ASSERT
6330 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
6331 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
6333 // push java thread (becomes first argument of C function)
6335 NOT_LP64(push(java_thread); number_of_arguments++);
6336 LP64_ONLY(mov(c_rarg0, r15_thread));
6338 // set last Java frame before call
6339 assert(last_java_sp != rbp, "can't use ebp/rbp");
6341 // Only interpreter should have to set fp
6342 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
6344 // do the call, remove parameters
6345 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
6347 // restore the thread (cannot use the pushed argument since arguments
6348 // may be overwritten by C code generated by an optimizing compiler);
6349 // however can use the register value directly if it is callee saved.
6350 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
6351 // rdi & rsi (also r15) are callee saved -> nothing to do
6352 #ifdef ASSERT
6353 guarantee(java_thread != rax, "change this code");
6354 push(rax);
6355 { Label L;
6356 get_thread(rax);
6357 cmpptr(java_thread, rax);
6358 jcc(Assembler::equal, L);
6359 stop("MacroAssembler::call_VM_base: rdi not callee saved?");
6360 bind(L);
6361 }
6362 pop(rax);
6363 #endif
6364 } else {
6365 get_thread(java_thread);
6366 }
6367 // reset last Java frame
6368 // Only interpreter should have to clear fp
6369 reset_last_Java_frame(java_thread, true, false);
6371 #ifndef CC_INTERP
6372 // C++ interp handles this in the interpreter
6373 check_and_handle_popframe(java_thread);
6374 check_and_handle_earlyret(java_thread);
6375 #endif /* CC_INTERP */
6377 if (check_exceptions) {
6378 // check for pending exceptions (java_thread is set upon return)
6379 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
6380 #ifndef _LP64
6381 jump_cc(Assembler::notEqual,
6382 RuntimeAddress(StubRoutines::forward_exception_entry()));
6383 #else
6384 // This used to conditionally jump to forward_exception however it is
6385 // possible if we relocate that the branch will not reach. So we must jump
6386 // around so we can always reach
6388 Label ok;
6389 jcc(Assembler::equal, ok);
6390 jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
6391 bind(ok);
6392 #endif // LP64
6393 }
6395 // get oop result if there is one and reset the value in the thread
6396 if (oop_result->is_valid()) {
6397 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
6398 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
6399 verify_oop(oop_result, "broken oop in call_VM_base");
6400 }
6401 }
6403 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
6405 // Calculate the value for last_Java_sp
6406 // somewhat subtle. call_VM does an intermediate call
6407 // which places a return address on the stack just under the
6408 // stack pointer as the user finsihed with it. This allows
6409 // use to retrieve last_Java_pc from last_Java_sp[-1].
6410 // On 32bit we then have to push additional args on the stack to accomplish
6411 // the actual requested call. On 64bit call_VM only can use register args
6412 // so the only extra space is the return address that call_VM created.
6413 // This hopefully explains the calculations here.
6415 #ifdef _LP64
6416 // We've pushed one address, correct last_Java_sp
6417 lea(rax, Address(rsp, wordSize));
6418 #else
6419 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
6420 #endif // LP64
6422 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
6424 }
6426 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
6427 call_VM_leaf_base(entry_point, number_of_arguments);
6428 }
6430 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
6431 pass_arg0(this, arg_0);
6432 call_VM_leaf(entry_point, 1);
6433 }
6435 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
6437 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
6438 pass_arg1(this, arg_1);
6439 pass_arg0(this, arg_0);
6440 call_VM_leaf(entry_point, 2);
6441 }
6443 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
6444 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
6445 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6446 pass_arg2(this, arg_2);
6447 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
6448 pass_arg1(this, arg_1);
6449 pass_arg0(this, arg_0);
6450 call_VM_leaf(entry_point, 3);
6451 }
6453 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
6454 pass_arg0(this, arg_0);
6455 MacroAssembler::call_VM_leaf_base(entry_point, 1);
6456 }
6458 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
6460 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
6461 pass_arg1(this, arg_1);
6462 pass_arg0(this, arg_0);
6463 MacroAssembler::call_VM_leaf_base(entry_point, 2);
6464 }
6466 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
6467 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
6468 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6469 pass_arg2(this, arg_2);
6470 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
6471 pass_arg1(this, arg_1);
6472 pass_arg0(this, arg_0);
6473 MacroAssembler::call_VM_leaf_base(entry_point, 3);
6474 }
6476 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
6477 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
6478 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
6479 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
6480 pass_arg3(this, arg_3);
6481 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
6482 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6483 pass_arg2(this, arg_2);
6484 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
6485 pass_arg1(this, arg_1);
6486 pass_arg0(this, arg_0);
6487 MacroAssembler::call_VM_leaf_base(entry_point, 4);
6488 }
6490 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
6491 }
6493 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
6494 }
6496 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
6497 if (reachable(src1)) {
6498 cmpl(as_Address(src1), imm);
6499 } else {
6500 lea(rscratch1, src1);
6501 cmpl(Address(rscratch1, 0), imm);
6502 }
6503 }
6505 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
6506 assert(!src2.is_lval(), "use cmpptr");
6507 if (reachable(src2)) {
6508 cmpl(src1, as_Address(src2));
6509 } else {
6510 lea(rscratch1, src2);
6511 cmpl(src1, Address(rscratch1, 0));
6512 }
6513 }
6515 void MacroAssembler::cmp32(Register src1, int32_t imm) {
6516 Assembler::cmpl(src1, imm);
6517 }
6519 void MacroAssembler::cmp32(Register src1, Address src2) {
6520 Assembler::cmpl(src1, src2);
6521 }
6523 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
6524 ucomisd(opr1, opr2);
6526 Label L;
6527 if (unordered_is_less) {
6528 movl(dst, -1);
6529 jcc(Assembler::parity, L);
6530 jcc(Assembler::below , L);
6531 movl(dst, 0);
6532 jcc(Assembler::equal , L);
6533 increment(dst);
6534 } else { // unordered is greater
6535 movl(dst, 1);
6536 jcc(Assembler::parity, L);
6537 jcc(Assembler::above , L);
6538 movl(dst, 0);
6539 jcc(Assembler::equal , L);
6540 decrementl(dst);
6541 }
6542 bind(L);
6543 }
6545 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
6546 ucomiss(opr1, opr2);
6548 Label L;
6549 if (unordered_is_less) {
6550 movl(dst, -1);
6551 jcc(Assembler::parity, L);
6552 jcc(Assembler::below , L);
6553 movl(dst, 0);
6554 jcc(Assembler::equal , L);
6555 increment(dst);
6556 } else { // unordered is greater
6557 movl(dst, 1);
6558 jcc(Assembler::parity, L);
6559 jcc(Assembler::above , L);
6560 movl(dst, 0);
6561 jcc(Assembler::equal , L);
6562 decrementl(dst);
6563 }
6564 bind(L);
6565 }
6568 void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
6569 if (reachable(src1)) {
6570 cmpb(as_Address(src1), imm);
6571 } else {
6572 lea(rscratch1, src1);
6573 cmpb(Address(rscratch1, 0), imm);
6574 }
6575 }
6577 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
6578 #ifdef _LP64
6579 if (src2.is_lval()) {
6580 movptr(rscratch1, src2);
6581 Assembler::cmpq(src1, rscratch1);
6582 } else if (reachable(src2)) {
6583 cmpq(src1, as_Address(src2));
6584 } else {
6585 lea(rscratch1, src2);
6586 Assembler::cmpq(src1, Address(rscratch1, 0));
6587 }
6588 #else
6589 if (src2.is_lval()) {
6590 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
6591 } else {
6592 cmpl(src1, as_Address(src2));
6593 }
6594 #endif // _LP64
6595 }
6597 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
6598 assert(src2.is_lval(), "not a mem-mem compare");
6599 #ifdef _LP64
6600 // moves src2's literal address
6601 movptr(rscratch1, src2);
6602 Assembler::cmpq(src1, rscratch1);
6603 #else
6604 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
6605 #endif // _LP64
6606 }
6608 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
6609 if (reachable(adr)) {
6610 if (os::is_MP())
6611 lock();
6612 cmpxchgptr(reg, as_Address(adr));
6613 } else {
6614 lea(rscratch1, adr);
6615 if (os::is_MP())
6616 lock();
6617 cmpxchgptr(reg, Address(rscratch1, 0));
6618 }
6619 }
6621 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
6622 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
6623 }
6625 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
6626 if (reachable(src)) {
6627 Assembler::comisd(dst, as_Address(src));
6628 } else {
6629 lea(rscratch1, src);
6630 Assembler::comisd(dst, Address(rscratch1, 0));
6631 }
6632 }
6634 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
6635 if (reachable(src)) {
6636 Assembler::comiss(dst, as_Address(src));
6637 } else {
6638 lea(rscratch1, src);
6639 Assembler::comiss(dst, Address(rscratch1, 0));
6640 }
6641 }
6644 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
6645 Condition negated_cond = negate_condition(cond);
6646 Label L;
6647 jcc(negated_cond, L);
6648 atomic_incl(counter_addr);
6649 bind(L);
6650 }
6652 int MacroAssembler::corrected_idivl(Register reg) {
6653 // Full implementation of Java idiv and irem; checks for
6654 // special case as described in JVM spec., p.243 & p.271.
6655 // The function returns the (pc) offset of the idivl
6656 // instruction - may be needed for implicit exceptions.
6657 //
6658 // normal case special case
6659 //
6660 // input : rax,: dividend min_int
6661 // reg: divisor (may not be rax,/rdx) -1
6662 //
6663 // output: rax,: quotient (= rax, idiv reg) min_int
6664 // rdx: remainder (= rax, irem reg) 0
6665 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
6666 const int min_int = 0x80000000;
6667 Label normal_case, special_case;
6669 // check for special case
6670 cmpl(rax, min_int);
6671 jcc(Assembler::notEqual, normal_case);
6672 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
6673 cmpl(reg, -1);
6674 jcc(Assembler::equal, special_case);
6676 // handle normal case
6677 bind(normal_case);
6678 cdql();
6679 int idivl_offset = offset();
6680 idivl(reg);
6682 // normal and special case exit
6683 bind(special_case);
6685 return idivl_offset;
6686 }
6690 void MacroAssembler::decrementl(Register reg, int value) {
6691 if (value == min_jint) {subl(reg, value) ; return; }
6692 if (value < 0) { incrementl(reg, -value); return; }
6693 if (value == 0) { ; return; }
6694 if (value == 1 && UseIncDec) { decl(reg) ; return; }
6695 /* else */ { subl(reg, value) ; return; }
6696 }
6698 void MacroAssembler::decrementl(Address dst, int value) {
6699 if (value == min_jint) {subl(dst, value) ; return; }
6700 if (value < 0) { incrementl(dst, -value); return; }
6701 if (value == 0) { ; return; }
6702 if (value == 1 && UseIncDec) { decl(dst) ; return; }
6703 /* else */ { subl(dst, value) ; return; }
6704 }
6706 void MacroAssembler::division_with_shift (Register reg, int shift_value) {
6707 assert (shift_value > 0, "illegal shift value");
6708 Label _is_positive;
6709 testl (reg, reg);
6710 jcc (Assembler::positive, _is_positive);
6711 int offset = (1 << shift_value) - 1 ;
6713 if (offset == 1) {
6714 incrementl(reg);
6715 } else {
6716 addl(reg, offset);
6717 }
6719 bind (_is_positive);
6720 sarl(reg, shift_value);
6721 }
6723 void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
6724 if (reachable(src)) {
6725 Assembler::divsd(dst, as_Address(src));
6726 } else {
6727 lea(rscratch1, src);
6728 Assembler::divsd(dst, Address(rscratch1, 0));
6729 }
6730 }
6732 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
6733 if (reachable(src)) {
6734 Assembler::divss(dst, as_Address(src));
6735 } else {
6736 lea(rscratch1, src);
6737 Assembler::divss(dst, Address(rscratch1, 0));
6738 }
6739 }
6741 // !defined(COMPILER2) is because of stupid core builds
6742 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
6743 void MacroAssembler::empty_FPU_stack() {
6744 if (VM_Version::supports_mmx()) {
6745 emms();
6746 } else {
6747 for (int i = 8; i-- > 0; ) ffree(i);
6748 }
6749 }
6750 #endif // !LP64 || C1 || !C2
6753 // Defines obj, preserves var_size_in_bytes
6754 void MacroAssembler::eden_allocate(Register obj,
6755 Register var_size_in_bytes,
6756 int con_size_in_bytes,
6757 Register t1,
6758 Label& slow_case) {
6759 assert(obj == rax, "obj must be in rax, for cmpxchg");
6760 assert_different_registers(obj, var_size_in_bytes, t1);
6761 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
6762 jmp(slow_case);
6763 } else {
6764 Register end = t1;
6765 Label retry;
6766 bind(retry);
6767 ExternalAddress heap_top((address) Universe::heap()->top_addr());
6768 movptr(obj, heap_top);
6769 if (var_size_in_bytes == noreg) {
6770 lea(end, Address(obj, con_size_in_bytes));
6771 } else {
6772 lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6773 }
6774 // if end < obj then we wrapped around => object too long => slow case
6775 cmpptr(end, obj);
6776 jcc(Assembler::below, slow_case);
6777 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
6778 jcc(Assembler::above, slow_case);
6779 // Compare obj with the top addr, and if still equal, store the new top addr in
6780 // end at the address of the top addr pointer. Sets ZF if was equal, and clears
6781 // it otherwise. Use lock prefix for atomicity on MPs.
6782 locked_cmpxchgptr(end, heap_top);
6783 jcc(Assembler::notEqual, retry);
6784 }
6785 }
6787 void MacroAssembler::enter() {
6788 push(rbp);
6789 mov(rbp, rsp);
6790 }
6792 // A 5 byte nop that is safe for patching (see patch_verified_entry)
6793 void MacroAssembler::fat_nop() {
6794 if (UseAddressNop) {
6795 addr_nop_5();
6796 } else {
6797 emit_byte(0x26); // es:
6798 emit_byte(0x2e); // cs:
6799 emit_byte(0x64); // fs:
6800 emit_byte(0x65); // gs:
6801 emit_byte(0x90);
6802 }
6803 }
6805 void MacroAssembler::fcmp(Register tmp) {
6806 fcmp(tmp, 1, true, true);
6807 }
6809 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
6810 assert(!pop_right || pop_left, "usage error");
6811 if (VM_Version::supports_cmov()) {
6812 assert(tmp == noreg, "unneeded temp");
6813 if (pop_left) {
6814 fucomip(index);
6815 } else {
6816 fucomi(index);
6817 }
6818 if (pop_right) {
6819 fpop();
6820 }
6821 } else {
6822 assert(tmp != noreg, "need temp");
6823 if (pop_left) {
6824 if (pop_right) {
6825 fcompp();
6826 } else {
6827 fcomp(index);
6828 }
6829 } else {
6830 fcom(index);
6831 }
6832 // convert FPU condition into eflags condition via rax,
6833 save_rax(tmp);
6834 fwait(); fnstsw_ax();
6835 sahf();
6836 restore_rax(tmp);
6837 }
6838 // condition codes set as follows:
6839 //
6840 // CF (corresponds to C0) if x < y
6841 // PF (corresponds to C2) if unordered
6842 // ZF (corresponds to C3) if x = y
6843 }
6845 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
6846 fcmp2int(dst, unordered_is_less, 1, true, true);
6847 }
6849 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
6850 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
6851 Label L;
6852 if (unordered_is_less) {
6853 movl(dst, -1);
6854 jcc(Assembler::parity, L);
6855 jcc(Assembler::below , L);
6856 movl(dst, 0);
6857 jcc(Assembler::equal , L);
6858 increment(dst);
6859 } else { // unordered is greater
6860 movl(dst, 1);
6861 jcc(Assembler::parity, L);
6862 jcc(Assembler::above , L);
6863 movl(dst, 0);
6864 jcc(Assembler::equal , L);
6865 decrementl(dst);
6866 }
6867 bind(L);
6868 }
6870 void MacroAssembler::fld_d(AddressLiteral src) {
6871 fld_d(as_Address(src));
6872 }
6874 void MacroAssembler::fld_s(AddressLiteral src) {
6875 fld_s(as_Address(src));
6876 }
6878 void MacroAssembler::fld_x(AddressLiteral src) {
6879 Assembler::fld_x(as_Address(src));
6880 }
6882 void MacroAssembler::fldcw(AddressLiteral src) {
6883 Assembler::fldcw(as_Address(src));
6884 }
6886 void MacroAssembler::pow_exp_core_encoding() {
6887 // kills rax, rcx, rdx
6888 subptr(rsp,sizeof(jdouble));
6889 // computes 2^X. Stack: X ...
6890 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
6891 // keep it on the thread's stack to compute 2^int(X) later
6892 // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1)
6893 // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
6894 fld_s(0); // Stack: X X ...
6895 frndint(); // Stack: int(X) X ...
6896 fsuba(1); // Stack: int(X) X-int(X) ...
6897 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
6898 f2xm1(); // Stack: 2^(X-int(X))-1 ...
6899 fld1(); // Stack: 1 2^(X-int(X))-1 ...
6900 faddp(1); // Stack: 2^(X-int(X))
6901 // computes 2^(int(X)): add exponent bias (1023) to int(X), then
6902 // shift int(X)+1023 to exponent position.
6903 // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
6904 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
6905 // values so detect them and set result to NaN.
6906 movl(rax,Address(rsp,0));
6907 movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
6908 addl(rax, 1023);
6909 movl(rdx,rax);
6910 shll(rax,20);
6911 // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
6912 addl(rdx,1);
6913 // Check that 1 < int(X)+1023+1 < 2048
6914 // in 3 steps:
6915 // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
6916 // 2- (int(X)+1023+1)&-2048 != 0
6917 // 3- (int(X)+1023+1)&-2048 != 1
6918 // Do 2- first because addl just updated the flags.
6919 cmov32(Assembler::equal,rax,rcx);
6920 cmpl(rdx,1);
6921 cmov32(Assembler::equal,rax,rcx);
6922 testl(rdx,rcx);
6923 cmov32(Assembler::notEqual,rax,rcx);
6924 movl(Address(rsp,4),rax);
6925 movl(Address(rsp,0),0);
6926 fmul_d(Address(rsp,0)); // Stack: 2^X ...
6927 addptr(rsp,sizeof(jdouble));
6928 }
6930 void MacroAssembler::increase_precision() {
6931 subptr(rsp, BytesPerWord);
6932 fnstcw(Address(rsp, 0));
6933 movl(rax, Address(rsp, 0));
6934 orl(rax, 0x300);
6935 push(rax);
6936 fldcw(Address(rsp, 0));
6937 pop(rax);
6938 }
6940 void MacroAssembler::restore_precision() {
6941 fldcw(Address(rsp, 0));
6942 addptr(rsp, BytesPerWord);
6943 }
6945 void MacroAssembler::fast_pow() {
6946 // computes X^Y = 2^(Y * log2(X))
6947 // if fast computation is not possible, result is NaN. Requires
6948 // fallback from user of this macro.
6949 // increase precision for intermediate steps of the computation
6950 increase_precision();
6951 fyl2x(); // Stack: (Y*log2(X)) ...
6952 pow_exp_core_encoding(); // Stack: exp(X) ...
6953 restore_precision();
6954 }
6956 void MacroAssembler::fast_exp() {
6957 // computes exp(X) = 2^(X * log2(e))
6958 // if fast computation is not possible, result is NaN. Requires
6959 // fallback from user of this macro.
6960 // increase precision for intermediate steps of the computation
6961 increase_precision();
6962 fldl2e(); // Stack: log2(e) X ...
6963 fmulp(1); // Stack: (X*log2(e)) ...
6964 pow_exp_core_encoding(); // Stack: exp(X) ...
6965 restore_precision();
6966 }
6968 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
6969 // kills rax, rcx, rdx
6970 // pow and exp needs 2 extra registers on the fpu stack.
6971 Label slow_case, done;
6972 Register tmp = noreg;
6973 if (!VM_Version::supports_cmov()) {
6974 // fcmp needs a temporary so preserve rdx,
6975 tmp = rdx;
6976 }
6977 Register tmp2 = rax;
6978 Register tmp3 = rcx;
6980 if (is_exp) {
6981 // Stack: X
6982 fld_s(0); // duplicate argument for runtime call. Stack: X X
6983 fast_exp(); // Stack: exp(X) X
6984 fcmp(tmp, 0, false, false); // Stack: exp(X) X
6985 // exp(X) not equal to itself: exp(X) is NaN go to slow case.
6986 jcc(Assembler::parity, slow_case);
6987 // get rid of duplicate argument. Stack: exp(X)
6988 if (num_fpu_regs_in_use > 0) {
6989 fxch();
6990 fpop();
6991 } else {
6992 ffree(1);
6993 }
6994 jmp(done);
6995 } else {
6996 // Stack: X Y
6997 Label x_negative, y_odd;
6999 fldz(); // Stack: 0 X Y
7000 fcmp(tmp, 1, true, false); // Stack: X Y
7001 jcc(Assembler::above, x_negative);
7003 // X >= 0
7005 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
7006 fld_s(1); // Stack: X Y X Y
7007 fast_pow(); // Stack: X^Y X Y
7008 fcmp(tmp, 0, false, false); // Stack: X^Y X Y
7009 // X^Y not equal to itself: X^Y is NaN go to slow case.
7010 jcc(Assembler::parity, slow_case);
7011 // get rid of duplicate arguments. Stack: X^Y
7012 if (num_fpu_regs_in_use > 0) {
7013 fxch(); fpop();
7014 fxch(); fpop();
7015 } else {
7016 ffree(2);
7017 ffree(1);
7018 }
7019 jmp(done);
7021 // X <= 0
7022 bind(x_negative);
7024 fld_s(1); // Stack: Y X Y
7025 frndint(); // Stack: int(Y) X Y
7026 fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
7027 jcc(Assembler::notEqual, slow_case);
7029 subptr(rsp, 8);
7031 // For X^Y, when X < 0, Y has to be an integer and the final
7032 // result depends on whether it's odd or even. We just checked
7033 // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
7034 // integer to test its parity. If int(Y) is huge and doesn't fit
7035 // in the 64 bit integer range, the integer indefinite value will
7036 // end up in the gp registers. Huge numbers are all even, the
7037 // integer indefinite number is even so it's fine.
7039 #ifdef ASSERT
7040 // Let's check we don't end up with an integer indefinite number
7041 // when not expected. First test for huge numbers: check whether
7042 // int(Y)+1 == int(Y) which is true for very large numbers and
7043 // those are all even. A 64 bit integer is guaranteed to not
7044 // overflow for numbers where y+1 != y (when precision is set to
7045 // double precision).
7046 Label y_not_huge;
7048 fld1(); // Stack: 1 int(Y) X Y
7049 fadd(1); // Stack: 1+int(Y) int(Y) X Y
7051 #ifdef _LP64
7052 // trip to memory to force the precision down from double extended
7053 // precision
7054 fstp_d(Address(rsp, 0));
7055 fld_d(Address(rsp, 0));
7056 #endif
7058 fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
7059 #endif
7061 // move int(Y) as 64 bit integer to thread's stack
7062 fistp_d(Address(rsp,0)); // Stack: X Y
7064 #ifdef ASSERT
7065 jcc(Assembler::notEqual, y_not_huge);
7067 // Y is huge so we know it's even. It may not fit in a 64 bit
7068 // integer and we don't want the debug code below to see the
7069 // integer indefinite value so overwrite int(Y) on the thread's
7070 // stack with 0.
7071 movl(Address(rsp, 0), 0);
7072 movl(Address(rsp, 4), 0);
7074 bind(y_not_huge);
7075 #endif
7077 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
7078 fld_s(1); // Stack: X Y X Y
7079 fabs(); // Stack: abs(X) Y X Y
7080 fast_pow(); // Stack: abs(X)^Y X Y
7081 fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
7082 // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
7084 pop(tmp2);
7085 NOT_LP64(pop(tmp3));
7086 jcc(Assembler::parity, slow_case);
7088 #ifdef ASSERT
7089 // Check that int(Y) is not integer indefinite value (int
7090 // overflow). Shouldn't happen because for values that would
7091 // overflow, 1+int(Y)==Y which was tested earlier.
7092 #ifndef _LP64
7093 {
7094 Label integer;
7095 testl(tmp2, tmp2);
7096 jcc(Assembler::notZero, integer);
7097 cmpl(tmp3, 0x80000000);
7098 jcc(Assembler::notZero, integer);
7099 stop("integer indefinite value shouldn't be seen here");
7100 bind(integer);
7101 }
7102 #else
7103 {
7104 Label integer;
7105 mov(tmp3, tmp2); // preserve tmp2 for parity check below
7106 shlq(tmp3, 1);
7107 jcc(Assembler::carryClear, integer);
7108 jcc(Assembler::notZero, integer);
7109 stop("integer indefinite value shouldn't be seen here");
7110 bind(integer);
7111 }
7112 #endif
7113 #endif
7115 // get rid of duplicate arguments. Stack: X^Y
7116 if (num_fpu_regs_in_use > 0) {
7117 fxch(); fpop();
7118 fxch(); fpop();
7119 } else {
7120 ffree(2);
7121 ffree(1);
7122 }
7124 testl(tmp2, 1);
7125 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
7126 // X <= 0, Y even: X^Y = -abs(X)^Y
7128 fchs(); // Stack: -abs(X)^Y Y
7129 jmp(done);
7130 }
7132 // slow case: runtime call
7133 bind(slow_case);
7135 fpop(); // pop incorrect result or int(Y)
7137 fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
7138 is_exp ? 1 : 2, num_fpu_regs_in_use);
7140 // Come here with result in F-TOS
7141 bind(done);
7142 }
7144 void MacroAssembler::fpop() {
7145 ffree();
7146 fincstp();
7147 }
7149 void MacroAssembler::fremr(Register tmp) {
7150 save_rax(tmp);
7151 { Label L;
7152 bind(L);
7153 fprem();
7154 fwait(); fnstsw_ax();
7155 #ifdef _LP64
7156 testl(rax, 0x400);
7157 jcc(Assembler::notEqual, L);
7158 #else
7159 sahf();
7160 jcc(Assembler::parity, L);
7161 #endif // _LP64
7162 }
7163 restore_rax(tmp);
7164 // Result is in ST0.
7165 // Note: fxch & fpop to get rid of ST1
7166 // (otherwise FPU stack could overflow eventually)
7167 fxch(1);
7168 fpop();
7169 }
7172 void MacroAssembler::incrementl(AddressLiteral dst) {
7173 if (reachable(dst)) {
7174 incrementl(as_Address(dst));
7175 } else {
7176 lea(rscratch1, dst);
7177 incrementl(Address(rscratch1, 0));
7178 }
7179 }
7181 void MacroAssembler::incrementl(ArrayAddress dst) {
7182 incrementl(as_Address(dst));
7183 }
7185 void MacroAssembler::incrementl(Register reg, int value) {
7186 if (value == min_jint) {addl(reg, value) ; return; }
7187 if (value < 0) { decrementl(reg, -value); return; }
7188 if (value == 0) { ; return; }
7189 if (value == 1 && UseIncDec) { incl(reg) ; return; }
7190 /* else */ { addl(reg, value) ; return; }
7191 }
7193 void MacroAssembler::incrementl(Address dst, int value) {
7194 if (value == min_jint) {addl(dst, value) ; return; }
7195 if (value < 0) { decrementl(dst, -value); return; }
7196 if (value == 0) { ; return; }
7197 if (value == 1 && UseIncDec) { incl(dst) ; return; }
7198 /* else */ { addl(dst, value) ; return; }
7199 }
7201 void MacroAssembler::jump(AddressLiteral dst) {
7202 if (reachable(dst)) {
7203 jmp_literal(dst.target(), dst.rspec());
7204 } else {
7205 lea(rscratch1, dst);
7206 jmp(rscratch1);
7207 }
7208 }
7210 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
7211 if (reachable(dst)) {
7212 InstructionMark im(this);
7213 relocate(dst.reloc());
7214 const int short_size = 2;
7215 const int long_size = 6;
7216 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
7217 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
7218 // 0111 tttn #8-bit disp
7219 emit_byte(0x70 | cc);
7220 emit_byte((offs - short_size) & 0xFF);
7221 } else {
7222 // 0000 1111 1000 tttn #32-bit disp
7223 emit_byte(0x0F);
7224 emit_byte(0x80 | cc);
7225 emit_long(offs - long_size);
7226 }
7227 } else {
7228 #ifdef ASSERT
7229 warning("reversing conditional branch");
7230 #endif /* ASSERT */
7231 Label skip;
7232 jccb(reverse[cc], skip);
7233 lea(rscratch1, dst);
7234 Assembler::jmp(rscratch1);
7235 bind(skip);
7236 }
7237 }
7239 void MacroAssembler::ldmxcsr(AddressLiteral src) {
7240 if (reachable(src)) {
7241 Assembler::ldmxcsr(as_Address(src));
7242 } else {
7243 lea(rscratch1, src);
7244 Assembler::ldmxcsr(Address(rscratch1, 0));
7245 }
7246 }
7248 int MacroAssembler::load_signed_byte(Register dst, Address src) {
7249 int off;
7250 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
7251 off = offset();
7252 movsbl(dst, src); // movsxb
7253 } else {
7254 off = load_unsigned_byte(dst, src);
7255 shll(dst, 24);
7256 sarl(dst, 24);
7257 }
7258 return off;
7259 }
7261 // Note: load_signed_short used to be called load_signed_word.
7262 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler
7263 // manual, which means 16 bits, that usage is found nowhere in HotSpot code.
7264 // The term "word" in HotSpot means a 32- or 64-bit machine word.
7265 int MacroAssembler::load_signed_short(Register dst, Address src) {
7266 int off;
7267 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
7268 // This is dubious to me since it seems safe to do a signed 16 => 64 bit
7269 // version but this is what 64bit has always done. This seems to imply
7270 // that users are only using 32bits worth.
7271 off = offset();
7272 movswl(dst, src); // movsxw
7273 } else {
7274 off = load_unsigned_short(dst, src);
7275 shll(dst, 16);
7276 sarl(dst, 16);
7277 }
7278 return off;
7279 }
7281 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
7282 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
7283 // and "3.9 Partial Register Penalties", p. 22).
7284 int off;
7285 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
7286 off = offset();
7287 movzbl(dst, src); // movzxb
7288 } else {
7289 xorl(dst, dst);
7290 off = offset();
7291 movb(dst, src);
7292 }
7293 return off;
7294 }
7296 // Note: load_unsigned_short used to be called load_unsigned_word.
7297 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
7298 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
7299 // and "3.9 Partial Register Penalties", p. 22).
7300 int off;
7301 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
7302 off = offset();
7303 movzwl(dst, src); // movzxw
7304 } else {
7305 xorl(dst, dst);
7306 off = offset();
7307 movw(dst, src);
7308 }
7309 return off;
7310 }
7312 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
7313 switch (size_in_bytes) {
7314 #ifndef _LP64
7315 case 8:
7316 assert(dst2 != noreg, "second dest register required");
7317 movl(dst, src);
7318 movl(dst2, src.plus_disp(BytesPerInt));
7319 break;
7320 #else
7321 case 8: movq(dst, src); break;
7322 #endif
7323 case 4: movl(dst, src); break;
7324 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
7325 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
7326 default: ShouldNotReachHere();
7327 }
7328 }
7330 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
7331 switch (size_in_bytes) {
7332 #ifndef _LP64
7333 case 8:
7334 assert(src2 != noreg, "second source register required");
7335 movl(dst, src);
7336 movl(dst.plus_disp(BytesPerInt), src2);
7337 break;
7338 #else
7339 case 8: movq(dst, src); break;
7340 #endif
7341 case 4: movl(dst, src); break;
7342 case 2: movw(dst, src); break;
7343 case 1: movb(dst, src); break;
7344 default: ShouldNotReachHere();
7345 }
7346 }
7348 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
7349 if (reachable(dst)) {
7350 movl(as_Address(dst), src);
7351 } else {
7352 lea(rscratch1, dst);
7353 movl(Address(rscratch1, 0), src);
7354 }
7355 }
7357 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
7358 if (reachable(src)) {
7359 movl(dst, as_Address(src));
7360 } else {
7361 lea(rscratch1, src);
7362 movl(dst, Address(rscratch1, 0));
7363 }
7364 }
7366 // C++ bool manipulation
7368 void MacroAssembler::movbool(Register dst, Address src) {
7369 if(sizeof(bool) == 1)
7370 movb(dst, src);
7371 else if(sizeof(bool) == 2)
7372 movw(dst, src);
7373 else if(sizeof(bool) == 4)
7374 movl(dst, src);
7375 else
7376 // unsupported
7377 ShouldNotReachHere();
7378 }
7380 void MacroAssembler::movbool(Address dst, bool boolconst) {
7381 if(sizeof(bool) == 1)
7382 movb(dst, (int) boolconst);
7383 else if(sizeof(bool) == 2)
7384 movw(dst, (int) boolconst);
7385 else if(sizeof(bool) == 4)
7386 movl(dst, (int) boolconst);
7387 else
7388 // unsupported
7389 ShouldNotReachHere();
7390 }
7392 void MacroAssembler::movbool(Address dst, Register src) {
7393 if(sizeof(bool) == 1)
7394 movb(dst, src);
7395 else if(sizeof(bool) == 2)
7396 movw(dst, src);
7397 else if(sizeof(bool) == 4)
7398 movl(dst, src);
7399 else
7400 // unsupported
7401 ShouldNotReachHere();
7402 }
7404 void MacroAssembler::movbyte(ArrayAddress dst, int src) {
7405 movb(as_Address(dst), src);
7406 }
7408 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
7409 if (reachable(src)) {
7410 if (UseXmmLoadAndClearUpper) {
7411 movsd (dst, as_Address(src));
7412 } else {
7413 movlpd(dst, as_Address(src));
7414 }
7415 } else {
7416 lea(rscratch1, src);
7417 if (UseXmmLoadAndClearUpper) {
7418 movsd (dst, Address(rscratch1, 0));
7419 } else {
7420 movlpd(dst, Address(rscratch1, 0));
7421 }
7422 }
7423 }
7425 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
7426 if (reachable(src)) {
7427 movss(dst, as_Address(src));
7428 } else {
7429 lea(rscratch1, src);
7430 movss(dst, Address(rscratch1, 0));
7431 }
7432 }
7434 void MacroAssembler::movptr(Register dst, Register src) {
7435 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
7436 }
7438 void MacroAssembler::movptr(Register dst, Address src) {
7439 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
7440 }
7442 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
7443 void MacroAssembler::movptr(Register dst, intptr_t src) {
7444 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
7445 }
7447 void MacroAssembler::movptr(Address dst, Register src) {
7448 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
7449 }
7451 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
7452 if (reachable(src)) {
7453 Assembler::movsd(dst, as_Address(src));
7454 } else {
7455 lea(rscratch1, src);
7456 Assembler::movsd(dst, Address(rscratch1, 0));
7457 }
7458 }
7460 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
7461 if (reachable(src)) {
7462 Assembler::movss(dst, as_Address(src));
7463 } else {
7464 lea(rscratch1, src);
7465 Assembler::movss(dst, Address(rscratch1, 0));
7466 }
7467 }
7469 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
7470 if (reachable(src)) {
7471 Assembler::mulsd(dst, as_Address(src));
7472 } else {
7473 lea(rscratch1, src);
7474 Assembler::mulsd(dst, Address(rscratch1, 0));
7475 }
7476 }
7478 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
7479 if (reachable(src)) {
7480 Assembler::mulss(dst, as_Address(src));
7481 } else {
7482 lea(rscratch1, src);
7483 Assembler::mulss(dst, Address(rscratch1, 0));
7484 }
7485 }
7487 void MacroAssembler::null_check(Register reg, int offset) {
7488 if (needs_explicit_null_check(offset)) {
7489 // provoke OS NULL exception if reg = NULL by
7490 // accessing M[reg] w/o changing any (non-CC) registers
7491 // NOTE: cmpl is plenty here to provoke a segv
7492 cmpptr(rax, Address(reg, 0));
7493 // Note: should probably use testl(rax, Address(reg, 0));
7494 // may be shorter code (however, this version of
7495 // testl needs to be implemented first)
7496 } else {
7497 // nothing to do, (later) access of M[reg + offset]
7498 // will provoke OS NULL exception if reg = NULL
7499 }
7500 }
7502 void MacroAssembler::os_breakpoint() {
7503 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
7504 // (e.g., MSVC can't call ps() otherwise)
7505 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
7506 }
7508 void MacroAssembler::pop_CPU_state() {
7509 pop_FPU_state();
7510 pop_IU_state();
7511 }
7513 void MacroAssembler::pop_FPU_state() {
7514 NOT_LP64(frstor(Address(rsp, 0));)
7515 LP64_ONLY(fxrstor(Address(rsp, 0));)
7516 addptr(rsp, FPUStateSizeInWords * wordSize);
7517 }
7519 void MacroAssembler::pop_IU_state() {
7520 popa();
7521 LP64_ONLY(addq(rsp, 8));
7522 popf();
7523 }
7525 // Save Integer and Float state
7526 // Warning: Stack must be 16 byte aligned (64bit)
7527 void MacroAssembler::push_CPU_state() {
7528 push_IU_state();
7529 push_FPU_state();
7530 }
7532 void MacroAssembler::push_FPU_state() {
7533 subptr(rsp, FPUStateSizeInWords * wordSize);
7534 #ifndef _LP64
7535 fnsave(Address(rsp, 0));
7536 fwait();
7537 #else
7538 fxsave(Address(rsp, 0));
7539 #endif // LP64
7540 }
7542 void MacroAssembler::push_IU_state() {
7543 // Push flags first because pusha kills them
7544 pushf();
7545 // Make sure rsp stays 16-byte aligned
7546 LP64_ONLY(subq(rsp, 8));
7547 pusha();
7548 }
7550 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
7551 // determine java_thread register
7552 if (!java_thread->is_valid()) {
7553 java_thread = rdi;
7554 get_thread(java_thread);
7555 }
7556 // we must set sp to zero to clear frame
7557 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
7558 if (clear_fp) {
7559 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
7560 }
7562 if (clear_pc)
7563 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
7565 }
7567 void MacroAssembler::restore_rax(Register tmp) {
7568 if (tmp == noreg) pop(rax);
7569 else if (tmp != rax) mov(rax, tmp);
7570 }
7572 void MacroAssembler::round_to(Register reg, int modulus) {
7573 addptr(reg, modulus - 1);
7574 andptr(reg, -modulus);
7575 }
7577 void MacroAssembler::save_rax(Register tmp) {
7578 if (tmp == noreg) push(rax);
7579 else if (tmp != rax) mov(tmp, rax);
7580 }
7582 // Write serialization page so VM thread can do a pseudo remote membar.
7583 // We use the current thread pointer to calculate a thread specific
7584 // offset to write to within the page. This minimizes bus traffic
7585 // due to cache line collision.
7586 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
7587 movl(tmp, thread);
7588 shrl(tmp, os::get_serialize_page_shift_count());
7589 andl(tmp, (os::vm_page_size() - sizeof(int)));
7591 Address index(noreg, tmp, Address::times_1);
7592 ExternalAddress page(os::get_memory_serialize_page());
7594 // Size of store must match masking code above
7595 movl(as_Address(ArrayAddress(page, index)), tmp);
7596 }
7598 // Calls to C land
7599 //
7600 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
7601 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
7602 // has to be reset to 0. This is required to allow proper stack traversal.
7603 void MacroAssembler::set_last_Java_frame(Register java_thread,
7604 Register last_java_sp,
7605 Register last_java_fp,
7606 address last_java_pc) {
7607 // determine java_thread register
7608 if (!java_thread->is_valid()) {
7609 java_thread = rdi;
7610 get_thread(java_thread);
7611 }
7612 // determine last_java_sp register
7613 if (!last_java_sp->is_valid()) {
7614 last_java_sp = rsp;
7615 }
7617 // last_java_fp is optional
7619 if (last_java_fp->is_valid()) {
7620 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
7621 }
7623 // last_java_pc is optional
7625 if (last_java_pc != NULL) {
7626 lea(Address(java_thread,
7627 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
7628 InternalAddress(last_java_pc));
7630 }
7631 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
7632 }
7634 void MacroAssembler::shlptr(Register dst, int imm8) {
7635 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
7636 }
7638 void MacroAssembler::shrptr(Register dst, int imm8) {
7639 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
7640 }
7642 void MacroAssembler::sign_extend_byte(Register reg) {
7643 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
7644 movsbl(reg, reg); // movsxb
7645 } else {
7646 shll(reg, 24);
7647 sarl(reg, 24);
7648 }
7649 }
7651 void MacroAssembler::sign_extend_short(Register reg) {
7652 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
7653 movswl(reg, reg); // movsxw
7654 } else {
7655 shll(reg, 16);
7656 sarl(reg, 16);
7657 }
7658 }
7660 void MacroAssembler::testl(Register dst, AddressLiteral src) {
7661 assert(reachable(src), "Address should be reachable");
7662 testl(dst, as_Address(src));
7663 }
7665 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
7666 if (reachable(src)) {
7667 Assembler::sqrtsd(dst, as_Address(src));
7668 } else {
7669 lea(rscratch1, src);
7670 Assembler::sqrtsd(dst, Address(rscratch1, 0));
7671 }
7672 }
7674 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
7675 if (reachable(src)) {
7676 Assembler::sqrtss(dst, as_Address(src));
7677 } else {
7678 lea(rscratch1, src);
7679 Assembler::sqrtss(dst, Address(rscratch1, 0));
7680 }
7681 }
7683 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
7684 if (reachable(src)) {
7685 Assembler::subsd(dst, as_Address(src));
7686 } else {
7687 lea(rscratch1, src);
7688 Assembler::subsd(dst, Address(rscratch1, 0));
7689 }
7690 }
7692 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
7693 if (reachable(src)) {
7694 Assembler::subss(dst, as_Address(src));
7695 } else {
7696 lea(rscratch1, src);
7697 Assembler::subss(dst, Address(rscratch1, 0));
7698 }
7699 }
7701 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7702 if (reachable(src)) {
7703 Assembler::ucomisd(dst, as_Address(src));
7704 } else {
7705 lea(rscratch1, src);
7706 Assembler::ucomisd(dst, Address(rscratch1, 0));
7707 }
7708 }
7710 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7711 if (reachable(src)) {
7712 Assembler::ucomiss(dst, as_Address(src));
7713 } else {
7714 lea(rscratch1, src);
7715 Assembler::ucomiss(dst, Address(rscratch1, 0));
7716 }
7717 }
7719 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7720 // Used in sign-bit flipping with aligned address.
7721 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
7722 if (reachable(src)) {
7723 Assembler::xorpd(dst, as_Address(src));
7724 } else {
7725 lea(rscratch1, src);
7726 Assembler::xorpd(dst, Address(rscratch1, 0));
7727 }
7728 }
7730 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7731 // Used in sign-bit flipping with aligned address.
7732 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
7733 if (reachable(src)) {
7734 Assembler::xorps(dst, as_Address(src));
7735 } else {
7736 lea(rscratch1, src);
7737 Assembler::xorps(dst, Address(rscratch1, 0));
7738 }
7739 }
7741 // AVX 3-operands instructions
7743 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7744 if (reachable(src)) {
7745 vaddsd(dst, nds, as_Address(src));
7746 } else {
7747 lea(rscratch1, src);
7748 vaddsd(dst, nds, Address(rscratch1, 0));
7749 }
7750 }
7752 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7753 if (reachable(src)) {
7754 vaddss(dst, nds, as_Address(src));
7755 } else {
7756 lea(rscratch1, src);
7757 vaddss(dst, nds, Address(rscratch1, 0));
7758 }
7759 }
7761 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7762 if (reachable(src)) {
7763 vandpd(dst, nds, as_Address(src));
7764 } else {
7765 lea(rscratch1, src);
7766 vandpd(dst, nds, Address(rscratch1, 0));
7767 }
7768 }
7770 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7771 if (reachable(src)) {
7772 vandps(dst, nds, as_Address(src));
7773 } else {
7774 lea(rscratch1, src);
7775 vandps(dst, nds, Address(rscratch1, 0));
7776 }
7777 }
7779 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7780 if (reachable(src)) {
7781 vdivsd(dst, nds, as_Address(src));
7782 } else {
7783 lea(rscratch1, src);
7784 vdivsd(dst, nds, Address(rscratch1, 0));
7785 }
7786 }
7788 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7789 if (reachable(src)) {
7790 vdivss(dst, nds, as_Address(src));
7791 } else {
7792 lea(rscratch1, src);
7793 vdivss(dst, nds, Address(rscratch1, 0));
7794 }
7795 }
7797 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7798 if (reachable(src)) {
7799 vmulsd(dst, nds, as_Address(src));
7800 } else {
7801 lea(rscratch1, src);
7802 vmulsd(dst, nds, Address(rscratch1, 0));
7803 }
7804 }
7806 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7807 if (reachable(src)) {
7808 vmulss(dst, nds, as_Address(src));
7809 } else {
7810 lea(rscratch1, src);
7811 vmulss(dst, nds, Address(rscratch1, 0));
7812 }
7813 }
7815 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7816 if (reachable(src)) {
7817 vsubsd(dst, nds, as_Address(src));
7818 } else {
7819 lea(rscratch1, src);
7820 vsubsd(dst, nds, Address(rscratch1, 0));
7821 }
7822 }
7824 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7825 if (reachable(src)) {
7826 vsubss(dst, nds, as_Address(src));
7827 } else {
7828 lea(rscratch1, src);
7829 vsubss(dst, nds, Address(rscratch1, 0));
7830 }
7831 }
7833 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7834 if (reachable(src)) {
7835 vxorpd(dst, nds, as_Address(src));
7836 } else {
7837 lea(rscratch1, src);
7838 vxorpd(dst, nds, Address(rscratch1, 0));
7839 }
7840 }
7842 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
7843 if (reachable(src)) {
7844 vxorps(dst, nds, as_Address(src));
7845 } else {
7846 lea(rscratch1, src);
7847 vxorps(dst, nds, Address(rscratch1, 0));
7848 }
7849 }
7852 //////////////////////////////////////////////////////////////////////////////////
7853 #ifndef SERIALGC
// Emits the G1 pre-write barrier, which logs the previous value of a
// field in the thread's SATB mark queue.
//
//   obj        - if not noreg, the previous value is loaded from [obj]
//                into pre_val; if noreg, pre_val must already hold it.
//   pre_val    - register holding the previous value (must not be noreg).
//   thread     - current thread register (must be r15_thread on LP64).
//   tmp        - scratch register, clobbered.
//   tosca_live - if true, rax is saved/restored around the runtime call.
//   expand_call- see the comment on the runtime call below.
//
// Nothing is recorded when the queue's "active" field is 0 or when the
// previous value is NULL. Otherwise the value is stored into the thread's
// buffer, or handed to SharedRuntime::g1_wb_pre when the queue index is 0.
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  // Fields of the thread's SATB mark queue.
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values (reverse order of the pushes above)
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}
// Emits the G1 post-write barrier for a store of new_val at store_addr.
//
//   store_addr - register holding the address that was stored to.
//   new_val    - register holding the value that was stored.
//   thread     - current thread register (must be r15_thread on LP64).
//   tmp, tmp2  - scratch registers, clobbered (rscratch1 is also used on
//                LP64).
//
// Nothing is logged when store_addr and new_val agree in all bits above
// HeapRegion::LogOfHRGrainBytes, when new_val is NULL, or when the card
// byte for store_addr is already 0. Otherwise the card byte is set to 0
// and the card address is appended to the thread's dirty card queue, or
// handed to SharedRuntime::g1_wb_post when the queue index is 0.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  // Fields of the thread's dirty card queue.
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  // The branch consumes the flags set by the shift above: a zero result
  // means both addresses share all bits above the region-size bits.
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  // card_index and card_addr are both aliases for tmp: the index is
  // turned into the card address in place by the lea below.
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  // Queue full (index == 0)? Then let the runtime handle it; otherwise
  // move the index down one word and store the card address at
  // buffer + index.
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  // restore the live input values (reverse order of the pushes above)
  pop(new_val);
  pop(store_addr);

  bind(done);
}
8063 #endif // SERIALGC
8064 //////////////////////////////////////////////////////////////////////////////////
8067 void MacroAssembler::store_check(Register obj) {
8068 // Does a store check for the oop in register obj. The content of
8069 // register obj is destroyed afterwards.
8070 store_check_part_1(obj);
8071 store_check_part_2(obj);
8072 }
8074 void MacroAssembler::store_check(Register obj, Address dst) {
8075 store_check(obj);
8076 }
8079 // split the store check operation so that other instructions can be scheduled inbetween
8080 void MacroAssembler::store_check_part_1(Register obj) {
8081 BarrierSet* bs = Universe::heap()->barrier_set();
8082 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
8083 shrptr(obj, CardTableModRefBS::card_shift);
8084 }
8086 void MacroAssembler::store_check_part_2(Register obj) {
8087 BarrierSet* bs = Universe::heap()->barrier_set();
8088 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
8089 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
8090 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
8092 // The calculation for byte_map_base is as follows:
8093 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
8094 // So this essentially converts an address to a displacement and
8095 // it will never need to be relocated. On 64bit however the value may be too
8096 // large for a 32bit displacement
8098 intptr_t disp = (intptr_t) ct->byte_map_base;
8099 if (is_simm32(disp)) {
8100 Address cardtable(noreg, obj, Address::times_1, disp);
8101 movb(cardtable, 0);
8102 } else {
8103 // By doing it as an ExternalAddress disp could be converted to a rip-relative
8104 // displacement and done in a single instruction given favorable mapping and
8105 // a smarter version of as_Address. Worst case it is two instructions which
8106 // is no worse off then loading disp into a register and doing as a simple
8107 // Address() as above.
8108 // We can't do as ExternalAddress as the only style since if disp == 0 we'll
8109 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
8110 // in some cases we'll get a single instruction version.
8112 ExternalAddress cardtable((address)disp);
8113 Address index(noreg, obj, Address::times_1);
8114 movb(as_Address(ArrayAddress(cardtable, index)), 0);
8115 }
8116 }
8118 void MacroAssembler::subptr(Register dst, int32_t imm32) {
8119 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
8120 }
8122 // Force generation of a 4 byte immediate value even if it fits into 8bit
8123 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
8124 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
8125 }
8127 void MacroAssembler::subptr(Register dst, Register src) {
8128 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
8129 }
8131 // C++ bool manipulation
8132 void MacroAssembler::testbool(Register dst) {
8133 if(sizeof(bool) == 1)
8134 testb(dst, 0xff);
8135 else if(sizeof(bool) == 2) {
8136 // testw implementation needed for two byte bools
8137 ShouldNotReachHere();
8138 } else if(sizeof(bool) == 4)
8139 testl(dst, dst);
8140 else
8141 // unsupported
8142 ShouldNotReachHere();
8143 }
8145 void MacroAssembler::testptr(Register dst, Register src) {
8146 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
8147 }
8149 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
8150 void MacroAssembler::tlab_allocate(Register obj,
8151 Register var_size_in_bytes,
8152 int con_size_in_bytes,
8153 Register t1,
8154 Register t2,
8155 Label& slow_case) {
8156 assert_different_registers(obj, t1, t2);
8157 assert_different_registers(obj, var_size_in_bytes, t1);
8158 Register end = t2;
8159 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
8161 verify_tlab();
8163 NOT_LP64(get_thread(thread));
8165 movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
8166 if (var_size_in_bytes == noreg) {
8167 lea(end, Address(obj, con_size_in_bytes));
8168 } else {
8169 lea(end, Address(obj, var_size_in_bytes, Address::times_1));
8170 }
8171 cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
8172 jcc(Assembler::above, slow_case);
8174 // update the tlab top pointer
8175 movptr(Address(thread, JavaThread::tlab_top_offset()), end);
8177 // recover var_size_in_bytes if necessary
8178 if (var_size_in_bytes == end) {
8179 subptr(var_size_in_bytes, obj);
8180 }
8181 verify_tlab();
8182 }
// Preserves rbx, and rdx.
//
// Emits the TLAB refill path. Uses rax (top), rcx (t1) and rsi (t2) as
// scratch, plus rdi for the thread on non-LP64.
//
// Emitted control flow:
//  - jumps to slow_case up front if CMSIncrementalMode is set or the heap
//    does not support inline contiguous allocation;
//  - if the free space left in the TLAB exceeds the refill waste limit,
//    the limit is raised and control goes to try_eden (TLAB retained);
//  - otherwise the remaining TLAB space is filled with an int array, a new
//    TLAB is obtained via eden_allocate (which may branch to slow_case),
//    the thread's tlab start/top/end are set, and control goes to retry.
//
// Returns the register that holds the thread, for use by the caller.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  // (t1 is in heap words here; the shift converts it to a count of jints)
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // account for the used part of the old tlab: t1 := top - tlab start
  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // install the new tlab: start = top = new block, end = block + size - reserve
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}
8291 void MacroAssembler::incr_allocated_bytes(Register thread,
8292 Register var_size_in_bytes,
8293 int con_size_in_bytes,
8294 Register t1) {
8295 if (!thread->is_valid()) {
8296 #ifdef _LP64
8297 thread = r15_thread;
8298 #else
8299 assert(t1->is_valid(), "need temp reg");
8300 thread = t1;
8301 get_thread(thread);
8302 #endif
8303 }
8305 #ifdef _LP64
8306 if (var_size_in_bytes->is_valid()) {
8307 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
8308 } else {
8309 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
8310 }
8311 #else
8312 if (var_size_in_bytes->is_valid()) {
8313 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
8314 } else {
8315 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
8316 }
8317 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
8318 #endif
8319 }
8321 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
8322 pusha();
8324 // if we are coming from c1, xmm registers may be live
8325 if (UseSSE >= 1) {
8326 subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
8327 }
8328 int off = 0;
8329 if (UseSSE == 1) {
8330 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
8331 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
8332 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
8333 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
8334 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
8335 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
8336 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
8337 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
8338 } else if (UseSSE >= 2) {
8339 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
8340 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
8341 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
8342 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
8343 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
8344 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
8345 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
8346 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
8347 #ifdef _LP64
8348 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
8349 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
8350 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
8351 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
8352 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
8353 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
8354 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
8355 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
8356 #endif
8357 }
8359 // Preserve registers across runtime call
8360 int incoming_argument_and_return_value_offset = -1;
8361 if (num_fpu_regs_in_use > 1) {
8362 // Must preserve all other FPU regs (could alternatively convert
8363 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
8364 // FPU state, but can not trust C compiler)
8365 NEEDS_CLEANUP;
8366 // NOTE that in this case we also push the incoming argument(s) to
8367 // the stack and restore it later; we also use this stack slot to
8368 // hold the return value from dsin, dcos etc.
8369 for (int i = 0; i < num_fpu_regs_in_use; i++) {
8370 subptr(rsp, sizeof(jdouble));
8371 fstp_d(Address(rsp, 0));
8372 }
8373 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
8374 for (int i = nb_args-1; i >= 0; i--) {
8375 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
8376 }
8377 }
8379 subptr(rsp, nb_args*sizeof(jdouble));
8380 for (int i = 0; i < nb_args; i++) {
8381 fstp_d(Address(rsp, i*sizeof(jdouble)));
8382 }
8384 #ifdef _LP64
8385 if (nb_args > 0) {
8386 movdbl(xmm0, Address(rsp, 0));
8387 }
8388 if (nb_args > 1) {
8389 movdbl(xmm1, Address(rsp, sizeof(jdouble)));
8390 }
8391 assert(nb_args <= 2, "unsupported number of args");
8392 #endif // _LP64
8394 // NOTE: we must not use call_VM_leaf here because that requires a
8395 // complete interpreter frame in debug mode -- same bug as 4387334
8396 // MacroAssembler::call_VM_leaf_base is perfectly safe and will
8397 // do proper 64bit abi
8399 NEEDS_CLEANUP;
8400 // Need to add stack banging before this runtime call if it needs to
8401 // be taken; however, there is no generic stack banging routine at
8402 // the MacroAssembler level
8404 MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
8406 #ifdef _LP64
8407 movsd(Address(rsp, 0), xmm0);
8408 fld_d(Address(rsp, 0));
8409 #endif // _LP64
8410 addptr(rsp, sizeof(jdouble) * nb_args);
8411 if (num_fpu_regs_in_use > 1) {
8412 // Must save return value to stack and then restore entire FPU
8413 // stack except incoming arguments
8414 fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
8415 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
8416 fld_d(Address(rsp, 0));
8417 addptr(rsp, sizeof(jdouble));
8418 }
8419 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
8420 addptr(rsp, sizeof(jdouble) * nb_args);
8421 }
8423 off = 0;
8424 if (UseSSE == 1) {
8425 movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
8426 movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
8427 movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
8428 movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
8429 movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
8430 movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
8431 movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
8432 movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
8433 } else if (UseSSE >= 2) {
8434 movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
8435 movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
8436 movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
8437 movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
8438 movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
8439 movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
8440 movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
8441 movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
8442 #ifdef _LP64
8443 movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
8444 movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
8445 movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
8446 movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
8447 movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
8448 movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
8449 movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
8450 movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
8451 #endif
8452 }
8453 if (UseSSE >= 1) {
8454 addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
8455 }
8456 popa();
8457 }
8459 static const double pi_4 = 0.7853981633974483;
8461 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
8462 // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
8463 // was attempted in this code; unfortunately it appears that the
8464 // switch to 80-bit precision and back causes this to be
8465 // unprofitable compared with simply performing a runtime call if
8466 // the argument is out of the (-pi/4, pi/4) range.
8468 Register tmp = noreg;
8469 if (!VM_Version::supports_cmov()) {
8470 // fcmp needs a temporary so preserve rbx,
8471 tmp = rbx;
8472 push(tmp);
8473 }
8475 Label slow_case, done;
8477 ExternalAddress pi4_adr = (address)&pi_4;
8478 if (reachable(pi4_adr)) {
8479 // x ?<= pi/4
8480 fld_d(pi4_adr);
8481 fld_s(1); // Stack: X PI/4 X
8482 fabs(); // Stack: |X| PI/4 X
8483 fcmp(tmp);
8484 jcc(Assembler::above, slow_case);
8486 // fastest case: -pi/4 <= x <= pi/4
8487 switch(trig) {
8488 case 's':
8489 fsin();
8490 break;
8491 case 'c':
8492 fcos();
8493 break;
8494 case 't':
8495 ftan();
8496 break;
8497 default:
8498 assert(false, "bad intrinsic");
8499 break;
8500 }
8501 jmp(done);
8502 }
8504 // slow case: runtime call
8505 bind(slow_case);
8507 switch(trig) {
8508 case 's':
8509 {
8510 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
8511 }
8512 break;
8513 case 'c':
8514 {
8515 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
8516 }
8517 break;
8518 case 't':
8519 {
8520 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
8521 }
8522 break;
8523 default:
8524 assert(false, "bad intrinsic");
8525 break;
8526 }
8528 // Come here with result in F-TOS
8529 bind(done);
8531 if (tmp != noreg) {
8532 pop(tmp);
8533 }
8534 }
8537 // Look up the method for a megamorphic invokeinterface call.
8538 // The target method is determined by <intf_klass, itable_index>.
8539 // The receiver klass is in recv_klass.
8540 // On success, the result will be in method_result, and execution falls through.
8541 // On failure, execution transfers to the given label.
8542 void MacroAssembler::lookup_interface_method(Register recv_klass,
8543 Register intf_klass,
8544 RegisterOrConstant itable_index,
8545 Register method_result,
8546 Register scan_temp,
8547 Label& L_no_such_interface) {
8548 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
8549 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
8550 "caller must use same register for non-constant itable index as for method");
8552 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
8553 int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
8554 int itentry_off = itableMethodEntry::method_offset_in_bytes();
8555 int scan_step = itableOffsetEntry::size() * wordSize;
8556 int vte_size = vtableEntry::size() * wordSize;
8557 Address::ScaleFactor times_vte_scale = Address::times_ptr;
8558 assert(vte_size == wordSize, "else adjust times_vte_scale");
8560 movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
8562 // %%% Could store the aligned, prescaled offset in the klassoop.
8563 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
8564 if (HeapWordsPerLong > 1) {
8565 // Round up to align_object_offset boundary
8566 // see code for instanceKlass::start_of_itable!
8567 round_to(scan_temp, BytesPerLong);
8568 }
8570 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
8571 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
8572 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
8574 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
8575 // if (scan->interface() == intf) {
8576 // result = (klass + scan->offset() + itable_index);
8577 // }
8578 // }
8579 Label search, found_method;
8581 for (int peel = 1; peel >= 0; peel--) {
8582 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
8583 cmpptr(intf_klass, method_result);
8585 if (peel) {
8586 jccb(Assembler::equal, found_method);
8587 } else {
8588 jccb(Assembler::notEqual, search);
8589 // (invert the test to fall through to found_method...)
8590 }
8592 if (!peel) break;
8594 bind(search);
8596 // Check that the previous entry is non-null. A null entry means that
8597 // the receiver class doesn't implement the interface, and wasn't the
8598 // same as when the caller was compiled.
8599 testptr(method_result, method_result);
8600 jcc(Assembler::zero, L_no_such_interface);
8601 addptr(scan_temp, scan_step);
8602 }
8604 bind(found_method);
8606 // Got a hit.
8607 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
8608 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
8609 }
8612 void MacroAssembler::check_klass_subtype(Register sub_klass,
8613 Register super_klass,
8614 Register temp_reg,
8615 Label& L_success) {
8616 Label L_failure;
8617 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
8618 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
8619 bind(L_failure);
8620 }
8623 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
8624 Register super_klass,
8625 Register temp_reg,
8626 Label* L_success,
8627 Label* L_failure,
8628 Label* L_slow_path,
8629 RegisterOrConstant super_check_offset) {
8630 assert_different_registers(sub_klass, super_klass, temp_reg);
8631 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
8632 if (super_check_offset.is_register()) {
8633 assert_different_registers(sub_klass, super_klass,
8634 super_check_offset.as_register());
8635 } else if (must_load_sco) {
8636 assert(temp_reg != noreg, "supply either a temp or a register offset");
8637 }
8639 Label L_fallthrough;
8640 int label_nulls = 0;
8641 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
8642 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
8643 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
8644 assert(label_nulls <= 1, "at most one NULL in the batch");
8646 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
8647 int sco_offset = in_bytes(Klass::super_check_offset_offset());
8648 Address super_check_offset_addr(super_klass, sco_offset);
8650 // Hacked jcc, which "knows" that L_fallthrough, at least, is in
8651 // range of a jccb. If this routine grows larger, reconsider at
8652 // least some of these.
8653 #define local_jcc(assembler_cond, label) \
8654 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
8655 else jcc( assembler_cond, label) /*omit semi*/
8657 // Hacked jmp, which may only be used just before L_fallthrough.
8658 #define final_jmp(label) \
8659 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
8660 else jmp(label) /*omit semi*/
8662 // If the pointers are equal, we are done (e.g., String[] elements).
8663 // This self-check enables sharing of secondary supertype arrays among
8664 // non-primary types such as array-of-interface. Otherwise, each such
8665 // type would need its own customized SSA.
8666 // We move this check to the front of the fast path because many
8667 // type checks are in fact trivially successful in this manner,
8668 // so we get a nicely predicted branch right at the start of the check.
8669 cmpptr(sub_klass, super_klass);
8670 local_jcc(Assembler::equal, *L_success);
8672 // Check the supertype display:
8673 if (must_load_sco) {
8674 // Positive movl does right thing on LP64.
8675 movl(temp_reg, super_check_offset_addr);
8676 super_check_offset = RegisterOrConstant(temp_reg);
8677 }
8678 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
8679 cmpptr(super_klass, super_check_addr); // load displayed supertype
8681 // This check has worked decisively for primary supers.
8682 // Secondary supers are sought in the super_cache ('super_cache_addr').
8683 // (Secondary supers are interfaces and very deeply nested subtypes.)
8684 // This works in the same check above because of a tricky aliasing
8685 // between the super_cache and the primary super display elements.
8686 // (The 'super_check_addr' can address either, as the case requires.)
8687 // Note that the cache is updated below if it does not help us find
8688 // what we need immediately.
8689 // So if it was a primary super, we can just fail immediately.
8690 // Otherwise, it's the slow path for us (no success at this point).
8692 if (super_check_offset.is_register()) {
8693 local_jcc(Assembler::equal, *L_success);
8694 cmpl(super_check_offset.as_register(), sc_offset);
8695 if (L_failure == &L_fallthrough) {
8696 local_jcc(Assembler::equal, *L_slow_path);
8697 } else {
8698 local_jcc(Assembler::notEqual, *L_failure);
8699 final_jmp(*L_slow_path);
8700 }
8701 } else if (super_check_offset.as_constant() == sc_offset) {
8702 // Need a slow path; fast failure is impossible.
8703 if (L_slow_path == &L_fallthrough) {
8704 local_jcc(Assembler::equal, *L_success);
8705 } else {
8706 local_jcc(Assembler::notEqual, *L_slow_path);
8707 final_jmp(*L_success);
8708 }
8709 } else {
8710 // No slow path; it's a fast decision.
8711 if (L_failure == &L_fallthrough) {
8712 local_jcc(Assembler::equal, *L_success);
8713 } else {
8714 local_jcc(Assembler::notEqual, *L_failure);
8715 final_jmp(*L_success);
8716 }
8717 }
8719 bind(L_fallthrough);
8721 #undef local_jcc
8722 #undef final_jmp
8723 }
8726 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
8727 Register super_klass,
8728 Register temp_reg,
8729 Register temp2_reg,
8730 Label* L_success,
8731 Label* L_failure,
8732 bool set_cond_codes) {
8733 assert_different_registers(sub_klass, super_klass, temp_reg);
8734 if (temp2_reg != noreg)
8735 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
8736 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
8738 Label L_fallthrough;
8739 int label_nulls = 0;
8740 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
8741 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
8742 assert(label_nulls <= 1, "at most one NULL in the batch");
8744 // a couple of useful fields in sub_klass:
8745 int ss_offset = in_bytes(Klass::secondary_supers_offset());
8746 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
8747 Address secondary_supers_addr(sub_klass, ss_offset);
8748 Address super_cache_addr( sub_klass, sc_offset);
8750 // Do a linear scan of the secondary super-klass chain.
8751 // This code is rarely used, so simplicity is a virtue here.
8752 // The repne_scan instruction uses fixed registers, which we must spill.
8753 // Don't worry too much about pre-existing connections with the input regs.
8755 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
8756 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
8758 // Get super_klass value into rax (even if it was in rdi or rcx).
8759 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
8760 if (super_klass != rax || UseCompressedOops) {
8761 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
8762 mov(rax, super_klass);
8763 }
8764 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
8765 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
8767 #ifndef PRODUCT
8768 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
8769 ExternalAddress pst_counter_addr((address) pst_counter);
8770 NOT_LP64( incrementl(pst_counter_addr) );
8771 LP64_ONLY( lea(rcx, pst_counter_addr) );
8772 LP64_ONLY( incrementl(Address(rcx, 0)) );
8773 #endif //PRODUCT
8775 // We will consult the secondary-super array.
8776 movptr(rdi, secondary_supers_addr);
8777 // Load the array length. (Positive movl does right thing on LP64.)
8778 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
8779 // Skip to start of data.
8780 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
8782 // Scan RCX words at [RDI] for an occurrence of RAX.
8783 // Set NZ/Z based on last compare.
8784 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
8785 // not change flags (only scas instruction which is repeated sets flags).
8786 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
8787 #ifdef _LP64
8788 // This part is tricky, as values in supers array could be 32 or 64 bit wide
8789 // and we store values in objArrays always encoded, thus we need to encode
8790 // the value of rax before repne. Note that rax is dead after the repne.
8791 if (UseCompressedOops) {
8792 encode_heap_oop_not_null(rax); // Changes flags.
8793 // The superclass is never null; it would be a basic system error if a null
8794 // pointer were to sneak in here. Note that we have already loaded the
8795 // Klass::super_check_offset from the super_klass in the fast path,
8796 // so if there is a null in that register, we are already in the afterlife.
8797 testl(rax,rax); // Set Z = 0
8798 repne_scanl();
8799 } else
8800 #endif // _LP64
8801 {
8802 testptr(rax,rax); // Set Z = 0
8803 repne_scan();
8804 }
8805 // Unspill the temp. registers:
8806 if (pushed_rdi) pop(rdi);
8807 if (pushed_rcx) pop(rcx);
8808 if (pushed_rax) pop(rax);
8810 if (set_cond_codes) {
8811 // Special hack for the AD files: rdi is guaranteed non-zero.
8812 assert(!pushed_rdi, "rdi must be left non-NULL");
8813 // Also, the condition codes are properly set Z/NZ on succeed/failure.
8814 }
8816 if (L_failure == &L_fallthrough)
8817 jccb(Assembler::notEqual, *L_failure);
8818 else jcc(Assembler::notEqual, *L_failure);
8820 // Success. Cache the super we found and proceed in triumph.
8821 movptr(super_cache_addr, super_klass);
8823 if (L_success != &L_fallthrough) {
8824 jmp(*L_success);
8825 }
8827 #undef IS_A_TEMP
8829 bind(L_fallthrough);
8830 }
8833 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
8834 if (VM_Version::supports_cmov()) {
8835 cmovl(cc, dst, src);
8836 } else {
8837 Label L;
8838 jccb(negate_condition(cc), L);
8839 movl(dst, src);
8840 bind(L);
8841 }
8842 }
8844 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
8845 if (VM_Version::supports_cmov()) {
8846 cmovl(cc, dst, src);
8847 } else {
8848 Label L;
8849 jccb(negate_condition(cc), L);
8850 movl(dst, src);
8851 bind(L);
8852 }
8853 }
8855 void MacroAssembler::verify_oop(Register reg, const char* s) {
8856 if (!VerifyOops) return;
8858 // Pass register number to verify_oop_subroutine
8859 char* b = new char[strlen(s) + 50];
8860 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
8861 #ifdef _LP64
8862 push(rscratch1); // save r10, trashed by movptr()
8863 #endif
8864 push(rax); // save rax,
8865 push(reg); // pass register argument
8866 ExternalAddress buffer((address) b);
8867 // avoid using pushptr, as it modifies scratch registers
8868 // and our contract is not to modify anything
8869 movptr(rax, buffer.addr());
8870 push(rax);
8871 // call indirectly to solve generation ordering problem
8872 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
8873 call(rax);
8874 // Caller pops the arguments (oop, message) and restores rax, r10
8875 }
8878 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
8879 Register tmp,
8880 int offset) {
8881 intptr_t value = *delayed_value_addr;
8882 if (value != 0)
8883 return RegisterOrConstant(value + offset);
8885 // load indirectly to solve generation ordering problem
8886 movptr(tmp, ExternalAddress((address) delayed_value_addr));
8888 #ifdef ASSERT
8889 { Label L;
8890 testptr(tmp, tmp);
8891 if (WizardMode) {
8892 jcc(Assembler::notZero, L);
8893 char* buf = new char[40];
8894 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
8895 stop(buf);
8896 } else {
8897 jccb(Assembler::notZero, L);
8898 hlt();
8899 }
8900 bind(L);
8901 }
8902 #endif
8904 if (offset != 0)
8905 addptr(tmp, offset);
8907 return RegisterOrConstant(tmp);
8908 }
8911 // registers on entry:
8912 // - rax ('check' register): required MethodType
8913 // - rcx: method handle
8914 // - rdx, rsi, or ?: killable temp
8915 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
8916 Register temp_reg,
8917 Label& wrong_method_type) {
8918 Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
8919 // compare method type against that of the receiver
8920 if (UseCompressedOops) {
8921 load_heap_oop(temp_reg, type_addr);
8922 cmpptr(mtype_reg, temp_reg);
8923 } else {
8924 cmpptr(mtype_reg, type_addr);
8925 }
8926 jcc(Assembler::notEqual, wrong_method_type);
8927 }
8930 // A method handle has a "vmslots" field which gives the size of its
8931 // argument list in JVM stack slots. This field is either located directly
8932 // in every method handle, or else is indirectly accessed through the
8933 // method handle's MethodType. This macro hides the distinction.
8934 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
8935 Register temp_reg) {
8936 assert_different_registers(vmslots_reg, mh_reg, temp_reg);
8937 // load mh.type.form.vmslots
8938 Register temp2_reg = vmslots_reg;
8939 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
8940 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
8941 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
8942 }
8945 // registers on entry:
8946 // - rcx: method handle
8947 // - rdx: killable temp (interpreted only)
8948 // - rax: killable temp (compiled only)
8949 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
8950 assert(mh_reg == rcx, "caller must put MH object in rcx");
8951 assert_different_registers(mh_reg, temp_reg);
8953 // pick out the interpreted side of the handler
8954 // NOTE: vmentry is not an oop!
8955 movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
8957 // off we go...
8958 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
8960 // for the various stubs which take control at this point,
8961 // see MethodHandles::generate_method_handle_stub
8962 }
8965 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
8966 int extra_slot_offset) {
8967 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
8968 int stackElementSize = Interpreter::stackElementSize;
8969 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
8970 #ifdef ASSERT
8971 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
8972 assert(offset1 - offset == stackElementSize, "correct arithmetic");
8973 #endif
8974 Register scale_reg = noreg;
8975 Address::ScaleFactor scale_factor = Address::no_scale;
8976 if (arg_slot.is_constant()) {
8977 offset += arg_slot.as_constant() * stackElementSize;
8978 } else {
8979 scale_reg = arg_slot.as_register();
8980 scale_factor = Address::times(stackElementSize);
8981 }
8982 offset += wordSize; // return PC is on stack
8983 return Address(rsp, scale_reg, scale_factor, offset);
8984 }
8987 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
8988 if (!VerifyOops) return;
8990 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
8991 // Pass register number to verify_oop_subroutine
8992 char* b = new char[strlen(s) + 50];
8993 sprintf(b, "verify_oop_addr: %s", s);
8995 #ifdef _LP64
8996 push(rscratch1); // save r10, trashed by movptr()
8997 #endif
8998 push(rax); // save rax,
8999 // addr may contain rsp so we will have to adjust it based on the push
9000 // we just did (and on 64 bit we do two pushes)
9001 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
9002 // stores rax into addr which is backwards of what was intended.
9003 if (addr.uses(rsp)) {
9004 lea(rax, addr);
9005 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
9006 } else {
9007 pushptr(addr);
9008 }
9010 ExternalAddress buffer((address) b);
9011 // pass msg argument
9012 // avoid using pushptr, as it modifies scratch registers
9013 // and our contract is not to modify anything
9014 movptr(rax, buffer.addr());
9015 push(rax);
9017 // call indirectly to solve generation ordering problem
9018 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
9019 call(rax);
9020 // Caller pops the arguments (addr, message) and restores rax, r10.
9021 }
9023 void MacroAssembler::verify_tlab() {
9024 #ifdef ASSERT
9025 if (UseTLAB && VerifyOops) {
9026 Label next, ok;
9027 Register t1 = rsi;
9028 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
9030 push(t1);
9031 NOT_LP64(push(thread_reg));
9032 NOT_LP64(get_thread(thread_reg));
9034 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
9035 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
9036 jcc(Assembler::aboveEqual, next);
9037 stop("assert(top >= start)");
9038 should_not_reach_here();
9040 bind(next);
9041 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
9042 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
9043 jcc(Assembler::aboveEqual, ok);
9044 stop("assert(top <= end)");
9045 should_not_reach_here();
9047 bind(ok);
9048 NOT_LP64(pop(thread_reg));
9049 pop(t1);
9050 }
9051 #endif
9052 }
// Decoded view of an FPU control word; only the low 16 bits of _value are
// printed. The bool accessors report the individual bits 5..0.
class ControlWord {
 public:
  int32_t _value;

  // Two-bit fields.
  int  rounding_control() const  { return (_value >> 10) & 3; }
  int  precision_control() const { return (_value >>  8) & 3; }

  // Single-bit fields.
  bool precision() const         { return (_value & (1 << 5)) != 0; }
  bool underflow() const         { return (_value & (1 << 4)) != 0; }
  bool overflow() const          { return (_value & (1 << 3)) != 0; }
  bool zero_divide() const       { return (_value & (1 << 2)) != 0; }
  bool denormalized() const      { return (_value & (1 << 1)) != 0; }
  bool invalid() const           { return (_value & (1 << 0)) != 0; }

  // Prints the raw word followed by its decoded fields (no newline).
  void print() const {
    // Names indexed by the two-bit field values.
    static const char* const rounding_names[4]  = { "round near", "round down", "round up ", "chop " };
    static const char* const precision_names[4] = { "24 bits ", "reserved", "53 bits ", "64 bits " };
    const char* rc = rounding_names[rounding_control()];
    const char* pc = precision_names[precision_control()];

    // Flag letters for bits 5..0: upper case when the bit is set,
    // lower case when it is clear.
    static const char set_letters[]   = "PUOZDI";
    static const char clear_letters[] = "puozdi";
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    for (int k = 0; k < 6; k++) {
      const bool bit_set = ((_value >> (5 - k)) & 1) != 0;
      f[2 + k] = bit_set ? set_letters[k] : clear_letters[k];
    }
    f[8] = '\x0';

    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
// Decoded view of an FPU status word; only the low 16 bits of _value are
// printed. Accessors report the condition codes, the top-of-stack field and
// the individual flag bits.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const         { return (_value & (1 << 15)) != 0; }

  // Condition codes.
  bool C3() const           { return (_value & (1 << 14)) != 0; }
  bool C2() const           { return (_value & (1 << 10)) != 0; }
  bool C1() const           { return (_value & (1 <<  9)) != 0; }
  bool C0() const           { return (_value & (1 <<  8)) != 0; }

  // Three-bit top-of-stack field.
  int  top() const          { return (_value >> 11) & 7; }

  // Flag bits 7..0.
  bool error_status() const { return (_value & (1 << 7)) != 0; }
  bool stack_fault() const  { return (_value & (1 << 6)) != 0; }
  bool precision() const    { return (_value & (1 << 5)) != 0; }
  bool underflow() const    { return (_value & (1 << 4)) != 0; }
  bool overflow() const     { return (_value & (1 << 3)) != 0; }
  bool zero_divide() const  { return (_value & (1 << 2)) != 0; }
  bool denormalized() const { return (_value & (1 << 1)) != 0; }
  bool invalid() const      { return (_value & (1 << 0)) != 0; }

  // Prints the raw word followed by its decoded fields (no newline).
  void print() const {
    // condition codes: the digit when set, '-' when clear
    char c[5];
    c[0] = C3() ? '3' : '-';
    c[1] = C2() ? '2' : '-';
    c[2] = C1() ? '1' : '-';
    c[3] = C0() ? '0' : '-';
    c[4] = '\x0';

    // flags: one letter per bit 7..0, '-' when the bit is clear
    static const char flag_letters[] = "ESPUOZDI";
    char f[9];
    for (int k = 0; k < 8; k++) {
      const bool bit_set = ((_value >> (7 - k)) & 1) != 0;
      f[k] = bit_set ? flag_letters[k] : '-';
    }
    f[8] = '\x0';

    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};
// An FPU tag word: a sequence of two-bit tags packed into _value.
class TagWord {
 public:
  int32_t _value;

  // Returns the two-bit tag number i (tag 0 occupies the lowest two bits).
  int tag_at(int i) const {
    const int shift = 2 * i;
    return (_value >> shift) & 3;
  }

  // Prints the low 16 bits as four hex digits (no newline).
  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
// One FPU data register as three raw fields: two 32-bit words (_m0 is
// printed last, _m1 first) and a 16-bit word (_ex) whose sign is printed
// as the sign of the value.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the one bit pattern this file treats as "indefinite".
  bool is_indefinite() const {
    if (_ex != -1)                   return false;
    if (_m1 != (int32_t)0xC0000000)  return false;
    return _m0 == 0;
  }

  // Prints sign, the raw fields in hex and a "NaN" marker (no newline).
  void print() const {
    const char sign = (_ex < 0) ? '-' : '+';
    const bool all_ones_exponent = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    const char* kind = all_ones_exponent ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  }

};
9175 class FPU_State {
9176 public:
9177 enum {
9178 register_size = 10,
9179 number_of_registers = 8,
9180 register_mask = 7
9181 };
9183 ControlWord _control_word;
9184 StatusWord _status_word;
9185 TagWord _tag_word;
9186 int32_t _error_offset;
9187 int32_t _error_selector;
9188 int32_t _data_offset;
9189 int32_t _data_selector;
9190 int8_t _register[register_size * number_of_registers];
9192 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
9193 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
9195 const char* tag_as_string(int tag) const {
9196 switch (tag) {
9197 case 0: return "valid";
9198 case 1: return "zero";
9199 case 2: return "special";
9200 case 3: return "empty";
9201 }
9202 ShouldNotReachHere();
9203 return NULL;
9204 }
9206 void print() const {
9207 // print computation registers
9208 { int t = _status_word.top();
9209 for (int i = 0; i < number_of_registers; i++) {
9210 int j = (i - t) & register_mask;
9211 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
9212 st(j)->print();
9213 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
9214 }
9215 }
9216 printf("\n");
9217 // print control registers
9218 printf("ctrl = "); _control_word.print(); printf("\n");
9219 printf("stat = "); _status_word .print(); printf("\n");
9220 printf("tags = "); _tag_word .print(); printf("\n");
9221 }
9223 };
// Decoded view of a saved flags register; each accessor reports one bit of
// _value.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const        { return (_value & (1 << 11)) != 0; }
  bool direction() const       { return (_value & (1 << 10)) != 0; }
  bool sign() const            { return (_value & (1 <<  7)) != 0; }
  bool zero() const            { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const { return (_value & (1 <<  4)) != 0; }
  bool parity() const          { return (_value & (1 <<  2)) != 0; }
  bool carry() const           { return (_value & (1 <<  0)) != 0; }

  // Prints the raw value followed by one letter per flag, '-' when the flag
  // is clear (no newline).
  void print() const {
    // Bit position and letter of each flag, in output order.
    static const int  flag_bits[7]    = { 11, 10, 7, 6, 4, 2, 0 };
    static const char flag_letters[8] = "ODSZAPC";

    char f[8];
    for (int k = 0; k < 7; k++) {
      const bool bit_set = ((_value >> flag_bits[k]) & 1) != 0;
      f[k] = bit_set ? flag_letters[k] : '-';
    }
    f[7] = '\x0';

    // output
    printf("%08x flags = %s", _value, f);
  }

};
// One saved integer register, held as a 32-bit value.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value in hex and in decimal (no newline).
  void print() const {
    const int32_t v = _value;
    printf("%08x %11d", v, v);
  }

};
9264 class IU_State {
9265 public:
9266 Flag_Register _eflags;
9267 IU_Register _rdi;
9268 IU_Register _rsi;
9269 IU_Register _rbp;
9270 IU_Register _rsp;
9271 IU_Register _rbx;
9272 IU_Register _rdx;
9273 IU_Register _rcx;
9274 IU_Register _rax;
9276 void print() const {
9277 // computation registers
9278 printf("rax, = "); _rax.print(); printf("\n");
9279 printf("rbx, = "); _rbx.print(); printf("\n");
9280 printf("rcx = "); _rcx.print(); printf("\n");
9281 printf("rdx = "); _rdx.print(); printf("\n");
9282 printf("rdi = "); _rdi.print(); printf("\n");
9283 printf("rsi = "); _rsi.print(); printf("\n");
9284 printf("rbp, = "); _rbp.print(); printf("\n");
9285 printf("rsp = "); _rsp.print(); printf("\n");
9286 printf("\n");
9287 // control registers
9288 printf("flgs = "); _eflags.print(); printf("\n");
9289 }
9290 };
9293 class CPU_State {
9294 public:
9295 FPU_State _fpu_state;
9296 IU_State _iu_state;
9298 void print() const {
9299 printf("--------------------------------------------------\n");
9300 _iu_state .print();
9301 printf("\n");
9302 _fpu_state.print();
9303 printf("--------------------------------------------------\n");
9304 }
9306 };
9309 static void _print_CPU_state(CPU_State* state) {
9310 state->print();
9311 };
9314 void MacroAssembler::print_CPU_state() {
9315 push_CPU_state();
9316 push(rsp); // pass CPU state
9317 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
9318 addptr(rsp, wordSize); // discard argument
9319 pop_CPU_state();
9320 }
9323 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
9324 static int counter = 0;
9325 FPU_State* fs = &state->_fpu_state;
9326 counter++;
9327 // For leaf calls, only verify that the top few elements remain empty.
9328 // We only need 1 empty at the top for C2 code.
9329 if( stack_depth < 0 ) {
9330 if( fs->tag_for_st(7) != 3 ) {
9331 printf("FPR7 not empty\n");
9332 state->print();
9333 assert(false, "error");
9334 return false;
9335 }
9336 return true; // All other stack states do not matter
9337 }
9339 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
9340 "bad FPU control word");
9342 // compute stack depth
9343 int i = 0;
9344 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
9345 int d = i;
9346 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
9347 // verify findings
9348 if (i != FPU_State::number_of_registers) {
9349 // stack not contiguous
9350 printf("%s: stack not contiguous at ST%d\n", s, i);
9351 state->print();
9352 assert(false, "error");
9353 return false;
9354 }
9355 // check if computed stack depth corresponds to expected stack depth
9356 if (stack_depth < 0) {
9357 // expected stack depth is -stack_depth or less
9358 if (d > -stack_depth) {
9359 // too many elements on the stack
9360 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
9361 state->print();
9362 assert(false, "error");
9363 return false;
9364 }
9365 } else {
9366 // expected stack depth is stack_depth
9367 if (d != stack_depth) {
9368 // wrong stack depth
9369 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
9370 state->print();
9371 assert(false, "error");
9372 return false;
9373 }
9374 }
9375 // everything is cool
9376 return true;
9377 }
9380 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
9381 if (!VerifyFPU) return;
9382 push_CPU_state();
9383 push(rsp); // pass CPU state
9384 ExternalAddress msg((address) s);
9385 // pass message string s
9386 pushptr(msg.addr());
9387 push(stack_depth); // pass stack depth
9388 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
9389 addptr(rsp, 3 * wordSize); // discard arguments
9390 // check for error
9391 { Label L;
9392 testl(rax, rax);
9393 jcc(Assembler::notZero, L);
9394 int3(); // break if error condition
9395 bind(L);
9396 }
9397 pop_CPU_state();
9398 }
9400 void MacroAssembler::load_klass(Register dst, Register src) {
9401 #ifdef _LP64
9402 if (UseCompressedOops) {
9403 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
9404 decode_heap_oop_not_null(dst);
9405 } else
9406 #endif
9407 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
9408 }
9410 void MacroAssembler::load_prototype_header(Register dst, Register src) {
9411 #ifdef _LP64
9412 if (UseCompressedOops) {
9413 assert (Universe::heap() != NULL, "java heap should be initialized");
9414 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
9415 if (Universe::narrow_oop_shift() != 0) {
9416 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
9417 if (LogMinObjAlignmentInBytes == Address::times_8) {
9418 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
9419 } else {
9420 // OK to use shift since we don't need to preserve flags.
9421 shlq(dst, LogMinObjAlignmentInBytes);
9422 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset()));
9423 }
9424 } else {
9425 movq(dst, Address(dst, Klass::prototype_header_offset()));
9426 }
9427 } else
9428 #endif
9429 {
9430 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
9431 movptr(dst, Address(dst, Klass::prototype_header_offset()));
9432 }
9433 }
9435 void MacroAssembler::store_klass(Register dst, Register src) {
9436 #ifdef _LP64
9437 if (UseCompressedOops) {
9438 encode_heap_oop_not_null(src);
9439 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
9440 } else
9441 #endif
9442 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
9443 }
9445 void MacroAssembler::load_heap_oop(Register dst, Address src) {
9446 #ifdef _LP64
9447 if (UseCompressedOops) {
9448 movl(dst, src);
9449 decode_heap_oop(dst);
9450 } else
9451 #endif
9452 movptr(dst, src);
9453 }
9455 // Doesn't do verfication, generates fixed size code
9456 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
9457 #ifdef _LP64
9458 if (UseCompressedOops) {
9459 movl(dst, src);
9460 decode_heap_oop_not_null(dst);
9461 } else
9462 #endif
9463 movptr(dst, src);
9464 }
9466 void MacroAssembler::store_heap_oop(Address dst, Register src) {
9467 #ifdef _LP64
9468 if (UseCompressedOops) {
9469 assert(!dst.uses(src), "not enough registers");
9470 encode_heap_oop(src);
9471 movl(dst, src);
9472 } else
9473 #endif
9474 movptr(dst, src);
9475 }
9477 // Used for storing NULLs.
9478 void MacroAssembler::store_heap_oop_null(Address dst) {
9479 #ifdef _LP64
9480 if (UseCompressedOops) {
9481 movl(dst, (int32_t)NULL_WORD);
9482 } else {
9483 movslq(dst, (int32_t)NULL_WORD);
9484 }
9485 #else
9486 movl(dst, (int32_t)NULL_WORD);
9487 #endif
9488 }
9490 #ifdef _LP64
9491 void MacroAssembler::store_klass_gap(Register dst, Register src) {
9492 if (UseCompressedOops) {
9493 // Store to klass gap in destination
9494 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
9495 }
9496 }
9498 #ifdef ASSERT
9499 void MacroAssembler::verify_heapbase(const char* msg) {
9500 assert (UseCompressedOops, "should be compressed");
9501 assert (Universe::heap() != NULL, "java heap should be initialized");
9502 if (CheckCompressedOops) {
9503 Label ok;
9504 push(rscratch1); // cmpptr trashes rscratch1
9505 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
9506 jcc(Assembler::equal, ok);
9507 stop(msg);
9508 bind(ok);
9509 pop(rscratch1);
9510 }
9511 }
9512 #endif
9514 // Algorithm must match oop.inline.hpp encode_heap_oop.
9515 void MacroAssembler::encode_heap_oop(Register r) {
9516 #ifdef ASSERT
9517 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
9518 #endif
9519 verify_oop(r, "broken oop in encode_heap_oop");
9520 if (Universe::narrow_oop_base() == NULL) {
9521 if (Universe::narrow_oop_shift() != 0) {
9522 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
9523 shrq(r, LogMinObjAlignmentInBytes);
9524 }
9525 return;
9526 }
9527 testq(r, r);
9528 cmovq(Assembler::equal, r, r12_heapbase);
9529 subq(r, r12_heapbase);
9530 shrq(r, LogMinObjAlignmentInBytes);
9531 }
9533 void MacroAssembler::encode_heap_oop_not_null(Register r) {
9534 #ifdef ASSERT
9535 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
9536 if (CheckCompressedOops) {
9537 Label ok;
9538 testq(r, r);
9539 jcc(Assembler::notEqual, ok);
9540 stop("null oop passed to encode_heap_oop_not_null");
9541 bind(ok);
9542 }
9543 #endif
9544 verify_oop(r, "broken oop in encode_heap_oop_not_null");
9545 if (Universe::narrow_oop_base() != NULL) {
9546 subq(r, r12_heapbase);
9547 }
9548 if (Universe::narrow_oop_shift() != 0) {
9549 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
9550 shrq(r, LogMinObjAlignmentInBytes);
9551 }
9552 }
9554 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
9555 #ifdef ASSERT
9556 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
9557 if (CheckCompressedOops) {
9558 Label ok;
9559 testq(src, src);
9560 jcc(Assembler::notEqual, ok);
9561 stop("null oop passed to encode_heap_oop_not_null2");
9562 bind(ok);
9563 }
9564 #endif
9565 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
9566 if (dst != src) {
9567 movq(dst, src);
9568 }
9569 if (Universe::narrow_oop_base() != NULL) {
9570 subq(dst, r12_heapbase);
9571 }
9572 if (Universe::narrow_oop_shift() != 0) {
9573 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
9574 shrq(dst, LogMinObjAlignmentInBytes);
9575 }
9576 }
9578 void MacroAssembler::decode_heap_oop(Register r) {
9579 #ifdef ASSERT
9580 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
9581 #endif
9582 if (Universe::narrow_oop_base() == NULL) {
9583 if (Universe::narrow_oop_shift() != 0) {
9584 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
9585 shlq(r, LogMinObjAlignmentInBytes);
9586 }
9587 } else {
9588 Label done;
9589 shlq(r, LogMinObjAlignmentInBytes);
9590 jccb(Assembler::equal, done);
9591 addq(r, r12_heapbase);
9592 bind(done);
9593 }
9594 verify_oop(r, "broken oop in decode_heap_oop");
9595 }
9597 void MacroAssembler::decode_heap_oop_not_null(Register r) {
9598 // Note: it will change flags
9599 assert (UseCompressedOops, "should only be used for compressed headers");
9600 assert (Universe::heap() != NULL, "java heap should be initialized");
9601 // Cannot assert, unverified entry point counts instructions (see .ad file)
9602 // vtableStubs also counts instructions in pd_code_size_limit.
9603 // Also do not verify_oop as this is called by verify_oop.
9604 if (Universe::narrow_oop_shift() != 0) {
9605 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
9606 shlq(r, LogMinObjAlignmentInBytes);
9607 if (Universe::narrow_oop_base() != NULL) {
9608 addq(r, r12_heapbase);
9609 }
9610 } else {
9611 assert (Universe::narrow_oop_base() == NULL, "sanity");
9612 }
9613 }
9615 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
9616 // Note: it will change flags
9617 assert (UseCompressedOops, "should only be used for compressed headers");
9618 assert (Universe::heap() != NULL, "java heap should be initialized");
9619 // Cannot assert, unverified entry point counts instructions (see .ad file)
9620 // vtableStubs also counts instructions in pd_code_size_limit.
9621 // Also do not verify_oop as this is called by verify_oop.
9622 if (Universe::narrow_oop_shift() != 0) {
9623 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
9624 if (LogMinObjAlignmentInBytes == Address::times_8) {
9625 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
9626 } else {
9627 if (dst != src) {
9628 movq(dst, src);
9629 }
9630 shlq(dst, LogMinObjAlignmentInBytes);
9631 if (Universe::narrow_oop_base() != NULL) {
9632 addq(dst, r12_heapbase);
9633 }
9634 }
9635 } else {
9636 assert (Universe::narrow_oop_base() == NULL, "sanity");
9637 if (dst != src) {
9638 movq(dst, src);
9639 }
9640 }
9641 }
9643 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
9644 assert (UseCompressedOops, "should only be used for compressed headers");
9645 assert (Universe::heap() != NULL, "java heap should be initialized");
9646 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
9647 int oop_index = oop_recorder()->find_index(obj);
9648 RelocationHolder rspec = oop_Relocation::spec(oop_index);
9649 mov_narrow_oop(dst, oop_index, rspec);
9650 }
9652 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
9653 assert (UseCompressedOops, "should only be used for compressed headers");
9654 assert (Universe::heap() != NULL, "java heap should be initialized");
9655 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
9656 int oop_index = oop_recorder()->find_index(obj);
9657 RelocationHolder rspec = oop_Relocation::spec(oop_index);
9658 mov_narrow_oop(dst, oop_index, rspec);
9659 }
9661 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
9662 assert (UseCompressedOops, "should only be used for compressed headers");
9663 assert (Universe::heap() != NULL, "java heap should be initialized");
9664 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
9665 int oop_index = oop_recorder()->find_index(obj);
9666 RelocationHolder rspec = oop_Relocation::spec(oop_index);
9667 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
9668 }
9670 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
9671 assert (UseCompressedOops, "should only be used for compressed headers");
9672 assert (Universe::heap() != NULL, "java heap should be initialized");
9673 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
9674 int oop_index = oop_recorder()->find_index(obj);
9675 RelocationHolder rspec = oop_Relocation::spec(oop_index);
9676 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
9677 }
9679 void MacroAssembler::reinit_heapbase() {
9680 if (UseCompressedOops) {
9681 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
9682 }
9683 }
9684 #endif // _LP64
9687 // C2 compiled method's prolog code.
9688 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
9690 // WARNING: Initial instruction MUST be 5 bytes or longer so that
9691 // NativeJump::patch_verified_entry will be able to patch out the entry
9692 // code safely. The push to verify stack depth is ok at 5 bytes,
9693 // the frame allocation can be either 3 or 6 bytes. So if we don't do
9694 // stack bang then we must use the 6 byte frame allocation even if
9695 // we have no frame. :-(
9697 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
9698 // Remove word for return addr
9699 framesize -= wordSize;
9701 // Calls to C2R adapters often do not accept exceptional returns.
9702 // We require that their callers must bang for them. But be careful, because
9703 // some VM calls (such as call site linkage) can use several kilobytes of
9704 // stack. But the stack safety zone should account for that.
9705 // See bugs 4446381, 4468289, 4497237.
9706 if (stack_bang) {
9707 generate_stack_overflow_check(framesize);
9709 // We always push rbp, so that on return to interpreter rbp, will be
9710 // restored correctly and we can correct the stack.
9711 push(rbp);
9712 // Remove word for ebp
9713 framesize -= wordSize;
9715 // Create frame
9716 if (framesize) {
9717 subptr(rsp, framesize);
9718 }
9719 } else {
9720 // Create frame (force generation of a 4 byte immediate value)
9721 subptr_imm32(rsp, framesize);
9723 // Save RBP register now.
9724 framesize -= wordSize;
9725 movptr(Address(rsp, framesize), rbp);
9726 }
9728 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
9729 framesize -= wordSize;
9730 movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
9731 }
9733 #ifndef _LP64
9734 // If method sets FPU control word do it now
9735 if (fp_mode_24b) {
9736 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
9737 }
9738 if (UseSSE >= 2 && VerifyFPU) {
9739 verify_FPU(0, "FPU stack must be clean on entry");
9740 }
9741 #endif
9743 #ifdef ASSERT
9744 if (VerifyStackAtCalls) {
9745 Label L;
9746 push(rax);
9747 mov(rax, rsp);
9748 andptr(rax, StackAlignmentInBytes-1);
9749 cmpptr(rax, StackAlignmentInBytes-wordSize);
9750 pop(rax);
9751 jcc(Assembler::equal, L);
9752 stop("Stack is not properly aligned!");
9753 bind(L);
9754 }
9755 #endif
9757 }
9760 // IndexOf for constant substrings with size >= 8 chars
9761 // which don't need to be loaded through stack.
9762 void MacroAssembler::string_indexofC8(Register str1, Register str2,
9763 Register cnt1, Register cnt2,
9764 int int_cnt2, Register result,
9765 XMMRegister vec, Register tmp) {
9766 ShortBranchVerifier sbv(this);
9767 assert(UseSSE42Intrinsics, "SSE4.2 is required");
9769 // This method uses pcmpestri inxtruction with bound registers
9770 // inputs:
9771 // xmm - substring
9772 // rax - substring length (elements count)
9773 // mem - scanned string
9774 // rdx - string length (elements count)
9775 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
9776 // outputs:
9777 // rcx - matched index in string
9778 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
9780 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
9781 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
9782 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
9784 // Note, inline_string_indexOf() generates checks:
9785 // if (substr.count > string.count) return -1;
9786 // if (substr.count == 0) return 0;
9787 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
9789 // Load substring.
9790 movdqu(vec, Address(str2, 0));
9791 movl(cnt2, int_cnt2);
9792 movptr(result, str1); // string addr
9794 if (int_cnt2 > 8) {
9795 jmpb(SCAN_TO_SUBSTR);
9797 // Reload substr for rescan, this code
9798 // is executed only for large substrings (> 8 chars)
9799 bind(RELOAD_SUBSTR);
9800 movdqu(vec, Address(str2, 0));
9801 negptr(cnt2); // Jumped here with negative cnt2, convert to positive
9803 bind(RELOAD_STR);
9804 // We came here after the beginning of the substring was
9805 // matched but the rest of it was not so we need to search
9806 // again. Start from the next element after the previous match.
9808 // cnt2 is number of substring reminding elements and
9809 // cnt1 is number of string reminding elements when cmp failed.
9810 // Restored cnt1 = cnt1 - cnt2 + int_cnt2
9811 subl(cnt1, cnt2);
9812 addl(cnt1, int_cnt2);
9813 movl(cnt2, int_cnt2); // Now restore cnt2
9815 decrementl(cnt1); // Shift to next element
9816 cmpl(cnt1, cnt2);
9817 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
9819 addptr(result, 2);
9821 } // (int_cnt2 > 8)
9823 // Scan string for start of substr in 16-byte vectors
9824 bind(SCAN_TO_SUBSTR);
9825 pcmpestri(vec, Address(result, 0), 0x0d);
9826 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
9827 subl(cnt1, 8);
9828 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
9829 cmpl(cnt1, cnt2);
9830 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
9831 addptr(result, 16);
9832 jmpb(SCAN_TO_SUBSTR);
9834 // Found a potential substr
9835 bind(FOUND_CANDIDATE);
9836 // Matched whole vector if first element matched (tmp(rcx) == 0).
9837 if (int_cnt2 == 8) {
9838 jccb(Assembler::overflow, RET_FOUND); // OF == 1
9839 } else { // int_cnt2 > 8
9840 jccb(Assembler::overflow, FOUND_SUBSTR);
9841 }
9842 // After pcmpestri tmp(rcx) contains matched element index
9843 // Compute start addr of substr
9844 lea(result, Address(result, tmp, Address::times_2));
9846 // Make sure string is still long enough
9847 subl(cnt1, tmp);
9848 cmpl(cnt1, cnt2);
9849 if (int_cnt2 == 8) {
9850 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
9851 } else { // int_cnt2 > 8
9852 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
9853 }
9854 // Left less then substring.
9856 bind(RET_NOT_FOUND);
9857 movl(result, -1);
9858 jmpb(EXIT);
9860 if (int_cnt2 > 8) {
9861 // This code is optimized for the case when whole substring
9862 // is matched if its head is matched.
9863 bind(MATCH_SUBSTR_HEAD);
9864 pcmpestri(vec, Address(result, 0), 0x0d);
9865 // Reload only string if does not match
9866 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
9868 Label CONT_SCAN_SUBSTR;
9869 // Compare the rest of substring (> 8 chars).
9870 bind(FOUND_SUBSTR);
9871 // First 8 chars are already matched.
9872 negptr(cnt2);
9873 addptr(cnt2, 8);
9875 bind(SCAN_SUBSTR);
9876 subl(cnt1, 8);
9877 cmpl(cnt2, -8); // Do not read beyond substring
9878 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
9879 // Back-up strings to avoid reading beyond substring:
9880 // cnt1 = cnt1 - cnt2 + 8
9881 addl(cnt1, cnt2); // cnt2 is negative
9882 addl(cnt1, 8);
9883 movl(cnt2, 8); negptr(cnt2);
9884 bind(CONT_SCAN_SUBSTR);
9885 if (int_cnt2 < (int)G) {
9886 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
9887 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
9888 } else {
9889 // calculate index in register to avoid integer overflow (int_cnt2*2)
9890 movl(tmp, int_cnt2);
9891 addptr(tmp, cnt2);
9892 movdqu(vec, Address(str2, tmp, Address::times_2, 0));
9893 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
9894 }
9895 // Need to reload strings pointers if not matched whole vector
9896 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
9897 addptr(cnt2, 8);
9898 jcc(Assembler::negative, SCAN_SUBSTR);
9899 // Fall through if found full substring
9901 } // (int_cnt2 > 8)
9903 bind(RET_FOUND);
9904 // Found result if we matched full small substring.
9905 // Compute substr offset
9906 subptr(result, str1);
9907 shrl(result, 1); // index
9908 bind(EXIT);
9910 } // string_indexofC8
9912 // Small strings are loaded through stack if they cross page boundary.
9913 void MacroAssembler::string_indexof(Register str1, Register str2,
9914 Register cnt1, Register cnt2,
9915 int int_cnt2, Register result,
9916 XMMRegister vec, Register tmp) {
9917 ShortBranchVerifier sbv(this);
9918 assert(UseSSE42Intrinsics, "SSE4.2 is required");
9919 //
9920 // int_cnt2 is length of small (< 8 chars) constant substring
9921 // or (-1) for non constant substring in which case its length
9922 // is in cnt2 register.
9923 //
9924 // Note, inline_string_indexOf() generates checks:
9925 // if (substr.count > string.count) return -1;
9926 // if (substr.count == 0) return 0;
9927 //
9928 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
9930 // This method uses pcmpestri inxtruction with bound registers
9931 // inputs:
9932 // xmm - substring
9933 // rax - substring length (elements count)
9934 // mem - scanned string
9935 // rdx - string length (elements count)
9936 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
9937 // outputs:
9938 // rcx - matched index in string
9939 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
9941 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
9942 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
9943 FOUND_CANDIDATE;
9945 { //========================================================
9946 // We don't know where these strings are located
9947 // and we can't read beyond them. Load them through stack.
9948 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
9950 movptr(tmp, rsp); // save old SP
9952 if (int_cnt2 > 0) { // small (< 8 chars) constant substring
9953 if (int_cnt2 == 1) { // One char
9954 load_unsigned_short(result, Address(str2, 0));
9955 movdl(vec, result); // move 32 bits
9956 } else if (int_cnt2 == 2) { // Two chars
9957 movdl(vec, Address(str2, 0)); // move 32 bits
9958 } else if (int_cnt2 == 4) { // Four chars
9959 movq(vec, Address(str2, 0)); // move 64 bits
9960 } else { // cnt2 = { 3, 5, 6, 7 }
9961 // Array header size is 12 bytes in 32-bit VM
9962 // + 6 bytes for 3 chars == 18 bytes,
9963 // enough space to load vec and shift.
9964 assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
9965 movdqu(vec, Address(str2, (int_cnt2*2)-16));
9966 psrldq(vec, 16-(int_cnt2*2));
9967 }
9968 } else { // not constant substring
9969 cmpl(cnt2, 8);
9970 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
9972 // We can read beyond string if srt+16 does not cross page boundary
9973 // since heaps are aligned and mapped by pages.
9974 assert(os::vm_page_size() < (int)G, "default page should be small");
9975 movl(result, str2); // We need only low 32 bits
9976 andl(result, (os::vm_page_size()-1));
9977 cmpl(result, (os::vm_page_size()-16));
9978 jccb(Assembler::belowEqual, CHECK_STR);
9980 // Move small strings to stack to allow load 16 bytes into vec.
9981 subptr(rsp, 16);
9982 int stk_offset = wordSize-2;
9983 push(cnt2);
9985 bind(COPY_SUBSTR);
9986 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
9987 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
9988 decrement(cnt2);
9989 jccb(Assembler::notZero, COPY_SUBSTR);
9991 pop(cnt2);
9992 movptr(str2, rsp); // New substring address
9993 } // non constant
9995 bind(CHECK_STR);
9996 cmpl(cnt1, 8);
9997 jccb(Assembler::aboveEqual, BIG_STRINGS);
9999 // Check cross page boundary.
10000 movl(result, str1); // We need only low 32 bits
10001 andl(result, (os::vm_page_size()-1));
10002 cmpl(result, (os::vm_page_size()-16));
10003 jccb(Assembler::belowEqual, BIG_STRINGS);
10005 subptr(rsp, 16);
10006 int stk_offset = -2;
10007 if (int_cnt2 < 0) { // not constant
10008 push(cnt2);
10009 stk_offset += wordSize;
10010 }
10011 movl(cnt2, cnt1);
10013 bind(COPY_STR);
10014 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
10015 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
10016 decrement(cnt2);
10017 jccb(Assembler::notZero, COPY_STR);
10019 if (int_cnt2 < 0) { // not constant
10020 pop(cnt2);
10021 }
10022 movptr(str1, rsp); // New string address
10024 bind(BIG_STRINGS);
10025 // Load substring.
10026 if (int_cnt2 < 0) { // -1
10027 movdqu(vec, Address(str2, 0));
10028 push(cnt2); // substr count
10029 push(str2); // substr addr
10030 push(str1); // string addr
10031 } else {
10032 // Small (< 8 chars) constant substrings are loaded already.
10033 movl(cnt2, int_cnt2);
10034 }
10035 push(tmp); // original SP
10037 } // Finished loading
10039 //========================================================
10040 // Start search
10041 //
10043 movptr(result, str1); // string addr
10045 if (int_cnt2 < 0) { // Only for non constant substring
10046 jmpb(SCAN_TO_SUBSTR);
10048 // SP saved at sp+0
10049 // String saved at sp+1*wordSize
10050 // Substr saved at sp+2*wordSize
10051 // Substr count saved at sp+3*wordSize
10053 // Reload substr for rescan, this code
10054 // is executed only for large substrings (> 8 chars)
10055 bind(RELOAD_SUBSTR);
10056 movptr(str2, Address(rsp, 2*wordSize));
10057 movl(cnt2, Address(rsp, 3*wordSize));
10058 movdqu(vec, Address(str2, 0));
10059 // We came here after the beginning of the substring was
10060 // matched but the rest of it was not so we need to search
10061 // again. Start from the next element after the previous match.
10062 subptr(str1, result); // Restore counter
10063 shrl(str1, 1);
10064 addl(cnt1, str1);
10065 decrementl(cnt1); // Shift to next element
10066 cmpl(cnt1, cnt2);
10067 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
10069 addptr(result, 2);
10070 } // non constant
10072 // Scan string for start of substr in 16-byte vectors
10073 bind(SCAN_TO_SUBSTR);
10074 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
10075 pcmpestri(vec, Address(result, 0), 0x0d);
10076 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
10077 subl(cnt1, 8);
10078 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
10079 cmpl(cnt1, cnt2);
10080 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
10081 addptr(result, 16);
10083 bind(ADJUST_STR);
10084 cmpl(cnt1, 8); // Do not read beyond string
10085 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
10086 // Back-up string to avoid reading beyond string.
10087 lea(result, Address(result, cnt1, Address::times_2, -16));
10088 movl(cnt1, 8);
10089 jmpb(SCAN_TO_SUBSTR);
10091 // Found a potential substr
10092 bind(FOUND_CANDIDATE);
10093 // After pcmpestri tmp(rcx) contains matched element index
10095 // Make sure string is still long enough
10096 subl(cnt1, tmp);
10097 cmpl(cnt1, cnt2);
10098 jccb(Assembler::greaterEqual, FOUND_SUBSTR);
10099 // Left less then substring.
10101 bind(RET_NOT_FOUND);
10102 movl(result, -1);
10103 jmpb(CLEANUP);
10105 bind(FOUND_SUBSTR);
10106 // Compute start addr of substr
10107 lea(result, Address(result, tmp, Address::times_2));
10109 if (int_cnt2 > 0) { // Constant substring
10110 // Repeat search for small substring (< 8 chars)
10111 // from new point without reloading substring.
10112 // Have to check that we don't read beyond string.
10113 cmpl(tmp, 8-int_cnt2);
10114 jccb(Assembler::greater, ADJUST_STR);
10115 // Fall through if matched whole substring.
10116 } else { // non constant
10117 assert(int_cnt2 == -1, "should be != 0");
10119 addl(tmp, cnt2);
10120 // Found result if we matched whole substring.
10121 cmpl(tmp, 8);
10122 jccb(Assembler::lessEqual, RET_FOUND);
10124 // Repeat search for small substring (<= 8 chars)
10125 // from new point 'str1' without reloading substring.
10126 cmpl(cnt2, 8);
10127 // Have to check that we don't read beyond string.
10128 jccb(Assembler::lessEqual, ADJUST_STR);
10130 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
10131 // Compare the rest of substring (> 8 chars).
10132 movptr(str1, result);
10134 cmpl(tmp, cnt2);
10135 // First 8 chars are already matched.
10136 jccb(Assembler::equal, CHECK_NEXT);
10138 bind(SCAN_SUBSTR);
10139 pcmpestri(vec, Address(str1, 0), 0x0d);
10140 // Need to reload strings pointers if not matched whole vector
10141 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
10143 bind(CHECK_NEXT);
10144 subl(cnt2, 8);
10145 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
10146 addptr(str1, 16);
10147 addptr(str2, 16);
10148 subl(cnt1, 8);
10149 cmpl(cnt2, 8); // Do not read beyond substring
10150 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
10151 // Back-up strings to avoid reading beyond substring.
10152 lea(str2, Address(str2, cnt2, Address::times_2, -16));
10153 lea(str1, Address(str1, cnt2, Address::times_2, -16));
10154 subl(cnt1, cnt2);
10155 movl(cnt2, 8);
10156 addl(cnt1, 8);
10157 bind(CONT_SCAN_SUBSTR);
10158 movdqu(vec, Address(str2, 0));
10159 jmpb(SCAN_SUBSTR);
10161 bind(RET_FOUND_LONG);
10162 movptr(str1, Address(rsp, wordSize));
10163 } // non constant
10165 bind(RET_FOUND);
10166 // Compute substr offset
10167 subptr(result, str1);
10168 shrl(result, 1); // index
10170 bind(CLEANUP);
10171 pop(rsp); // restore SP
10173 } // string_indexof
10175 // Compare strings.
10176 void MacroAssembler::string_compare(Register str1, Register str2,
10177 Register cnt1, Register cnt2, Register result,
10178 XMMRegister vec1) {
10179 ShortBranchVerifier sbv(this);
10180 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
10182 // Compute the minimum of the string lengths and the
10183 // difference of the string lengths (stack).
10184 // Do the conditional move stuff
10185 movl(result, cnt1);
10186 subl(cnt1, cnt2);
10187 push(cnt1);
10188 cmov32(Assembler::lessEqual, cnt2, result);
10190 // Is the minimum length zero?
10191 testl(cnt2, cnt2);
10192 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
10194 // Load first characters
10195 load_unsigned_short(result, Address(str1, 0));
10196 load_unsigned_short(cnt1, Address(str2, 0));
10198 // Compare first characters
10199 subl(result, cnt1);
10200 jcc(Assembler::notZero, POP_LABEL);
10201 decrementl(cnt2);
10202 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
10204 {
10205 // Check after comparing first character to see if strings are equivalent
10206 Label LSkip2;
10207 // Check if the strings start at same location
10208 cmpptr(str1, str2);
10209 jccb(Assembler::notEqual, LSkip2);
10211 // Check if the length difference is zero (from stack)
10212 cmpl(Address(rsp, 0), 0x0);
10213 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
10215 // Strings might not be equivalent
10216 bind(LSkip2);
10217 }
10219 Address::ScaleFactor scale = Address::times_2;
10220 int stride = 8;
10222 // Advance to next element
10223 addptr(str1, 16/stride);
10224 addptr(str2, 16/stride);
10226 if (UseSSE42Intrinsics) {
10227 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
10228 int pcmpmask = 0x19;
10229 // Setup to compare 16-byte vectors
10230 movl(result, cnt2);
10231 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
10232 jccb(Assembler::zero, COMPARE_TAIL);
10234 lea(str1, Address(str1, result, scale));
10235 lea(str2, Address(str2, result, scale));
10236 negptr(result);
10238 // pcmpestri
10239 // inputs:
10240 // vec1- substring
10241 // rax - negative string length (elements count)
10242 // mem - scaned string
10243 // rdx - string length (elements count)
10244 // pcmpmask - cmp mode: 11000 (string compare with negated result)
10245 // + 00 (unsigned bytes) or + 01 (unsigned shorts)
10246 // outputs:
10247 // rcx - first mismatched element index
10248 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
10250 bind(COMPARE_WIDE_VECTORS);
10251 movdqu(vec1, Address(str1, result, scale));
10252 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
10253 // After pcmpestri cnt1(rcx) contains mismatched element index
10255 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
10256 addptr(result, stride);
10257 subptr(cnt2, stride);
10258 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
10260 // compare wide vectors tail
10261 testl(result, result);
10262 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
10264 movl(cnt2, stride);
10265 movl(result, stride);
10266 negptr(result);
10267 movdqu(vec1, Address(str1, result, scale));
10268 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
10269 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
10271 // Mismatched characters in the vectors
10272 bind(VECTOR_NOT_EQUAL);
10273 addptr(result, cnt1);
10274 movptr(cnt2, result);
10275 load_unsigned_short(result, Address(str1, cnt2, scale));
10276 load_unsigned_short(cnt1, Address(str2, cnt2, scale));
10277 subl(result, cnt1);
10278 jmpb(POP_LABEL);
10280 bind(COMPARE_TAIL); // limit is zero
10281 movl(cnt2, result);
10282 // Fallthru to tail compare
10283 }
10285 // Shift str2 and str1 to the end of the arrays, negate min
10286 lea(str1, Address(str1, cnt2, scale, 0));
10287 lea(str2, Address(str2, cnt2, scale, 0));
10288 negptr(cnt2);
10290 // Compare the rest of the elements
10291 bind(WHILE_HEAD_LABEL);
10292 load_unsigned_short(result, Address(str1, cnt2, scale, 0));
10293 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
10294 subl(result, cnt1);
10295 jccb(Assembler::notZero, POP_LABEL);
10296 increment(cnt2);
10297 jccb(Assembler::notZero, WHILE_HEAD_LABEL);
10299 // Strings are equal up to min length. Return the length difference.
10300 bind(LENGTH_DIFF_LABEL);
10301 pop(result);
10302 jmpb(DONE_LABEL);
10304 // Discard the stored length difference
10305 bind(POP_LABEL);
10306 pop(cnt1);
10308 // That's it
10309 bind(DONE_LABEL);
10310 }
10312 // Compare char[] arrays aligned to 4 bytes or substrings.
10313 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
10314 Register limit, Register result, Register chr,
10315 XMMRegister vec1, XMMRegister vec2) {
10316 ShortBranchVerifier sbv(this);
10317 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
10319 int length_offset = arrayOopDesc::length_offset_in_bytes();
10320 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
10322 // Check the input args
10323 cmpptr(ary1, ary2);
10324 jcc(Assembler::equal, TRUE_LABEL);
10326 if (is_array_equ) {
10327 // Need additional checks for arrays_equals.
10328 testptr(ary1, ary1);
10329 jcc(Assembler::zero, FALSE_LABEL);
10330 testptr(ary2, ary2);
10331 jcc(Assembler::zero, FALSE_LABEL);
10333 // Check the lengths
10334 movl(limit, Address(ary1, length_offset));
10335 cmpl(limit, Address(ary2, length_offset));
10336 jcc(Assembler::notEqual, FALSE_LABEL);
10337 }
10339 // count == 0
10340 testl(limit, limit);
10341 jcc(Assembler::zero, TRUE_LABEL);
10343 if (is_array_equ) {
10344 // Load array address
10345 lea(ary1, Address(ary1, base_offset));
10346 lea(ary2, Address(ary2, base_offset));
10347 }
10349 shll(limit, 1); // byte count != 0
10350 movl(result, limit); // copy
10352 if (UseSSE42Intrinsics) {
10353 // With SSE4.2, use double quad vector compare
10354 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
10356 // Compare 16-byte vectors
10357 andl(result, 0x0000000e); // tail count (in bytes)
10358 andl(limit, 0xfffffff0); // vector count (in bytes)
10359 jccb(Assembler::zero, COMPARE_TAIL);
10361 lea(ary1, Address(ary1, limit, Address::times_1));
10362 lea(ary2, Address(ary2, limit, Address::times_1));
10363 negptr(limit);
10365 bind(COMPARE_WIDE_VECTORS);
10366 movdqu(vec1, Address(ary1, limit, Address::times_1));
10367 movdqu(vec2, Address(ary2, limit, Address::times_1));
10368 pxor(vec1, vec2);
10370 ptest(vec1, vec1);
10371 jccb(Assembler::notZero, FALSE_LABEL);
10372 addptr(limit, 16);
10373 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
10375 testl(result, result);
10376 jccb(Assembler::zero, TRUE_LABEL);
10378 movdqu(vec1, Address(ary1, result, Address::times_1, -16));
10379 movdqu(vec2, Address(ary2, result, Address::times_1, -16));
10380 pxor(vec1, vec2);
10382 ptest(vec1, vec1);
10383 jccb(Assembler::notZero, FALSE_LABEL);
10384 jmpb(TRUE_LABEL);
10386 bind(COMPARE_TAIL); // limit is zero
10387 movl(limit, result);
10388 // Fallthru to tail compare
10389 }
10391 // Compare 4-byte vectors
10392 andl(limit, 0xfffffffc); // vector count (in bytes)
10393 jccb(Assembler::zero, COMPARE_CHAR);
10395 lea(ary1, Address(ary1, limit, Address::times_1));
10396 lea(ary2, Address(ary2, limit, Address::times_1));
10397 negptr(limit);
10399 bind(COMPARE_VECTORS);
10400 movl(chr, Address(ary1, limit, Address::times_1));
10401 cmpl(chr, Address(ary2, limit, Address::times_1));
10402 jccb(Assembler::notEqual, FALSE_LABEL);
10403 addptr(limit, 4);
10404 jcc(Assembler::notZero, COMPARE_VECTORS);
10406 // Compare trailing char (final 2 bytes), if any
10407 bind(COMPARE_CHAR);
10408 testl(result, 0x2); // tail char
10409 jccb(Assembler::zero, TRUE_LABEL);
10410 load_unsigned_short(chr, Address(ary1, 0));
10411 load_unsigned_short(limit, Address(ary2, 0));
10412 cmpl(chr, limit);
10413 jccb(Assembler::notEqual, FALSE_LABEL);
10415 bind(TRUE_LABEL);
10416 movl(result, 1); // return true
10417 jmpb(DONE);
10419 bind(FALSE_LABEL);
10420 xorl(result, result); // return false
10422 // That's it
10423 bind(DONE);
10424 }
10426 #ifdef PRODUCT
10427 #define BLOCK_COMMENT(str) /* nothing */
10428 #else
10429 #define BLOCK_COMMENT(str) block_comment(str)
10430 #endif
10432 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
10433 void MacroAssembler::generate_fill(BasicType t, bool aligned,
10434 Register to, Register value, Register count,
10435 Register rtmp, XMMRegister xtmp) {
10436 ShortBranchVerifier sbv(this);
10437 assert_different_registers(to, value, count, rtmp);
10438 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
10439 Label L_fill_2_bytes, L_fill_4_bytes;
10441 int shift = -1;
10442 switch (t) {
10443 case T_BYTE:
10444 shift = 2;
10445 break;
10446 case T_SHORT:
10447 shift = 1;
10448 break;
10449 case T_INT:
10450 shift = 0;
10451 break;
10452 default: ShouldNotReachHere();
10453 }
10455 if (t == T_BYTE) {
10456 andl(value, 0xff);
10457 movl(rtmp, value);
10458 shll(rtmp, 8);
10459 orl(value, rtmp);
10460 }
10461 if (t == T_SHORT) {
10462 andl(value, 0xffff);
10463 }
10464 if (t == T_BYTE || t == T_SHORT) {
10465 movl(rtmp, value);
10466 shll(rtmp, 16);
10467 orl(value, rtmp);
10468 }
10470 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
10471 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
10472 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
10473 // align source address at 4 bytes address boundary
10474 if (t == T_BYTE) {
10475 // One byte misalignment happens only for byte arrays
10476 testptr(to, 1);
10477 jccb(Assembler::zero, L_skip_align1);
10478 movb(Address(to, 0), value);
10479 increment(to);
10480 decrement(count);
10481 BIND(L_skip_align1);
10482 }
10483 // Two bytes misalignment happens only for byte and short (char) arrays
10484 testptr(to, 2);
10485 jccb(Assembler::zero, L_skip_align2);
10486 movw(Address(to, 0), value);
10487 addptr(to, 2);
10488 subl(count, 1<<(shift-1));
10489 BIND(L_skip_align2);
10490 }
10491 if (UseSSE < 2) {
10492 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
10493 // Fill 32-byte chunks
10494 subl(count, 8 << shift);
10495 jcc(Assembler::less, L_check_fill_8_bytes);
10496 align(16);
10498 BIND(L_fill_32_bytes_loop);
10500 for (int i = 0; i < 32; i += 4) {
10501 movl(Address(to, i), value);
10502 }
10504 addptr(to, 32);
10505 subl(count, 8 << shift);
10506 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
10507 BIND(L_check_fill_8_bytes);
10508 addl(count, 8 << shift);
10509 jccb(Assembler::zero, L_exit);
10510 jmpb(L_fill_8_bytes);
10512 //
10513 // length is too short, just fill qwords
10514 //
10515 BIND(L_fill_8_bytes_loop);
10516 movl(Address(to, 0), value);
10517 movl(Address(to, 4), value);
10518 addptr(to, 8);
10519 BIND(L_fill_8_bytes);
10520 subl(count, 1 << (shift + 1));
10521 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
10522 // fall through to fill 4 bytes
10523 } else {
10524 Label L_fill_32_bytes;
10525 if (!UseUnalignedLoadStores) {
10526 // align to 8 bytes, we know we are 4 byte aligned to start
10527 testptr(to, 4);
10528 jccb(Assembler::zero, L_fill_32_bytes);
10529 movl(Address(to, 0), value);
10530 addptr(to, 4);
10531 subl(count, 1<<shift);
10532 }
10533 BIND(L_fill_32_bytes);
10534 {
10535 assert( UseSSE >= 2, "supported cpu only" );
10536 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
10537 // Fill 32-byte chunks
10538 movdl(xtmp, value);
10539 pshufd(xtmp, xtmp, 0);
10541 subl(count, 8 << shift);
10542 jcc(Assembler::less, L_check_fill_8_bytes);
10543 align(16);
10545 BIND(L_fill_32_bytes_loop);
10547 if (UseUnalignedLoadStores) {
10548 movdqu(Address(to, 0), xtmp);
10549 movdqu(Address(to, 16), xtmp);
10550 } else {
10551 movq(Address(to, 0), xtmp);
10552 movq(Address(to, 8), xtmp);
10553 movq(Address(to, 16), xtmp);
10554 movq(Address(to, 24), xtmp);
10555 }
10557 addptr(to, 32);
10558 subl(count, 8 << shift);
10559 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
10560 BIND(L_check_fill_8_bytes);
10561 addl(count, 8 << shift);
10562 jccb(Assembler::zero, L_exit);
10563 jmpb(L_fill_8_bytes);
10565 //
10566 // length is too short, just fill qwords
10567 //
10568 BIND(L_fill_8_bytes_loop);
10569 movq(Address(to, 0), xtmp);
10570 addptr(to, 8);
10571 BIND(L_fill_8_bytes);
10572 subl(count, 1 << (shift + 1));
10573 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
10574 }
10575 }
10576 // fill trailing 4 bytes
10577 BIND(L_fill_4_bytes);
10578 testl(count, 1<<shift);
10579 jccb(Assembler::zero, L_fill_2_bytes);
10580 movl(Address(to, 0), value);
10581 if (t == T_BYTE || t == T_SHORT) {
10582 addptr(to, 4);
10583 BIND(L_fill_2_bytes);
10584 // fill trailing 2 bytes
10585 testl(count, 1<<(shift-1));
10586 jccb(Assembler::zero, L_fill_byte);
10587 movw(Address(to, 0), value);
10588 if (t == T_BYTE) {
10589 addptr(to, 2);
10590 BIND(L_fill_byte);
10591 // fill trailing byte
10592 testl(count, 1);
10593 jccb(Assembler::zero, L_exit);
10594 movb(Address(to, 0), value);
10595 } else {
10596 BIND(L_fill_byte);
10597 }
10598 } else {
10599 BIND(L_fill_2_bytes);
10600 }
10601 BIND(L_exit);
10602 }
10603 #undef BIND
10604 #undef BLOCK_COMMENT
10607 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
10608 switch (cond) {
10609 // Note some conditions are synonyms for others
10610 case Assembler::zero: return Assembler::notZero;
10611 case Assembler::notZero: return Assembler::zero;
10612 case Assembler::less: return Assembler::greaterEqual;
10613 case Assembler::lessEqual: return Assembler::greater;
10614 case Assembler::greater: return Assembler::lessEqual;
10615 case Assembler::greaterEqual: return Assembler::less;
10616 case Assembler::below: return Assembler::aboveEqual;
10617 case Assembler::belowEqual: return Assembler::above;
10618 case Assembler::above: return Assembler::belowEqual;
10619 case Assembler::aboveEqual: return Assembler::below;
10620 case Assembler::overflow: return Assembler::noOverflow;
10621 case Assembler::noOverflow: return Assembler::overflow;
10622 case Assembler::negative: return Assembler::positive;
10623 case Assembler::positive: return Assembler::negative;
10624 case Assembler::parity: return Assembler::noParity;
10625 case Assembler::noParity: return Assembler::parity;
10626 }
10627 ShouldNotReachHere(); return Assembler::overflow;
10628 }
10630 SkipIfEqual::SkipIfEqual(
10631 MacroAssembler* masm, const bool* flag_addr, bool value) {
10632 _masm = masm;
10633 _masm->cmp8(ExternalAddress((address)flag_addr), value);
10634 _masm->jcc(Assembler::equal, _label);
10635 }
10637 SkipIfEqual::~SkipIfEqual() {
10638 _masm->bind(_label);
10639 }