Thu, 07 Apr 2011 09:53:20 -0700
7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking but is re-attached to the strongly reachable object graph during marking may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and intercepting accesses through JNI, reflection, and Unsafe, so that when a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
1 /*
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "assembler_x86.inline.hpp"
27 #include "gc_interface/collectedHeap.inline.hpp"
28 #include "interpreter/interpreter.hpp"
29 #include "memory/cardTableModRefBS.hpp"
30 #include "memory/resourceArea.hpp"
31 #include "prims/methodHandles.hpp"
32 #include "runtime/biasedLocking.hpp"
33 #include "runtime/interfaceSupport.hpp"
34 #include "runtime/objectMonitor.hpp"
35 #include "runtime/os.hpp"
36 #include "runtime/sharedRuntime.hpp"
37 #include "runtime/stubRoutines.hpp"
38 #ifndef SERIALGC
39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
41 #include "gc_implementation/g1/heapRegion.hpp"
42 #endif
44 // Implementation of AddressLiteral
46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
47 _is_lval = false;
48 _target = target;
49 switch (rtype) {
50 case relocInfo::oop_type:
51 // Oops are a special case. Normally they would be their own section
52 // but in cases like icBuffer they are literals in the code stream that
53 // we don't have a section for. We use none so that we get a literal address
54 // which is always patchable.
55 break;
56 case relocInfo::external_word_type:
57 _rspec = external_word_Relocation::spec(target);
58 break;
59 case relocInfo::internal_word_type:
60 _rspec = internal_word_Relocation::spec(target);
61 break;
62 case relocInfo::opt_virtual_call_type:
63 _rspec = opt_virtual_call_Relocation::spec();
64 break;
65 case relocInfo::static_call_type:
66 _rspec = static_call_Relocation::spec();
67 break;
68 case relocInfo::runtime_call_type:
69 _rspec = runtime_call_Relocation::spec();
70 break;
71 case relocInfo::poll_type:
72 case relocInfo::poll_return_type:
73 _rspec = Relocation::spec_simple(rtype);
74 break;
75 case relocInfo::none:
76 break;
77 default:
78 ShouldNotReachHere();
79 break;
80 }
81 }
83 // Implementation of Address
85 #ifdef _LP64
87 Address Address::make_array(ArrayAddress adr) {
88 // Not implementable on 64bit machines
89 // Should have been handled higher up the call chain.
90 ShouldNotReachHere();
91 return Address();
92 }
94 // exceedingly dangerous constructor
95 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
96 _base = noreg;
97 _index = noreg;
98 _scale = no_scale;
99 _disp = disp;
100 switch (rtype) {
101 case relocInfo::external_word_type:
102 _rspec = external_word_Relocation::spec(loc);
103 break;
104 case relocInfo::internal_word_type:
105 _rspec = internal_word_Relocation::spec(loc);
106 break;
107 case relocInfo::runtime_call_type:
108 // HMM
109 _rspec = runtime_call_Relocation::spec();
110 break;
111 case relocInfo::poll_type:
112 case relocInfo::poll_return_type:
113 _rspec = Relocation::spec_simple(rtype);
114 break;
115 case relocInfo::none:
116 break;
117 default:
118 ShouldNotReachHere();
119 }
120 }
121 #else // LP64
123 Address Address::make_array(ArrayAddress adr) {
124 AddressLiteral base = adr.base();
125 Address index = adr.index();
126 assert(index._disp == 0, "must not have disp"); // maybe it can?
127 Address array(index._base, index._index, index._scale, (intptr_t) base.target());
128 array._rspec = base._rspec;
129 return array;
130 }
132 // exceedingly dangerous constructor
133 Address::Address(address loc, RelocationHolder spec) {
134 _base = noreg;
135 _index = noreg;
136 _scale = no_scale;
137 _disp = (intptr_t) loc;
138 _rspec = spec;
139 }
141 #endif // _LP64
145 // Convert the raw encoding form into the form expected by the constructor for
146 // Address. An index of 4 (rsp) corresponds to having no index, so convert
147 // that to noreg for the Address constructor.
148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
149 RelocationHolder rspec;
150 if (disp_is_oop) {
151 rspec = Relocation::spec_simple(relocInfo::oop_type);
152 }
153 bool valid_index = index != rsp->encoding();
154 if (valid_index) {
155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
156 madr._rspec = rspec;
157 return madr;
158 } else {
159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
160 madr._rspec = rspec;
161 return madr;
162 }
163 }
165 // Implementation of Assembler
167 int AbstractAssembler::code_fill_byte() {
168 return (u_char)'\xF4'; // hlt
169 }
171 // make this go away someday
172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
173 if (rtype == relocInfo::none)
174 emit_long(data);
175 else emit_data(data, Relocation::spec_simple(rtype), format);
176 }
178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
179 assert(imm_operand == 0, "default format must be immediate in this file");
180 assert(inst_mark() != NULL, "must be inside InstructionMark");
181 if (rspec.type() != relocInfo::none) {
182 #ifdef ASSERT
183 check_relocation(rspec, format);
184 #endif
185 // Do not use AbstractAssembler::relocate, which is not intended for
186 // embedded words. Instead, relocate to the enclosing instruction.
188 // hack. call32 is too wide for mask so use disp32
189 if (format == call32_operand)
190 code_section()->relocate(inst_mark(), rspec, disp32_operand);
191 else
192 code_section()->relocate(inst_mark(), rspec, format);
193 }
194 emit_long(data);
195 }
197 static int encode(Register r) {
198 int enc = r->encoding();
199 if (enc >= 8) {
200 enc -= 8;
201 }
202 return enc;
203 }
205 static int encode(XMMRegister r) {
206 int enc = r->encoding();
207 if (enc >= 8) {
208 enc -= 8;
209 }
210 return enc;
211 }
213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
214 assert(dst->has_byte_register(), "must have byte register");
215 assert(isByte(op1) && isByte(op2), "wrong opcode");
216 assert(isByte(imm8), "not a byte");
217 assert((op1 & 0x01) == 0, "should be 8bit operation");
218 emit_byte(op1);
219 emit_byte(op2 | encode(dst));
220 emit_byte(imm8);
221 }
224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
225 assert(isByte(op1) && isByte(op2), "wrong opcode");
226 assert((op1 & 0x01) == 1, "should be 32bit operation");
227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
228 if (is8bit(imm32)) {
229 emit_byte(op1 | 0x02); // set sign bit
230 emit_byte(op2 | encode(dst));
231 emit_byte(imm32 & 0xFF);
232 } else {
233 emit_byte(op1);
234 emit_byte(op2 | encode(dst));
235 emit_long(imm32);
236 }
237 }
239 // immediate-to-memory forms
240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
241 assert((op1 & 0x01) == 1, "should be 32bit operation");
242 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
243 if (is8bit(imm32)) {
244 emit_byte(op1 | 0x02); // set sign bit
245 emit_operand(rm, adr, 1);
246 emit_byte(imm32 & 0xFF);
247 } else {
248 emit_byte(op1);
249 emit_operand(rm, adr, 4);
250 emit_long(imm32);
251 }
252 }
254 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
255 LP64_ONLY(ShouldNotReachHere());
256 assert(isByte(op1) && isByte(op2), "wrong opcode");
257 assert((op1 & 0x01) == 1, "should be 32bit operation");
258 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
259 InstructionMark im(this);
260 emit_byte(op1);
261 emit_byte(op2 | encode(dst));
262 emit_data((intptr_t)obj, relocInfo::oop_type, 0);
263 }
266 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
267 assert(isByte(op1) && isByte(op2), "wrong opcode");
268 emit_byte(op1);
269 emit_byte(op2 | encode(dst) << 3 | encode(src));
270 }
273 void Assembler::emit_operand(Register reg, Register base, Register index,
274 Address::ScaleFactor scale, int disp,
275 RelocationHolder const& rspec,
276 int rip_relative_correction) {
277 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
279 // Encode the registers as needed in the fields they are used in
281 int regenc = encode(reg) << 3;
282 int indexenc = index->is_valid() ? encode(index) << 3 : 0;
283 int baseenc = base->is_valid() ? encode(base) : 0;
285 if (base->is_valid()) {
286 if (index->is_valid()) {
287 assert(scale != Address::no_scale, "inconsistent address");
288 // [base + index*scale + disp]
289 if (disp == 0 && rtype == relocInfo::none &&
290 base != rbp LP64_ONLY(&& base != r13)) {
291 // [base + index*scale]
292 // [00 reg 100][ss index base]
293 assert(index != rsp, "illegal addressing mode");
294 emit_byte(0x04 | regenc);
295 emit_byte(scale << 6 | indexenc | baseenc);
296 } else if (is8bit(disp) && rtype == relocInfo::none) {
297 // [base + index*scale + imm8]
298 // [01 reg 100][ss index base] imm8
299 assert(index != rsp, "illegal addressing mode");
300 emit_byte(0x44 | regenc);
301 emit_byte(scale << 6 | indexenc | baseenc);
302 emit_byte(disp & 0xFF);
303 } else {
304 // [base + index*scale + disp32]
305 // [10 reg 100][ss index base] disp32
306 assert(index != rsp, "illegal addressing mode");
307 emit_byte(0x84 | regenc);
308 emit_byte(scale << 6 | indexenc | baseenc);
309 emit_data(disp, rspec, disp32_operand);
310 }
311 } else if (base == rsp LP64_ONLY(|| base == r12)) {
312 // [rsp + disp]
313 if (disp == 0 && rtype == relocInfo::none) {
314 // [rsp]
315 // [00 reg 100][00 100 100]
316 emit_byte(0x04 | regenc);
317 emit_byte(0x24);
318 } else if (is8bit(disp) && rtype == relocInfo::none) {
319 // [rsp + imm8]
320 // [01 reg 100][00 100 100] disp8
321 emit_byte(0x44 | regenc);
322 emit_byte(0x24);
323 emit_byte(disp & 0xFF);
324 } else {
325 // [rsp + imm32]
326 // [10 reg 100][00 100 100] disp32
327 emit_byte(0x84 | regenc);
328 emit_byte(0x24);
329 emit_data(disp, rspec, disp32_operand);
330 }
331 } else {
332 // [base + disp]
333 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
334 if (disp == 0 && rtype == relocInfo::none &&
335 base != rbp LP64_ONLY(&& base != r13)) {
336 // [base]
337 // [00 reg base]
338 emit_byte(0x00 | regenc | baseenc);
339 } else if (is8bit(disp) && rtype == relocInfo::none) {
340 // [base + disp8]
341 // [01 reg base] disp8
342 emit_byte(0x40 | regenc | baseenc);
343 emit_byte(disp & 0xFF);
344 } else {
345 // [base + disp32]
346 // [10 reg base] disp32
347 emit_byte(0x80 | regenc | baseenc);
348 emit_data(disp, rspec, disp32_operand);
349 }
350 }
351 } else {
352 if (index->is_valid()) {
353 assert(scale != Address::no_scale, "inconsistent address");
354 // [index*scale + disp]
355 // [00 reg 100][ss index 101] disp32
356 assert(index != rsp, "illegal addressing mode");
357 emit_byte(0x04 | regenc);
358 emit_byte(scale << 6 | indexenc | 0x05);
359 emit_data(disp, rspec, disp32_operand);
360 } else if (rtype != relocInfo::none ) {
361 // [disp] (64bit) RIP-RELATIVE (32bit) abs
362 // [00 000 101] disp32
364 emit_byte(0x05 | regenc);
365 // Note that the RIP-rel. correction applies to the generated
366 // disp field, but _not_ to the target address in the rspec.
368 // disp was created by converting the target address minus the pc
369 // at the start of the instruction. That needs more correction here.
370 // intptr_t disp = target - next_ip;
371 assert(inst_mark() != NULL, "must be inside InstructionMark");
372 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
373 int64_t adjusted = disp;
374 // Do rip-rel adjustment for 64bit
375 LP64_ONLY(adjusted -= (next_ip - inst_mark()));
376 assert(is_simm32(adjusted),
377 "must be 32bit offset (RIP relative address)");
378 emit_data((int32_t) adjusted, rspec, disp32_operand);
380 } else {
381 // 32bit never did this, did everything as the rip-rel/disp code above
382 // [disp] ABSOLUTE
383 // [00 reg 100][00 100 101] disp32
384 emit_byte(0x04 | regenc);
385 emit_byte(0x25);
386 emit_data(disp, rspec, disp32_operand);
387 }
388 }
389 }
391 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
392 Address::ScaleFactor scale, int disp,
393 RelocationHolder const& rspec) {
394 emit_operand((Register)reg, base, index, scale, disp, rspec);
395 }
397 // Secret local extension to Assembler::WhichOperand:
398 #define end_pc_operand (_WhichOperand_limit)
400 address Assembler::locate_operand(address inst, WhichOperand which) {
401 // Decode the given instruction, and return the address of
402 // an embedded 32-bit operand word.
404 // If "which" is disp32_operand, selects the displacement portion
405 // of an effective address specifier.
406 // If "which" is imm64_operand, selects the trailing immediate constant.
407 // If "which" is call32_operand, selects the displacement of a call or jump.
408 // Caller is responsible for ensuring that there is such an operand,
409 // and that it is 32/64 bits wide.
411 // If "which" is end_pc_operand, find the end of the instruction.
413 address ip = inst;
414 bool is_64bit = false;
416 debug_only(bool has_disp32 = false);
417 int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
419 again_after_prefix:
420 switch (0xFF & *ip++) {
422 // These convenience macros generate groups of "case" labels for the switch.
423 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
424 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
425 case (x)+4: case (x)+5: case (x)+6: case (x)+7
426 #define REP16(x) REP8((x)+0): \
427 case REP8((x)+8)
429 case CS_segment:
430 case SS_segment:
431 case DS_segment:
432 case ES_segment:
433 case FS_segment:
434 case GS_segment:
435 // Seems dubious
436 LP64_ONLY(assert(false, "shouldn't have that prefix"));
437 assert(ip == inst+1, "only one prefix allowed");
438 goto again_after_prefix;
440 case 0x67:
441 case REX:
442 case REX_B:
443 case REX_X:
444 case REX_XB:
445 case REX_R:
446 case REX_RB:
447 case REX_RX:
448 case REX_RXB:
449 NOT_LP64(assert(false, "64bit prefixes"));
450 goto again_after_prefix;
452 case REX_W:
453 case REX_WB:
454 case REX_WX:
455 case REX_WXB:
456 case REX_WR:
457 case REX_WRB:
458 case REX_WRX:
459 case REX_WRXB:
460 NOT_LP64(assert(false, "64bit prefixes"));
461 is_64bit = true;
462 goto again_after_prefix;
464 case 0xFF: // pushq a; decl a; incl a; call a; jmp a
465 case 0x88: // movb a, r
466 case 0x89: // movl a, r
467 case 0x8A: // movb r, a
468 case 0x8B: // movl r, a
469 case 0x8F: // popl a
470 debug_only(has_disp32 = true);
471 break;
473 case 0x68: // pushq #32
474 if (which == end_pc_operand) {
475 return ip + 4;
476 }
477 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
478 return ip; // not produced by emit_operand
480 case 0x66: // movw ... (size prefix)
481 again_after_size_prefix2:
482 switch (0xFF & *ip++) {
483 case REX:
484 case REX_B:
485 case REX_X:
486 case REX_XB:
487 case REX_R:
488 case REX_RB:
489 case REX_RX:
490 case REX_RXB:
491 case REX_W:
492 case REX_WB:
493 case REX_WX:
494 case REX_WXB:
495 case REX_WR:
496 case REX_WRB:
497 case REX_WRX:
498 case REX_WRXB:
499 NOT_LP64(assert(false, "64bit prefix found"));
500 goto again_after_size_prefix2;
501 case 0x8B: // movw r, a
502 case 0x89: // movw a, r
503 debug_only(has_disp32 = true);
504 break;
505 case 0xC7: // movw a, #16
506 debug_only(has_disp32 = true);
507 tail_size = 2; // the imm16
508 break;
509 case 0x0F: // several SSE/SSE2 variants
510 ip--; // reparse the 0x0F
511 goto again_after_prefix;
512 default:
513 ShouldNotReachHere();
514 }
515 break;
517 case REP8(0xB8): // movl/q r, #32/#64(oop?)
518 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4);
519 // these asserts are somewhat nonsensical
520 #ifndef _LP64
521 assert(which == imm_operand || which == disp32_operand, "");
522 #else
523 assert((which == call32_operand || which == imm_operand) && is_64bit ||
524 which == narrow_oop_operand && !is_64bit, "");
525 #endif // _LP64
526 return ip;
528 case 0x69: // imul r, a, #32
529 case 0xC7: // movl a, #32(oop?)
530 tail_size = 4;
531 debug_only(has_disp32 = true); // has both kinds of operands!
532 break;
534 case 0x0F: // movx..., etc.
535 switch (0xFF & *ip++) {
536 case 0x12: // movlps
537 case 0x28: // movaps
538 case 0x2E: // ucomiss
539 case 0x2F: // comiss
540 case 0x54: // andps
541 case 0x55: // andnps
542 case 0x56: // orps
543 case 0x57: // xorps
544 case 0x6E: // movd
545 case 0x7E: // movd
546 case 0xAE: // ldmxcsr a
547 // 64bit side says it these have both operands but that doesn't
548 // appear to be true
549 debug_only(has_disp32 = true);
550 break;
552 case 0xAD: // shrd r, a, %cl
553 case 0xAF: // imul r, a
554 case 0xBE: // movsbl r, a (movsxb)
555 case 0xBF: // movswl r, a (movsxw)
556 case 0xB6: // movzbl r, a (movzxb)
557 case 0xB7: // movzwl r, a (movzxw)
558 case REP16(0x40): // cmovl cc, r, a
559 case 0xB0: // cmpxchgb
560 case 0xB1: // cmpxchg
561 case 0xC1: // xaddl
562 case 0xC7: // cmpxchg8
563 case REP16(0x90): // setcc a
564 debug_only(has_disp32 = true);
565 // fall out of the switch to decode the address
566 break;
568 case 0xAC: // shrd r, a, #8
569 debug_only(has_disp32 = true);
570 tail_size = 1; // the imm8
571 break;
573 case REP16(0x80): // jcc rdisp32
574 if (which == end_pc_operand) return ip + 4;
575 assert(which == call32_operand, "jcc has no disp32 or imm");
576 return ip;
577 default:
578 ShouldNotReachHere();
579 }
580 break;
582 case 0x81: // addl a, #32; addl r, #32
583 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
584 // on 32bit in the case of cmpl, the imm might be an oop
585 tail_size = 4;
586 debug_only(has_disp32 = true); // has both kinds of operands!
587 break;
589 case 0x83: // addl a, #8; addl r, #8
590 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
591 debug_only(has_disp32 = true); // has both kinds of operands!
592 tail_size = 1;
593 break;
595 case 0x9B:
596 switch (0xFF & *ip++) {
597 case 0xD9: // fnstcw a
598 debug_only(has_disp32 = true);
599 break;
600 default:
601 ShouldNotReachHere();
602 }
603 break;
605 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
606 case REP4(0x10): // adc...
607 case REP4(0x20): // and...
608 case REP4(0x30): // xor...
609 case REP4(0x08): // or...
610 case REP4(0x18): // sbb...
611 case REP4(0x28): // sub...
612 case 0xF7: // mull a
613 case 0x8D: // lea r, a
614 case 0x87: // xchg r, a
615 case REP4(0x38): // cmp...
616 case 0x85: // test r, a
617 debug_only(has_disp32 = true); // has both kinds of operands!
618 break;
620 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
621 case 0xC6: // movb a, #8
622 case 0x80: // cmpb a, #8
623 case 0x6B: // imul r, a, #8
624 debug_only(has_disp32 = true); // has both kinds of operands!
625 tail_size = 1; // the imm8
626 break;
628 case 0xE8: // call rdisp32
629 case 0xE9: // jmp rdisp32
630 if (which == end_pc_operand) return ip + 4;
631 assert(which == call32_operand, "call has no disp32 or imm");
632 return ip;
634 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
635 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
636 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
637 case 0xDD: // fld_d a; fst_d a; fstp_d a
638 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
639 case 0xDF: // fild_d a; fistp_d a
640 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
641 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
642 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
643 debug_only(has_disp32 = true);
644 break;
646 case 0xF0: // Lock
647 assert(os::is_MP(), "only on MP");
648 goto again_after_prefix;
650 case 0xF3: // For SSE
651 case 0xF2: // For SSE2
652 switch (0xFF & *ip++) {
653 case REX:
654 case REX_B:
655 case REX_X:
656 case REX_XB:
657 case REX_R:
658 case REX_RB:
659 case REX_RX:
660 case REX_RXB:
661 case REX_W:
662 case REX_WB:
663 case REX_WX:
664 case REX_WXB:
665 case REX_WR:
666 case REX_WRB:
667 case REX_WRX:
668 case REX_WRXB:
669 NOT_LP64(assert(false, "found 64bit prefix"));
670 ip++;
671 default:
672 ip++;
673 }
674 debug_only(has_disp32 = true); // has both kinds of operands!
675 break;
677 default:
678 ShouldNotReachHere();
680 #undef REP8
681 #undef REP16
682 }
684 assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
685 #ifdef _LP64
686 assert(which != imm_operand, "instruction is not a movq reg, imm64");
687 #else
688 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
689 assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
690 #endif // LP64
691 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
693 // parse the output of emit_operand
694 int op2 = 0xFF & *ip++;
695 int base = op2 & 0x07;
696 int op3 = -1;
697 const int b100 = 4;
698 const int b101 = 5;
699 if (base == b100 && (op2 >> 6) != 3) {
700 op3 = 0xFF & *ip++;
701 base = op3 & 0x07; // refetch the base
702 }
703 // now ip points at the disp (if any)
705 switch (op2 >> 6) {
706 case 0:
707 // [00 reg 100][ss index base]
708 // [00 reg 100][00 100 esp]
709 // [00 reg base]
710 // [00 reg 100][ss index 101][disp32]
711 // [00 reg 101] [disp32]
713 if (base == b101) {
714 if (which == disp32_operand)
715 return ip; // caller wants the disp32
716 ip += 4; // skip the disp32
717 }
718 break;
720 case 1:
721 // [01 reg 100][ss index base][disp8]
722 // [01 reg 100][00 100 esp][disp8]
723 // [01 reg base] [disp8]
724 ip += 1; // skip the disp8
725 break;
727 case 2:
728 // [10 reg 100][ss index base][disp32]
729 // [10 reg 100][00 100 esp][disp32]
730 // [10 reg base] [disp32]
731 if (which == disp32_operand)
732 return ip; // caller wants the disp32
733 ip += 4; // skip the disp32
734 break;
736 case 3:
737 // [11 reg base] (not a memory addressing mode)
738 break;
739 }
741 if (which == end_pc_operand) {
742 return ip + tail_size;
743 }
745 #ifdef _LP64
746 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
747 #else
748 assert(which == imm_operand, "instruction has only an imm field");
749 #endif // LP64
750 return ip;
751 }
753 address Assembler::locate_next_instruction(address inst) {
754 // Secretly share code with locate_operand:
755 return locate_operand(inst, end_pc_operand);
756 }
759 #ifdef ASSERT
760 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
761 address inst = inst_mark();
762 assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
763 address opnd;
765 Relocation* r = rspec.reloc();
766 if (r->type() == relocInfo::none) {
767 return;
768 } else if (r->is_call() || format == call32_operand) {
769 // assert(format == imm32_operand, "cannot specify a nonzero format");
770 opnd = locate_operand(inst, call32_operand);
771 } else if (r->is_data()) {
772 assert(format == imm_operand || format == disp32_operand
773 LP64_ONLY(|| format == narrow_oop_operand), "format ok");
774 opnd = locate_operand(inst, (WhichOperand)format);
775 } else {
776 assert(format == imm_operand, "cannot specify a format");
777 return;
778 }
779 assert(opnd == pc(), "must put operand where relocs can find it");
780 }
781 #endif // ASSERT
783 void Assembler::emit_operand32(Register reg, Address adr) {
784 assert(reg->encoding() < 8, "no extended registers");
785 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
786 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
787 adr._rspec);
788 }
790 void Assembler::emit_operand(Register reg, Address adr,
791 int rip_relative_correction) {
792 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
793 adr._rspec,
794 rip_relative_correction);
795 }
797 void Assembler::emit_operand(XMMRegister reg, Address adr) {
798 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
799 adr._rspec);
800 }
802 // MMX operations
803 void Assembler::emit_operand(MMXRegister reg, Address adr) {
804 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
805 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
806 }
808 // work around gcc (3.2.1-7a) bug
809 void Assembler::emit_operand(Address adr, MMXRegister reg) {
810 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
811 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
812 }
815 void Assembler::emit_farith(int b1, int b2, int i) {
816 assert(isByte(b1) && isByte(b2), "wrong opcode");
817 assert(0 <= i && i < 8, "illegal stack offset");
818 emit_byte(b1);
819 emit_byte(b2 + i);
820 }
823 // Now the Assembler instructions (identical for 32/64 bits)
825 void Assembler::adcl(Address dst, int32_t imm32) {
826 InstructionMark im(this);
827 prefix(dst);
828 emit_arith_operand(0x81, rdx, dst, imm32);
829 }
831 void Assembler::adcl(Address dst, Register src) {
832 InstructionMark im(this);
833 prefix(dst, src);
834 emit_byte(0x11);
835 emit_operand(src, dst);
836 }
838 void Assembler::adcl(Register dst, int32_t imm32) {
839 prefix(dst);
840 emit_arith(0x81, 0xD0, dst, imm32);
841 }
843 void Assembler::adcl(Register dst, Address src) {
844 InstructionMark im(this);
845 prefix(src, dst);
846 emit_byte(0x13);
847 emit_operand(dst, src);
848 }
850 void Assembler::adcl(Register dst, Register src) {
851 (void) prefix_and_encode(dst->encoding(), src->encoding());
852 emit_arith(0x13, 0xC0, dst, src);
853 }
855 void Assembler::addl(Address dst, int32_t imm32) {
856 InstructionMark im(this);
857 prefix(dst);
858 emit_arith_operand(0x81, rax, dst, imm32);
859 }
861 void Assembler::addl(Address dst, Register src) {
862 InstructionMark im(this);
863 prefix(dst, src);
864 emit_byte(0x01);
865 emit_operand(src, dst);
866 }
868 void Assembler::addl(Register dst, int32_t imm32) {
869 prefix(dst);
870 emit_arith(0x81, 0xC0, dst, imm32);
871 }
873 void Assembler::addl(Register dst, Address src) {
874 InstructionMark im(this);
875 prefix(src, dst);
876 emit_byte(0x03);
877 emit_operand(dst, src);
878 }
880 void Assembler::addl(Register dst, Register src) {
881 (void) prefix_and_encode(dst->encoding(), src->encoding());
882 emit_arith(0x03, 0xC0, dst, src);
883 }
885 void Assembler::addr_nop_4() {
886 // 4 bytes: NOP DWORD PTR [EAX+0]
887 emit_byte(0x0F);
888 emit_byte(0x1F);
889 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
890 emit_byte(0); // 8-bits offset (1 byte)
891 }
893 void Assembler::addr_nop_5() {
894 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
895 emit_byte(0x0F);
896 emit_byte(0x1F);
897 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
898 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
899 emit_byte(0); // 8-bits offset (1 byte)
900 }
902 void Assembler::addr_nop_7() {
903 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
904 emit_byte(0x0F);
905 emit_byte(0x1F);
906 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
907 emit_long(0); // 32-bits offset (4 bytes)
908 }
910 void Assembler::addr_nop_8() {
911 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
912 emit_byte(0x0F);
913 emit_byte(0x1F);
914 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
915 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
916 emit_long(0); // 32-bits offset (4 bytes)
917 }
919 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
920 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
921 emit_byte(0xF2);
922 int encode = prefix_and_encode(dst->encoding(), src->encoding());
923 emit_byte(0x0F);
924 emit_byte(0x58);
925 emit_byte(0xC0 | encode);
926 }
928 void Assembler::addsd(XMMRegister dst, Address src) {
929 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
930 InstructionMark im(this);
931 emit_byte(0xF2);
932 prefix(src, dst);
933 emit_byte(0x0F);
934 emit_byte(0x58);
935 emit_operand(dst, src);
936 }
938 void Assembler::addss(XMMRegister dst, XMMRegister src) {
939 NOT_LP64(assert(VM_Version::supports_sse(), ""));
940 emit_byte(0xF3);
941 int encode = prefix_and_encode(dst->encoding(), src->encoding());
942 emit_byte(0x0F);
943 emit_byte(0x58);
944 emit_byte(0xC0 | encode);
945 }
947 void Assembler::addss(XMMRegister dst, Address src) {
948 NOT_LP64(assert(VM_Version::supports_sse(), ""));
949 InstructionMark im(this);
950 emit_byte(0xF3);
951 prefix(src, dst);
952 emit_byte(0x0F);
953 emit_byte(0x58);
954 emit_operand(dst, src);
955 }
957 void Assembler::andl(Register dst, int32_t imm32) {
958 prefix(dst);
959 emit_arith(0x81, 0xE0, dst, imm32);
960 }
962 void Assembler::andl(Register dst, Address src) {
963 InstructionMark im(this);
964 prefix(src, dst);
965 emit_byte(0x23);
966 emit_operand(dst, src);
967 }
969 void Assembler::andl(Register dst, Register src) {
970 (void) prefix_and_encode(dst->encoding(), src->encoding());
971 emit_arith(0x23, 0xC0, dst, src);
972 }
974 void Assembler::andpd(XMMRegister dst, Address src) {
975 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
976 InstructionMark im(this);
977 emit_byte(0x66);
978 prefix(src, dst);
979 emit_byte(0x0F);
980 emit_byte(0x54);
981 emit_operand(dst, src);
982 }
984 void Assembler::bsfl(Register dst, Register src) {
985 int encode = prefix_and_encode(dst->encoding(), src->encoding());
986 emit_byte(0x0F);
987 emit_byte(0xBC);
988 emit_byte(0xC0 | encode);
989 }
991 void Assembler::bsrl(Register dst, Register src) {
992 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
993 int encode = prefix_and_encode(dst->encoding(), src->encoding());
994 emit_byte(0x0F);
995 emit_byte(0xBD);
996 emit_byte(0xC0 | encode);
997 }
999 void Assembler::bswapl(Register reg) { // bswap
1000 int encode = prefix_and_encode(reg->encoding());
1001 emit_byte(0x0F);
1002 emit_byte(0xC8 | encode);
1003 }
1005 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1006 // suspect disp32 is always good
1007 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1009 if (L.is_bound()) {
1010 const int long_size = 5;
1011 int offs = (int)( target(L) - pc() );
1012 assert(offs <= 0, "assembler error");
1013 InstructionMark im(this);
1014 // 1110 1000 #32-bit disp
1015 emit_byte(0xE8);
1016 emit_data(offs - long_size, rtype, operand);
1017 } else {
1018 InstructionMark im(this);
1019 // 1110 1000 #32-bit disp
1020 L.add_patch_at(code(), locator());
1022 emit_byte(0xE8);
1023 emit_data(int(0), rtype, operand);
1024 }
1025 }
1027 void Assembler::call(Register dst) {
1028 // This was originally using a 32bit register encoding
1029 // and surely we want 64bit!
1030 // this is a 32bit encoding but in 64bit mode the default
1031 // operand size is 64bit so there is no need for the
1032 // wide prefix. So prefix only happens if we use the
1033 // new registers. Much like push/pop.
1034 int x = offset();
1035 // this may be true but dbx disassembles it as if it
1036 // were 32bits...
1037 // int encode = prefix_and_encode(dst->encoding());
1038 // if (offset() != x) assert(dst->encoding() >= 8, "what?");
1039 int encode = prefixq_and_encode(dst->encoding());
1041 emit_byte(0xFF);
1042 emit_byte(0xD0 | encode);
1043 }
1046 void Assembler::call(Address adr) {
1047 InstructionMark im(this);
1048 prefix(adr);
1049 emit_byte(0xFF);
1050 emit_operand(rdx, adr);
1051 }
1053 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1054 assert(entry != NULL, "call most probably wrong");
1055 InstructionMark im(this);
1056 emit_byte(0xE8);
1057 intptr_t disp = entry - (_code_pos + sizeof(int32_t));
1058 assert(is_simm32(disp), "must be 32bit offset (call2)");
1059 // Technically, should use call32_operand, but this format is
1060 // implied by the fact that we're emitting a call instruction.
1062 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1063 emit_data((int) disp, rspec, operand);
1064 }
1066 void Assembler::cdql() {
1067 emit_byte(0x99);
1068 }
1070 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1071 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1072 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1073 emit_byte(0x0F);
1074 emit_byte(0x40 | cc);
1075 emit_byte(0xC0 | encode);
1076 }
1079 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1080 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1081 prefix(src, dst);
1082 emit_byte(0x0F);
1083 emit_byte(0x40 | cc);
1084 emit_operand(dst, src);
1085 }
1087 void Assembler::cmpb(Address dst, int imm8) {
1088 InstructionMark im(this);
1089 prefix(dst);
1090 emit_byte(0x80);
1091 emit_operand(rdi, dst, 1);
1092 emit_byte(imm8);
1093 }
1095 void Assembler::cmpl(Address dst, int32_t imm32) {
1096 InstructionMark im(this);
1097 prefix(dst);
1098 emit_byte(0x81);
1099 emit_operand(rdi, dst, 4);
1100 emit_long(imm32);
1101 }
1103 void Assembler::cmpl(Register dst, int32_t imm32) {
1104 prefix(dst);
1105 emit_arith(0x81, 0xF8, dst, imm32);
1106 }
1108 void Assembler::cmpl(Register dst, Register src) {
1109 (void) prefix_and_encode(dst->encoding(), src->encoding());
1110 emit_arith(0x3B, 0xC0, dst, src);
1111 }
1114 void Assembler::cmpl(Register dst, Address src) {
1115 InstructionMark im(this);
1116 prefix(src, dst);
1117 emit_byte(0x3B);
1118 emit_operand(dst, src);
1119 }
1121 void Assembler::cmpw(Address dst, int imm16) {
1122 InstructionMark im(this);
1123 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1124 emit_byte(0x66);
1125 emit_byte(0x81);
1126 emit_operand(rdi, dst, 2);
1127 emit_word(imm16);
1128 }
1130 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1131 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1132 // The ZF is set if the compared values were equal, and cleared otherwise.
1133 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1134 if (Atomics & 2) {
1135 // caveat: no instructionmark, so this isn't relocatable.
1136 // Emit a synthetic, non-atomic, CAS equivalent.
1137 // Beware. The synthetic form sets all ICCs, not just ZF.
1138 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
1139 cmpl(rax, adr);
1140 movl(rax, adr);
1141 if (reg != rax) {
1142 Label L ;
1143 jcc(Assembler::notEqual, L);
1144 movl(adr, reg);
1145 bind(L);
1146 }
1147 } else {
1148 InstructionMark im(this);
1149 prefix(adr, reg);
1150 emit_byte(0x0F);
1151 emit_byte(0xB1);
1152 emit_operand(reg, adr);
1153 }
1154 }
1156 void Assembler::comisd(XMMRegister dst, Address src) {
1157 // NOTE: dbx seems to decode this as comiss even though the
1158 // 0x66 is there. Strangly ucomisd comes out correct
1159 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1160 emit_byte(0x66);
1161 comiss(dst, src);
1162 }
1164 void Assembler::comiss(XMMRegister dst, Address src) {
1165 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1167 InstructionMark im(this);
1168 prefix(src, dst);
1169 emit_byte(0x0F);
1170 emit_byte(0x2F);
1171 emit_operand(dst, src);
1172 }
1174 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1175 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1176 emit_byte(0xF3);
1177 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1178 emit_byte(0x0F);
1179 emit_byte(0xE6);
1180 emit_byte(0xC0 | encode);
1181 }
1183 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1184 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1185 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1186 emit_byte(0x0F);
1187 emit_byte(0x5B);
1188 emit_byte(0xC0 | encode);
1189 }
1191 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1192 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1193 emit_byte(0xF2);
1194 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1195 emit_byte(0x0F);
1196 emit_byte(0x5A);
1197 emit_byte(0xC0 | encode);
1198 }
1200 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1201 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1202 emit_byte(0xF2);
1203 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1204 emit_byte(0x0F);
1205 emit_byte(0x2A);
1206 emit_byte(0xC0 | encode);
1207 }
1209 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1210 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1211 emit_byte(0xF3);
1212 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1213 emit_byte(0x0F);
1214 emit_byte(0x2A);
1215 emit_byte(0xC0 | encode);
1216 }
1218 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1219 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1220 emit_byte(0xF3);
1221 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1222 emit_byte(0x0F);
1223 emit_byte(0x5A);
1224 emit_byte(0xC0 | encode);
1225 }
1227 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1228 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1229 emit_byte(0xF2);
1230 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1231 emit_byte(0x0F);
1232 emit_byte(0x2C);
1233 emit_byte(0xC0 | encode);
1234 }
1236 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1237 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1238 emit_byte(0xF3);
1239 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1240 emit_byte(0x0F);
1241 emit_byte(0x2C);
1242 emit_byte(0xC0 | encode);
1243 }
1245 void Assembler::decl(Address dst) {
1246 // Don't use it directly. Use MacroAssembler::decrement() instead.
1247 InstructionMark im(this);
1248 prefix(dst);
1249 emit_byte(0xFF);
1250 emit_operand(rcx, dst);
1251 }
1253 void Assembler::divsd(XMMRegister dst, Address src) {
1254 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1255 InstructionMark im(this);
1256 emit_byte(0xF2);
1257 prefix(src, dst);
1258 emit_byte(0x0F);
1259 emit_byte(0x5E);
1260 emit_operand(dst, src);
1261 }
1263 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1264 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1265 emit_byte(0xF2);
1266 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1267 emit_byte(0x0F);
1268 emit_byte(0x5E);
1269 emit_byte(0xC0 | encode);
1270 }
1272 void Assembler::divss(XMMRegister dst, Address src) {
1273 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1274 InstructionMark im(this);
1275 emit_byte(0xF3);
1276 prefix(src, dst);
1277 emit_byte(0x0F);
1278 emit_byte(0x5E);
1279 emit_operand(dst, src);
1280 }
1282 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1283 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1284 emit_byte(0xF3);
1285 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1286 emit_byte(0x0F);
1287 emit_byte(0x5E);
1288 emit_byte(0xC0 | encode);
1289 }
1291 void Assembler::emms() {
1292 NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1293 emit_byte(0x0F);
1294 emit_byte(0x77);
1295 }
1297 void Assembler::hlt() {
1298 emit_byte(0xF4);
1299 }
1301 void Assembler::idivl(Register src) {
1302 int encode = prefix_and_encode(src->encoding());
1303 emit_byte(0xF7);
1304 emit_byte(0xF8 | encode);
1305 }
1307 void Assembler::divl(Register src) { // Unsigned
1308 int encode = prefix_and_encode(src->encoding());
1309 emit_byte(0xF7);
1310 emit_byte(0xF0 | encode);
1311 }
1313 void Assembler::imull(Register dst, Register src) {
1314 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1315 emit_byte(0x0F);
1316 emit_byte(0xAF);
1317 emit_byte(0xC0 | encode);
1318 }
1321 void Assembler::imull(Register dst, Register src, int value) {
1322 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1323 if (is8bit(value)) {
1324 emit_byte(0x6B);
1325 emit_byte(0xC0 | encode);
1326 emit_byte(value & 0xFF);
1327 } else {
1328 emit_byte(0x69);
1329 emit_byte(0xC0 | encode);
1330 emit_long(value);
1331 }
1332 }
1334 void Assembler::incl(Address dst) {
1335 // Don't use it directly. Use MacroAssembler::increment() instead.
1336 InstructionMark im(this);
1337 prefix(dst);
1338 emit_byte(0xFF);
1339 emit_operand(rax, dst);
1340 }
1342 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
1343 InstructionMark im(this);
1344 relocate(rtype);
1345 assert((0 <= cc) && (cc < 16), "illegal cc");
1346 if (L.is_bound()) {
1347 address dst = target(L);
1348 assert(dst != NULL, "jcc most probably wrong");
1350 const int short_size = 2;
1351 const int long_size = 6;
1352 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
1353 if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1354 // 0111 tttn #8-bit disp
1355 emit_byte(0x70 | cc);
1356 emit_byte((offs - short_size) & 0xFF);
1357 } else {
1358 // 0000 1111 1000 tttn #32-bit disp
1359 assert(is_simm32(offs - long_size),
1360 "must be 32bit offset (call4)");
1361 emit_byte(0x0F);
1362 emit_byte(0x80 | cc);
1363 emit_long(offs - long_size);
1364 }
1365 } else {
1366 // Note: could eliminate cond. jumps to this jump if condition
1367 // is the same however, seems to be rather unlikely case.
1368 // Note: use jccb() if label to be bound is very close to get
1369 // an 8-bit displacement
1370 L.add_patch_at(code(), locator());
1371 emit_byte(0x0F);
1372 emit_byte(0x80 | cc);
1373 emit_long(0);
1374 }
1375 }
1377 void Assembler::jccb(Condition cc, Label& L) {
1378 if (L.is_bound()) {
1379 const int short_size = 2;
1380 address entry = target(L);
1381 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1382 "Dispacement too large for a short jmp");
1383 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1384 // 0111 tttn #8-bit disp
1385 emit_byte(0x70 | cc);
1386 emit_byte((offs - short_size) & 0xFF);
1387 } else {
1388 InstructionMark im(this);
1389 L.add_patch_at(code(), locator());
1390 emit_byte(0x70 | cc);
1391 emit_byte(0);
1392 }
1393 }
1395 void Assembler::jmp(Address adr) {
1396 InstructionMark im(this);
1397 prefix(adr);
1398 emit_byte(0xFF);
1399 emit_operand(rsp, adr);
1400 }
1402 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
1403 if (L.is_bound()) {
1404 address entry = target(L);
1405 assert(entry != NULL, "jmp most probably wrong");
1406 InstructionMark im(this);
1407 const int short_size = 2;
1408 const int long_size = 5;
1409 intptr_t offs = entry - _code_pos;
1410 if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1411 emit_byte(0xEB);
1412 emit_byte((offs - short_size) & 0xFF);
1413 } else {
1414 emit_byte(0xE9);
1415 emit_long(offs - long_size);
1416 }
1417 } else {
1418 // By default, forward jumps are always 32-bit displacements, since
1419 // we can't yet know where the label will be bound. If you're sure that
1420 // the forward jump will not run beyond 256 bytes, use jmpb to
1421 // force an 8-bit displacement.
1422 InstructionMark im(this);
1423 relocate(rtype);
1424 L.add_patch_at(code(), locator());
1425 emit_byte(0xE9);
1426 emit_long(0);
1427 }
1428 }
1430 void Assembler::jmp(Register entry) {
1431 int encode = prefix_and_encode(entry->encoding());
1432 emit_byte(0xFF);
1433 emit_byte(0xE0 | encode);
1434 }
1436 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1437 InstructionMark im(this);
1438 emit_byte(0xE9);
1439 assert(dest != NULL, "must have a target");
1440 intptr_t disp = dest - (_code_pos + sizeof(int32_t));
1441 assert(is_simm32(disp), "must be 32bit offset (jmp)");
1442 emit_data(disp, rspec.reloc(), call32_operand);
1443 }
1445 void Assembler::jmpb(Label& L) {
1446 if (L.is_bound()) {
1447 const int short_size = 2;
1448 address entry = target(L);
1449 assert(is8bit((entry - _code_pos) + short_size),
1450 "Dispacement too large for a short jmp");
1451 assert(entry != NULL, "jmp most probably wrong");
1452 intptr_t offs = entry - _code_pos;
1453 emit_byte(0xEB);
1454 emit_byte((offs - short_size) & 0xFF);
1455 } else {
1456 InstructionMark im(this);
1457 L.add_patch_at(code(), locator());
1458 emit_byte(0xEB);
1459 emit_byte(0);
1460 }
1461 }
1463 void Assembler::ldmxcsr( Address src) {
1464 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1465 InstructionMark im(this);
1466 prefix(src);
1467 emit_byte(0x0F);
1468 emit_byte(0xAE);
1469 emit_operand(as_Register(2), src);
1470 }
1472 void Assembler::leal(Register dst, Address src) {
1473 InstructionMark im(this);
1474 #ifdef _LP64
1475 emit_byte(0x67); // addr32
1476 prefix(src, dst);
1477 #endif // LP64
1478 emit_byte(0x8D);
1479 emit_operand(dst, src);
1480 }
1482 void Assembler::lock() {
1483 if (Atomics & 1) {
1484 // Emit either nothing, a NOP, or a NOP: prefix
1485 emit_byte(0x90) ;
1486 } else {
1487 emit_byte(0xF0);
1488 }
1489 }
1491 void Assembler::lzcntl(Register dst, Register src) {
1492 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
1493 emit_byte(0xF3);
1494 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1495 emit_byte(0x0F);
1496 emit_byte(0xBD);
1497 emit_byte(0xC0 | encode);
1498 }
1500 // Emit mfence instruction
1501 void Assembler::mfence() {
1502 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1503 emit_byte( 0x0F );
1504 emit_byte( 0xAE );
1505 emit_byte( 0xF0 );
1506 }
1508 void Assembler::mov(Register dst, Register src) {
1509 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1510 }
1512 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1513 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1514 int dstenc = dst->encoding();
1515 int srcenc = src->encoding();
1516 emit_byte(0x66);
1517 if (dstenc < 8) {
1518 if (srcenc >= 8) {
1519 prefix(REX_B);
1520 srcenc -= 8;
1521 }
1522 } else {
1523 if (srcenc < 8) {
1524 prefix(REX_R);
1525 } else {
1526 prefix(REX_RB);
1527 srcenc -= 8;
1528 }
1529 dstenc -= 8;
1530 }
1531 emit_byte(0x0F);
1532 emit_byte(0x28);
1533 emit_byte(0xC0 | dstenc << 3 | srcenc);
1534 }
1536 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1537 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1538 int dstenc = dst->encoding();
1539 int srcenc = src->encoding();
1540 if (dstenc < 8) {
1541 if (srcenc >= 8) {
1542 prefix(REX_B);
1543 srcenc -= 8;
1544 }
1545 } else {
1546 if (srcenc < 8) {
1547 prefix(REX_R);
1548 } else {
1549 prefix(REX_RB);
1550 srcenc -= 8;
1551 }
1552 dstenc -= 8;
1553 }
1554 emit_byte(0x0F);
1555 emit_byte(0x28);
1556 emit_byte(0xC0 | dstenc << 3 | srcenc);
1557 }
1559 void Assembler::movb(Register dst, Address src) {
1560 NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1561 InstructionMark im(this);
1562 prefix(src, dst, true);
1563 emit_byte(0x8A);
1564 emit_operand(dst, src);
1565 }
1568 void Assembler::movb(Address dst, int imm8) {
1569 InstructionMark im(this);
1570 prefix(dst);
1571 emit_byte(0xC6);
1572 emit_operand(rax, dst, 1);
1573 emit_byte(imm8);
1574 }
1577 void Assembler::movb(Address dst, Register src) {
1578 assert(src->has_byte_register(), "must have byte register");
1579 InstructionMark im(this);
1580 prefix(dst, src, true);
1581 emit_byte(0x88);
1582 emit_operand(src, dst);
1583 }
1585 void Assembler::movdl(XMMRegister dst, Register src) {
1586 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1587 emit_byte(0x66);
1588 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1589 emit_byte(0x0F);
1590 emit_byte(0x6E);
1591 emit_byte(0xC0 | encode);
1592 }
1594 void Assembler::movdl(Register dst, XMMRegister src) {
1595 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1596 emit_byte(0x66);
1597 // swap src/dst to get correct prefix
1598 int encode = prefix_and_encode(src->encoding(), dst->encoding());
1599 emit_byte(0x0F);
1600 emit_byte(0x7E);
1601 emit_byte(0xC0 | encode);
1602 }
1604 void Assembler::movdl(XMMRegister dst, Address src) {
1605 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1606 InstructionMark im(this);
1607 emit_byte(0x66);
1608 prefix(src, dst);
1609 emit_byte(0x0F);
1610 emit_byte(0x6E);
1611 emit_operand(dst, src);
1612 }
1615 void Assembler::movdqa(XMMRegister dst, Address src) {
1616 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1617 InstructionMark im(this);
1618 emit_byte(0x66);
1619 prefix(src, dst);
1620 emit_byte(0x0F);
1621 emit_byte(0x6F);
1622 emit_operand(dst, src);
1623 }
1625 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1626 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1627 emit_byte(0x66);
1628 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1629 emit_byte(0x0F);
1630 emit_byte(0x6F);
1631 emit_byte(0xC0 | encode);
1632 }
1634 void Assembler::movdqa(Address dst, XMMRegister src) {
1635 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1636 InstructionMark im(this);
1637 emit_byte(0x66);
1638 prefix(dst, src);
1639 emit_byte(0x0F);
1640 emit_byte(0x7F);
1641 emit_operand(src, dst);
1642 }
1644 void Assembler::movdqu(XMMRegister dst, Address src) {
1645 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1646 InstructionMark im(this);
1647 emit_byte(0xF3);
1648 prefix(src, dst);
1649 emit_byte(0x0F);
1650 emit_byte(0x6F);
1651 emit_operand(dst, src);
1652 }
1654 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1655 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1656 emit_byte(0xF3);
1657 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1658 emit_byte(0x0F);
1659 emit_byte(0x6F);
1660 emit_byte(0xC0 | encode);
1661 }
1663 void Assembler::movdqu(Address dst, XMMRegister src) {
1664 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1665 InstructionMark im(this);
1666 emit_byte(0xF3);
1667 prefix(dst, src);
1668 emit_byte(0x0F);
1669 emit_byte(0x7F);
1670 emit_operand(src, dst);
1671 }
1673 // Uses zero extension on 64bit
1675 void Assembler::movl(Register dst, int32_t imm32) {
1676 int encode = prefix_and_encode(dst->encoding());
1677 emit_byte(0xB8 | encode);
1678 emit_long(imm32);
1679 }
1681 void Assembler::movl(Register dst, Register src) {
1682 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1683 emit_byte(0x8B);
1684 emit_byte(0xC0 | encode);
1685 }
1687 void Assembler::movl(Register dst, Address src) {
1688 InstructionMark im(this);
1689 prefix(src, dst);
1690 emit_byte(0x8B);
1691 emit_operand(dst, src);
1692 }
1694 void Assembler::movl(Address dst, int32_t imm32) {
1695 InstructionMark im(this);
1696 prefix(dst);
1697 emit_byte(0xC7);
1698 emit_operand(rax, dst, 4);
1699 emit_long(imm32);
1700 }
1702 void Assembler::movl(Address dst, Register src) {
1703 InstructionMark im(this);
1704 prefix(dst, src);
1705 emit_byte(0x89);
1706 emit_operand(src, dst);
1707 }
1709 // New cpus require to use movsd and movss to avoid partial register stall
1710 // when loading from memory. But for old Opteron use movlpd instead of movsd.
1711 // The selection is done in MacroAssembler::movdbl() and movflt().
1712 void Assembler::movlpd(XMMRegister dst, Address src) {
1713 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1714 InstructionMark im(this);
1715 emit_byte(0x66);
1716 prefix(src, dst);
1717 emit_byte(0x0F);
1718 emit_byte(0x12);
1719 emit_operand(dst, src);
1720 }
1722 void Assembler::movq( MMXRegister dst, Address src ) {
1723 assert( VM_Version::supports_mmx(), "" );
1724 emit_byte(0x0F);
1725 emit_byte(0x6F);
1726 emit_operand(dst, src);
1727 }
1729 void Assembler::movq( Address dst, MMXRegister src ) {
1730 assert( VM_Version::supports_mmx(), "" );
1731 emit_byte(0x0F);
1732 emit_byte(0x7F);
1733 // workaround gcc (3.2.1-7a) bug
1734 // In that version of gcc with only an emit_operand(MMX, Address)
1735 // gcc will tail jump and try and reverse the parameters completely
1736 // obliterating dst in the process. By having a version available
1737 // that doesn't need to swap the args at the tail jump the bug is
1738 // avoided.
1739 emit_operand(dst, src);
1740 }
1742 void Assembler::movq(XMMRegister dst, Address src) {
1743 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1744 InstructionMark im(this);
1745 emit_byte(0xF3);
1746 prefix(src, dst);
1747 emit_byte(0x0F);
1748 emit_byte(0x7E);
1749 emit_operand(dst, src);
1750 }
1752 void Assembler::movq(Address dst, XMMRegister src) {
1753 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1754 InstructionMark im(this);
1755 emit_byte(0x66);
1756 prefix(dst, src);
1757 emit_byte(0x0F);
1758 emit_byte(0xD6);
1759 emit_operand(src, dst);
1760 }
1762 void Assembler::movsbl(Register dst, Address src) { // movsxb
1763 InstructionMark im(this);
1764 prefix(src, dst);
1765 emit_byte(0x0F);
1766 emit_byte(0xBE);
1767 emit_operand(dst, src);
1768 }
1770 void Assembler::movsbl(Register dst, Register src) { // movsxb
1771 NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1772 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1773 emit_byte(0x0F);
1774 emit_byte(0xBE);
1775 emit_byte(0xC0 | encode);
1776 }
1778 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1779 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1780 emit_byte(0xF2);
1781 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1782 emit_byte(0x0F);
1783 emit_byte(0x10);
1784 emit_byte(0xC0 | encode);
1785 }
1787 void Assembler::movsd(XMMRegister dst, Address src) {
1788 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1789 InstructionMark im(this);
1790 emit_byte(0xF2);
1791 prefix(src, dst);
1792 emit_byte(0x0F);
1793 emit_byte(0x10);
1794 emit_operand(dst, src);
1795 }
1797 void Assembler::movsd(Address dst, XMMRegister src) {
1798 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1799 InstructionMark im(this);
1800 emit_byte(0xF2);
1801 prefix(dst, src);
1802 emit_byte(0x0F);
1803 emit_byte(0x11);
1804 emit_operand(src, dst);
1805 }
1807 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1808 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1809 emit_byte(0xF3);
1810 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1811 emit_byte(0x0F);
1812 emit_byte(0x10);
1813 emit_byte(0xC0 | encode);
1814 }
1816 void Assembler::movss(XMMRegister dst, Address src) {
1817 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1818 InstructionMark im(this);
1819 emit_byte(0xF3);
1820 prefix(src, dst);
1821 emit_byte(0x0F);
1822 emit_byte(0x10);
1823 emit_operand(dst, src);
1824 }
1826 void Assembler::movss(Address dst, XMMRegister src) {
1827 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1828 InstructionMark im(this);
1829 emit_byte(0xF3);
1830 prefix(dst, src);
1831 emit_byte(0x0F);
1832 emit_byte(0x11);
1833 emit_operand(src, dst);
1834 }
1836 void Assembler::movswl(Register dst, Address src) { // movsxw
1837 InstructionMark im(this);
1838 prefix(src, dst);
1839 emit_byte(0x0F);
1840 emit_byte(0xBF);
1841 emit_operand(dst, src);
1842 }
1844 void Assembler::movswl(Register dst, Register src) { // movsxw
1845 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1846 emit_byte(0x0F);
1847 emit_byte(0xBF);
1848 emit_byte(0xC0 | encode);
1849 }
1851 void Assembler::movw(Address dst, int imm16) {
1852 InstructionMark im(this);
1854 emit_byte(0x66); // switch to 16-bit mode
1855 prefix(dst);
1856 emit_byte(0xC7);
1857 emit_operand(rax, dst, 2);
1858 emit_word(imm16);
1859 }
1861 void Assembler::movw(Register dst, Address src) {
1862 InstructionMark im(this);
1863 emit_byte(0x66);
1864 prefix(src, dst);
1865 emit_byte(0x8B);
1866 emit_operand(dst, src);
1867 }
1869 void Assembler::movw(Address dst, Register src) {
1870 InstructionMark im(this);
1871 emit_byte(0x66);
1872 prefix(dst, src);
1873 emit_byte(0x89);
1874 emit_operand(src, dst);
1875 }
1877 void Assembler::movzbl(Register dst, Address src) { // movzxb
1878 InstructionMark im(this);
1879 prefix(src, dst);
1880 emit_byte(0x0F);
1881 emit_byte(0xB6);
1882 emit_operand(dst, src);
1883 }
1885 void Assembler::movzbl(Register dst, Register src) { // movzxb
1886 NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1887 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1888 emit_byte(0x0F);
1889 emit_byte(0xB6);
1890 emit_byte(0xC0 | encode);
1891 }
1893 void Assembler::movzwl(Register dst, Address src) { // movzxw
1894 InstructionMark im(this);
1895 prefix(src, dst);
1896 emit_byte(0x0F);
1897 emit_byte(0xB7);
1898 emit_operand(dst, src);
1899 }
1901 void Assembler::movzwl(Register dst, Register src) { // movzxw
1902 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1903 emit_byte(0x0F);
1904 emit_byte(0xB7);
1905 emit_byte(0xC0 | encode);
1906 }
1908 void Assembler::mull(Address src) {
1909 InstructionMark im(this);
1910 prefix(src);
1911 emit_byte(0xF7);
1912 emit_operand(rsp, src);
1913 }
1915 void Assembler::mull(Register src) {
1916 int encode = prefix_and_encode(src->encoding());
1917 emit_byte(0xF7);
1918 emit_byte(0xE0 | encode);
1919 }
1921 void Assembler::mulsd(XMMRegister dst, Address src) {
1922 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1923 InstructionMark im(this);
1924 emit_byte(0xF2);
1925 prefix(src, dst);
1926 emit_byte(0x0F);
1927 emit_byte(0x59);
1928 emit_operand(dst, src);
1929 }
1931 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1932 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1933 emit_byte(0xF2);
1934 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1935 emit_byte(0x0F);
1936 emit_byte(0x59);
1937 emit_byte(0xC0 | encode);
1938 }
1940 void Assembler::mulss(XMMRegister dst, Address src) {
1941 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1942 InstructionMark im(this);
1943 emit_byte(0xF3);
1944 prefix(src, dst);
1945 emit_byte(0x0F);
1946 emit_byte(0x59);
1947 emit_operand(dst, src);
1948 }
1950 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1951 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1952 emit_byte(0xF3);
1953 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1954 emit_byte(0x0F);
1955 emit_byte(0x59);
1956 emit_byte(0xC0 | encode);
1957 }
1959 void Assembler::negl(Register dst) {
1960 int encode = prefix_and_encode(dst->encoding());
1961 emit_byte(0xF7);
1962 emit_byte(0xD8 | encode);
1963 }
1965 void Assembler::nop(int i) {
1966 #ifdef ASSERT
1967 assert(i > 0, " ");
1968 // The fancy nops aren't currently recognized by debuggers making it a
1969 // pain to disassemble code while debugging. If asserts are on clearly
1970 // speed is not an issue so simply use the single byte traditional nop
1971 // to do alignment.
1973 for (; i > 0 ; i--) emit_byte(0x90);
1974 return;
1976 #endif // ASSERT
1978 if (UseAddressNop && VM_Version::is_intel()) {
1979 //
1980 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
1981 // 1: 0x90
1982 // 2: 0x66 0x90
1983 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1984 // 4: 0x0F 0x1F 0x40 0x00
1985 // 5: 0x0F 0x1F 0x44 0x00 0x00
1986 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1987 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1988 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1989 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1990 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1991 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1993 // The rest coding is Intel specific - don't use consecutive address nops
1995 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1996 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1997 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1998 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2000 while(i >= 15) {
2001 // For Intel don't generate consecutive addess nops (mix with regular nops)
2002 i -= 15;
2003 emit_byte(0x66); // size prefix
2004 emit_byte(0x66); // size prefix
2005 emit_byte(0x66); // size prefix
2006 addr_nop_8();
2007 emit_byte(0x66); // size prefix
2008 emit_byte(0x66); // size prefix
2009 emit_byte(0x66); // size prefix
2010 emit_byte(0x90); // nop
2011 }
2012 switch (i) {
2013 case 14:
2014 emit_byte(0x66); // size prefix
2015 case 13:
2016 emit_byte(0x66); // size prefix
2017 case 12:
2018 addr_nop_8();
2019 emit_byte(0x66); // size prefix
2020 emit_byte(0x66); // size prefix
2021 emit_byte(0x66); // size prefix
2022 emit_byte(0x90); // nop
2023 break;
2024 case 11:
2025 emit_byte(0x66); // size prefix
2026 case 10:
2027 emit_byte(0x66); // size prefix
2028 case 9:
2029 emit_byte(0x66); // size prefix
2030 case 8:
2031 addr_nop_8();
2032 break;
2033 case 7:
2034 addr_nop_7();
2035 break;
2036 case 6:
2037 emit_byte(0x66); // size prefix
2038 case 5:
2039 addr_nop_5();
2040 break;
2041 case 4:
2042 addr_nop_4();
2043 break;
2044 case 3:
2045 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2046 emit_byte(0x66); // size prefix
2047 case 2:
2048 emit_byte(0x66); // size prefix
2049 case 1:
2050 emit_byte(0x90); // nop
2051 break;
2052 default:
2053 assert(i == 0, " ");
2054 }
2055 return;
2056 }
2057 if (UseAddressNop && VM_Version::is_amd()) {
2058 //
2059 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
2060 // 1: 0x90
2061 // 2: 0x66 0x90
2062 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2063 // 4: 0x0F 0x1F 0x40 0x00
2064 // 5: 0x0F 0x1F 0x44 0x00 0x00
2065 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2066 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2067 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2068 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2069 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2070 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2072 // The rest coding is AMD specific - use consecutive address nops
2074 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2075 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2076 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2077 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2078 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2079 // Size prefixes (0x66) are added for larger sizes
2081 while(i >= 22) {
2082 i -= 11;
2083 emit_byte(0x66); // size prefix
2084 emit_byte(0x66); // size prefix
2085 emit_byte(0x66); // size prefix
2086 addr_nop_8();
2087 }
2088 // Generate first nop for size between 21-12
2089 switch (i) {
2090 case 21:
2091 i -= 1;
2092 emit_byte(0x66); // size prefix
2093 case 20:
2094 case 19:
2095 i -= 1;
2096 emit_byte(0x66); // size prefix
2097 case 18:
2098 case 17:
2099 i -= 1;
2100 emit_byte(0x66); // size prefix
2101 case 16:
2102 case 15:
2103 i -= 8;
2104 addr_nop_8();
2105 break;
2106 case 14:
2107 case 13:
2108 i -= 7;
2109 addr_nop_7();
2110 break;
2111 case 12:
2112 i -= 6;
2113 emit_byte(0x66); // size prefix
2114 addr_nop_5();
2115 break;
2116 default:
2117 assert(i < 12, " ");
2118 }
2120 // Generate second nop for size between 11-1
2121 switch (i) {
2122 case 11:
2123 emit_byte(0x66); // size prefix
2124 case 10:
2125 emit_byte(0x66); // size prefix
2126 case 9:
2127 emit_byte(0x66); // size prefix
2128 case 8:
2129 addr_nop_8();
2130 break;
2131 case 7:
2132 addr_nop_7();
2133 break;
2134 case 6:
2135 emit_byte(0x66); // size prefix
2136 case 5:
2137 addr_nop_5();
2138 break;
2139 case 4:
2140 addr_nop_4();
2141 break;
2142 case 3:
2143 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2144 emit_byte(0x66); // size prefix
2145 case 2:
2146 emit_byte(0x66); // size prefix
2147 case 1:
2148 emit_byte(0x90); // nop
2149 break;
2150 default:
2151 assert(i == 0, " ");
2152 }
2153 return;
2154 }
2156 // Using nops with size prefixes "0x66 0x90".
2157 // From AMD Optimization Guide:
2158 // 1: 0x90
2159 // 2: 0x66 0x90
2160 // 3: 0x66 0x66 0x90
2161 // 4: 0x66 0x66 0x66 0x90
2162 // 5: 0x66 0x66 0x90 0x66 0x90
2163 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2164 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2165 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2166 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2167 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2168 //
2169 while(i > 12) {
2170 i -= 4;
2171 emit_byte(0x66); // size prefix
2172 emit_byte(0x66);
2173 emit_byte(0x66);
2174 emit_byte(0x90); // nop
2175 }
2176 // 1 - 12 nops
2177 if(i > 8) {
2178 if(i > 9) {
2179 i -= 1;
2180 emit_byte(0x66);
2181 }
2182 i -= 3;
2183 emit_byte(0x66);
2184 emit_byte(0x66);
2185 emit_byte(0x90);
2186 }
2187 // 1 - 8 nops
2188 if(i > 4) {
2189 if(i > 6) {
2190 i -= 1;
2191 emit_byte(0x66);
2192 }
2193 i -= 3;
2194 emit_byte(0x66);
2195 emit_byte(0x66);
2196 emit_byte(0x90);
2197 }
2198 switch (i) {
2199 case 4:
2200 emit_byte(0x66);
2201 case 3:
2202 emit_byte(0x66);
2203 case 2:
2204 emit_byte(0x66);
2205 case 1:
2206 emit_byte(0x90);
2207 break;
2208 default:
2209 assert(i == 0, " ");
2210 }
2211 }
2213 void Assembler::notl(Register dst) {
2214 int encode = prefix_and_encode(dst->encoding());
2215 emit_byte(0xF7);
2216 emit_byte(0xD0 | encode );
2217 }
2219 void Assembler::orl(Address dst, int32_t imm32) {
2220 InstructionMark im(this);
2221 prefix(dst);
2222 emit_arith_operand(0x81, rcx, dst, imm32);
2223 }
2225 void Assembler::orl(Register dst, int32_t imm32) {
2226 prefix(dst);
2227 emit_arith(0x81, 0xC8, dst, imm32);
2228 }
2230 void Assembler::orl(Register dst, Address src) {
2231 InstructionMark im(this);
2232 prefix(src, dst);
2233 emit_byte(0x0B);
2234 emit_operand(dst, src);
2235 }
2237 void Assembler::orl(Register dst, Register src) {
2238 (void) prefix_and_encode(dst->encoding(), src->encoding());
2239 emit_arith(0x0B, 0xC0, dst, src);
2240 }
2242 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2243 assert(VM_Version::supports_sse4_2(), "");
2245 InstructionMark im(this);
2246 emit_byte(0x66);
2247 prefix(src, dst);
2248 emit_byte(0x0F);
2249 emit_byte(0x3A);
2250 emit_byte(0x61);
2251 emit_operand(dst, src);
2252 emit_byte(imm8);
2253 }
2255 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2256 assert(VM_Version::supports_sse4_2(), "");
2258 emit_byte(0x66);
2259 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2260 emit_byte(0x0F);
2261 emit_byte(0x3A);
2262 emit_byte(0x61);
2263 emit_byte(0xC0 | encode);
2264 emit_byte(imm8);
2265 }
2267 // generic
2268 void Assembler::pop(Register dst) {
2269 int encode = prefix_and_encode(dst->encoding());
2270 emit_byte(0x58 | encode);
2271 }
2273 void Assembler::popcntl(Register dst, Address src) {
2274 assert(VM_Version::supports_popcnt(), "must support");
2275 InstructionMark im(this);
2276 emit_byte(0xF3);
2277 prefix(src, dst);
2278 emit_byte(0x0F);
2279 emit_byte(0xB8);
2280 emit_operand(dst, src);
2281 }
2283 void Assembler::popcntl(Register dst, Register src) {
2284 assert(VM_Version::supports_popcnt(), "must support");
2285 emit_byte(0xF3);
2286 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2287 emit_byte(0x0F);
2288 emit_byte(0xB8);
2289 emit_byte(0xC0 | encode);
2290 }
2292 void Assembler::popf() {
2293 emit_byte(0x9D);
2294 }
2296 #ifndef _LP64 // no 32bit push/pop on amd64
2297 void Assembler::popl(Address dst) {
2298 // NOTE: this will adjust stack by 8byte on 64bits
2299 InstructionMark im(this);
2300 prefix(dst);
2301 emit_byte(0x8F);
2302 emit_operand(rax, dst);
2303 }
2304 #endif
2306 void Assembler::prefetch_prefix(Address src) {
2307 prefix(src);
2308 emit_byte(0x0F);
2309 }
2311 void Assembler::prefetchnta(Address src) {
2312 NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
2313 InstructionMark im(this);
2314 prefetch_prefix(src);
2315 emit_byte(0x18);
2316 emit_operand(rax, src); // 0, src
2317 }
2319 void Assembler::prefetchr(Address src) {
2320 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2321 InstructionMark im(this);
2322 prefetch_prefix(src);
2323 emit_byte(0x0D);
2324 emit_operand(rax, src); // 0, src
2325 }
2327 void Assembler::prefetcht0(Address src) {
2328 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2329 InstructionMark im(this);
2330 prefetch_prefix(src);
2331 emit_byte(0x18);
2332 emit_operand(rcx, src); // 1, src
2333 }
2335 void Assembler::prefetcht1(Address src) {
2336 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2337 InstructionMark im(this);
2338 prefetch_prefix(src);
2339 emit_byte(0x18);
2340 emit_operand(rdx, src); // 2, src
2341 }
2343 void Assembler::prefetcht2(Address src) {
2344 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2345 InstructionMark im(this);
2346 prefetch_prefix(src);
2347 emit_byte(0x18);
2348 emit_operand(rbx, src); // 3, src
2349 }
2351 void Assembler::prefetchw(Address src) {
2352 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2353 InstructionMark im(this);
2354 prefetch_prefix(src);
2355 emit_byte(0x0D);
2356 emit_operand(rcx, src); // 1, src
2357 }
2359 void Assembler::prefix(Prefix p) {
2360 a_byte(p);
2361 }
2363 void Assembler::por(XMMRegister dst, XMMRegister src) {
2364 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2366 emit_byte(0x66);
2367 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2368 emit_byte(0x0F);
2370 emit_byte(0xEB);
2371 emit_byte(0xC0 | encode);
2372 }
2374 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2375 assert(isByte(mode), "invalid value");
2376 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2378 emit_byte(0x66);
2379 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2380 emit_byte(0x0F);
2381 emit_byte(0x70);
2382 emit_byte(0xC0 | encode);
2383 emit_byte(mode & 0xFF);
2385 }
2387 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2388 assert(isByte(mode), "invalid value");
2389 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2391 InstructionMark im(this);
2392 emit_byte(0x66);
2393 prefix(src, dst);
2394 emit_byte(0x0F);
2395 emit_byte(0x70);
2396 emit_operand(dst, src);
2397 emit_byte(mode & 0xFF);
2398 }
2400 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2401 assert(isByte(mode), "invalid value");
2402 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2404 emit_byte(0xF2);
2405 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2406 emit_byte(0x0F);
2407 emit_byte(0x70);
2408 emit_byte(0xC0 | encode);
2409 emit_byte(mode & 0xFF);
2410 }
2412 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2413 assert(isByte(mode), "invalid value");
2414 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2416 InstructionMark im(this);
2417 emit_byte(0xF2);
2418 prefix(src, dst); // QQ new
2419 emit_byte(0x0F);
2420 emit_byte(0x70);
2421 emit_operand(dst, src);
2422 emit_byte(mode & 0xFF);
2423 }
2425 void Assembler::psrlq(XMMRegister dst, int shift) {
2426 // Shift 64 bit value logically right by specified number of bits.
2427 // HMM Table D-1 says sse2 or mmx.
2428 // Do not confuse it with psrldq SSE2 instruction which
2429 // shifts 128 bit value in xmm register by number of bytes.
2430 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2432 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2433 emit_byte(0x66);
2434 emit_byte(0x0F);
2435 emit_byte(0x73);
2436 emit_byte(0xC0 | encode);
2437 emit_byte(shift);
2438 }
2440 void Assembler::psrldq(XMMRegister dst, int shift) {
2441 // Shift 128 bit value in xmm register by number of bytes.
2442 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2444 int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
2445 emit_byte(0x66);
2446 emit_byte(0x0F);
2447 emit_byte(0x73);
2448 emit_byte(0xC0 | encode);
2449 emit_byte(shift);
2450 }
2452 void Assembler::ptest(XMMRegister dst, Address src) {
2453 assert(VM_Version::supports_sse4_1(), "");
2455 InstructionMark im(this);
2456 emit_byte(0x66);
2457 prefix(src, dst);
2458 emit_byte(0x0F);
2459 emit_byte(0x38);
2460 emit_byte(0x17);
2461 emit_operand(dst, src);
2462 }
2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2465 assert(VM_Version::supports_sse4_1(), "");
2467 emit_byte(0x66);
2468 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2469 emit_byte(0x0F);
2470 emit_byte(0x38);
2471 emit_byte(0x17);
2472 emit_byte(0xC0 | encode);
2473 }
2475 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2476 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2477 emit_byte(0x66);
2478 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2479 emit_byte(0x0F);
2480 emit_byte(0x60);
2481 emit_byte(0xC0 | encode);
2482 }
2484 void Assembler::push(int32_t imm32) {
2485 // in 64bits we push 64bits onto the stack but only
2486 // take a 32bit immediate
2487 emit_byte(0x68);
2488 emit_long(imm32);
2489 }
2491 void Assembler::push(Register src) {
2492 int encode = prefix_and_encode(src->encoding());
2494 emit_byte(0x50 | encode);
2495 }
2497 void Assembler::pushf() {
2498 emit_byte(0x9C);
2499 }
2501 #ifndef _LP64 // no 32bit push/pop on amd64
2502 void Assembler::pushl(Address src) {
2503 // Note this will push 64bit on 64bit
2504 InstructionMark im(this);
2505 prefix(src);
2506 emit_byte(0xFF);
2507 emit_operand(rsi, src);
2508 }
2509 #endif
2511 void Assembler::pxor(XMMRegister dst, Address src) {
2512 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2513 InstructionMark im(this);
2514 emit_byte(0x66);
2515 prefix(src, dst);
2516 emit_byte(0x0F);
2517 emit_byte(0xEF);
2518 emit_operand(dst, src);
2519 }
2521 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2522 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2523 InstructionMark im(this);
2524 emit_byte(0x66);
2525 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2526 emit_byte(0x0F);
2527 emit_byte(0xEF);
2528 emit_byte(0xC0 | encode);
2529 }
2531 void Assembler::rcll(Register dst, int imm8) {
2532 assert(isShiftCount(imm8), "illegal shift count");
2533 int encode = prefix_and_encode(dst->encoding());
2534 if (imm8 == 1) {
2535 emit_byte(0xD1);
2536 emit_byte(0xD0 | encode);
2537 } else {
2538 emit_byte(0xC1);
2539 emit_byte(0xD0 | encode);
2540 emit_byte(imm8);
2541 }
2542 }
2544 // copies data from [esi] to [edi] using rcx pointer sized words
2545 // generic
2546 void Assembler::rep_mov() {
2547 emit_byte(0xF3);
2548 // MOVSQ
2549 LP64_ONLY(prefix(REX_W));
2550 emit_byte(0xA5);
2551 }
2553 // sets rcx pointer sized words with rax, value at [edi]
2554 // generic
2555 void Assembler::rep_set() { // rep_set
2556 emit_byte(0xF3);
2557 // STOSQ
2558 LP64_ONLY(prefix(REX_W));
2559 emit_byte(0xAB);
2560 }
2562 // scans rcx pointer sized words at [edi] for occurance of rax,
2563 // generic
2564 void Assembler::repne_scan() { // repne_scan
2565 emit_byte(0xF2);
2566 // SCASQ
2567 LP64_ONLY(prefix(REX_W));
2568 emit_byte(0xAF);
2569 }
2571 #ifdef _LP64
2572 // scans rcx 4 byte words at [edi] for occurance of rax,
2573 // generic
2574 void Assembler::repne_scanl() { // repne_scan
2575 emit_byte(0xF2);
2576 // SCASL
2577 emit_byte(0xAF);
2578 }
2579 #endif
2581 void Assembler::ret(int imm16) {
2582 if (imm16 == 0) {
2583 emit_byte(0xC3);
2584 } else {
2585 emit_byte(0xC2);
2586 emit_word(imm16);
2587 }
2588 }
2590 void Assembler::sahf() {
2591 #ifdef _LP64
2592 // Not supported in 64bit mode
2593 ShouldNotReachHere();
2594 #endif
2595 emit_byte(0x9E);
2596 }
2598 void Assembler::sarl(Register dst, int imm8) {
2599 int encode = prefix_and_encode(dst->encoding());
2600 assert(isShiftCount(imm8), "illegal shift count");
2601 if (imm8 == 1) {
2602 emit_byte(0xD1);
2603 emit_byte(0xF8 | encode);
2604 } else {
2605 emit_byte(0xC1);
2606 emit_byte(0xF8 | encode);
2607 emit_byte(imm8);
2608 }
2609 }
2611 void Assembler::sarl(Register dst) {
2612 int encode = prefix_and_encode(dst->encoding());
2613 emit_byte(0xD3);
2614 emit_byte(0xF8 | encode);
2615 }
2617 void Assembler::sbbl(Address dst, int32_t imm32) {
2618 InstructionMark im(this);
2619 prefix(dst);
2620 emit_arith_operand(0x81, rbx, dst, imm32);
2621 }
2623 void Assembler::sbbl(Register dst, int32_t imm32) {
2624 prefix(dst);
2625 emit_arith(0x81, 0xD8, dst, imm32);
2626 }
2629 void Assembler::sbbl(Register dst, Address src) {
2630 InstructionMark im(this);
2631 prefix(src, dst);
2632 emit_byte(0x1B);
2633 emit_operand(dst, src);
2634 }
2636 void Assembler::sbbl(Register dst, Register src) {
2637 (void) prefix_and_encode(dst->encoding(), src->encoding());
2638 emit_arith(0x1B, 0xC0, dst, src);
2639 }
2641 void Assembler::setb(Condition cc, Register dst) {
2642 assert(0 <= cc && cc < 16, "illegal cc");
2643 int encode = prefix_and_encode(dst->encoding(), true);
2644 emit_byte(0x0F);
2645 emit_byte(0x90 | cc);
2646 emit_byte(0xC0 | encode);
2647 }
2649 void Assembler::shll(Register dst, int imm8) {
2650 assert(isShiftCount(imm8), "illegal shift count");
2651 int encode = prefix_and_encode(dst->encoding());
2652 if (imm8 == 1 ) {
2653 emit_byte(0xD1);
2654 emit_byte(0xE0 | encode);
2655 } else {
2656 emit_byte(0xC1);
2657 emit_byte(0xE0 | encode);
2658 emit_byte(imm8);
2659 }
2660 }
2662 void Assembler::shll(Register dst) {
2663 int encode = prefix_and_encode(dst->encoding());
2664 emit_byte(0xD3);
2665 emit_byte(0xE0 | encode);
2666 }
2668 void Assembler::shrl(Register dst, int imm8) {
2669 assert(isShiftCount(imm8), "illegal shift count");
2670 int encode = prefix_and_encode(dst->encoding());
2671 emit_byte(0xC1);
2672 emit_byte(0xE8 | encode);
2673 emit_byte(imm8);
2674 }
2676 void Assembler::shrl(Register dst) {
2677 int encode = prefix_and_encode(dst->encoding());
2678 emit_byte(0xD3);
2679 emit_byte(0xE8 | encode);
2680 }
2682 // copies a single word from [esi] to [edi]
2683 void Assembler::smovl() {
2684 emit_byte(0xA5);
2685 }
2687 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2688 // HMM Table D-1 says sse2
2689 // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2690 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2691 emit_byte(0xF2);
2692 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2693 emit_byte(0x0F);
2694 emit_byte(0x51);
2695 emit_byte(0xC0 | encode);
2696 }
2698 void Assembler::sqrtsd(XMMRegister dst, Address src) {
2699 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2700 InstructionMark im(this);
2701 emit_byte(0xF2);
2702 prefix(src, dst);
2703 emit_byte(0x0F);
2704 emit_byte(0x51);
2705 emit_operand(dst, src);
2706 }
2708 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2709 // HMM Table D-1 says sse2
2710 // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2711 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2712 emit_byte(0xF3);
2713 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2714 emit_byte(0x0F);
2715 emit_byte(0x51);
2716 emit_byte(0xC0 | encode);
2717 }
2719 void Assembler::sqrtss(XMMRegister dst, Address src) {
2720 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2721 InstructionMark im(this);
2722 emit_byte(0xF3);
2723 prefix(src, dst);
2724 emit_byte(0x0F);
2725 emit_byte(0x51);
2726 emit_operand(dst, src);
2727 }
2729 void Assembler::stmxcsr( Address dst) {
2730 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2731 InstructionMark im(this);
2732 prefix(dst);
2733 emit_byte(0x0F);
2734 emit_byte(0xAE);
2735 emit_operand(as_Register(3), dst);
2736 }
2738 void Assembler::subl(Address dst, int32_t imm32) {
2739 InstructionMark im(this);
2740 prefix(dst);
2741 emit_arith_operand(0x81, rbp, dst, imm32);
2742 }
2744 void Assembler::subl(Address dst, Register src) {
2745 InstructionMark im(this);
2746 prefix(dst, src);
2747 emit_byte(0x29);
2748 emit_operand(src, dst);
2749 }
2751 void Assembler::subl(Register dst, int32_t imm32) {
2752 prefix(dst);
2753 emit_arith(0x81, 0xE8, dst, imm32);
2754 }
2756 void Assembler::subl(Register dst, Address src) {
2757 InstructionMark im(this);
2758 prefix(src, dst);
2759 emit_byte(0x2B);
2760 emit_operand(dst, src);
2761 }
2763 void Assembler::subl(Register dst, Register src) {
2764 (void) prefix_and_encode(dst->encoding(), src->encoding());
2765 emit_arith(0x2B, 0xC0, dst, src);
2766 }
2768 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2769 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2770 emit_byte(0xF2);
2771 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2772 emit_byte(0x0F);
2773 emit_byte(0x5C);
2774 emit_byte(0xC0 | encode);
2775 }
2777 void Assembler::subsd(XMMRegister dst, Address src) {
2778 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2779 InstructionMark im(this);
2780 emit_byte(0xF2);
2781 prefix(src, dst);
2782 emit_byte(0x0F);
2783 emit_byte(0x5C);
2784 emit_operand(dst, src);
2785 }
2787 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2788 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2789 emit_byte(0xF3);
2790 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2791 emit_byte(0x0F);
2792 emit_byte(0x5C);
2793 emit_byte(0xC0 | encode);
2794 }
2796 void Assembler::subss(XMMRegister dst, Address src) {
2797 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2798 InstructionMark im(this);
2799 emit_byte(0xF3);
2800 prefix(src, dst);
2801 emit_byte(0x0F);
2802 emit_byte(0x5C);
2803 emit_operand(dst, src);
2804 }
2806 void Assembler::testb(Register dst, int imm8) {
2807 NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2808 (void) prefix_and_encode(dst->encoding(), true);
2809 emit_arith_b(0xF6, 0xC0, dst, imm8);
2810 }
2812 void Assembler::testl(Register dst, int32_t imm32) {
2813 // not using emit_arith because test
2814 // doesn't support sign-extension of
2815 // 8bit operands
2816 int encode = dst->encoding();
2817 if (encode == 0) {
2818 emit_byte(0xA9);
2819 } else {
2820 encode = prefix_and_encode(encode);
2821 emit_byte(0xF7);
2822 emit_byte(0xC0 | encode);
2823 }
2824 emit_long(imm32);
2825 }
2827 void Assembler::testl(Register dst, Register src) {
2828 (void) prefix_and_encode(dst->encoding(), src->encoding());
2829 emit_arith(0x85, 0xC0, dst, src);
2830 }
2832 void Assembler::testl(Register dst, Address src) {
2833 InstructionMark im(this);
2834 prefix(src, dst);
2835 emit_byte(0x85);
2836 emit_operand(dst, src);
2837 }
2839 void Assembler::ucomisd(XMMRegister dst, Address src) {
2840 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2841 emit_byte(0x66);
2842 ucomiss(dst, src);
2843 }
2845 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2846 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2847 emit_byte(0x66);
2848 ucomiss(dst, src);
2849 }
2851 void Assembler::ucomiss(XMMRegister dst, Address src) {
2852 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2854 InstructionMark im(this);
2855 prefix(src, dst);
2856 emit_byte(0x0F);
2857 emit_byte(0x2E);
2858 emit_operand(dst, src);
2859 }
2861 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2862 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2863 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2864 emit_byte(0x0F);
2865 emit_byte(0x2E);
2866 emit_byte(0xC0 | encode);
2867 }
2870 void Assembler::xaddl(Address dst, Register src) {
2871 InstructionMark im(this);
2872 prefix(dst, src);
2873 emit_byte(0x0F);
2874 emit_byte(0xC1);
2875 emit_operand(src, dst);
2876 }
2878 void Assembler::xchgl(Register dst, Address src) { // xchg
2879 InstructionMark im(this);
2880 prefix(src, dst);
2881 emit_byte(0x87);
2882 emit_operand(dst, src);
2883 }
2885 void Assembler::xchgl(Register dst, Register src) {
2886 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2887 emit_byte(0x87);
2888 emit_byte(0xc0 | encode);
2889 }
2891 void Assembler::xorl(Register dst, int32_t imm32) {
2892 prefix(dst);
2893 emit_arith(0x81, 0xF0, dst, imm32);
2894 }
2896 void Assembler::xorl(Register dst, Address src) {
2897 InstructionMark im(this);
2898 prefix(src, dst);
2899 emit_byte(0x33);
2900 emit_operand(dst, src);
2901 }
2903 void Assembler::xorl(Register dst, Register src) {
2904 (void) prefix_and_encode(dst->encoding(), src->encoding());
2905 emit_arith(0x33, 0xC0, dst, src);
2906 }
2908 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
2909 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2910 emit_byte(0x66);
2911 xorps(dst, src);
2912 }
2914 void Assembler::xorpd(XMMRegister dst, Address src) {
2915 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2916 InstructionMark im(this);
2917 emit_byte(0x66);
2918 prefix(src, dst);
2919 emit_byte(0x0F);
2920 emit_byte(0x57);
2921 emit_operand(dst, src);
2922 }
2925 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
2926 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2927 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2928 emit_byte(0x0F);
2929 emit_byte(0x57);
2930 emit_byte(0xC0 | encode);
2931 }
2933 void Assembler::xorps(XMMRegister dst, Address src) {
2934 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2935 InstructionMark im(this);
2936 prefix(src, dst);
2937 emit_byte(0x0F);
2938 emit_byte(0x57);
2939 emit_operand(dst, src);
2940 }
2942 #ifndef _LP64
2943 // 32bit only pieces of the assembler
2945 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
2946 // NO PREFIX AS NEVER 64BIT
2947 InstructionMark im(this);
2948 emit_byte(0x81);
2949 emit_byte(0xF8 | src1->encoding());
2950 emit_data(imm32, rspec, 0);
2951 }
2953 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
2954 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
2955 InstructionMark im(this);
2956 emit_byte(0x81);
2957 emit_operand(rdi, src1);
2958 emit_data(imm32, rspec, 0);
2959 }
2961 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
2962 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
2963 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
2964 void Assembler::cmpxchg8(Address adr) {
2965 InstructionMark im(this);
2966 emit_byte(0x0F);
2967 emit_byte(0xc7);
2968 emit_operand(rcx, adr);
2969 }
2971 void Assembler::decl(Register dst) {
2972 // Don't use it directly. Use MacroAssembler::decrementl() instead.
2973 emit_byte(0x48 | dst->encoding());
2974 }
2976 #endif // _LP64
2978 // 64bit typically doesn't use the x87 but needs to for the trig funcs
2980 void Assembler::fabs() {
2981 emit_byte(0xD9);
2982 emit_byte(0xE1);
2983 }
2985 void Assembler::fadd(int i) {
2986 emit_farith(0xD8, 0xC0, i);
2987 }
2989 void Assembler::fadd_d(Address src) {
2990 InstructionMark im(this);
2991 emit_byte(0xDC);
2992 emit_operand32(rax, src);
2993 }
2995 void Assembler::fadd_s(Address src) {
2996 InstructionMark im(this);
2997 emit_byte(0xD8);
2998 emit_operand32(rax, src);
2999 }
3001 void Assembler::fadda(int i) {
3002 emit_farith(0xDC, 0xC0, i);
3003 }
3005 void Assembler::faddp(int i) {
3006 emit_farith(0xDE, 0xC0, i);
3007 }
3009 void Assembler::fchs() {
3010 emit_byte(0xD9);
3011 emit_byte(0xE0);
3012 }
3014 void Assembler::fcom(int i) {
3015 emit_farith(0xD8, 0xD0, i);
3016 }
3018 void Assembler::fcomp(int i) {
3019 emit_farith(0xD8, 0xD8, i);
3020 }
3022 void Assembler::fcomp_d(Address src) {
3023 InstructionMark im(this);
3024 emit_byte(0xDC);
3025 emit_operand32(rbx, src);
3026 }
3028 void Assembler::fcomp_s(Address src) {
3029 InstructionMark im(this);
3030 emit_byte(0xD8);
3031 emit_operand32(rbx, src);
3032 }
3034 void Assembler::fcompp() {
3035 emit_byte(0xDE);
3036 emit_byte(0xD9);
3037 }
3039 void Assembler::fcos() {
3040 emit_byte(0xD9);
3041 emit_byte(0xFF);
3042 }
3044 void Assembler::fdecstp() {
3045 emit_byte(0xD9);
3046 emit_byte(0xF6);
3047 }
3049 void Assembler::fdiv(int i) {
3050 emit_farith(0xD8, 0xF0, i);
3051 }
3053 void Assembler::fdiv_d(Address src) {
3054 InstructionMark im(this);
3055 emit_byte(0xDC);
3056 emit_operand32(rsi, src);
3057 }
3059 void Assembler::fdiv_s(Address src) {
3060 InstructionMark im(this);
3061 emit_byte(0xD8);
3062 emit_operand32(rsi, src);
3063 }
3065 void Assembler::fdiva(int i) {
3066 emit_farith(0xDC, 0xF8, i);
3067 }
3069 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
3070 // is erroneous for some of the floating-point instructions below.
3072 void Assembler::fdivp(int i) {
3073 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
3074 }
3076 void Assembler::fdivr(int i) {
3077 emit_farith(0xD8, 0xF8, i);
3078 }
3080 void Assembler::fdivr_d(Address src) {
3081 InstructionMark im(this);
3082 emit_byte(0xDC);
3083 emit_operand32(rdi, src);
3084 }
3086 void Assembler::fdivr_s(Address src) {
3087 InstructionMark im(this);
3088 emit_byte(0xD8);
3089 emit_operand32(rdi, src);
3090 }
3092 void Assembler::fdivra(int i) {
3093 emit_farith(0xDC, 0xF0, i);
3094 }
3096 void Assembler::fdivrp(int i) {
3097 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
3098 }
3100 void Assembler::ffree(int i) {
3101 emit_farith(0xDD, 0xC0, i);
3102 }
3104 void Assembler::fild_d(Address adr) {
3105 InstructionMark im(this);
3106 emit_byte(0xDF);
3107 emit_operand32(rbp, adr);
3108 }
3110 void Assembler::fild_s(Address adr) {
3111 InstructionMark im(this);
3112 emit_byte(0xDB);
3113 emit_operand32(rax, adr);
3114 }
3116 void Assembler::fincstp() {
3117 emit_byte(0xD9);
3118 emit_byte(0xF7);
3119 }
3121 void Assembler::finit() {
3122 emit_byte(0x9B);
3123 emit_byte(0xDB);
3124 emit_byte(0xE3);
3125 }
3127 void Assembler::fist_s(Address adr) {
3128 InstructionMark im(this);
3129 emit_byte(0xDB);
3130 emit_operand32(rdx, adr);
3131 }
3133 void Assembler::fistp_d(Address adr) {
3134 InstructionMark im(this);
3135 emit_byte(0xDF);
3136 emit_operand32(rdi, adr);
3137 }
3139 void Assembler::fistp_s(Address adr) {
3140 InstructionMark im(this);
3141 emit_byte(0xDB);
3142 emit_operand32(rbx, adr);
3143 }
3145 void Assembler::fld1() {
3146 emit_byte(0xD9);
3147 emit_byte(0xE8);
3148 }
3150 void Assembler::fld_d(Address adr) {
3151 InstructionMark im(this);
3152 emit_byte(0xDD);
3153 emit_operand32(rax, adr);
3154 }
3156 void Assembler::fld_s(Address adr) {
3157 InstructionMark im(this);
3158 emit_byte(0xD9);
3159 emit_operand32(rax, adr);
3160 }
3163 void Assembler::fld_s(int index) {
3164 emit_farith(0xD9, 0xC0, index);
3165 }
3167 void Assembler::fld_x(Address adr) {
3168 InstructionMark im(this);
3169 emit_byte(0xDB);
3170 emit_operand32(rbp, adr);
3171 }
3173 void Assembler::fldcw(Address src) {
3174 InstructionMark im(this);
3175 emit_byte(0xd9);
3176 emit_operand32(rbp, src);
3177 }
3179 void Assembler::fldenv(Address src) {
3180 InstructionMark im(this);
3181 emit_byte(0xD9);
3182 emit_operand32(rsp, src);
3183 }
3185 void Assembler::fldlg2() {
3186 emit_byte(0xD9);
3187 emit_byte(0xEC);
3188 }
3190 void Assembler::fldln2() {
3191 emit_byte(0xD9);
3192 emit_byte(0xED);
3193 }
3195 void Assembler::fldz() {
3196 emit_byte(0xD9);
3197 emit_byte(0xEE);
3198 }
3200 void Assembler::flog() {
3201 fldln2();
3202 fxch();
3203 fyl2x();
3204 }
3206 void Assembler::flog10() {
3207 fldlg2();
3208 fxch();
3209 fyl2x();
3210 }
3212 void Assembler::fmul(int i) {
3213 emit_farith(0xD8, 0xC8, i);
3214 }
3216 void Assembler::fmul_d(Address src) {
3217 InstructionMark im(this);
3218 emit_byte(0xDC);
3219 emit_operand32(rcx, src);
3220 }
3222 void Assembler::fmul_s(Address src) {
3223 InstructionMark im(this);
3224 emit_byte(0xD8);
3225 emit_operand32(rcx, src);
3226 }
3228 void Assembler::fmula(int i) {
3229 emit_farith(0xDC, 0xC8, i);
3230 }
3232 void Assembler::fmulp(int i) {
3233 emit_farith(0xDE, 0xC8, i);
3234 }
3236 void Assembler::fnsave(Address dst) {
3237 InstructionMark im(this);
3238 emit_byte(0xDD);
3239 emit_operand32(rsi, dst);
3240 }
3242 void Assembler::fnstcw(Address src) {
3243 InstructionMark im(this);
3244 emit_byte(0x9B);
3245 emit_byte(0xD9);
3246 emit_operand32(rdi, src);
3247 }
3249 void Assembler::fnstsw_ax() {
3250 emit_byte(0xdF);
3251 emit_byte(0xE0);
3252 }
3254 void Assembler::fprem() {
3255 emit_byte(0xD9);
3256 emit_byte(0xF8);
3257 }
3259 void Assembler::fprem1() {
3260 emit_byte(0xD9);
3261 emit_byte(0xF5);
3262 }
3264 void Assembler::frstor(Address src) {
3265 InstructionMark im(this);
3266 emit_byte(0xDD);
3267 emit_operand32(rsp, src);
3268 }
3270 void Assembler::fsin() {
3271 emit_byte(0xD9);
3272 emit_byte(0xFE);
3273 }
3275 void Assembler::fsqrt() {
3276 emit_byte(0xD9);
3277 emit_byte(0xFA);
3278 }
3280 void Assembler::fst_d(Address adr) {
3281 InstructionMark im(this);
3282 emit_byte(0xDD);
3283 emit_operand32(rdx, adr);
3284 }
3286 void Assembler::fst_s(Address adr) {
3287 InstructionMark im(this);
3288 emit_byte(0xD9);
3289 emit_operand32(rdx, adr);
3290 }
3292 void Assembler::fstp_d(Address adr) {
3293 InstructionMark im(this);
3294 emit_byte(0xDD);
3295 emit_operand32(rbx, adr);
3296 }
3298 void Assembler::fstp_d(int index) {
3299 emit_farith(0xDD, 0xD8, index);
3300 }
3302 void Assembler::fstp_s(Address adr) {
3303 InstructionMark im(this);
3304 emit_byte(0xD9);
3305 emit_operand32(rbx, adr);
3306 }
3308 void Assembler::fstp_x(Address adr) {
3309 InstructionMark im(this);
3310 emit_byte(0xDB);
3311 emit_operand32(rdi, adr);
3312 }
3314 void Assembler::fsub(int i) {
3315 emit_farith(0xD8, 0xE0, i);
3316 }
3318 void Assembler::fsub_d(Address src) {
3319 InstructionMark im(this);
3320 emit_byte(0xDC);
3321 emit_operand32(rsp, src);
3322 }
3324 void Assembler::fsub_s(Address src) {
3325 InstructionMark im(this);
3326 emit_byte(0xD8);
3327 emit_operand32(rsp, src);
3328 }
3330 void Assembler::fsuba(int i) {
3331 emit_farith(0xDC, 0xE8, i);
3332 }
3334 void Assembler::fsubp(int i) {
3335 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
3336 }
3338 void Assembler::fsubr(int i) {
3339 emit_farith(0xD8, 0xE8, i);
3340 }
3342 void Assembler::fsubr_d(Address src) {
3343 InstructionMark im(this);
3344 emit_byte(0xDC);
3345 emit_operand32(rbp, src);
3346 }
3348 void Assembler::fsubr_s(Address src) {
3349 InstructionMark im(this);
3350 emit_byte(0xD8);
3351 emit_operand32(rbp, src);
3352 }
3354 void Assembler::fsubra(int i) {
3355 emit_farith(0xDC, 0xE0, i);
3356 }
3358 void Assembler::fsubrp(int i) {
3359 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
3360 }
3362 void Assembler::ftan() {
3363 emit_byte(0xD9);
3364 emit_byte(0xF2);
3365 emit_byte(0xDD);
3366 emit_byte(0xD8);
3367 }
3369 void Assembler::ftst() {
3370 emit_byte(0xD9);
3371 emit_byte(0xE4);
3372 }
3374 void Assembler::fucomi(int i) {
3375 // make sure the instruction is supported (introduced for P6, together with cmov)
3376 guarantee(VM_Version::supports_cmov(), "illegal instruction");
3377 emit_farith(0xDB, 0xE8, i);
3378 }
3380 void Assembler::fucomip(int i) {
3381 // make sure the instruction is supported (introduced for P6, together with cmov)
3382 guarantee(VM_Version::supports_cmov(), "illegal instruction");
3383 emit_farith(0xDF, 0xE8, i);
3384 }
3386 void Assembler::fwait() {
3387 emit_byte(0x9B);
3388 }
3390 void Assembler::fxch(int i) {
3391 emit_farith(0xD9, 0xC8, i);
3392 }
3394 void Assembler::fyl2x() {
3395 emit_byte(0xD9);
3396 emit_byte(0xF1);
3397 }
3400 #ifndef _LP64
3402 void Assembler::incl(Register dst) {
3403 // Don't use it directly. Use MacroAssembler::incrementl() instead.
3404 emit_byte(0x40 | dst->encoding());
3405 }
3407 void Assembler::lea(Register dst, Address src) {
3408 leal(dst, src);
3409 }
3411 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
3412 InstructionMark im(this);
3413 emit_byte(0xC7);
3414 emit_operand(rax, dst);
3415 emit_data((int)imm32, rspec, 0);
3416 }
3418 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
3419 InstructionMark im(this);
3420 int encode = prefix_and_encode(dst->encoding());
3421 emit_byte(0xB8 | encode);
3422 emit_data((int)imm32, rspec, 0);
3423 }
3425 void Assembler::popa() { // 32bit
3426 emit_byte(0x61);
3427 }
3429 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
3430 InstructionMark im(this);
3431 emit_byte(0x68);
3432 emit_data(imm32, rspec, 0);
3433 }
3435 void Assembler::pusha() { // 32bit
3436 emit_byte(0x60);
3437 }
3439 void Assembler::set_byte_if_not_zero(Register dst) {
3440 emit_byte(0x0F);
3441 emit_byte(0x95);
3442 emit_byte(0xE0 | dst->encoding());
3443 }
3445 void Assembler::shldl(Register dst, Register src) {
3446 emit_byte(0x0F);
3447 emit_byte(0xA5);
3448 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3449 }
3451 void Assembler::shrdl(Register dst, Register src) {
3452 emit_byte(0x0F);
3453 emit_byte(0xAD);
3454 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3455 }
3457 #else // LP64
3459 void Assembler::set_byte_if_not_zero(Register dst) {
3460 int enc = prefix_and_encode(dst->encoding(), true);
3461 emit_byte(0x0F);
3462 emit_byte(0x95);
3463 emit_byte(0xE0 | enc);
3464 }
3466 // 64bit only pieces of the assembler
3467 // This should only be used by 64bit instructions that can use rip-relative
3468 // it cannot be used by instructions that want an immediate value.
3470 bool Assembler::reachable(AddressLiteral adr) {
3471 int64_t disp;
3472 // None will force a 64bit literal to the code stream. Likely a placeholder
3473 // for something that will be patched later and we need to certain it will
3474 // always be reachable.
3475 if (adr.reloc() == relocInfo::none) {
3476 return false;
3477 }
3478 if (adr.reloc() == relocInfo::internal_word_type) {
3479 // This should be rip relative and easily reachable.
3480 return true;
3481 }
3482 if (adr.reloc() == relocInfo::virtual_call_type ||
3483 adr.reloc() == relocInfo::opt_virtual_call_type ||
3484 adr.reloc() == relocInfo::static_call_type ||
3485 adr.reloc() == relocInfo::static_stub_type ) {
3486 // This should be rip relative within the code cache and easily
3487 // reachable until we get huge code caches. (At which point
3488 // ic code is going to have issues).
3489 return true;
3490 }
3491 if (adr.reloc() != relocInfo::external_word_type &&
3492 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special
3493 adr.reloc() != relocInfo::poll_type && // relocs to identify them
3494 adr.reloc() != relocInfo::runtime_call_type ) {
3495 return false;
3496 }
3498 // Stress the correction code
3499 if (ForceUnreachable) {
3500 // Must be runtimecall reloc, see if it is in the codecache
3501 // Flipping stuff in the codecache to be unreachable causes issues
3502 // with things like inline caches where the additional instructions
3503 // are not handled.
3504 if (CodeCache::find_blob(adr._target) == NULL) {
3505 return false;
3506 }
3507 }
3508 // For external_word_type/runtime_call_type if it is reachable from where we
3509 // are now (possibly a temp buffer) and where we might end up
3510 // anywhere in the codeCache then we are always reachable.
3511 // This would have to change if we ever save/restore shared code
3512 // to be more pessimistic.
3514 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
3515 if (!is_simm32(disp)) return false;
3516 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
3517 if (!is_simm32(disp)) return false;
3519 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
3521 // Because rip relative is a disp + address_of_next_instruction and we
3522 // don't know the value of address_of_next_instruction we apply a fudge factor
3523 // to make sure we will be ok no matter the size of the instruction we get placed into.
3524 // We don't have to fudge the checks above here because they are already worst case.
3526 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
3527 // + 4 because better safe than sorry.
3528 const int fudge = 12 + 4;
3529 if (disp < 0) {
3530 disp -= fudge;
3531 } else {
3532 disp += fudge;
3533 }
3534 return is_simm32(disp);
3535 }
3537 void Assembler::emit_data64(jlong data,
3538 relocInfo::relocType rtype,
3539 int format) {
3540 if (rtype == relocInfo::none) {
3541 emit_long64(data);
3542 } else {
3543 emit_data64(data, Relocation::spec_simple(rtype), format);
3544 }
3545 }
3547 void Assembler::emit_data64(jlong data,
3548 RelocationHolder const& rspec,
3549 int format) {
3550 assert(imm_operand == 0, "default format must be immediate in this file");
3551 assert(imm_operand == format, "must be immediate");
3552 assert(inst_mark() != NULL, "must be inside InstructionMark");
3553 // Do not use AbstractAssembler::relocate, which is not intended for
3554 // embedded words. Instead, relocate to the enclosing instruction.
3555 code_section()->relocate(inst_mark(), rspec, format);
3556 #ifdef ASSERT
3557 check_relocation(rspec, format);
3558 #endif
3559 emit_long64(data);
3560 }
3562 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
3563 if (reg_enc >= 8) {
3564 prefix(REX_B);
3565 reg_enc -= 8;
3566 } else if (byteinst && reg_enc >= 4) {
3567 prefix(REX);
3568 }
3569 return reg_enc;
3570 }
3572 int Assembler::prefixq_and_encode(int reg_enc) {
3573 if (reg_enc < 8) {
3574 prefix(REX_W);
3575 } else {
3576 prefix(REX_WB);
3577 reg_enc -= 8;
3578 }
3579 return reg_enc;
3580 }
3582 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
3583 if (dst_enc < 8) {
3584 if (src_enc >= 8) {
3585 prefix(REX_B);
3586 src_enc -= 8;
3587 } else if (byteinst && src_enc >= 4) {
3588 prefix(REX);
3589 }
3590 } else {
3591 if (src_enc < 8) {
3592 prefix(REX_R);
3593 } else {
3594 prefix(REX_RB);
3595 src_enc -= 8;
3596 }
3597 dst_enc -= 8;
3598 }
3599 return dst_enc << 3 | src_enc;
3600 }
3602 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
3603 if (dst_enc < 8) {
3604 if (src_enc < 8) {
3605 prefix(REX_W);
3606 } else {
3607 prefix(REX_WB);
3608 src_enc -= 8;
3609 }
3610 } else {
3611 if (src_enc < 8) {
3612 prefix(REX_WR);
3613 } else {
3614 prefix(REX_WRB);
3615 src_enc -= 8;
3616 }
3617 dst_enc -= 8;
3618 }
3619 return dst_enc << 3 | src_enc;
3620 }
3622 void Assembler::prefix(Register reg) {
3623 if (reg->encoding() >= 8) {
3624 prefix(REX_B);
3625 }
3626 }
3628 void Assembler::prefix(Address adr) {
3629 if (adr.base_needs_rex()) {
3630 if (adr.index_needs_rex()) {
3631 prefix(REX_XB);
3632 } else {
3633 prefix(REX_B);
3634 }
3635 } else {
3636 if (adr.index_needs_rex()) {
3637 prefix(REX_X);
3638 }
3639 }
3640 }
3642 void Assembler::prefixq(Address adr) {
3643 if (adr.base_needs_rex()) {
3644 if (adr.index_needs_rex()) {
3645 prefix(REX_WXB);
3646 } else {
3647 prefix(REX_WB);
3648 }
3649 } else {
3650 if (adr.index_needs_rex()) {
3651 prefix(REX_WX);
3652 } else {
3653 prefix(REX_W);
3654 }
3655 }
3656 }
3659 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
3660 if (reg->encoding() < 8) {
3661 if (adr.base_needs_rex()) {
3662 if (adr.index_needs_rex()) {
3663 prefix(REX_XB);
3664 } else {
3665 prefix(REX_B);
3666 }
3667 } else {
3668 if (adr.index_needs_rex()) {
3669 prefix(REX_X);
3670 } else if (reg->encoding() >= 4 ) {
3671 prefix(REX);
3672 }
3673 }
3674 } else {
3675 if (adr.base_needs_rex()) {
3676 if (adr.index_needs_rex()) {
3677 prefix(REX_RXB);
3678 } else {
3679 prefix(REX_RB);
3680 }
3681 } else {
3682 if (adr.index_needs_rex()) {
3683 prefix(REX_RX);
3684 } else {
3685 prefix(REX_R);
3686 }
3687 }
3688 }
3689 }
3691 void Assembler::prefixq(Address adr, Register src) {
3692 if (src->encoding() < 8) {
3693 if (adr.base_needs_rex()) {
3694 if (adr.index_needs_rex()) {
3695 prefix(REX_WXB);
3696 } else {
3697 prefix(REX_WB);
3698 }
3699 } else {
3700 if (adr.index_needs_rex()) {
3701 prefix(REX_WX);
3702 } else {
3703 prefix(REX_W);
3704 }
3705 }
3706 } else {
3707 if (adr.base_needs_rex()) {
3708 if (adr.index_needs_rex()) {
3709 prefix(REX_WRXB);
3710 } else {
3711 prefix(REX_WRB);
3712 }
3713 } else {
3714 if (adr.index_needs_rex()) {
3715 prefix(REX_WRX);
3716 } else {
3717 prefix(REX_WR);
3718 }
3719 }
3720 }
3721 }
3723 void Assembler::prefix(Address adr, XMMRegister reg) {
3724 if (reg->encoding() < 8) {
3725 if (adr.base_needs_rex()) {
3726 if (adr.index_needs_rex()) {
3727 prefix(REX_XB);
3728 } else {
3729 prefix(REX_B);
3730 }
3731 } else {
3732 if (adr.index_needs_rex()) {
3733 prefix(REX_X);
3734 }
3735 }
3736 } else {
3737 if (adr.base_needs_rex()) {
3738 if (adr.index_needs_rex()) {
3739 prefix(REX_RXB);
3740 } else {
3741 prefix(REX_RB);
3742 }
3743 } else {
3744 if (adr.index_needs_rex()) {
3745 prefix(REX_RX);
3746 } else {
3747 prefix(REX_R);
3748 }
3749 }
3750 }
3751 }
3753 void Assembler::adcq(Register dst, int32_t imm32) {
3754 (void) prefixq_and_encode(dst->encoding());
3755 emit_arith(0x81, 0xD0, dst, imm32);
3756 }
3758 void Assembler::adcq(Register dst, Address src) {
3759 InstructionMark im(this);
3760 prefixq(src, dst);
3761 emit_byte(0x13);
3762 emit_operand(dst, src);
3763 }
3765 void Assembler::adcq(Register dst, Register src) {
3766 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3767 emit_arith(0x13, 0xC0, dst, src);
3768 }
3770 void Assembler::addq(Address dst, int32_t imm32) {
3771 InstructionMark im(this);
3772 prefixq(dst);
3773 emit_arith_operand(0x81, rax, dst,imm32);
3774 }
3776 void Assembler::addq(Address dst, Register src) {
3777 InstructionMark im(this);
3778 prefixq(dst, src);
3779 emit_byte(0x01);
3780 emit_operand(src, dst);
3781 }
3783 void Assembler::addq(Register dst, int32_t imm32) {
3784 (void) prefixq_and_encode(dst->encoding());
3785 emit_arith(0x81, 0xC0, dst, imm32);
3786 }
3788 void Assembler::addq(Register dst, Address src) {
3789 InstructionMark im(this);
3790 prefixq(src, dst);
3791 emit_byte(0x03);
3792 emit_operand(dst, src);
3793 }
3795 void Assembler::addq(Register dst, Register src) {
3796 (void) prefixq_and_encode(dst->encoding(), src->encoding());
3797 emit_arith(0x03, 0xC0, dst, src);
3798 }
3800 void Assembler::andq(Register dst, int32_t imm32) {
3801 (void) prefixq_and_encode(dst->encoding());
3802 emit_arith(0x81, 0xE0, dst, imm32);
3803 }
3805 void Assembler::andq(Register dst, Address src) {
3806 InstructionMark im(this);
3807 prefixq(src, dst);
3808 emit_byte(0x23);
3809 emit_operand(dst, src);
3810 }
3812 void Assembler::andq(Register dst, Register src) {
3813 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3814 emit_arith(0x23, 0xC0, dst, src);
3815 }
3817 void Assembler::bsfq(Register dst, Register src) {
3818 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3819 emit_byte(0x0F);
3820 emit_byte(0xBC);
3821 emit_byte(0xC0 | encode);
3822 }
3824 void Assembler::bsrq(Register dst, Register src) {
3825 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
3826 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3827 emit_byte(0x0F);
3828 emit_byte(0xBD);
3829 emit_byte(0xC0 | encode);
3830 }
3832 void Assembler::bswapq(Register reg) {
3833 int encode = prefixq_and_encode(reg->encoding());
3834 emit_byte(0x0F);
3835 emit_byte(0xC8 | encode);
3836 }
3838 void Assembler::cdqq() {
3839 prefix(REX_W);
3840 emit_byte(0x99);
3841 }
3843 void Assembler::clflush(Address adr) {
3844 prefix(adr);
3845 emit_byte(0x0F);
3846 emit_byte(0xAE);
3847 emit_operand(rdi, adr);
3848 }
3850 void Assembler::cmovq(Condition cc, Register dst, Register src) {
3851 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3852 emit_byte(0x0F);
3853 emit_byte(0x40 | cc);
3854 emit_byte(0xC0 | encode);
3855 }
3857 void Assembler::cmovq(Condition cc, Register dst, Address src) {
3858 InstructionMark im(this);
3859 prefixq(src, dst);
3860 emit_byte(0x0F);
3861 emit_byte(0x40 | cc);
3862 emit_operand(dst, src);
3863 }
3865 void Assembler::cmpq(Address dst, int32_t imm32) {
3866 InstructionMark im(this);
3867 prefixq(dst);
3868 emit_byte(0x81);
3869 emit_operand(rdi, dst, 4);
3870 emit_long(imm32);
3871 }
3873 void Assembler::cmpq(Register dst, int32_t imm32) {
3874 (void) prefixq_and_encode(dst->encoding());
3875 emit_arith(0x81, 0xF8, dst, imm32);
3876 }
3878 void Assembler::cmpq(Address dst, Register src) {
3879 InstructionMark im(this);
3880 prefixq(dst, src);
3881 emit_byte(0x3B);
3882 emit_operand(src, dst);
3883 }
3885 void Assembler::cmpq(Register dst, Register src) {
3886 (void) prefixq_and_encode(dst->encoding(), src->encoding());
3887 emit_arith(0x3B, 0xC0, dst, src);
3888 }
3890 void Assembler::cmpq(Register dst, Address src) {
3891 InstructionMark im(this);
3892 prefixq(src, dst);
3893 emit_byte(0x3B);
3894 emit_operand(dst, src);
3895 }
3897 void Assembler::cmpxchgq(Register reg, Address adr) {
3898 InstructionMark im(this);
3899 prefixq(adr, reg);
3900 emit_byte(0x0F);
3901 emit_byte(0xB1);
3902 emit_operand(reg, adr);
3903 }
3905 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
3906 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3907 emit_byte(0xF2);
3908 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3909 emit_byte(0x0F);
3910 emit_byte(0x2A);
3911 emit_byte(0xC0 | encode);
3912 }
3914 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
3915 NOT_LP64(assert(VM_Version::supports_sse(), ""));
3916 emit_byte(0xF3);
3917 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3918 emit_byte(0x0F);
3919 emit_byte(0x2A);
3920 emit_byte(0xC0 | encode);
3921 }
3923 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
3924 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3925 emit_byte(0xF2);
3926 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3927 emit_byte(0x0F);
3928 emit_byte(0x2C);
3929 emit_byte(0xC0 | encode);
3930 }
3932 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
3933 NOT_LP64(assert(VM_Version::supports_sse(), ""));
3934 emit_byte(0xF3);
3935 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3936 emit_byte(0x0F);
3937 emit_byte(0x2C);
3938 emit_byte(0xC0 | encode);
3939 }
3941 void Assembler::decl(Register dst) {
3942 // Don't use it directly. Use MacroAssembler::decrementl() instead.
3943 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3944 int encode = prefix_and_encode(dst->encoding());
3945 emit_byte(0xFF);
3946 emit_byte(0xC8 | encode);
3947 }
3949 void Assembler::decq(Register dst) {
3950 // Don't use it directly. Use MacroAssembler::decrementq() instead.
3951 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3952 int encode = prefixq_and_encode(dst->encoding());
3953 emit_byte(0xFF);
3954 emit_byte(0xC8 | encode);
3955 }
3957 void Assembler::decq(Address dst) {
3958 // Don't use it directly. Use MacroAssembler::decrementq() instead.
3959 InstructionMark im(this);
3960 prefixq(dst);
3961 emit_byte(0xFF);
3962 emit_operand(rcx, dst);
3963 }
3965 void Assembler::fxrstor(Address src) {
3966 prefixq(src);
3967 emit_byte(0x0F);
3968 emit_byte(0xAE);
3969 emit_operand(as_Register(1), src);
3970 }
3972 void Assembler::fxsave(Address dst) {
3973 prefixq(dst);
3974 emit_byte(0x0F);
3975 emit_byte(0xAE);
3976 emit_operand(as_Register(0), dst);
3977 }
3979 void Assembler::idivq(Register src) {
3980 int encode = prefixq_and_encode(src->encoding());
3981 emit_byte(0xF7);
3982 emit_byte(0xF8 | encode);
3983 }
3985 void Assembler::imulq(Register dst, Register src) {
3986 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3987 emit_byte(0x0F);
3988 emit_byte(0xAF);
3989 emit_byte(0xC0 | encode);
3990 }
3992 void Assembler::imulq(Register dst, Register src, int value) {
3993 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3994 if (is8bit(value)) {
3995 emit_byte(0x6B);
3996 emit_byte(0xC0 | encode);
3997 emit_byte(value & 0xFF);
3998 } else {
3999 emit_byte(0x69);
4000 emit_byte(0xC0 | encode);
4001 emit_long(value);
4002 }
4003 }
4005 void Assembler::incl(Register dst) {
4006 // Don't use it directly. Use MacroAssembler::incrementl() instead.
4007 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
4008 int encode = prefix_and_encode(dst->encoding());
4009 emit_byte(0xFF);
4010 emit_byte(0xC0 | encode);
4011 }
4013 void Assembler::incq(Register dst) {
4014 // Don't use it directly. Use MacroAssembler::incrementq() instead.
4015 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
4016 int encode = prefixq_and_encode(dst->encoding());
4017 emit_byte(0xFF);
4018 emit_byte(0xC0 | encode);
4019 }
4021 void Assembler::incq(Address dst) {
4022 // Don't use it directly. Use MacroAssembler::incrementq() instead.
4023 InstructionMark im(this);
4024 prefixq(dst);
4025 emit_byte(0xFF);
4026 emit_operand(rax, dst);
4027 }
4029 void Assembler::lea(Register dst, Address src) {
4030 leaq(dst, src);
4031 }
4033 void Assembler::leaq(Register dst, Address src) {
4034 InstructionMark im(this);
4035 prefixq(src, dst);
4036 emit_byte(0x8D);
4037 emit_operand(dst, src);
4038 }
4040 void Assembler::mov64(Register dst, int64_t imm64) {
4041 InstructionMark im(this);
4042 int encode = prefixq_and_encode(dst->encoding());
4043 emit_byte(0xB8 | encode);
4044 emit_long64(imm64);
4045 }
4047 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
4048 InstructionMark im(this);
4049 int encode = prefixq_and_encode(dst->encoding());
4050 emit_byte(0xB8 | encode);
4051 emit_data64(imm64, rspec);
4052 }
4054 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
4055 InstructionMark im(this);
4056 int encode = prefix_and_encode(dst->encoding());
4057 emit_byte(0xB8 | encode);
4058 emit_data((int)imm32, rspec, narrow_oop_operand);
4059 }
4061 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
4062 InstructionMark im(this);
4063 prefix(dst);
4064 emit_byte(0xC7);
4065 emit_operand(rax, dst, 4);
4066 emit_data((int)imm32, rspec, narrow_oop_operand);
4067 }
4069 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
4070 InstructionMark im(this);
4071 int encode = prefix_and_encode(src1->encoding());
4072 emit_byte(0x81);
4073 emit_byte(0xF8 | encode);
4074 emit_data((int)imm32, rspec, narrow_oop_operand);
4075 }
4077 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
4078 InstructionMark im(this);
4079 prefix(src1);
4080 emit_byte(0x81);
4081 emit_operand(rax, src1, 4);
4082 emit_data((int)imm32, rspec, narrow_oop_operand);
4083 }
4085 void Assembler::lzcntq(Register dst, Register src) {
4086 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
4087 emit_byte(0xF3);
4088 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4089 emit_byte(0x0F);
4090 emit_byte(0xBD);
4091 emit_byte(0xC0 | encode);
4092 }
4094 void Assembler::movdq(XMMRegister dst, Register src) {
4095 // table D-1 says MMX/SSE2
4096 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
4097 emit_byte(0x66);
4098 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4099 emit_byte(0x0F);
4100 emit_byte(0x6E);
4101 emit_byte(0xC0 | encode);
4102 }
4104 void Assembler::movdq(Register dst, XMMRegister src) {
4105 // table D-1 says MMX/SSE2
4106 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
4107 emit_byte(0x66);
4108 // swap src/dst to get correct prefix
4109 int encode = prefixq_and_encode(src->encoding(), dst->encoding());
4110 emit_byte(0x0F);
4111 emit_byte(0x7E);
4112 emit_byte(0xC0 | encode);
4113 }
4115 void Assembler::movq(Register dst, Register src) {
4116 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4117 emit_byte(0x8B);
4118 emit_byte(0xC0 | encode);
4119 }
4121 void Assembler::movq(Register dst, Address src) {
4122 InstructionMark im(this);
4123 prefixq(src, dst);
4124 emit_byte(0x8B);
4125 emit_operand(dst, src);
4126 }
4128 void Assembler::movq(Address dst, Register src) {
4129 InstructionMark im(this);
4130 prefixq(dst, src);
4131 emit_byte(0x89);
4132 emit_operand(src, dst);
4133 }
4135 void Assembler::movsbq(Register dst, Address src) {
4136 InstructionMark im(this);
4137 prefixq(src, dst);
4138 emit_byte(0x0F);
4139 emit_byte(0xBE);
4140 emit_operand(dst, src);
4141 }
4143 void Assembler::movsbq(Register dst, Register src) {
4144 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4145 emit_byte(0x0F);
4146 emit_byte(0xBE);
4147 emit_byte(0xC0 | encode);
4148 }
4150 void Assembler::movslq(Register dst, int32_t imm32) {
4151 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
4152 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
4153 // as a result we shouldn't use until tested at runtime...
4154 ShouldNotReachHere();
4155 InstructionMark im(this);
4156 int encode = prefixq_and_encode(dst->encoding());
4157 emit_byte(0xC7 | encode);
4158 emit_long(imm32);
4159 }
4161 void Assembler::movslq(Address dst, int32_t imm32) {
4162 assert(is_simm32(imm32), "lost bits");
4163 InstructionMark im(this);
4164 prefixq(dst);
4165 emit_byte(0xC7);
4166 emit_operand(rax, dst, 4);
4167 emit_long(imm32);
4168 }
4170 void Assembler::movslq(Register dst, Address src) {
4171 InstructionMark im(this);
4172 prefixq(src, dst);
4173 emit_byte(0x63);
4174 emit_operand(dst, src);
4175 }
4177 void Assembler::movslq(Register dst, Register src) {
4178 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4179 emit_byte(0x63);
4180 emit_byte(0xC0 | encode);
4181 }
4183 void Assembler::movswq(Register dst, Address src) {
4184 InstructionMark im(this);
4185 prefixq(src, dst);
4186 emit_byte(0x0F);
4187 emit_byte(0xBF);
4188 emit_operand(dst, src);
4189 }
4191 void Assembler::movswq(Register dst, Register src) {
4192 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4193 emit_byte(0x0F);
4194 emit_byte(0xBF);
4195 emit_byte(0xC0 | encode);
4196 }
4198 void Assembler::movzbq(Register dst, Address src) {
4199 InstructionMark im(this);
4200 prefixq(src, dst);
4201 emit_byte(0x0F);
4202 emit_byte(0xB6);
4203 emit_operand(dst, src);
4204 }
4206 void Assembler::movzbq(Register dst, Register src) {
4207 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4208 emit_byte(0x0F);
4209 emit_byte(0xB6);
4210 emit_byte(0xC0 | encode);
4211 }
4213 void Assembler::movzwq(Register dst, Address src) {
4214 InstructionMark im(this);
4215 prefixq(src, dst);
4216 emit_byte(0x0F);
4217 emit_byte(0xB7);
4218 emit_operand(dst, src);
4219 }
4221 void Assembler::movzwq(Register dst, Register src) {
4222 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4223 emit_byte(0x0F);
4224 emit_byte(0xB7);
4225 emit_byte(0xC0 | encode);
4226 }
4228 void Assembler::negq(Register dst) {
4229 int encode = prefixq_and_encode(dst->encoding());
4230 emit_byte(0xF7);
4231 emit_byte(0xD8 | encode);
4232 }
4234 void Assembler::notq(Register dst) {
4235 int encode = prefixq_and_encode(dst->encoding());
4236 emit_byte(0xF7);
4237 emit_byte(0xD0 | encode);
4238 }
4240 void Assembler::orq(Address dst, int32_t imm32) {
4241 InstructionMark im(this);
4242 prefixq(dst);
4243 emit_byte(0x81);
4244 emit_operand(rcx, dst, 4);
4245 emit_long(imm32);
4246 }
4248 void Assembler::orq(Register dst, int32_t imm32) {
4249 (void) prefixq_and_encode(dst->encoding());
4250 emit_arith(0x81, 0xC8, dst, imm32);
4251 }
4253 void Assembler::orq(Register dst, Address src) {
4254 InstructionMark im(this);
4255 prefixq(src, dst);
4256 emit_byte(0x0B);
4257 emit_operand(dst, src);
4258 }
4260 void Assembler::orq(Register dst, Register src) {
4261 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4262 emit_arith(0x0B, 0xC0, dst, src);
4263 }
4265 void Assembler::popa() { // 64bit
4266 movq(r15, Address(rsp, 0));
4267 movq(r14, Address(rsp, wordSize));
4268 movq(r13, Address(rsp, 2 * wordSize));
4269 movq(r12, Address(rsp, 3 * wordSize));
4270 movq(r11, Address(rsp, 4 * wordSize));
4271 movq(r10, Address(rsp, 5 * wordSize));
4272 movq(r9, Address(rsp, 6 * wordSize));
4273 movq(r8, Address(rsp, 7 * wordSize));
4274 movq(rdi, Address(rsp, 8 * wordSize));
4275 movq(rsi, Address(rsp, 9 * wordSize));
4276 movq(rbp, Address(rsp, 10 * wordSize));
4277 // skip rsp
4278 movq(rbx, Address(rsp, 12 * wordSize));
4279 movq(rdx, Address(rsp, 13 * wordSize));
4280 movq(rcx, Address(rsp, 14 * wordSize));
4281 movq(rax, Address(rsp, 15 * wordSize));
4283 addq(rsp, 16 * wordSize);
4284 }
4286 void Assembler::popcntq(Register dst, Address src) {
4287 assert(VM_Version::supports_popcnt(), "must support");
4288 InstructionMark im(this);
4289 emit_byte(0xF3);
4290 prefixq(src, dst);
4291 emit_byte(0x0F);
4292 emit_byte(0xB8);
4293 emit_operand(dst, src);
4294 }
4296 void Assembler::popcntq(Register dst, Register src) {
4297 assert(VM_Version::supports_popcnt(), "must support");
4298 emit_byte(0xF3);
4299 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4300 emit_byte(0x0F);
4301 emit_byte(0xB8);
4302 emit_byte(0xC0 | encode);
4303 }
4305 void Assembler::popq(Address dst) {
4306 InstructionMark im(this);
4307 prefixq(dst);
4308 emit_byte(0x8F);
4309 emit_operand(rax, dst);
4310 }
4312 void Assembler::pusha() { // 64bit
4313 // we have to store original rsp. ABI says that 128 bytes
4314 // below rsp are local scratch.
4315 movq(Address(rsp, -5 * wordSize), rsp);
4317 subq(rsp, 16 * wordSize);
4319 movq(Address(rsp, 15 * wordSize), rax);
4320 movq(Address(rsp, 14 * wordSize), rcx);
4321 movq(Address(rsp, 13 * wordSize), rdx);
4322 movq(Address(rsp, 12 * wordSize), rbx);
4323 // skip rsp
4324 movq(Address(rsp, 10 * wordSize), rbp);
4325 movq(Address(rsp, 9 * wordSize), rsi);
4326 movq(Address(rsp, 8 * wordSize), rdi);
4327 movq(Address(rsp, 7 * wordSize), r8);
4328 movq(Address(rsp, 6 * wordSize), r9);
4329 movq(Address(rsp, 5 * wordSize), r10);
4330 movq(Address(rsp, 4 * wordSize), r11);
4331 movq(Address(rsp, 3 * wordSize), r12);
4332 movq(Address(rsp, 2 * wordSize), r13);
4333 movq(Address(rsp, wordSize), r14);
4334 movq(Address(rsp, 0), r15);
4335 }
4337 void Assembler::pushq(Address src) {
4338 InstructionMark im(this);
4339 prefixq(src);
4340 emit_byte(0xFF);
4341 emit_operand(rsi, src);
4342 }
4344 void Assembler::rclq(Register dst, int imm8) {
4345 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4346 int encode = prefixq_and_encode(dst->encoding());
4347 if (imm8 == 1) {
4348 emit_byte(0xD1);
4349 emit_byte(0xD0 | encode);
4350 } else {
4351 emit_byte(0xC1);
4352 emit_byte(0xD0 | encode);
4353 emit_byte(imm8);
4354 }
4355 }
4356 void Assembler::sarq(Register dst, int imm8) {
4357 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4358 int encode = prefixq_and_encode(dst->encoding());
4359 if (imm8 == 1) {
4360 emit_byte(0xD1);
4361 emit_byte(0xF8 | encode);
4362 } else {
4363 emit_byte(0xC1);
4364 emit_byte(0xF8 | encode);
4365 emit_byte(imm8);
4366 }
4367 }
4369 void Assembler::sarq(Register dst) {
4370 int encode = prefixq_and_encode(dst->encoding());
4371 emit_byte(0xD3);
4372 emit_byte(0xF8 | encode);
4373 }
4375 void Assembler::sbbq(Address dst, int32_t imm32) {
4376 InstructionMark im(this);
4377 prefixq(dst);
4378 emit_arith_operand(0x81, rbx, dst, imm32);
4379 }
4381 void Assembler::sbbq(Register dst, int32_t imm32) {
4382 (void) prefixq_and_encode(dst->encoding());
4383 emit_arith(0x81, 0xD8, dst, imm32);
4384 }
4386 void Assembler::sbbq(Register dst, Address src) {
4387 InstructionMark im(this);
4388 prefixq(src, dst);
4389 emit_byte(0x1B);
4390 emit_operand(dst, src);
4391 }
4393 void Assembler::sbbq(Register dst, Register src) {
4394 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4395 emit_arith(0x1B, 0xC0, dst, src);
4396 }
4398 void Assembler::shlq(Register dst, int imm8) {
4399 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4400 int encode = prefixq_and_encode(dst->encoding());
4401 if (imm8 == 1) {
4402 emit_byte(0xD1);
4403 emit_byte(0xE0 | encode);
4404 } else {
4405 emit_byte(0xC1);
4406 emit_byte(0xE0 | encode);
4407 emit_byte(imm8);
4408 }
4409 }
4411 void Assembler::shlq(Register dst) {
4412 int encode = prefixq_and_encode(dst->encoding());
4413 emit_byte(0xD3);
4414 emit_byte(0xE0 | encode);
4415 }
4417 void Assembler::shrq(Register dst, int imm8) {
4418 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4419 int encode = prefixq_and_encode(dst->encoding());
4420 emit_byte(0xC1);
4421 emit_byte(0xE8 | encode);
4422 emit_byte(imm8);
4423 }
4425 void Assembler::shrq(Register dst) {
4426 int encode = prefixq_and_encode(dst->encoding());
4427 emit_byte(0xD3);
4428 emit_byte(0xE8 | encode);
4429 }
4431 void Assembler::subq(Address dst, int32_t imm32) {
4432 InstructionMark im(this);
4433 prefixq(dst);
4434 emit_arith_operand(0x81, rbp, dst, imm32);
4435 }
4437 void Assembler::subq(Address dst, Register src) {
4438 InstructionMark im(this);
4439 prefixq(dst, src);
4440 emit_byte(0x29);
4441 emit_operand(src, dst);
4442 }
4444 void Assembler::subq(Register dst, int32_t imm32) {
4445 (void) prefixq_and_encode(dst->encoding());
4446 emit_arith(0x81, 0xE8, dst, imm32);
4447 }
4449 void Assembler::subq(Register dst, Address src) {
4450 InstructionMark im(this);
4451 prefixq(src, dst);
4452 emit_byte(0x2B);
4453 emit_operand(dst, src);
4454 }
4456 void Assembler::subq(Register dst, Register src) {
4457 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4458 emit_arith(0x2B, 0xC0, dst, src);
4459 }
4461 void Assembler::testq(Register dst, int32_t imm32) {
4462 // not using emit_arith because test
4463 // doesn't support sign-extension of
4464 // 8bit operands
4465 int encode = dst->encoding();
4466 if (encode == 0) {
4467 prefix(REX_W);
4468 emit_byte(0xA9);
4469 } else {
4470 encode = prefixq_and_encode(encode);
4471 emit_byte(0xF7);
4472 emit_byte(0xC0 | encode);
4473 }
4474 emit_long(imm32);
4475 }
4477 void Assembler::testq(Register dst, Register src) {
4478 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4479 emit_arith(0x85, 0xC0, dst, src);
4480 }
4482 void Assembler::xaddq(Address dst, Register src) {
4483 InstructionMark im(this);
4484 prefixq(dst, src);
4485 emit_byte(0x0F);
4486 emit_byte(0xC1);
4487 emit_operand(src, dst);
4488 }
4490 void Assembler::xchgq(Register dst, Address src) {
4491 InstructionMark im(this);
4492 prefixq(src, dst);
4493 emit_byte(0x87);
4494 emit_operand(dst, src);
4495 }
4497 void Assembler::xchgq(Register dst, Register src) {
4498 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4499 emit_byte(0x87);
4500 emit_byte(0xc0 | encode);
4501 }
4503 void Assembler::xorq(Register dst, Register src) {
4504 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4505 emit_arith(0x33, 0xC0, dst, src);
4506 }
4508 void Assembler::xorq(Register dst, Address src) {
4509 InstructionMark im(this);
4510 prefixq(src, dst);
4511 emit_byte(0x33);
4512 emit_operand(dst, src);
4513 }
4515 #endif // !LP64
4517 static Assembler::Condition reverse[] = {
4518 Assembler::noOverflow /* overflow = 0x0 */ ,
4519 Assembler::overflow /* noOverflow = 0x1 */ ,
4520 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
4521 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
4522 Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
4523 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
4524 Assembler::above /* belowEqual = 0x6 */ ,
4525 Assembler::belowEqual /* above = 0x7 */ ,
4526 Assembler::positive /* negative = 0x8 */ ,
4527 Assembler::negative /* positive = 0x9 */ ,
4528 Assembler::noParity /* parity = 0xa */ ,
4529 Assembler::parity /* noParity = 0xb */ ,
4530 Assembler::greaterEqual /* less = 0xc */ ,
4531 Assembler::less /* greaterEqual = 0xd */ ,
4532 Assembler::greater /* lessEqual = 0xe */ ,
4533 Assembler::lessEqual /* greater = 0xf, */
4535 };
4538 // Implementation of MacroAssembler
4540 // First all the versions that have distinct versions depending on 32/64 bit
4541 // Unless the difference is trivial (1 line or so).
4543 #ifndef _LP64
4545 // 32bit versions
4547 Address MacroAssembler::as_Address(AddressLiteral adr) {
4548 return Address(adr.target(), adr.rspec());
4549 }
4551 Address MacroAssembler::as_Address(ArrayAddress adr) {
4552 return Address::make_array(adr);
4553 }
4555 int MacroAssembler::biased_locking_enter(Register lock_reg,
4556 Register obj_reg,
4557 Register swap_reg,
4558 Register tmp_reg,
4559 bool swap_reg_contains_mark,
4560 Label& done,
4561 Label* slow_case,
4562 BiasedLockingCounters* counters) {
4563 assert(UseBiasedLocking, "why call this otherwise?");
4564 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4565 assert_different_registers(lock_reg, obj_reg, swap_reg);
4567 if (PrintBiasedLockingStatistics && counters == NULL)
4568 counters = BiasedLocking::counters();
4570 bool need_tmp_reg = false;
4571 if (tmp_reg == noreg) {
4572 need_tmp_reg = true;
4573 tmp_reg = lock_reg;
4574 } else {
4575 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4576 }
4577 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4578 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
4579 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
4580 Address saved_mark_addr(lock_reg, 0);
4582 // Biased locking
4583 // See whether the lock is currently biased toward our thread and
4584 // whether the epoch is still valid
4585 // Note that the runtime guarantees sufficient alignment of JavaThread
4586 // pointers to allow age to be placed into low bits
4587 // First check to see whether biasing is even enabled for this object
4588 Label cas_label;
4589 int null_check_offset = -1;
4590 if (!swap_reg_contains_mark) {
4591 null_check_offset = offset();
4592 movl(swap_reg, mark_addr);
4593 }
4594 if (need_tmp_reg) {
4595 push(tmp_reg);
4596 }
4597 movl(tmp_reg, swap_reg);
4598 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4599 cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4600 if (need_tmp_reg) {
4601 pop(tmp_reg);
4602 }
4603 jcc(Assembler::notEqual, cas_label);
4604 // The bias pattern is present in the object's header. Need to check
4605 // whether the bias owner and the epoch are both still current.
4606 // Note that because there is no current thread register on x86 we
4607 // need to store off the mark word we read out of the object to
4608 // avoid reloading it and needing to recheck invariants below. This
4609 // store is unfortunate but it makes the overall code shorter and
4610 // simpler.
4611 movl(saved_mark_addr, swap_reg);
4612 if (need_tmp_reg) {
4613 push(tmp_reg);
4614 }
4615 get_thread(tmp_reg);
4616 xorl(swap_reg, tmp_reg);
4617 if (swap_reg_contains_mark) {
4618 null_check_offset = offset();
4619 }
4620 movl(tmp_reg, klass_addr);
4621 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4622 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4623 if (need_tmp_reg) {
4624 pop(tmp_reg);
4625 }
4626 if (counters != NULL) {
4627 cond_inc32(Assembler::zero,
4628 ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4629 }
4630 jcc(Assembler::equal, done);
4632 Label try_revoke_bias;
4633 Label try_rebias;
4635 // At this point we know that the header has the bias pattern and
4636 // that we are not the bias owner in the current epoch. We need to
4637 // figure out more details about the state of the header in order to
4638 // know what operations can be legally performed on the object's
4639 // header.
4641 // If the low three bits in the xor result aren't clear, that means
4642 // the prototype header is no longer biased and we have to revoke
4643 // the bias on this object.
4644 testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4645 jcc(Assembler::notZero, try_revoke_bias);
4647 // Biasing is still enabled for this data type. See whether the
4648 // epoch of the current bias is still valid, meaning that the epoch
4649 // bits of the mark word are equal to the epoch bits of the
4650 // prototype header. (Note that the prototype header's epoch bits
4651 // only change at a safepoint.) If not, attempt to rebias the object
4652 // toward the current thread. Note that we must be absolutely sure
4653 // that the current epoch is invalid in order to do this because
4654 // otherwise the manipulations it performs on the mark word are
4655 // illegal.
4656 testl(swap_reg, markOopDesc::epoch_mask_in_place);
4657 jcc(Assembler::notZero, try_rebias);
4659 // The epoch of the current bias is still valid but we know nothing
4660 // about the owner; it might be set or it might be clear. Try to
4661 // acquire the bias of the object using an atomic operation. If this
4662 // fails we will go in to the runtime to revoke the object's bias.
4663 // Note that we first construct the presumed unbiased header so we
4664 // don't accidentally blow away another thread's valid bias.
4665 movl(swap_reg, saved_mark_addr);
4666 andl(swap_reg,
4667 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4668 if (need_tmp_reg) {
4669 push(tmp_reg);
4670 }
4671 get_thread(tmp_reg);
4672 orl(tmp_reg, swap_reg);
4673 if (os::is_MP()) {
4674 lock();
4675 }
4676 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4677 if (need_tmp_reg) {
4678 pop(tmp_reg);
4679 }
4680 // If the biasing toward our thread failed, this means that
4681 // another thread succeeded in biasing it toward itself and we
4682 // need to revoke that bias. The revocation will occur in the
4683 // interpreter runtime in the slow case.
4684 if (counters != NULL) {
4685 cond_inc32(Assembler::zero,
4686 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4687 }
4688 if (slow_case != NULL) {
4689 jcc(Assembler::notZero, *slow_case);
4690 }
4691 jmp(done);
4693 bind(try_rebias);
4694 // At this point we know the epoch has expired, meaning that the
4695 // current "bias owner", if any, is actually invalid. Under these
4696 // circumstances _only_, we are allowed to use the current header's
4697 // value as the comparison value when doing the cas to acquire the
4698 // bias in the current epoch. In other words, we allow transfer of
4699 // the bias from one thread to another directly in this situation.
4700 //
4701 // FIXME: due to a lack of registers we currently blow away the age
4702 // bits in this situation. Should attempt to preserve them.
4703 if (need_tmp_reg) {
4704 push(tmp_reg);
4705 }
4706 get_thread(tmp_reg);
4707 movl(swap_reg, klass_addr);
4708 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4709 movl(swap_reg, saved_mark_addr);
4710 if (os::is_MP()) {
4711 lock();
4712 }
4713 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4714 if (need_tmp_reg) {
4715 pop(tmp_reg);
4716 }
4717 // If the biasing toward our thread failed, then another thread
4718 // succeeded in biasing it toward itself and we need to revoke that
4719 // bias. The revocation will occur in the runtime in the slow case.
4720 if (counters != NULL) {
4721 cond_inc32(Assembler::zero,
4722 ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4723 }
4724 if (slow_case != NULL) {
4725 jcc(Assembler::notZero, *slow_case);
4726 }
4727 jmp(done);
4729 bind(try_revoke_bias);
4730 // The prototype mark in the klass doesn't have the bias bit set any
4731 // more, indicating that objects of this data type are not supposed
4732 // to be biased any more. We are going to try to reset the mark of
4733 // this object to the prototype value and fall through to the
4734 // CAS-based locking scheme. Note that if our CAS fails, it means
4735 // that another thread raced us for the privilege of revoking the
4736 // bias of this particular object, so it's okay to continue in the
4737 // normal locking code.
4738 //
4739 // FIXME: due to a lack of registers we currently blow away the age
4740 // bits in this situation. Should attempt to preserve them.
4741 movl(swap_reg, saved_mark_addr);
4742 if (need_tmp_reg) {
4743 push(tmp_reg);
4744 }
4745 movl(tmp_reg, klass_addr);
4746 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4747 if (os::is_MP()) {
4748 lock();
4749 }
4750 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4751 if (need_tmp_reg) {
4752 pop(tmp_reg);
4753 }
4754 // Fall through to the normal CAS-based lock, because no matter what
4755 // the result of the above CAS, some thread must have succeeded in
4756 // removing the bias bit from the object's header.
4757 if (counters != NULL) {
4758 cond_inc32(Assembler::zero,
4759 ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4760 }
4762 bind(cas_label);
4764 return null_check_offset;
4765 }
4766 void MacroAssembler::call_VM_leaf_base(address entry_point,
4767 int number_of_arguments) {
4768 call(RuntimeAddress(entry_point));
4769 increment(rsp, number_of_arguments * wordSize);
4770 }
4772 void MacroAssembler::cmpoop(Address src1, jobject obj) {
4773 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4774 }
4776 void MacroAssembler::cmpoop(Register src1, jobject obj) {
4777 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4778 }
4780 void MacroAssembler::extend_sign(Register hi, Register lo) {
4781 // According to Intel Doc. AP-526, "Integer Divide", p.18.
4782 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4783 cdql();
4784 } else {
4785 movl(hi, lo);
4786 sarl(hi, 31);
4787 }
4788 }
4790 void MacroAssembler::fat_nop() {
4791 // A 5 byte nop that is safe for patching (see patch_verified_entry)
4792 emit_byte(0x26); // es:
4793 emit_byte(0x2e); // cs:
4794 emit_byte(0x64); // fs:
4795 emit_byte(0x65); // gs:
4796 emit_byte(0x90);
4797 }
4799 void MacroAssembler::jC2(Register tmp, Label& L) {
4800 // set parity bit if FPU flag C2 is set (via rax)
4801 save_rax(tmp);
4802 fwait(); fnstsw_ax();
4803 sahf();
4804 restore_rax(tmp);
4805 // branch
4806 jcc(Assembler::parity, L);
4807 }
4809 void MacroAssembler::jnC2(Register tmp, Label& L) {
4810 // set parity bit if FPU flag C2 is set (via rax)
4811 save_rax(tmp);
4812 fwait(); fnstsw_ax();
4813 sahf();
4814 restore_rax(tmp);
4815 // branch
4816 jcc(Assembler::noParity, L);
4817 }
4819 // 32bit can do a case table jump in one instruction but we no longer allow the base
4820 // to be installed in the Address class
4821 void MacroAssembler::jump(ArrayAddress entry) {
4822 jmp(as_Address(entry));
4823 }
4825 // Note: y_lo will be destroyed
4826 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
4827 // Long compare for Java (semantics as described in JVM spec.)
4828 Label high, low, done;
4830 cmpl(x_hi, y_hi);
4831 jcc(Assembler::less, low);
4832 jcc(Assembler::greater, high);
4833 // x_hi is the return register
4834 xorl(x_hi, x_hi);
4835 cmpl(x_lo, y_lo);
4836 jcc(Assembler::below, low);
4837 jcc(Assembler::equal, done);
4839 bind(high);
4840 xorl(x_hi, x_hi);
4841 increment(x_hi);
4842 jmp(done);
4844 bind(low);
4845 xorl(x_hi, x_hi);
4846 decrementl(x_hi);
4848 bind(done);
4849 }
4851 void MacroAssembler::lea(Register dst, AddressLiteral src) {
4852 mov_literal32(dst, (int32_t)src.target(), src.rspec());
4853 }
4855 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
4856 // leal(dst, as_Address(adr));
4857 // see note in movl as to why we must use a move
4858 mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
4859 }
4861 void MacroAssembler::leave() {
4862 mov(rsp, rbp);
4863 pop(rbp);
4864 }
4866 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
4867 // Multiplication of two Java long values stored on the stack
4868 // as illustrated below. Result is in rdx:rax.
4869 //
4870 // rsp ---> [ ?? ] \ \
4871 // .... | y_rsp_offset |
4872 // [ y_lo ] / (in bytes) | x_rsp_offset
4873 // [ y_hi ] | (in bytes)
4874 // .... |
4875 // [ x_lo ] /
4876 // [ x_hi ]
4877 // ....
4878 //
4879 // Basic idea: lo(result) = lo(x_lo * y_lo)
4880 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
4881 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
4882 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
4883 Label quick;
4884 // load x_hi, y_hi and check if quick
4885 // multiplication is possible
4886 movl(rbx, x_hi);
4887 movl(rcx, y_hi);
4888 movl(rax, rbx);
4889 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0
4890 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply
4891 // do full multiplication
4892 // 1st step
4893 mull(y_lo); // x_hi * y_lo
4894 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx,
4895 // 2nd step
4896 movl(rax, x_lo);
4897 mull(rcx); // x_lo * y_hi
4898 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx,
4899 // 3rd step
4900 bind(quick); // note: rbx, = 0 if quick multiply!
4901 movl(rax, x_lo);
4902 mull(y_lo); // x_lo * y_lo
4903 addl(rdx, rbx); // correct hi(x_lo * y_lo)
4904 }
4906 void MacroAssembler::lneg(Register hi, Register lo) {
4907 negl(lo);
4908 adcl(hi, 0);
4909 negl(hi);
4910 }
4912 void MacroAssembler::lshl(Register hi, Register lo) {
4913 // Java shift left long support (semantics as described in JVM spec., p.305)
4914 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
4915 // shift value is in rcx !
4916 assert(hi != rcx, "must not use rcx");
4917 assert(lo != rcx, "must not use rcx");
4918 const Register s = rcx; // shift count
4919 const int n = BitsPerWord;
4920 Label L;
4921 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
4922 cmpl(s, n); // if (s < n)
4923 jcc(Assembler::less, L); // else (s >= n)
4924 movl(hi, lo); // x := x << n
4925 xorl(lo, lo);
4926 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4927 bind(L); // s (mod n) < n
4928 shldl(hi, lo); // x := x << s
4929 shll(lo);
4930 }
4933 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
4934 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
4935 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
4936 assert(hi != rcx, "must not use rcx");
4937 assert(lo != rcx, "must not use rcx");
4938 const Register s = rcx; // shift count
4939 const int n = BitsPerWord;
4940 Label L;
4941 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
4942 cmpl(s, n); // if (s < n)
4943 jcc(Assembler::less, L); // else (s >= n)
4944 movl(lo, hi); // x := x >> n
4945 if (sign_extension) sarl(hi, 31);
4946 else xorl(hi, hi);
4947 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4948 bind(L); // s (mod n) < n
4949 shrdl(lo, hi); // x := x >> s
4950 if (sign_extension) sarl(hi);
4951 else shrl(hi);
4952 }
4954 void MacroAssembler::movoop(Register dst, jobject obj) {
4955 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4956 }
4958 void MacroAssembler::movoop(Address dst, jobject obj) {
4959 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4960 }
4962 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4963 if (src.is_lval()) {
4964 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4965 } else {
4966 movl(dst, as_Address(src));
4967 }
4968 }
4970 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
4971 movl(as_Address(dst), src);
4972 }
4974 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
4975 movl(dst, as_Address(src));
4976 }
4978 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
4979 void MacroAssembler::movptr(Address dst, intptr_t src) {
4980 movl(dst, src);
4981 }
4984 void MacroAssembler::pop_callee_saved_registers() {
4985 pop(rcx);
4986 pop(rdx);
4987 pop(rdi);
4988 pop(rsi);
4989 }
4991 void MacroAssembler::pop_fTOS() {
4992 fld_d(Address(rsp, 0));
4993 addl(rsp, 2 * wordSize);
4994 }
4996 void MacroAssembler::push_callee_saved_registers() {
4997 push(rsi);
4998 push(rdi);
4999 push(rdx);
5000 push(rcx);
5001 }
5003 void MacroAssembler::push_fTOS() {
5004 subl(rsp, 2 * wordSize);
5005 fstp_d(Address(rsp, 0));
5006 }
5009 void MacroAssembler::pushoop(jobject obj) {
5010 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
5011 }
5014 void MacroAssembler::pushptr(AddressLiteral src) {
5015 if (src.is_lval()) {
5016 push_literal32((int32_t)src.target(), src.rspec());
5017 } else {
5018 pushl(as_Address(src));
5019 }
5020 }
5022 void MacroAssembler::set_word_if_not_zero(Register dst) {
5023 xorl(dst, dst);
5024 set_byte_if_not_zero(dst);
5025 }
5027 static void pass_arg0(MacroAssembler* masm, Register arg) {
5028 masm->push(arg);
5029 }
5031 static void pass_arg1(MacroAssembler* masm, Register arg) {
5032 masm->push(arg);
5033 }
5035 static void pass_arg2(MacroAssembler* masm, Register arg) {
5036 masm->push(arg);
5037 }
5039 static void pass_arg3(MacroAssembler* masm, Register arg) {
5040 masm->push(arg);
5041 }
5043 #ifndef PRODUCT
5044 extern "C" void findpc(intptr_t x);
5045 #endif
5047 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
5048 // In order to get locks to work, we need to fake a in_VM state
5049 JavaThread* thread = JavaThread::current();
5050 JavaThreadState saved_state = thread->thread_state();
5051 thread->set_thread_state(_thread_in_vm);
5052 if (ShowMessageBoxOnError) {
5053 JavaThread* thread = JavaThread::current();
5054 JavaThreadState saved_state = thread->thread_state();
5055 thread->set_thread_state(_thread_in_vm);
5056 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5057 ttyLocker ttyl;
5058 BytecodeCounter::print();
5059 }
5060 // To see where a verify_oop failed, get $ebx+40/X for this frame.
5061 // This is the value of eip which points to where verify_oop will return.
5062 if (os::message_box(msg, "Execution stopped, print registers?")) {
5063 ttyLocker ttyl;
5064 tty->print_cr("eip = 0x%08x", eip);
5065 #ifndef PRODUCT
5066 if ((WizardMode || Verbose) && PrintMiscellaneous) {
5067 tty->cr();
5068 findpc(eip);
5069 tty->cr();
5070 }
5071 #endif
5072 tty->print_cr("rax = 0x%08x", rax);
5073 tty->print_cr("rbx = 0x%08x", rbx);
5074 tty->print_cr("rcx = 0x%08x", rcx);
5075 tty->print_cr("rdx = 0x%08x", rdx);
5076 tty->print_cr("rdi = 0x%08x", rdi);
5077 tty->print_cr("rsi = 0x%08x", rsi);
5078 tty->print_cr("rbp = 0x%08x", rbp);
5079 tty->print_cr("rsp = 0x%08x", rsp);
5080 BREAKPOINT;
5081 assert(false, "start up GDB");
5082 }
5083 } else {
5084 ttyLocker ttyl;
5085 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5086 assert(false, "DEBUG MESSAGE");
5087 }
5088 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5089 }
5091 void MacroAssembler::stop(const char* msg) {
5092 ExternalAddress message((address)msg);
5093 // push address of message
5094 pushptr(message.addr());
5095 { Label L; call(L, relocInfo::none); bind(L); } // push eip
5096 pusha(); // push registers
5097 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
5098 hlt();
5099 }
5101 void MacroAssembler::warn(const char* msg) {
5102 push_CPU_state();
5104 ExternalAddress message((address) msg);
5105 // push address of message
5106 pushptr(message.addr());
5108 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
5109 addl(rsp, wordSize); // discard argument
5110 pop_CPU_state();
5111 }
5113 #else // _LP64
5115 // 64 bit versions
5117 Address MacroAssembler::as_Address(AddressLiteral adr) {
5118 // amd64 always does this as a pc-rel
5119 // we can be absolute or disp based on the instruction type
5120 // jmp/call are displacements others are absolute
5121 assert(!adr.is_lval(), "must be rval");
5122 assert(reachable(adr), "must be");
5123 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
5125 }
5127 Address MacroAssembler::as_Address(ArrayAddress adr) {
5128 AddressLiteral base = adr.base();
5129 lea(rscratch1, base);
5130 Address index = adr.index();
5131 assert(index._disp == 0, "must not have disp"); // maybe it can?
5132 Address array(rscratch1, index._index, index._scale, index._disp);
5133 return array;
5134 }
5136 int MacroAssembler::biased_locking_enter(Register lock_reg,
5137 Register obj_reg,
5138 Register swap_reg,
5139 Register tmp_reg,
5140 bool swap_reg_contains_mark,
5141 Label& done,
5142 Label* slow_case,
5143 BiasedLockingCounters* counters) {
5144 assert(UseBiasedLocking, "why call this otherwise?");
5145 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
5146 assert(tmp_reg != noreg, "tmp_reg must be supplied");
5147 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
5148 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
5149 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
5150 Address saved_mark_addr(lock_reg, 0);
5152 if (PrintBiasedLockingStatistics && counters == NULL)
5153 counters = BiasedLocking::counters();
5155 // Biased locking
5156 // See whether the lock is currently biased toward our thread and
5157 // whether the epoch is still valid
5158 // Note that the runtime guarantees sufficient alignment of JavaThread
5159 // pointers to allow age to be placed into low bits
5160 // First check to see whether biasing is even enabled for this object
5161 Label cas_label;
5162 int null_check_offset = -1;
5163 if (!swap_reg_contains_mark) {
5164 null_check_offset = offset();
5165 movq(swap_reg, mark_addr);
5166 }
5167 movq(tmp_reg, swap_reg);
5168 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5169 cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5170 jcc(Assembler::notEqual, cas_label);
5171 // The bias pattern is present in the object's header. Need to check
5172 // whether the bias owner and the epoch are both still current.
5173 load_prototype_header(tmp_reg, obj_reg);
5174 orq(tmp_reg, r15_thread);
5175 xorq(tmp_reg, swap_reg);
5176 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5177 if (counters != NULL) {
5178 cond_inc32(Assembler::zero,
5179 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5180 }
5181 jcc(Assembler::equal, done);
5183 Label try_revoke_bias;
5184 Label try_rebias;
5186 // At this point we know that the header has the bias pattern and
5187 // that we are not the bias owner in the current epoch. We need to
5188 // figure out more details about the state of the header in order to
5189 // know what operations can be legally performed on the object's
5190 // header.
5192 // If the low three bits in the xor result aren't clear, that means
5193 // the prototype header is no longer biased and we have to revoke
5194 // the bias on this object.
5195 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5196 jcc(Assembler::notZero, try_revoke_bias);
5198 // Biasing is still enabled for this data type. See whether the
5199 // epoch of the current bias is still valid, meaning that the epoch
5200 // bits of the mark word are equal to the epoch bits of the
5201 // prototype header. (Note that the prototype header's epoch bits
5202 // only change at a safepoint.) If not, attempt to rebias the object
5203 // toward the current thread. Note that we must be absolutely sure
5204 // that the current epoch is invalid in order to do this because
5205 // otherwise the manipulations it performs on the mark word are
5206 // illegal.
5207 testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5208 jcc(Assembler::notZero, try_rebias);
5210 // The epoch of the current bias is still valid but we know nothing
5211 // about the owner; it might be set or it might be clear. Try to
5212 // acquire the bias of the object using an atomic operation. If this
5213 // fails we will go in to the runtime to revoke the object's bias.
5214 // Note that we first construct the presumed unbiased header so we
5215 // don't accidentally blow away another thread's valid bias.
5216 andq(swap_reg,
5217 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5218 movq(tmp_reg, swap_reg);
5219 orq(tmp_reg, r15_thread);
5220 if (os::is_MP()) {
5221 lock();
5222 }
5223 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5224 // If the biasing toward our thread failed, this means that
5225 // another thread succeeded in biasing it toward itself and we
5226 // need to revoke that bias. The revocation will occur in the
5227 // interpreter runtime in the slow case.
5228 if (counters != NULL) {
5229 cond_inc32(Assembler::zero,
5230 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5231 }
5232 if (slow_case != NULL) {
5233 jcc(Assembler::notZero, *slow_case);
5234 }
5235 jmp(done);
5237 bind(try_rebias);
5238 // At this point we know the epoch has expired, meaning that the
5239 // current "bias owner", if any, is actually invalid. Under these
5240 // circumstances _only_, we are allowed to use the current header's
5241 // value as the comparison value when doing the cas to acquire the
5242 // bias in the current epoch. In other words, we allow transfer of
5243 // the bias from one thread to another directly in this situation.
5244 //
5245 // FIXME: due to a lack of registers we currently blow away the age
5246 // bits in this situation. Should attempt to preserve them.
5247 load_prototype_header(tmp_reg, obj_reg);
5248 orq(tmp_reg, r15_thread);
5249 if (os::is_MP()) {
5250 lock();
5251 }
5252 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5253 // If the biasing toward our thread failed, then another thread
5254 // succeeded in biasing it toward itself and we need to revoke that
5255 // bias. The revocation will occur in the runtime in the slow case.
5256 if (counters != NULL) {
5257 cond_inc32(Assembler::zero,
5258 ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5259 }
5260 if (slow_case != NULL) {
5261 jcc(Assembler::notZero, *slow_case);
5262 }
5263 jmp(done);
5265 bind(try_revoke_bias);
5266 // The prototype mark in the klass doesn't have the bias bit set any
5267 // more, indicating that objects of this data type are not supposed
5268 // to be biased any more. We are going to try to reset the mark of
5269 // this object to the prototype value and fall through to the
5270 // CAS-based locking scheme. Note that if our CAS fails, it means
5271 // that another thread raced us for the privilege of revoking the
5272 // bias of this particular object, so it's okay to continue in the
5273 // normal locking code.
5274 //
5275 // FIXME: due to a lack of registers we currently blow away the age
5276 // bits in this situation. Should attempt to preserve them.
5277 load_prototype_header(tmp_reg, obj_reg);
5278 if (os::is_MP()) {
5279 lock();
5280 }
5281 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5282 // Fall through to the normal CAS-based lock, because no matter what
5283 // the result of the above CAS, some thread must have succeeded in
5284 // removing the bias bit from the object's header.
5285 if (counters != NULL) {
5286 cond_inc32(Assembler::zero,
5287 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5288 }
5290 bind(cas_label);
5292 return null_check_offset;
5293 }
5295 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
5296 Label L, E;
5298 #ifdef _WIN64
5299 // Windows always allocates space for it's register args
5300 assert(num_args <= 4, "only register arguments supported");
5301 subq(rsp, frame::arg_reg_save_area_bytes);
5302 #endif
5304 // Align stack if necessary
5305 testl(rsp, 15);
5306 jcc(Assembler::zero, L);
5308 subq(rsp, 8);
5309 {
5310 call(RuntimeAddress(entry_point));
5311 }
5312 addq(rsp, 8);
5313 jmp(E);
5315 bind(L);
5316 {
5317 call(RuntimeAddress(entry_point));
5318 }
5320 bind(E);
5322 #ifdef _WIN64
5323 // restore stack pointer
5324 addq(rsp, frame::arg_reg_save_area_bytes);
5325 #endif
5327 }
5329 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5330 assert(!src2.is_lval(), "should use cmpptr");
5332 if (reachable(src2)) {
5333 cmpq(src1, as_Address(src2));
5334 } else {
5335 lea(rscratch1, src2);
5336 Assembler::cmpq(src1, Address(rscratch1, 0));
5337 }
5338 }
5340 int MacroAssembler::corrected_idivq(Register reg) {
5341 // Full implementation of Java ldiv and lrem; checks for special
5342 // case as described in JVM spec., p.243 & p.271. The function
5343 // returns the (pc) offset of the idivl instruction - may be needed
5344 // for implicit exceptions.
5345 //
5346 // normal case special case
5347 //
5348 // input : rax: dividend min_long
5349 // reg: divisor (may not be eax/edx) -1
5350 //
5351 // output: rax: quotient (= rax idiv reg) min_long
5352 // rdx: remainder (= rax irem reg) 0
5353 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
5354 static const int64_t min_long = 0x8000000000000000;
5355 Label normal_case, special_case;
5357 // check for special case
5358 cmp64(rax, ExternalAddress((address) &min_long));
5359 jcc(Assembler::notEqual, normal_case);
5360 xorl(rdx, rdx); // prepare rdx for possible special case (where
5361 // remainder = 0)
5362 cmpq(reg, -1);
5363 jcc(Assembler::equal, special_case);
5365 // handle normal case
5366 bind(normal_case);
5367 cdqq();
5368 int idivq_offset = offset();
5369 idivq(reg);
5371 // normal and special case exit
5372 bind(special_case);
5374 return idivq_offset;
5375 }
5377 void MacroAssembler::decrementq(Register reg, int value) {
5378 if (value == min_jint) { subq(reg, value); return; }
5379 if (value < 0) { incrementq(reg, -value); return; }
5380 if (value == 0) { ; return; }
5381 if (value == 1 && UseIncDec) { decq(reg) ; return; }
5382 /* else */ { subq(reg, value) ; return; }
5383 }
5385 void MacroAssembler::decrementq(Address dst, int value) {
5386 if (value == min_jint) { subq(dst, value); return; }
5387 if (value < 0) { incrementq(dst, -value); return; }
5388 if (value == 0) { ; return; }
5389 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5390 /* else */ { subq(dst, value) ; return; }
5391 }
5393 void MacroAssembler::fat_nop() {
5394 // A 5 byte nop that is safe for patching (see patch_verified_entry)
5395 // Recommened sequence from 'Software Optimization Guide for the AMD
5396 // Hammer Processor'
5397 emit_byte(0x66);
5398 emit_byte(0x66);
5399 emit_byte(0x90);
5400 emit_byte(0x66);
5401 emit_byte(0x90);
5402 }
5404 void MacroAssembler::incrementq(Register reg, int value) {
5405 if (value == min_jint) { addq(reg, value); return; }
5406 if (value < 0) { decrementq(reg, -value); return; }
5407 if (value == 0) { ; return; }
5408 if (value == 1 && UseIncDec) { incq(reg) ; return; }
5409 /* else */ { addq(reg, value) ; return; }
5410 }
5412 void MacroAssembler::incrementq(Address dst, int value) {
5413 if (value == min_jint) { addq(dst, value); return; }
5414 if (value < 0) { decrementq(dst, -value); return; }
5415 if (value == 0) { ; return; }
5416 if (value == 1 && UseIncDec) { incq(dst) ; return; }
5417 /* else */ { addq(dst, value) ; return; }
5418 }
5420 // 32bit can do a case table jump in one instruction but we no longer allow the base
5421 // to be installed in the Address class
5422 void MacroAssembler::jump(ArrayAddress entry) {
5423 lea(rscratch1, entry.base());
5424 Address dispatch = entry.index();
5425 assert(dispatch._base == noreg, "must be");
5426 dispatch._base = rscratch1;
5427 jmp(dispatch);
5428 }
5430 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
5431 ShouldNotReachHere(); // 64bit doesn't use two regs
5432 cmpq(x_lo, y_lo);
5433 }
5435 void MacroAssembler::lea(Register dst, AddressLiteral src) {
5436 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5437 }
5439 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
5440 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
5441 movptr(dst, rscratch1);
5442 }
5444 void MacroAssembler::leave() {
5445 // %%% is this really better? Why not on 32bit too?
5446 emit_byte(0xC9); // LEAVE
5447 }
5449 void MacroAssembler::lneg(Register hi, Register lo) {
5450 ShouldNotReachHere(); // 64bit doesn't use two regs
5451 negq(lo);
5452 }
5454 void MacroAssembler::movoop(Register dst, jobject obj) {
5455 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5456 }
5458 void MacroAssembler::movoop(Address dst, jobject obj) {
5459 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5460 movq(dst, rscratch1);
5461 }
5463 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5464 if (src.is_lval()) {
5465 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5466 } else {
5467 if (reachable(src)) {
5468 movq(dst, as_Address(src));
5469 } else {
5470 lea(rscratch1, src);
5471 movq(dst, Address(rscratch1,0));
5472 }
5473 }
5474 }
5476 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
5477 movq(as_Address(dst), src);
5478 }
5480 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
5481 movq(dst, as_Address(src));
5482 }
5484 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
5485 void MacroAssembler::movptr(Address dst, intptr_t src) {
5486 mov64(rscratch1, src);
5487 movq(dst, rscratch1);
5488 }
5490 // These are mostly for initializing NULL
5491 void MacroAssembler::movptr(Address dst, int32_t src) {
5492 movslq(dst, src);
5493 }
5495 void MacroAssembler::movptr(Register dst, int32_t src) {
5496 mov64(dst, (intptr_t)src);
5497 }
5499 void MacroAssembler::pushoop(jobject obj) {
5500 movoop(rscratch1, obj);
5501 push(rscratch1);
5502 }
5504 void MacroAssembler::pushptr(AddressLiteral src) {
5505 lea(rscratch1, src);
5506 if (src.is_lval()) {
5507 push(rscratch1);
5508 } else {
5509 pushq(Address(rscratch1, 0));
5510 }
5511 }
5513 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
5514 bool clear_pc) {
5515 // we must set sp to zero to clear frame
5516 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
5517 // must clear fp, so that compiled frames are not confused; it is
5518 // possible that we need it only for debugging
5519 if (clear_fp) {
5520 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
5521 }
5523 if (clear_pc) {
5524 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
5525 }
5526 }
5528 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
5529 Register last_java_fp,
5530 address last_java_pc) {
5531 // determine last_java_sp register
5532 if (!last_java_sp->is_valid()) {
5533 last_java_sp = rsp;
5534 }
5536 // last_java_fp is optional
5537 if (last_java_fp->is_valid()) {
5538 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
5539 last_java_fp);
5540 }
5542 // last_java_pc is optional
5543 if (last_java_pc != NULL) {
5544 Address java_pc(r15_thread,
5545 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
5546 lea(rscratch1, InternalAddress(last_java_pc));
5547 movptr(java_pc, rscratch1);
5548 }
5550 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
5551 }
5553 static void pass_arg0(MacroAssembler* masm, Register arg) {
5554 if (c_rarg0 != arg ) {
5555 masm->mov(c_rarg0, arg);
5556 }
5557 }
5559 static void pass_arg1(MacroAssembler* masm, Register arg) {
5560 if (c_rarg1 != arg ) {
5561 masm->mov(c_rarg1, arg);
5562 }
5563 }
5565 static void pass_arg2(MacroAssembler* masm, Register arg) {
5566 if (c_rarg2 != arg ) {
5567 masm->mov(c_rarg2, arg);
5568 }
5569 }
5571 static void pass_arg3(MacroAssembler* masm, Register arg) {
5572 if (c_rarg3 != arg ) {
5573 masm->mov(c_rarg3, arg);
5574 }
5575 }
5577 void MacroAssembler::stop(const char* msg) {
5578 address rip = pc();
5579 pusha(); // get regs on stack
5580 lea(c_rarg0, ExternalAddress((address) msg));
5581 lea(c_rarg1, InternalAddress(rip));
5582 movq(c_rarg2, rsp); // pass pointer to regs array
5583 andq(rsp, -16); // align stack as required by ABI
5584 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
5585 hlt();
5586 }
5588 void MacroAssembler::warn(const char* msg) {
5589 push(rsp);
5590 andq(rsp, -16); // align stack as required by push_CPU_state and call
5592 push_CPU_state(); // keeps alignment at 16 bytes
5593 lea(c_rarg0, ExternalAddress((address) msg));
5594 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
5595 pop_CPU_state();
5596 pop(rsp);
5597 }
5599 #ifndef PRODUCT
5600 extern "C" void findpc(intptr_t x);
5601 #endif
5603 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
5604 // In order to get locks to work, we need to fake a in_VM state
5605 if (ShowMessageBoxOnError ) {
5606 JavaThread* thread = JavaThread::current();
5607 JavaThreadState saved_state = thread->thread_state();
5608 thread->set_thread_state(_thread_in_vm);
5609 #ifndef PRODUCT
5610 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5611 ttyLocker ttyl;
5612 BytecodeCounter::print();
5613 }
5614 #endif
5615 // To see where a verify_oop failed, get $ebx+40/X for this frame.
5616 // XXX correct this offset for amd64
5617 // This is the value of eip which points to where verify_oop will return.
5618 if (os::message_box(msg, "Execution stopped, print registers?")) {
5619 ttyLocker ttyl;
5620 tty->print_cr("rip = 0x%016lx", pc);
5621 #ifndef PRODUCT
5622 tty->cr();
5623 findpc(pc);
5624 tty->cr();
5625 #endif
5626 tty->print_cr("rax = 0x%016lx", regs[15]);
5627 tty->print_cr("rbx = 0x%016lx", regs[12]);
5628 tty->print_cr("rcx = 0x%016lx", regs[14]);
5629 tty->print_cr("rdx = 0x%016lx", regs[13]);
5630 tty->print_cr("rdi = 0x%016lx", regs[8]);
5631 tty->print_cr("rsi = 0x%016lx", regs[9]);
5632 tty->print_cr("rbp = 0x%016lx", regs[10]);
5633 tty->print_cr("rsp = 0x%016lx", regs[11]);
5634 tty->print_cr("r8 = 0x%016lx", regs[7]);
5635 tty->print_cr("r9 = 0x%016lx", regs[6]);
5636 tty->print_cr("r10 = 0x%016lx", regs[5]);
5637 tty->print_cr("r11 = 0x%016lx", regs[4]);
5638 tty->print_cr("r12 = 0x%016lx", regs[3]);
5639 tty->print_cr("r13 = 0x%016lx", regs[2]);
5640 tty->print_cr("r14 = 0x%016lx", regs[1]);
5641 tty->print_cr("r15 = 0x%016lx", regs[0]);
5642 BREAKPOINT;
5643 }
5644 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5645 } else {
5646 ttyLocker ttyl;
5647 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
5648 msg);
5649 }
5650 }
5652 #endif // _LP64
5654 // Now versions that are common to 32/64 bit
5656 void MacroAssembler::addptr(Register dst, int32_t imm32) {
5657 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
5658 }
5660 void MacroAssembler::addptr(Register dst, Register src) {
5661 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5662 }
5664 void MacroAssembler::addptr(Address dst, Register src) {
5665 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5666 }
5668 void MacroAssembler::align(int modulus) {
5669 if (offset() % modulus != 0) {
5670 nop(modulus - (offset() % modulus));
5671 }
5672 }
5674 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
5675 if (reachable(src)) {
5676 andpd(dst, as_Address(src));
5677 } else {
5678 lea(rscratch1, src);
5679 andpd(dst, Address(rscratch1, 0));
5680 }
5681 }
5683 void MacroAssembler::andptr(Register dst, int32_t imm32) {
5684 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
5685 }
5687 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
5688 pushf();
5689 if (os::is_MP())
5690 lock();
5691 incrementl(counter_addr);
5692 popf();
5693 }
5695 // Writes to stack successive pages until offset reached to check for
5696 // stack overflow + shadow pages. This clobbers tmp.
5697 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5698 movptr(tmp, rsp);
5699 // Bang stack for total size given plus shadow page size.
5700 // Bang one page at a time because large size can bang beyond yellow and
5701 // red zones.
5702 Label loop;
5703 bind(loop);
5704 movl(Address(tmp, (-os::vm_page_size())), size );
5705 subptr(tmp, os::vm_page_size());
5706 subl(size, os::vm_page_size());
5707 jcc(Assembler::greater, loop);
5709 // Bang down shadow pages too.
5710 // The -1 because we already subtracted 1 page.
5711 for (int i = 0; i< StackShadowPages-1; i++) {
5712 // this could be any sized move but this is can be a debugging crumb
5713 // so the bigger the better.
5714 movptr(Address(tmp, (-i*os::vm_page_size())), size );
5715 }
5716 }
5718 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
5719 assert(UseBiasedLocking, "why call this otherwise?");
5721 // Check for biased locking unlock case, which is a no-op
5722 // Note: we do not have to check the thread ID for two reasons.
5723 // First, the interpreter checks for IllegalMonitorStateException at
5724 // a higher level. Second, if the bias was revoked while we held the
5725 // lock, the object could not be rebiased toward another thread, so
5726 // the bias bit would be clear.
5727 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
5728 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
5729 cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
5730 jcc(Assembler::equal, done);
5731 }
5733 void MacroAssembler::c2bool(Register x) {
5734 // implements x == 0 ? 0 : 1
5735 // note: must only look at least-significant byte of x
5736 // since C-style booleans are stored in one byte
5737 // only! (was bug)
5738 andl(x, 0xFF);
5739 setb(Assembler::notZero, x);
5740 }
5742 // Wouldn't need if AddressLiteral version had new name
5743 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
5744 Assembler::call(L, rtype);
5745 }
5747 void MacroAssembler::call(Register entry) {
5748 Assembler::call(entry);
5749 }
5751 void MacroAssembler::call(AddressLiteral entry) {
5752 if (reachable(entry)) {
5753 Assembler::call_literal(entry.target(), entry.rspec());
5754 } else {
5755 lea(rscratch1, entry);
5756 Assembler::call(rscratch1);
5757 }
5758 }
5760 // Implementation of call_VM versions
5762 void MacroAssembler::call_VM(Register oop_result,
5763 address entry_point,
5764 bool check_exceptions) {
5765 Label C, E;
5766 call(C, relocInfo::none);
5767 jmp(E);
5769 bind(C);
5770 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
5771 ret(0);
5773 bind(E);
5774 }
5776 void MacroAssembler::call_VM(Register oop_result,
5777 address entry_point,
5778 Register arg_1,
5779 bool check_exceptions) {
5780 Label C, E;
5781 call(C, relocInfo::none);
5782 jmp(E);
5784 bind(C);
5785 pass_arg1(this, arg_1);
5786 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
5787 ret(0);
5789 bind(E);
5790 }
5792 void MacroAssembler::call_VM(Register oop_result,
5793 address entry_point,
5794 Register arg_1,
5795 Register arg_2,
5796 bool check_exceptions) {
5797 Label C, E;
5798 call(C, relocInfo::none);
5799 jmp(E);
5801 bind(C);
5803 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5805 pass_arg2(this, arg_2);
5806 pass_arg1(this, arg_1);
5807 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
5808 ret(0);
5810 bind(E);
5811 }
5813 void MacroAssembler::call_VM(Register oop_result,
5814 address entry_point,
5815 Register arg_1,
5816 Register arg_2,
5817 Register arg_3,
5818 bool check_exceptions) {
5819 Label C, E;
5820 call(C, relocInfo::none);
5821 jmp(E);
5823 bind(C);
5825 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5826 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5827 pass_arg3(this, arg_3);
5829 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5830 pass_arg2(this, arg_2);
5832 pass_arg1(this, arg_1);
5833 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
5834 ret(0);
5836 bind(E);
5837 }
5839 void MacroAssembler::call_VM(Register oop_result,
5840 Register last_java_sp,
5841 address entry_point,
5842 int number_of_arguments,
5843 bool check_exceptions) {
5844 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
5845 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
5846 }
5848 void MacroAssembler::call_VM(Register oop_result,
5849 Register last_java_sp,
5850 address entry_point,
5851 Register arg_1,
5852 bool check_exceptions) {
5853 pass_arg1(this, arg_1);
5854 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
5855 }
5857 void MacroAssembler::call_VM(Register oop_result,
5858 Register last_java_sp,
5859 address entry_point,
5860 Register arg_1,
5861 Register arg_2,
5862 bool check_exceptions) {
5864 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5865 pass_arg2(this, arg_2);
5866 pass_arg1(this, arg_1);
5867 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
5868 }
5870 void MacroAssembler::call_VM(Register oop_result,
5871 Register last_java_sp,
5872 address entry_point,
5873 Register arg_1,
5874 Register arg_2,
5875 Register arg_3,
5876 bool check_exceptions) {
5877 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5878 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5879 pass_arg3(this, arg_3);
5880 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5881 pass_arg2(this, arg_2);
5882 pass_arg1(this, arg_1);
5883 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
5884 }
// Common back end of the call_VM family. Emits, in order: the thread argument
// setup, set_last_Java_frame, the call itself (via call_VM_leaf_base),
// reset_last_Java_frame, popframe/earlyret checks (unless CC_INTERP), an
// optional pending-exception check, and an optional fetch of the oop result.
//
//   oop_result          - if valid, receives the thread's vm_result field,
//                         which is then cleared
//   java_thread         - thread register; if not valid, r15_thread is used on
//                         64bit, and rdi (loaded with get_thread) on 32bit
//   last_java_sp        - sp to record in the frame anchor; rsp if not valid
//   entry_point         - address handed to call_VM_leaf_base
//   number_of_arguments - argument count; on 32bit it is incremented for the
//                         pushed thread argument
//   check_exceptions    - if true, control is transferred to
//                         StubRoutines::forward_exception_entry() when the
//                         thread's pending exception field is not NULL_WORD
5886 void MacroAssembler::call_VM_base(Register oop_result,
5887 Register java_thread,
5888 Register last_java_sp,
5889 address entry_point,
5890 int number_of_arguments,
5891 bool check_exceptions) {
5892 // determine java_thread register
5893 if (!java_thread->is_valid()) {
5894 #ifdef _LP64
5895 java_thread = r15_thread;
5896 #else
5897 java_thread = rdi;
5898 get_thread(java_thread);
5899 #endif // LP64
5900 }
5901 // determine last_java_sp register
5902 if (!last_java_sp->is_valid()) {
5903 last_java_sp = rsp;
5904 }
5905 // debugging support
5906 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
5907 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
5908 #ifdef ASSERT
5909 LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
5910 #endif // ASSERT
5912 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
5913 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
5915 // push java thread (becomes first argument of C function)
// 32bit passes the thread on the stack; 64bit passes it in c_rarg0.
5917 NOT_LP64(push(java_thread); number_of_arguments++);
5918 LP64_ONLY(mov(c_rarg0, r15_thread));
5920 // set last Java frame before call
5921 assert(last_java_sp != rbp, "can't use ebp/rbp");
5923 // Only interpreter should have to set fp
5924 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
5926 // do the call, remove parameters
5927 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
5929 // restore the thread (cannot use the pushed argument since arguments
5930 // may be overwritten by C code generated by an optimizing compiler);
5931 // however can use the register value directly if it is callee saved.
// On 64bit the condition below is always true (LP64_ONLY(true ||)).
5932 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
5933 // rdi & rsi (also r15) are callee saved -> nothing to do
5934 #ifdef ASSERT
// Debug builds verify that java_thread still matches get_thread();
// rax is borrowed for the comparison and restored afterwards.
5935 guarantee(java_thread != rax, "change this code");
5936 push(rax);
5937 { Label L;
5938 get_thread(rax);
5939 cmpptr(java_thread, rax);
5940 jcc(Assembler::equal, L);
5941 stop("MacroAssembler::call_VM_base: rdi not callee saved?");
5942 bind(L);
5943 }
5944 pop(rax);
5945 #endif
5946 } else {
5947 get_thread(java_thread);
5948 }
5949 // reset last Java frame
5950 // Only interpreter should have to clear fp
5951 reset_last_Java_frame(java_thread, true, false);
5953 #ifndef CC_INTERP
5954 // C++ interp handles this in the interpreter
5955 check_and_handle_popframe(java_thread);
5956 check_and_handle_earlyret(java_thread);
5957 #endif /* CC_INTERP */
5959 if (check_exceptions) {
5960 // check for pending exceptions (java_thread is set upon return)
5961 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
5962 #ifndef _LP64
5963 jump_cc(Assembler::notEqual,
5964 RuntimeAddress(StubRoutines::forward_exception_entry()));
5965 #else
5966 // This used to conditionally jump to forward_exception however it is
5967 // possible if we relocate that the branch will not reach. So we must jump
5968 // around so we can always reach
5970 Label ok;
5971 jcc(Assembler::equal, ok);
5972 jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
5973 bind(ok);
5974 #endif // LP64
5975 }
5977 // get oop result if there is one and reset the value in the thread
5978 if (oop_result->is_valid()) {
5979 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
5980 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
5981 verify_oop(oop_result, "broken oop in call_VM_base");
5982 }
5983 }
5985 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
5987 // Calculate the value for last_Java_sp
5988 // somewhat subtle. call_VM does an intermediate call
5989 // which places a return address on the stack just under the
5990 // stack pointer as the user finsihed with it. This allows
5991 // use to retrieve last_Java_pc from last_Java_sp[-1].
5992 // On 32bit we then have to push additional args on the stack to accomplish
5993 // the actual requested call. On 64bit call_VM only can use register args
5994 // so the only extra space is the return address that call_VM created.
5995 // This hopefully explains the calculations here.
5997 #ifdef _LP64
5998 // We've pushed one address, correct last_Java_sp
5999 lea(rax, Address(rsp, wordSize));
6000 #else
6001 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
6002 #endif // LP64
6004 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
6006 }
6008 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
6009 call_VM_leaf_base(entry_point, number_of_arguments);
6010 }
6012 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
6013 pass_arg0(this, arg_0);
6014 call_VM_leaf(entry_point, 1);
6015 }
6017 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
6019 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
6020 pass_arg1(this, arg_1);
6021 pass_arg0(this, arg_0);
6022 call_VM_leaf(entry_point, 2);
6023 }
6025 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
6026 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
6027 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
6028 pass_arg2(this, arg_2);
6029 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
6030 pass_arg1(this, arg_1);
6031 pass_arg0(this, arg_0);
6032 call_VM_leaf(entry_point, 3);
6033 }
6035 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
6036 }
6038 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
6039 }
6041 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
6042 if (reachable(src1)) {
6043 cmpl(as_Address(src1), imm);
6044 } else {
6045 lea(rscratch1, src1);
6046 cmpl(Address(rscratch1, 0), imm);
6047 }
6048 }
6050 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
6051 assert(!src2.is_lval(), "use cmpptr");
6052 if (reachable(src2)) {
6053 cmpl(src1, as_Address(src2));
6054 } else {
6055 lea(rscratch1, src2);
6056 cmpl(src1, Address(rscratch1, 0));
6057 }
6058 }
6060 void MacroAssembler::cmp32(Register src1, int32_t imm) {
6061 Assembler::cmpl(src1, imm);
6062 }
6064 void MacroAssembler::cmp32(Register src1, Address src2) {
6065 Assembler::cmpl(src1, src2);
6066 }
6068 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
6069 ucomisd(opr1, opr2);
6071 Label L;
6072 if (unordered_is_less) {
6073 movl(dst, -1);
6074 jcc(Assembler::parity, L);
6075 jcc(Assembler::below , L);
6076 movl(dst, 0);
6077 jcc(Assembler::equal , L);
6078 increment(dst);
6079 } else { // unordered is greater
6080 movl(dst, 1);
6081 jcc(Assembler::parity, L);
6082 jcc(Assembler::above , L);
6083 movl(dst, 0);
6084 jcc(Assembler::equal , L);
6085 decrementl(dst);
6086 }
6087 bind(L);
6088 }
6090 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
6091 ucomiss(opr1, opr2);
6093 Label L;
6094 if (unordered_is_less) {
6095 movl(dst, -1);
6096 jcc(Assembler::parity, L);
6097 jcc(Assembler::below , L);
6098 movl(dst, 0);
6099 jcc(Assembler::equal , L);
6100 increment(dst);
6101 } else { // unordered is greater
6102 movl(dst, 1);
6103 jcc(Assembler::parity, L);
6104 jcc(Assembler::above , L);
6105 movl(dst, 0);
6106 jcc(Assembler::equal , L);
6107 decrementl(dst);
6108 }
6109 bind(L);
6110 }
6113 void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
6114 if (reachable(src1)) {
6115 cmpb(as_Address(src1), imm);
6116 } else {
6117 lea(rscratch1, src1);
6118 cmpb(Address(rscratch1, 0), imm);
6119 }
6120 }
6122 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
6123 #ifdef _LP64
6124 if (src2.is_lval()) {
6125 movptr(rscratch1, src2);
6126 Assembler::cmpq(src1, rscratch1);
6127 } else if (reachable(src2)) {
6128 cmpq(src1, as_Address(src2));
6129 } else {
6130 lea(rscratch1, src2);
6131 Assembler::cmpq(src1, Address(rscratch1, 0));
6132 }
6133 #else
6134 if (src2.is_lval()) {
6135 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
6136 } else {
6137 cmpl(src1, as_Address(src2));
6138 }
6139 #endif // _LP64
6140 }
6142 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
6143 assert(src2.is_lval(), "not a mem-mem compare");
6144 #ifdef _LP64
6145 // moves src2's literal address
6146 movptr(rscratch1, src2);
6147 Assembler::cmpq(src1, rscratch1);
6148 #else
6149 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
6150 #endif // _LP64
6151 }
6153 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
6154 if (reachable(adr)) {
6155 if (os::is_MP())
6156 lock();
6157 cmpxchgptr(reg, as_Address(adr));
6158 } else {
6159 lea(rscratch1, adr);
6160 if (os::is_MP())
6161 lock();
6162 cmpxchgptr(reg, Address(rscratch1, 0));
6163 }
6164 }
6166 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
6167 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
6168 }
6170 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
6171 if (reachable(src)) {
6172 comisd(dst, as_Address(src));
6173 } else {
6174 lea(rscratch1, src);
6175 comisd(dst, Address(rscratch1, 0));
6176 }
6177 }
6179 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
6180 if (reachable(src)) {
6181 comiss(dst, as_Address(src));
6182 } else {
6183 lea(rscratch1, src);
6184 comiss(dst, Address(rscratch1, 0));
6185 }
6186 }
6189 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
6190 Condition negated_cond = negate_condition(cond);
6191 Label L;
6192 jcc(negated_cond, L);
6193 atomic_incl(counter_addr);
6194 bind(L);
6195 }
6197 int MacroAssembler::corrected_idivl(Register reg) {
6198 // Full implementation of Java idiv and irem; checks for
6199 // special case as described in JVM spec., p.243 & p.271.
6200 // The function returns the (pc) offset of the idivl
6201 // instruction - may be needed for implicit exceptions.
6202 //
6203 // normal case special case
6204 //
6205 // input : rax,: dividend min_int
6206 // reg: divisor (may not be rax,/rdx) -1
6207 //
6208 // output: rax,: quotient (= rax, idiv reg) min_int
6209 // rdx: remainder (= rax, irem reg) 0
6210 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
6211 const int min_int = 0x80000000;
6212 Label normal_case, special_case;
6214 // check for special case
6215 cmpl(rax, min_int);
6216 jcc(Assembler::notEqual, normal_case);
6217 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
6218 cmpl(reg, -1);
6219 jcc(Assembler::equal, special_case);
6221 // handle normal case
6222 bind(normal_case);
6223 cdql();
6224 int idivl_offset = offset();
6225 idivl(reg);
6227 // normal and special case exit
6228 bind(special_case);
6230 return idivl_offset;
6231 }
6235 void MacroAssembler::decrementl(Register reg, int value) {
6236 if (value == min_jint) {subl(reg, value) ; return; }
6237 if (value < 0) { incrementl(reg, -value); return; }
6238 if (value == 0) { ; return; }
6239 if (value == 1 && UseIncDec) { decl(reg) ; return; }
6240 /* else */ { subl(reg, value) ; return; }
6241 }
6243 void MacroAssembler::decrementl(Address dst, int value) {
6244 if (value == min_jint) {subl(dst, value) ; return; }
6245 if (value < 0) { incrementl(dst, -value); return; }
6246 if (value == 0) { ; return; }
6247 if (value == 1 && UseIncDec) { decl(dst) ; return; }
6248 /* else */ { subl(dst, value) ; return; }
6249 }
6251 void MacroAssembler::division_with_shift (Register reg, int shift_value) {
6252 assert (shift_value > 0, "illegal shift value");
6253 Label _is_positive;
6254 testl (reg, reg);
6255 jcc (Assembler::positive, _is_positive);
6256 int offset = (1 << shift_value) - 1 ;
6258 if (offset == 1) {
6259 incrementl(reg);
6260 } else {
6261 addl(reg, offset);
6262 }
6264 bind (_is_positive);
6265 sarl(reg, shift_value);
6266 }
6268 // !defined(COMPILER2) is because of stupid core builds
6269 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
6270 void MacroAssembler::empty_FPU_stack() {
6271 if (VM_Version::supports_mmx()) {
6272 emms();
6273 } else {
6274 for (int i = 8; i-- > 0; ) ffree(i);
6275 }
6276 }
6277 #endif // !LP64 || C1 || !C2
6280 // Defines obj, preserves var_size_in_bytes
6281 void MacroAssembler::eden_allocate(Register obj,
6282 Register var_size_in_bytes,
6283 int con_size_in_bytes,
6284 Register t1,
6285 Label& slow_case) {
6286 assert(obj == rax, "obj must be in rax, for cmpxchg");
6287 assert_different_registers(obj, var_size_in_bytes, t1);
6288 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
6289 jmp(slow_case);
6290 } else {
6291 Register end = t1;
6292 Label retry;
6293 bind(retry);
6294 ExternalAddress heap_top((address) Universe::heap()->top_addr());
6295 movptr(obj, heap_top);
6296 if (var_size_in_bytes == noreg) {
6297 lea(end, Address(obj, con_size_in_bytes));
6298 } else {
6299 lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6300 }
6301 // if end < obj then we wrapped around => object too long => slow case
6302 cmpptr(end, obj);
6303 jcc(Assembler::below, slow_case);
6304 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
6305 jcc(Assembler::above, slow_case);
6306 // Compare obj with the top addr, and if still equal, store the new top addr in
6307 // end at the address of the top addr pointer. Sets ZF if was equal, and clears
6308 // it otherwise. Use lock prefix for atomicity on MPs.
6309 locked_cmpxchgptr(end, heap_top);
6310 jcc(Assembler::notEqual, retry);
6311 }
6312 }
6314 void MacroAssembler::enter() {
6315 push(rbp);
6316 mov(rbp, rsp);
6317 }
6319 void MacroAssembler::fcmp(Register tmp) {
6320 fcmp(tmp, 1, true, true);
6321 }
6323 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
6324 assert(!pop_right || pop_left, "usage error");
6325 if (VM_Version::supports_cmov()) {
6326 assert(tmp == noreg, "unneeded temp");
6327 if (pop_left) {
6328 fucomip(index);
6329 } else {
6330 fucomi(index);
6331 }
6332 if (pop_right) {
6333 fpop();
6334 }
6335 } else {
6336 assert(tmp != noreg, "need temp");
6337 if (pop_left) {
6338 if (pop_right) {
6339 fcompp();
6340 } else {
6341 fcomp(index);
6342 }
6343 } else {
6344 fcom(index);
6345 }
6346 // convert FPU condition into eflags condition via rax,
6347 save_rax(tmp);
6348 fwait(); fnstsw_ax();
6349 sahf();
6350 restore_rax(tmp);
6351 }
6352 // condition codes set as follows:
6353 //
6354 // CF (corresponds to C0) if x < y
6355 // PF (corresponds to C2) if unordered
6356 // ZF (corresponds to C3) if x = y
6357 }
6359 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
6360 fcmp2int(dst, unordered_is_less, 1, true, true);
6361 }
6363 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
6364 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
6365 Label L;
6366 if (unordered_is_less) {
6367 movl(dst, -1);
6368 jcc(Assembler::parity, L);
6369 jcc(Assembler::below , L);
6370 movl(dst, 0);
6371 jcc(Assembler::equal , L);
6372 increment(dst);
6373 } else { // unordered is greater
6374 movl(dst, 1);
6375 jcc(Assembler::parity, L);
6376 jcc(Assembler::above , L);
6377 movl(dst, 0);
6378 jcc(Assembler::equal , L);
6379 decrementl(dst);
6380 }
6381 bind(L);
6382 }
6384 void MacroAssembler::fld_d(AddressLiteral src) {
6385 fld_d(as_Address(src));
6386 }
6388 void MacroAssembler::fld_s(AddressLiteral src) {
6389 fld_s(as_Address(src));
6390 }
6392 void MacroAssembler::fld_x(AddressLiteral src) {
6393 Assembler::fld_x(as_Address(src));
6394 }
6396 void MacroAssembler::fldcw(AddressLiteral src) {
6397 Assembler::fldcw(as_Address(src));
6398 }
6400 void MacroAssembler::fpop() {
6401 ffree();
6402 fincstp();
6403 }
6405 void MacroAssembler::fremr(Register tmp) {
6406 save_rax(tmp);
6407 { Label L;
6408 bind(L);
6409 fprem();
6410 fwait(); fnstsw_ax();
6411 #ifdef _LP64
6412 testl(rax, 0x400);
6413 jcc(Assembler::notEqual, L);
6414 #else
6415 sahf();
6416 jcc(Assembler::parity, L);
6417 #endif // _LP64
6418 }
6419 restore_rax(tmp);
6420 // Result is in ST0.
6421 // Note: fxch & fpop to get rid of ST1
6422 // (otherwise FPU stack could overflow eventually)
6423 fxch(1);
6424 fpop();
6425 }
6428 void MacroAssembler::incrementl(AddressLiteral dst) {
6429 if (reachable(dst)) {
6430 incrementl(as_Address(dst));
6431 } else {
6432 lea(rscratch1, dst);
6433 incrementl(Address(rscratch1, 0));
6434 }
6435 }
6437 void MacroAssembler::incrementl(ArrayAddress dst) {
6438 incrementl(as_Address(dst));
6439 }
6441 void MacroAssembler::incrementl(Register reg, int value) {
6442 if (value == min_jint) {addl(reg, value) ; return; }
6443 if (value < 0) { decrementl(reg, -value); return; }
6444 if (value == 0) { ; return; }
6445 if (value == 1 && UseIncDec) { incl(reg) ; return; }
6446 /* else */ { addl(reg, value) ; return; }
6447 }
6449 void MacroAssembler::incrementl(Address dst, int value) {
6450 if (value == min_jint) {addl(dst, value) ; return; }
6451 if (value < 0) { decrementl(dst, -value); return; }
6452 if (value == 0) { ; return; }
6453 if (value == 1 && UseIncDec) { incl(dst) ; return; }
6454 /* else */ { addl(dst, value) ; return; }
6455 }
6457 void MacroAssembler::jump(AddressLiteral dst) {
6458 if (reachable(dst)) {
6459 jmp_literal(dst.target(), dst.rspec());
6460 } else {
6461 lea(rscratch1, dst);
6462 jmp(rscratch1);
6463 }
6464 }
6466 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
6467 if (reachable(dst)) {
6468 InstructionMark im(this);
6469 relocate(dst.reloc());
6470 const int short_size = 2;
6471 const int long_size = 6;
6472 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
6473 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
6474 // 0111 tttn #8-bit disp
6475 emit_byte(0x70 | cc);
6476 emit_byte((offs - short_size) & 0xFF);
6477 } else {
6478 // 0000 1111 1000 tttn #32-bit disp
6479 emit_byte(0x0F);
6480 emit_byte(0x80 | cc);
6481 emit_long(offs - long_size);
6482 }
6483 } else {
6484 #ifdef ASSERT
6485 warning("reversing conditional branch");
6486 #endif /* ASSERT */
6487 Label skip;
6488 jccb(reverse[cc], skip);
6489 lea(rscratch1, dst);
6490 Assembler::jmp(rscratch1);
6491 bind(skip);
6492 }
6493 }
6495 void MacroAssembler::ldmxcsr(AddressLiteral src) {
6496 if (reachable(src)) {
6497 Assembler::ldmxcsr(as_Address(src));
6498 } else {
6499 lea(rscratch1, src);
6500 Assembler::ldmxcsr(Address(rscratch1, 0));
6501 }
6502 }
6504 int MacroAssembler::load_signed_byte(Register dst, Address src) {
6505 int off;
6506 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6507 off = offset();
6508 movsbl(dst, src); // movsxb
6509 } else {
6510 off = load_unsigned_byte(dst, src);
6511 shll(dst, 24);
6512 sarl(dst, 24);
6513 }
6514 return off;
6515 }
6517 // Note: load_signed_short used to be called load_signed_word.
6518 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler
6519 // manual, which means 16 bits, that usage is found nowhere in HotSpot code.
6520 // The term "word" in HotSpot means a 32- or 64-bit machine word.
6521 int MacroAssembler::load_signed_short(Register dst, Address src) {
6522 int off;
6523 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6524 // This is dubious to me since it seems safe to do a signed 16 => 64 bit
6525 // version but this is what 64bit has always done. This seems to imply
6526 // that users are only using 32bits worth.
6527 off = offset();
6528 movswl(dst, src); // movsxw
6529 } else {
6530 off = load_unsigned_short(dst, src);
6531 shll(dst, 16);
6532 sarl(dst, 16);
6533 }
6534 return off;
6535 }
6537 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
6538 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6539 // and "3.9 Partial Register Penalties", p. 22).
6540 int off;
6541 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
6542 off = offset();
6543 movzbl(dst, src); // movzxb
6544 } else {
6545 xorl(dst, dst);
6546 off = offset();
6547 movb(dst, src);
6548 }
6549 return off;
6550 }
6552 // Note: load_unsigned_short used to be called load_unsigned_word.
6553 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
6554 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6555 // and "3.9 Partial Register Penalties", p. 22).
6556 int off;
6557 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
6558 off = offset();
6559 movzwl(dst, src); // movzxw
6560 } else {
6561 xorl(dst, dst);
6562 off = offset();
6563 movw(dst, src);
6564 }
6565 return off;
6566 }
6568 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
6569 switch (size_in_bytes) {
6570 #ifndef _LP64
6571 case 8:
6572 assert(dst2 != noreg, "second dest register required");
6573 movl(dst, src);
6574 movl(dst2, src.plus_disp(BytesPerInt));
6575 break;
6576 #else
6577 case 8: movq(dst, src); break;
6578 #endif
6579 case 4: movl(dst, src); break;
6580 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
6581 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
6582 default: ShouldNotReachHere();
6583 }
6584 }
6586 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
6587 switch (size_in_bytes) {
6588 #ifndef _LP64
6589 case 8:
6590 assert(src2 != noreg, "second source register required");
6591 movl(dst, src);
6592 movl(dst.plus_disp(BytesPerInt), src2);
6593 break;
6594 #else
6595 case 8: movq(dst, src); break;
6596 #endif
6597 case 4: movl(dst, src); break;
6598 case 2: movw(dst, src); break;
6599 case 1: movb(dst, src); break;
6600 default: ShouldNotReachHere();
6601 }
6602 }
6604 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6605 if (reachable(dst)) {
6606 movl(as_Address(dst), src);
6607 } else {
6608 lea(rscratch1, dst);
6609 movl(Address(rscratch1, 0), src);
6610 }
6611 }
6613 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6614 if (reachable(src)) {
6615 movl(dst, as_Address(src));
6616 } else {
6617 lea(rscratch1, src);
6618 movl(dst, Address(rscratch1, 0));
6619 }
6620 }
6622 // C++ bool manipulation
6624 void MacroAssembler::movbool(Register dst, Address src) {
6625 if(sizeof(bool) == 1)
6626 movb(dst, src);
6627 else if(sizeof(bool) == 2)
6628 movw(dst, src);
6629 else if(sizeof(bool) == 4)
6630 movl(dst, src);
6631 else
6632 // unsupported
6633 ShouldNotReachHere();
6634 }
6636 void MacroAssembler::movbool(Address dst, bool boolconst) {
6637 if(sizeof(bool) == 1)
6638 movb(dst, (int) boolconst);
6639 else if(sizeof(bool) == 2)
6640 movw(dst, (int) boolconst);
6641 else if(sizeof(bool) == 4)
6642 movl(dst, (int) boolconst);
6643 else
6644 // unsupported
6645 ShouldNotReachHere();
6646 }
6648 void MacroAssembler::movbool(Address dst, Register src) {
6649 if(sizeof(bool) == 1)
6650 movb(dst, src);
6651 else if(sizeof(bool) == 2)
6652 movw(dst, src);
6653 else if(sizeof(bool) == 4)
6654 movl(dst, src);
6655 else
6656 // unsupported
6657 ShouldNotReachHere();
6658 }
6660 void MacroAssembler::movbyte(ArrayAddress dst, int src) {
6661 movb(as_Address(dst), src);
6662 }
6664 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6665 if (reachable(src)) {
6666 if (UseXmmLoadAndClearUpper) {
6667 movsd (dst, as_Address(src));
6668 } else {
6669 movlpd(dst, as_Address(src));
6670 }
6671 } else {
6672 lea(rscratch1, src);
6673 if (UseXmmLoadAndClearUpper) {
6674 movsd (dst, Address(rscratch1, 0));
6675 } else {
6676 movlpd(dst, Address(rscratch1, 0));
6677 }
6678 }
6679 }
6681 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6682 if (reachable(src)) {
6683 movss(dst, as_Address(src));
6684 } else {
6685 lea(rscratch1, src);
6686 movss(dst, Address(rscratch1, 0));
6687 }
6688 }
6690 void MacroAssembler::movptr(Register dst, Register src) {
6691 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6692 }
6694 void MacroAssembler::movptr(Register dst, Address src) {
6695 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6696 }
6698 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
6699 void MacroAssembler::movptr(Register dst, intptr_t src) {
6700 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6701 }
6703 void MacroAssembler::movptr(Address dst, Register src) {
6704 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6705 }
6707 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6708 if (reachable(src)) {
6709 movss(dst, as_Address(src));
6710 } else {
6711 lea(rscratch1, src);
6712 movss(dst, Address(rscratch1, 0));
6713 }
6714 }
6716 void MacroAssembler::null_check(Register reg, int offset) {
6717 if (needs_explicit_null_check(offset)) {
6718 // provoke OS NULL exception if reg = NULL by
6719 // accessing M[reg] w/o changing any (non-CC) registers
6720 // NOTE: cmpl is plenty here to provoke a segv
6721 cmpptr(rax, Address(reg, 0));
6722 // Note: should probably use testl(rax, Address(reg, 0));
6723 // may be shorter code (however, this version of
6724 // testl needs to be implemented first)
6725 } else {
6726 // nothing to do, (later) access of M[reg + offset]
6727 // will provoke OS NULL exception if reg = NULL
6728 }
6729 }
6731 void MacroAssembler::os_breakpoint() {
6732 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
6733 // (e.g., MSVC can't call ps() otherwise)
6734 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
6735 }
6737 void MacroAssembler::pop_CPU_state() {
6738 pop_FPU_state();
6739 pop_IU_state();
6740 }
6742 void MacroAssembler::pop_FPU_state() {
6743 NOT_LP64(frstor(Address(rsp, 0));)
6744 LP64_ONLY(fxrstor(Address(rsp, 0));)
6745 addptr(rsp, FPUStateSizeInWords * wordSize);
6746 }
6748 void MacroAssembler::pop_IU_state() {
6749 popa();
6750 LP64_ONLY(addq(rsp, 8));
6751 popf();
6752 }
6754 // Save Integer and Float state
6755 // Warning: Stack must be 16 byte aligned (64bit)
6756 void MacroAssembler::push_CPU_state() {
6757 push_IU_state();
6758 push_FPU_state();
6759 }
6761 void MacroAssembler::push_FPU_state() {
6762 subptr(rsp, FPUStateSizeInWords * wordSize);
6763 #ifndef _LP64
6764 fnsave(Address(rsp, 0));
6765 fwait();
6766 #else
6767 fxsave(Address(rsp, 0));
6768 #endif // LP64
6769 }
6771 void MacroAssembler::push_IU_state() {
6772 // Push flags first because pusha kills them
6773 pushf();
6774 // Make sure rsp stays 16-byte aligned
6775 LP64_ONLY(subq(rsp, 8));
6776 pusha();
6777 }
6779 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
6780 // determine java_thread register
6781 if (!java_thread->is_valid()) {
6782 java_thread = rdi;
6783 get_thread(java_thread);
6784 }
6785 // we must set sp to zero to clear frame
6786 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
6787 if (clear_fp) {
6788 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
6789 }
6791 if (clear_pc)
6792 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
6794 }
6796 void MacroAssembler::restore_rax(Register tmp) {
6797 if (tmp == noreg) pop(rax);
6798 else if (tmp != rax) mov(rax, tmp);
6799 }
6801 void MacroAssembler::round_to(Register reg, int modulus) {
6802 addptr(reg, modulus - 1);
6803 andptr(reg, -modulus);
6804 }
6806 void MacroAssembler::save_rax(Register tmp) {
6807 if (tmp == noreg) push(rax);
6808 else if (tmp != rax) mov(tmp, rax);
6809 }
6811 // Write serialization page so VM thread can do a pseudo remote membar.
6812 // We use the current thread pointer to calculate a thread specific
6813 // offset to write to within the page. This minimizes bus traffic
6814 // due to cache line collision.
6815 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
6816 movl(tmp, thread);
6817 shrl(tmp, os::get_serialize_page_shift_count());
6818 andl(tmp, (os::vm_page_size() - sizeof(int)));
6820 Address index(noreg, tmp, Address::times_1);
6821 ExternalAddress page(os::get_memory_serialize_page());
6823 // Size of store must match masking code above
6824 movl(as_Address(ArrayAddress(page, index)), tmp);
6825 }
6827 // Calls to C land
6828 //
6829 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
6830 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
6831 // has to be reset to 0. This is required to allow proper stack traversal.
6832 void MacroAssembler::set_last_Java_frame(Register java_thread,
6833 Register last_java_sp,
6834 Register last_java_fp,
6835 address last_java_pc) {
6836 // determine java_thread register
6837 if (!java_thread->is_valid()) {
6838 java_thread = rdi;
6839 get_thread(java_thread);
6840 }
6841 // determine last_java_sp register
6842 if (!last_java_sp->is_valid()) {
6843 last_java_sp = rsp;
6844 }
6846 // last_java_fp is optional
6848 if (last_java_fp->is_valid()) {
6849 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
6850 }
6852 // last_java_pc is optional
6854 if (last_java_pc != NULL) {
6855 lea(Address(java_thread,
6856 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
6857 InternalAddress(last_java_pc));
6859 }
6860 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
6861 }
6863 void MacroAssembler::shlptr(Register dst, int imm8) {
6864 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6865 }
6867 void MacroAssembler::shrptr(Register dst, int imm8) {
6868 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6869 }
6871 void MacroAssembler::sign_extend_byte(Register reg) {
6872 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
6873 movsbl(reg, reg); // movsxb
6874 } else {
6875 shll(reg, 24);
6876 sarl(reg, 24);
6877 }
6878 }
6880 void MacroAssembler::sign_extend_short(Register reg) {
6881 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6882 movswl(reg, reg); // movsxw
6883 } else {
6884 shll(reg, 16);
6885 sarl(reg, 16);
6886 }
6887 }
6889 //////////////////////////////////////////////////////////////////////////////////
6890 #ifndef SERIALGC
6892 void MacroAssembler::g1_write_barrier_pre(Register obj,
6893 Register pre_val,
6894 Register thread,
6895 Register tmp,
6896 bool tosca_live,
6897 bool expand_call) {
6899 // If expand_call is true then we expand the call_VM_leaf macro
6900 // directly to skip generating the check by
6901 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
6903 #ifdef _LP64
6904 assert(thread == r15_thread, "must be");
6905 #endif // _LP64
6907 Label done;
6908 Label runtime;
6910 assert(pre_val != noreg, "check this code");
6912 if (obj != noreg) {
6913 assert_different_registers(obj, pre_val, tmp);
6914 assert(pre_val != rax, "check this code");
6915 }
6917 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6918 PtrQueue::byte_offset_of_active()));
6919 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6920 PtrQueue::byte_offset_of_index()));
6921 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6922 PtrQueue::byte_offset_of_buf()));
6925 // Is marking active?
6926 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
6927 cmpl(in_progress, 0);
6928 } else {
6929 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
6930 cmpb(in_progress, 0);
6931 }
6932 jcc(Assembler::equal, done);
6934 // Do we need to load the previous value?
6935 if (obj != noreg) {
6936 load_heap_oop(pre_val, Address(obj, 0));
6937 }
6939 // Is the previous value null?
6940 cmpptr(pre_val, (int32_t) NULL_WORD);
6941 jcc(Assembler::equal, done);
6943 // Can we store original value in the thread's buffer?
6944 // Is index == 0?
6945 // (The index field is typed as size_t.)
6947 movptr(tmp, index); // tmp := *index_adr
6948 cmpptr(tmp, 0); // tmp == 0?
6949 jcc(Assembler::equal, runtime); // If yes, goto runtime
6951 subptr(tmp, wordSize); // tmp := tmp - wordSize
6952 movptr(index, tmp); // *index_adr := tmp
6953 addptr(tmp, buffer); // tmp := tmp + *buffer_adr
6955 // Record the previous value
6956 movptr(Address(tmp, 0), pre_val);
6957 jmp(done);
6959 bind(runtime);
6960 // save the live input values
6961 if(tosca_live) push(rax);
6963 if (obj != noreg && obj != rax)
6964 push(obj);
6966 if (pre_val != rax)
6967 push(pre_val);
6969 // Calling the runtime using the regular call_VM_leaf mechanism generates
6970 // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
6971 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
6972 //
6973 // If we care generating the pre-barrier without a frame (e.g. in the
6974 // intrinsified Reference.get() routine) then ebp might be pointing to
6975 // the caller frame and so this check will most likely fail at runtime.
6976 //
6977 // Expanding the call directly bypasses the generation of the check.
6978 // So when we do not have have a full interpreter frame on the stack
6979 // expand_call should be passed true.
6981 NOT_LP64( push(thread); )
6983 if (expand_call) {
6984 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
6985 pass_arg1(this, thread);
6986 pass_arg0(this, pre_val);
6987 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
6988 } else {
6989 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
6990 }
6992 NOT_LP64( pop(thread); )
6994 // save the live input values
6995 if (pre_val != rax)
6996 pop(pre_val);
6998 if (obj != noreg && obj != rax)
6999 pop(obj);
7001 if(tosca_live) pop(rax);
7003 bind(done);
7004 }
7006 void MacroAssembler::g1_write_barrier_post(Register store_addr,
7007 Register new_val,
7008 Register thread,
7009 Register tmp,
7010 Register tmp2) {
7011 #ifdef _LP64
7012 assert(thread == r15_thread, "must be");
7013 #endif // _LP64
7015 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
7016 PtrQueue::byte_offset_of_index()));
7017 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
7018 PtrQueue::byte_offset_of_buf()));
7020 BarrierSet* bs = Universe::heap()->barrier_set();
7021 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
7022 Label done;
7023 Label runtime;
7025 // Does store cross heap regions?
7027 movptr(tmp, store_addr);
7028 xorptr(tmp, new_val);
7029 shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
7030 jcc(Assembler::equal, done);
7032 // crosses regions, storing NULL?
7034 cmpptr(new_val, (int32_t) NULL_WORD);
7035 jcc(Assembler::equal, done);
7037 // storing region crossing non-NULL, is card already dirty?
7039 ExternalAddress cardtable((address) ct->byte_map_base);
7040 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
7041 #ifdef _LP64
7042 const Register card_addr = tmp;
7044 movq(card_addr, store_addr);
7045 shrq(card_addr, CardTableModRefBS::card_shift);
7047 lea(tmp2, cardtable);
7049 // get the address of the card
7050 addq(card_addr, tmp2);
7051 #else
7052 const Register card_index = tmp;
7054 movl(card_index, store_addr);
7055 shrl(card_index, CardTableModRefBS::card_shift);
7057 Address index(noreg, card_index, Address::times_1);
7058 const Register card_addr = tmp;
7059 lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
7060 #endif
7061 cmpb(Address(card_addr, 0), 0);
7062 jcc(Assembler::equal, done);
7064 // storing a region crossing, non-NULL oop, card is clean.
7065 // dirty card and log.
7067 movb(Address(card_addr, 0), 0);
7069 cmpl(queue_index, 0);
7070 jcc(Assembler::equal, runtime);
7071 subl(queue_index, wordSize);
7072 movptr(tmp2, buffer);
7073 #ifdef _LP64
7074 movslq(rscratch1, queue_index);
7075 addq(tmp2, rscratch1);
7076 movq(Address(tmp2, 0), card_addr);
7077 #else
7078 addl(tmp2, queue_index);
7079 movl(Address(tmp2, 0), card_index);
7080 #endif
7081 jmp(done);
7083 bind(runtime);
7084 // save the live input values
7085 push(store_addr);
7086 push(new_val);
7087 #ifdef _LP64
7088 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
7089 #else
7090 push(thread);
7091 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
7092 pop(thread);
7093 #endif
7094 pop(new_val);
7095 pop(store_addr);
7097 bind(done);
7098 }
7100 #endif // SERIALGC
7101 //////////////////////////////////////////////////////////////////////////////////
7104 void MacroAssembler::store_check(Register obj) {
7105 // Does a store check for the oop in register obj. The content of
7106 // register obj is destroyed afterwards.
7107 store_check_part_1(obj);
7108 store_check_part_2(obj);
7109 }
7111 void MacroAssembler::store_check(Register obj, Address dst) {
7112 store_check(obj);
7113 }
7116 // split the store check operation so that other instructions can be scheduled inbetween
7117 void MacroAssembler::store_check_part_1(Register obj) {
7118 BarrierSet* bs = Universe::heap()->barrier_set();
7119 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
7120 shrptr(obj, CardTableModRefBS::card_shift);
7121 }
7123 void MacroAssembler::store_check_part_2(Register obj) {
7124 BarrierSet* bs = Universe::heap()->barrier_set();
7125 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
7126 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
7127 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
7129 // The calculation for byte_map_base is as follows:
7130 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
7131 // So this essentially converts an address to a displacement and
7132 // it will never need to be relocated. On 64bit however the value may be too
7133 // large for a 32bit displacement
7135 intptr_t disp = (intptr_t) ct->byte_map_base;
7136 if (is_simm32(disp)) {
7137 Address cardtable(noreg, obj, Address::times_1, disp);
7138 movb(cardtable, 0);
7139 } else {
7140 // By doing it as an ExternalAddress disp could be converted to a rip-relative
7141 // displacement and done in a single instruction given favorable mapping and
7142 // a smarter version of as_Address. Worst case it is two instructions which
7143 // is no worse off then loading disp into a register and doing as a simple
7144 // Address() as above.
7145 // We can't do as ExternalAddress as the only style since if disp == 0 we'll
7146 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
7147 // in some cases we'll get a single instruction version.
7149 ExternalAddress cardtable((address)disp);
7150 Address index(noreg, obj, Address::times_1);
7151 movb(as_Address(ArrayAddress(cardtable, index)), 0);
7152 }
7153 }
7155 void MacroAssembler::subptr(Register dst, int32_t imm32) {
7156 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
7157 }
7159 void MacroAssembler::subptr(Register dst, Register src) {
7160 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
7161 }
7163 void MacroAssembler::test32(Register src1, AddressLiteral src2) {
7164 // src2 must be rval
7166 if (reachable(src2)) {
7167 testl(src1, as_Address(src2));
7168 } else {
7169 lea(rscratch1, src2);
7170 testl(src1, Address(rscratch1, 0));
7171 }
7172 }
7174 // C++ bool manipulation
7175 void MacroAssembler::testbool(Register dst) {
7176 if(sizeof(bool) == 1)
7177 testb(dst, 0xff);
7178 else if(sizeof(bool) == 2) {
7179 // testw implementation needed for two byte bools
7180 ShouldNotReachHere();
7181 } else if(sizeof(bool) == 4)
7182 testl(dst, dst);
7183 else
7184 // unsupported
7185 ShouldNotReachHere();
7186 }
7188 void MacroAssembler::testptr(Register dst, Register src) {
7189 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
7190 }
7192 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
7193 void MacroAssembler::tlab_allocate(Register obj,
7194 Register var_size_in_bytes,
7195 int con_size_in_bytes,
7196 Register t1,
7197 Register t2,
7198 Label& slow_case) {
7199 assert_different_registers(obj, t1, t2);
7200 assert_different_registers(obj, var_size_in_bytes, t1);
7201 Register end = t2;
7202 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
7204 verify_tlab();
7206 NOT_LP64(get_thread(thread));
7208 movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
7209 if (var_size_in_bytes == noreg) {
7210 lea(end, Address(obj, con_size_in_bytes));
7211 } else {
7212 lea(end, Address(obj, var_size_in_bytes, Address::times_1));
7213 }
7214 cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
7215 jcc(Assembler::above, slow_case);
7217 // update the tlab top pointer
7218 movptr(Address(thread, JavaThread::tlab_top_offset()), end);
7220 // recover var_size_in_bytes if necessary
7221 if (var_size_in_bytes == end) {
7222 subptr(var_size_in_bytes, obj);
7223 }
7224 verify_tlab();
7225 }
7227 // Preserves rbx, and rdx.
7228 Register MacroAssembler::tlab_refill(Label& retry,
7229 Label& try_eden,
7230 Label& slow_case) {
7231 Register top = rax;
7232 Register t1 = rcx;
7233 Register t2 = rsi;
7234 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
7235 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
7236 Label do_refill, discard_tlab;
7238 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
7239 // No allocation in the shared eden.
7240 jmp(slow_case);
7241 }
7243 NOT_LP64(get_thread(thread_reg));
7245 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7246 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7248 // calculate amount of free space
7249 subptr(t1, top);
7250 shrptr(t1, LogHeapWordSize);
7252 // Retain tlab and allocate object in shared space if
7253 // the amount free in the tlab is too large to discard.
7254 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
7255 jcc(Assembler::lessEqual, discard_tlab);
7257 // Retain
7258 // %%% yuck as movptr...
7259 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
7260 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
7261 if (TLABStats) {
7262 // increment number of slow_allocations
7263 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
7264 }
7265 jmp(try_eden);
7267 bind(discard_tlab);
7268 if (TLABStats) {
7269 // increment number of refills
7270 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
7271 // accumulate wastage -- t1 is amount free in tlab
7272 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
7273 }
7275 // if tlab is currently allocated (top or end != null) then
7276 // fill [top, end + alignment_reserve) with array object
7277 testptr(top, top);
7278 jcc(Assembler::zero, do_refill);
7280 // set up the mark word
7281 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
7282 // set the length to the remaining space
7283 subptr(t1, typeArrayOopDesc::header_size(T_INT));
7284 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
7285 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
7286 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
7287 // set klass to intArrayKlass
7288 // dubious reloc why not an oop reloc?
7289 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
7290 // store klass last. concurrent gcs assumes klass length is valid if
7291 // klass field is not null.
7292 store_klass(top, t1);
7294 movptr(t1, top);
7295 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7296 incr_allocated_bytes(thread_reg, t1, 0);
7298 // refill the tlab with an eden allocation
7299 bind(do_refill);
7300 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7301 shlptr(t1, LogHeapWordSize);
7302 // allocate new tlab, address returned in top
7303 eden_allocate(top, t1, 0, t2, slow_case);
7305 // Check that t1 was preserved in eden_allocate.
7306 #ifdef ASSERT
7307 if (UseTLAB) {
7308 Label ok;
7309 Register tsize = rsi;
7310 assert_different_registers(tsize, thread_reg, t1);
7311 push(tsize);
7312 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7313 shlptr(tsize, LogHeapWordSize);
7314 cmpptr(t1, tsize);
7315 jcc(Assembler::equal, ok);
7316 stop("assert(t1 != tlab size)");
7317 should_not_reach_here();
7319 bind(ok);
7320 pop(tsize);
7321 }
7322 #endif
7323 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
7324 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
7325 addptr(top, t1);
7326 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
7327 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
7328 verify_tlab();
7329 jmp(retry);
7331 return thread_reg; // for use by caller
7332 }
7334 void MacroAssembler::incr_allocated_bytes(Register thread,
7335 Register var_size_in_bytes,
7336 int con_size_in_bytes,
7337 Register t1) {
7338 #ifdef _LP64
7339 if (var_size_in_bytes->is_valid()) {
7340 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
7341 } else {
7342 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
7343 }
7344 #else
7345 if (!thread->is_valid()) {
7346 assert(t1->is_valid(), "need temp reg");
7347 thread = t1;
7348 get_thread(thread);
7349 }
7351 if (var_size_in_bytes->is_valid()) {
7352 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
7353 } else {
7354 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
7355 }
7356 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
7357 #endif
7358 }
7360 static const double pi_4 = 0.7853981633974483;
7362 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
7363 // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
7364 // was attempted in this code; unfortunately it appears that the
7365 // switch to 80-bit precision and back causes this to be
7366 // unprofitable compared with simply performing a runtime call if
7367 // the argument is out of the (-pi/4, pi/4) range.
7369 Register tmp = noreg;
7370 if (!VM_Version::supports_cmov()) {
7371 // fcmp needs a temporary so preserve rbx,
7372 tmp = rbx;
7373 push(tmp);
7374 }
7376 Label slow_case, done;
7378 ExternalAddress pi4_adr = (address)&pi_4;
7379 if (reachable(pi4_adr)) {
7380 // x ?<= pi/4
7381 fld_d(pi4_adr);
7382 fld_s(1); // Stack: X PI/4 X
7383 fabs(); // Stack: |X| PI/4 X
7384 fcmp(tmp);
7385 jcc(Assembler::above, slow_case);
7387 // fastest case: -pi/4 <= x <= pi/4
7388 switch(trig) {
7389 case 's':
7390 fsin();
7391 break;
7392 case 'c':
7393 fcos();
7394 break;
7395 case 't':
7396 ftan();
7397 break;
7398 default:
7399 assert(false, "bad intrinsic");
7400 break;
7401 }
7402 jmp(done);
7403 }
7405 // slow case: runtime call
7406 bind(slow_case);
7407 // Preserve registers across runtime call
7408 pusha();
7409 int incoming_argument_and_return_value_offset = -1;
7410 if (num_fpu_regs_in_use > 1) {
7411 // Must preserve all other FPU regs (could alternatively convert
7412 // SharedRuntime::dsin and dcos into assembly routines known not to trash
7413 // FPU state, but can not trust C compiler)
7414 NEEDS_CLEANUP;
7415 // NOTE that in this case we also push the incoming argument to
7416 // the stack and restore it later; we also use this stack slot to
7417 // hold the return value from dsin or dcos.
7418 for (int i = 0; i < num_fpu_regs_in_use; i++) {
7419 subptr(rsp, sizeof(jdouble));
7420 fstp_d(Address(rsp, 0));
7421 }
7422 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
7423 fld_d(Address(rsp, incoming_argument_and_return_value_offset));
7424 }
7425 subptr(rsp, sizeof(jdouble));
7426 fstp_d(Address(rsp, 0));
7427 #ifdef _LP64
7428 movdbl(xmm0, Address(rsp, 0));
7429 #endif // _LP64
7431 // NOTE: we must not use call_VM_leaf here because that requires a
7432 // complete interpreter frame in debug mode -- same bug as 4387334
7433 // MacroAssembler::call_VM_leaf_base is perfectly safe and will
7434 // do proper 64bit abi
7436 NEEDS_CLEANUP;
7437 // Need to add stack banging before this runtime call if it needs to
7438 // be taken; however, there is no generic stack banging routine at
7439 // the MacroAssembler level
7440 switch(trig) {
7441 case 's':
7442 {
7443 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
7444 }
7445 break;
7446 case 'c':
7447 {
7448 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
7449 }
7450 break;
7451 case 't':
7452 {
7453 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
7454 }
7455 break;
7456 default:
7457 assert(false, "bad intrinsic");
7458 break;
7459 }
7460 #ifdef _LP64
7461 movsd(Address(rsp, 0), xmm0);
7462 fld_d(Address(rsp, 0));
7463 #endif // _LP64
7464 addptr(rsp, sizeof(jdouble));
7465 if (num_fpu_regs_in_use > 1) {
7466 // Must save return value to stack and then restore entire FPU stack
7467 fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
7468 for (int i = 0; i < num_fpu_regs_in_use; i++) {
7469 fld_d(Address(rsp, 0));
7470 addptr(rsp, sizeof(jdouble));
7471 }
7472 }
7473 popa();
7475 // Come here with result in F-TOS
7476 bind(done);
7478 if (tmp != noreg) {
7479 pop(tmp);
7480 }
7481 }
7484 // Look up the method for a megamorphic invokeinterface call.
7485 // The target method is determined by <intf_klass, itable_index>.
7486 // The receiver klass is in recv_klass.
7487 // On success, the result will be in method_result, and execution falls through.
7488 // On failure, execution transfers to the given label.
//
// Register effects visible in the body below:
//  - recv_klass is overwritten: it is advanced by the scaled itable_index
//    plus the method offset inside an itableMethodEntry.
//  - scan_temp is overwritten: first the itable cursor, finally the offset
//    read from the matching itableOffsetEntry.
//  - method_result is also used as scratch for every interface slot examined.
//  - A non-constant itable_index must be passed in method_result (asserted).
7489 void MacroAssembler::lookup_interface_method(Register recv_klass,
7490 Register intf_klass,
7491 RegisterOrConstant itable_index,
7492 Register method_result,
7493 Register scan_temp,
7494 Label& L_no_such_interface) {
7495 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
7496 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
7497 "caller must use same register for non-constant itable index as for method");
7499 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
7500 int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
7501 int itentry_off = itableMethodEntry::method_offset_in_bytes();
7502 int scan_step = itableOffsetEntry::size() * wordSize;
7503 int vte_size = vtableEntry::size() * wordSize;
7504 Address::ScaleFactor times_vte_scale = Address::times_ptr;
7505 assert(vte_size == wordSize, "else adjust times_vte_scale");
// scan_temp = vtable length field of the receiver klass (32-bit load).
7507 movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
7509 // %%% Could store the aligned, prescaled offset in the klassoop.
// scan_temp = recv_klass + vtable_base + length * entry size, i.e. the
// address just past the last vtable entry.
7510 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
7511 if (HeapWordsPerLong > 1) {
7512 // Round up to align_object_offset boundary
7513 // see code for instanceKlass::start_of_itable!
7514 round_to(scan_temp, BytesPerLong);
7515 }
7517 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
7518 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
7519 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
7521 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
7522 // if (scan->interface() == intf) {
7523 // result = (klass + scan->offset() + itable_index);
7524 // }
7525 // }
7526 Label search, found_method;
// The compare sequence is emitted twice. The first copy (peel == 1) tests
// the first entry and branches forward to found_method on a hit; on a miss
// it falls into the 'search' block. The second copy (peel == 0) is the loop
// proper: on a miss it branches back to 'search', on a hit it falls through
// to found_method. The C++ loop exits before 'search' would be bound twice.
7528 for (int peel = 1; peel >= 0; peel--) {
7529 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
7530 cmpptr(intf_klass, method_result);
7532 if (peel) {
7533 jccb(Assembler::equal, found_method);
7534 } else {
7535 jccb(Assembler::notEqual, search);
7536 // (invert the test to fall through to found_method...)
7537 }
7539 if (!peel) break;
7541 bind(search);
7543 // Check that the previous entry is non-null. A null entry means that
7544 // the receiver class doesn't implement the interface, and wasn't the
7545 // same as when the caller was compiled.
7546 testptr(method_result, method_result);
7547 jcc(Assembler::zero, L_no_such_interface);
// Advance the cursor to the next itableOffsetEntry.
7548 addptr(scan_temp, scan_step);
7549 }
7551 bind(found_method);
7553 // Got a hit.
// scan_temp = offset field of the matching entry (32-bit load); the method
// is then read from recv_klass (already biased by itable_index) + offset.
7554 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
7555 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
7556 }
7559 void MacroAssembler::check_klass_subtype(Register sub_klass,
7560 Register super_klass,
7561 Register temp_reg,
7562 Label& L_success) {
7563 Label L_failure;
7564 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
7565 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
7566 bind(L_failure);
7567 }
// Emit the fast path of a subtype check of sub_klass against super_klass.
//
// Outcomes are delivered through three labels: L_success, L_failure and
// L_slow_path. Passing NULL for a label means "fall through to the end of
// the emitted sequence" for that outcome; at most one label may be NULL
// (asserted below).
//
// super_check_offset may be a constant or a register. A constant of -1 means
// the offset is not known here: it is then loaded from super_klass into
// temp_reg, which must therefore be a real register in that case (asserted).
// A register offset must differ from sub_klass and super_klass.
7570 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
7571 Register super_klass,
7572 Register temp_reg,
7573 Label* L_success,
7574 Label* L_failure,
7575 Label* L_slow_path,
7576 RegisterOrConstant super_check_offset) {
7577 assert_different_registers(sub_klass, super_klass, temp_reg);
7578 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
7579 if (super_check_offset.is_register()) {
7580 assert_different_registers(sub_klass, super_klass,
7581 super_check_offset.as_register());
7582 } else if (must_load_sco) {
7583 assert(temp_reg != noreg, "supply either a temp or a register offset");
7584 }
// Redirect every NULL label to a local fall-through label bound at the end.
7586 Label L_fallthrough;
7587 int label_nulls = 0;
7588 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
7589 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
7590 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
7591 assert(label_nulls <= 1, "at most one NULL in the batch");
// sc_offset:  byte offset of the secondary super cache within a klass.
// sco_offset: byte offset of the super_check_offset field within a klass.
7593 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7594 Klass::secondary_super_cache_offset_in_bytes());
7595 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
7596 Klass::super_check_offset_offset_in_bytes());
7597 Address super_check_offset_addr(super_klass, sco_offset);
7599 // Hacked jcc, which "knows" that L_fallthrough, at least, is in
7600 // range of a jccb. If this routine grows larger, reconsider at
7601 // least some of these.
7602 #define local_jcc(assembler_cond, label) \
7603 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
7604 else jcc( assembler_cond, label) /*omit semi*/
7606 // Hacked jmp, which may only be used just before L_fallthrough.
7607 #define final_jmp(label) \
7608 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
7609 else jmp(label) /*omit semi*/
7611 // If the pointers are equal, we are done (e.g., String[] elements).
7612 // This self-check enables sharing of secondary supertype arrays among
7613 // non-primary types such as array-of-interface. Otherwise, each such
7614 // type would need its own customized SSA.
7615 // We move this check to the front of the fast path because many
7616 // type checks are in fact trivially successful in this manner,
7617 // so we get a nicely predicted branch right at the start of the check.
7618 cmpptr(sub_klass, super_klass);
7619 local_jcc(Assembler::equal, *L_success);
7621 // Check the supertype display:
7622 if (must_load_sco) {
7623 // Positive movl does right thing on LP64.
7624 movl(temp_reg, super_check_offset_addr);
// From here on the offset is treated as a register operand, so the
// "is_register()" case below is the one that gets emitted.
7625 super_check_offset = RegisterOrConstant(temp_reg);
7626 }
7627 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
7628 cmpptr(super_klass, super_check_addr); // load displayed supertype
7630 // This check has worked decisively for primary supers.
7631 // Secondary supers are sought in the super_cache ('super_cache_addr').
7632 // (Secondary supers are interfaces and very deeply nested subtypes.)
7633 // This works in the same check above because of a tricky aliasing
7634 // between the super_cache and the primary super display elements.
7635 // (The 'super_check_addr' can address either, as the case requires.)
7636 // Note that the cache is updated below if it does not help us find
7637 // what we need immediately.
7638 // So if it was a primary super, we can just fail immediately.
7639 // Otherwise, it's the slow path for us (no success at this point).
// Case 1: offset only known at run time. A match is success; otherwise the
// offset is compared with sc_offset to decide between slow path (the slot
// examined was the secondary super cache) and outright failure.
7641 if (super_check_offset.is_register()) {
7642 local_jcc(Assembler::equal, *L_success);
7643 cmpl(super_check_offset.as_register(), sc_offset);
7644 if (L_failure == &L_fallthrough) {
7645 local_jcc(Assembler::equal, *L_slow_path);
7646 } else {
7647 local_jcc(Assembler::notEqual, *L_failure);
7648 final_jmp(*L_slow_path);
7649 }
// Case 2: constant offset that names the secondary super cache slot.
7650 } else if (super_check_offset.as_constant() == sc_offset) {
7651 // Need a slow path; fast failure is impossible.
7652 if (L_slow_path == &L_fallthrough) {
7653 local_jcc(Assembler::equal, *L_success);
7654 } else {
7655 local_jcc(Assembler::notEqual, *L_slow_path);
7656 final_jmp(*L_success);
7657 }
// Case 3: any other constant offset; the single compare above is decisive.
7658 } else {
7659 // No slow path; it's a fast decision.
7660 if (L_failure == &L_fallthrough) {
7661 local_jcc(Assembler::equal, *L_success);
7662 } else {
7663 local_jcc(Assembler::notEqual, *L_failure);
7664 final_jmp(*L_success);
7665 }
7666 }
7668 bind(L_fallthrough);
7670 #undef local_jcc
7671 #undef final_jmp
7672 }
// Emit the slow path of a subtype check: a linear scan of sub_klass's
// secondary supers array for super_klass.
//
// On a hit, super_klass is stored into sub_klass's secondary super cache and
// control goes to L_success; on a miss control goes to L_failure. A NULL
// label means "fall through"; at most one of the two may be NULL (asserted).
//
// The scan uses rax, rcx and rdi. Each of them is pushed before the scan and
// popped afterwards unless it was passed as temp_reg or temp2_reg.
// sub_klass must not be rax or rcx (asserted).
//
// set_cond_codes: when true, the code asserts that rdi was not spilled, i.e.
// that the caller handed rdi in as a temp (see the comment at the assert).
7675 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
7676 Register super_klass,
7677 Register temp_reg,
7678 Register temp2_reg,
7679 Label* L_success,
7680 Label* L_failure,
7681 bool set_cond_codes) {
7682 assert_different_registers(sub_klass, super_klass, temp_reg);
7683 if (temp2_reg != noreg)
7684 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
7685 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
// Redirect a NULL label to a local fall-through label bound at the end.
7687 Label L_fallthrough;
7688 int label_nulls = 0;
7689 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
7690 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
7691 assert(label_nulls <= 1, "at most one NULL in the batch");
7693 // a couple of useful fields in sub_klass:
7694 int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
7695 Klass::secondary_supers_offset_in_bytes());
7696 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7697 Klass::secondary_super_cache_offset_in_bytes());
7698 Address secondary_supers_addr(sub_klass, ss_offset);
7699 Address super_cache_addr( sub_klass, sc_offset);
7701 // Do a linear scan of the secondary super-klass chain.
7702 // This code is rarely used, so simplicity is a virtue here.
7703 // The repne_scan instruction uses fixed registers, which we must spill.
7704 // Don't worry too much about pre-existing connections with the input regs.
7706 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
7707 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
7709 // Get super_klass value into rax (even if it was in rdi or rcx).
// With UseCompressedOops rax is encoded in place further down, so it is
// saved here even when super_klass already is rax.
7710 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
7711 if (super_klass != rax || UseCompressedOops) {
7712 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
7713 mov(rax, super_klass);
7714 }
7715 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
7716 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
// Non-product builds bump SharedRuntime::_partial_subtype_ctr each time this
// path runs; on LP64 the counter address is materialized in rcx first.
7718 #ifndef PRODUCT
7719 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
7720 ExternalAddress pst_counter_addr((address) pst_counter);
7721 NOT_LP64( incrementl(pst_counter_addr) );
7722 LP64_ONLY( lea(rcx, pst_counter_addr) );
7723 LP64_ONLY( incrementl(Address(rcx, 0)) );
7724 #endif //PRODUCT
7726 // We will consult the secondary-super array.
7727 movptr(rdi, secondary_supers_addr);
7728 // Load the array length. (Positive movl does right thing on LP64.)
7729 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
7730 // Skip to start of data.
7731 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
7733 // Scan RCX words at [RDI] for an occurrence of RAX.
7734 // Set NZ/Z based on last compare.
7735 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
7736 // not change flags (only scas instruction which is repeated sets flags).
7737 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
7738 #ifdef _LP64
7739 // This part is tricky, as values in supers array could be 32 or 64 bit wide
7740 // and we store values in objArrays always encoded, thus we need to encode
7741 // the value of rax before repne. Note that rax is dead after the repne.
7742 if (UseCompressedOops) {
7743 encode_heap_oop_not_null(rax); // Changes flags.
7744 // The superclass is never null; it would be a basic system error if a null
7745 // pointer were to sneak in here. Note that we have already loaded the
7746 // Klass::super_check_offset from the super_klass in the fast path,
7747 // so if there is a null in that register, we are already in the afterlife.
7748 testl(rax,rax); // Set Z = 0
7749 repne_scanl();
7750 } else
7751 #endif // _LP64
7752 {
7753 testptr(rax,rax); // Set Z = 0
7754 repne_scan();
7755 }
7756 // Unspill the temp. registers:
// (popped in the reverse of the push order above; the branch on the scan's
// flags is emitted only after these pops)
7757 if (pushed_rdi) pop(rdi);
7758 if (pushed_rcx) pop(rcx);
7759 if (pushed_rax) pop(rax);
7761 if (set_cond_codes) {
7762 // Special hack for the AD files: rdi is guaranteed non-zero.
7763 assert(!pushed_rdi, "rdi must be left non-NULL");
7764 // Also, the condition codes are properly set Z/NZ on succeed/failure.
7765 }
// Miss: short branch when the target is the local fall-through label.
7767 if (L_failure == &L_fallthrough)
7768 jccb(Assembler::notEqual, *L_failure);
7769 else jcc(Assembler::notEqual, *L_failure);
7771 // Success. Cache the super we found and proceed in triumph.
7772 movptr(super_cache_addr, super_klass);
7774 if (L_success != &L_fallthrough) {
7775 jmp(*L_success);
7776 }
7778 #undef IS_A_TEMP
7780 bind(L_fallthrough);
7781 }
7784 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7785 ucomisd(dst, as_Address(src));
7786 }
7788 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7789 ucomiss(dst, as_Address(src));
7790 }
7792 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7793 if (reachable(src)) {
7794 xorpd(dst, as_Address(src));
7795 } else {
7796 lea(rscratch1, src);
7797 xorpd(dst, Address(rscratch1, 0));
7798 }
7799 }
7801 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7802 if (reachable(src)) {
7803 xorps(dst, as_Address(src));
7804 } else {
7805 lea(rscratch1, src);
7806 xorps(dst, Address(rscratch1, 0));
7807 }
7808 }
7810 void MacroAssembler::verify_oop(Register reg, const char* s) {
7811 if (!VerifyOops) return;
7813 // Pass register number to verify_oop_subroutine
7814 char* b = new char[strlen(s) + 50];
7815 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
7816 #ifdef _LP64
7817 push(rscratch1); // save r10, trashed by movptr()
7818 #endif
7819 push(rax); // save rax,
7820 push(reg); // pass register argument
7821 ExternalAddress buffer((address) b);
7822 // avoid using pushptr, as it modifies scratch registers
7823 // and our contract is not to modify anything
7824 movptr(rax, buffer.addr());
7825 push(rax);
7826 // call indirectly to solve generation ordering problem
7827 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7828 call(rax);
7829 // Caller pops the arguments (oop, message) and restores rax, r10
7830 }
7833 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
7834 Register tmp,
7835 int offset) {
7836 intptr_t value = *delayed_value_addr;
7837 if (value != 0)
7838 return RegisterOrConstant(value + offset);
7840 // load indirectly to solve generation ordering problem
7841 movptr(tmp, ExternalAddress((address) delayed_value_addr));
7843 #ifdef ASSERT
7844 { Label L;
7845 testptr(tmp, tmp);
7846 if (WizardMode) {
7847 jcc(Assembler::notZero, L);
7848 char* buf = new char[40];
7849 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
7850 stop(buf);
7851 } else {
7852 jccb(Assembler::notZero, L);
7853 hlt();
7854 }
7855 bind(L);
7856 }
7857 #endif
7859 if (offset != 0)
7860 addptr(tmp, offset);
7862 return RegisterOrConstant(tmp);
7863 }
7866 // registers on entry:
7867 // - rax ('check' register): required MethodType
7868 // - rcx: method handle
7869 // - rdx, rsi, or ?: killable temp
7870 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
7871 Register temp_reg,
7872 Label& wrong_method_type) {
7873 Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
7874 // compare method type against that of the receiver
7875 if (UseCompressedOops) {
7876 load_heap_oop(temp_reg, type_addr);
7877 cmpptr(mtype_reg, temp_reg);
7878 } else {
7879 cmpptr(mtype_reg, type_addr);
7880 }
7881 jcc(Assembler::notEqual, wrong_method_type);
7882 }
7885 // A method handle has a "vmslots" field which gives the size of its
7886 // argument list in JVM stack slots. This field is either located directly
7887 // in every method handle, or else is indirectly accessed through the
7888 // method handle's MethodType. This macro hides the distinction.
7889 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
7890 Register temp_reg) {
7891 assert_different_registers(vmslots_reg, mh_reg, temp_reg);
7892 // load mh.type.form.vmslots
7893 if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
7894 // hoist vmslots into every mh to avoid dependent load chain
7895 movl(vmslots_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
7896 } else {
7897 Register temp2_reg = vmslots_reg;
7898 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
7899 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
7900 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
7901 }
7902 }
7905 // registers on entry:
7906 // - rcx: method handle
7907 // - rdx: killable temp (interpreted only)
7908 // - rax: killable temp (compiled only)
7909 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
7910 assert(mh_reg == rcx, "caller must put MH object in rcx");
7911 assert_different_registers(mh_reg, temp_reg);
7913 // pick out the interpreted side of the handler
7914 // NOTE: vmentry is not an oop!
7915 movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
7917 // off we go...
7918 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
7920 // for the various stubs which take control at this point,
7921 // see MethodHandles::generate_method_handle_stub
7922 }
7925 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
7926 int extra_slot_offset) {
7927 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
7928 int stackElementSize = Interpreter::stackElementSize;
7929 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
7930 #ifdef ASSERT
7931 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
7932 assert(offset1 - offset == stackElementSize, "correct arithmetic");
7933 #endif
7934 Register scale_reg = noreg;
7935 Address::ScaleFactor scale_factor = Address::no_scale;
7936 if (arg_slot.is_constant()) {
7937 offset += arg_slot.as_constant() * stackElementSize;
7938 } else {
7939 scale_reg = arg_slot.as_register();
7940 scale_factor = Address::times(stackElementSize);
7941 }
7942 offset += wordSize; // return PC is on stack
7943 return Address(rsp, scale_reg, scale_factor, offset);
7944 }
7947 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
7948 if (!VerifyOops) return;
7950 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
7951 // Pass register number to verify_oop_subroutine
7952 char* b = new char[strlen(s) + 50];
7953 sprintf(b, "verify_oop_addr: %s", s);
7955 #ifdef _LP64
7956 push(rscratch1); // save r10, trashed by movptr()
7957 #endif
7958 push(rax); // save rax,
7959 // addr may contain rsp so we will have to adjust it based on the push
7960 // we just did
7961 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
7962 // stores rax into addr which is backwards of what was intended.
7963 if (addr.uses(rsp)) {
7964 lea(rax, addr);
7965 pushptr(Address(rax, BytesPerWord));
7966 } else {
7967 pushptr(addr);
7968 }
7970 ExternalAddress buffer((address) b);
7971 // pass msg argument
7972 // avoid using pushptr, as it modifies scratch registers
7973 // and our contract is not to modify anything
7974 movptr(rax, buffer.addr());
7975 push(rax);
7977 // call indirectly to solve generation ordering problem
7978 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7979 call(rax);
7980 // Caller pops the arguments (addr, message) and restores rax, r10.
7981 }
7983 void MacroAssembler::verify_tlab() {
7984 #ifdef ASSERT
7985 if (UseTLAB && VerifyOops) {
7986 Label next, ok;
7987 Register t1 = rsi;
7988 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
7990 push(t1);
7991 NOT_LP64(push(thread_reg));
7992 NOT_LP64(get_thread(thread_reg));
7994 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7995 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7996 jcc(Assembler::aboveEqual, next);
7997 stop("assert(top >= start)");
7998 should_not_reach_here();
8000 bind(next);
8001 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
8002 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
8003 jcc(Assembler::aboveEqual, ok);
8004 stop("assert(top <= end)");
8005 should_not_reach_here();
8007 bind(ok);
8008 NOT_LP64(pop(thread_reg));
8009 pop(t1);
8010 }
8011 #endif
8012 }
// Decoded view of an x87 FPU control word held in the low 16 bits of _value.
class ControlWord {
 public:
  int32_t _value;

  // Two-bit fields.
  int  rounding_control() const  { return (_value >> 10) & 3; }
  int  precision_control() const { return (_value >>  8) & 3; }

  // Single-bit exception masks, bits 5 down to 0.
  bool precision() const         { return (_value & 0x20) != 0; }
  bool underflow() const         { return (_value & 0x10) != 0; }
  bool overflow() const          { return (_value & 0x08) != 0; }
  bool zero_divide() const       { return (_value & 0x04) != 0; }
  bool denormalized() const      { return (_value & 0x02) != 0; }
  bool invalid() const           { return (_value & 0x01) != 0; }

  // Print the low 16 bits in hex, the mask letters (upper case when the bit
  // is set) and the names of the rounding and precision modes.
  void print() const {
    // Both selectors are two-bit values, so every index is in range.
    static const char* const rounding_names[4] = {
      "round near", "round down", "round up ", "chop "
    };
    static const char* const precision_names[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char* rc = rounding_names[rounding_control()];
    const char* pc = precision_names[precision_control()];

    const char f[9] = {
      ' ',
      ' ',
      precision()    ? 'P' : 'p',
      underflow()    ? 'U' : 'u',
      overflow()     ? 'O' : 'o',
      zero_divide()  ? 'Z' : 'z',
      denormalized() ? 'D' : 'd',
      invalid()      ? 'I' : 'i',
      '\x0'
    };

    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }
};
// Decoded view of an x87 FPU status word held in the low 16 bits of _value.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const         { return (_value & 0x8000) != 0; }

  // Condition code bits.
  bool C3() const           { return (_value & 0x4000) != 0; }
  bool C2() const           { return (_value & 0x0400) != 0; }
  bool C1() const           { return (_value & 0x0200) != 0; }
  bool C0() const           { return (_value & 0x0100) != 0; }

  // Three-bit top-of-stack field.
  int  top() const          { return (_value >> 11) & 7; }

  // Summary and exception flags, bits 7 down to 0.
  bool error_status() const { return (_value & 0x80) != 0; }
  bool stack_fault() const  { return (_value & 0x40) != 0; }
  bool precision() const    { return (_value & 0x20) != 0; }
  bool underflow() const    { return (_value & 0x10) != 0; }
  bool overflow() const     { return (_value & 0x08) != 0; }
  bool zero_divide() const  { return (_value & 0x04) != 0; }
  bool denormalized() const { return (_value & 0x02) != 0; }
  bool invalid() const      { return (_value & 0x01) != 0; }

  // Print the low 16 bits in hex, one letter per set flag ('-' when clear),
  // the condition codes and the top-of-stack index.
  void print() const {
    const char c[5] = {
      C3() ? '3' : '-',
      C2() ? '2' : '-',
      C1() ? '1' : '-',
      C0() ? '0' : '-',
      '\x0'
    };
    const char f[9] = {
      error_status() ? 'E' : '-',
      stack_fault()  ? 'S' : '-',
      precision()    ? 'P' : '-',
      underflow()    ? 'U' : '-',
      overflow()     ? 'O' : '-',
      zero_divide()  ? 'Z' : '-',
      denormalized() ? 'D' : '-',
      invalid()      ? 'I' : '-',
      '\x0'
    };
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }
};
// View of an x87 FPU tag word: consecutive two-bit tags packed into _value,
// tag 0 in the lowest bits.
class TagWord {
 public:
  int32_t _value;

  // Two-bit tag number i.
  int tag_at(int i) const {
    const int shift = i * 2;
    return (_value >> shift) & 3;
  }

  // Print the low 16 bits in hex.
  void print() const {
    const int low16 = _value & 0xFFFF;
    printf("%04x", low16);
  }
};
// One saved x87 register: two 32-bit mantissa words (_m0 low, _m1 high) and
// a 16-bit sign/exponent word.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the bit pattern _ex == -1, _m1 == 0xC0000000, _m0 == 0.
  bool is_indefinite() const {
    if (_ex != -1)                  return false;
    if (_m1 != (int32_t)0xC0000000) return false;
    return _m0 == 0;
  }

  // Print sign, exponent word and mantissa in hex; values whose exponent
  // word is 0x7FFF or -1 are tagged "NaN".
  void print() const {
    const bool nan_exponent = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    const char sign         = (_ex < 0) ? '-' : '+';
    const char* kind        = nan_exponent ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  }
};
8135 class FPU_State {
8136 public:
8137 enum {
8138 register_size = 10,
8139 number_of_registers = 8,
8140 register_mask = 7
8141 };
8143 ControlWord _control_word;
8144 StatusWord _status_word;
8145 TagWord _tag_word;
8146 int32_t _error_offset;
8147 int32_t _error_selector;
8148 int32_t _data_offset;
8149 int32_t _data_selector;
8150 int8_t _register[register_size * number_of_registers];
8152 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
8153 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
8155 const char* tag_as_string(int tag) const {
8156 switch (tag) {
8157 case 0: return "valid";
8158 case 1: return "zero";
8159 case 2: return "special";
8160 case 3: return "empty";
8161 }
8162 ShouldNotReachHere();
8163 return NULL;
8164 }
8166 void print() const {
8167 // print computation registers
8168 { int t = _status_word.top();
8169 for (int i = 0; i < number_of_registers; i++) {
8170 int j = (i - t) & register_mask;
8171 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
8172 st(j)->print();
8173 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
8174 }
8175 }
8176 printf("\n");
8177 // print control registers
8178 printf("ctrl = "); _control_word.print(); printf("\n");
8179 printf("stat = "); _status_word .print(); printf("\n");
8180 printf("tags = "); _tag_word .print(); printf("\n");
8181 }
8183 };
// Decoded view of a saved flags register.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const        { return (_value & 0x800) != 0; }
  bool direction() const       { return (_value & 0x400) != 0; }
  bool sign() const            { return (_value & 0x080) != 0; }
  bool zero() const            { return (_value & 0x040) != 0; }
  bool auxiliary_carry() const { return (_value & 0x010) != 0; }
  bool parity() const          { return (_value & 0x004) != 0; }
  bool carry() const           { return (_value & 0x001) != 0; }

  // Print the raw value in hex followed by one letter per set flag
  // ('-' when the flag is clear).
  void print() const {
    const char f[8] = {
      overflow()        ? 'O' : '-',
      direction()       ? 'D' : '-',
      sign()            ? 'S' : '-',
      zero()            ? 'Z' : '-',
      auxiliary_carry() ? 'A' : '-',
      parity()          ? 'P' : '-',
      carry()           ? 'C' : '-',
      '\x0'
    };
    printf("%08x flags = %s", _value, f);
  }
};
// One saved 32-bit integer register value.
class IU_Register {
 public:
  int32_t _value;

  // Print the value as 8 hex digits, then as a decimal in a field of width 11.
  void print() const {
    const int32_t v = _value;
    printf("%08x %11d", v, v);
  }
};
8224 class IU_State {
8225 public:
8226 Flag_Register _eflags;
8227 IU_Register _rdi;
8228 IU_Register _rsi;
8229 IU_Register _rbp;
8230 IU_Register _rsp;
8231 IU_Register _rbx;
8232 IU_Register _rdx;
8233 IU_Register _rcx;
8234 IU_Register _rax;
8236 void print() const {
8237 // computation registers
8238 printf("rax, = "); _rax.print(); printf("\n");
8239 printf("rbx, = "); _rbx.print(); printf("\n");
8240 printf("rcx = "); _rcx.print(); printf("\n");
8241 printf("rdx = "); _rdx.print(); printf("\n");
8242 printf("rdi = "); _rdi.print(); printf("\n");
8243 printf("rsi = "); _rsi.print(); printf("\n");
8244 printf("rbp, = "); _rbp.print(); printf("\n");
8245 printf("rsp = "); _rsp.print(); printf("\n");
8246 printf("\n");
8247 // control registers
8248 printf("flgs = "); _eflags.print(); printf("\n");
8249 }
8250 };
8253 class CPU_State {
8254 public:
8255 FPU_State _fpu_state;
8256 IU_State _iu_state;
8258 void print() const {
8259 printf("--------------------------------------------------\n");
8260 _iu_state .print();
8261 printf("\n");
8262 _fpu_state.print();
8263 printf("--------------------------------------------------\n");
8264 }
8266 };
8269 static void _print_CPU_state(CPU_State* state) {
8270 state->print();
8271 };
8274 void MacroAssembler::print_CPU_state() {
8275 push_CPU_state();
8276 push(rsp); // pass CPU state
8277 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
8278 addptr(rsp, wordSize); // discard argument
8279 pop_CPU_state();
8280 }
8283 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
8284 static int counter = 0;
8285 FPU_State* fs = &state->_fpu_state;
8286 counter++;
8287 // For leaf calls, only verify that the top few elements remain empty.
8288 // We only need 1 empty at the top for C2 code.
8289 if( stack_depth < 0 ) {
8290 if( fs->tag_for_st(7) != 3 ) {
8291 printf("FPR7 not empty\n");
8292 state->print();
8293 assert(false, "error");
8294 return false;
8295 }
8296 return true; // All other stack states do not matter
8297 }
8299 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
8300 "bad FPU control word");
8302 // compute stack depth
8303 int i = 0;
8304 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
8305 int d = i;
8306 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
8307 // verify findings
8308 if (i != FPU_State::number_of_registers) {
8309 // stack not contiguous
8310 printf("%s: stack not contiguous at ST%d\n", s, i);
8311 state->print();
8312 assert(false, "error");
8313 return false;
8314 }
8315 // check if computed stack depth corresponds to expected stack depth
8316 if (stack_depth < 0) {
8317 // expected stack depth is -stack_depth or less
8318 if (d > -stack_depth) {
8319 // too many elements on the stack
8320 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
8321 state->print();
8322 assert(false, "error");
8323 return false;
8324 }
8325 } else {
8326 // expected stack depth is stack_depth
8327 if (d != stack_depth) {
8328 // wrong stack depth
8329 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
8330 state->print();
8331 assert(false, "error");
8332 return false;
8333 }
8334 }
8335 // everything is cool
8336 return true;
8337 }
8340 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
8341 if (!VerifyFPU) return;
8342 push_CPU_state();
8343 push(rsp); // pass CPU state
8344 ExternalAddress msg((address) s);
8345 // pass message string s
8346 pushptr(msg.addr());
8347 push(stack_depth); // pass stack depth
8348 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
8349 addptr(rsp, 3 * wordSize); // discard arguments
8350 // check for error
8351 { Label L;
8352 testl(rax, rax);
8353 jcc(Assembler::notZero, L);
8354 int3(); // break if error condition
8355 bind(L);
8356 }
8357 pop_CPU_state();
8358 }
8360 void MacroAssembler::load_klass(Register dst, Register src) {
8361 #ifdef _LP64
8362 if (UseCompressedOops) {
8363 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8364 decode_heap_oop_not_null(dst);
8365 } else
8366 #endif
8367 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8368 }
8370 void MacroAssembler::load_prototype_header(Register dst, Register src) {
8371 #ifdef _LP64
8372 if (UseCompressedOops) {
8373 assert (Universe::heap() != NULL, "java heap should be initialized");
8374 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8375 if (Universe::narrow_oop_shift() != 0) {
8376 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8377 if (LogMinObjAlignmentInBytes == Address::times_8) {
8378 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8379 } else {
8380 // OK to use shift since we don't need to preserve flags.
8381 shlq(dst, LogMinObjAlignmentInBytes);
8382 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8383 }
8384 } else {
8385 movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8386 }
8387 } else
8388 #endif
8389 {
8390 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
8391 movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
8392 }
8393 }
8395 void MacroAssembler::store_klass(Register dst, Register src) {
8396 #ifdef _LP64
8397 if (UseCompressedOops) {
8398 encode_heap_oop_not_null(src);
8399 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
8400 } else
8401 #endif
8402 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
8403 }
8405 void MacroAssembler::load_heap_oop(Register dst, Address src) {
8406 #ifdef _LP64
8407 if (UseCompressedOops) {
8408 movl(dst, src);
8409 decode_heap_oop(dst);
8410 } else
8411 #endif
8412 movptr(dst, src);
8413 }
8415 void MacroAssembler::store_heap_oop(Address dst, Register src) {
8416 #ifdef _LP64
8417 if (UseCompressedOops) {
8418 assert(!dst.uses(src), "not enough registers");
8419 encode_heap_oop(src);
8420 movl(dst, src);
8421 } else
8422 #endif
8423 movptr(dst, src);
8424 }
8426 // Used for storing NULLs.
8427 void MacroAssembler::store_heap_oop_null(Address dst) {
8428 #ifdef _LP64
8429 if (UseCompressedOops) {
8430 movl(dst, (int32_t)NULL_WORD);
8431 } else {
8432 movslq(dst, (int32_t)NULL_WORD);
8433 }
8434 #else
8435 movl(dst, (int32_t)NULL_WORD);
8436 #endif
8437 }
8439 #ifdef _LP64
8440 void MacroAssembler::store_klass_gap(Register dst, Register src) {
8441 if (UseCompressedOops) {
8442 // Store to klass gap in destination
8443 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
8444 }
8445 }
#ifdef ASSERT
// Debug-only check that r12_heapbase still equals the value stored at
// Universe::narrow_oop_base_addr(). The check is emitted only when
// CheckCompressedOops is set; on mismatch the generated code stops with msg.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (!CheckCompressedOops) {
    return;
  }

  Label heapbase_ok;
  push(rscratch1);  // cmpptr trashes rscratch1
  cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  jcc(Assembler::equal, heapbase_ok);
  stop(msg);
  bind(heapbase_ok);
  pop(rscratch1);
}
#endif
8463 // Algorithm must match oop.inline.hpp encode_heap_oop.
8464 void MacroAssembler::encode_heap_oop(Register r) {
8465 #ifdef ASSERT
8466 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
8467 #endif
8468 verify_oop(r, "broken oop in encode_heap_oop");
8469 if (Universe::narrow_oop_base() == NULL) {
8470 if (Universe::narrow_oop_shift() != 0) {
8471 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8472 shrq(r, LogMinObjAlignmentInBytes);
8473 }
8474 return;
8475 }
8476 testq(r, r);
8477 cmovq(Assembler::equal, r, r12_heapbase);
8478 subq(r, r12_heapbase);
8479 shrq(r, LogMinObjAlignmentInBytes);
8480 }
8482 void MacroAssembler::encode_heap_oop_not_null(Register r) {
8483 #ifdef ASSERT
8484 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
8485 if (CheckCompressedOops) {
8486 Label ok;
8487 testq(r, r);
8488 jcc(Assembler::notEqual, ok);
8489 stop("null oop passed to encode_heap_oop_not_null");
8490 bind(ok);
8491 }
8492 #endif
8493 verify_oop(r, "broken oop in encode_heap_oop_not_null");
8494 if (Universe::narrow_oop_base() != NULL) {
8495 subq(r, r12_heapbase);
8496 }
8497 if (Universe::narrow_oop_shift() != 0) {
8498 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8499 shrq(r, LogMinObjAlignmentInBytes);
8500 }
8501 }
8503 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
8504 #ifdef ASSERT
8505 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
8506 if (CheckCompressedOops) {
8507 Label ok;
8508 testq(src, src);
8509 jcc(Assembler::notEqual, ok);
8510 stop("null oop passed to encode_heap_oop_not_null2");
8511 bind(ok);
8512 }
8513 #endif
8514 verify_oop(src, "broken oop in encode_heap_oop_not_null2");
8515 if (dst != src) {
8516 movq(dst, src);
8517 }
8518 if (Universe::narrow_oop_base() != NULL) {
8519 subq(dst, r12_heapbase);
8520 }
8521 if (Universe::narrow_oop_shift() != 0) {
8522 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8523 shrq(dst, LogMinObjAlignmentInBytes);
8524 }
8525 }
8527 void MacroAssembler::decode_heap_oop(Register r) {
8528 #ifdef ASSERT
8529 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
8530 #endif
8531 if (Universe::narrow_oop_base() == NULL) {
8532 if (Universe::narrow_oop_shift() != 0) {
8533 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8534 shlq(r, LogMinObjAlignmentInBytes);
8535 }
8536 } else {
8537 Label done;
8538 shlq(r, LogMinObjAlignmentInBytes);
8539 jccb(Assembler::equal, done);
8540 addq(r, r12_heapbase);
8541 bind(done);
8542 }
8543 verify_oop(r, "broken oop in decode_heap_oop");
8544 }
8546 void MacroAssembler::decode_heap_oop_not_null(Register r) {
8547 // Note: it will change flags
8548 assert (UseCompressedOops, "should only be used for compressed headers");
8549 assert (Universe::heap() != NULL, "java heap should be initialized");
8550 // Cannot assert, unverified entry point counts instructions (see .ad file)
8551 // vtableStubs also counts instructions in pd_code_size_limit.
8552 // Also do not verify_oop as this is called by verify_oop.
8553 if (Universe::narrow_oop_shift() != 0) {
8554 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8555 shlq(r, LogMinObjAlignmentInBytes);
8556 if (Universe::narrow_oop_base() != NULL) {
8557 addq(r, r12_heapbase);
8558 }
8559 } else {
8560 assert (Universe::narrow_oop_base() == NULL, "sanity");
8561 }
8562 }
8564 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
8565 // Note: it will change flags
8566 assert (UseCompressedOops, "should only be used for compressed headers");
8567 assert (Universe::heap() != NULL, "java heap should be initialized");
8568 // Cannot assert, unverified entry point counts instructions (see .ad file)
8569 // vtableStubs also counts instructions in pd_code_size_limit.
8570 // Also do not verify_oop as this is called by verify_oop.
8571 if (Universe::narrow_oop_shift() != 0) {
8572 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
8573 if (LogMinObjAlignmentInBytes == Address::times_8) {
8574 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
8575 } else {
8576 if (dst != src) {
8577 movq(dst, src);
8578 }
8579 shlq(dst, LogMinObjAlignmentInBytes);
8580 if (Universe::narrow_oop_base() != NULL) {
8581 addq(dst, r12_heapbase);
8582 }
8583 }
8584 } else {
8585 assert (Universe::narrow_oop_base() == NULL, "sanity");
8586 if (dst != src) {
8587 movq(dst, src);
8588 }
8589 }
8590 }
8592 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
8593 assert (UseCompressedOops, "should only be used for compressed headers");
8594 assert (Universe::heap() != NULL, "java heap should be initialized");
8595 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8596 int oop_index = oop_recorder()->find_index(obj);
8597 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8598 mov_narrow_oop(dst, oop_index, rspec);
8599 }
8601 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
8602 assert (UseCompressedOops, "should only be used for compressed headers");
8603 assert (Universe::heap() != NULL, "java heap should be initialized");
8604 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8605 int oop_index = oop_recorder()->find_index(obj);
8606 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8607 mov_narrow_oop(dst, oop_index, rspec);
8608 }
8610 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
8611 assert (UseCompressedOops, "should only be used for compressed headers");
8612 assert (Universe::heap() != NULL, "java heap should be initialized");
8613 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8614 int oop_index = oop_recorder()->find_index(obj);
8615 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8616 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8617 }
8619 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
8620 assert (UseCompressedOops, "should only be used for compressed headers");
8621 assert (Universe::heap() != NULL, "java heap should be initialized");
8622 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8623 int oop_index = oop_recorder()->find_index(obj);
8624 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8625 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8626 }
8628 void MacroAssembler::reinit_heapbase() {
8629 if (UseCompressedOops) {
8630 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
8631 }
8632 }
8633 #endif // _LP64
8635 // IndexOf for constant substrings with size >= 8 chars
8636 // which don't need to be loaded through stack.
//
// Emits code that leaves in 'result' the element index of the first
// occurrence of the substring in the string, or -1 if there is none.
//   str1     - address of the scanned string
//   str2     - address of the substring
//   cnt1     - string length in elements (must be rdx)
//   cnt2     - loaded here with the substring length (must be rax)
//   int_cnt2 - compile-time substring length, >= 8
//   result   - working pointer into the string; receives the index or -1
//   vec      - XMM register holding the 8 substring elements being compared
//   tmp      - receives the pcmpestri match index (must be rcx)
8637 void MacroAssembler::string_indexofC8(Register str1, Register str2,
8638 Register cnt1, Register cnt2,
8639 int int_cnt2, Register result,
8640 XMMRegister vec, Register tmp) {
8641 assert(UseSSE42Intrinsics, "SSE4.2 is required");
8643 // This method uses pcmpestri instruction with bound registers
8644 // inputs:
8645 // xmm - substring
8646 // rax - substring length (elements count)
8647 // mem - scanned string
8648 // rdx - string length (elements count)
8649 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
8650 // outputs:
8651 // rcx - matched index in string
8652 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
8654 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
8655 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
8656 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
8658 // Note, inline_string_indexOf() generates checks:
8659 // if (substr.count > string.count) return -1;
8660 // if (substr.count == 0) return 0;
8661 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
8663 // Load substring.
8664 movdqu(vec, Address(str2, 0));
8665 movl(cnt2, int_cnt2);
8666 movptr(result, str1); // string addr
8668 if (int_cnt2 > 8) {
8669 jmpb(SCAN_TO_SUBSTR);
8671 // Reload substr for rescan, this code
8672 // is executed only for large substrings (> 8 chars)
8673 bind(RELOAD_SUBSTR);
8674 movdqu(vec, Address(str2, 0));
8675 negptr(cnt2); // Jumped here with negative cnt2, convert to positive
8677 bind(RELOAD_STR);
8678 // We came here after the beginning of the substring was
8679 // matched but the rest of it was not so we need to search
8680 // again. Start from the next element after the previous match.
8682 // cnt2 is number of substring remaining elements and
8683 // cnt1 is number of string remaining elements when cmp failed.
8684 // Restored cnt1 = cnt1 - cnt2 + int_cnt2
8685 subl(cnt1, cnt2);
8686 addl(cnt1, int_cnt2);
8687 movl(cnt2, int_cnt2); // Now restore cnt2
8689 decrementl(cnt1); // Shift to next element
8690 cmpl(cnt1, cnt2);
8691 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
8693 addptr(result, 2);
8695 } // (int_cnt2 > 8)
8697 // Scan string for start of substr in 16-byte vectors
8698 bind(SCAN_TO_SUBSTR);
8699 pcmpestri(vec, Address(result, 0), 0x0d);
8700 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
8701 subl(cnt1, 8);
8702 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
8703 cmpl(cnt1, cnt2);
8704 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
8705 addptr(result, 16);
8706 jmpb(SCAN_TO_SUBSTR);
8708 // Found a potential substr
8709 bind(FOUND_CANDIDATE);
8710 // Matched whole vector if first element matched (tmp(rcx) == 0).
8711 if (int_cnt2 == 8) {
8712 jccb(Assembler::overflow, RET_FOUND); // OF == 1
8713 } else { // int_cnt2 > 8
8714 jccb(Assembler::overflow, FOUND_SUBSTR);
8715 }
8716 // After pcmpestri tmp(rcx) contains matched element index
8717 // Compute start addr of substr
8718 lea(result, Address(result, tmp, Address::times_2));
8720 // Make sure string is still long enough
8721 subl(cnt1, tmp);
8722 cmpl(cnt1, cnt2);
8723 if (int_cnt2 == 8) {
8724 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
8725 } else { // int_cnt2 > 8
8726 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
8727 }
8728 // Left less than substring.
8730 bind(RET_NOT_FOUND);
8731 movl(result, -1);
8732 jmpb(EXIT);
8734 if (int_cnt2 > 8) {
8735 // This code is optimized for the case when whole substring
8736 // is matched if its head is matched.
8737 bind(MATCH_SUBSTR_HEAD);
8738 pcmpestri(vec, Address(result, 0), 0x0d);
8739 // Reload only string if does not match
8740 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
8742 Label CONT_SCAN_SUBSTR;
8743 // Compare the rest of substring (> 8 chars).
8744 bind(FOUND_SUBSTR);
8745 // First 8 chars are already matched.
// From here on cnt2 is kept negative; the loop below counts it up
// towards zero in steps of 8.
8746 negptr(cnt2);
8747 addptr(cnt2, 8);
8749 bind(SCAN_SUBSTR);
8750 subl(cnt1, 8);
8751 cmpl(cnt2, -8); // Do not read beyond substring
8752 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
8753 // Back-up strings to avoid reading beyond substring:
8754 // cnt1 = cnt1 - cnt2 + 8
8755 addl(cnt1, cnt2); // cnt2 is negative
8756 addl(cnt1, 8);
8757 movl(cnt2, 8); negptr(cnt2);
8758 bind(CONT_SCAN_SUBSTR);
8759 if (int_cnt2 < (int)G) { // int_cnt2*2 can be used directly as displacement
8760 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
8761 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
8762 } else {
8763 // calculate index in register to avoid integer overflow (int_cnt2*2)
8764 movl(tmp, int_cnt2);
8765 addptr(tmp, cnt2);
8766 movdqu(vec, Address(str2, tmp, Address::times_2, 0));
8767 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
8768 }
8769 // Need to reload strings pointers if not matched whole vector
8770 jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
8771 addptr(cnt2, 8);
8772 jccb(Assembler::negative, SCAN_SUBSTR);
8773 // Fall through if found full substring
8775 } // (int_cnt2 > 8)
8777 bind(RET_FOUND);
8778 // Found result if we matched full small substring.
8779 // Compute substr offset
8780 subptr(result, str1);
8781 shrl(result, 1); // index
8782 bind(EXIT);
8784 } // string_indexofC8
8786 // Small strings are loaded through stack if they cross page boundary.
//
// Emits code that leaves in 'result' the element index of the first
// occurrence of the substring in the string, or -1 if there is none.
//   str1     - address of the scanned string
//   str2     - address of the substring
//   cnt1     - string length in elements (must be rdx)
//   cnt2     - substring length in elements (must be rax)
//   int_cnt2 - length of a small constant substring, or -1 (see below)
//   result   - scratch / working pointer; receives the index or -1
//   vec      - XMM register holding the substring elements being compared
//   tmp      - holds the original SP while loading, then receives the
//              pcmpestri match index (must be rcx)
// The original SP is pushed once loading is finished and popped back into
// rsp at CLEANUP, which releases everything placed on the stack here.
8787 void MacroAssembler::string_indexof(Register str1, Register str2,
8788 Register cnt1, Register cnt2,
8789 int int_cnt2, Register result,
8790 XMMRegister vec, Register tmp) {
8791 assert(UseSSE42Intrinsics, "SSE4.2 is required");
8792 //
8793 // int_cnt2 is length of small (< 8 chars) constant substring
8794 // or (-1) for non constant substring in which case its length
8795 // is in cnt2 register.
8796 //
8797 // Note, inline_string_indexOf() generates checks:
8798 // if (substr.count > string.count) return -1;
8799 // if (substr.count == 0) return 0;
8800 //
8801 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
8803 // This method uses pcmpestri instruction with bound registers
8804 // inputs:
8805 // xmm - substring
8806 // rax - substring length (elements count)
8807 // mem - scanned string
8808 // rdx - string length (elements count)
8809 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
8810 // outputs:
8811 // rcx - matched index in string
8812 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
8814 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
8815 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
8816 FOUND_CANDIDATE;
8818 { //========================================================
8819 // We don't know where these strings are located
8820 // and we can't read beyond them. Load them through stack.
8821 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
8823 movptr(tmp, rsp); // save old SP
8825 if (int_cnt2 > 0) { // small (< 8 chars) constant substring
8826 if (int_cnt2 == 1) { // One char
8827 load_unsigned_short(result, Address(str2, 0));
8828 movdl(vec, result); // move 32 bits
8829 } else if (int_cnt2 == 2) { // Two chars
8830 movdl(vec, Address(str2, 0)); // move 32 bits
8831 } else if (int_cnt2 == 4) { // Four chars
8832 movq(vec, Address(str2, 0)); // move 64 bits
8833 } else { // cnt2 = { 3, 5, 6, 7 }
8834 // Array header size is 12 bytes in 32-bit VM
8835 // + 6 bytes for 3 chars == 18 bytes,
8836 // enough space to load vec and shift.
8837 assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
// Load the 16 bytes that end at the last substring char, then shift the
// bytes in front of the substring out of the vector.
8838 movdqu(vec, Address(str2, (int_cnt2*2)-16));
8839 psrldq(vec, 16-(int_cnt2*2));
8840 }
8841 } else { // not constant substring
8842 cmpl(cnt2, 8);
8843 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
8845 // We can read beyond string if str+16 does not cross page boundary
8846 // since heaps are aligned and mapped by pages.
8847 assert(os::vm_page_size() < (int)G, "default page should be small");
8848 movl(result, str2); // We need only low 32 bits
8849 andl(result, (os::vm_page_size()-1));
8850 cmpl(result, (os::vm_page_size()-16));
8851 jccb(Assembler::belowEqual, CHECK_STR);
8853 // Move small strings to stack to allow load 16 bytes into vec.
8854 subptr(rsp, 16);
8855 int stk_offset = wordSize-2; // skips the cnt2 word pushed next; -2 pairs with the -2 in the load below
8856 push(cnt2);
8858 bind(COPY_SUBSTR);
8859 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
8860 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
8861 decrement(cnt2);
8862 jccb(Assembler::notZero, COPY_SUBSTR);
8864 pop(cnt2);
8865 movptr(str2, rsp); // New substring address
8866 } // non constant
8868 bind(CHECK_STR);
8869 cmpl(cnt1, 8);
8870 jccb(Assembler::aboveEqual, BIG_STRINGS);
8872 // Check cross page boundary.
8873 movl(result, str1); // We need only low 32 bits
8874 andl(result, (os::vm_page_size()-1));
8875 cmpl(result, (os::vm_page_size()-16));
8876 jccb(Assembler::belowEqual, BIG_STRINGS);
// The string is copied to the stack the same way as the substring above.
8878 subptr(rsp, 16);
8879 int stk_offset = -2;
8880 if (int_cnt2 < 0) { // not constant
8881 push(cnt2);
8882 stk_offset += wordSize;
8883 }
8884 movl(cnt2, cnt1); // cnt2 serves as the copy loop counter
8886 bind(COPY_STR);
8887 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
8888 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
8889 decrement(cnt2);
8890 jccb(Assembler::notZero, COPY_STR);
8892 if (int_cnt2 < 0) { // not constant
8893 pop(cnt2);
8894 }
8895 movptr(str1, rsp); // New string address
8897 bind(BIG_STRINGS);
8898 // Load substring.
8899 if (int_cnt2 < 0) { // -1
8900 movdqu(vec, Address(str2, 0));
8901 push(cnt2); // substr count
8902 push(str2); // substr addr
8903 push(str1); // string addr
8904 } else {
8905 // Small (< 8 chars) constant substrings are loaded already.
8906 movl(cnt2, int_cnt2);
8907 }
8908 push(tmp); // original SP
8910 } // Finished loading
8912 //========================================================
8913 // Start search
8914 //
8916 movptr(result, str1); // string addr
8918 if (int_cnt2 < 0) { // Only for non constant substring
8919 jmpb(SCAN_TO_SUBSTR);
8921 // SP saved at sp+0
8922 // String saved at sp+1*wordSize
8923 // Substr saved at sp+2*wordSize
8924 // Substr count saved at sp+3*wordSize
8926 // Reload substr for rescan, this code
8927 // is executed only for large substrings (> 8 chars)
8928 bind(RELOAD_SUBSTR);
8929 movptr(str2, Address(rsp, 2*wordSize));
8930 movl(cnt2, Address(rsp, 3*wordSize));
8931 movdqu(vec, Address(str2, 0));
8932 // We came here after the beginning of the substring was
8933 // matched but the rest of it was not so we need to search
8934 // again. Start from the next element after the previous match.
8935 subptr(str1, result); // Restore counter
8936 shrl(str1, 1);
8937 addl(cnt1, str1);
8938 decrementl(cnt1); // Shift to next element
8939 cmpl(cnt1, cnt2);
8940 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
8942 addptr(result, 2);
8943 } // non constant
8945 // Scan string for start of substr in 16-byte vectors
8946 bind(SCAN_TO_SUBSTR);
8947 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
8948 pcmpestri(vec, Address(result, 0), 0x0d);
8949 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
8950 subl(cnt1, 8);
8951 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
8952 cmpl(cnt1, cnt2);
8953 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
8954 addptr(result, 16);
8956 bind(ADJUST_STR);
8957 cmpl(cnt1, 8); // Do not read beyond string
8958 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
8959 // Back-up string to avoid reading beyond string.
8960 lea(result, Address(result, cnt1, Address::times_2, -16));
8961 movl(cnt1, 8);
8962 jmpb(SCAN_TO_SUBSTR);
8964 // Found a potential substr
8965 bind(FOUND_CANDIDATE);
8966 // After pcmpestri tmp(rcx) contains matched element index
8968 // Make sure string is still long enough
8969 subl(cnt1, tmp);
8970 cmpl(cnt1, cnt2);
8971 jccb(Assembler::greaterEqual, FOUND_SUBSTR);
8972 // Left less than substring.
8974 bind(RET_NOT_FOUND);
8975 movl(result, -1);
8976 jmpb(CLEANUP);
8978 bind(FOUND_SUBSTR);
8979 // Compute start addr of substr
8980 lea(result, Address(result, tmp, Address::times_2));
8982 if (int_cnt2 > 0) { // Constant substring
8983 // Repeat search for small substring (< 8 chars)
8984 // from new point without reloading substring.
8985 // Have to check that we don't read beyond string.
8986 cmpl(tmp, 8-int_cnt2);
8987 jccb(Assembler::greater, ADJUST_STR);
8988 // Fall through if matched whole substring.
8989 } else { // non constant
8990 assert(int_cnt2 == -1, "should be != 0");
8992 addl(tmp, cnt2);
8993 // Found result if we matched whole substring.
8994 cmpl(tmp, 8);
8995 jccb(Assembler::lessEqual, RET_FOUND);
8997 // Repeat search for small substring (<= 8 chars)
8998 // from new point 'str1' without reloading substring.
8999 cmpl(cnt2, 8);
9000 // Have to check that we don't read beyond string.
9001 jccb(Assembler::lessEqual, ADJUST_STR);
9003 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
9004 // Compare the rest of substring (> 8 chars).
9005 movptr(str1, result);
9007 cmpl(tmp, cnt2);
9008 // First 8 chars are already matched.
9009 jccb(Assembler::equal, CHECK_NEXT);
9011 bind(SCAN_SUBSTR);
9012 pcmpestri(vec, Address(str1, 0), 0x0d);
9013 // Need to reload strings pointers if not matched whole vector
9014 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
9016 bind(CHECK_NEXT);
9017 subl(cnt2, 8);
9018 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
9019 addptr(str1, 16);
9020 addptr(str2, 16);
9021 subl(cnt1, 8);
9022 cmpl(cnt2, 8); // Do not read beyond substring
9023 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
9024 // Back-up strings to avoid reading beyond substring.
9025 lea(str2, Address(str2, cnt2, Address::times_2, -16));
9026 lea(str1, Address(str1, cnt2, Address::times_2, -16));
9027 subl(cnt1, cnt2);
9028 movl(cnt2, 8);
9029 addl(cnt1, 8);
9030 bind(CONT_SCAN_SUBSTR);
9031 movdqu(vec, Address(str2, 0));
9032 jmpb(SCAN_SUBSTR);
9034 bind(RET_FOUND_LONG);
9035 movptr(str1, Address(rsp, wordSize)); // reload string addr saved at sp+1*wordSize
9036 } // non constant
9038 bind(RET_FOUND);
9039 // Compute substr offset
9040 subptr(result, str1);
9041 shrl(result, 1); // index
9043 bind(CLEANUP);
9044 pop(rsp); // restore SP
9046 } // string_indexof
9048 // Compare strings.
//
// Emits code that compares two char sequences element by element.
//   str1, str2 - addresses of the first elements; both are advanced here
//   cnt1, cnt2 - lengths in elements; both are clobbered
//   result     - receives the outcome (see below)
//   vec1       - XMM scratch register, used only with UseSSE42Intrinsics
// Outcome: the difference of the first pair of differing elements (str1
// element minus str2 element), or, if the sequences agree over the shorter
// length, the length difference cnt1 - cnt2 saved on the stack at entry.
9049 void MacroAssembler::string_compare(Register str1, Register str2,
9050 Register cnt1, Register cnt2, Register result,
9051 XMMRegister vec1) {
9052 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
9054 // Compute the minimum of the string lengths and the
9055 // difference of the string lengths (stack).
9056 // Do the conditional move stuff
9057 movl(result, cnt1);
9058 subl(cnt1, cnt2);
9059 push(cnt1);
9060 if (VM_Version::supports_cmov()) {
9061 cmovl(Assembler::lessEqual, cnt2, result);
9062 } else {
9063 Label GT_LABEL;
9064 jccb(Assembler::greater, GT_LABEL);
9065 movl(cnt2, result);
9066 bind(GT_LABEL);
9067 }
9069 // Is the minimum length zero?
9070 testl(cnt2, cnt2);
9071 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
9073 // Load first characters
9074 load_unsigned_short(result, Address(str1, 0));
9075 load_unsigned_short(cnt1, Address(str2, 0));
9077 // Compare first characters
9078 subl(result, cnt1);
9079 jcc(Assembler::notZero, POP_LABEL);
9080 decrementl(cnt2);
9081 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
9083 {
9084 // Check after comparing first character to see if strings are equivalent
9085 Label LSkip2;
9086 // Check if the strings start at same location
9087 cmpptr(str1, str2);
9088 jccb(Assembler::notEqual, LSkip2);
9090 // Check if the length difference is zero (from stack)
9091 cmpl(Address(rsp, 0), 0x0);
9092 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
9094 // Strings might not be equivalent
9095 bind(LSkip2);
9096 }
9098 Address::ScaleFactor scale = Address::times_2;
9099 int stride = 8;
9101 // Advance to next element
9102 addptr(str1, 16/stride); // 16/stride == 2 bytes, i.e. one element
9103 addptr(str2, 16/stride);
9105 if (UseSSE42Intrinsics) {
9106 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
9107 int pcmpmask = 0x19;
9108 // Setup to compare 16-byte vectors
9109 movl(result, cnt2);
9110 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
9111 jccb(Assembler::zero, COMPARE_TAIL);
9113 lea(str1, Address(str1, result, scale));
9114 lea(str2, Address(str2, result, scale));
9115 negptr(result);
9117 // pcmpestri
9118 // inputs:
9119 // vec1- substring
9120 // rax - negative string length (elements count)
9121 // mem - scanned string
9122 // rdx - string length (elements count)
9123 // pcmpmask - cmp mode: 11000 (string compare with negated result)
9124 // + 00 (unsigned bytes) or + 01 (unsigned shorts)
9125 // outputs:
9126 // rcx - first mismatched element index
9127 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
9129 bind(COMPARE_WIDE_VECTORS);
9130 movdqu(vec1, Address(str1, result, scale));
9131 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
9132 // After pcmpestri cnt1(rcx) contains mismatched element index
9134 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
9135 addptr(result, stride);
9136 subptr(cnt2, stride);
9137 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
9139 // compare wide vectors tail
9140 testl(result, result);
9141 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
// Compare the last 'stride' elements again as one vector; it ends exactly
// at the end of the range because str1/str2 already point there.
9143 movl(cnt2, stride);
9144 movl(result, stride);
9145 negptr(result);
9146 movdqu(vec1, Address(str1, result, scale));
9147 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
9148 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
9150 // Mismatched characters in the vectors
9151 bind(VECTOR_NOT_EQUAL);
9152 addptr(result, cnt1);
9153 movptr(cnt2, result);
9154 load_unsigned_short(result, Address(str1, cnt2, scale));
9155 load_unsigned_short(cnt1, Address(str2, cnt2, scale));
9156 subl(result, cnt1);
9157 jmpb(POP_LABEL);
9159 bind(COMPARE_TAIL); // limit is zero
9160 movl(cnt2, result);
9161 // Fallthru to tail compare
9162 }
9164 // Shift str2 and str1 to the end of the arrays, negate min
9165 lea(str1, Address(str1, cnt2, scale, 0));
9166 lea(str2, Address(str2, cnt2, scale, 0));
9167 negptr(cnt2);
9169 // Compare the rest of the elements
9170 bind(WHILE_HEAD_LABEL);
9171 load_unsigned_short(result, Address(str1, cnt2, scale, 0));
9172 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
9173 subl(result, cnt1);
9174 jccb(Assembler::notZero, POP_LABEL);
9175 increment(cnt2);
9176 jccb(Assembler::notZero, WHILE_HEAD_LABEL);
9178 // Strings are equal up to min length. Return the length difference.
9179 bind(LENGTH_DIFF_LABEL);
9180 pop(result);
9181 jmpb(DONE_LABEL);
9183 // Discard the stored length difference
9184 bind(POP_LABEL);
9185 pop(cnt1);
9187 // That's it
9188 bind(DONE_LABEL);
9189 }
9191 // Compare char[] arrays aligned to 4 bytes or substrings.
//
// Emits code that sets 'result' to 1 if both char sequences are equal and
// to 0 otherwise.
//   is_array_equ - true:  ary1/ary2 are checked for NULL, their lengths are
//                         loaded and compared, and the element base offset
//                         is added before comparing;
//                  false: ary1/ary2 are used as they are and 'limit' is
//                         expected to hold the element count on entry
//   ary1, ary2   - the two operands; both are advanced here
//   limit        - element count; clobbered
//   result       - receives 1 (equal) or 0 (different)
//   chr          - scratch register
//   vec1, vec2   - XMM scratch registers, used only with UseSSE42Intrinsics
9192 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
9193 Register limit, Register result, Register chr,
9194 XMMRegister vec1, XMMRegister vec2) {
9195 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
9197 int length_offset = arrayOopDesc::length_offset_in_bytes();
9198 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
9200 // Check the input args
9201 cmpptr(ary1, ary2);
9202 jcc(Assembler::equal, TRUE_LABEL);
9204 if (is_array_equ) {
9205 // Need additional checks for arrays_equals.
9206 testptr(ary1, ary1);
9207 jcc(Assembler::zero, FALSE_LABEL);
9208 testptr(ary2, ary2);
9209 jcc(Assembler::zero, FALSE_LABEL);
9211 // Check the lengths
9212 movl(limit, Address(ary1, length_offset));
9213 cmpl(limit, Address(ary2, length_offset));
9214 jcc(Assembler::notEqual, FALSE_LABEL);
9215 }
9217 // count == 0
9218 testl(limit, limit);
9219 jcc(Assembler::zero, TRUE_LABEL);
9221 if (is_array_equ) {
9222 // Load array address
9223 lea(ary1, Address(ary1, base_offset));
9224 lea(ary2, Address(ary2, base_offset));
9225 }
9227 shll(limit, 1); // byte count != 0
9228 movl(result, limit); // copy
9230 if (UseSSE42Intrinsics) {
9231 // With SSE4.2, use double quad vector compare
9232 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
9234 // Compare 16-byte vectors
9235 andl(result, 0x0000000e); // tail count (in bytes)
9236 andl(limit, 0xfffffff0); // vector count (in bytes)
9237 jccb(Assembler::zero, COMPARE_TAIL);
// Point both operands past the vector part and walk a negative offset up
// to zero.
9239 lea(ary1, Address(ary1, limit, Address::times_1));
9240 lea(ary2, Address(ary2, limit, Address::times_1));
9241 negptr(limit);
9243 bind(COMPARE_WIDE_VECTORS);
9244 movdqu(vec1, Address(ary1, limit, Address::times_1));
9245 movdqu(vec2, Address(ary2, limit, Address::times_1));
9246 pxor(vec1, vec2);
9248 ptest(vec1, vec1);
9249 jccb(Assembler::notZero, FALSE_LABEL);
9250 addptr(limit, 16);
9251 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
9253 testl(result, result);
9254 jccb(Assembler::zero, TRUE_LABEL);
// Non-empty tail: compare the 16 bytes that end at the end of the data.
9256 movdqu(vec1, Address(ary1, result, Address::times_1, -16));
9257 movdqu(vec2, Address(ary2, result, Address::times_1, -16));
9258 pxor(vec1, vec2);
9260 ptest(vec1, vec1);
9261 jccb(Assembler::notZero, FALSE_LABEL);
9262 jmpb(TRUE_LABEL);
9264 bind(COMPARE_TAIL); // limit is zero
9265 movl(limit, result);
9266 // Fallthru to tail compare
9267 }
9269 // Compare 4-byte vectors
9270 andl(limit, 0xfffffffc); // vector count (in bytes)
9271 jccb(Assembler::zero, COMPARE_CHAR);
9273 lea(ary1, Address(ary1, limit, Address::times_1));
9274 lea(ary2, Address(ary2, limit, Address::times_1));
9275 negptr(limit);
9277 bind(COMPARE_VECTORS);
9278 movl(chr, Address(ary1, limit, Address::times_1));
9279 cmpl(chr, Address(ary2, limit, Address::times_1));
9280 jccb(Assembler::notEqual, FALSE_LABEL);
9281 addptr(limit, 4);
9282 jcc(Assembler::notZero, COMPARE_VECTORS);
9284 // Compare trailing char (final 2 bytes), if any
9285 bind(COMPARE_CHAR);
9286 testl(result, 0x2); // tail char
9287 jccb(Assembler::zero, TRUE_LABEL);
9288 load_unsigned_short(chr, Address(ary1, 0));
9289 load_unsigned_short(limit, Address(ary2, 0));
9290 cmpl(chr, limit);
9291 jccb(Assembler::notEqual, FALSE_LABEL);
9293 bind(TRUE_LABEL);
9294 movl(result, 1); // return true
9295 jmpb(DONE);
9297 bind(FALSE_LABEL);
9298 xorl(result, result); // return false
9300 // That's it
9301 bind(DONE);
9302 }
// BLOCK_COMMENT(str) forwards to block_comment(str) in non-PRODUCT builds
// and expands to nothing in PRODUCT builds.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

// BIND(label) binds the label and, in non-PRODUCT builds, emits a block
// comment consisting of the label's name followed by ':'.
// The expansion is wrapped in do { } while (0) so that it is one statement:
// under an unbraced if/else both the bind and the comment are guarded.
// Use it as a statement, followed by a semicolon.
#define BIND(label) do { bind(label); BLOCK_COMMENT(#label ":"); } while (0)
// Emits code that stores 'count' elements of type t (T_BYTE, T_SHORT or
// T_INT) starting at address 'to', each element holding the low bits of
// 'value'.
//   aligned - when false (and UseUnalignedLoadStores is off) byte/short fills
//             first emit a prologue that aligns 'to' to a 4-byte boundary
//   rtmp    - scratch register used while replicating 'value'
//   xtmp    - XMM scratch register, used only on the UseSSE >= 2 path
// The emitted code modifies to, value, count and rtmp (and xtmp on the
// UseSSE >= 2 path).
9311 void MacroAssembler::generate_fill(BasicType t, bool aligned,
9312 Register to, Register value, Register count,
9313 Register rtmp, XMMRegister xtmp) {
9314 assert_different_registers(to, value, count, rtmp);
9315 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
9316 Label L_fill_2_bytes, L_fill_4_bytes;
// shift = log2(number of elements in 4 bytes).  Below, (1 << shift) is the
// element count of 4 bytes, (2 << shift) of 8 bytes and (8 << shift) of
// 32 bytes.
9318 int shift = -1;
9319 switch (t) {
9320 case T_BYTE:
9321 shift = 2;
9322 break;
9323 case T_SHORT:
9324 shift = 1;
9325 break;
9326 case T_INT:
9327 shift = 0;
9328 break;
9329 default: ShouldNotReachHere();
9330 }
// Replicate the element across all 32 bits of 'value' so that wider stores
// write several elements at once.
// Byte: copy the low 8 bits into bits 8..15.
9332 if (t == T_BYTE) {
9333 andl(value, 0xff);
9334 movl(rtmp, value);
9335 shll(rtmp, 8);
9336 orl(value, rtmp);
9337 }
// Short: keep only the low 16 bits.
9338 if (t == T_SHORT) {
9339 andl(value, 0xffff);
9340 }
// Byte and short: copy the low 16 bits into bits 16..31.
9341 if (t == T_BYTE || t == T_SHORT) {
9342 movl(rtmp, value);
9343 shll(rtmp, 16);
9344 orl(value, rtmp);
9345 }
9347 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
9348 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
9349 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
9350 // align destination address ('to') at 4 bytes address boundary
9351 if (t == T_BYTE) {
9352 // One byte misalignment happens only for byte arrays
9353 testptr(to, 1);
9354 jccb(Assembler::zero, L_skip_align1);
9355 movb(Address(to, 0), value);
9356 increment(to);
9357 decrement(count);
9358 BIND(L_skip_align1);
9359 }
9360 // Two bytes misalignment happens only for byte and short (char) arrays
9361 testptr(to, 2);
9362 jccb(Assembler::zero, L_skip_align2);
9363 movw(Address(to, 0), value);
9364 addptr(to, 2);
// (1 << (shift-1)) is the element count of 2 bytes; shift >= 1 here because
// this block is only emitted for byte and short fills.
9365 subl(count, 1<<(shift-1));
9366 BIND(L_skip_align2);
9367 }
9368 if (UseSSE < 2) {
9369 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
9370 // Fill 32-byte chunks
// count is biased by -32 bytes' worth of elements; the loop runs while the
// biased count stays non-negative.
9371 subl(count, 8 << shift);
9372 jcc(Assembler::less, L_check_fill_8_bytes);
9373 align(16);
9375 BIND(L_fill_32_bytes_loop);
// Eight 4-byte stores cover one 32-byte chunk.
9377 for (int i = 0; i < 32; i += 4) {
9378 movl(Address(to, i), value);
9379 }
9381 addptr(to, 32);
9382 subl(count, 8 << shift);
9383 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
9384 BIND(L_check_fill_8_bytes);
// Undo the bias; a zero result means nothing is left to fill.
9385 addl(count, 8 << shift);
9386 jccb(Assembler::zero, L_exit);
9387 jmpb(L_fill_8_bytes);
9389 //
9390 // length is too short, just fill qwords
9391 //
9392 BIND(L_fill_8_bytes_loop);
9393 movl(Address(to, 0), value);
9394 movl(Address(to, 4), value);
9395 addptr(to, 8);
9396 BIND(L_fill_8_bytes);
// Loop entry point: subtract 8 bytes' worth of elements first, store only
// if that many remained.  On exit count is negative, but the low bits that
// the tail code tests are unchanged by these subtractions.
9397 subl(count, 1 << (shift + 1));
9398 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
9399 // fall through to fill 4 bytes
9400 } else {
9401 Label L_fill_32_bytes;
9402 if (!UseUnalignedLoadStores) {
9403 // align to 8 bytes, we know we are 4 byte aligned to start
9404 testptr(to, 4);
9405 jccb(Assembler::zero, L_fill_32_bytes);
9406 movl(Address(to, 0), value);
9407 addptr(to, 4);
9408 subl(count, 1<<shift);
9409 }
9410 BIND(L_fill_32_bytes);
9411 {
9412 assert( UseSSE >= 2, "supported cpu only" );
9413 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
9414 // Fill 32-byte chunks
// Move the 32-bit pattern into xtmp and broadcast it to all four dwords.
9415 movdl(xtmp, value);
9416 pshufd(xtmp, xtmp, 0);
// Same biased-count loop structure as the UseSSE < 2 path above.
9418 subl(count, 8 << shift);
9419 jcc(Assembler::less, L_check_fill_8_bytes);
9420 align(16);
9422 BIND(L_fill_32_bytes_loop);
// Two 16-byte unaligned stores when permitted, otherwise four 8-byte stores.
9424 if (UseUnalignedLoadStores) {
9425 movdqu(Address(to, 0), xtmp);
9426 movdqu(Address(to, 16), xtmp);
9427 } else {
9428 movq(Address(to, 0), xtmp);
9429 movq(Address(to, 8), xtmp);
9430 movq(Address(to, 16), xtmp);
9431 movq(Address(to, 24), xtmp);
9432 }
9434 addptr(to, 32);
9435 subl(count, 8 << shift);
9436 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
9437 BIND(L_check_fill_8_bytes);
// Undo the bias; a zero result means nothing is left to fill.
9438 addl(count, 8 << shift);
9439 jccb(Assembler::zero, L_exit);
9440 jmpb(L_fill_8_bytes);
9442 //
9443 // length is too short, just fill qwords
9444 //
9445 BIND(L_fill_8_bytes_loop);
9446 movq(Address(to, 0), xtmp);
9447 addptr(to, 8);
9448 BIND(L_fill_8_bytes);
// Loop entry point; count ends up negative, low bits still hold the tail.
9449 subl(count, 1 << (shift + 1));
9450 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
9451 }
9452 }
// Tail: fewer than 8 bytes remain.  Each step tests one bit of count and
// stores 4, 2 or 1 byte(s) accordingly.
9453 // fill trailing 4 bytes
9454 BIND(L_fill_4_bytes);
9455 testl(count, 1<<shift);
9456 jccb(Assembler::zero, L_fill_2_bytes);
9457 movl(Address(to, 0), value);
9458 if (t == T_BYTE || t == T_SHORT) {
9459 addptr(to, 4);
9460 BIND(L_fill_2_bytes);
9461 // fill trailing 2 bytes
9462 testl(count, 1<<(shift-1));
9463 jccb(Assembler::zero, L_fill_byte);
9464 movw(Address(to, 0), value);
9465 if (t == T_BYTE) {
9466 addptr(to, 2);
9467 BIND(L_fill_byte);
9468 // fill trailing byte
9469 testl(count, 1);
9470 jccb(Assembler::zero, L_exit);
9471 movb(Address(to, 0), value);
9472 } else {
// Short fill: no 1-byte tail, so L_fill_byte is bound at the exit point.
9473 BIND(L_fill_byte);
9474 }
9475 } else {
// Int fill: no 2-byte tail, so L_fill_2_bytes is bound at the exit point.
9476 BIND(L_fill_2_bytes);
9477 }
9478 BIND(L_exit);
9479 }
// BIND and BLOCK_COMMENT are removed here so the definitions above do not
// apply to the code that follows.
9480 #undef BIND
9481 #undef BLOCK_COMMENT
9484 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
9485 switch (cond) {
9486 // Note some conditions are synonyms for others
9487 case Assembler::zero: return Assembler::notZero;
9488 case Assembler::notZero: return Assembler::zero;
9489 case Assembler::less: return Assembler::greaterEqual;
9490 case Assembler::lessEqual: return Assembler::greater;
9491 case Assembler::greater: return Assembler::lessEqual;
9492 case Assembler::greaterEqual: return Assembler::less;
9493 case Assembler::below: return Assembler::aboveEqual;
9494 case Assembler::belowEqual: return Assembler::above;
9495 case Assembler::above: return Assembler::belowEqual;
9496 case Assembler::aboveEqual: return Assembler::below;
9497 case Assembler::overflow: return Assembler::noOverflow;
9498 case Assembler::noOverflow: return Assembler::overflow;
9499 case Assembler::negative: return Assembler::positive;
9500 case Assembler::positive: return Assembler::negative;
9501 case Assembler::parity: return Assembler::noParity;
9502 case Assembler::noParity: return Assembler::parity;
9503 }
9504 ShouldNotReachHere(); return Assembler::overflow;
9505 }
9507 SkipIfEqual::SkipIfEqual(
9508 MacroAssembler* masm, const bool* flag_addr, bool value) {
9509 _masm = masm;
9510 _masm->cmp8(ExternalAddress((address)flag_addr), value);
9511 _masm->jcc(Assembler::equal, _label);
9512 }
9514 SkipIfEqual::~SkipIfEqual() {
9515 _masm->bind(_label);
9516 }