src/cpu/x86/vm/assembler_x86.cpp

author       johnc
date         Thu, 07 Apr 2011 09:53:20 -0700
changeset    2781 : e1162778c1c8
parent       2639 : 8033953d67ff
child        2784 : 92add02409c9
permissions  -rw-r--r--

7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking but is re-attached to the strongly reachable object graph during marking may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and intercepting accesses through JNI, reflection, and Unsafe, so that when a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
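As a rough illustration only (this is not the literal code added by this changeset, and the helper names are hypothetical), the intrinsified Reference.get() conceptually becomes:

    // hypothetical sketch of the referent read barrier
    oop referent = load_referent_field(reference);       // the ordinary field load
    if (referent != NULL && satb_marking_is_active()) {  // G1 concurrent marking running
      satb_enqueue(referent);                            // log the referent in an SATB buffer
    }
    return referent;

The same logging is applied when the referent field is reached through JNI, reflection, or Unsafe, as described above.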

     1 /*
     2  * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "assembler_x86.inline.hpp"
    27 #include "gc_interface/collectedHeap.inline.hpp"
    28 #include "interpreter/interpreter.hpp"
    29 #include "memory/cardTableModRefBS.hpp"
    30 #include "memory/resourceArea.hpp"
    31 #include "prims/methodHandles.hpp"
    32 #include "runtime/biasedLocking.hpp"
    33 #include "runtime/interfaceSupport.hpp"
    34 #include "runtime/objectMonitor.hpp"
    35 #include "runtime/os.hpp"
    36 #include "runtime/sharedRuntime.hpp"
    37 #include "runtime/stubRoutines.hpp"
    38 #ifndef SERIALGC
    39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
    41 #include "gc_implementation/g1/heapRegion.hpp"
    42 #endif
    44 // Implementation of AddressLiteral
    46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
    47   _is_lval = false;
    48   _target = target;
    49   switch (rtype) {
    50   case relocInfo::oop_type:
    51     // Oops are a special case. Normally they would be their own section
    52     // but in cases like icBuffer they are literals in the code stream that
    53     // we don't have a section for. We use none so that we get a literal address
    54     // which is always patchable.
    55     break;
    56   case relocInfo::external_word_type:
    57     _rspec = external_word_Relocation::spec(target);
    58     break;
    59   case relocInfo::internal_word_type:
    60     _rspec = internal_word_Relocation::spec(target);
    61     break;
    62   case relocInfo::opt_virtual_call_type:
    63     _rspec = opt_virtual_call_Relocation::spec();
    64     break;
    65   case relocInfo::static_call_type:
    66     _rspec = static_call_Relocation::spec();
    67     break;
    68   case relocInfo::runtime_call_type:
    69     _rspec = runtime_call_Relocation::spec();
    70     break;
    71   case relocInfo::poll_type:
    72   case relocInfo::poll_return_type:
    73     _rspec = Relocation::spec_simple(rtype);
    74     break;
    75   case relocInfo::none:
    76     break;
    77   default:
    78     ShouldNotReachHere();
    79     break;
    80   }
    81 }
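// Editorial note (not in the original source): the relocType passed in above only selects
// which RelocationHolder is attached to the literal; the target address itself is stored
// unchanged, so the relocation machinery can later re-point external and internal words
// if the generated code is moved.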
    83 // Implementation of Address
    85 #ifdef _LP64
    87 Address Address::make_array(ArrayAddress adr) {
    88   // Not implementable on 64bit machines
    89   // Should have been handled higher up the call chain.
    90   ShouldNotReachHere();
    91   return Address();
    92 }
    94 // exceedingly dangerous constructor
    95 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
    96   _base  = noreg;
    97   _index = noreg;
    98   _scale = no_scale;
    99   _disp  = disp;
   100   switch (rtype) {
   101     case relocInfo::external_word_type:
   102       _rspec = external_word_Relocation::spec(loc);
   103       break;
   104     case relocInfo::internal_word_type:
   105       _rspec = internal_word_Relocation::spec(loc);
   106       break;
   107     case relocInfo::runtime_call_type:
   108       // HMM
   109       _rspec = runtime_call_Relocation::spec();
   110       break;
   111     case relocInfo::poll_type:
   112     case relocInfo::poll_return_type:
   113       _rspec = Relocation::spec_simple(rtype);
   114       break;
   115     case relocInfo::none:
   116       break;
   117     default:
   118       ShouldNotReachHere();
   119   }
   120 }
   121 #else // LP64
   123 Address Address::make_array(ArrayAddress adr) {
   124   AddressLiteral base = adr.base();
   125   Address index = adr.index();
   126   assert(index._disp == 0, "must not have disp"); // maybe it can?
   127   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
   128   array._rspec = base._rspec;
   129   return array;
   130 }
   132 // exceedingly dangerous constructor
   133 Address::Address(address loc, RelocationHolder spec) {
   134   _base  = noreg;
   135   _index = noreg;
   136   _scale = no_scale;
   137   _disp  = (intptr_t) loc;
   138   _rspec = spec;
   139 }
   141 #endif // _LP64
   145 // Convert the raw encoding form into the form expected by the constructor for
   146 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
   147 // that to noreg for the Address constructor.
   148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
   149   RelocationHolder rspec;
   150   if (disp_is_oop) {
   151     rspec = Relocation::spec_simple(relocInfo::oop_type);
   152   }
   153   bool valid_index = index != rsp->encoding();
   154   if (valid_index) {
   155     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
   156     madr._rspec = rspec;
   157     return madr;
   158   } else {
   159     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
   160     madr._rspec = rspec;
   161     return madr;
   162   }
   163 }
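// Editorial note (illustrative example, not in the original source): an index of 4 is
// rsp's encoding and therefore means "no index", so e.g.
//   make_raw(rbp->encoding(), 4, 0, 8, false)
// builds the operand [rbp + 8] with index = noreg, while any other index value keeps the
// scaled-index form [base + index*scale + disp].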
   165 // Implementation of Assembler
   167 int AbstractAssembler::code_fill_byte() {
   168   return (u_char)'\xF4'; // hlt
   169 }
   171 // make this go away someday
   172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
   173   if (rtype == relocInfo::none)
   174         emit_long(data);
   175   else  emit_data(data, Relocation::spec_simple(rtype), format);
   176 }
   178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
   179   assert(imm_operand == 0, "default format must be immediate in this file");
   180   assert(inst_mark() != NULL, "must be inside InstructionMark");
   181   if (rspec.type() !=  relocInfo::none) {
   182     #ifdef ASSERT
   183       check_relocation(rspec, format);
   184     #endif
   185     // Do not use AbstractAssembler::relocate, which is not intended for
   186     // embedded words.  Instead, relocate to the enclosing instruction.
   188     // hack. call32 is too wide for mask so use disp32
   189     if (format == call32_operand)
   190       code_section()->relocate(inst_mark(), rspec, disp32_operand);
   191     else
   192       code_section()->relocate(inst_mark(), rspec, format);
   193   }
   194   emit_long(data);
   195 }
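// Editorial note: recording the relocation at inst_mark() (the start of the current
// instruction) rather than at the data word itself is what makes the WhichOperand /
// locate_operand() machinery below necessary -- a relocation later has to re-find the
// embedded 32-bit word inside the instruction it annotates.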
   197 static int encode(Register r) {
   198   int enc = r->encoding();
   199   if (enc >= 8) {
   200     enc -= 8;
   201   }
   202   return enc;
   203 }
   205 static int encode(XMMRegister r) {
   206   int enc = r->encoding();
   207   if (enc >= 8) {
   208     enc -= 8;
   209   }
   210   return enc;
   211 }
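// Editorial note: encodings 8..15 belong to the REX-extended registers
// (r8..r15 / xmm8..xmm15). Only the low three bits fit into a ModRM or SIB field, so
// encode() drops the high bit here and the prefix routines (prefix() / prefix_and_encode())
// re-emit it as the REX.B, REX.X, or REX.R bit of the instruction prefix.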
   213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
   214   assert(dst->has_byte_register(), "must have byte register");
   215   assert(isByte(op1) && isByte(op2), "wrong opcode");
   216   assert(isByte(imm8), "not a byte");
   217   assert((op1 & 0x01) == 0, "should be 8bit operation");
   218   emit_byte(op1);
   219   emit_byte(op2 | encode(dst));
   220   emit_byte(imm8);
   221 }
   224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
   225   assert(isByte(op1) && isByte(op2), "wrong opcode");
   226   assert((op1 & 0x01) == 1, "should be 32bit operation");
   227   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   228   if (is8bit(imm32)) {
   229     emit_byte(op1 | 0x02); // set sign bit
   230     emit_byte(op2 | encode(dst));
   231     emit_byte(imm32 & 0xFF);
   232   } else {
   233     emit_byte(op1);
   234     emit_byte(op2 | encode(dst));
   235     emit_long(imm32);
   236   }
   237 }
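// Editorial note (worked example, not in the original source): addl(rcx, 16) reaches this
// routine with op1 = 0x81 and op2 = 0xC0. Since 16 fits in a signed byte it takes the short
// form and emits 0x83 0xC1 0x10 (sign-extended imm8) instead of the six-byte
// 0x81 0xC1 <imm32> encoding.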
   239 // immediate-to-memory forms
   240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
   241   assert((op1 & 0x01) == 1, "should be 32bit operation");
   242   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   243   if (is8bit(imm32)) {
   244     emit_byte(op1 | 0x02); // set sign bit
   245     emit_operand(rm, adr, 1);
   246     emit_byte(imm32 & 0xFF);
   247   } else {
   248     emit_byte(op1);
   249     emit_operand(rm, adr, 4);
   250     emit_long(imm32);
   251   }
   252 }
   254 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
   255   LP64_ONLY(ShouldNotReachHere());
   256   assert(isByte(op1) && isByte(op2), "wrong opcode");
   257   assert((op1 & 0x01) == 1, "should be 32bit operation");
   258   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   259   InstructionMark im(this);
   260   emit_byte(op1);
   261   emit_byte(op2 | encode(dst));
   262   emit_data((intptr_t)obj, relocInfo::oop_type, 0);
   263 }
   266 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
   267   assert(isByte(op1) && isByte(op2), "wrong opcode");
   268   emit_byte(op1);
   269   emit_byte(op2 | encode(dst) << 3 | encode(src));
   270 }
   273 void Assembler::emit_operand(Register reg, Register base, Register index,
   274                              Address::ScaleFactor scale, int disp,
   275                              RelocationHolder const& rspec,
   276                              int rip_relative_correction) {
   277   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
   279   // Encode the registers as needed in the fields they are used in
   281   int regenc = encode(reg) << 3;
   282   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
   283   int baseenc = base->is_valid() ? encode(base) : 0;
   285   if (base->is_valid()) {
   286     if (index->is_valid()) {
   287       assert(scale != Address::no_scale, "inconsistent address");
   288       // [base + index*scale + disp]
   289       if (disp == 0 && rtype == relocInfo::none  &&
   290           base != rbp LP64_ONLY(&& base != r13)) {
   291         // [base + index*scale]
   292         // [00 reg 100][ss index base]
   293         assert(index != rsp, "illegal addressing mode");
   294         emit_byte(0x04 | regenc);
   295         emit_byte(scale << 6 | indexenc | baseenc);
   296       } else if (is8bit(disp) && rtype == relocInfo::none) {
   297         // [base + index*scale + imm8]
   298         // [01 reg 100][ss index base] imm8
   299         assert(index != rsp, "illegal addressing mode");
   300         emit_byte(0x44 | regenc);
   301         emit_byte(scale << 6 | indexenc | baseenc);
   302         emit_byte(disp & 0xFF);
   303       } else {
   304         // [base + index*scale + disp32]
   305         // [10 reg 100][ss index base] disp32
   306         assert(index != rsp, "illegal addressing mode");
   307         emit_byte(0x84 | regenc);
   308         emit_byte(scale << 6 | indexenc | baseenc);
   309         emit_data(disp, rspec, disp32_operand);
   310       }
   311     } else if (base == rsp LP64_ONLY(|| base == r12)) {
   312       // [rsp + disp]
   313       if (disp == 0 && rtype == relocInfo::none) {
   314         // [rsp]
   315         // [00 reg 100][00 100 100]
   316         emit_byte(0x04 | regenc);
   317         emit_byte(0x24);
   318       } else if (is8bit(disp) && rtype == relocInfo::none) {
   319         // [rsp + imm8]
   320         // [01 reg 100][00 100 100] disp8
   321         emit_byte(0x44 | regenc);
   322         emit_byte(0x24);
   323         emit_byte(disp & 0xFF);
   324       } else {
   325         // [rsp + imm32]
   326         // [10 reg 100][00 100 100] disp32
   327         emit_byte(0x84 | regenc);
   328         emit_byte(0x24);
   329         emit_data(disp, rspec, disp32_operand);
   330       }
   331     } else {
   332       // [base + disp]
   333       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
   334       if (disp == 0 && rtype == relocInfo::none &&
   335           base != rbp LP64_ONLY(&& base != r13)) {
   336         // [base]
   337         // [00 reg base]
   338         emit_byte(0x00 | regenc | baseenc);
   339       } else if (is8bit(disp) && rtype == relocInfo::none) {
   340         // [base + disp8]
   341         // [01 reg base] disp8
   342         emit_byte(0x40 | regenc | baseenc);
   343         emit_byte(disp & 0xFF);
   344       } else {
   345         // [base + disp32]
   346         // [10 reg base] disp32
   347         emit_byte(0x80 | regenc | baseenc);
   348         emit_data(disp, rspec, disp32_operand);
   349       }
   350     }
   351   } else {
   352     if (index->is_valid()) {
   353       assert(scale != Address::no_scale, "inconsistent address");
   354       // [index*scale + disp]
   355       // [00 reg 100][ss index 101] disp32
   356       assert(index != rsp, "illegal addressing mode");
   357       emit_byte(0x04 | regenc);
   358       emit_byte(scale << 6 | indexenc | 0x05);
   359       emit_data(disp, rspec, disp32_operand);
   360     } else if (rtype != relocInfo::none ) {
   361       // [disp] (64bit) RIP-RELATIVE (32bit) abs
   362       // [00 000 101] disp32
   364       emit_byte(0x05 | regenc);
   365       // Note that the RIP-rel. correction applies to the generated
   366       // disp field, but _not_ to the target address in the rspec.
   368       // disp was created by converting the target address minus the pc
   369       // at the start of the instruction. That needs more correction here.
   370       // intptr_t disp = target - next_ip;
   371       assert(inst_mark() != NULL, "must be inside InstructionMark");
   372       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
   373       int64_t adjusted = disp;
   374       // Do rip-rel adjustment for 64bit
   375       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
   376       assert(is_simm32(adjusted),
   377              "must be 32bit offset (RIP relative address)");
   378       emit_data((int32_t) adjusted, rspec, disp32_operand);
   380     } else {
   381       // 32bit never did this, did everything as the rip-rel/disp code above
   382       // [disp] ABSOLUTE
   383       // [00 reg 100][00 100 101] disp32
   384       emit_byte(0x04 | regenc);
   385       emit_byte(0x25);
   386       emit_data(disp, rspec, disp32_operand);
   387     }
   388   }
   389 }
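// Editorial note (worked example, not in the original source): for the operand
// [rbx + rcx*4 + 16] with reg = rax the routine above emits
//   0x44   ModRM [01 000 100]  (mod=01: disp8 follows, reg=rax, rm=100: SIB byte follows)
//   0x8B   SIB   [10 001 011]  (ss=10: *4, index=rcx, base=rbx)
//   0x10   disp8
// Displacements that need 32 bits (or carry a relocation) go through emit_data() instead,
// so the relocation code can later locate the embedded disp32.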
   391 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
   392                              Address::ScaleFactor scale, int disp,
   393                              RelocationHolder const& rspec) {
   394   emit_operand((Register)reg, base, index, scale, disp, rspec);
   395 }
   397 // Secret local extension to Assembler::WhichOperand:
   398 #define end_pc_operand (_WhichOperand_limit)
   400 address Assembler::locate_operand(address inst, WhichOperand which) {
   401   // Decode the given instruction, and return the address of
   402   // an embedded 32-bit operand word.
   404   // If "which" is disp32_operand, selects the displacement portion
   405   // of an effective address specifier.
   406   // If "which" is imm64_operand, selects the trailing immediate constant.
   407   // If "which" is call32_operand, selects the displacement of a call or jump.
   408   // Caller is responsible for ensuring that there is such an operand,
   409   // and that it is 32/64 bits wide.
   411   // If "which" is end_pc_operand, find the end of the instruction.
   413   address ip = inst;
   414   bool is_64bit = false;
   416   debug_only(bool has_disp32 = false);
   417   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
   419   again_after_prefix:
   420   switch (0xFF & *ip++) {
   422   // These convenience macros generate groups of "case" labels for the switch.
   423 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
   424 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
   425              case (x)+4: case (x)+5: case (x)+6: case (x)+7
   426 #define REP16(x) REP8((x)+0): \
   427               case REP8((x)+8)
   429   case CS_segment:
   430   case SS_segment:
   431   case DS_segment:
   432   case ES_segment:
   433   case FS_segment:
   434   case GS_segment:
   435     // Seems dubious
   436     LP64_ONLY(assert(false, "shouldn't have that prefix"));
   437     assert(ip == inst+1, "only one prefix allowed");
   438     goto again_after_prefix;
   440   case 0x67:
   441   case REX:
   442   case REX_B:
   443   case REX_X:
   444   case REX_XB:
   445   case REX_R:
   446   case REX_RB:
   447   case REX_RX:
   448   case REX_RXB:
   449     NOT_LP64(assert(false, "64bit prefixes"));
   450     goto again_after_prefix;
   452   case REX_W:
   453   case REX_WB:
   454   case REX_WX:
   455   case REX_WXB:
   456   case REX_WR:
   457   case REX_WRB:
   458   case REX_WRX:
   459   case REX_WRXB:
   460     NOT_LP64(assert(false, "64bit prefixes"));
   461     is_64bit = true;
   462     goto again_after_prefix;
   464   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
   465   case 0x88: // movb a, r
   466   case 0x89: // movl a, r
   467   case 0x8A: // movb r, a
   468   case 0x8B: // movl r, a
   469   case 0x8F: // popl a
   470     debug_only(has_disp32 = true);
   471     break;
   473   case 0x68: // pushq #32
   474     if (which == end_pc_operand) {
   475       return ip + 4;
   476     }
   477     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
   478     return ip;                  // not produced by emit_operand
   480   case 0x66: // movw ... (size prefix)
   481     again_after_size_prefix2:
   482     switch (0xFF & *ip++) {
   483     case REX:
   484     case REX_B:
   485     case REX_X:
   486     case REX_XB:
   487     case REX_R:
   488     case REX_RB:
   489     case REX_RX:
   490     case REX_RXB:
   491     case REX_W:
   492     case REX_WB:
   493     case REX_WX:
   494     case REX_WXB:
   495     case REX_WR:
   496     case REX_WRB:
   497     case REX_WRX:
   498     case REX_WRXB:
   499       NOT_LP64(assert(false, "64bit prefix found"));
   500       goto again_after_size_prefix2;
   501     case 0x8B: // movw r, a
   502     case 0x89: // movw a, r
   503       debug_only(has_disp32 = true);
   504       break;
   505     case 0xC7: // movw a, #16
   506       debug_only(has_disp32 = true);
   507       tail_size = 2;  // the imm16
   508       break;
   509     case 0x0F: // several SSE/SSE2 variants
   510       ip--;    // reparse the 0x0F
   511       goto again_after_prefix;
   512     default:
   513       ShouldNotReachHere();
   514     }
   515     break;
   517   case REP8(0xB8): // movl/q r, #32/#64(oop?)
   518     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
   519     // these asserts are somewhat nonsensical
   520 #ifndef _LP64
   521     assert(which == imm_operand || which == disp32_operand, "");
   522 #else
   523     assert((which == call32_operand || which == imm_operand) && is_64bit ||
   524            which == narrow_oop_operand && !is_64bit, "");
   525 #endif // _LP64
   526     return ip;
   528   case 0x69: // imul r, a, #32
   529   case 0xC7: // movl a, #32(oop?)
   530     tail_size = 4;
   531     debug_only(has_disp32 = true); // has both kinds of operands!
   532     break;
   534   case 0x0F: // movx..., etc.
   535     switch (0xFF & *ip++) {
   536     case 0x12: // movlps
   537     case 0x28: // movaps
   538     case 0x2E: // ucomiss
   539     case 0x2F: // comiss
   540     case 0x54: // andps
   541     case 0x55: // andnps
   542     case 0x56: // orps
   543     case 0x57: // xorps
   544     case 0x6E: // movd
   545     case 0x7E: // movd
   546     case 0xAE: // ldmxcsr   a
   547       // 64bit side says these have both operands, but that doesn't
   548       // appear to be true
   549       debug_only(has_disp32 = true);
   550       break;
   552     case 0xAD: // shrd r, a, %cl
   553     case 0xAF: // imul r, a
   554     case 0xBE: // movsbl r, a (movsxb)
   555     case 0xBF: // movswl r, a (movsxw)
   556     case 0xB6: // movzbl r, a (movzxb)
   557     case 0xB7: // movzwl r, a (movzxw)
   558     case REP16(0x40): // cmovl cc, r, a
   559     case 0xB0: // cmpxchgb
   560     case 0xB1: // cmpxchg
   561     case 0xC1: // xaddl
   562     case 0xC7: // cmpxchg8
   563     case REP16(0x90): // setcc a
   564       debug_only(has_disp32 = true);
   565       // fall out of the switch to decode the address
   566       break;
   568     case 0xAC: // shrd r, a, #8
   569       debug_only(has_disp32 = true);
   570       tail_size = 1;  // the imm8
   571       break;
   573     case REP16(0x80): // jcc rdisp32
   574       if (which == end_pc_operand)  return ip + 4;
   575       assert(which == call32_operand, "jcc has no disp32 or imm");
   576       return ip;
   577     default:
   578       ShouldNotReachHere();
   579     }
   580     break;
   582   case 0x81: // addl a, #32; addl r, #32
   583     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
   584     // on 32bit in the case of cmpl, the imm might be an oop
   585     tail_size = 4;
   586     debug_only(has_disp32 = true); // has both kinds of operands!
   587     break;
   589   case 0x83: // addl a, #8; addl r, #8
   590     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
   591     debug_only(has_disp32 = true); // has both kinds of operands!
   592     tail_size = 1;
   593     break;
   595   case 0x9B:
   596     switch (0xFF & *ip++) {
   597     case 0xD9: // fnstcw a
   598       debug_only(has_disp32 = true);
   599       break;
   600     default:
   601       ShouldNotReachHere();
   602     }
   603     break;
   605   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
   606   case REP4(0x10): // adc...
   607   case REP4(0x20): // and...
   608   case REP4(0x30): // xor...
   609   case REP4(0x08): // or...
   610   case REP4(0x18): // sbb...
   611   case REP4(0x28): // sub...
   612   case 0xF7: // mull a
   613   case 0x8D: // lea r, a
   614   case 0x87: // xchg r, a
   615   case REP4(0x38): // cmp...
   616   case 0x85: // test r, a
   617     debug_only(has_disp32 = true); // has both kinds of operands!
   618     break;
   620   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
   621   case 0xC6: // movb a, #8
   622   case 0x80: // cmpb a, #8
   623   case 0x6B: // imul r, a, #8
   624     debug_only(has_disp32 = true); // has both kinds of operands!
   625     tail_size = 1; // the imm8
   626     break;
   628   case 0xE8: // call rdisp32
   629   case 0xE9: // jmp  rdisp32
   630     if (which == end_pc_operand)  return ip + 4;
   631     assert(which == call32_operand, "call has no disp32 or imm");
   632     return ip;
   634   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
   635   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
   636   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
   637   case 0xDD: // fld_d a; fst_d a; fstp_d a
   638   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
   639   case 0xDF: // fild_d a; fistp_d a
   640   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
   641   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
   642   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
   643     debug_only(has_disp32 = true);
   644     break;
   646   case 0xF0:                    // Lock
   647     assert(os::is_MP(), "only on MP");
   648     goto again_after_prefix;
   650   case 0xF3:                    // For SSE
   651   case 0xF2:                    // For SSE2
   652     switch (0xFF & *ip++) {
   653     case REX:
   654     case REX_B:
   655     case REX_X:
   656     case REX_XB:
   657     case REX_R:
   658     case REX_RB:
   659     case REX_RX:
   660     case REX_RXB:
   661     case REX_W:
   662     case REX_WB:
   663     case REX_WX:
   664     case REX_WXB:
   665     case REX_WR:
   666     case REX_WRB:
   667     case REX_WRX:
   668     case REX_WRXB:
   669       NOT_LP64(assert(false, "found 64bit prefix"));
   670       ip++;
   671     default:
   672       ip++;
   673     }
   674     debug_only(has_disp32 = true); // has both kinds of operands!
   675     break;
   677   default:
   678     ShouldNotReachHere();
   680 #undef REP8
   681 #undef REP16
   682   }
   684   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
   685 #ifdef _LP64
   686   assert(which != imm_operand, "instruction is not a movq reg, imm64");
   687 #else
   688   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
   689   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
   690 #endif // LP64
   691   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
   693   // parse the output of emit_operand
   694   int op2 = 0xFF & *ip++;
   695   int base = op2 & 0x07;
   696   int op3 = -1;
   697   const int b100 = 4;
   698   const int b101 = 5;
   699   if (base == b100 && (op2 >> 6) != 3) {
   700     op3 = 0xFF & *ip++;
   701     base = op3 & 0x07;   // refetch the base
   702   }
   703   // now ip points at the disp (if any)
   705   switch (op2 >> 6) {
   706   case 0:
   707     // [00 reg  100][ss index base]
   708     // [00 reg  100][00   100  esp]
   709     // [00 reg base]
   710     // [00 reg  100][ss index  101][disp32]
   711     // [00 reg  101]               [disp32]
   713     if (base == b101) {
   714       if (which == disp32_operand)
   715         return ip;              // caller wants the disp32
   716       ip += 4;                  // skip the disp32
   717     }
   718     break;
   720   case 1:
   721     // [01 reg  100][ss index base][disp8]
   722     // [01 reg  100][00   100  esp][disp8]
   723     // [01 reg base]               [disp8]
   724     ip += 1;                    // skip the disp8
   725     break;
   727   case 2:
   728     // [10 reg  100][ss index base][disp32]
   729     // [10 reg  100][00   100  esp][disp32]
   730     // [10 reg base]               [disp32]
   731     if (which == disp32_operand)
   732       return ip;                // caller wants the disp32
   733     ip += 4;                    // skip the disp32
   734     break;
   736   case 3:
   737     // [11 reg base]  (not a memory addressing mode)
   738     break;
   739   }
   741   if (which == end_pc_operand) {
   742     return ip + tail_size;
   743   }
   745 #ifdef _LP64
   746   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
   747 #else
   748   assert(which == imm_operand, "instruction has only an imm field");
   749 #endif // LP64
   750   return ip;
   751 }
   753 address Assembler::locate_next_instruction(address inst) {
   754   // Secretly share code with locate_operand:
   755   return locate_operand(inst, end_pc_operand);
   756 }
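// Editorial note: locate_operand() is in effect a one-instruction x86 decoder. It is used
// just below by check_relocation() to verify that an embedded 32-bit word (displacement,
// immediate, or call target) sits exactly where a relocation will look for it, and by
// locate_next_instruction() with end_pc_operand to step over a whole instruction.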
   759 #ifdef ASSERT
   760 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
   761   address inst = inst_mark();
   762   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
   763   address opnd;
   765   Relocation* r = rspec.reloc();
   766   if (r->type() == relocInfo::none) {
   767     return;
   768   } else if (r->is_call() || format == call32_operand) {
   769     // assert(format == imm32_operand, "cannot specify a nonzero format");
   770     opnd = locate_operand(inst, call32_operand);
   771   } else if (r->is_data()) {
   772     assert(format == imm_operand || format == disp32_operand
   773            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
   774     opnd = locate_operand(inst, (WhichOperand)format);
   775   } else {
   776     assert(format == imm_operand, "cannot specify a format");
   777     return;
   778   }
   779   assert(opnd == pc(), "must put operand where relocs can find it");
   780 }
   781 #endif // ASSERT
   783 void Assembler::emit_operand32(Register reg, Address adr) {
   784   assert(reg->encoding() < 8, "no extended registers");
   785   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
   786   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
   787                adr._rspec);
   788 }
   790 void Assembler::emit_operand(Register reg, Address adr,
   791                              int rip_relative_correction) {
   792   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
   793                adr._rspec,
   794                rip_relative_correction);
   795 }
   797 void Assembler::emit_operand(XMMRegister reg, Address adr) {
   798   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
   799                adr._rspec);
   800 }
   802 // MMX operations
   803 void Assembler::emit_operand(MMXRegister reg, Address adr) {
   804   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
   805   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
   806 }
   808 // work around gcc (3.2.1-7a) bug
   809 void Assembler::emit_operand(Address adr, MMXRegister reg) {
   810   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
   811   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
   812 }
   815 void Assembler::emit_farith(int b1, int b2, int i) {
   816   assert(isByte(b1) && isByte(b2), "wrong opcode");
   817   assert(0 <= i &&  i < 8, "illegal stack offset");
   818   emit_byte(b1);
   819   emit_byte(b2 + i);
   820 }
   823 // Now the Assembler instructions (identical for 32/64 bits)
   825 void Assembler::adcl(Address dst, int32_t imm32) {
   826   InstructionMark im(this);
   827   prefix(dst);
   828   emit_arith_operand(0x81, rdx, dst, imm32);
   829 }
   831 void Assembler::adcl(Address dst, Register src) {
   832   InstructionMark im(this);
   833   prefix(dst, src);
   834   emit_byte(0x11);
   835   emit_operand(src, dst);
   836 }
   838 void Assembler::adcl(Register dst, int32_t imm32) {
   839   prefix(dst);
   840   emit_arith(0x81, 0xD0, dst, imm32);
   841 }
   843 void Assembler::adcl(Register dst, Address src) {
   844   InstructionMark im(this);
   845   prefix(src, dst);
   846   emit_byte(0x13);
   847   emit_operand(dst, src);
   848 }
   850 void Assembler::adcl(Register dst, Register src) {
   851   (void) prefix_and_encode(dst->encoding(), src->encoding());
   852   emit_arith(0x13, 0xC0, dst, src);
   853 }
   855 void Assembler::addl(Address dst, int32_t imm32) {
   856   InstructionMark im(this);
   857   prefix(dst);
   858   emit_arith_operand(0x81, rax, dst, imm32);
   859 }
   861 void Assembler::addl(Address dst, Register src) {
   862   InstructionMark im(this);
   863   prefix(dst, src);
   864   emit_byte(0x01);
   865   emit_operand(src, dst);
   866 }
   868 void Assembler::addl(Register dst, int32_t imm32) {
   869   prefix(dst);
   870   emit_arith(0x81, 0xC0, dst, imm32);
   871 }
   873 void Assembler::addl(Register dst, Address src) {
   874   InstructionMark im(this);
   875   prefix(src, dst);
   876   emit_byte(0x03);
   877   emit_operand(dst, src);
   878 }
   880 void Assembler::addl(Register dst, Register src) {
   881   (void) prefix_and_encode(dst->encoding(), src->encoding());
   882   emit_arith(0x03, 0xC0, dst, src);
   883 }
   885 void Assembler::addr_nop_4() {
   886   // 4 bytes: NOP DWORD PTR [EAX+0]
   887   emit_byte(0x0F);
   888   emit_byte(0x1F);
   889   emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
   890   emit_byte(0);    // 8-bits offset (1 byte)
   891 }
   893 void Assembler::addr_nop_5() {
   894   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
   895   emit_byte(0x0F);
   896   emit_byte(0x1F);
   897   emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
   898   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
   899   emit_byte(0);    // 8-bits offset (1 byte)
   900 }
   902 void Assembler::addr_nop_7() {
   903   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
   904   emit_byte(0x0F);
   905   emit_byte(0x1F);
   906   emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
   907   emit_long(0);    // 32-bits offset (4 bytes)
   908 }
   910 void Assembler::addr_nop_8() {
   911   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
   912   emit_byte(0x0F);
   913   emit_byte(0x1F);
   914   emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
   915   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
   916   emit_long(0);    // 32-bits offset (4 bytes)
   917 }
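// Editorial note: the addr_nop_4/5/7/8 sequences above are the recommended multi-byte
// no-ops (opcode 0F 1F /0 with progressively longer address forms). They are typically
// used to pad generated code out to an alignment boundary with a single instruction
// rather than a run of one-byte 0x90 NOPs.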
   919 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   920   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   921   emit_byte(0xF2);
   922   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   923   emit_byte(0x0F);
   924   emit_byte(0x58);
   925   emit_byte(0xC0 | encode);
   926 }
   928 void Assembler::addsd(XMMRegister dst, Address src) {
   929   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   930   InstructionMark im(this);
   931   emit_byte(0xF2);
   932   prefix(src, dst);
   933   emit_byte(0x0F);
   934   emit_byte(0x58);
   935   emit_operand(dst, src);
   936 }
   938 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   939   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   940   emit_byte(0xF3);
   941   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   942   emit_byte(0x0F);
   943   emit_byte(0x58);
   944   emit_byte(0xC0 | encode);
   945 }
   947 void Assembler::addss(XMMRegister dst, Address src) {
   948   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   949   InstructionMark im(this);
   950   emit_byte(0xF3);
   951   prefix(src, dst);
   952   emit_byte(0x0F);
   953   emit_byte(0x58);
   954   emit_operand(dst, src);
   955 }
   957 void Assembler::andl(Register dst, int32_t imm32) {
   958   prefix(dst);
   959   emit_arith(0x81, 0xE0, dst, imm32);
   960 }
   962 void Assembler::andl(Register dst, Address src) {
   963   InstructionMark im(this);
   964   prefix(src, dst);
   965   emit_byte(0x23);
   966   emit_operand(dst, src);
   967 }
   969 void Assembler::andl(Register dst, Register src) {
   970   (void) prefix_and_encode(dst->encoding(), src->encoding());
   971   emit_arith(0x23, 0xC0, dst, src);
   972 }
   974 void Assembler::andpd(XMMRegister dst, Address src) {
   975   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   976   InstructionMark im(this);
   977   emit_byte(0x66);
   978   prefix(src, dst);
   979   emit_byte(0x0F);
   980   emit_byte(0x54);
   981   emit_operand(dst, src);
   982 }
   984 void Assembler::bsfl(Register dst, Register src) {
   985   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   986   emit_byte(0x0F);
   987   emit_byte(0xBC);
   988   emit_byte(0xC0 | encode);
   989 }
   991 void Assembler::bsrl(Register dst, Register src) {
   992   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   993   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   994   emit_byte(0x0F);
   995   emit_byte(0xBD);
   996   emit_byte(0xC0 | encode);
   997 }
   999 void Assembler::bswapl(Register reg) { // bswap
  1000   int encode = prefix_and_encode(reg->encoding());
  1001   emit_byte(0x0F);
  1002   emit_byte(0xC8 | encode);
  1005 void Assembler::call(Label& L, relocInfo::relocType rtype) {
  1006   // suspect disp32 is always good
  1007   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
  1009   if (L.is_bound()) {
  1010     const int long_size = 5;
  1011     int offs = (int)( target(L) - pc() );
  1012     assert(offs <= 0, "assembler error");
  1013     InstructionMark im(this);
  1014     // 1110 1000 #32-bit disp
  1015     emit_byte(0xE8);
  1016     emit_data(offs - long_size, rtype, operand);
  1017   } else {
  1018     InstructionMark im(this);
  1019     // 1110 1000 #32-bit disp
  1020     L.add_patch_at(code(), locator());
  1022     emit_byte(0xE8);
  1023     emit_data(int(0), rtype, operand);
  1024   }
  1025 }
  1027 void Assembler::call(Register dst) {
  1028   // This was originally using a 32bit register encoding
  1029   // and surely we want 64bit!
  1030   // this is a 32bit encoding but in 64bit mode the default
  1031   // operand size is 64bit so there is no need for the
  1032   // wide prefix. So prefix only happens if we use the
  1033   // new registers. Much like push/pop.
  1034   int x = offset();
  1035   // this may be true but dbx disassembles it as if it
  1036   // were 32bits...
  1037   // int encode = prefix_and_encode(dst->encoding());
  1038   // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  1039   int encode = prefixq_and_encode(dst->encoding());
  1041   emit_byte(0xFF);
  1042   emit_byte(0xD0 | encode);
  1046 void Assembler::call(Address adr) {
  1047   InstructionMark im(this);
  1048   prefix(adr);
  1049   emit_byte(0xFF);
  1050   emit_operand(rdx, adr);
  1053 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  1054   assert(entry != NULL, "call most probably wrong");
  1055   InstructionMark im(this);
  1056   emit_byte(0xE8);
  1057   intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  1058   assert(is_simm32(disp), "must be 32bit offset (call2)");
  1059   // Technically, should use call32_operand, but this format is
  1060   // implied by the fact that we're emitting a call instruction.
  1062   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  1063   emit_data((int) disp, rspec, operand);
  1066 void Assembler::cdql() {
  1067   emit_byte(0x99);
  1070 void Assembler::cmovl(Condition cc, Register dst, Register src) {
  1071   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  1072   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1073   emit_byte(0x0F);
  1074   emit_byte(0x40 | cc);
  1075   emit_byte(0xC0 | encode);
  1079 void Assembler::cmovl(Condition cc, Register dst, Address src) {
  1080   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  1081   prefix(src, dst);
  1082   emit_byte(0x0F);
  1083   emit_byte(0x40 | cc);
  1084   emit_operand(dst, src);
  1087 void Assembler::cmpb(Address dst, int imm8) {
  1088   InstructionMark im(this);
  1089   prefix(dst);
  1090   emit_byte(0x80);
  1091   emit_operand(rdi, dst, 1);
  1092   emit_byte(imm8);
  1095 void Assembler::cmpl(Address dst, int32_t imm32) {
  1096   InstructionMark im(this);
  1097   prefix(dst);
  1098   emit_byte(0x81);
  1099   emit_operand(rdi, dst, 4);
  1100   emit_long(imm32);
  1103 void Assembler::cmpl(Register dst, int32_t imm32) {
  1104   prefix(dst);
  1105   emit_arith(0x81, 0xF8, dst, imm32);
  1108 void Assembler::cmpl(Register dst, Register src) {
  1109   (void) prefix_and_encode(dst->encoding(), src->encoding());
  1110   emit_arith(0x3B, 0xC0, dst, src);
  1114 void Assembler::cmpl(Register dst, Address  src) {
  1115   InstructionMark im(this);
  1116   prefix(src, dst);
  1117   emit_byte(0x3B);
  1118   emit_operand(dst, src);
  1121 void Assembler::cmpw(Address dst, int imm16) {
  1122   InstructionMark im(this);
  1123   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  1124   emit_byte(0x66);
  1125   emit_byte(0x81);
  1126   emit_operand(rdi, dst, 2);
  1127   emit_word(imm16);
  1130 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
  1131 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax.
  1132 // The ZF is set if the compared values were equal, and cleared otherwise.
  1133 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  1134   if (Atomics & 2) {
  1135      // caveat: no instructionmark, so this isn't relocatable.
  1136      // Emit a synthetic, non-atomic, CAS equivalent.
  1137      // Beware.  The synthetic form sets all ICCs, not just ZF.
  1138      // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
  1139      cmpl(rax, adr);
  1140      movl(rax, adr);
  1141      if (reg != rax) {
  1142         Label L ;
  1143         jcc(Assembler::notEqual, L);
  1144         movl(adr, reg);
  1145         bind(L);
  1146      }
  1147   } else {
  1148      InstructionMark im(this);
  1149      prefix(adr, reg);
  1150      emit_byte(0x0F);
  1151      emit_byte(0xB1);
  1152      emit_operand(reg, adr);
  1153   }
  1154 }
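// Editorial note: callers that need the compare-and-swap to be atomic typically emit the
// lock prefix first (see lock() further down), i.e. lock(); cmpxchgl(reg, adr). The
// Atomics & 2 path above is a non-atomic diagnostic substitute and, as its caveat says,
// is not relocatable.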
  1156 void Assembler::comisd(XMMRegister dst, Address src) {
  1157   // NOTE: dbx seems to decode this as comiss even though the
  1158   // 0x66 is there. Strangely, ucomisd comes out correct.
  1159   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1160   emit_byte(0x66);
  1161   comiss(dst, src);
  1164 void Assembler::comiss(XMMRegister dst, Address src) {
  1165   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1167   InstructionMark im(this);
  1168   prefix(src, dst);
  1169   emit_byte(0x0F);
  1170   emit_byte(0x2F);
  1171   emit_operand(dst, src);
  1174 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  1175   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1176   emit_byte(0xF3);
  1177   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1178   emit_byte(0x0F);
  1179   emit_byte(0xE6);
  1180   emit_byte(0xC0 | encode);
  1183 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  1184   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1185   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1186   emit_byte(0x0F);
  1187   emit_byte(0x5B);
  1188   emit_byte(0xC0 | encode);
  1191 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  1192   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1193   emit_byte(0xF2);
  1194   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1195   emit_byte(0x0F);
  1196   emit_byte(0x5A);
  1197   emit_byte(0xC0 | encode);
  1200 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  1201   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1202   emit_byte(0xF2);
  1203   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1204   emit_byte(0x0F);
  1205   emit_byte(0x2A);
  1206   emit_byte(0xC0 | encode);
  1209 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  1210   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1211   emit_byte(0xF3);
  1212   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1213   emit_byte(0x0F);
  1214   emit_byte(0x2A);
  1215   emit_byte(0xC0 | encode);
  1218 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  1219   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1220   emit_byte(0xF3);
  1221   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1222   emit_byte(0x0F);
  1223   emit_byte(0x5A);
  1224   emit_byte(0xC0 | encode);
  1227 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  1228   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1229   emit_byte(0xF2);
  1230   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1231   emit_byte(0x0F);
  1232   emit_byte(0x2C);
  1233   emit_byte(0xC0 | encode);
  1236 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  1237   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1238   emit_byte(0xF3);
  1239   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1240   emit_byte(0x0F);
  1241   emit_byte(0x2C);
  1242   emit_byte(0xC0 | encode);
  1245 void Assembler::decl(Address dst) {
  1246   // Don't use it directly. Use MacroAssembler::decrement() instead.
  1247   InstructionMark im(this);
  1248   prefix(dst);
  1249   emit_byte(0xFF);
  1250   emit_operand(rcx, dst);
  1253 void Assembler::divsd(XMMRegister dst, Address src) {
  1254   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1255   InstructionMark im(this);
  1256   emit_byte(0xF2);
  1257   prefix(src, dst);
  1258   emit_byte(0x0F);
  1259   emit_byte(0x5E);
  1260   emit_operand(dst, src);
  1263 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  1264   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1265   emit_byte(0xF2);
  1266   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1267   emit_byte(0x0F);
  1268   emit_byte(0x5E);
  1269   emit_byte(0xC0 | encode);
  1272 void Assembler::divss(XMMRegister dst, Address src) {
  1273   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1274   InstructionMark im(this);
  1275   emit_byte(0xF3);
  1276   prefix(src, dst);
  1277   emit_byte(0x0F);
  1278   emit_byte(0x5E);
  1279   emit_operand(dst, src);
  1282 void Assembler::divss(XMMRegister dst, XMMRegister src) {
  1283   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1284   emit_byte(0xF3);
  1285   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1286   emit_byte(0x0F);
  1287   emit_byte(0x5E);
  1288   emit_byte(0xC0 | encode);
  1291 void Assembler::emms() {
  1292   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  1293   emit_byte(0x0F);
  1294   emit_byte(0x77);
  1297 void Assembler::hlt() {
  1298   emit_byte(0xF4);
  1301 void Assembler::idivl(Register src) {
  1302   int encode = prefix_and_encode(src->encoding());
  1303   emit_byte(0xF7);
  1304   emit_byte(0xF8 | encode);
  1307 void Assembler::divl(Register src) { // Unsigned
  1308   int encode = prefix_and_encode(src->encoding());
  1309   emit_byte(0xF7);
  1310   emit_byte(0xF0 | encode);
  1313 void Assembler::imull(Register dst, Register src) {
  1314   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1315   emit_byte(0x0F);
  1316   emit_byte(0xAF);
  1317   emit_byte(0xC0 | encode);
  1321 void Assembler::imull(Register dst, Register src, int value) {
  1322   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1323   if (is8bit(value)) {
  1324     emit_byte(0x6B);
  1325     emit_byte(0xC0 | encode);
  1326     emit_byte(value & 0xFF);
  1327   } else {
  1328     emit_byte(0x69);
  1329     emit_byte(0xC0 | encode);
  1330     emit_long(value);
  1331   }
  1332 }
  1334 void Assembler::incl(Address dst) {
  1335   // Don't use it directly. Use MacroAssembler::increment() instead.
  1336   InstructionMark im(this);
  1337   prefix(dst);
  1338   emit_byte(0xFF);
  1339   emit_operand(rax, dst);
  1342 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  1343   InstructionMark im(this);
  1344   relocate(rtype);
  1345   assert((0 <= cc) && (cc < 16), "illegal cc");
  1346   if (L.is_bound()) {
  1347     address dst = target(L);
  1348     assert(dst != NULL, "jcc most probably wrong");
  1350     const int short_size = 2;
  1351     const int long_size = 6;
  1352     intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
  1353     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
  1354       // 0111 tttn #8-bit disp
  1355       emit_byte(0x70 | cc);
  1356       emit_byte((offs - short_size) & 0xFF);
  1357     } else {
  1358       // 0000 1111 1000 tttn #32-bit disp
  1359       assert(is_simm32(offs - long_size),
  1360              "must be 32bit offset (call4)");
  1361       emit_byte(0x0F);
  1362       emit_byte(0x80 | cc);
  1363       emit_long(offs - long_size);
  1364     }
  1365   } else {
  1366     // Note: we could eliminate conditional jumps to this jump if the condition
  1367     //       is the same; however, that seems to be a rather unlikely case.
  1368     // Note: use jccb() if label to be bound is very close to get
  1369     //       an 8-bit displacement
  1370     L.add_patch_at(code(), locator());
  1371     emit_byte(0x0F);
  1372     emit_byte(0x80 | cc);
  1373     emit_long(0);
  1374   }
  1375 }
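// Editorial note (summary of the logic above): a bound branch whose displacement fits in a
// signed byte and that needs no relocation gets the two-byte 0x7x disp8 form; anything else
// gets the six-byte 0x0F 0x8x disp32 form. Unbound forward branches always reserve the
// 32-bit form so the patch made when the Label binds is guaranteed to fit -- use jccb()
// when the target is known to be close.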
  1377 void Assembler::jccb(Condition cc, Label& L) {
  1378   if (L.is_bound()) {
  1379     const int short_size = 2;
  1380     address entry = target(L);
  1381     assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
  1382            "Displacement too large for a short jmp");
  1383     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
  1384     // 0111 tttn #8-bit disp
  1385     emit_byte(0x70 | cc);
  1386     emit_byte((offs - short_size) & 0xFF);
  1387   } else {
  1388     InstructionMark im(this);
  1389     L.add_patch_at(code(), locator());
  1390     emit_byte(0x70 | cc);
  1391     emit_byte(0);
  1392   }
  1393 }
  1395 void Assembler::jmp(Address adr) {
  1396   InstructionMark im(this);
  1397   prefix(adr);
  1398   emit_byte(0xFF);
  1399   emit_operand(rsp, adr);
  1402 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  1403   if (L.is_bound()) {
  1404     address entry = target(L);
  1405     assert(entry != NULL, "jmp most probably wrong");
  1406     InstructionMark im(this);
  1407     const int short_size = 2;
  1408     const int long_size = 5;
  1409     intptr_t offs = entry - _code_pos;
  1410     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
  1411       emit_byte(0xEB);
  1412       emit_byte((offs - short_size) & 0xFF);
  1413     } else {
  1414       emit_byte(0xE9);
  1415       emit_long(offs - long_size);
  1416     }
  1417   } else {
  1418     // By default, forward jumps are always 32-bit displacements, since
  1419     // we can't yet know where the label will be bound.  If you're sure that
  1420     // the forward jump will not run beyond 256 bytes, use jmpb to
  1421     // force an 8-bit displacement.
  1422     InstructionMark im(this);
  1423     relocate(rtype);
  1424     L.add_patch_at(code(), locator());
  1425     emit_byte(0xE9);
  1426     emit_long(0);
  1427   }
  1428 }
  1430 void Assembler::jmp(Register entry) {
  1431   int encode = prefix_and_encode(entry->encoding());
  1432   emit_byte(0xFF);
  1433   emit_byte(0xE0 | encode);
  1436 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  1437   InstructionMark im(this);
  1438   emit_byte(0xE9);
  1439   assert(dest != NULL, "must have a target");
  1440   intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  1441   assert(is_simm32(disp), "must be 32bit offset (jmp)");
  1442   emit_data(disp, rspec.reloc(), call32_operand);
  1445 void Assembler::jmpb(Label& L) {
  1446   if (L.is_bound()) {
  1447     const int short_size = 2;
  1448     address entry = target(L);
  1449     assert(is8bit((entry - _code_pos) + short_size),
  1450            "Displacement too large for a short jmp");
  1451     assert(entry != NULL, "jmp most probably wrong");
  1452     intptr_t offs = entry - _code_pos;
  1453     emit_byte(0xEB);
  1454     emit_byte((offs - short_size) & 0xFF);
  1455   } else {
  1456     InstructionMark im(this);
  1457     L.add_patch_at(code(), locator());
  1458     emit_byte(0xEB);
  1459     emit_byte(0);
  1460   }
  1461 }
  1463 void Assembler::ldmxcsr( Address src) {
  1464   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1465   InstructionMark im(this);
  1466   prefix(src);
  1467   emit_byte(0x0F);
  1468   emit_byte(0xAE);
  1469   emit_operand(as_Register(2), src);
  1472 void Assembler::leal(Register dst, Address src) {
  1473   InstructionMark im(this);
  1474 #ifdef _LP64
  1475   emit_byte(0x67); // addr32
  1476   prefix(src, dst);
  1477 #endif // LP64
  1478   emit_byte(0x8D);
  1479   emit_operand(dst, src);
  1482 void Assembler::lock() {
  1483   if (Atomics & 1) {
  1484      // Emit either nothing, a NOP, or a NOP: prefix
  1485      emit_byte(0x90) ;
  1486   } else {
  1487      emit_byte(0xF0);
  1488   }
  1489 }
  1491 void Assembler::lzcntl(Register dst, Register src) {
  1492   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  1493   emit_byte(0xF3);
  1494   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1495   emit_byte(0x0F);
  1496   emit_byte(0xBD);
  1497   emit_byte(0xC0 | encode);
  1498 }
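// Editorial note: LZCNT is encoded as F3 0F BD, i.e. BSR with an F3 prefix. On CPUs without
// LZCNT support the prefix is ignored and the bytes execute as BSR (with different result
// semantics), which is why the asserts here and in bsrl() insist the two encodings are
// never mixed.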
  1500 // Emit mfence instruction
  1501 void Assembler::mfence() {
  1502   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  1503   emit_byte( 0x0F );
  1504   emit_byte( 0xAE );
  1505   emit_byte( 0xF0 );
  1508 void Assembler::mov(Register dst, Register src) {
  1509   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
  1512 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  1513   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1514   int dstenc = dst->encoding();
  1515   int srcenc = src->encoding();
  1516   emit_byte(0x66);
  1517   if (dstenc < 8) {
  1518     if (srcenc >= 8) {
  1519       prefix(REX_B);
  1520       srcenc -= 8;
  1521     }
  1522   } else {
  1523     if (srcenc < 8) {
  1524       prefix(REX_R);
  1525     } else {
  1526       prefix(REX_RB);
  1527       srcenc -= 8;
  1528     }
  1529     dstenc -= 8;
  1530   }
  1531   emit_byte(0x0F);
  1532   emit_byte(0x28);
  1533   emit_byte(0xC0 | dstenc << 3 | srcenc);
  1534 }
  1536 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  1537   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1538   int dstenc = dst->encoding();
  1539   int srcenc = src->encoding();
  1540   if (dstenc < 8) {
  1541     if (srcenc >= 8) {
  1542       prefix(REX_B);
  1543       srcenc -= 8;
  1544     }
  1545   } else {
  1546     if (srcenc < 8) {
  1547       prefix(REX_R);
  1548     } else {
  1549       prefix(REX_RB);
  1550       srcenc -= 8;
  1551     }
  1552     dstenc -= 8;
  1553   }
  1554   emit_byte(0x0F);
  1555   emit_byte(0x28);
  1556   emit_byte(0xC0 | dstenc << 3 | srcenc);
  1557 }
  1559 void Assembler::movb(Register dst, Address src) {
  1560   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  1561   InstructionMark im(this);
  1562   prefix(src, dst, true);
  1563   emit_byte(0x8A);
  1564   emit_operand(dst, src);
  1568 void Assembler::movb(Address dst, int imm8) {
  1569   InstructionMark im(this);
  1570    prefix(dst);
  1571   emit_byte(0xC6);
  1572   emit_operand(rax, dst, 1);
  1573   emit_byte(imm8);
  1577 void Assembler::movb(Address dst, Register src) {
  1578   assert(src->has_byte_register(), "must have byte register");
  1579   InstructionMark im(this);
  1580   prefix(dst, src, true);
  1581   emit_byte(0x88);
  1582   emit_operand(src, dst);
  1585 void Assembler::movdl(XMMRegister dst, Register src) {
  1586   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1587   emit_byte(0x66);
  1588   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1589   emit_byte(0x0F);
  1590   emit_byte(0x6E);
  1591   emit_byte(0xC0 | encode);
  1594 void Assembler::movdl(Register dst, XMMRegister src) {
  1595   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1596   emit_byte(0x66);
  1597   // swap src/dst to get correct prefix
  1598   int encode = prefix_and_encode(src->encoding(), dst->encoding());
  1599   emit_byte(0x0F);
  1600   emit_byte(0x7E);
  1601   emit_byte(0xC0 | encode);
  1604 void Assembler::movdl(XMMRegister dst, Address src) {
  1605   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1606   InstructionMark im(this);
  1607   emit_byte(0x66);
  1608   prefix(src, dst);
  1609   emit_byte(0x0F);
  1610   emit_byte(0x6E);
  1611   emit_operand(dst, src);
  1615 void Assembler::movdqa(XMMRegister dst, Address src) {
  1616   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1617   InstructionMark im(this);
  1618   emit_byte(0x66);
  1619   prefix(src, dst);
  1620   emit_byte(0x0F);
  1621   emit_byte(0x6F);
  1622   emit_operand(dst, src);
  1625 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  1626   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1627   emit_byte(0x66);
  1628   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  1629   emit_byte(0x0F);
  1630   emit_byte(0x6F);
  1631   emit_byte(0xC0 | encode);
  1634 void Assembler::movdqa(Address dst, XMMRegister src) {
  1635   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1636   InstructionMark im(this);
  1637   emit_byte(0x66);
  1638   prefix(dst, src);
  1639   emit_byte(0x0F);
  1640   emit_byte(0x7F);
  1641   emit_operand(src, dst);
  1644 void Assembler::movdqu(XMMRegister dst, Address src) {
  1645   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1646   InstructionMark im(this);
  1647   emit_byte(0xF3);
  1648   prefix(src, dst);
  1649   emit_byte(0x0F);
  1650   emit_byte(0x6F);
  1651   emit_operand(dst, src);
  1654 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  1655   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1656   emit_byte(0xF3);
  1657   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  1658   emit_byte(0x0F);
  1659   emit_byte(0x6F);
  1660   emit_byte(0xC0 | encode);
  1663 void Assembler::movdqu(Address dst, XMMRegister src) {
  1664   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1665   InstructionMark im(this);
  1666   emit_byte(0xF3);
  1667   prefix(dst, src);
  1668   emit_byte(0x0F);
  1669   emit_byte(0x7F);
  1670   emit_operand(src, dst);
  1673 // Uses zero extension on 64bit
  1675 void Assembler::movl(Register dst, int32_t imm32) {
  1676   int encode = prefix_and_encode(dst->encoding());
  1677   emit_byte(0xB8 | encode);
  1678   emit_long(imm32);
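       // For illustration (hypothetical call): movl(rax, 0x12345678) emits
       // B8 78 56 34 12; on 64-bit this also zeroes the upper 32 bits of rax,
       // which is the zero extension mentioned above.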
  1681 void Assembler::movl(Register dst, Register src) {
  1682   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1683   emit_byte(0x8B);
  1684   emit_byte(0xC0 | encode);
  1687 void Assembler::movl(Register dst, Address src) {
  1688   InstructionMark im(this);
  1689   prefix(src, dst);
  1690   emit_byte(0x8B);
  1691   emit_operand(dst, src);
  1694 void Assembler::movl(Address dst, int32_t imm32) {
  1695   InstructionMark im(this);
  1696   prefix(dst);
  1697   emit_byte(0xC7);
  1698   emit_operand(rax, dst, 4);
  1699   emit_long(imm32);
  1702 void Assembler::movl(Address dst, Register src) {
  1703   InstructionMark im(this);
  1704   prefix(dst, src);
  1705   emit_byte(0x89);
  1706   emit_operand(src, dst);
  1709 // New CPUs require the use of movsd and movss to avoid a partial register stall
  1710 // when loading from memory. But on old Opterons, use movlpd instead of movsd.
  1711 // The selection is done in MacroAssembler::movdbl() and movflt().
  1712 void Assembler::movlpd(XMMRegister dst, Address src) {
  1713   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1714   InstructionMark im(this);
  1715   emit_byte(0x66);
  1716   prefix(src, dst);
  1717   emit_byte(0x0F);
  1718   emit_byte(0x12);
  1719   emit_operand(dst, src);
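       // Sketch only (the flag name is an assumption, not taken from this file): the
       // selection described above could look roughly like
       //   if (UseXmmLoadAndClearUpper) { movsd(dst, src); } else { movlpd(dst, src); }
       // inside MacroAssembler::movdbl(), picking movsd to avoid the partial register
       // stall except on old Opterons, where movlpd is preferred.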
  1722 void Assembler::movq( MMXRegister dst, Address src ) {
  1723   assert( VM_Version::supports_mmx(), "" );
  1724   emit_byte(0x0F);
  1725   emit_byte(0x6F);
  1726   emit_operand(dst, src);
  1729 void Assembler::movq( Address dst, MMXRegister src ) {
  1730   assert( VM_Version::supports_mmx(), "" );
  1731   emit_byte(0x0F);
  1732   emit_byte(0x7F);
  1733   // workaround gcc (3.2.1-7a) bug
  1734   // In that version of gcc, with only an emit_operand(MMX, Address) available,
  1735   // gcc will tail jump and try to reverse the parameters, completely
  1736   // obliterating dst in the process. By having a version available
  1737   // that doesn't need to swap the args at the tail jump, the bug is
  1738   // avoided.
  1739   emit_operand(dst, src);
  1742 void Assembler::movq(XMMRegister dst, Address src) {
  1743   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1744   InstructionMark im(this);
  1745   emit_byte(0xF3);
  1746   prefix(src, dst);
  1747   emit_byte(0x0F);
  1748   emit_byte(0x7E);
  1749   emit_operand(dst, src);
  1752 void Assembler::movq(Address dst, XMMRegister src) {
  1753   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1754   InstructionMark im(this);
  1755   emit_byte(0x66);
  1756   prefix(dst, src);
  1757   emit_byte(0x0F);
  1758   emit_byte(0xD6);
  1759   emit_operand(src, dst);
  1762 void Assembler::movsbl(Register dst, Address src) { // movsxb
  1763   InstructionMark im(this);
  1764   prefix(src, dst);
  1765   emit_byte(0x0F);
  1766   emit_byte(0xBE);
  1767   emit_operand(dst, src);
  1770 void Assembler::movsbl(Register dst, Register src) { // movsxb
  1771   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  1772   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  1773   emit_byte(0x0F);
  1774   emit_byte(0xBE);
  1775   emit_byte(0xC0 | encode);
  1778 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  1779   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1780   emit_byte(0xF2);
  1781   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1782   emit_byte(0x0F);
  1783   emit_byte(0x10);
  1784   emit_byte(0xC0 | encode);
  1787 void Assembler::movsd(XMMRegister dst, Address src) {
  1788   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1789   InstructionMark im(this);
  1790   emit_byte(0xF2);
  1791   prefix(src, dst);
  1792   emit_byte(0x0F);
  1793   emit_byte(0x10);
  1794   emit_operand(dst, src);
  1797 void Assembler::movsd(Address dst, XMMRegister src) {
  1798   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1799   InstructionMark im(this);
  1800   emit_byte(0xF2);
  1801   prefix(dst, src);
  1802   emit_byte(0x0F);
  1803   emit_byte(0x11);
  1804   emit_operand(src, dst);
  1807 void Assembler::movss(XMMRegister dst, XMMRegister src) {
  1808   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1809   emit_byte(0xF3);
  1810   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1811   emit_byte(0x0F);
  1812   emit_byte(0x10);
  1813   emit_byte(0xC0 | encode);
  1816 void Assembler::movss(XMMRegister dst, Address src) {
  1817   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1818   InstructionMark im(this);
  1819   emit_byte(0xF3);
  1820   prefix(src, dst);
  1821   emit_byte(0x0F);
  1822   emit_byte(0x10);
  1823   emit_operand(dst, src);
  1826 void Assembler::movss(Address dst, XMMRegister src) {
  1827   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1828   InstructionMark im(this);
  1829   emit_byte(0xF3);
  1830   prefix(dst, src);
  1831   emit_byte(0x0F);
  1832   emit_byte(0x11);
  1833   emit_operand(src, dst);
  1836 void Assembler::movswl(Register dst, Address src) { // movsxw
  1837   InstructionMark im(this);
  1838   prefix(src, dst);
  1839   emit_byte(0x0F);
  1840   emit_byte(0xBF);
  1841   emit_operand(dst, src);
  1844 void Assembler::movswl(Register dst, Register src) { // movsxw
  1845   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1846   emit_byte(0x0F);
  1847   emit_byte(0xBF);
  1848   emit_byte(0xC0 | encode);
  1851 void Assembler::movw(Address dst, int imm16) {
  1852   InstructionMark im(this);
  1854   emit_byte(0x66); // switch to 16-bit mode
  1855   prefix(dst);
  1856   emit_byte(0xC7);
  1857   emit_operand(rax, dst, 2);
  1858   emit_word(imm16);
  1861 void Assembler::movw(Register dst, Address src) {
  1862   InstructionMark im(this);
  1863   emit_byte(0x66);
  1864   prefix(src, dst);
  1865   emit_byte(0x8B);
  1866   emit_operand(dst, src);
  1869 void Assembler::movw(Address dst, Register src) {
  1870   InstructionMark im(this);
  1871   emit_byte(0x66);
  1872   prefix(dst, src);
  1873   emit_byte(0x89);
  1874   emit_operand(src, dst);
  1877 void Assembler::movzbl(Register dst, Address src) { // movzxb
  1878   InstructionMark im(this);
  1879   prefix(src, dst);
  1880   emit_byte(0x0F);
  1881   emit_byte(0xB6);
  1882   emit_operand(dst, src);
  1885 void Assembler::movzbl(Register dst, Register src) { // movzxb
  1886   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  1887   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  1888   emit_byte(0x0F);
  1889   emit_byte(0xB6);
  1890   emit_byte(0xC0 | encode);
  1893 void Assembler::movzwl(Register dst, Address src) { // movzxw
  1894   InstructionMark im(this);
  1895   prefix(src, dst);
  1896   emit_byte(0x0F);
  1897   emit_byte(0xB7);
  1898   emit_operand(dst, src);
  1901 void Assembler::movzwl(Register dst, Register src) { // movzxw
  1902   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1903   emit_byte(0x0F);
  1904   emit_byte(0xB7);
  1905   emit_byte(0xC0 | encode);
  1908 void Assembler::mull(Address src) {
  1909   InstructionMark im(this);
  1910   prefix(src);
  1911   emit_byte(0xF7);
  1912   emit_operand(rsp, src);
  1915 void Assembler::mull(Register src) {
  1916   int encode = prefix_and_encode(src->encoding());
  1917   emit_byte(0xF7);
  1918   emit_byte(0xE0 | encode);
  1921 void Assembler::mulsd(XMMRegister dst, Address src) {
  1922   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1923   InstructionMark im(this);
  1924   emit_byte(0xF2);
  1925   prefix(src, dst);
  1926   emit_byte(0x0F);
  1927   emit_byte(0x59);
  1928   emit_operand(dst, src);
  1931 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  1932   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  1933   emit_byte(0xF2);
  1934   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1935   emit_byte(0x0F);
  1936   emit_byte(0x59);
  1937   emit_byte(0xC0 | encode);
  1940 void Assembler::mulss(XMMRegister dst, Address src) {
  1941   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1942   InstructionMark im(this);
  1943   emit_byte(0xF3);
  1944   prefix(src, dst);
  1945   emit_byte(0x0F);
  1946   emit_byte(0x59);
  1947   emit_operand(dst, src);
  1950 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  1951   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  1952   emit_byte(0xF3);
  1953   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1954   emit_byte(0x0F);
  1955   emit_byte(0x59);
  1956   emit_byte(0xC0 | encode);
  1959 void Assembler::negl(Register dst) {
  1960   int encode = prefix_and_encode(dst->encoding());
  1961   emit_byte(0xF7);
  1962   emit_byte(0xD8 | encode);
  1965 void Assembler::nop(int i) {
  1966 #ifdef ASSERT
  1967   assert(i > 0, " ");
  1968   // The fancy nops aren't currently recognized by debuggers, which makes it a
  1969   // pain to disassemble code while debugging. If asserts are on, speed is
  1970   // clearly not an issue, so simply use the single-byte traditional nop
  1971   // to do alignment.
  1973   for (; i > 0 ; i--) emit_byte(0x90);
  1974   return;
  1976 #endif // ASSERT
  1978   if (UseAddressNop && VM_Version::is_intel()) {
  1979     //
  1980     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
  1981     //  1: 0x90
  1982     //  2: 0x66 0x90
  1983     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
  1984     //  4: 0x0F 0x1F 0x40 0x00
  1985     //  5: 0x0F 0x1F 0x44 0x00 0x00
  1986     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
  1987     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  1988     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  1989     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  1990     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  1991     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  1993     // The rest of the encoding is Intel specific - don't use consecutive address nops
  1995     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  1996     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  1997     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  1998     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  2000     while(i >= 15) {
  2001       // For Intel don't generate consecutive address nops (mix with regular nops)
  2002       i -= 15;
  2003       emit_byte(0x66);   // size prefix
  2004       emit_byte(0x66);   // size prefix
  2005       emit_byte(0x66);   // size prefix
  2006       addr_nop_8();
  2007       emit_byte(0x66);   // size prefix
  2008       emit_byte(0x66);   // size prefix
  2009       emit_byte(0x66);   // size prefix
  2010       emit_byte(0x90);   // nop
  2012     switch (i) {
  2013       case 14:
  2014         emit_byte(0x66); // size prefix
  2015       case 13:
  2016         emit_byte(0x66); // size prefix
  2017       case 12:
  2018         addr_nop_8();
  2019         emit_byte(0x66); // size prefix
  2020         emit_byte(0x66); // size prefix
  2021         emit_byte(0x66); // size prefix
  2022         emit_byte(0x90); // nop
  2023         break;
  2024       case 11:
  2025         emit_byte(0x66); // size prefix
  2026       case 10:
  2027         emit_byte(0x66); // size prefix
  2028       case 9:
  2029         emit_byte(0x66); // size prefix
  2030       case 8:
  2031         addr_nop_8();
  2032         break;
  2033       case 7:
  2034         addr_nop_7();
  2035         break;
  2036       case 6:
  2037         emit_byte(0x66); // size prefix
  2038       case 5:
  2039         addr_nop_5();
  2040         break;
  2041       case 4:
  2042         addr_nop_4();
  2043         break;
  2044       case 3:
  2045         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
  2046         emit_byte(0x66); // size prefix
  2047       case 2:
  2048         emit_byte(0x66); // size prefix
  2049       case 1:
  2050         emit_byte(0x90); // nop
  2051         break;
  2052       default:
  2053         assert(i == 0, " ");
  2055     return;
  2057   if (UseAddressNop && VM_Version::is_amd()) {
  2058     //
  2059     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
  2060     //  1: 0x90
  2061     //  2: 0x66 0x90
  2062     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
  2063     //  4: 0x0F 0x1F 0x40 0x00
  2064     //  5: 0x0F 0x1F 0x44 0x00 0x00
  2065     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
  2066     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  2067     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2068     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2069     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2070     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2072     // The rest of the encoding is AMD specific - use consecutive address nops
  2074     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
  2075     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
  2076     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  2077     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  2078     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2079     //     Size prefixes (0x66) are added for larger sizes
  2081     while(i >= 22) {
  2082       i -= 11;
  2083       emit_byte(0x66); // size prefix
  2084       emit_byte(0x66); // size prefix
  2085       emit_byte(0x66); // size prefix
  2086       addr_nop_8();
  2088     // Generate the first nop for sizes between 12 and 21
  2089     switch (i) {
  2090       case 21:
  2091         i -= 1;
  2092         emit_byte(0x66); // size prefix
  2093       case 20:
  2094       case 19:
  2095         i -= 1;
  2096         emit_byte(0x66); // size prefix
  2097       case 18:
  2098       case 17:
  2099         i -= 1;
  2100         emit_byte(0x66); // size prefix
  2101       case 16:
  2102       case 15:
  2103         i -= 8;
  2104         addr_nop_8();
  2105         break;
  2106       case 14:
  2107       case 13:
  2108         i -= 7;
  2109         addr_nop_7();
  2110         break;
  2111       case 12:
  2112         i -= 6;
  2113         emit_byte(0x66); // size prefix
  2114         addr_nop_5();
  2115         break;
  2116       default:
  2117         assert(i < 12, " ");
  2120     // Generate the second nop for sizes between 1 and 11
  2121     switch (i) {
  2122       case 11:
  2123         emit_byte(0x66); // size prefix
  2124       case 10:
  2125         emit_byte(0x66); // size prefix
  2126       case 9:
  2127         emit_byte(0x66); // size prefix
  2128       case 8:
  2129         addr_nop_8();
  2130         break;
  2131       case 7:
  2132         addr_nop_7();
  2133         break;
  2134       case 6:
  2135         emit_byte(0x66); // size prefix
  2136       case 5:
  2137         addr_nop_5();
  2138         break;
  2139       case 4:
  2140         addr_nop_4();
  2141         break;
  2142       case 3:
  2143         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
  2144         emit_byte(0x66); // size prefix
  2145       case 2:
  2146         emit_byte(0x66); // size prefix
  2147       case 1:
  2148         emit_byte(0x90); // nop
  2149         break;
  2150       default:
  2151         assert(i == 0, " ");
  2153     return;
  2156   // Using nops with size prefixes "0x66 0x90".
  2157   // From AMD Optimization Guide:
  2158   //  1: 0x90
  2159   //  2: 0x66 0x90
  2160   //  3: 0x66 0x66 0x90
  2161   //  4: 0x66 0x66 0x66 0x90
  2162   //  5: 0x66 0x66 0x90 0x66 0x90
  2163   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  2164   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  2165   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  2166   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  2167   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  2168   //
  2169   while(i > 12) {
  2170     i -= 4;
  2171     emit_byte(0x66); // size prefix
  2172     emit_byte(0x66);
  2173     emit_byte(0x66);
  2174     emit_byte(0x90); // nop
  2176   // 1 - 12 nops
  2177   if(i > 8) {
  2178     if(i > 9) {
  2179       i -= 1;
  2180       emit_byte(0x66);
  2182     i -= 3;
  2183     emit_byte(0x66);
  2184     emit_byte(0x66);
  2185     emit_byte(0x90);
  2187   // 1 - 8 nops
  2188   if(i > 4) {
  2189     if(i > 6) {
  2190       i -= 1;
  2191       emit_byte(0x66);
  2193     i -= 3;
  2194     emit_byte(0x66);
  2195     emit_byte(0x66);
  2196     emit_byte(0x90);
  2198   switch (i) {
  2199     case 4:
  2200       emit_byte(0x66);
  2201     case 3:
  2202       emit_byte(0x66);
  2203     case 2:
  2204       emit_byte(0x66);
  2205     case 1:
  2206       emit_byte(0x90);
  2207       break;
  2208     default:
  2209       assert(i == 0, " ");
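       // For illustration (hypothetical calls): with UseAddressNop on an Intel CPU,
       // nop(5) emits the single 5-byte nop 0F 1F 44 00 00 from the table above,
       // while nop(3) emits 66 66 90 rather than 0F 1F 00 so that the padding
       // remains patching safe.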
  2213 void Assembler::notl(Register dst) {
  2214   int encode = prefix_and_encode(dst->encoding());
  2215   emit_byte(0xF7);
  2216   emit_byte(0xD0 | encode );
  2219 void Assembler::orl(Address dst, int32_t imm32) {
  2220   InstructionMark im(this);
  2221   prefix(dst);
  2222   emit_arith_operand(0x81, rcx, dst, imm32);
  2225 void Assembler::orl(Register dst, int32_t imm32) {
  2226   prefix(dst);
  2227   emit_arith(0x81, 0xC8, dst, imm32);
  2230 void Assembler::orl(Register dst, Address src) {
  2231   InstructionMark im(this);
  2232   prefix(src, dst);
  2233   emit_byte(0x0B);
  2234   emit_operand(dst, src);
  2237 void Assembler::orl(Register dst, Register src) {
  2238   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2239   emit_arith(0x0B, 0xC0, dst, src);
  2242 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  2243   assert(VM_Version::supports_sse4_2(), "");
  2245   InstructionMark im(this);
  2246   emit_byte(0x66);
  2247   prefix(src, dst);
  2248   emit_byte(0x0F);
  2249   emit_byte(0x3A);
  2250   emit_byte(0x61);
  2251   emit_operand(dst, src);
  2252   emit_byte(imm8);
  2255 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  2256   assert(VM_Version::supports_sse4_2(), "");
  2258   emit_byte(0x66);
  2259   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  2260   emit_byte(0x0F);
  2261   emit_byte(0x3A);
  2262   emit_byte(0x61);
  2263   emit_byte(0xC0 | encode);
  2264   emit_byte(imm8);
  2267 // generic
  2268 void Assembler::pop(Register dst) {
  2269   int encode = prefix_and_encode(dst->encoding());
  2270   emit_byte(0x58 | encode);
  2273 void Assembler::popcntl(Register dst, Address src) {
  2274   assert(VM_Version::supports_popcnt(), "must support");
  2275   InstructionMark im(this);
  2276   emit_byte(0xF3);
  2277   prefix(src, dst);
  2278   emit_byte(0x0F);
  2279   emit_byte(0xB8);
  2280   emit_operand(dst, src);
  2283 void Assembler::popcntl(Register dst, Register src) {
  2284   assert(VM_Version::supports_popcnt(), "must support");
  2285   emit_byte(0xF3);
  2286   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2287   emit_byte(0x0F);
  2288   emit_byte(0xB8);
  2289   emit_byte(0xC0 | encode);
  2292 void Assembler::popf() {
  2293   emit_byte(0x9D);
  2296 #ifndef _LP64 // no 32bit push/pop on amd64
  2297 void Assembler::popl(Address dst) {
  2298   // NOTE: this will adjust the stack by 8 bytes on 64-bit
  2299   InstructionMark im(this);
  2300   prefix(dst);
  2301   emit_byte(0x8F);
  2302   emit_operand(rax, dst);
  2304 #endif
  2306 void Assembler::prefetch_prefix(Address src) {
  2307   prefix(src);
  2308   emit_byte(0x0F);
  2311 void Assembler::prefetchnta(Address src) {
  2312   NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  2313   InstructionMark im(this);
  2314   prefetch_prefix(src);
  2315   emit_byte(0x18);
  2316   emit_operand(rax, src); // 0, src
  2319 void Assembler::prefetchr(Address src) {
  2320   NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  2321   InstructionMark im(this);
  2322   prefetch_prefix(src);
  2323   emit_byte(0x0D);
  2324   emit_operand(rax, src); // 0, src
  2327 void Assembler::prefetcht0(Address src) {
  2328   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  2329   InstructionMark im(this);
  2330   prefetch_prefix(src);
  2331   emit_byte(0x18);
  2332   emit_operand(rcx, src); // 1, src
  2335 void Assembler::prefetcht1(Address src) {
  2336   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  2337   InstructionMark im(this);
  2338   prefetch_prefix(src);
  2339   emit_byte(0x18);
  2340   emit_operand(rdx, src); // 2, src
  2343 void Assembler::prefetcht2(Address src) {
  2344   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  2345   InstructionMark im(this);
  2346   prefetch_prefix(src);
  2347   emit_byte(0x18);
  2348   emit_operand(rbx, src); // 3, src
  2351 void Assembler::prefetchw(Address src) {
  2352   NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  2353   InstructionMark im(this);
  2354   prefetch_prefix(src);
  2355   emit_byte(0x0D);
  2356   emit_operand(rcx, src); // 1, src
  2359 void Assembler::prefix(Prefix p) {
  2360   a_byte(p);
  2363 void Assembler::por(XMMRegister dst, XMMRegister src) {
  2364   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2366   emit_byte(0x66);
  2367   int  encode = prefix_and_encode(dst->encoding(), src->encoding());
  2368   emit_byte(0x0F);
  2370   emit_byte(0xEB);
  2371   emit_byte(0xC0 | encode);
  2374 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  2375   assert(isByte(mode), "invalid value");
  2376   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2378   emit_byte(0x66);
  2379   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2380   emit_byte(0x0F);
  2381   emit_byte(0x70);
  2382   emit_byte(0xC0 | encode);
  2383   emit_byte(mode & 0xFF);
  2387 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  2388   assert(isByte(mode), "invalid value");
  2389   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2391   InstructionMark im(this);
  2392   emit_byte(0x66);
  2393   prefix(src, dst);
  2394   emit_byte(0x0F);
  2395   emit_byte(0x70);
  2396   emit_operand(dst, src);
  2397   emit_byte(mode & 0xFF);
  2400 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  2401   assert(isByte(mode), "invalid value");
  2402   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2404   emit_byte(0xF2);
  2405   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2406   emit_byte(0x0F);
  2407   emit_byte(0x70);
  2408   emit_byte(0xC0 | encode);
  2409   emit_byte(mode & 0xFF);
  2412 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  2413   assert(isByte(mode), "invalid value");
  2414   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2416   InstructionMark im(this);
  2417   emit_byte(0xF2);
  2418   prefix(src, dst); // QQ new
  2419   emit_byte(0x0F);
  2420   emit_byte(0x70);
  2421   emit_operand(dst, src);
  2422   emit_byte(mode & 0xFF);
  2425 void Assembler::psrlq(XMMRegister dst, int shift) {
  2426   // Shift a 64-bit value logically right by the specified number of bits.
  2427   // HMM Table D-1 says sse2 or mmx.
  2428   // Do not confuse it with the psrldq SSE2 instruction, which
  2429   // shifts a 128-bit value in an xmm register by a number of bytes.
  2430   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2432   int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
  2433   emit_byte(0x66);
  2434   emit_byte(0x0F);
  2435   emit_byte(0x73);
  2436   emit_byte(0xC0 | encode);
  2437   emit_byte(shift);
  2440 void Assembler::psrldq(XMMRegister dst, int shift) {
  2441   // Shift a 128-bit value in an xmm register by a number of bytes.
  2442   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2444   int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
  2445   emit_byte(0x66);
  2446   emit_byte(0x0F);
  2447   emit_byte(0x73);
  2448   emit_byte(0xC0 | encode);
  2449   emit_byte(shift);
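       // For illustration (hypothetical calls): psrlq(dst, 8) shifts right by 8 bits,
       // whereas psrldq(dst, 8) shifts the whole 128-bit register right by 8 bytes,
       // i.e. it moves the high quadword of dst into the low quadword.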
  2452 void Assembler::ptest(XMMRegister dst, Address src) {
  2453   assert(VM_Version::supports_sse4_1(), "");
  2455   InstructionMark im(this);
  2456   emit_byte(0x66);
  2457   prefix(src, dst);
  2458   emit_byte(0x0F);
  2459   emit_byte(0x38);
  2460   emit_byte(0x17);
  2461   emit_operand(dst, src);
  2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  2465   assert(VM_Version::supports_sse4_1(), "");
  2467   emit_byte(0x66);
  2468   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  2469   emit_byte(0x0F);
  2470   emit_byte(0x38);
  2471   emit_byte(0x17);
  2472   emit_byte(0xC0 | encode);
  2475 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  2476   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2477   emit_byte(0x66);
  2478   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2479   emit_byte(0x0F);
  2480   emit_byte(0x60);
  2481   emit_byte(0xC0 | encode);
  2484 void Assembler::push(int32_t imm32) {
  2485   // in 64-bit mode we push 64 bits onto the stack but only
  2486   // take a 32-bit immediate
  2487   emit_byte(0x68);
  2488   emit_long(imm32);
  2491 void Assembler::push(Register src) {
  2492   int encode = prefix_and_encode(src->encoding());
  2494   emit_byte(0x50 | encode);
  2497 void Assembler::pushf() {
  2498   emit_byte(0x9C);
  2501 #ifndef _LP64 // no 32bit push/pop on amd64
  2502 void Assembler::pushl(Address src) {
  2503   // Note this will push 64 bits on a 64-bit platform
  2504   InstructionMark im(this);
  2505   prefix(src);
  2506   emit_byte(0xFF);
  2507   emit_operand(rsi, src);
  2509 #endif
  2511 void Assembler::pxor(XMMRegister dst, Address src) {
  2512   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2513   InstructionMark im(this);
  2514   emit_byte(0x66);
  2515   prefix(src, dst);
  2516   emit_byte(0x0F);
  2517   emit_byte(0xEF);
  2518   emit_operand(dst, src);
  2521 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  2522   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2523   InstructionMark im(this);
  2524   emit_byte(0x66);
  2525   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2526   emit_byte(0x0F);
  2527   emit_byte(0xEF);
  2528   emit_byte(0xC0 | encode);
  2531 void Assembler::rcll(Register dst, int imm8) {
  2532   assert(isShiftCount(imm8), "illegal shift count");
  2533   int encode = prefix_and_encode(dst->encoding());
  2534   if (imm8 == 1) {
  2535     emit_byte(0xD1);
  2536     emit_byte(0xD0 | encode);
  2537   } else {
  2538     emit_byte(0xC1);
  2539     emit_byte(0xD0 | encode);
  2540     emit_byte(imm8);
  2544 // copies rcx pointer-sized words from [esi] to [edi]
  2545 // generic
  2546 void Assembler::rep_mov() {
  2547   emit_byte(0xF3);
  2548   // MOVSQ
  2549   LP64_ONLY(prefix(REX_W));
  2550   emit_byte(0xA5);
  2553 // sets rcx pointer-sized words at [edi] to the value in rax
  2554 // generic
  2555 void Assembler::rep_set() { // rep_set
  2556   emit_byte(0xF3);
  2557   // STOSQ
  2558   LP64_ONLY(prefix(REX_W));
  2559   emit_byte(0xAB);
  2562 // scans rcx pointer-sized words at [edi] for an occurrence of rax
  2563 // generic
  2564 void Assembler::repne_scan() { // repne_scan
  2565   emit_byte(0xF2);
  2566   // SCASQ
  2567   LP64_ONLY(prefix(REX_W));
  2568   emit_byte(0xAF);
  2571 #ifdef _LP64
  2572 // scans rcx 4-byte words at [edi] for an occurrence of rax
  2573 // generic
  2574 void Assembler::repne_scanl() { // repne_scan
  2575   emit_byte(0xF2);
  2576   // SCASL
  2577   emit_byte(0xAF);
  2579 #endif
  2581 void Assembler::ret(int imm16) {
  2582   if (imm16 == 0) {
  2583     emit_byte(0xC3);
  2584   } else {
  2585     emit_byte(0xC2);
  2586     emit_word(imm16);
  2590 void Assembler::sahf() {
  2591 #ifdef _LP64
  2592   // Not supported in 64bit mode
  2593   ShouldNotReachHere();
  2594 #endif
  2595   emit_byte(0x9E);
  2598 void Assembler::sarl(Register dst, int imm8) {
  2599   int encode = prefix_and_encode(dst->encoding());
  2600   assert(isShiftCount(imm8), "illegal shift count");
  2601   if (imm8 == 1) {
  2602     emit_byte(0xD1);
  2603     emit_byte(0xF8 | encode);
  2604   } else {
  2605     emit_byte(0xC1);
  2606     emit_byte(0xF8 | encode);
  2607     emit_byte(imm8);
  2611 void Assembler::sarl(Register dst) {
  2612   int encode = prefix_and_encode(dst->encoding());
  2613   emit_byte(0xD3);
  2614   emit_byte(0xF8 | encode);
  2617 void Assembler::sbbl(Address dst, int32_t imm32) {
  2618   InstructionMark im(this);
  2619   prefix(dst);
  2620   emit_arith_operand(0x81, rbx, dst, imm32);
  2623 void Assembler::sbbl(Register dst, int32_t imm32) {
  2624   prefix(dst);
  2625   emit_arith(0x81, 0xD8, dst, imm32);
  2629 void Assembler::sbbl(Register dst, Address src) {
  2630   InstructionMark im(this);
  2631   prefix(src, dst);
  2632   emit_byte(0x1B);
  2633   emit_operand(dst, src);
  2636 void Assembler::sbbl(Register dst, Register src) {
  2637   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2638   emit_arith(0x1B, 0xC0, dst, src);
  2641 void Assembler::setb(Condition cc, Register dst) {
  2642   assert(0 <= cc && cc < 16, "illegal cc");
  2643   int encode = prefix_and_encode(dst->encoding(), true);
  2644   emit_byte(0x0F);
  2645   emit_byte(0x90 | cc);
  2646   emit_byte(0xC0 | encode);
  2649 void Assembler::shll(Register dst, int imm8) {
  2650   assert(isShiftCount(imm8), "illegal shift count");
  2651   int encode = prefix_and_encode(dst->encoding());
  2652   if (imm8 == 1 ) {
  2653     emit_byte(0xD1);
  2654     emit_byte(0xE0 | encode);
  2655   } else {
  2656     emit_byte(0xC1);
  2657     emit_byte(0xE0 | encode);
  2658     emit_byte(imm8);
  2662 void Assembler::shll(Register dst) {
  2663   int encode = prefix_and_encode(dst->encoding());
  2664   emit_byte(0xD3);
  2665   emit_byte(0xE0 | encode);
  2668 void Assembler::shrl(Register dst, int imm8) {
  2669   assert(isShiftCount(imm8), "illegal shift count");
  2670   int encode = prefix_and_encode(dst->encoding());
  2671   emit_byte(0xC1);
  2672   emit_byte(0xE8 | encode);
  2673   emit_byte(imm8);
  2676 void Assembler::shrl(Register dst) {
  2677   int encode = prefix_and_encode(dst->encoding());
  2678   emit_byte(0xD3);
  2679   emit_byte(0xE8 | encode);
  2682 // copies a single word from [esi] to [edi]
  2683 void Assembler::smovl() {
  2684   emit_byte(0xA5);
  2687 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  2688   // HMM Table D-1 says sse2
  2689   // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2690   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2691   emit_byte(0xF2);
  2692   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2693   emit_byte(0x0F);
  2694   emit_byte(0x51);
  2695   emit_byte(0xC0 | encode);
  2698 void Assembler::sqrtsd(XMMRegister dst, Address src) {
  2699   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2700   InstructionMark im(this);
  2701   emit_byte(0xF2);
  2702   prefix(src, dst);
  2703   emit_byte(0x0F);
  2704   emit_byte(0x51);
  2705   emit_operand(dst, src);
  2708 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  2709   // HMM Table D-1 says sse2
  2710   // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2711   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2712   emit_byte(0xF3);
  2713   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2714   emit_byte(0x0F);
  2715   emit_byte(0x51);
  2716   emit_byte(0xC0 | encode);
  2719 void Assembler::sqrtss(XMMRegister dst, Address src) {
  2720   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2721   InstructionMark im(this);
  2722   emit_byte(0xF3);
  2723   prefix(src, dst);
  2724   emit_byte(0x0F);
  2725   emit_byte(0x51);
  2726   emit_operand(dst, src);
  2729 void Assembler::stmxcsr( Address dst) {
  2730   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2731   InstructionMark im(this);
  2732   prefix(dst);
  2733   emit_byte(0x0F);
  2734   emit_byte(0xAE);
  2735   emit_operand(as_Register(3), dst);
  2738 void Assembler::subl(Address dst, int32_t imm32) {
  2739   InstructionMark im(this);
  2740   prefix(dst);
  2741   emit_arith_operand(0x81, rbp, dst, imm32);
  2744 void Assembler::subl(Address dst, Register src) {
  2745   InstructionMark im(this);
  2746   prefix(dst, src);
  2747   emit_byte(0x29);
  2748   emit_operand(src, dst);
  2751 void Assembler::subl(Register dst, int32_t imm32) {
  2752   prefix(dst);
  2753   emit_arith(0x81, 0xE8, dst, imm32);
  2756 void Assembler::subl(Register dst, Address src) {
  2757   InstructionMark im(this);
  2758   prefix(src, dst);
  2759   emit_byte(0x2B);
  2760   emit_operand(dst, src);
  2763 void Assembler::subl(Register dst, Register src) {
  2764   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2765   emit_arith(0x2B, 0xC0, dst, src);
  2768 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  2769   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2770   emit_byte(0xF2);
  2771   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2772   emit_byte(0x0F);
  2773   emit_byte(0x5C);
  2774   emit_byte(0xC0 | encode);
  2777 void Assembler::subsd(XMMRegister dst, Address src) {
  2778   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2779   InstructionMark im(this);
  2780   emit_byte(0xF2);
  2781   prefix(src, dst);
  2782   emit_byte(0x0F);
  2783   emit_byte(0x5C);
  2784   emit_operand(dst, src);
  2787 void Assembler::subss(XMMRegister dst, XMMRegister src) {
  2788   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2789   emit_byte(0xF3);
  2790   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2791   emit_byte(0x0F);
  2792   emit_byte(0x5C);
  2793   emit_byte(0xC0 | encode);
  2796 void Assembler::subss(XMMRegister dst, Address src) {
  2797   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2798   InstructionMark im(this);
  2799   emit_byte(0xF3);
  2800   prefix(src, dst);
  2801   emit_byte(0x0F);
  2802   emit_byte(0x5C);
  2803   emit_operand(dst, src);
  2806 void Assembler::testb(Register dst, int imm8) {
  2807   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  2808   (void) prefix_and_encode(dst->encoding(), true);
  2809   emit_arith_b(0xF6, 0xC0, dst, imm8);
  2812 void Assembler::testl(Register dst, int32_t imm32) {
  2813   // not using emit_arith because test
  2814   // doesn't support sign-extension of
  2815   // 8bit operands
  2816   int encode = dst->encoding();
  2817   if (encode == 0) {
  2818     emit_byte(0xA9);
  2819   } else {
  2820     encode = prefix_and_encode(encode);
  2821     emit_byte(0xF7);
  2822     emit_byte(0xC0 | encode);
  2824   emit_long(imm32);
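       // For illustration (hypothetical calls): testl(rax, 0x10) uses the short
       // rax-only form and emits A9 10 00 00 00, whereas testl(rcx, 0x10) emits
       // F7 C1 10 00 00 00.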
  2827 void Assembler::testl(Register dst, Register src) {
  2828   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2829   emit_arith(0x85, 0xC0, dst, src);
  2832 void Assembler::testl(Register dst, Address  src) {
  2833   InstructionMark im(this);
  2834   prefix(src, dst);
  2835   emit_byte(0x85);
  2836   emit_operand(dst, src);
  2839 void Assembler::ucomisd(XMMRegister dst, Address src) {
  2840   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2841   emit_byte(0x66);
  2842   ucomiss(dst, src);
  2845 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  2846   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2847   emit_byte(0x66);
  2848   ucomiss(dst, src);
  2851 void Assembler::ucomiss(XMMRegister dst, Address src) {
  2852   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2854   InstructionMark im(this);
  2855   prefix(src, dst);
  2856   emit_byte(0x0F);
  2857   emit_byte(0x2E);
  2858   emit_operand(dst, src);
  2861 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  2862   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2863   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2864   emit_byte(0x0F);
  2865   emit_byte(0x2E);
  2866   emit_byte(0xC0 | encode);
  2870 void Assembler::xaddl(Address dst, Register src) {
  2871   InstructionMark im(this);
  2872   prefix(dst, src);
  2873   emit_byte(0x0F);
  2874   emit_byte(0xC1);
  2875   emit_operand(src, dst);
  2878 void Assembler::xchgl(Register dst, Address src) { // xchg
  2879   InstructionMark im(this);
  2880   prefix(src, dst);
  2881   emit_byte(0x87);
  2882   emit_operand(dst, src);
  2885 void Assembler::xchgl(Register dst, Register src) {
  2886   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2887   emit_byte(0x87);
  2888   emit_byte(0xc0 | encode);
  2891 void Assembler::xorl(Register dst, int32_t imm32) {
  2892   prefix(dst);
  2893   emit_arith(0x81, 0xF0, dst, imm32);
  2896 void Assembler::xorl(Register dst, Address src) {
  2897   InstructionMark im(this);
  2898   prefix(src, dst);
  2899   emit_byte(0x33);
  2900   emit_operand(dst, src);
  2903 void Assembler::xorl(Register dst, Register src) {
  2904   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2905   emit_arith(0x33, 0xC0, dst, src);
  2908 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  2909   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2910   emit_byte(0x66);
  2911   xorps(dst, src);
  2914 void Assembler::xorpd(XMMRegister dst, Address src) {
  2915   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  2916   InstructionMark im(this);
  2917   emit_byte(0x66);
  2918   prefix(src, dst);
  2919   emit_byte(0x0F);
  2920   emit_byte(0x57);
  2921   emit_operand(dst, src);
  2925 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  2926   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2927   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2928   emit_byte(0x0F);
  2929   emit_byte(0x57);
  2930   emit_byte(0xC0 | encode);
  2933 void Assembler::xorps(XMMRegister dst, Address src) {
  2934   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  2935   InstructionMark im(this);
  2936   prefix(src, dst);
  2937   emit_byte(0x0F);
  2938   emit_byte(0x57);
  2939   emit_operand(dst, src);
  2942 #ifndef _LP64
  2943 // 32bit only pieces of the assembler
  2945 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  2946   // NO PREFIX AS NEVER 64BIT
  2947   InstructionMark im(this);
  2948   emit_byte(0x81);
  2949   emit_byte(0xF8 | src1->encoding());
  2950   emit_data(imm32, rspec, 0);
  2953 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  2954   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  2955   InstructionMark im(this);
  2956   emit_byte(0x81);
  2957   emit_operand(rdi, src1);
  2958   emit_data(imm32, rspec, 0);
  2961 // The 64-bit cmpxchg (on a 32-bit platform) compares the value at adr with the contents of rdx:rax,
  2962 // and stores rcx:rbx into adr if they are equal; otherwise, the value at adr is loaded
  2963 // into rdx:rax.  The ZF flag is set if the compared values were equal, and cleared otherwise.
  2964 void Assembler::cmpxchg8(Address adr) {
  2965   InstructionMark im(this);
  2966   emit_byte(0x0F);
  2967   emit_byte(0xc7);
  2968   emit_operand(rcx, adr);
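       // Usage sketch (assumed typical pattern, not taken from this file): put the
       // expected value in rdx:rax and the new value in rcx:rbx, then emit lock()
       // followed by cmpxchg8(adr) and branch on ZF to distinguish success from failure.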
  2971 void Assembler::decl(Register dst) {
  2972   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  2973   emit_byte(0x48 | dst->encoding());
  2976 #endif // _LP64
  2978 // 64bit typically doesn't use the x87 but needs to for the trig funcs
  2980 void Assembler::fabs() {
  2981   emit_byte(0xD9);
  2982   emit_byte(0xE1);
  2985 void Assembler::fadd(int i) {
  2986   emit_farith(0xD8, 0xC0, i);
  2989 void Assembler::fadd_d(Address src) {
  2990   InstructionMark im(this);
  2991   emit_byte(0xDC);
  2992   emit_operand32(rax, src);
  2995 void Assembler::fadd_s(Address src) {
  2996   InstructionMark im(this);
  2997   emit_byte(0xD8);
  2998   emit_operand32(rax, src);
  3001 void Assembler::fadda(int i) {
  3002   emit_farith(0xDC, 0xC0, i);
  3005 void Assembler::faddp(int i) {
  3006   emit_farith(0xDE, 0xC0, i);
  3009 void Assembler::fchs() {
  3010   emit_byte(0xD9);
  3011   emit_byte(0xE0);
  3014 void Assembler::fcom(int i) {
  3015   emit_farith(0xD8, 0xD0, i);
  3018 void Assembler::fcomp(int i) {
  3019   emit_farith(0xD8, 0xD8, i);
  3022 void Assembler::fcomp_d(Address src) {
  3023   InstructionMark im(this);
  3024   emit_byte(0xDC);
  3025   emit_operand32(rbx, src);
  3028 void Assembler::fcomp_s(Address src) {
  3029   InstructionMark im(this);
  3030   emit_byte(0xD8);
  3031   emit_operand32(rbx, src);
  3034 void Assembler::fcompp() {
  3035   emit_byte(0xDE);
  3036   emit_byte(0xD9);
  3039 void Assembler::fcos() {
  3040   emit_byte(0xD9);
  3041   emit_byte(0xFF);
  3044 void Assembler::fdecstp() {
  3045   emit_byte(0xD9);
  3046   emit_byte(0xF6);
  3049 void Assembler::fdiv(int i) {
  3050   emit_farith(0xD8, 0xF0, i);
  3053 void Assembler::fdiv_d(Address src) {
  3054   InstructionMark im(this);
  3055   emit_byte(0xDC);
  3056   emit_operand32(rsi, src);
  3059 void Assembler::fdiv_s(Address src) {
  3060   InstructionMark im(this);
  3061   emit_byte(0xD8);
  3062   emit_operand32(rsi, src);
  3065 void Assembler::fdiva(int i) {
  3066   emit_farith(0xDC, 0xF8, i);
  3069 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
  3070 //       is erroneous for some of the floating-point instructions below.
  3072 void Assembler::fdivp(int i) {
  3073   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
  3076 void Assembler::fdivr(int i) {
  3077   emit_farith(0xD8, 0xF8, i);
  3080 void Assembler::fdivr_d(Address src) {
  3081   InstructionMark im(this);
  3082   emit_byte(0xDC);
  3083   emit_operand32(rdi, src);
  3086 void Assembler::fdivr_s(Address src) {
  3087   InstructionMark im(this);
  3088   emit_byte(0xD8);
  3089   emit_operand32(rdi, src);
  3092 void Assembler::fdivra(int i) {
  3093   emit_farith(0xDC, 0xF0, i);
  3096 void Assembler::fdivrp(int i) {
  3097   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
  3100 void Assembler::ffree(int i) {
  3101   emit_farith(0xDD, 0xC0, i);
  3104 void Assembler::fild_d(Address adr) {
  3105   InstructionMark im(this);
  3106   emit_byte(0xDF);
  3107   emit_operand32(rbp, adr);
  3110 void Assembler::fild_s(Address adr) {
  3111   InstructionMark im(this);
  3112   emit_byte(0xDB);
  3113   emit_operand32(rax, adr);
  3116 void Assembler::fincstp() {
  3117   emit_byte(0xD9);
  3118   emit_byte(0xF7);
  3121 void Assembler::finit() {
  3122   emit_byte(0x9B);
  3123   emit_byte(0xDB);
  3124   emit_byte(0xE3);
  3127 void Assembler::fist_s(Address adr) {
  3128   InstructionMark im(this);
  3129   emit_byte(0xDB);
  3130   emit_operand32(rdx, adr);
  3133 void Assembler::fistp_d(Address adr) {
  3134   InstructionMark im(this);
  3135   emit_byte(0xDF);
  3136   emit_operand32(rdi, adr);
  3139 void Assembler::fistp_s(Address adr) {
  3140   InstructionMark im(this);
  3141   emit_byte(0xDB);
  3142   emit_operand32(rbx, adr);
  3145 void Assembler::fld1() {
  3146   emit_byte(0xD9);
  3147   emit_byte(0xE8);
  3150 void Assembler::fld_d(Address adr) {
  3151   InstructionMark im(this);
  3152   emit_byte(0xDD);
  3153   emit_operand32(rax, adr);
  3156 void Assembler::fld_s(Address adr) {
  3157   InstructionMark im(this);
  3158   emit_byte(0xD9);
  3159   emit_operand32(rax, adr);
  3163 void Assembler::fld_s(int index) {
  3164   emit_farith(0xD9, 0xC0, index);
  3167 void Assembler::fld_x(Address adr) {
  3168   InstructionMark im(this);
  3169   emit_byte(0xDB);
  3170   emit_operand32(rbp, adr);
  3173 void Assembler::fldcw(Address src) {
  3174   InstructionMark im(this);
  3175   emit_byte(0xd9);
  3176   emit_operand32(rbp, src);
  3179 void Assembler::fldenv(Address src) {
  3180   InstructionMark im(this);
  3181   emit_byte(0xD9);
  3182   emit_operand32(rsp, src);
  3185 void Assembler::fldlg2() {
  3186   emit_byte(0xD9);
  3187   emit_byte(0xEC);
  3190 void Assembler::fldln2() {
  3191   emit_byte(0xD9);
  3192   emit_byte(0xED);
  3195 void Assembler::fldz() {
  3196   emit_byte(0xD9);
  3197   emit_byte(0xEE);
  3200 void Assembler::flog() {
  3201   fldln2();
  3202   fxch();
  3203   fyl2x();
  3206 void Assembler::flog10() {
  3207   fldlg2();
  3208   fxch();
  3209   fyl2x();
  3212 void Assembler::fmul(int i) {
  3213   emit_farith(0xD8, 0xC8, i);
  3216 void Assembler::fmul_d(Address src) {
  3217   InstructionMark im(this);
  3218   emit_byte(0xDC);
  3219   emit_operand32(rcx, src);
  3222 void Assembler::fmul_s(Address src) {
  3223   InstructionMark im(this);
  3224   emit_byte(0xD8);
  3225   emit_operand32(rcx, src);
  3228 void Assembler::fmula(int i) {
  3229   emit_farith(0xDC, 0xC8, i);
  3232 void Assembler::fmulp(int i) {
  3233   emit_farith(0xDE, 0xC8, i);
  3236 void Assembler::fnsave(Address dst) {
  3237   InstructionMark im(this);
  3238   emit_byte(0xDD);
  3239   emit_operand32(rsi, dst);
  3242 void Assembler::fnstcw(Address src) {
  3243   InstructionMark im(this);
  3244   emit_byte(0x9B);
  3245   emit_byte(0xD9);
  3246   emit_operand32(rdi, src);
  3249 void Assembler::fnstsw_ax() {
  3250   emit_byte(0xdF);
  3251   emit_byte(0xE0);
  3254 void Assembler::fprem() {
  3255   emit_byte(0xD9);
  3256   emit_byte(0xF8);
  3259 void Assembler::fprem1() {
  3260   emit_byte(0xD9);
  3261   emit_byte(0xF5);
  3264 void Assembler::frstor(Address src) {
  3265   InstructionMark im(this);
  3266   emit_byte(0xDD);
  3267   emit_operand32(rsp, src);
  3270 void Assembler::fsin() {
  3271   emit_byte(0xD9);
  3272   emit_byte(0xFE);
  3275 void Assembler::fsqrt() {
  3276   emit_byte(0xD9);
  3277   emit_byte(0xFA);
  3280 void Assembler::fst_d(Address adr) {
  3281   InstructionMark im(this);
  3282   emit_byte(0xDD);
  3283   emit_operand32(rdx, adr);
  3286 void Assembler::fst_s(Address adr) {
  3287   InstructionMark im(this);
  3288   emit_byte(0xD9);
  3289   emit_operand32(rdx, adr);
  3292 void Assembler::fstp_d(Address adr) {
  3293   InstructionMark im(this);
  3294   emit_byte(0xDD);
  3295   emit_operand32(rbx, adr);
  3298 void Assembler::fstp_d(int index) {
  3299   emit_farith(0xDD, 0xD8, index);
  3302 void Assembler::fstp_s(Address adr) {
  3303   InstructionMark im(this);
  3304   emit_byte(0xD9);
  3305   emit_operand32(rbx, adr);
  3308 void Assembler::fstp_x(Address adr) {
  3309   InstructionMark im(this);
  3310   emit_byte(0xDB);
  3311   emit_operand32(rdi, adr);
  3314 void Assembler::fsub(int i) {
  3315   emit_farith(0xD8, 0xE0, i);
  3318 void Assembler::fsub_d(Address src) {
  3319   InstructionMark im(this);
  3320   emit_byte(0xDC);
  3321   emit_operand32(rsp, src);
  3324 void Assembler::fsub_s(Address src) {
  3325   InstructionMark im(this);
  3326   emit_byte(0xD8);
  3327   emit_operand32(rsp, src);
  3330 void Assembler::fsuba(int i) {
  3331   emit_farith(0xDC, 0xE8, i);
  3334 void Assembler::fsubp(int i) {
  3335   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
  3338 void Assembler::fsubr(int i) {
  3339   emit_farith(0xD8, 0xE8, i);
  3342 void Assembler::fsubr_d(Address src) {
  3343   InstructionMark im(this);
  3344   emit_byte(0xDC);
  3345   emit_operand32(rbp, src);
  3348 void Assembler::fsubr_s(Address src) {
  3349   InstructionMark im(this);
  3350   emit_byte(0xD8);
  3351   emit_operand32(rbp, src);
  3354 void Assembler::fsubra(int i) {
  3355   emit_farith(0xDC, 0xE0, i);
  3358 void Assembler::fsubrp(int i) {
  3359   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
  3362 void Assembler::ftan() {
  3363   emit_byte(0xD9);
  3364   emit_byte(0xF2);
  3365   emit_byte(0xDD);
  3366   emit_byte(0xD8);
  3369 void Assembler::ftst() {
  3370   emit_byte(0xD9);
  3371   emit_byte(0xE4);
  3374 void Assembler::fucomi(int i) {
  3375   // make sure the instruction is supported (introduced for P6, together with cmov)
  3376   guarantee(VM_Version::supports_cmov(), "illegal instruction");
  3377   emit_farith(0xDB, 0xE8, i);
  3380 void Assembler::fucomip(int i) {
  3381   // make sure the instruction is supported (introduced for P6, together with cmov)
  3382   guarantee(VM_Version::supports_cmov(), "illegal instruction");
  3383   emit_farith(0xDF, 0xE8, i);
  3386 void Assembler::fwait() {
  3387   emit_byte(0x9B);
  3390 void Assembler::fxch(int i) {
  3391   emit_farith(0xD9, 0xC8, i);
  3394 void Assembler::fyl2x() {
  3395   emit_byte(0xD9);
  3396   emit_byte(0xF1);
  3400 #ifndef _LP64
  3402 void Assembler::incl(Register dst) {
  3403   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  3404   emit_byte(0x40 | dst->encoding());
  3407 void Assembler::lea(Register dst, Address src) {
  3408   leal(dst, src);
  3411 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  3412   InstructionMark im(this);
  3413   emit_byte(0xC7);
  3414   emit_operand(rax, dst);
  3415   emit_data((int)imm32, rspec, 0);
  3418 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  3419   InstructionMark im(this);
  3420   int encode = prefix_and_encode(dst->encoding());
  3421   emit_byte(0xB8 | encode);
  3422   emit_data((int)imm32, rspec, 0);
  3425 void Assembler::popa() { // 32bit
  3426   emit_byte(0x61);
  3429 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  3430   InstructionMark im(this);
  3431   emit_byte(0x68);
  3432   emit_data(imm32, rspec, 0);
  3435 void Assembler::pusha() { // 32bit
  3436   emit_byte(0x60);
  3439 void Assembler::set_byte_if_not_zero(Register dst) {
  3440   emit_byte(0x0F);
  3441   emit_byte(0x95);
  3442   emit_byte(0xE0 | dst->encoding());
  3445 void Assembler::shldl(Register dst, Register src) {
  3446   emit_byte(0x0F);
  3447   emit_byte(0xA5);
  3448   emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
  3451 void Assembler::shrdl(Register dst, Register src) {
  3452   emit_byte(0x0F);
  3453   emit_byte(0xAD);
  3454   emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
  3457 #else // LP64
  3459 void Assembler::set_byte_if_not_zero(Register dst) {
  3460   int enc = prefix_and_encode(dst->encoding(), true);
  3461   emit_byte(0x0F);
  3462   emit_byte(0x95);
  3463   emit_byte(0xE0 | enc);
  3466 // 64bit only pieces of the assembler
  3467 // This should only be used by 64-bit instructions that can use rip-relative addressing;
  3468 // it cannot be used by instructions that want an immediate value.
  3470 bool Assembler::reachable(AddressLiteral adr) {
  3471   int64_t disp;
  3472   // None will force a 64bit literal to the code stream. Likely a placeholder
  3473   // for something that will be patched later and we need to be certain it will
  3474   // always be reachable.
  3475   if (adr.reloc() == relocInfo::none) {
  3476     return false;
  3478   if (adr.reloc() == relocInfo::internal_word_type) {
  3479     // This should be rip relative and easily reachable.
  3480     return true;
  3482   if (adr.reloc() == relocInfo::virtual_call_type ||
  3483       adr.reloc() == relocInfo::opt_virtual_call_type ||
  3484       adr.reloc() == relocInfo::static_call_type ||
  3485       adr.reloc() == relocInfo::static_stub_type ) {
  3486     // This should be rip relative within the code cache and easily
  3487     // reachable until we get huge code caches. (At which point
  3488     // ic code is going to have issues).
  3489     return true;
  3491   if (adr.reloc() != relocInfo::external_word_type &&
  3492       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
  3493       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
  3494       adr.reloc() != relocInfo::runtime_call_type ) {
  3495     return false;
  3498   // Stress the correction code
  3499   if (ForceUnreachable) {
  3500     // Must be a runtime call reloc; see if it is in the code cache
  3501     // Flipping stuff in the codecache to be unreachable causes issues
  3502     // with things like inline caches where the additional instructions
  3503     // are not handled.
  3504     if (CodeCache::find_blob(adr._target) == NULL) {
  3505       return false;
  3508   // For external_word_type/runtime_call_type, if the target is reachable both from where
  3509   // we are now (possibly a temp buffer) and from anywhere we might end up
  3510   // in the codeCache, then it is always reachable.
  3511   // This would have to become more pessimistic if we ever save/restore
  3512   // shared code.
  3514   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  3515   if (!is_simm32(disp)) return false;
  3516   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  3517   if (!is_simm32(disp)) return false;
  3519   disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
  3521   // Because a rip-relative address is a disp + address_of_next_instruction and we
  3522   // don't know the value of address_of_next_instruction, we apply a fudge factor
  3523   // to make sure we will be ok no matter what size of instruction this ends up in.
  3524   // We don't have to fudge the checks above here because they are already worst case.
  3526   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, and a 4-byte literal
  3527   // + 4 because better safe than sorry.
  3528   const int fudge = 12 + 4;
  3529   if (disp < 0) {
  3530     disp -= fudge;
  3531   } else {
  3532     disp += fudge;
  3534   return is_simm32(disp);
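       // Worked note (illustrative): a runtime call target that lies more than roughly
       // 2GB from either code cache bound fails the is_simm32 checks above, so
       // reachable() returns false and the caller must materialize the full 64-bit
       // address in a register instead of using a rip-relative form.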
  3537 void Assembler::emit_data64(jlong data,
  3538                             relocInfo::relocType rtype,
  3539                             int format) {
  3540   if (rtype == relocInfo::none) {
  3541     emit_long64(data);
  3542   } else {
  3543     emit_data64(data, Relocation::spec_simple(rtype), format);
  3547 void Assembler::emit_data64(jlong data,
  3548                             RelocationHolder const& rspec,
  3549                             int format) {
  3550   assert(imm_operand == 0, "default format must be immediate in this file");
  3551   assert(imm_operand == format, "must be immediate");
  3552   assert(inst_mark() != NULL, "must be inside InstructionMark");
  3553   // Do not use AbstractAssembler::relocate, which is not intended for
  3554   // embedded words.  Instead, relocate to the enclosing instruction.
  3555   code_section()->relocate(inst_mark(), rspec, format);
  3556 #ifdef ASSERT
  3557   check_relocation(rspec, format);
  3558 #endif
  3559   emit_long64(data);
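       // Note: callers such as mov_literal64() below open an InstructionMark that
       // covers the opcode byte and then call emit_data64(), so the relocation is
       // anchored at the start of the whole instruction rather than at the
       // embedded 8-byte immediate.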
  3562 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  3563   if (reg_enc >= 8) {
  3564     prefix(REX_B);
  3565     reg_enc -= 8;
  3566   } else if (byteinst && reg_enc >= 4) {
  3567     prefix(REX);
  3569   return reg_enc;
  3572 int Assembler::prefixq_and_encode(int reg_enc) {
  3573   if (reg_enc < 8) {
  3574     prefix(REX_W);
  3575   } else {
  3576     prefix(REX_WB);
  3577     reg_enc -= 8;
  3579   return reg_enc;
  3582 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  3583   if (dst_enc < 8) {
  3584     if (src_enc >= 8) {
  3585       prefix(REX_B);
  3586       src_enc -= 8;
  3587     } else if (byteinst && src_enc >= 4) {
  3588       prefix(REX);
  3590   } else {
  3591     if (src_enc < 8) {
  3592       prefix(REX_R);
  3593     } else {
  3594       prefix(REX_RB);
  3595       src_enc -= 8;
  3597     dst_enc -= 8;
  3599   return dst_enc << 3 | src_enc;
  3602 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  3603   if (dst_enc < 8) {
  3604     if (src_enc < 8) {
  3605       prefix(REX_W);
  3606     } else {
  3607       prefix(REX_WB);
  3608       src_enc -= 8;
  3610   } else {
  3611     if (src_enc < 8) {
  3612       prefix(REX_WR);
  3613     } else {
  3614       prefix(REX_WRB);
  3615       src_enc -= 8;
  3617     dst_enc -= 8;
  3619   return dst_enc << 3 | src_enc;
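       // REX recap for the helpers above: REX.W selects 64-bit operand size,
       // REX.R extends the ModRM reg field, REX.X the SIB index field and REX.B
       // the ModRM rm / SIB base / opcode register field.  The value returned by
       // the *_and_encode() helpers is (reg << 3) | rm, ready to be OR'ed into a
       // ModRM byte by the caller.  For example, movq(r9, rbx) goes through
       // prefixq_and_encode(9, 3), which emits REX_WR (0x4C) and returns 0x0B,
       // producing the byte sequence 4C 8B CB.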
  3622 void Assembler::prefix(Register reg) {
  3623   if (reg->encoding() >= 8) {
  3624     prefix(REX_B);
  3628 void Assembler::prefix(Address adr) {
  3629   if (adr.base_needs_rex()) {
  3630     if (adr.index_needs_rex()) {
  3631       prefix(REX_XB);
  3632     } else {
  3633       prefix(REX_B);
  3635   } else {
  3636     if (adr.index_needs_rex()) {
  3637       prefix(REX_X);
  3642 void Assembler::prefixq(Address adr) {
  3643   if (adr.base_needs_rex()) {
  3644     if (adr.index_needs_rex()) {
  3645       prefix(REX_WXB);
  3646     } else {
  3647       prefix(REX_WB);
  3649   } else {
  3650     if (adr.index_needs_rex()) {
  3651       prefix(REX_WX);
  3652     } else {
  3653       prefix(REX_W);
  3659 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  3660   if (reg->encoding() < 8) {
  3661     if (adr.base_needs_rex()) {
  3662       if (adr.index_needs_rex()) {
  3663         prefix(REX_XB);
  3664       } else {
  3665         prefix(REX_B);
  3667     } else {
  3668       if (adr.index_needs_rex()) {
  3669         prefix(REX_X);
  3670       } else if (reg->encoding() >= 4 ) {
  3671         prefix(REX);
  3674   } else {
  3675     if (adr.base_needs_rex()) {
  3676       if (adr.index_needs_rex()) {
  3677         prefix(REX_RXB);
  3678       } else {
  3679         prefix(REX_RB);
  3681     } else {
  3682       if (adr.index_needs_rex()) {
  3683         prefix(REX_RX);
  3684       } else {
  3685         prefix(REX_R);
  3691 void Assembler::prefixq(Address adr, Register src) {
  3692   if (src->encoding() < 8) {
  3693     if (adr.base_needs_rex()) {
  3694       if (adr.index_needs_rex()) {
  3695         prefix(REX_WXB);
  3696       } else {
  3697         prefix(REX_WB);
  3699     } else {
  3700       if (adr.index_needs_rex()) {
  3701         prefix(REX_WX);
  3702       } else {
  3703         prefix(REX_W);
  3706   } else {
  3707     if (adr.base_needs_rex()) {
  3708       if (adr.index_needs_rex()) {
  3709         prefix(REX_WRXB);
  3710       } else {
  3711         prefix(REX_WRB);
  3713     } else {
  3714       if (adr.index_needs_rex()) {
  3715         prefix(REX_WRX);
  3716       } else {
  3717         prefix(REX_WR);
  3723 void Assembler::prefix(Address adr, XMMRegister reg) {
  3724   if (reg->encoding() < 8) {
  3725     if (adr.base_needs_rex()) {
  3726       if (adr.index_needs_rex()) {
  3727         prefix(REX_XB);
  3728       } else {
  3729         prefix(REX_B);
  3731     } else {
  3732       if (adr.index_needs_rex()) {
  3733         prefix(REX_X);
  3736   } else {
  3737     if (adr.base_needs_rex()) {
  3738       if (adr.index_needs_rex()) {
  3739         prefix(REX_RXB);
  3740       } else {
  3741         prefix(REX_RB);
  3743     } else {
  3744       if (adr.index_needs_rex()) {
  3745         prefix(REX_RX);
  3746       } else {
  3747         prefix(REX_R);
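       // Naming convention for the helpers above: the prefix*() variants emit a
       // REX byte only when an extended register (r8..r15, xmm8..xmm15) or a byte
       // register in the spl..dil range forces one; the prefixq*() variants always
       // set REX.W to request a 64-bit operand size.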
  3753 void Assembler::adcq(Register dst, int32_t imm32) {
  3754   (void) prefixq_and_encode(dst->encoding());
  3755   emit_arith(0x81, 0xD0, dst, imm32);
  3758 void Assembler::adcq(Register dst, Address src) {
  3759   InstructionMark im(this);
  3760   prefixq(src, dst);
  3761   emit_byte(0x13);
  3762   emit_operand(dst, src);
  3765 void Assembler::adcq(Register dst, Register src) {
  3766   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  3767   emit_arith(0x13, 0xC0, dst, src);
  3770 void Assembler::addq(Address dst, int32_t imm32) {
  3771   InstructionMark im(this);
  3772   prefixq(dst);
  3773   emit_arith_operand(0x81, rax, dst, imm32);
  3776 void Assembler::addq(Address dst, Register src) {
  3777   InstructionMark im(this);
  3778   prefixq(dst, src);
  3779   emit_byte(0x01);
  3780   emit_operand(src, dst);
  3783 void Assembler::addq(Register dst, int32_t imm32) {
  3784   (void) prefixq_and_encode(dst->encoding());
  3785   emit_arith(0x81, 0xC0, dst, imm32);
  3788 void Assembler::addq(Register dst, Address src) {
  3789   InstructionMark im(this);
  3790   prefixq(src, dst);
  3791   emit_byte(0x03);
  3792   emit_operand(dst, src);
  3795 void Assembler::addq(Register dst, Register src) {
  3796   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  3797   emit_arith(0x03, 0xC0, dst, src);
  3800 void Assembler::andq(Register dst, int32_t imm32) {
  3801   (void) prefixq_and_encode(dst->encoding());
  3802   emit_arith(0x81, 0xE0, dst, imm32);
  3805 void Assembler::andq(Register dst, Address src) {
  3806   InstructionMark im(this);
  3807   prefixq(src, dst);
  3808   emit_byte(0x23);
  3809   emit_operand(dst, src);
  3812 void Assembler::andq(Register dst, Register src) {
  3813   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  3814   emit_arith(0x23, 0xC0, dst, src);
  3817 void Assembler::bsfq(Register dst, Register src) {
  3818   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3819   emit_byte(0x0F);
  3820   emit_byte(0xBC);
  3821   emit_byte(0xC0 | encode);
  3824 void Assembler::bsrq(Register dst, Register src) {
  3825   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  3826   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3827   emit_byte(0x0F);
  3828   emit_byte(0xBD);
  3829   emit_byte(0xC0 | encode);
  3832 void Assembler::bswapq(Register reg) {
  3833   int encode = prefixq_and_encode(reg->encoding());
  3834   emit_byte(0x0F);
  3835   emit_byte(0xC8 | encode);
  3838 void Assembler::cdqq() {
  3839   prefix(REX_W);
  3840   emit_byte(0x99);
  3843 void Assembler::clflush(Address adr) {
  3844   prefix(adr);
  3845   emit_byte(0x0F);
  3846   emit_byte(0xAE);
  3847   emit_operand(rdi, adr);
  3850 void Assembler::cmovq(Condition cc, Register dst, Register src) {
  3851   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3852   emit_byte(0x0F);
  3853   emit_byte(0x40 | cc);
  3854   emit_byte(0xC0 | encode);
  3857 void Assembler::cmovq(Condition cc, Register dst, Address src) {
  3858   InstructionMark im(this);
  3859   prefixq(src, dst);
  3860   emit_byte(0x0F);
  3861   emit_byte(0x40 | cc);
  3862   emit_operand(dst, src);
  3865 void Assembler::cmpq(Address dst, int32_t imm32) {
  3866   InstructionMark im(this);
  3867   prefixq(dst);
  3868   emit_byte(0x81);
  3869   emit_operand(rdi, dst, 4);
  3870   emit_long(imm32);
  3873 void Assembler::cmpq(Register dst, int32_t imm32) {
  3874   (void) prefixq_and_encode(dst->encoding());
  3875   emit_arith(0x81, 0xF8, dst, imm32);
  3878 void Assembler::cmpq(Address dst, Register src) {
  3879   InstructionMark im(this);
  3880   prefixq(dst, src);
  3881   emit_byte(0x39);  // CMP r/m64, r64: compares [dst] with src (0x3B would reverse the operands)
  3882   emit_operand(src, dst);
  3885 void Assembler::cmpq(Register dst, Register src) {
  3886   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  3887   emit_arith(0x3B, 0xC0, dst, src);
  3890 void Assembler::cmpq(Register dst, Address  src) {
  3891   InstructionMark im(this);
  3892   prefixq(src, dst);
  3893   emit_byte(0x3B);
  3894   emit_operand(dst, src);
  3897 void Assembler::cmpxchgq(Register reg, Address adr) {
  3898   InstructionMark im(this);
  3899   prefixq(adr, reg);
  3900   emit_byte(0x0F);
  3901   emit_byte(0xB1);
  3902   emit_operand(reg, adr);
  3905 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  3906   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  3907   emit_byte(0xF2);
  3908   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3909   emit_byte(0x0F);
  3910   emit_byte(0x2A);
  3911   emit_byte(0xC0 | encode);
  3914 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  3915   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  3916   emit_byte(0xF3);
  3917   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3918   emit_byte(0x0F);
  3919   emit_byte(0x2A);
  3920   emit_byte(0xC0 | encode);
  3923 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  3924   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  3925   emit_byte(0xF2);
  3926   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3927   emit_byte(0x0F);
  3928   emit_byte(0x2C);
  3929   emit_byte(0xC0 | encode);
  3932 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  3933   NOT_LP64(assert(VM_Version::supports_sse(), ""));
  3934   emit_byte(0xF3);
  3935   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3936   emit_byte(0x0F);
  3937   emit_byte(0x2C);
  3938   emit_byte(0xC0 | encode);
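       // Note on the conversions above: 0xF2/0xF3 are the SSE mandatory prefixes
       // selecting the scalar double/single forms, and they are emitted before
       // the REX prefix because REX must immediately precede the opcode bytes.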
  3941 void Assembler::decl(Register dst) {
  3942   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  3943   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  3944   int encode = prefix_and_encode(dst->encoding());
  3945   emit_byte(0xFF);
  3946   emit_byte(0xC8 | encode);
  3949 void Assembler::decq(Register dst) {
  3950   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  3951   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  3952   int encode = prefixq_and_encode(dst->encoding());
  3953   emit_byte(0xFF);
  3954   emit_byte(0xC8 | encode);
  3957 void Assembler::decq(Address dst) {
  3958   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  3959   InstructionMark im(this);
  3960   prefixq(dst);
  3961   emit_byte(0xFF);
  3962   emit_operand(rcx, dst);
  3965 void Assembler::fxrstor(Address src) {
  3966   prefixq(src);
  3967   emit_byte(0x0F);
  3968   emit_byte(0xAE);
  3969   emit_operand(as_Register(1), src);
  3972 void Assembler::fxsave(Address dst) {
  3973   prefixq(dst);
  3974   emit_byte(0x0F);
  3975   emit_byte(0xAE);
  3976   emit_operand(as_Register(0), dst);
  3979 void Assembler::idivq(Register src) {
  3980   int encode = prefixq_and_encode(src->encoding());
  3981   emit_byte(0xF7);
  3982   emit_byte(0xF8 | encode);
  3985 void Assembler::imulq(Register dst, Register src) {
  3986   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3987   emit_byte(0x0F);
  3988   emit_byte(0xAF);
  3989   emit_byte(0xC0 | encode);
  3992 void Assembler::imulq(Register dst, Register src, int value) {
  3993   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3994   if (is8bit(value)) {
  3995     emit_byte(0x6B);
  3996     emit_byte(0xC0 | encode);
  3997     emit_byte(value & 0xFF);
  3998   } else {
  3999     emit_byte(0x69);
  4000     emit_byte(0xC0 | encode);
  4001     emit_long(value);
  4005 void Assembler::incl(Register dst) {
  4006   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  4007   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  4008   int encode = prefix_and_encode(dst->encoding());
  4009   emit_byte(0xFF);
  4010   emit_byte(0xC0 | encode);
  4013 void Assembler::incq(Register dst) {
  4014   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  4015   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  4016   int encode = prefixq_and_encode(dst->encoding());
  4017   emit_byte(0xFF);
  4018   emit_byte(0xC0 | encode);
  4021 void Assembler::incq(Address dst) {
  4022   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  4023   InstructionMark im(this);
  4024   prefixq(dst);
  4025   emit_byte(0xFF);
  4026   emit_operand(rax, dst);
  4029 void Assembler::lea(Register dst, Address src) {
  4030   leaq(dst, src);
  4033 void Assembler::leaq(Register dst, Address src) {
  4034   InstructionMark im(this);
  4035   prefixq(src, dst);
  4036   emit_byte(0x8D);
  4037   emit_operand(dst, src);
  4040 void Assembler::mov64(Register dst, int64_t imm64) {
  4041   InstructionMark im(this);
  4042   int encode = prefixq_and_encode(dst->encoding());
  4043   emit_byte(0xB8 | encode);
  4044   emit_long64(imm64);
  4047 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  4048   InstructionMark im(this);
  4049   int encode = prefixq_and_encode(dst->encoding());
  4050   emit_byte(0xB8 | encode);
  4051   emit_data64(imm64, rspec);
  4054 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  4055   InstructionMark im(this);
  4056   int encode = prefix_and_encode(dst->encoding());
  4057   emit_byte(0xB8 | encode);
  4058   emit_data((int)imm32, rspec, narrow_oop_operand);
  4061 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  4062   InstructionMark im(this);
  4063   prefix(dst);
  4064   emit_byte(0xC7);
  4065   emit_operand(rax, dst, 4);
  4066   emit_data((int)imm32, rspec, narrow_oop_operand);
  4069 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  4070   InstructionMark im(this);
  4071   int encode = prefix_and_encode(src1->encoding());
  4072   emit_byte(0x81);
  4073   emit_byte(0xF8 | encode);
  4074   emit_data((int)imm32, rspec, narrow_oop_operand);
  4077 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  4078   InstructionMark im(this);
  4079   prefix(src1);
  4080   emit_byte(0x81);
  4081   emit_operand(rax, src1, 4);
  4082   emit_data((int)imm32, rspec, narrow_oop_operand);
  4085 void Assembler::lzcntq(Register dst, Register src) {
  4086   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  4087   emit_byte(0xF3);
  4088   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4089   emit_byte(0x0F);
  4090   emit_byte(0xBD);
  4091   emit_byte(0xC0 | encode);
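       // Note: 0F BD is BSR; with an F3 prefix it decodes as LZCNT on CPUs that
       // implement LZCNT, while older CPUs ignore the F3 and still execute BSR
       // (which yields a different result).  The paired asserts in bsrq() and
       // lzcntq() keep the two encodings from being confused.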
  4094 void Assembler::movdq(XMMRegister dst, Register src) {
  4095   // table D-1 says MMX/SSE2
  4096   NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  4097   emit_byte(0x66);
  4098   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4099   emit_byte(0x0F);
  4100   emit_byte(0x6E);
  4101   emit_byte(0xC0 | encode);
  4104 void Assembler::movdq(Register dst, XMMRegister src) {
  4105   // table D-1 says MMX/SSE2
  4106   NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  4107   emit_byte(0x66);
  4108   // swap src/dst to get correct prefix
  4109   int encode = prefixq_and_encode(src->encoding(), dst->encoding());
  4110   emit_byte(0x0F);
  4111   emit_byte(0x7E);
  4112   emit_byte(0xC0 | encode);
  4115 void Assembler::movq(Register dst, Register src) {
  4116   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4117   emit_byte(0x8B);
  4118   emit_byte(0xC0 | encode);
  4121 void Assembler::movq(Register dst, Address src) {
  4122   InstructionMark im(this);
  4123   prefixq(src, dst);
  4124   emit_byte(0x8B);
  4125   emit_operand(dst, src);
  4128 void Assembler::movq(Address dst, Register src) {
  4129   InstructionMark im(this);
  4130   prefixq(dst, src);
  4131   emit_byte(0x89);
  4132   emit_operand(src, dst);
  4135 void Assembler::movsbq(Register dst, Address src) {
  4136   InstructionMark im(this);
  4137   prefixq(src, dst);
  4138   emit_byte(0x0F);
  4139   emit_byte(0xBE);
  4140   emit_operand(dst, src);
  4143 void Assembler::movsbq(Register dst, Register src) {
  4144   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4145   emit_byte(0x0F);
  4146   emit_byte(0xBE);
  4147   emit_byte(0xC0 | encode);
  4150 void Assembler::movslq(Register dst, int32_t imm32) {
  4151   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  4152   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  4153   // as a result we shouldn't use this form until it has been tested at runtime...
  4154   ShouldNotReachHere();
  4155   InstructionMark im(this);
  4156   int encode = prefixq_and_encode(dst->encoding());
  4157   emit_byte(0xC7 | encode);
  4158   emit_long(imm32);
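       // Note: besides the dbx observation above, 0xC7 | encode folds the register
       // encoding into the opcode byte, whereas C7 /0 expects a separate ModRM
       // byte, so the ShouldNotReachHere() keeps this entry point fenced off until
       // the encoding has been reworked and verified.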
  4161 void Assembler::movslq(Address dst, int32_t imm32) {
  4162   assert(is_simm32(imm32), "lost bits");
  4163   InstructionMark im(this);
  4164   prefixq(dst);
  4165   emit_byte(0xC7);
  4166   emit_operand(rax, dst, 4);
  4167   emit_long(imm32);
  4170 void Assembler::movslq(Register dst, Address src) {
  4171   InstructionMark im(this);
  4172   prefixq(src, dst);
  4173   emit_byte(0x63);
  4174   emit_operand(dst, src);
  4177 void Assembler::movslq(Register dst, Register src) {
  4178   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4179   emit_byte(0x63);
  4180   emit_byte(0xC0 | encode);
  4183 void Assembler::movswq(Register dst, Address src) {
  4184   InstructionMark im(this);
  4185   prefixq(src, dst);
  4186   emit_byte(0x0F);
  4187   emit_byte(0xBF);
  4188   emit_operand(dst, src);
  4191 void Assembler::movswq(Register dst, Register src) {
  4192   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4193   emit_byte(0x0F);
  4194   emit_byte(0xBF);
  4195   emit_byte(0xC0 | encode);
  4198 void Assembler::movzbq(Register dst, Address src) {
  4199   InstructionMark im(this);
  4200   prefixq(src, dst);
  4201   emit_byte(0x0F);
  4202   emit_byte(0xB6);
  4203   emit_operand(dst, src);
  4206 void Assembler::movzbq(Register dst, Register src) {
  4207   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4208   emit_byte(0x0F);
  4209   emit_byte(0xB6);
  4210   emit_byte(0xC0 | encode);
  4213 void Assembler::movzwq(Register dst, Address src) {
  4214   InstructionMark im(this);
  4215   prefixq(src, dst);
  4216   emit_byte(0x0F);
  4217   emit_byte(0xB7);
  4218   emit_operand(dst, src);
  4221 void Assembler::movzwq(Register dst, Register src) {
  4222   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4223   emit_byte(0x0F);
  4224   emit_byte(0xB7);
  4225   emit_byte(0xC0 | encode);
  4228 void Assembler::negq(Register dst) {
  4229   int encode = prefixq_and_encode(dst->encoding());
  4230   emit_byte(0xF7);
  4231   emit_byte(0xD8 | encode);
  4234 void Assembler::notq(Register dst) {
  4235   int encode = prefixq_and_encode(dst->encoding());
  4236   emit_byte(0xF7);
  4237   emit_byte(0xD0 | encode);
  4240 void Assembler::orq(Address dst, int32_t imm32) {
  4241   InstructionMark im(this);
  4242   prefixq(dst);
  4243   emit_byte(0x81);
  4244   emit_operand(rcx, dst, 4);
  4245   emit_long(imm32);
  4248 void Assembler::orq(Register dst, int32_t imm32) {
  4249   (void) prefixq_and_encode(dst->encoding());
  4250   emit_arith(0x81, 0xC8, dst, imm32);
  4253 void Assembler::orq(Register dst, Address src) {
  4254   InstructionMark im(this);
  4255   prefixq(src, dst);
  4256   emit_byte(0x0B);
  4257   emit_operand(dst, src);
  4260 void Assembler::orq(Register dst, Register src) {
  4261   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  4262   emit_arith(0x0B, 0xC0, dst, src);
  4265 void Assembler::popa() { // 64bit
  4266   movq(r15, Address(rsp, 0));
  4267   movq(r14, Address(rsp, wordSize));
  4268   movq(r13, Address(rsp, 2 * wordSize));
  4269   movq(r12, Address(rsp, 3 * wordSize));
  4270   movq(r11, Address(rsp, 4 * wordSize));
  4271   movq(r10, Address(rsp, 5 * wordSize));
  4272   movq(r9,  Address(rsp, 6 * wordSize));
  4273   movq(r8,  Address(rsp, 7 * wordSize));
  4274   movq(rdi, Address(rsp, 8 * wordSize));
  4275   movq(rsi, Address(rsp, 9 * wordSize));
  4276   movq(rbp, Address(rsp, 10 * wordSize));
  4277   // skip rsp
  4278   movq(rbx, Address(rsp, 12 * wordSize));
  4279   movq(rdx, Address(rsp, 13 * wordSize));
  4280   movq(rcx, Address(rsp, 14 * wordSize));
  4281   movq(rax, Address(rsp, 15 * wordSize));
  4283   addq(rsp, 16 * wordSize);
  4286 void Assembler::popcntq(Register dst, Address src) {
  4287   assert(VM_Version::supports_popcnt(), "must support");
  4288   InstructionMark im(this);
  4289   emit_byte(0xF3);
  4290   prefixq(src, dst);
  4291   emit_byte(0x0F);
  4292   emit_byte(0xB8);
  4293   emit_operand(dst, src);
  4296 void Assembler::popcntq(Register dst, Register src) {
  4297   assert(VM_Version::supports_popcnt(), "must support");
  4298   emit_byte(0xF3);
  4299   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4300   emit_byte(0x0F);
  4301   emit_byte(0xB8);
  4302   emit_byte(0xC0 | encode);
  4305 void Assembler::popq(Address dst) {
  4306   InstructionMark im(this);
  4307   prefixq(dst);
  4308   emit_byte(0x8F);
  4309   emit_operand(rax, dst);
  4312 void Assembler::pusha() { // 64bit
  4313   // we have to store the original rsp.  The ABI says that 128 bytes
  4314   // below rsp are local scratch.
  4315   movq(Address(rsp, -5 * wordSize), rsp);
  4317   subq(rsp, 16 * wordSize);
  4319   movq(Address(rsp, 15 * wordSize), rax);
  4320   movq(Address(rsp, 14 * wordSize), rcx);
  4321   movq(Address(rsp, 13 * wordSize), rdx);
  4322   movq(Address(rsp, 12 * wordSize), rbx);
  4323   // skip rsp
  4324   movq(Address(rsp, 10 * wordSize), rbp);
  4325   movq(Address(rsp, 9 * wordSize), rsi);
  4326   movq(Address(rsp, 8 * wordSize), rdi);
  4327   movq(Address(rsp, 7 * wordSize), r8);
  4328   movq(Address(rsp, 6 * wordSize), r9);
  4329   movq(Address(rsp, 5 * wordSize), r10);
  4330   movq(Address(rsp, 4 * wordSize), r11);
  4331   movq(Address(rsp, 3 * wordSize), r12);
  4332   movq(Address(rsp, 2 * wordSize), r13);
  4333   movq(Address(rsp, wordSize), r14);
  4334   movq(Address(rsp, 0), r15);
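       // Note: the rsp value saved above (at old rsp - 5*wordSize) lands in slot 11
       // of the 16-slot frame once rsp has been dropped by 16*wordSize, which is
       // exactly the slot popa() skips when restoring.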
  4337 void Assembler::pushq(Address src) {
  4338   InstructionMark im(this);
  4339   prefixq(src);
  4340   emit_byte(0xFF);
  4341   emit_operand(rsi, src);
  4344 void Assembler::rclq(Register dst, int imm8) {
  4345   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  4346   int encode = prefixq_and_encode(dst->encoding());
  4347   if (imm8 == 1) {
  4348     emit_byte(0xD1);
  4349     emit_byte(0xD0 | encode);
  4350   } else {
  4351     emit_byte(0xC1);
  4352     emit_byte(0xD0 | encode);
  4353     emit_byte(imm8);
  4356 void Assembler::sarq(Register dst, int imm8) {
  4357   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  4358   int encode = prefixq_and_encode(dst->encoding());
  4359   if (imm8 == 1) {
  4360     emit_byte(0xD1);
  4361     emit_byte(0xF8 | encode);
  4362   } else {
  4363     emit_byte(0xC1);
  4364     emit_byte(0xF8 | encode);
  4365     emit_byte(imm8);
  4369 void Assembler::sarq(Register dst) {
  4370   int encode = prefixq_and_encode(dst->encoding());
  4371   emit_byte(0xD3);
  4372   emit_byte(0xF8 | encode);
  4375 void Assembler::sbbq(Address dst, int32_t imm32) {
  4376   InstructionMark im(this);
  4377   prefixq(dst);
  4378   emit_arith_operand(0x81, rbx, dst, imm32);
  4381 void Assembler::sbbq(Register dst, int32_t imm32) {
  4382   (void) prefixq_and_encode(dst->encoding());
  4383   emit_arith(0x81, 0xD8, dst, imm32);
  4386 void Assembler::sbbq(Register dst, Address src) {
  4387   InstructionMark im(this);
  4388   prefixq(src, dst);
  4389   emit_byte(0x1B);
  4390   emit_operand(dst, src);
  4393 void Assembler::sbbq(Register dst, Register src) {
  4394   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  4395   emit_arith(0x1B, 0xC0, dst, src);
  4398 void Assembler::shlq(Register dst, int imm8) {
  4399   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  4400   int encode = prefixq_and_encode(dst->encoding());
  4401   if (imm8 == 1) {
  4402     emit_byte(0xD1);
  4403     emit_byte(0xE0 | encode);
  4404   } else {
  4405     emit_byte(0xC1);
  4406     emit_byte(0xE0 | encode);
  4407     emit_byte(imm8);
  4411 void Assembler::shlq(Register dst) {
  4412   int encode = prefixq_and_encode(dst->encoding());
  4413   emit_byte(0xD3);
  4414   emit_byte(0xE0 | encode);
  4417 void Assembler::shrq(Register dst, int imm8) {
  4418   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  4419   int encode = prefixq_and_encode(dst->encoding());
  4420   emit_byte(0xC1);
  4421   emit_byte(0xE8 | encode);
  4422   emit_byte(imm8);
  4425 void Assembler::shrq(Register dst) {
  4426   int encode = prefixq_and_encode(dst->encoding());
  4427   emit_byte(0xD3);
  4428   emit_byte(0xE8 | encode);
  4431 void Assembler::subq(Address dst, int32_t imm32) {
  4432   InstructionMark im(this);
  4433   prefixq(dst);
  4434   emit_arith_operand(0x81, rbp, dst, imm32);
  4437 void Assembler::subq(Address dst, Register src) {
  4438   InstructionMark im(this);
  4439   prefixq(dst, src);
  4440   emit_byte(0x29);
  4441   emit_operand(src, dst);
  4444 void Assembler::subq(Register dst, int32_t imm32) {
  4445   (void) prefixq_and_encode(dst->encoding());
  4446   emit_arith(0x81, 0xE8, dst, imm32);
  4449 void Assembler::subq(Register dst, Address src) {
  4450   InstructionMark im(this);
  4451   prefixq(src, dst);
  4452   emit_byte(0x2B);
  4453   emit_operand(dst, src);
  4456 void Assembler::subq(Register dst, Register src) {
  4457   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  4458   emit_arith(0x2B, 0xC0, dst, src);
  4461 void Assembler::testq(Register dst, int32_t imm32) {
  4462   // not using emit_arith because test
  4463   // doesn't support sign-extension of
  4464   // 8bit operands
  4465   int encode = dst->encoding();
  4466   if (encode == 0) {
  4467     prefix(REX_W);
  4468     emit_byte(0xA9);
  4469   } else {
  4470     encode = prefixq_and_encode(encode);
  4471     emit_byte(0xF7);
  4472     emit_byte(0xC0 | encode);
  4474   emit_long(imm32);
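       // Note: for rax (encoding 0) the short REX.W A9 form (test rax, imm32) saves
       // the ModRM byte; every other register needs the general F7 /0 form.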
  4477 void Assembler::testq(Register dst, Register src) {
  4478   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  4479   emit_arith(0x85, 0xC0, dst, src);
  4482 void Assembler::xaddq(Address dst, Register src) {
  4483   InstructionMark im(this);
  4484   prefixq(dst, src);
  4485   emit_byte(0x0F);
  4486   emit_byte(0xC1);
  4487   emit_operand(src, dst);
  4490 void Assembler::xchgq(Register dst, Address src) {
  4491   InstructionMark im(this);
  4492   prefixq(src, dst);
  4493   emit_byte(0x87);
  4494   emit_operand(dst, src);
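       // Note: xchg with a memory operand is implicitly locked on x86, so the
       // Address form above needs no explicit lock() prefix.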
  4497 void Assembler::xchgq(Register dst, Register src) {
  4498   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  4499   emit_byte(0x87);
  4500   emit_byte(0xc0 | encode);
  4503 void Assembler::xorq(Register dst, Register src) {
  4504   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  4505   emit_arith(0x33, 0xC0, dst, src);
  4508 void Assembler::xorq(Register dst, Address src) {
  4509   InstructionMark im(this);
  4510   prefixq(src, dst);
  4511   emit_byte(0x33);
  4512   emit_operand(dst, src);
  4515 #endif // !LP64
  4517 static Assembler::Condition reverse[] = {
  4518     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  4519     Assembler::overflow       /* noOverflow    = 0x1 */ ,
  4520     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
  4521     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
  4522     Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
  4523     Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
  4524     Assembler::above          /* belowEqual    = 0x6 */ ,
  4525     Assembler::belowEqual     /* above         = 0x7 */ ,
  4526     Assembler::positive       /* negative      = 0x8 */ ,
  4527     Assembler::negative       /* positive      = 0x9 */ ,
  4528     Assembler::noParity       /* parity        = 0xa */ ,
  4529     Assembler::parity         /* noParity      = 0xb */ ,
  4530     Assembler::greaterEqual   /* less          = 0xc */ ,
  4531     Assembler::less           /* greaterEqual  = 0xd */ ,
  4532     Assembler::greater        /* lessEqual     = 0xe */ ,
  4533     Assembler::lessEqual      /* greater       = 0xf, */
  4535 };
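       // reverse[cc] is the logical negation of condition cc (each adjacent pair of
       // entries is swapped); it can be used to invert a condition, e.g. to branch
       // around an instruction when the inverted test is the convenient one to emit.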
  4538 // Implementation of MacroAssembler
  4540 // First all the versions that have distinct versions depending on 32/64 bit
  4541 // Unless the difference is trivial (1 line or so).
  4543 #ifndef _LP64
  4545 // 32bit versions
  4547 Address MacroAssembler::as_Address(AddressLiteral adr) {
  4548   return Address(adr.target(), adr.rspec());
  4551 Address MacroAssembler::as_Address(ArrayAddress adr) {
  4552   return Address::make_array(adr);
  4555 int MacroAssembler::biased_locking_enter(Register lock_reg,
  4556                                          Register obj_reg,
  4557                                          Register swap_reg,
  4558                                          Register tmp_reg,
  4559                                          bool swap_reg_contains_mark,
  4560                                          Label& done,
  4561                                          Label* slow_case,
  4562                                          BiasedLockingCounters* counters) {
  4563   assert(UseBiasedLocking, "why call this otherwise?");
  4564   assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  4565   assert_different_registers(lock_reg, obj_reg, swap_reg);
  4567   if (PrintBiasedLockingStatistics && counters == NULL)
  4568     counters = BiasedLocking::counters();
  4570   bool need_tmp_reg = false;
  4571   if (tmp_reg == noreg) {
  4572     need_tmp_reg = true;
  4573     tmp_reg = lock_reg;
  4574   } else {
  4575     assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  4577   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  4578   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  4579   Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  4580   Address saved_mark_addr(lock_reg, 0);
  4582   // Biased locking
  4583   // See whether the lock is currently biased toward our thread and
  4584   // whether the epoch is still valid
  4585   // Note that the runtime guarantees sufficient alignment of JavaThread
  4586   // pointers to allow age to be placed into low bits
  4587   // First check to see whether biasing is even enabled for this object
  4588   Label cas_label;
  4589   int null_check_offset = -1;
  4590   if (!swap_reg_contains_mark) {
  4591     null_check_offset = offset();
  4592     movl(swap_reg, mark_addr);
  4594   if (need_tmp_reg) {
  4595     push(tmp_reg);
  4597   movl(tmp_reg, swap_reg);
  4598   andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  4599   cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  4600   if (need_tmp_reg) {
  4601     pop(tmp_reg);
  4603   jcc(Assembler::notEqual, cas_label);
  4604   // The bias pattern is present in the object's header. Need to check
  4605   // whether the bias owner and the epoch are both still current.
  4606   // Note that because there is no current thread register on x86 we
  4607   // need to store off the mark word we read out of the object to
  4608   // avoid reloading it and needing to recheck invariants below. This
  4609   // store is unfortunate but it makes the overall code shorter and
  4610   // simpler.
  4611   movl(saved_mark_addr, swap_reg);
  4612   if (need_tmp_reg) {
  4613     push(tmp_reg);
  4615   get_thread(tmp_reg);
  4616   xorl(swap_reg, tmp_reg);
  4617   if (swap_reg_contains_mark) {
  4618     null_check_offset = offset();
  4620   movl(tmp_reg, klass_addr);
  4621   xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  4622   andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  4623   if (need_tmp_reg) {
  4624     pop(tmp_reg);
  4626   if (counters != NULL) {
  4627     cond_inc32(Assembler::zero,
  4628                ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  4630   jcc(Assembler::equal, done);
  4632   Label try_revoke_bias;
  4633   Label try_rebias;
  4635   // At this point we know that the header has the bias pattern and
  4636   // that we are not the bias owner in the current epoch. We need to
  4637   // figure out more details about the state of the header in order to
  4638   // know what operations can be legally performed on the object's
  4639   // header.
  4641   // If the low three bits in the xor result aren't clear, that means
  4642   // the prototype header is no longer biased and we have to revoke
  4643   // the bias on this object.
  4644   testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  4645   jcc(Assembler::notZero, try_revoke_bias);
  4647   // Biasing is still enabled for this data type. See whether the
  4648   // epoch of the current bias is still valid, meaning that the epoch
  4649   // bits of the mark word are equal to the epoch bits of the
  4650   // prototype header. (Note that the prototype header's epoch bits
  4651   // only change at a safepoint.) If not, attempt to rebias the object
  4652   // toward the current thread. Note that we must be absolutely sure
  4653   // that the current epoch is invalid in order to do this because
  4654   // otherwise the manipulations it performs on the mark word are
  4655   // illegal.
  4656   testl(swap_reg, markOopDesc::epoch_mask_in_place);
  4657   jcc(Assembler::notZero, try_rebias);
  4659   // The epoch of the current bias is still valid but we know nothing
  4660   // about the owner; it might be set or it might be clear. Try to
  4661   // acquire the bias of the object using an atomic operation. If this
  4662   // fails we will go in to the runtime to revoke the object's bias.
  4663   // Note that we first construct the presumed unbiased header so we
  4664   // don't accidentally blow away another thread's valid bias.
  4665   movl(swap_reg, saved_mark_addr);
  4666   andl(swap_reg,
  4667        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  4668   if (need_tmp_reg) {
  4669     push(tmp_reg);
  4671   get_thread(tmp_reg);
  4672   orl(tmp_reg, swap_reg);
  4673   if (os::is_MP()) {
  4674     lock();
  4676   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  4677   if (need_tmp_reg) {
  4678     pop(tmp_reg);
  4680   // If the biasing toward our thread failed, this means that
  4681   // another thread succeeded in biasing it toward itself and we
  4682   // need to revoke that bias. The revocation will occur in the
  4683   // interpreter runtime in the slow case.
  4684   if (counters != NULL) {
  4685     cond_inc32(Assembler::zero,
  4686                ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  4688   if (slow_case != NULL) {
  4689     jcc(Assembler::notZero, *slow_case);
  4691   jmp(done);
  4693   bind(try_rebias);
  4694   // At this point we know the epoch has expired, meaning that the
  4695   // current "bias owner", if any, is actually invalid. Under these
  4696   // circumstances _only_, we are allowed to use the current header's
  4697   // value as the comparison value when doing the cas to acquire the
  4698   // bias in the current epoch. In other words, we allow transfer of
  4699   // the bias from one thread to another directly in this situation.
  4700   //
  4701   // FIXME: due to a lack of registers we currently blow away the age
  4702   // bits in this situation. Should attempt to preserve them.
  4703   if (need_tmp_reg) {
  4704     push(tmp_reg);
  4706   get_thread(tmp_reg);
  4707   movl(swap_reg, klass_addr);
  4708   orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  4709   movl(swap_reg, saved_mark_addr);
  4710   if (os::is_MP()) {
  4711     lock();
  4713   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  4714   if (need_tmp_reg) {
  4715     pop(tmp_reg);
  4717   // If the biasing toward our thread failed, then another thread
  4718   // succeeded in biasing it toward itself and we need to revoke that
  4719   // bias. The revocation will occur in the runtime in the slow case.
  4720   if (counters != NULL) {
  4721     cond_inc32(Assembler::zero,
  4722                ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  4724   if (slow_case != NULL) {
  4725     jcc(Assembler::notZero, *slow_case);
  4727   jmp(done);
  4729   bind(try_revoke_bias);
  4730   // The prototype mark in the klass doesn't have the bias bit set any
  4731   // more, indicating that objects of this data type are not supposed
  4732   // to be biased any more. We are going to try to reset the mark of
  4733   // this object to the prototype value and fall through to the
  4734   // CAS-based locking scheme. Note that if our CAS fails, it means
  4735   // that another thread raced us for the privilege of revoking the
  4736   // bias of this particular object, so it's okay to continue in the
  4737   // normal locking code.
  4738   //
  4739   // FIXME: due to a lack of registers we currently blow away the age
  4740   // bits in this situation. Should attempt to preserve them.
  4741   movl(swap_reg, saved_mark_addr);
  4742   if (need_tmp_reg) {
  4743     push(tmp_reg);
  4745   movl(tmp_reg, klass_addr);
  4746   movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  4747   if (os::is_MP()) {
  4748     lock();
  4750   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  4751   if (need_tmp_reg) {
  4752     pop(tmp_reg);
  4754   // Fall through to the normal CAS-based lock, because no matter what
  4755   // the result of the above CAS, some thread must have succeeded in
  4756   // removing the bias bit from the object's header.
  4757   if (counters != NULL) {
  4758     cond_inc32(Assembler::zero,
  4759                ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  4762   bind(cas_label);
  4764   return null_check_offset;
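       // Note: cmpxchg sets ZF on success, so on each path above "zero" means the
       // bias was acquired (or was already ours) and "notZero" means contention,
       // which is routed to the slow case when one is supplied.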
  4766 void MacroAssembler::call_VM_leaf_base(address entry_point,
  4767                                        int number_of_arguments) {
  4768   call(RuntimeAddress(entry_point));
  4769   increment(rsp, number_of_arguments * wordSize);
  4772 void MacroAssembler::cmpoop(Address src1, jobject obj) {
  4773   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
  4776 void MacroAssembler::cmpoop(Register src1, jobject obj) {
  4777   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
  4780 void MacroAssembler::extend_sign(Register hi, Register lo) {
  4781   // According to Intel Doc. AP-526, "Integer Divide", p.18.
  4782   if (VM_Version::is_P6() && hi == rdx && lo == rax) {
  4783     cdql();
  4784   } else {
  4785     movl(hi, lo);
  4786     sarl(hi, 31);
  4790 void MacroAssembler::fat_nop() {
  4791   // A 5 byte nop that is safe for patching (see patch_verified_entry)
  4792   emit_byte(0x26); // es:
  4793   emit_byte(0x2e); // cs:
  4794   emit_byte(0x64); // fs:
  4795   emit_byte(0x65); // gs:
  4796   emit_byte(0x90);
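       // The four segment-override prefixes apply to the trailing 0x90, so the
       // sequence above executes as a single 5-byte instruction; a patched-in
       // 5-byte call or jump therefore never splits an instruction boundary.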
  4799 void MacroAssembler::jC2(Register tmp, Label& L) {
  4800   // set parity bit if FPU flag C2 is set (via rax)
  4801   save_rax(tmp);
  4802   fwait(); fnstsw_ax();
  4803   sahf();
  4804   restore_rax(tmp);
  4805   // branch
  4806   jcc(Assembler::parity, L);
  4809 void MacroAssembler::jnC2(Register tmp, Label& L) {
  4810   // set parity bit if FPU flag C2 is set (via rax)
  4811   save_rax(tmp);
  4812   fwait(); fnstsw_ax();
  4813   sahf();
  4814   restore_rax(tmp);
  4815   // branch
  4816   jcc(Assembler::noParity, L);
  4819 // 32bit can do a case table jump in one instruction but we no longer allow the base
  4820 // to be installed in the Address class
  4821 void MacroAssembler::jump(ArrayAddress entry) {
  4822   jmp(as_Address(entry));
  4825 // Note: y_lo will be destroyed
  4826 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  4827   // Long compare for Java (semantics as described in JVM spec.)
  4828   Label high, low, done;
  4830   cmpl(x_hi, y_hi);
  4831   jcc(Assembler::less, low);
  4832   jcc(Assembler::greater, high);
  4833   // x_hi is the return register
  4834   xorl(x_hi, x_hi);
  4835   cmpl(x_lo, y_lo);
  4836   jcc(Assembler::below, low);
  4837   jcc(Assembler::equal, done);
  4839   bind(high);
  4840   xorl(x_hi, x_hi);
  4841   increment(x_hi);
  4842   jmp(done);
  4844   bind(low);
  4845   xorl(x_hi, x_hi);
  4846   decrementl(x_hi);
  4848   bind(done);
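       // Result convention: x_hi ends up +1, 0 or -1 for x > y, x == y and x < y
       // respectively (signed compare on the high words, unsigned on the low
       // words), matching the JVM lcmp semantics.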
  4851 void MacroAssembler::lea(Register dst, AddressLiteral src) {
  4852     mov_literal32(dst, (int32_t)src.target(), src.rspec());
  4855 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  4856   // leal(dst, as_Address(adr));
  4857   // see note in movl as to why we must use a move
  4858   mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
  4861 void MacroAssembler::leave() {
  4862   mov(rsp, rbp);
  4863   pop(rbp);
  4866 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  4867   // Multiplication of two Java long values stored on the stack
  4868   // as illustrated below. Result is in rdx:rax.
  4869   //
  4870   // rsp ---> [  ??  ] \               \
  4871   //            ....    | y_rsp_offset  |
  4872   //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  4873   //          [ y_hi ]                  | (in bytes)
  4874   //            ....                    |
  4875   //          [ x_lo ]                 /
  4876   //          [ x_hi ]
  4877   //            ....
  4878   //
  4879   // Basic idea: lo(result) = lo(x_lo * y_lo)
  4880   //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  4881   Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  4882   Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  4883   Label quick;
  4884   // load x_hi, y_hi and check if quick
  4885   // multiplication is possible
  4886   movl(rbx, x_hi);
  4887   movl(rcx, y_hi);
  4888   movl(rax, rbx);
  4889   orl(rbx, rcx);                                 // rbx = 0 <=> x_hi = 0 and y_hi = 0
  4890   jcc(Assembler::zero, quick);                   // if rbx = 0 do quick multiply
  4891   // do full multiplication
  4892   // 1st step
  4893   mull(y_lo);                                    // x_hi * y_lo
  4894   movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx
  4895   // 2nd step
  4896   movl(rax, x_lo);
  4897   mull(rcx);                                     // x_lo * y_hi
  4898   addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx
  4899   // 3rd step
  4900   bind(quick);                                   // note: rbx = 0 if quick multiply!
  4901   movl(rax, x_lo);
  4902   mull(y_lo);                                    // x_lo * y_lo
  4903   addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
  4906 void MacroAssembler::lneg(Register hi, Register lo) {
  4907   negl(lo);
  4908   adcl(hi, 0);
  4909   negl(hi);
  4912 void MacroAssembler::lshl(Register hi, Register lo) {
  4913   // Java shift left long support (semantics as described in JVM spec., p.305)
  4914   // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  4915   // shift value is in rcx !
  4916   assert(hi != rcx, "must not use rcx");
  4917   assert(lo != rcx, "must not use rcx");
  4918   const Register s = rcx;                        // shift count
  4919   const int      n = BitsPerWord;
  4920   Label L;
  4921   andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  4922   cmpl(s, n);                                    // if (s < n)
  4923   jcc(Assembler::less, L);                       // else (s >= n)
  4924   movl(hi, lo);                                  // x := x << n
  4925   xorl(lo, lo);
  4926   // Note: subl(s, n) is not needed since the Intel shift instructions only use rcx mod n!
  4927   bind(L);                                       // s (mod n) < n
  4928   shldl(hi, lo);                                 // x := x << s
  4929   shll(lo);
  4933 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  4934   // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  4935   // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  4936   assert(hi != rcx, "must not use rcx");
  4937   assert(lo != rcx, "must not use rcx");
  4938   const Register s = rcx;                        // shift count
  4939   const int      n = BitsPerWord;
  4940   Label L;
  4941   andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  4942   cmpl(s, n);                                    // if (s < n)
  4943   jcc(Assembler::less, L);                       // else (s >= n)
  4944   movl(lo, hi);                                  // x := x >> n
  4945   if (sign_extension) sarl(hi, 31);
  4946   else                xorl(hi, hi);
  4947   // Note: subl(s, n) is not needed since the Intel shift instructions only use rcx mod n!
  4948   bind(L);                                       // s (mod n) < n
  4949   shrdl(lo, hi);                                 // x := x >> s
  4950   if (sign_extension) sarl(hi);
  4951   else                shrl(hi);
  4954 void MacroAssembler::movoop(Register dst, jobject obj) {
  4955   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
  4958 void MacroAssembler::movoop(Address dst, jobject obj) {
  4959   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
  4962 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  4963   if (src.is_lval()) {
  4964     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  4965   } else {
  4966     movl(dst, as_Address(src));
  4970 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  4971   movl(as_Address(dst), src);
  4974 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  4975   movl(dst, as_Address(src));
  4978 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
  4979 void MacroAssembler::movptr(Address dst, intptr_t src) {
  4980   movl(dst, src);
  4984 void MacroAssembler::pop_callee_saved_registers() {
  4985   pop(rcx);
  4986   pop(rdx);
  4987   pop(rdi);
  4988   pop(rsi);
  4991 void MacroAssembler::pop_fTOS() {
  4992   fld_d(Address(rsp, 0));
  4993   addl(rsp, 2 * wordSize);
  4996 void MacroAssembler::push_callee_saved_registers() {
  4997   push(rsi);
  4998   push(rdi);
  4999   push(rdx);
  5000   push(rcx);
  5003 void MacroAssembler::push_fTOS() {
  5004   subl(rsp, 2 * wordSize);
  5005   fstp_d(Address(rsp, 0));
  5009 void MacroAssembler::pushoop(jobject obj) {
  5010   push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
  5014 void MacroAssembler::pushptr(AddressLiteral src) {
  5015   if (src.is_lval()) {
  5016     push_literal32((int32_t)src.target(), src.rspec());
  5017   } else {
  5018     pushl(as_Address(src));
  5022 void MacroAssembler::set_word_if_not_zero(Register dst) {
  5023   xorl(dst, dst);
  5024   set_byte_if_not_zero(dst);
  5027 static void pass_arg0(MacroAssembler* masm, Register arg) {
  5028   masm->push(arg);
  5031 static void pass_arg1(MacroAssembler* masm, Register arg) {
  5032   masm->push(arg);
  5035 static void pass_arg2(MacroAssembler* masm, Register arg) {
  5036   masm->push(arg);
  5039 static void pass_arg3(MacroAssembler* masm, Register arg) {
  5040   masm->push(arg);
  5043 #ifndef PRODUCT
  5044 extern "C" void findpc(intptr_t x);
  5045 #endif
  5047 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  5048   // In order to get locks to work, we need to fake an in_VM state
  5049   JavaThread* thread = JavaThread::current();
  5050   JavaThreadState saved_state = thread->thread_state();
  5051   thread->set_thread_state(_thread_in_vm);
  5052   if (ShowMessageBoxOnError) {
  5053     JavaThread* thread = JavaThread::current();
  5054     JavaThreadState saved_state = thread->thread_state();
  5055     thread->set_thread_state(_thread_in_vm);
  5056     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
  5057       ttyLocker ttyl;
  5058       BytecodeCounter::print();
  5060     // To see where a verify_oop failed, get $ebx+40/X for this frame.
  5061     // This is the value of eip which points to where verify_oop will return.
  5062     if (os::message_box(msg, "Execution stopped, print registers?")) {
  5063       ttyLocker ttyl;
  5064       tty->print_cr("eip = 0x%08x", eip);
  5065 #ifndef PRODUCT
  5066       if ((WizardMode || Verbose) && PrintMiscellaneous) {
  5067         tty->cr();
  5068         findpc(eip);
  5069         tty->cr();
  5071 #endif
  5072       tty->print_cr("rax = 0x%08x", rax);
  5073       tty->print_cr("rbx = 0x%08x", rbx);
  5074       tty->print_cr("rcx = 0x%08x", rcx);
  5075       tty->print_cr("rdx = 0x%08x", rdx);
  5076       tty->print_cr("rdi = 0x%08x", rdi);
  5077       tty->print_cr("rsi = 0x%08x", rsi);
  5078       tty->print_cr("rbp = 0x%08x", rbp);
  5079       tty->print_cr("rsp = 0x%08x", rsp);
  5080       BREAKPOINT;
  5081       assert(false, "start up GDB");
  5083   } else {
  5084     ttyLocker ttyl;
  5085     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  5086     assert(false, "DEBUG MESSAGE");
  5088   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  5091 void MacroAssembler::stop(const char* msg) {
  5092   ExternalAddress message((address)msg);
  5093   // push address of message
  5094   pushptr(message.addr());
  5095   { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  5096   pusha();                                           // push registers
  5097   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  5098   hlt();
  5101 void MacroAssembler::warn(const char* msg) {
  5102   push_CPU_state();
  5104   ExternalAddress message((address) msg);
  5105   // push address of message
  5106   pushptr(message.addr());
  5108   call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  5109   addl(rsp, wordSize);       // discard argument
  5110   pop_CPU_state();
  5113 #else // _LP64
  5115 // 64 bit versions
  5117 Address MacroAssembler::as_Address(AddressLiteral adr) {
  5118   // amd64 always does this as a pc-rel
  5119   // we can be absolute or disp based on the instruction type
  5120   // jmp/call are displacements, others are absolute
  5121   assert(!adr.is_lval(), "must be rval");
  5122   assert(reachable(adr), "must be");
  5123   return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
  5127 Address MacroAssembler::as_Address(ArrayAddress adr) {
  5128   AddressLiteral base = adr.base();
  5129   lea(rscratch1, base);
  5130   Address index = adr.index();
  5131   assert(index._disp == 0, "must not have disp"); // maybe it can?
  5132   Address array(rscratch1, index._index, index._scale, index._disp);
  5133   return array;
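       // Note: unlike the 32-bit version, this materializes the array base in
       // rscratch1, so the returned Address is only valid while rscratch1 is
       // preserved.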
  5136 int MacroAssembler::biased_locking_enter(Register lock_reg,
  5137                                          Register obj_reg,
  5138                                          Register swap_reg,
  5139                                          Register tmp_reg,
  5140                                          bool swap_reg_contains_mark,
  5141                                          Label& done,
  5142                                          Label* slow_case,
  5143                                          BiasedLockingCounters* counters) {
  5144   assert(UseBiasedLocking, "why call this otherwise?");
  5145   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  5146   assert(tmp_reg != noreg, "tmp_reg must be supplied");
  5147   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  5148   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  5149   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  5150   Address saved_mark_addr(lock_reg, 0);
  5152   if (PrintBiasedLockingStatistics && counters == NULL)
  5153     counters = BiasedLocking::counters();
  5155   // Biased locking
  5156   // See whether the lock is currently biased toward our thread and
  5157   // whether the epoch is still valid
  5158   // Note that the runtime guarantees sufficient alignment of JavaThread
  5159   // pointers to allow age to be placed into low bits
  5160   // First check to see whether biasing is even enabled for this object
  5161   Label cas_label;
  5162   int null_check_offset = -1;
  5163   if (!swap_reg_contains_mark) {
  5164     null_check_offset = offset();
  5165     movq(swap_reg, mark_addr);
  5167   movq(tmp_reg, swap_reg);
  5168   andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  5169   cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  5170   jcc(Assembler::notEqual, cas_label);
  5171   // The bias pattern is present in the object's header. Need to check
  5172   // whether the bias owner and the epoch are both still current.
  5173   load_prototype_header(tmp_reg, obj_reg);
  5174   orq(tmp_reg, r15_thread);
  5175   xorq(tmp_reg, swap_reg);
  5176   andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  5177   if (counters != NULL) {
  5178     cond_inc32(Assembler::zero,
  5179                ExternalAddress((address) counters->biased_lock_entry_count_addr()));
  5181   jcc(Assembler::equal, done);
  5183   Label try_revoke_bias;
  5184   Label try_rebias;
  5186   // At this point we know that the header has the bias pattern and
  5187   // that we are not the bias owner in the current epoch. We need to
  5188   // figure out more details about the state of the header in order to
  5189   // know what operations can be legally performed on the object's
  5190   // header.
  5192   // If the low three bits in the xor result aren't clear, that means
  5193   // the prototype header is no longer biased and we have to revoke
  5194   // the bias on this object.
  5195   testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  5196   jcc(Assembler::notZero, try_revoke_bias);
  5198   // Biasing is still enabled for this data type. See whether the
  5199   // epoch of the current bias is still valid, meaning that the epoch
  5200   // bits of the mark word are equal to the epoch bits of the
  5201   // prototype header. (Note that the prototype header's epoch bits
  5202   // only change at a safepoint.) If not, attempt to rebias the object
  5203   // toward the current thread. Note that we must be absolutely sure
  5204   // that the current epoch is invalid in order to do this because
  5205   // otherwise the manipulations it performs on the mark word are
  5206   // illegal.
  5207   testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  5208   jcc(Assembler::notZero, try_rebias);
  5210   // The epoch of the current bias is still valid but we know nothing
  5211   // about the owner; it might be set or it might be clear. Try to
  5212   // acquire the bias of the object using an atomic operation. If this
  5213   // fails we will go in to the runtime to revoke the object's bias.
  5214   // Note that we first construct the presumed unbiased header so we
  5215   // don't accidentally blow away another thread's valid bias.
  5216   andq(swap_reg,
  5217        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  5218   movq(tmp_reg, swap_reg);
  5219   orq(tmp_reg, r15_thread);
  5220   if (os::is_MP()) {
  5221     lock();
  5223   cmpxchgq(tmp_reg, Address(obj_reg, 0));
  5224   // If the biasing toward our thread failed, this means that
  5225   // another thread succeeded in biasing it toward itself and we
  5226   // need to revoke that bias. The revocation will occur in the
  5227   // interpreter runtime in the slow case.
  5228   if (counters != NULL) {
  5229     cond_inc32(Assembler::zero,
  5230                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  5232   if (slow_case != NULL) {
  5233     jcc(Assembler::notZero, *slow_case);
  5235   jmp(done);
  5237   bind(try_rebias);
  5238   // At this point we know the epoch has expired, meaning that the
  5239   // current "bias owner", if any, is actually invalid. Under these
  5240   // circumstances _only_, we are allowed to use the current header's
  5241   // value as the comparison value when doing the cas to acquire the
  5242   // bias in the current epoch. In other words, we allow transfer of
  5243   // the bias from one thread to another directly in this situation.
  5244   //
  5245   // FIXME: due to a lack of registers we currently blow away the age
  5246   // bits in this situation. Should attempt to preserve them.
  5247   load_prototype_header(tmp_reg, obj_reg);
  5248   orq(tmp_reg, r15_thread);
  5249   if (os::is_MP()) {
  5250     lock();
  5252   cmpxchgq(tmp_reg, Address(obj_reg, 0));
  5253   // If the biasing toward our thread failed, then another thread
  5254   // succeeded in biasing it toward itself and we need to revoke that
  5255   // bias. The revocation will occur in the runtime in the slow case.
  5256   if (counters != NULL) {
  5257     cond_inc32(Assembler::zero,
  5258                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  5260   if (slow_case != NULL) {
  5261     jcc(Assembler::notZero, *slow_case);
  5263   jmp(done);
  5265   bind(try_revoke_bias);
  5266   // The prototype mark in the klass doesn't have the bias bit set any
  5267   // more, indicating that objects of this data type are not supposed
  5268   // to be biased any more. We are going to try to reset the mark of
  5269   // this object to the prototype value and fall through to the
  5270   // CAS-based locking scheme. Note that if our CAS fails, it means
  5271   // that another thread raced us for the privilege of revoking the
  5272   // bias of this particular object, so it's okay to continue in the
  5273   // normal locking code.
  5274   //
  5275   // FIXME: due to a lack of registers we currently blow away the age
  5276   // bits in this situation. Should attempt to preserve them.
  5277   load_prototype_header(tmp_reg, obj_reg);
  5278   if (os::is_MP()) {
  5279     lock();
  5281   cmpxchgq(tmp_reg, Address(obj_reg, 0));
  5282   // Fall through to the normal CAS-based lock, because no matter what
  5283   // the result of the above CAS, some thread must have succeeded in
  5284   // removing the bias bit from the object's header.
  5285   if (counters != NULL) {
  5286     cond_inc32(Assembler::zero,
  5287                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  5290   bind(cas_label);
  5292   return null_check_offset;
  5295 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  5296   Label L, E;
  5298 #ifdef _WIN64
  5299   // Windows always allocates space for its register args
  5300   assert(num_args <= 4, "only register arguments supported");
  5301   subq(rsp,  frame::arg_reg_save_area_bytes);
  5302 #endif
  5304   // Align stack if necessary
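         // Both the SysV and Win64 ABIs require rsp to be 16-byte aligned at the
         // call instruction; the test below checks the low four bits and, when
         // misaligned, an extra 8-byte adjustment is wrapped around the call.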
  5305   testl(rsp, 15);
  5306   jcc(Assembler::zero, L);
  5308   subq(rsp, 8);
  5310     call(RuntimeAddress(entry_point));
  5312   addq(rsp, 8);
  5313   jmp(E);
  5315   bind(L);
  5317     call(RuntimeAddress(entry_point));
  5320   bind(E);
  5322 #ifdef _WIN64
  5323   // restore stack pointer
  5324   addq(rsp, frame::arg_reg_save_area_bytes);
  5325 #endif
  5329 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  5330   assert(!src2.is_lval(), "should use cmpptr");
  5332   if (reachable(src2)) {
  5333     cmpq(src1, as_Address(src2));
  5334   } else {
  5335     lea(rscratch1, src2);
  5336     Assembler::cmpq(src1, Address(rscratch1, 0));
  5340 int MacroAssembler::corrected_idivq(Register reg) {
  5341   // Full implementation of Java ldiv and lrem; checks for special
  5342   // case as described in JVM spec., p.243 & p.271.  The function
  5343   // returns the (pc) offset of the idivl instruction - may be needed
  5344   // for implicit exceptions.
  5345   //
  5346   //         normal case                           special case
  5347   //
  5348   // input : rax: dividend                         min_long
  5349   //         reg: divisor   (may not be eax/edx)   -1
  5350   //
  5351   // output: rax: quotient  (= rax idiv reg)       min_long
  5352   //         rdx: remainder (= rax irem reg)       0
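         // Worked example: min_long / -1 would overflow (the true quotient, 2^63, is
         // not representable) and hardware idiv raises #DE, so the special case below
         // bypasses the divide and returns quotient = min_long, remainder = 0.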
  5353   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  5354   static const int64_t min_long = 0x8000000000000000;
  5355   Label normal_case, special_case;
  5357   // check for special case
  5358   cmp64(rax, ExternalAddress((address) &min_long));
  5359   jcc(Assembler::notEqual, normal_case);
  5360   xorl(rdx, rdx); // prepare rdx for possible special case (where
  5361                   // remainder = 0)
  5362   cmpq(reg, -1);
  5363   jcc(Assembler::equal, special_case);
  5365   // handle normal case
  5366   bind(normal_case);
  5367   cdqq();
  5368   int idivq_offset = offset();
  5369   idivq(reg);
  5371   // normal and special case exit
  5372   bind(special_case);
  5374   return idivq_offset;
  5377 void MacroAssembler::decrementq(Register reg, int value) {
  5378   if (value == min_jint) { subq(reg, value); return; }
  5379   if (value <  0) { incrementq(reg, -value); return; }
  5380   if (value == 0) {                        ; return; }
  5381   if (value == 1 && UseIncDec) { decq(reg) ; return; }
  5382   /* else */      { subq(reg, value)       ; return; }
  5385 void MacroAssembler::decrementq(Address dst, int value) {
  5386   if (value == min_jint) { subq(dst, value); return; }
  5387   if (value <  0) { incrementq(dst, -value); return; }
  5388   if (value == 0) {                        ; return; }
  5389   if (value == 1 && UseIncDec) { decq(dst) ; return; }
  5390   /* else */      { subq(dst, value)       ; return; }
  5393 void MacroAssembler::fat_nop() {
  5394   // A 5 byte nop that is safe for patching (see patch_verified_entry)
  5395   // Recommended sequence from 'Software Optimization Guide for the AMD
  5396   // Hammer Processor'
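         // The five bytes decode as a 3-byte nop (66 66 90) followed by a
         // 2-byte nop (66 90), so the sequence has no architectural effect.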
  5397   emit_byte(0x66);
  5398   emit_byte(0x66);
  5399   emit_byte(0x90);
  5400   emit_byte(0x66);
  5401   emit_byte(0x90);
  5404 void MacroAssembler::incrementq(Register reg, int value) {
  5405   if (value == min_jint) { addq(reg, value); return; }
  5406   if (value <  0) { decrementq(reg, -value); return; }
  5407   if (value == 0) {                        ; return; }
  5408   if (value == 1 && UseIncDec) { incq(reg) ; return; }
  5409   /* else */      { addq(reg, value)       ; return; }
  5412 void MacroAssembler::incrementq(Address dst, int value) {
  5413   if (value == min_jint) { addq(dst, value); return; }
  5414   if (value <  0) { decrementq(dst, -value); return; }
  5415   if (value == 0) {                        ; return; }
  5416   if (value == 1 && UseIncDec) { incq(dst) ; return; }
  5417   /* else */      { addq(dst, value)       ; return; }
  5420 // 32bit can do a case table jump in one instruction but we no longer allow the base
  5421 // to be installed in the Address class
  5422 void MacroAssembler::jump(ArrayAddress entry) {
  5423   lea(rscratch1, entry.base());
  5424   Address dispatch = entry.index();
  5425   assert(dispatch._base == noreg, "must be");
  5426   dispatch._base = rscratch1;
  5427   jmp(dispatch);
  5430 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  5431   ShouldNotReachHere(); // 64bit doesn't use two regs
  5432   cmpq(x_lo, y_lo);
  5435 void MacroAssembler::lea(Register dst, AddressLiteral src) {
  5436     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  5439 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  5440   mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  5441   movptr(dst, rscratch1);
  5444 void MacroAssembler::leave() {
  5445   // %%% is this really better? Why not on 32bit too?
  5446   emit_byte(0xC9); // LEAVE
  5449 void MacroAssembler::lneg(Register hi, Register lo) {
  5450   ShouldNotReachHere(); // 64bit doesn't use two regs
  5451   negq(lo);
  5454 void MacroAssembler::movoop(Register dst, jobject obj) {
  5455   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  5458 void MacroAssembler::movoop(Address dst, jobject obj) {
  5459   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  5460   movq(dst, rscratch1);
  5463 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  5464   if (src.is_lval()) {
  5465     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  5466   } else {
  5467     if (reachable(src)) {
  5468       movq(dst, as_Address(src));
  5469     } else {
  5470       lea(rscratch1, src);
  5471       movq(dst, Address(rscratch1,0));
  5476 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  5477   movq(as_Address(dst), src);
  5480 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  5481   movq(dst, as_Address(src));
  5484 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
  5485 void MacroAssembler::movptr(Address dst, intptr_t src) {
  5486   mov64(rscratch1, src);
  5487   movq(dst, rscratch1);
  5490 // These are mostly for initializing NULL
  5491 void MacroAssembler::movptr(Address dst, int32_t src) {
  5492   movslq(dst, src);
  5495 void MacroAssembler::movptr(Register dst, int32_t src) {
  5496   mov64(dst, (intptr_t)src);
  5499 void MacroAssembler::pushoop(jobject obj) {
  5500   movoop(rscratch1, obj);
  5501   push(rscratch1);
  5504 void MacroAssembler::pushptr(AddressLiteral src) {
  5505   lea(rscratch1, src);
  5506   if (src.is_lval()) {
  5507     push(rscratch1);
  5508   } else {
  5509     pushq(Address(rscratch1, 0));
  5513 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
  5514                                            bool clear_pc) {
  5515   // we must set sp to zero to clear frame
  5516   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  5517   // must clear fp, so that compiled frames are not confused; it is
  5518   // possible that we need it only for debugging
  5519   if (clear_fp) {
  5520     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  5523   if (clear_pc) {
  5524     movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  5528 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  5529                                          Register last_java_fp,
  5530                                          address  last_java_pc) {
  5531   // determine last_java_sp register
  5532   if (!last_java_sp->is_valid()) {
  5533     last_java_sp = rsp;
  5536   // last_java_fp is optional
  5537   if (last_java_fp->is_valid()) {
  5538     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
  5539            last_java_fp);
  5542   // last_java_pc is optional
  5543   if (last_java_pc != NULL) {
  5544     Address java_pc(r15_thread,
  5545                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  5546     lea(rscratch1, InternalAddress(last_java_pc));
  5547     movptr(java_pc, rscratch1);
  5550   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
  5553 static void pass_arg0(MacroAssembler* masm, Register arg) {
  5554   if (c_rarg0 != arg ) {
  5555     masm->mov(c_rarg0, arg);
  5559 static void pass_arg1(MacroAssembler* masm, Register arg) {
  5560   if (c_rarg1 != arg ) {
  5561     masm->mov(c_rarg1, arg);
  5565 static void pass_arg2(MacroAssembler* masm, Register arg) {
  5566   if (c_rarg2 != arg ) {
  5567     masm->mov(c_rarg2, arg);
  5571 static void pass_arg3(MacroAssembler* masm, Register arg) {
  5572   if (c_rarg3 != arg ) {
  5573     masm->mov(c_rarg3, arg);
  5577 void MacroAssembler::stop(const char* msg) {
  5578   address rip = pc();
  5579   pusha(); // get regs on stack
  5580   lea(c_rarg0, ExternalAddress((address) msg));
  5581   lea(c_rarg1, InternalAddress(rip));
  5582   movq(c_rarg2, rsp); // pass pointer to regs array
  5583   andq(rsp, -16); // align stack as required by ABI
  5584   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  5585   hlt();
  5588 void MacroAssembler::warn(const char* msg) {
  5589   push(rsp);
  5590   andq(rsp, -16);     // align stack as required by push_CPU_state and call
  5592   push_CPU_state();   // keeps alignment at 16 bytes
  5593   lea(c_rarg0, ExternalAddress((address) msg));
  5594   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  5595   pop_CPU_state();
  5596   pop(rsp);
  5599 #ifndef PRODUCT
  5600 extern "C" void findpc(intptr_t x);
  5601 #endif
  5603 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  5604   // In order to get locks to work, we need to fake an in_VM state
  5605   if (ShowMessageBoxOnError ) {
  5606     JavaThread* thread = JavaThread::current();
  5607     JavaThreadState saved_state = thread->thread_state();
  5608     thread->set_thread_state(_thread_in_vm);
  5609 #ifndef PRODUCT
  5610     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
  5611       ttyLocker ttyl;
  5612       BytecodeCounter::print();
  5614 #endif
  5615     // To see where a verify_oop failed, get $ebx+40/X for this frame.
  5616     // XXX correct this offset for amd64
  5617     // This is the value of eip which points to where verify_oop will return.
  5618     if (os::message_box(msg, "Execution stopped, print registers?")) {
  5619       ttyLocker ttyl;
  5620       tty->print_cr("rip = 0x%016lx", pc);
  5621 #ifndef PRODUCT
  5622       tty->cr();
  5623       findpc(pc);
  5624       tty->cr();
  5625 #endif
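             // regs points at the pusha save area; judging from the indices below,
             // rax was pushed first (highest slot, regs[15]) and r15 last (regs[0]).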
  5626       tty->print_cr("rax = 0x%016lx", regs[15]);
  5627       tty->print_cr("rbx = 0x%016lx", regs[12]);
  5628       tty->print_cr("rcx = 0x%016lx", regs[14]);
  5629       tty->print_cr("rdx = 0x%016lx", regs[13]);
  5630       tty->print_cr("rdi = 0x%016lx", regs[8]);
  5631       tty->print_cr("rsi = 0x%016lx", regs[9]);
  5632       tty->print_cr("rbp = 0x%016lx", regs[10]);
  5633       tty->print_cr("rsp = 0x%016lx", regs[11]);
  5634       tty->print_cr("r8  = 0x%016lx", regs[7]);
  5635       tty->print_cr("r9  = 0x%016lx", regs[6]);
  5636       tty->print_cr("r10 = 0x%016lx", regs[5]);
  5637       tty->print_cr("r11 = 0x%016lx", regs[4]);
  5638       tty->print_cr("r12 = 0x%016lx", regs[3]);
  5639       tty->print_cr("r13 = 0x%016lx", regs[2]);
  5640       tty->print_cr("r14 = 0x%016lx", regs[1]);
  5641       tty->print_cr("r15 = 0x%016lx", regs[0]);
  5642       BREAKPOINT;
  5644     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  5645   } else {
  5646     ttyLocker ttyl;
  5647     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
  5648                     msg);
  5652 #endif // _LP64
  5654 // Now versions that are common to 32/64 bit
  5656 void MacroAssembler::addptr(Register dst, int32_t imm32) {
  5657   LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
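         // LP64_ONLY(x) expands to x on 64-bit builds and to nothing on 32-bit builds
         // (NOT_LP64 is the inverse), so each ptr-sized helper selects the q- or
         // l-form of the instruction at compile time.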
  5660 void MacroAssembler::addptr(Register dst, Register src) {
  5661   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
  5664 void MacroAssembler::addptr(Address dst, Register src) {
  5665   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
  5668 void MacroAssembler::align(int modulus) {
  5669   if (offset() % modulus != 0) {
  5670     nop(modulus - (offset() % modulus));
  5674 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  5675   if (reachable(src)) {
  5676     andpd(dst, as_Address(src));
  5677   } else {
  5678     lea(rscratch1, src);
  5679     andpd(dst, Address(rscratch1, 0));
  5683 void MacroAssembler::andptr(Register dst, int32_t imm32) {
  5684   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
  5687 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  5688   pushf();
  5689   if (os::is_MP())
  5690     lock();
  5691   incrementl(counter_addr);
  5692   popf();
  5695 // Writes to stack successive pages until offset reached to check for
  5696 // stack overflow + shadow pages.  This clobbers tmp.
  5697 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  5698   movptr(tmp, rsp);
  5699   // Bang stack for total size given plus shadow page size.
  5700   // Bang one page at a time because a large size can bang beyond the yellow and
  5701   // red zones.
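         // Worked example (illustrative): with 4K pages and size = 3 pages + 100 bytes,
         // the loop below stores at tmp-4096, tmp-8192, tmp-12288 and tmp-16384 before
         // size goes non-positive; the shadow pages are then banged one by one.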
  5702   Label loop;
  5703   bind(loop);
  5704   movl(Address(tmp, (-os::vm_page_size())), size );
  5705   subptr(tmp, os::vm_page_size());
  5706   subl(size, os::vm_page_size());
  5707   jcc(Assembler::greater, loop);
  5709   // Bang down shadow pages too.
  5710   // The -1 because we already subtracted 1 page.
  5711   for (int i = 0; i< StackShadowPages-1; i++) {
  5712     // this could be any sized move but this can be a debugging crumb
  5713     // so the bigger the better.
  5714     movptr(Address(tmp, (-i*os::vm_page_size())), size );
  5718 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  5719   assert(UseBiasedLocking, "why call this otherwise?");
  5721   // Check for biased locking unlock case, which is a no-op
  5722   // Note: we do not have to check the thread ID for two reasons.
  5723   // First, the interpreter checks for IllegalMonitorStateException at
  5724   // a higher level. Second, if the bias was revoked while we held the
  5725   // lock, the object could not be rebiased toward another thread, so
  5726   // the bias bit would be clear.
  5727   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  5728   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  5729   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  5730   jcc(Assembler::equal, done);
  5733 void MacroAssembler::c2bool(Register x) {
  5734   // implements x == 0 ? 0 : 1
  5735   // note: must only look at least-significant byte of x
  5736   //       since C-style booleans are stored in one byte
  5737   //       only! (was bug)
  5738   andl(x, 0xFF);
  5739   setb(Assembler::notZero, x);
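         // e.g. x = 0x0100: the low byte is zero, so andl clears x and setb writes 0;
         // x = 0x0001 leaves the low byte non-zero and setb writes 1.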
  5742 // Wouldn't need if AddressLiteral version had new name
  5743 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  5744   Assembler::call(L, rtype);
  5747 void MacroAssembler::call(Register entry) {
  5748   Assembler::call(entry);
  5751 void MacroAssembler::call(AddressLiteral entry) {
  5752   if (reachable(entry)) {
  5753     Assembler::call_literal(entry.target(), entry.rspec());
  5754   } else {
  5755     lea(rscratch1, entry);
  5756     Assembler::call(rscratch1);
  5760 // Implementation of call_VM versions
  5762 void MacroAssembler::call_VM(Register oop_result,
  5763                              address entry_point,
  5764                              bool check_exceptions) {
  5765   Label C, E;
  5766   call(C, relocInfo::none);
  5767   jmp(E);
  5769   bind(C);
  5770   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  5771   ret(0);
  5773   bind(E);
  5776 void MacroAssembler::call_VM(Register oop_result,
  5777                              address entry_point,
  5778                              Register arg_1,
  5779                              bool check_exceptions) {
  5780   Label C, E;
  5781   call(C, relocInfo::none);
  5782   jmp(E);
  5784   bind(C);
  5785   pass_arg1(this, arg_1);
  5786   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  5787   ret(0);
  5789   bind(E);
  5792 void MacroAssembler::call_VM(Register oop_result,
  5793                              address entry_point,
  5794                              Register arg_1,
  5795                              Register arg_2,
  5796                              bool check_exceptions) {
  5797   Label C, E;
  5798   call(C, relocInfo::none);
  5799   jmp(E);
  5801   bind(C);
  5803   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  5805   pass_arg2(this, arg_2);
  5806   pass_arg1(this, arg_1);
  5807   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  5808   ret(0);
  5810   bind(E);
  5813 void MacroAssembler::call_VM(Register oop_result,
  5814                              address entry_point,
  5815                              Register arg_1,
  5816                              Register arg_2,
  5817                              Register arg_3,
  5818                              bool check_exceptions) {
  5819   Label C, E;
  5820   call(C, relocInfo::none);
  5821   jmp(E);
  5823   bind(C);
  5825   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  5826   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  5827   pass_arg3(this, arg_3);
  5829   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  5830   pass_arg2(this, arg_2);
  5832   pass_arg1(this, arg_1);
  5833   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  5834   ret(0);
  5836   bind(E);
  5839 void MacroAssembler::call_VM(Register oop_result,
  5840                              Register last_java_sp,
  5841                              address entry_point,
  5842                              int number_of_arguments,
  5843                              bool check_exceptions) {
  5844   Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  5845   call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
  5848 void MacroAssembler::call_VM(Register oop_result,
  5849                              Register last_java_sp,
  5850                              address entry_point,
  5851                              Register arg_1,
  5852                              bool check_exceptions) {
  5853   pass_arg1(this, arg_1);
  5854   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
  5857 void MacroAssembler::call_VM(Register oop_result,
  5858                              Register last_java_sp,
  5859                              address entry_point,
  5860                              Register arg_1,
  5861                              Register arg_2,
  5862                              bool check_exceptions) {
  5864   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  5865   pass_arg2(this, arg_2);
  5866   pass_arg1(this, arg_1);
  5867   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
  5870 void MacroAssembler::call_VM(Register oop_result,
  5871                              Register last_java_sp,
  5872                              address entry_point,
  5873                              Register arg_1,
  5874                              Register arg_2,
  5875                              Register arg_3,
  5876                              bool check_exceptions) {
  5877   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  5878   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  5879   pass_arg3(this, arg_3);
  5880   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  5881   pass_arg2(this, arg_2);
  5882   pass_arg1(this, arg_1);
  5883   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
  5886 void MacroAssembler::call_VM_base(Register oop_result,
  5887                                   Register java_thread,
  5888                                   Register last_java_sp,
  5889                                   address  entry_point,
  5890                                   int      number_of_arguments,
  5891                                   bool     check_exceptions) {
  5892   // determine java_thread register
  5893   if (!java_thread->is_valid()) {
  5894 #ifdef _LP64
  5895     java_thread = r15_thread;
  5896 #else
  5897     java_thread = rdi;
  5898     get_thread(java_thread);
  5899 #endif // LP64
  5901   // determine last_java_sp register
  5902   if (!last_java_sp->is_valid()) {
  5903     last_java_sp = rsp;
  5905   // debugging support
  5906   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  5907   LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  5908 #ifdef ASSERT
  5909   LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
  5910 #endif // ASSERT
  5912   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  5913   assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
  5915   // push java thread (becomes first argument of C function)
  5917   NOT_LP64(push(java_thread); number_of_arguments++);
  5918   LP64_ONLY(mov(c_rarg0, r15_thread));
  5920   // set last Java frame before call
  5921   assert(last_java_sp != rbp, "can't use ebp/rbp");
  5923   // Only interpreter should have to set fp
  5924   set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
  5926   // do the call, remove parameters
  5927   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
  5929   // restore the thread (cannot use the pushed argument since arguments
  5930   // may be overwritten by C code generated by an optimizing compiler);
  5931   // however we can use the register value directly if it is callee saved.
  5932   if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
  5933     // rdi & rsi (also r15) are callee saved -> nothing to do
  5934 #ifdef ASSERT
  5935     guarantee(java_thread != rax, "change this code");
  5936     push(rax);
  5937     { Label L;
  5938       get_thread(rax);
  5939       cmpptr(java_thread, rax);
  5940       jcc(Assembler::equal, L);
  5941       stop("MacroAssembler::call_VM_base: rdi not callee saved?");
  5942       bind(L);
  5944     pop(rax);
  5945 #endif
  5946   } else {
  5947     get_thread(java_thread);
  5949   // reset last Java frame
  5950   // Only interpreter should have to clear fp
  5951   reset_last_Java_frame(java_thread, true, false);
  5953 #ifndef CC_INTERP
  5954    // C++ interp handles this in the interpreter
  5955   check_and_handle_popframe(java_thread);
  5956   check_and_handle_earlyret(java_thread);
  5957 #endif /* CC_INTERP */
  5959   if (check_exceptions) {
  5960     // check for pending exceptions (java_thread is set upon return)
  5961     cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
  5962 #ifndef _LP64
  5963     jump_cc(Assembler::notEqual,
  5964             RuntimeAddress(StubRoutines::forward_exception_entry()));
  5965 #else
  5966     // This used to jump conditionally to forward_exception; however, it is
  5967     // possible after relocation that the branch will not reach. So we must
  5968     // jump around it so we can always reach the target.
  5970     Label ok;
  5971     jcc(Assembler::equal, ok);
  5972     jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  5973     bind(ok);
  5974 #endif // LP64
  5977   // get oop result if there is one and reset the value in the thread
  5978   if (oop_result->is_valid()) {
  5979     movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  5980     movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
  5981     verify_oop(oop_result, "broken oop in call_VM_base");
  5985 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  5987   // Calculate the value for last_Java_sp
  5988   // somewhat subtle. call_VM does an intermediate call
  5989   // which places a return address on the stack just under the
  5990   // stack pointer as the user finished with it. This allows
  5991   // us to retrieve last_Java_pc from last_Java_sp[-1].
  5992   // On 32bit we then have to push additional args on the stack to accomplish
  5993   // the actual requested call. On 64bit call_VM can only use register args
  5994   // so the only extra space is the return address that call_VM created.
  5995   // This hopefully explains the calculations here.
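         // Roughly, on 64-bit at this point:
         //   [ caller's frame        ]  <- last_Java_sp (rsp + wordSize)
         //   [ return address (= pc) ]  <- rsp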
  5997 #ifdef _LP64
  5998   // We've pushed one address, correct last_Java_sp
  5999   lea(rax, Address(rsp, wordSize));
  6000 #else
  6001   lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
  6002 #endif // LP64
  6004   call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
  6008 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  6009   call_VM_leaf_base(entry_point, number_of_arguments);
  6012 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  6013   pass_arg0(this, arg_0);
  6014   call_VM_leaf(entry_point, 1);
  6017 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  6019   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  6020   pass_arg1(this, arg_1);
  6021   pass_arg0(this, arg_0);
  6022   call_VM_leaf(entry_point, 2);
  6025 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  6026   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  6027   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  6028   pass_arg2(this, arg_2);
  6029   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  6030   pass_arg1(this, arg_1);
  6031   pass_arg0(this, arg_0);
  6032   call_VM_leaf(entry_point, 3);
  6035 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
  6038 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
  6041 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  6042   if (reachable(src1)) {
  6043     cmpl(as_Address(src1), imm);
  6044   } else {
  6045     lea(rscratch1, src1);
  6046     cmpl(Address(rscratch1, 0), imm);
  6050 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  6051   assert(!src2.is_lval(), "use cmpptr");
  6052   if (reachable(src2)) {
  6053     cmpl(src1, as_Address(src2));
  6054   } else {
  6055     lea(rscratch1, src2);
  6056     cmpl(src1, Address(rscratch1, 0));
  6060 void MacroAssembler::cmp32(Register src1, int32_t imm) {
  6061   Assembler::cmpl(src1, imm);
  6064 void MacroAssembler::cmp32(Register src1, Address src2) {
  6065   Assembler::cmpl(src1, src2);
  6068 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  6069   ucomisd(opr1, opr2);
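         // ucomisd sets ZF/PF/CF = 111 for unordered (NaN), 000 for greater,
         // 001 for less and 100 for equal, so the parity jumps below route NaNs
         // to the -1 (or +1) result before the below/above and equal tests run.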
  6071   Label L;
  6072   if (unordered_is_less) {
  6073     movl(dst, -1);
  6074     jcc(Assembler::parity, L);
  6075     jcc(Assembler::below , L);
  6076     movl(dst, 0);
  6077     jcc(Assembler::equal , L);
  6078     increment(dst);
  6079   } else { // unordered is greater
  6080     movl(dst, 1);
  6081     jcc(Assembler::parity, L);
  6082     jcc(Assembler::above , L);
  6083     movl(dst, 0);
  6084     jcc(Assembler::equal , L);
  6085     decrementl(dst);
  6087   bind(L);
  6090 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  6091   ucomiss(opr1, opr2);
  6093   Label L;
  6094   if (unordered_is_less) {
  6095     movl(dst, -1);
  6096     jcc(Assembler::parity, L);
  6097     jcc(Assembler::below , L);
  6098     movl(dst, 0);
  6099     jcc(Assembler::equal , L);
  6100     increment(dst);
  6101   } else { // unordered is greater
  6102     movl(dst, 1);
  6103     jcc(Assembler::parity, L);
  6104     jcc(Assembler::above , L);
  6105     movl(dst, 0);
  6106     jcc(Assembler::equal , L);
  6107     decrementl(dst);
  6109   bind(L);
  6113 void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  6114   if (reachable(src1)) {
  6115     cmpb(as_Address(src1), imm);
  6116   } else {
  6117     lea(rscratch1, src1);
  6118     cmpb(Address(rscratch1, 0), imm);
  6122 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
  6123 #ifdef _LP64
  6124   if (src2.is_lval()) {
  6125     movptr(rscratch1, src2);
  6126     Assembler::cmpq(src1, rscratch1);
  6127   } else if (reachable(src2)) {
  6128     cmpq(src1, as_Address(src2));
  6129   } else {
  6130     lea(rscratch1, src2);
  6131     Assembler::cmpq(src1, Address(rscratch1, 0));
  6133 #else
  6134   if (src2.is_lval()) {
  6135     cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  6136   } else {
  6137     cmpl(src1, as_Address(src2));
  6139 #endif // _LP64
  6142 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  6143   assert(src2.is_lval(), "not a mem-mem compare");
  6144 #ifdef _LP64
  6145   // moves src2's literal address
  6146   movptr(rscratch1, src2);
  6147   Assembler::cmpq(src1, rscratch1);
  6148 #else
  6149   cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  6150 #endif // _LP64
  6153 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  6154   if (reachable(adr)) {
  6155     if (os::is_MP())
  6156       lock();
  6157     cmpxchgptr(reg, as_Address(adr));
  6158   } else {
  6159     lea(rscratch1, adr);
  6160     if (os::is_MP())
  6161       lock();
  6162     cmpxchgptr(reg, Address(rscratch1, 0));
  6166 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  6167   LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
  6170 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  6171   if (reachable(src)) {
  6172     comisd(dst, as_Address(src));
  6173   } else {
  6174     lea(rscratch1, src);
  6175     comisd(dst, Address(rscratch1, 0));
  6179 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  6180   if (reachable(src)) {
  6181     comiss(dst, as_Address(src));
  6182   } else {
  6183     lea(rscratch1, src);
  6184     comiss(dst, Address(rscratch1, 0));
  6189 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  6190   Condition negated_cond = negate_condition(cond);
  6191   Label L;
  6192   jcc(negated_cond, L);
  6193   atomic_incl(counter_addr);
  6194   bind(L);
  6197 int MacroAssembler::corrected_idivl(Register reg) {
  6198   // Full implementation of Java idiv and irem; checks for
  6199   // special case as described in JVM spec., p.243 & p.271.
  6200   // The function returns the (pc) offset of the idivl
  6201   // instruction - may be needed for implicit exceptions.
  6202   //
  6203   //         normal case                           special case
  6204   //
  6205   // input : rax: dividend                          min_int
  6206   //         reg: divisor   (may not be rax/rdx)    -1
  6207   //
  6208   // output: rax: quotient  (= rax idiv reg)        min_int
  6209   //         rdx: remainder (= rax irem reg)        0
  6210   assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  6211   const int min_int = 0x80000000;
  6212   Label normal_case, special_case;
  6214   // check for special case
  6215   cmpl(rax, min_int);
  6216   jcc(Assembler::notEqual, normal_case);
  6217   xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  6218   cmpl(reg, -1);
  6219   jcc(Assembler::equal, special_case);
  6221   // handle normal case
  6222   bind(normal_case);
  6223   cdql();
  6224   int idivl_offset = offset();
  6225   idivl(reg);
  6227   // normal and special case exit
  6228   bind(special_case);
  6230   return idivl_offset;
  6235 void MacroAssembler::decrementl(Register reg, int value) {
  6236   if (value == min_jint) {subl(reg, value) ; return; }
  6237   if (value <  0) { incrementl(reg, -value); return; }
  6238   if (value == 0) {                        ; return; }
  6239   if (value == 1 && UseIncDec) { decl(reg) ; return; }
  6240   /* else */      { subl(reg, value)       ; return; }
  6243 void MacroAssembler::decrementl(Address dst, int value) {
  6244   if (value == min_jint) {subl(dst, value) ; return; }
  6245   if (value <  0) { incrementl(dst, -value); return; }
  6246   if (value == 0) {                        ; return; }
  6247   if (value == 1 && UseIncDec) { decl(dst) ; return; }
  6248   /* else */      { subl(dst, value)       ; return; }
  6251 void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  6252   assert (shift_value > 0, "illegal shift value");
  6253   Label _is_positive;
  6254   testl (reg, reg);
  6255   jcc (Assembler::positive, _is_positive);
  6256   int offset = (1 << shift_value) - 1 ;
  6258   if (offset == 1) {
  6259     incrementl(reg);
  6260   } else {
  6261     addl(reg, offset);
  6264   bind (_is_positive);
  6265   sarl(reg, shift_value);
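         // Worked example: reg = -7, shift_value = 2. A plain sarl would give -2
         // (rounding toward -infinity); adding the offset 3 first gives -4 >> 2 = -1,
         // matching Java's round-toward-zero division.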
  6268 // !defined(COMPILER2) is because of stupid core builds
  6269 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
  6270 void MacroAssembler::empty_FPU_stack() {
  6271   if (VM_Version::supports_mmx()) {
  6272     emms();
  6273   } else {
  6274     for (int i = 8; i-- > 0; ) ffree(i);
  6277 #endif // !LP64 || C1 || !C2
  6280 // Defines obj, preserves var_size_in_bytes
  6281 void MacroAssembler::eden_allocate(Register obj,
  6282                                    Register var_size_in_bytes,
  6283                                    int con_size_in_bytes,
  6284                                    Register t1,
  6285                                    Label& slow_case) {
  6286   assert(obj == rax, "obj must be in rax, for cmpxchg");
  6287   assert_different_registers(obj, var_size_in_bytes, t1);
  6288   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  6289     jmp(slow_case);
  6290   } else {
  6291     Register end = t1;
  6292     Label retry;
  6293     bind(retry);
  6294     ExternalAddress heap_top((address) Universe::heap()->top_addr());
  6295     movptr(obj, heap_top);
  6296     if (var_size_in_bytes == noreg) {
  6297       lea(end, Address(obj, con_size_in_bytes));
  6298     } else {
  6299       lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  6301     // if end < obj then we wrapped around => object too long => slow case
  6302     cmpptr(end, obj);
  6303     jcc(Assembler::below, slow_case);
  6304     cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
  6305     jcc(Assembler::above, slow_case);
  6306     // Compare obj with the top addr, and if still equal, store the new top addr in
  6307     // end at the address of the top addr pointer. Sets ZF if was equal, and clears
  6308     // it otherwise. Use lock prefix for atomicity on MPs.
  6309     locked_cmpxchgptr(end, heap_top);
  6310     jcc(Assembler::notEqual, retry);
  6314 void MacroAssembler::enter() {
  6315   push(rbp);
  6316   mov(rbp, rsp);
  6319 void MacroAssembler::fcmp(Register tmp) {
  6320   fcmp(tmp, 1, true, true);
  6323 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  6324   assert(!pop_right || pop_left, "usage error");
  6325   if (VM_Version::supports_cmov()) {
  6326     assert(tmp == noreg, "unneeded temp");
  6327     if (pop_left) {
  6328       fucomip(index);
  6329     } else {
  6330       fucomi(index);
  6332     if (pop_right) {
  6333       fpop();
  6335   } else {
  6336     assert(tmp != noreg, "need temp");
  6337     if (pop_left) {
  6338       if (pop_right) {
  6339         fcompp();
  6340       } else {
  6341         fcomp(index);
  6343     } else {
  6344       fcom(index);
  6346     // convert FPU condition into eflags condition via rax,
  6347     save_rax(tmp);
  6348     fwait(); fnstsw_ax();
  6349     sahf();
  6350     restore_rax(tmp);
  6352   // condition codes set as follows:
  6353   //
  6354   // CF (corresponds to C0) if x < y
  6355   // PF (corresponds to C2) if unordered
  6356   // ZF (corresponds to C3) if x = y
  6359 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  6360   fcmp2int(dst, unordered_is_less, 1, true, true);
  6363 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  6364   fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  6365   Label L;
  6366   if (unordered_is_less) {
  6367     movl(dst, -1);
  6368     jcc(Assembler::parity, L);
  6369     jcc(Assembler::below , L);
  6370     movl(dst, 0);
  6371     jcc(Assembler::equal , L);
  6372     increment(dst);
  6373   } else { // unordered is greater
  6374     movl(dst, 1);
  6375     jcc(Assembler::parity, L);
  6376     jcc(Assembler::above , L);
  6377     movl(dst, 0);
  6378     jcc(Assembler::equal , L);
  6379     decrementl(dst);
  6381   bind(L);
  6384 void MacroAssembler::fld_d(AddressLiteral src) {
  6385   fld_d(as_Address(src));
  6388 void MacroAssembler::fld_s(AddressLiteral src) {
  6389   fld_s(as_Address(src));
  6392 void MacroAssembler::fld_x(AddressLiteral src) {
  6393   Assembler::fld_x(as_Address(src));
  6396 void MacroAssembler::fldcw(AddressLiteral src) {
  6397   Assembler::fldcw(as_Address(src));
  6400 void MacroAssembler::fpop() {
  6401   ffree();
  6402   fincstp();
  6405 void MacroAssembler::fremr(Register tmp) {
  6406   save_rax(tmp);
  6407   { Label L;
  6408     bind(L);
  6409     fprem();
  6410     fwait(); fnstsw_ax();
  6411 #ifdef _LP64
  6412     testl(rax, 0x400);
  6413     jcc(Assembler::notEqual, L);
  6414 #else
  6415     sahf();
  6416     jcc(Assembler::parity, L);
  6417 #endif // _LP64
  6419   restore_rax(tmp);
  6420   // Result is in ST0.
  6421   // Note: fxch & fpop to get rid of ST1
  6422   // (otherwise FPU stack could overflow eventually)
  6423   fxch(1);
  6424   fpop();
  6428 void MacroAssembler::incrementl(AddressLiteral dst) {
  6429   if (reachable(dst)) {
  6430     incrementl(as_Address(dst));
  6431   } else {
  6432     lea(rscratch1, dst);
  6433     incrementl(Address(rscratch1, 0));
  6437 void MacroAssembler::incrementl(ArrayAddress dst) {
  6438   incrementl(as_Address(dst));
  6441 void MacroAssembler::incrementl(Register reg, int value) {
  6442   if (value == min_jint) {addl(reg, value) ; return; }
  6443   if (value <  0) { decrementl(reg, -value); return; }
  6444   if (value == 0) {                        ; return; }
  6445   if (value == 1 && UseIncDec) { incl(reg) ; return; }
  6446   /* else */      { addl(reg, value)       ; return; }
  6449 void MacroAssembler::incrementl(Address dst, int value) {
  6450   if (value == min_jint) {addl(dst, value) ; return; }
  6451   if (value <  0) { decrementl(dst, -value); return; }
  6452   if (value == 0) {                        ; return; }
  6453   if (value == 1 && UseIncDec) { incl(dst) ; return; }
  6454   /* else */      { addl(dst, value)       ; return; }
  6457 void MacroAssembler::jump(AddressLiteral dst) {
  6458   if (reachable(dst)) {
  6459     jmp_literal(dst.target(), dst.rspec());
  6460   } else {
  6461     lea(rscratch1, dst);
  6462     jmp(rscratch1);
  6466 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  6467   if (reachable(dst)) {
  6468     InstructionMark im(this);
  6469     relocate(dst.reloc());
  6470     const int short_size = 2;
  6471     const int long_size = 6;
  6472     int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
  6473     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
  6474       // 0111 tttn #8-bit disp
  6475       emit_byte(0x70 | cc);
  6476       emit_byte((offs - short_size) & 0xFF);
  6477     } else {
  6478       // 0000 1111 1000 tttn #32-bit disp
  6479       emit_byte(0x0F);
  6480       emit_byte(0x80 | cc);
  6481       emit_long(offs - long_size);
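             // e.g. for Assembler::equal (tttn = 0100) the short form emits 0x74 rel8
             // and the long form emits 0x0F 0x84 rel32.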
  6483   } else {
  6484 #ifdef ASSERT
  6485     warning("reversing conditional branch");
  6486 #endif /* ASSERT */
  6487     Label skip;
  6488     jccb(reverse[cc], skip);
  6489     lea(rscratch1, dst);
  6490     Assembler::jmp(rscratch1);
  6491     bind(skip);
  6495 void MacroAssembler::ldmxcsr(AddressLiteral src) {
  6496   if (reachable(src)) {
  6497     Assembler::ldmxcsr(as_Address(src));
  6498   } else {
  6499     lea(rscratch1, src);
  6500     Assembler::ldmxcsr(Address(rscratch1, 0));
  6504 int MacroAssembler::load_signed_byte(Register dst, Address src) {
  6505   int off;
  6506   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
  6507     off = offset();
  6508     movsbl(dst, src); // movsxb
  6509   } else {
  6510     off = load_unsigned_byte(dst, src);
  6511     shll(dst, 24);
  6512     sarl(dst, 24);
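           // e.g. a byte value 0x80 loaded as 0x00000080 becomes 0x80000000 after
           // shll(24) and 0xFFFFFF80 (-128) after sarl(24).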
  6514   return off;
  6517 // Note: load_signed_short used to be called load_signed_word.
  6518 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler
  6519 // manual, which means 16 bits, that usage is found nowhere in HotSpot code.
  6520 // The term "word" in HotSpot means a 32- or 64-bit machine word.
  6521 int MacroAssembler::load_signed_short(Register dst, Address src) {
  6522   int off;
  6523   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
  6524     // This is dubious to me since it seems safe to do a signed 16 => 64 bit
  6525     // version but this is what 64bit has always done. This seems to imply
  6526     // that users are only using 32bits worth.
  6527     off = offset();
  6528     movswl(dst, src); // movsxw
  6529   } else {
  6530     off = load_unsigned_short(dst, src);
  6531     shll(dst, 16);
  6532     sarl(dst, 16);
  6534   return off;
  6537 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  6538   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  6539   // and "3.9 Partial Register Penalties", p. 22).
  6540   int off;
  6541   if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
  6542     off = offset();
  6543     movzbl(dst, src); // movzxb
  6544   } else {
  6545     xorl(dst, dst);
  6546     off = offset();
  6547     movb(dst, src);
  6549   return off;
  6552 // Note: load_unsigned_short used to be called load_unsigned_word.
  6553 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  6554   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  6555   // and "3.9 Partial Register Penalties", p. 22).
  6556   int off;
  6557   if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
  6558     off = offset();
  6559     movzwl(dst, src); // movzxw
  6560   } else {
  6561     xorl(dst, dst);
  6562     off = offset();
  6563     movw(dst, src);
  6565   return off;
  6568 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  6569   switch (size_in_bytes) {
  6570 #ifndef _LP64
  6571   case  8:
  6572     assert(dst2 != noreg, "second dest register required");
  6573     movl(dst,  src);
  6574     movl(dst2, src.plus_disp(BytesPerInt));
  6575     break;
  6576 #else
  6577   case  8:  movq(dst, src); break;
  6578 #endif
  6579   case  4:  movl(dst, src); break;
  6580   case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  6581   case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  6582   default:  ShouldNotReachHere();
  6586 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  6587   switch (size_in_bytes) {
  6588 #ifndef _LP64
  6589   case  8:
  6590     assert(src2 != noreg, "second source register required");
  6591     movl(dst,                        src);
  6592     movl(dst.plus_disp(BytesPerInt), src2);
  6593     break;
  6594 #else
  6595   case  8:  movq(dst, src); break;
  6596 #endif
  6597   case  4:  movl(dst, src); break;
  6598   case  2:  movw(dst, src); break;
  6599   case  1:  movb(dst, src); break;
  6600   default:  ShouldNotReachHere();
  6604 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  6605   if (reachable(dst)) {
  6606     movl(as_Address(dst), src);
  6607   } else {
  6608     lea(rscratch1, dst);
  6609     movl(Address(rscratch1, 0), src);
  6613 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  6614   if (reachable(src)) {
  6615     movl(dst, as_Address(src));
  6616   } else {
  6617     lea(rscratch1, src);
  6618     movl(dst, Address(rscratch1, 0));
  6622 // C++ bool manipulation
  6624 void MacroAssembler::movbool(Register dst, Address src) {
  6625   if(sizeof(bool) == 1)
  6626     movb(dst, src);
  6627   else if(sizeof(bool) == 2)
  6628     movw(dst, src);
  6629   else if(sizeof(bool) == 4)
  6630     movl(dst, src);
  6631   else
  6632     // unsupported
  6633     ShouldNotReachHere();
  6636 void MacroAssembler::movbool(Address dst, bool boolconst) {
  6637   if(sizeof(bool) == 1)
  6638     movb(dst, (int) boolconst);
  6639   else if(sizeof(bool) == 2)
  6640     movw(dst, (int) boolconst);
  6641   else if(sizeof(bool) == 4)
  6642     movl(dst, (int) boolconst);
  6643   else
  6644     // unsupported
  6645     ShouldNotReachHere();
  6648 void MacroAssembler::movbool(Address dst, Register src) {
  6649   if(sizeof(bool) == 1)
  6650     movb(dst, src);
  6651   else if(sizeof(bool) == 2)
  6652     movw(dst, src);
  6653   else if(sizeof(bool) == 4)
  6654     movl(dst, src);
  6655   else
  6656     // unsupported
  6657     ShouldNotReachHere();
  6660 void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  6661   movb(as_Address(dst), src);
  6664 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  6665   if (reachable(src)) {
  6666     if (UseXmmLoadAndClearUpper) {
  6667       movsd (dst, as_Address(src));
  6668     } else {
  6669       movlpd(dst, as_Address(src));
  6671   } else {
  6672     lea(rscratch1, src);
  6673     if (UseXmmLoadAndClearUpper) {
  6674       movsd (dst, Address(rscratch1, 0));
  6675     } else {
  6676       movlpd(dst, Address(rscratch1, 0));
  6681 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  6682   if (reachable(src)) {
  6683     movss(dst, as_Address(src));
  6684   } else {
  6685     lea(rscratch1, src);
  6686     movss(dst, Address(rscratch1, 0));
  6690 void MacroAssembler::movptr(Register dst, Register src) {
  6691   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
  6694 void MacroAssembler::movptr(Register dst, Address src) {
  6695   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
  6698 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
  6699 void MacroAssembler::movptr(Register dst, intptr_t src) {
  6700   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
  6703 void MacroAssembler::movptr(Address dst, Register src) {
  6704   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
  6707 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  6708   if (reachable(src)) {
  6709     movss(dst, as_Address(src));
  6710   } else {
  6711     lea(rscratch1, src);
  6712     movss(dst, Address(rscratch1, 0));
  6716 void MacroAssembler::null_check(Register reg, int offset) {
  6717   if (needs_explicit_null_check(offset)) {
  6718     // provoke OS NULL exception if reg = NULL by
  6719     // accessing M[reg] w/o changing any (non-CC) registers
  6720     // NOTE: cmpl is plenty here to provoke a segv
  6721     cmpptr(rax, Address(reg, 0));
  6722     // Note: should probably use testl(rax, Address(reg, 0));
  6723     //       may be shorter code (however, this version of
  6724     //       testl needs to be implemented first)
  6725   } else {
  6726     // nothing to do, (later) access of M[reg + offset]
  6727     // will provoke OS NULL exception if reg = NULL
  6731 void MacroAssembler::os_breakpoint() {
  6732   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
  6733   // (e.g., MSVC can't call ps() otherwise)
  6734   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
  6737 void MacroAssembler::pop_CPU_state() {
  6738   pop_FPU_state();
  6739   pop_IU_state();
  6742 void MacroAssembler::pop_FPU_state() {
  6743   NOT_LP64(frstor(Address(rsp, 0));)
  6744   LP64_ONLY(fxrstor(Address(rsp, 0));)
  6745   addptr(rsp, FPUStateSizeInWords * wordSize);
  6748 void MacroAssembler::pop_IU_state() {
  6749   popa();
  6750   LP64_ONLY(addq(rsp, 8));
  6751   popf();
  6754 // Save Integer and Float state
  6755 // Warning: Stack must be 16 byte aligned (64bit)
  6756 void MacroAssembler::push_CPU_state() {
  6757   push_IU_state();
  6758   push_FPU_state();
  6761 void MacroAssembler::push_FPU_state() {
  6762   subptr(rsp, FPUStateSizeInWords * wordSize);
  6763 #ifndef _LP64
  6764   fnsave(Address(rsp, 0));
  6765   fwait();
  6766 #else
  6767   fxsave(Address(rsp, 0));
  6768 #endif // LP64
  6771 void MacroAssembler::push_IU_state() {
  6772   // Push flags first because pusha kills them
  6773   pushf();
  6774   // Make sure rsp stays 16-byte aligned
  6775   LP64_ONLY(subq(rsp, 8));
  6776   pusha();
  6779 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  6780   // determine java_thread register
  6781   if (!java_thread->is_valid()) {
  6782     java_thread = rdi;
  6783     get_thread(java_thread);
  6785   // we must set sp to zero to clear frame
  6786   movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  6787   if (clear_fp) {
  6788     movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  6791   if (clear_pc)
  6792     movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  6796 void MacroAssembler::restore_rax(Register tmp) {
  6797   if (tmp == noreg) pop(rax);
  6798   else if (tmp != rax) mov(rax, tmp);
  6801 void MacroAssembler::round_to(Register reg, int modulus) {
  6802   addptr(reg, modulus - 1);
  6803   andptr(reg, -modulus);
  6806 void MacroAssembler::save_rax(Register tmp) {
  6807   if (tmp == noreg) push(rax);
  6808   else if (tmp != rax) mov(tmp, rax);
  6811 // Write serialization page so VM thread can do a pseudo remote membar.
  6812 // We use the current thread pointer to calculate a thread specific
  6813 // offset to write to within the page. This minimizes bus traffic
  6814 // due to cache line collision.
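       // With a 4K page and 4-byte stores there are 1024 distinct slots, so threads
       // whose shifted pointers differ usually hit different cache lines, which is
       // what keeps this pseudo-membar cheap.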
  6815 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  6816   movl(tmp, thread);
  6817   shrl(tmp, os::get_serialize_page_shift_count());
  6818   andl(tmp, (os::vm_page_size() - sizeof(int)));
  6820   Address index(noreg, tmp, Address::times_1);
  6821   ExternalAddress page(os::get_memory_serialize_page());
  6823   // Size of store must match masking code above
  6824   movl(as_Address(ArrayAddress(page, index)), tmp);
  6827 // Calls to C land
  6828 //
  6829 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
  6830 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
  6831 // has to be reset to 0. This is required to allow proper stack traversal.
  6832 void MacroAssembler::set_last_Java_frame(Register java_thread,
  6833                                          Register last_java_sp,
  6834                                          Register last_java_fp,
  6835                                          address  last_java_pc) {
  6836   // determine java_thread register
  6837   if (!java_thread->is_valid()) {
  6838     java_thread = rdi;
  6839     get_thread(java_thread);
  6841   // determine last_java_sp register
  6842   if (!last_java_sp->is_valid()) {
  6843     last_java_sp = rsp;
  6846   // last_java_fp is optional
  6848   if (last_java_fp->is_valid()) {
  6849     movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  6852   // last_java_pc is optional
  6854   if (last_java_pc != NULL) {
  6855     lea(Address(java_thread,
  6856                  JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
  6857         InternalAddress(last_java_pc));
  6860   movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
  6863 void MacroAssembler::shlptr(Register dst, int imm8) {
  6864   LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
  6867 void MacroAssembler::shrptr(Register dst, int imm8) {
  6868   LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
  6871 void MacroAssembler::sign_extend_byte(Register reg) {
  6872   if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
  6873     movsbl(reg, reg); // movsxb
  6874   } else {
  6875     shll(reg, 24);
  6876     sarl(reg, 24);
  6880 void MacroAssembler::sign_extend_short(Register reg) {
  6881   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
  6882     movswl(reg, reg); // movsxw
  6883   } else {
  6884     shll(reg, 16);
  6885     sarl(reg, 16);
  6889 //////////////////////////////////////////////////////////////////////////////////
  6890 #ifndef SERIALGC
  6892 void MacroAssembler::g1_write_barrier_pre(Register obj,
  6893                                           Register pre_val,
  6894                                           Register thread,
  6895                                           Register tmp,
  6896                                           bool tosca_live,
  6897                                           bool expand_call) {
  6899   // If expand_call is true then we expand the call_VM_leaf macro
  6900   // directly to skip generating the check by
  6901   // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
  6903 #ifdef _LP64
  6904   assert(thread == r15_thread, "must be");
  6905 #endif // _LP64
  6907   Label done;
  6908   Label runtime;
  6910   assert(pre_val != noreg, "check this code");
  6912   if (obj != noreg) {
  6913     assert_different_registers(obj, pre_val, tmp);
  6914     assert(pre_val != rax, "check this code");
  6917   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  6918                                        PtrQueue::byte_offset_of_active()));
  6919   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  6920                                        PtrQueue::byte_offset_of_index()));
  6921   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
  6922                                        PtrQueue::byte_offset_of_buf()));
  6925   // Is marking active?
  6926   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
  6927     cmpl(in_progress, 0);
  6928   } else {
  6929     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
  6930     cmpb(in_progress, 0);
  6932   jcc(Assembler::equal, done);
  6934   // Do we need to load the previous value?
  6935   if (obj != noreg) {
  6936     load_heap_oop(pre_val, Address(obj, 0));
  6939   // Is the previous value null?
  6940   cmpptr(pre_val, (int32_t) NULL_WORD);
  6941   jcc(Assembler::equal, done);
  6943   // Can we store original value in the thread's buffer?
  6944   // Is index == 0?
  6945   // (The index field is typed as size_t.)
  6947   movptr(tmp, index);                   // tmp := *index_adr
  6948   cmpptr(tmp, 0);                       // tmp == 0?
  6949   jcc(Assembler::equal, runtime);       // If yes, goto runtime
  6951   subptr(tmp, wordSize);                // tmp := tmp - wordSize
  6952   movptr(index, tmp);                   // *index_adr := tmp
  6953   addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
  6955   // Record the previous value
  6956   movptr(Address(tmp, 0), pre_val);
  6957   jmp(done);
  6959   bind(runtime);
  6960   // save the live input values
  6961   if(tosca_live) push(rax);
  6963   if (obj != noreg && obj != rax)
  6964     push(obj);
  6966   if (pre_val != rax)
  6967     push(pre_val);
  6969   // Calling the runtime using the regular call_VM_leaf mechanism generates
  6970   // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  6971   // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  6972   //
  6973   // If we are generating the pre-barrier without a frame (e.g. in the
  6974   // intrinsified Reference.get() routine) then ebp might be pointing to
  6975   // the caller frame and so this check will most likely fail at runtime.
  6976   //
  6977   // Expanding the call directly bypasses the generation of the check.
  6978   // So when we do not have a full interpreter frame on the stack,
  6979   // expand_call should be passed true.
  6981   NOT_LP64( push(thread); )
  6983   if (expand_call) {
  6984     LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
  6985     pass_arg1(this, thread);
  6986     pass_arg0(this, pre_val);
  6987     MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  6988   } else {
  6989     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  6992   NOT_LP64( pop(thread); )
  6994   // restore the live input values
  6995   if (pre_val != rax)
  6996     pop(pre_val);
  6998   if (obj != noreg && obj != rax)
  6999     pop(obj);
  7001   if(tosca_live) pop(rax);
  7003   bind(done);
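// Documentation-only sketch of the SATB pre-barrier emitted above; active,
// index and buf stand for the in_progress, index and buffer locations defined
// at the top of the routine:
//
//   if (!active) return;                       // concurrent marking not in progress
//   if (obj != NULL) pre_val = *obj;           // load the previous field value
//   if (pre_val == NULL) return;               // nothing to record
//   if (index == 0) {                          // thread-local buffer is full
//     SharedRuntime::g1_wb_pre(pre_val, thread);
//     return;
//   }
//   index -= wordSize;                         // index counts down in bytes
//   *(oop*) (buf + index) = pre_val;           // log the previous value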
  7006 void MacroAssembler::g1_write_barrier_post(Register store_addr,
  7007                                            Register new_val,
  7008                                            Register thread,
  7009                                            Register tmp,
  7010                                            Register tmp2) {
  7011 #ifdef _LP64
  7012   assert(thread == r15_thread, "must be");
  7013 #endif // _LP64
  7015   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  7016                                        PtrQueue::byte_offset_of_index()));
  7017   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
  7018                                        PtrQueue::byte_offset_of_buf()));
  7020   BarrierSet* bs = Universe::heap()->barrier_set();
  7021   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  7022   Label done;
  7023   Label runtime;
  7025   // Does store cross heap regions?
  7027   movptr(tmp, store_addr);
  7028   xorptr(tmp, new_val);
  7029   shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  7030   jcc(Assembler::equal, done);
  7032   // crosses regions, storing NULL?
  7034   cmpptr(new_val, (int32_t) NULL_WORD);
  7035   jcc(Assembler::equal, done);
  7037   // storing region crossing non-NULL, is card already dirty?
  7039   ExternalAddress cardtable((address) ct->byte_map_base);
  7040   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  7041 #ifdef _LP64
  7042   const Register card_addr = tmp;
  7044   movq(card_addr, store_addr);
  7045   shrq(card_addr, CardTableModRefBS::card_shift);
  7047   lea(tmp2, cardtable);
  7049   // get the address of the card
  7050   addq(card_addr, tmp2);
  7051 #else
  7052   const Register card_index = tmp;
  7054   movl(card_index, store_addr);
  7055   shrl(card_index, CardTableModRefBS::card_shift);
  7057   Address index(noreg, card_index, Address::times_1);
  7058   const Register card_addr = tmp;
  7059   lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
  7060 #endif
  7061   cmpb(Address(card_addr, 0), 0);
  7062   jcc(Assembler::equal, done);
  7064   // storing a region crossing, non-NULL oop, card is clean.
  7065   // dirty card and log.
  7067   movb(Address(card_addr, 0), 0);
  7069   cmpl(queue_index, 0);
  7070   jcc(Assembler::equal, runtime);
  7071   subl(queue_index, wordSize);
  7072   movptr(tmp2, buffer);
  7073 #ifdef _LP64
  7074   movslq(rscratch1, queue_index);
  7075   addq(tmp2, rscratch1);
  7076   movq(Address(tmp2, 0), card_addr);
  7077 #else
  7078   addl(tmp2, queue_index);
  7079   movl(Address(tmp2, 0), card_index);
  7080 #endif
  7081   jmp(done);
  7083   bind(runtime);
  7084   // save the live input values
  7085   push(store_addr);
  7086   push(new_val);
  7087 #ifdef _LP64
  7088   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
  7089 #else
  7090   push(thread);
  7091   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  7092   pop(thread);
  7093 #endif
  7094   pop(new_val);
  7095   pop(store_addr);
  7097   bind(done);
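// Documentation-only sketch of the post-barrier emitted above; card_base
// stands for ct->byte_map_base, and queue_index / buffer are the dirty card
// queue locations defined at the top of the routine:
//
//   if ((((uintptr_t) store_addr ^ (uintptr_t) new_val) >> HeapRegion::LogOfHRGrainBytes) == 0)
//     return;                                  // store stays within one region
//   if (new_val == NULL) return;               // NULL stores need no remembered set entry
//   jbyte* card = card_base + ((uintptr_t) store_addr >> CardTableModRefBS::card_shift);
//   if (*card == 0) return;                    // card already dirty (dirty == 0 here)
//   *card = 0;                                 // dirty the card
//   if (queue_index == 0) { SharedRuntime::g1_wb_post(card, thread); return; }
//   queue_index -= wordSize;
//   *(jbyte**) (buffer + queue_index) = card;  // enqueue the card for refinement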
  7100 #endif // SERIALGC
  7101 //////////////////////////////////////////////////////////////////////////////////
  7104 void MacroAssembler::store_check(Register obj) {
  7105   // Does a store check for the oop in register obj. The content of
  7106   // register obj is destroyed afterwards.
  7107   store_check_part_1(obj);
  7108   store_check_part_2(obj);
  7111 void MacroAssembler::store_check(Register obj, Address dst) {
  7112   store_check(obj);
  7116 // split the store check operation so that other instructions can be scheduled in between
  7117 void MacroAssembler::store_check_part_1(Register obj) {
  7118   BarrierSet* bs = Universe::heap()->barrier_set();
  7119   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  7120   shrptr(obj, CardTableModRefBS::card_shift);
  7123 void MacroAssembler::store_check_part_2(Register obj) {
  7124   BarrierSet* bs = Universe::heap()->barrier_set();
  7125   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  7126   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  7127   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  7129   // The calculation for byte_map_base is as follows:
  7130   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  7131   // So this essentially converts an address to a displacement and
  7132   // it will never need to be relocated. On 64-bit, however, the value may be too
  7133   // large for a 32-bit displacement.
  7135   intptr_t disp = (intptr_t) ct->byte_map_base;
  7136   if (is_simm32(disp)) {
  7137     Address cardtable(noreg, obj, Address::times_1, disp);
  7138     movb(cardtable, 0);
  7139   } else {
  7140     // By doing it as an ExternalAddress, disp could be converted to a rip-relative
  7141     // displacement and done in a single instruction given a favorable mapping and
  7142     // a smarter version of as_Address. Worst case it is two instructions, which
  7143     // is no worse than loading disp into a register and using a simple
  7144     // Address() as above.
  7145     // We can't use ExternalAddress as the only style, since if disp == 0 we'll
  7146     // assert because NULL isn't acceptable in a relocInfo (see 6644928). Either
  7147     // way, in some cases we'll get a single-instruction version.
  7149     ExternalAddress cardtable((address)disp);
  7150     Address index(noreg, obj, Address::times_1);
  7151     movb(as_Address(ArrayAddress(cardtable, index)), 0);
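// Documentation-only sketch: parts 1 and 2 together implement the usual
// card table store check,
//
//   ct->byte_map_base[(uintptr_t) obj >> CardTableModRefBS::card_shift] = 0;
//
// part 1 performs the shift; part 2 only has to add byte_map_base (as a
// direct displacement or via an ExternalAddress) and store the dirty value.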
  7155 void MacroAssembler::subptr(Register dst, int32_t imm32) {
  7156   LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
  7159 void MacroAssembler::subptr(Register dst, Register src) {
  7160   LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
  7163 void MacroAssembler::test32(Register src1, AddressLiteral src2) {
  7164   // src2 must be rval
  7166   if (reachable(src2)) {
  7167     testl(src1, as_Address(src2));
  7168   } else {
  7169     lea(rscratch1, src2);
  7170     testl(src1, Address(rscratch1, 0));
  7174 // C++ bool manipulation
  7175 void MacroAssembler::testbool(Register dst) {
  7176   if(sizeof(bool) == 1)
  7177     testb(dst, 0xff);
  7178   else if(sizeof(bool) == 2) {
  7179     // testw implementation needed for two byte bools
  7180     ShouldNotReachHere();
  7181   } else if(sizeof(bool) == 4)
  7182     testl(dst, dst);
  7183   else
  7184     // unsupported
  7185     ShouldNotReachHere();
  7188 void MacroAssembler::testptr(Register dst, Register src) {
  7189   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
  7192 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  7193 void MacroAssembler::tlab_allocate(Register obj,
  7194                                    Register var_size_in_bytes,
  7195                                    int con_size_in_bytes,
  7196                                    Register t1,
  7197                                    Register t2,
  7198                                    Label& slow_case) {
  7199   assert_different_registers(obj, t1, t2);
  7200   assert_different_registers(obj, var_size_in_bytes, t1);
  7201   Register end = t2;
  7202   Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
  7204   verify_tlab();
  7206   NOT_LP64(get_thread(thread));
  7208   movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  7209   if (var_size_in_bytes == noreg) {
  7210     lea(end, Address(obj, con_size_in_bytes));
  7211   } else {
  7212     lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  7214   cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  7215   jcc(Assembler::above, slow_case);
  7217   // update the tlab top pointer
  7218   movptr(Address(thread, JavaThread::tlab_top_offset()), end);
  7220   // recover var_size_in_bytes if necessary
  7221   if (var_size_in_bytes == end) {
  7222     subptr(var_size_in_bytes, obj);
  7224   verify_tlab();
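// Documentation-only sketch of the bump-pointer allocation above, where
// tlab_top / tlab_end stand for the fields at tlab_top_offset / tlab_end_offset:
//
//   obj = thread->tlab_top;
//   end = obj + size_in_bytes;                 // constant or variable size
//   if (end > thread->tlab_end) goto slow_case;
//   thread->tlab_top = end;                    // object is allocated at obj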
  7227 // Preserves rbx, and rdx.
  7228 Register MacroAssembler::tlab_refill(Label& retry,
  7229                                      Label& try_eden,
  7230                                      Label& slow_case) {
  7231   Register top = rax;
  7232   Register t1  = rcx;
  7233   Register t2  = rsi;
  7234   Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  7235   assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  7236   Label do_refill, discard_tlab;
  7238   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  7239     // No allocation in the shared eden.
  7240     jmp(slow_case);
  7243   NOT_LP64(get_thread(thread_reg));
  7245   movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  7246   movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
  7248   // calculate amount of free space
  7249   subptr(t1, top);
  7250   shrptr(t1, LogHeapWordSize);
  7252   // Retain tlab and allocate object in shared space if
  7253   // the amount free in the tlab is too large to discard.
  7254   cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  7255   jcc(Assembler::lessEqual, discard_tlab);
  7257   // Retain
  7258   // %%% yuck as movptr...
  7259   movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  7260   addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  7261   if (TLABStats) {
  7262     // increment number of slow_allocations
  7263     addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  7265   jmp(try_eden);
  7267   bind(discard_tlab);
  7268   if (TLABStats) {
  7269     // increment number of refills
  7270     addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
  7271     // accumulate wastage -- t1 is amount free in tlab
  7272     addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  7275   // if tlab is currently allocated (top or end != null) then
  7276   // fill [top, end + alignment_reserve) with array object
  7277   testptr(top, top);
  7278   jcc(Assembler::zero, do_refill);
  7280   // set up the mark word
  7281   movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  7282   // set the length to the remaining space
  7283   subptr(t1, typeArrayOopDesc::header_size(T_INT));
  7284   addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  7285   shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  7286   movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  7287   // set klass to intArrayKlass
  7288   // dubious reloc: why not an oop reloc?
  7289   movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  7290   // store klass last: concurrent GCs assume the length is valid if the
  7291   // klass field is not null.
  7292   store_klass(top, t1);
  7294   movptr(t1, top);
  7295   subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  7296   incr_allocated_bytes(thread_reg, t1, 0);
  7298   // refill the tlab with an eden allocation
  7299   bind(do_refill);
  7300   movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  7301   shlptr(t1, LogHeapWordSize);
  7302   // allocate new tlab, address returned in top
  7303   eden_allocate(top, t1, 0, t2, slow_case);
  7305   // Check that t1 was preserved in eden_allocate.
  7306 #ifdef ASSERT
  7307   if (UseTLAB) {
  7308     Label ok;
  7309     Register tsize = rsi;
  7310     assert_different_registers(tsize, thread_reg, t1);
  7311     push(tsize);
  7312     movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  7313     shlptr(tsize, LogHeapWordSize);
  7314     cmpptr(t1, tsize);
  7315     jcc(Assembler::equal, ok);
  7316     stop("assert(t1 != tlab size)");
  7317     should_not_reach_here();
  7319     bind(ok);
  7320     pop(tsize);
  7322 #endif
  7323   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  7324   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  7325   addptr(top, t1);
  7326   subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  7327   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  7328   verify_tlab();
  7329   jmp(retry);
  7331   return thread_reg; // for use by caller
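// Documentation-only sketch of the refill policy above:
//
//   free_words = (tlab_end - tlab_top) / HeapWordSize;
//   if (free_words > refill_waste_limit) {
//     refill_waste_limit += refill_waste_limit_increment;  // retain this TLAB
//     goto try_eden;                                       // allocate the object in eden instead
//   }
//   // Otherwise discard: overwrite [top, end + alignment_reserve) with a dead
//   // int[] filler so the heap stays parsable, then allocate a fresh TLAB from
//   // eden and retry the original allocation.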
  7334 void MacroAssembler::incr_allocated_bytes(Register thread,
  7335                                           Register var_size_in_bytes,
  7336                                           int con_size_in_bytes,
  7337                                           Register t1) {
  7338 #ifdef _LP64
  7339   if (var_size_in_bytes->is_valid()) {
  7340     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  7341   } else {
  7342     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  7344 #else
  7345   if (!thread->is_valid()) {
  7346     assert(t1->is_valid(), "need temp reg");
  7347     thread = t1;
  7348     get_thread(thread);
  7351   if (var_size_in_bytes->is_valid()) {
  7352     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  7353   } else {
  7354     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  7356   adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
  7357 #endif
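// Documentation-only sketch: allocated_bytes is a 64-bit counter. On LP64 a
// single addq suffices; on 32-bit the two halves are updated with an add/adc
// pair so the carry propagates into the high word:
//
//   *(uint32_t*) (addr + 0) += size;           // addl: may set the carry flag
//   *(uint32_t*) (addr + 4) += carry;          // adcl ..., 0: fold the carry in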
  7360 static const double     pi_4 =  0.7853981633974483;
  7362 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  7363   // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  7364   // was attempted in this code; unfortunately it appears that the
  7365   // switch to 80-bit precision and back causes this to be
  7366   // unprofitable compared with simply performing a runtime call if
  7367   // the argument is out of the (-pi/4, pi/4) range.
  7369   Register tmp = noreg;
  7370   if (!VM_Version::supports_cmov()) {
  7371     // fcmp needs a temporary, so preserve rbx
  7372     tmp = rbx;
  7373     push(tmp);
  7376   Label slow_case, done;
  7378   ExternalAddress pi4_adr = (address)&pi_4;
  7379   if (reachable(pi4_adr)) {
  7380     // x ?<= pi/4
  7381     fld_d(pi4_adr);
  7382     fld_s(1);                // Stack:  X  PI/4  X
  7383     fabs();                  // Stack: |X| PI/4  X
  7384     fcmp(tmp);
  7385     jcc(Assembler::above, slow_case);
  7387     // fastest case: -pi/4 <= x <= pi/4
  7388     switch(trig) {
  7389     case 's':
  7390       fsin();
  7391       break;
  7392     case 'c':
  7393       fcos();
  7394       break;
  7395     case 't':
  7396       ftan();
  7397       break;
  7398     default:
  7399       assert(false, "bad intrinsic");
  7400       break;
  7402     jmp(done);
  7405   // slow case: runtime call
  7406   bind(slow_case);
  7407   // Preserve registers across runtime call
  7408   pusha();
  7409   int incoming_argument_and_return_value_offset = -1;
  7410   if (num_fpu_regs_in_use > 1) {
  7411     // Must preserve all other FPU regs (could alternatively convert
  7412     // SharedRuntime::dsin and dcos into assembly routines known not to trash
  7413     // FPU state, but we cannot trust the C compiler)
  7414     NEEDS_CLEANUP;
  7415     // NOTE that in this case we also push the incoming argument to
  7416     // the stack and restore it later; we also use this stack slot to
  7417     // hold the return value from dsin or dcos.
  7418     for (int i = 0; i < num_fpu_regs_in_use; i++) {
  7419       subptr(rsp, sizeof(jdouble));
  7420       fstp_d(Address(rsp, 0));
  7422     incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
  7423     fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  7425   subptr(rsp, sizeof(jdouble));
  7426   fstp_d(Address(rsp, 0));
  7427 #ifdef _LP64
  7428   movdbl(xmm0, Address(rsp, 0));
  7429 #endif // _LP64
  7431   // NOTE: we must not use call_VM_leaf here because that requires a
  7432   // complete interpreter frame in debug mode -- same bug as 4387334
  7433   // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  7434   // do proper 64bit abi
  7436   NEEDS_CLEANUP;
  7437   // Need to add stack banging before this runtime call if it needs to
  7438   // be taken; however, there is no generic stack banging routine at
  7439   // the MacroAssembler level
  7440   switch(trig) {
  7441   case 's':
  7443       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
  7445     break;
  7446   case 'c':
  7448       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
  7450     break;
  7451   case 't':
  7453       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
  7455     break;
  7456   default:
  7457     assert(false, "bad intrinsic");
  7458     break;
  7460 #ifdef _LP64
  7461     movsd(Address(rsp, 0), xmm0);
  7462     fld_d(Address(rsp, 0));
  7463 #endif // _LP64
  7464   addptr(rsp, sizeof(jdouble));
  7465   if (num_fpu_regs_in_use > 1) {
  7466     // Must save return value to stack and then restore entire FPU stack
  7467     fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
  7468     for (int i = 0; i < num_fpu_regs_in_use; i++) {
  7469       fld_d(Address(rsp, 0));
  7470       addptr(rsp, sizeof(jdouble));
  7473   popa();
  7475   // Come here with result in F-TOS
  7476   bind(done);
  7478   if (tmp != noreg) {
  7479     pop(tmp);
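// Documentation-only sketch of the strategy above for sin/cos/tan:
//
//   if (fabs(x) <= pi/4) {
//     // fast path: fsin/fcos/ftan give full accuracy in this range
//   } else {
//     // slow path: spill any live FPU registers and call
//     // SharedRuntime::dsin / dcos / dtan through call_VM_leaf_base
//   }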
  7484 // Look up the method for a megamorphic invokeinterface call.
  7485 // The target method is determined by <intf_klass, itable_index>.
  7486 // The receiver klass is in recv_klass.
  7487 // On success, the result will be in method_result, and execution falls through.
  7488 // On failure, execution transfers to the given label.
  7489 void MacroAssembler::lookup_interface_method(Register recv_klass,
  7490                                              Register intf_klass,
  7491                                              RegisterOrConstant itable_index,
  7492                                              Register method_result,
  7493                                              Register scan_temp,
  7494                                              Label& L_no_such_interface) {
  7495   assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  7496   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
  7497          "caller must use same register for non-constant itable index as for method");
  7499   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  7500   int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  7501   int itentry_off = itableMethodEntry::method_offset_in_bytes();
  7502   int scan_step   = itableOffsetEntry::size() * wordSize;
  7503   int vte_size    = vtableEntry::size() * wordSize;
  7504   Address::ScaleFactor times_vte_scale = Address::times_ptr;
  7505   assert(vte_size == wordSize, "else adjust times_vte_scale");
  7507   movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
  7509   // %%% Could store the aligned, prescaled offset in the klassoop.
  7510   lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  7511   if (HeapWordsPerLong > 1) {
  7512     // Round up to align_object_offset boundary
  7513     // see code for instanceKlass::start_of_itable!
  7514     round_to(scan_temp, BytesPerLong);
  7517   // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  7518   assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  7519   lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
  7521   // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  7522   //   if (scan->interface() == intf) {
  7523   //     result = (klass + scan->offset() + itable_index);
  7524   //   }
  7525   // }
  7526   Label search, found_method;
  7528   for (int peel = 1; peel >= 0; peel--) {
  7529     movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  7530     cmpptr(intf_klass, method_result);
  7532     if (peel) {
  7533       jccb(Assembler::equal, found_method);
  7534     } else {
  7535       jccb(Assembler::notEqual, search);
  7536       // (invert the test to fall through to found_method...)
  7539     if (!peel)  break;
  7541     bind(search);
  7543     // Check that the previous entry is non-null.  A null entry means that
  7544     // the receiver class doesn't implement the interface, and wasn't the
  7545     // same as when the caller was compiled.
  7546     testptr(method_result, method_result);
  7547     jcc(Assembler::zero, L_no_such_interface);
  7548     addptr(scan_temp, scan_step);
  7551   bind(found_method);
  7553   // Got a hit.
  7554   movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  7555   movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
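// Documentation-only sketch of the itable lookup above (see also the loop
// comment in the body); offsets are relative to the original recv_klass:
//
//   for (scan = itable_start; ; scan += scan_step) {
//     if (scan->interface() == NULL) goto L_no_such_interface;
//     if (scan->interface() == intf_klass) break;
//   }
//   method_result = *(methodOop*) ((address) recv_klass + scan->offset()
//                                  + itable_index * wordSize + itentry_off);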
  7559 void MacroAssembler::check_klass_subtype(Register sub_klass,
  7560                            Register super_klass,
  7561                            Register temp_reg,
  7562                            Label& L_success) {
  7563   Label L_failure;
  7564   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  7565   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  7566   bind(L_failure);
  7570 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  7571                                                    Register super_klass,
  7572                                                    Register temp_reg,
  7573                                                    Label* L_success,
  7574                                                    Label* L_failure,
  7575                                                    Label* L_slow_path,
  7576                                         RegisterOrConstant super_check_offset) {
  7577   assert_different_registers(sub_klass, super_klass, temp_reg);
  7578   bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  7579   if (super_check_offset.is_register()) {
  7580     assert_different_registers(sub_klass, super_klass,
  7581                                super_check_offset.as_register());
  7582   } else if (must_load_sco) {
  7583     assert(temp_reg != noreg, "supply either a temp or a register offset");
  7586   Label L_fallthrough;
  7587   int label_nulls = 0;
  7588   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  7589   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  7590   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  7591   assert(label_nulls <= 1, "at most one NULL in the batch");
  7593   int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  7594                    Klass::secondary_super_cache_offset_in_bytes());
  7595   int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  7596                     Klass::super_check_offset_offset_in_bytes());
  7597   Address super_check_offset_addr(super_klass, sco_offset);
  7599   // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  7600   // range of a jccb.  If this routine grows larger, reconsider at
  7601   // least some of these.
  7602 #define local_jcc(assembler_cond, label)                                \
  7603   if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  7604   else                             jcc( assembler_cond, label) /*omit semi*/
  7606   // Hacked jmp, which may only be used just before L_fallthrough.
  7607 #define final_jmp(label)                                                \
  7608   if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  7609   else                            jmp(label)                /*omit semi*/
  7611   // If the pointers are equal, we are done (e.g., String[] elements).
  7612   // This self-check enables sharing of secondary supertype arrays among
  7613   // non-primary types such as array-of-interface.  Otherwise, each such
  7614   // type would need its own customized SSA.
  7615   // We move this check to the front of the fast path because many
  7616   // type checks are in fact trivially successful in this manner,
  7617   // so we get a nicely predicted branch right at the start of the check.
  7618   cmpptr(sub_klass, super_klass);
  7619   local_jcc(Assembler::equal, *L_success);
  7621   // Check the supertype display:
  7622   if (must_load_sco) {
  7623     // Positive movl does right thing on LP64.
  7624     movl(temp_reg, super_check_offset_addr);
  7625     super_check_offset = RegisterOrConstant(temp_reg);
  7627   Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  7628   cmpptr(super_klass, super_check_addr); // load displayed supertype
  7630   // This check has worked decisively for primary supers.
  7631   // Secondary supers are sought in the super_cache ('super_cache_addr').
  7632   // (Secondary supers are interfaces and very deeply nested subtypes.)
  7633   // This works in the same check above because of a tricky aliasing
  7634   // between the super_cache and the primary super display elements.
  7635   // (The 'super_check_addr' can address either, as the case requires.)
  7636   // Note that the cache is updated below if it does not help us find
  7637   // what we need immediately.
  7638   // So if it was a primary super, we can just fail immediately.
  7639   // Otherwise, it's the slow path for us (no success at this point).
  7641   if (super_check_offset.is_register()) {
  7642     local_jcc(Assembler::equal, *L_success);
  7643     cmpl(super_check_offset.as_register(), sc_offset);
  7644     if (L_failure == &L_fallthrough) {
  7645       local_jcc(Assembler::equal, *L_slow_path);
  7646     } else {
  7647       local_jcc(Assembler::notEqual, *L_failure);
  7648       final_jmp(*L_slow_path);
  7650   } else if (super_check_offset.as_constant() == sc_offset) {
  7651     // Need a slow path; fast failure is impossible.
  7652     if (L_slow_path == &L_fallthrough) {
  7653       local_jcc(Assembler::equal, *L_success);
  7654     } else {
  7655       local_jcc(Assembler::notEqual, *L_slow_path);
  7656       final_jmp(*L_success);
  7658   } else {
  7659     // No slow path; it's a fast decision.
  7660     if (L_failure == &L_fallthrough) {
  7661       local_jcc(Assembler::equal, *L_success);
  7662     } else {
  7663       local_jcc(Assembler::notEqual, *L_failure);
  7664       final_jmp(*L_success);
  7668   bind(L_fallthrough);
  7670 #undef local_jcc
  7671 #undef final_jmp
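// Documentation-only sketch of the fast-path decision above:
//
//   if (sub_klass == super_klass) return success;            // trivial self check
//   sco = super_klass->super_check_offset;                    // loaded or passed in
//   if (*(klassOop*) ((address) sub_klass + sco) == super_klass) return success;
//   if (sco != secondary_super_cache_offset) return failure;  // primary display miss is final
//   return slow_path;                                          // must scan the secondary supers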
  7675 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  7676                                                    Register super_klass,
  7677                                                    Register temp_reg,
  7678                                                    Register temp2_reg,
  7679                                                    Label* L_success,
  7680                                                    Label* L_failure,
  7681                                                    bool set_cond_codes) {
  7682   assert_different_registers(sub_klass, super_klass, temp_reg);
  7683   if (temp2_reg != noreg)
  7684     assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  7685 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  7687   Label L_fallthrough;
  7688   int label_nulls = 0;
  7689   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  7690   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  7691   assert(label_nulls <= 1, "at most one NULL in the batch");
  7693   // a couple of useful fields in sub_klass:
  7694   int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
  7695                    Klass::secondary_supers_offset_in_bytes());
  7696   int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  7697                    Klass::secondary_super_cache_offset_in_bytes());
  7698   Address secondary_supers_addr(sub_klass, ss_offset);
  7699   Address super_cache_addr(     sub_klass, sc_offset);
  7701   // Do a linear scan of the secondary super-klass chain.
  7702   // This code is rarely used, so simplicity is a virtue here.
  7703   // The repne_scan instruction uses fixed registers, which we must spill.
  7704   // Don't worry too much about pre-existing connections with the input regs.
  7706   assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  7707   assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
  7709   // Get super_klass value into rax (even if it was in rdi or rcx).
  7710   bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  7711   if (super_klass != rax || UseCompressedOops) {
  7712     if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
  7713     mov(rax, super_klass);
  7715   if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  7716   if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
  7718 #ifndef PRODUCT
  7719   int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  7720   ExternalAddress pst_counter_addr((address) pst_counter);
  7721   NOT_LP64(  incrementl(pst_counter_addr) );
  7722   LP64_ONLY( lea(rcx, pst_counter_addr) );
  7723   LP64_ONLY( incrementl(Address(rcx, 0)) );
  7724 #endif //PRODUCT
  7726   // We will consult the secondary-super array.
  7727   movptr(rdi, secondary_supers_addr);
  7728   // Load the array length.  (Positive movl does right thing on LP64.)
  7729   movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  7730   // Skip to start of data.
  7731   addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  7733   // Scan RCX words at [RDI] for an occurrence of RAX.
  7734   // Set NZ/Z based on last compare.
  7735   // The Z flag will not be set by 'repne' if RCX == 0, since 'repne' itself does
  7736   // not change flags; only the repeated scas instruction sets them.
  7737   // Set Z = 0 (not equal) before 'repne' to indicate that the class was not found.
  7738 #ifdef _LP64
  7739   // This part is tricky, as values in supers array could be 32 or 64 bit wide
  7740   // and we store values in objArrays always encoded, thus we need to encode
  7741   // the value of rax before repne.  Note that rax is dead after the repne.
  7742   if (UseCompressedOops) {
  7743     encode_heap_oop_not_null(rax); // Changes flags.
  7744     // The superclass is never null; it would be a basic system error if a null
  7745     // pointer were to sneak in here.  Note that we have already loaded the
  7746     // Klass::super_check_offset from the super_klass in the fast path,
  7747     // so if there is a null in that register, we are already in the afterlife.
  7748     testl(rax,rax); // Set Z = 0
  7749     repne_scanl();
  7750   } else
  7751 #endif // _LP64
  7753     testptr(rax,rax); // Set Z = 0
  7754     repne_scan();
  7756   // Unspill the temp. registers:
  7757   if (pushed_rdi)  pop(rdi);
  7758   if (pushed_rcx)  pop(rcx);
  7759   if (pushed_rax)  pop(rax);
  7761   if (set_cond_codes) {
  7762     // Special hack for the AD files:  rdi is guaranteed non-zero.
  7763     assert(!pushed_rdi, "rdi must be left non-NULL");
  7764     // Also, the condition codes are properly set Z/NZ on succeed/failure.
  7767   if (L_failure == &L_fallthrough)
  7768         jccb(Assembler::notEqual, *L_failure);
  7769   else  jcc(Assembler::notEqual, *L_failure);
  7771   // Success.  Cache the super we found and proceed in triumph.
  7772   movptr(super_cache_addr, super_klass);
  7774   if (L_success != &L_fallthrough) {
  7775     jmp(*L_success);
  7778 #undef IS_A_TEMP
  7780   bind(L_fallthrough);
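// Documentation-only sketch of the linear scan above (implemented with repne scas):
//
//   objArrayOop ss = sub_klass->secondary_supers;
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->obj_at(i) == super_klass) {
//       sub_klass->secondary_super_cache = super_klass;  // speed up the next query
//       return success;                                  // leaves Z set
//     }
//   }
//   return failure;                                      // leaves Z clear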
  7784 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  7785   ucomisd(dst, as_Address(src));
  7788 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  7789   ucomiss(dst, as_Address(src));
  7792 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  7793   if (reachable(src)) {
  7794     xorpd(dst, as_Address(src));
  7795   } else {
  7796     lea(rscratch1, src);
  7797     xorpd(dst, Address(rscratch1, 0));
  7801 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  7802   if (reachable(src)) {
  7803     xorps(dst, as_Address(src));
  7804   } else {
  7805     lea(rscratch1, src);
  7806     xorps(dst, Address(rscratch1, 0));
  7810 void MacroAssembler::verify_oop(Register reg, const char* s) {
  7811   if (!VerifyOops) return;
  7813   // Pass register number to verify_oop_subroutine
  7814   char* b = new char[strlen(s) + 50];
  7815   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  7816 #ifdef _LP64
  7817   push(rscratch1);                    // save r10, trashed by movptr()
  7818 #endif
  7819   push(rax);                          // save rax
  7820   push(reg);                          // pass register argument
  7821   ExternalAddress buffer((address) b);
  7822   // avoid using pushptr, as it modifies scratch registers
  7823   // and our contract is not to modify anything
  7824   movptr(rax, buffer.addr());
  7825   push(rax);
  7826   // call indirectly to solve generation ordering problem
  7827   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  7828   call(rax);
  7829   // Caller pops the arguments (oop, message) and restores rax, r10
  7833 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  7834                                                       Register tmp,
  7835                                                       int offset) {
  7836   intptr_t value = *delayed_value_addr;
  7837   if (value != 0)
  7838     return RegisterOrConstant(value + offset);
  7840   // load indirectly to solve generation ordering problem
  7841   movptr(tmp, ExternalAddress((address) delayed_value_addr));
  7843 #ifdef ASSERT
  7844   { Label L;
  7845     testptr(tmp, tmp);
  7846     if (WizardMode) {
  7847       jcc(Assembler::notZero, L);
  7848       char* buf = new char[40];
  7849       sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
  7850       stop(buf);
  7851     } else {
  7852       jccb(Assembler::notZero, L);
  7853       hlt();
  7855     bind(L);
  7857 #endif
  7859   if (offset != 0)
  7860     addptr(tmp, offset);
  7862   return RegisterOrConstant(tmp);
  7866 // registers on entry:
  7867 //  - rax ('check' register): required MethodType
  7868 //  - rcx: method handle
  7869 //  - rdx, rsi, or ?: killable temp
  7870 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
  7871                                               Register temp_reg,
  7872                                               Label& wrong_method_type) {
  7873   Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
  7874   // compare method type against that of the receiver
  7875   if (UseCompressedOops) {
  7876     load_heap_oop(temp_reg, type_addr);
  7877     cmpptr(mtype_reg, temp_reg);
  7878   } else {
  7879     cmpptr(mtype_reg, type_addr);
  7881   jcc(Assembler::notEqual, wrong_method_type);
  7885 // A method handle has a "vmslots" field which gives the size of its
  7886 // argument list in JVM stack slots.  This field is either located directly
  7887 // in every method handle, or else is indirectly accessed through the
  7888 // method handle's MethodType.  This macro hides the distinction.
  7889 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
  7890                                                 Register temp_reg) {
  7891   assert_different_registers(vmslots_reg, mh_reg, temp_reg);
  7892   // load mh.type.form.vmslots
  7893   if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
  7894     // hoist vmslots into every mh to avoid dependent load chain
  7895     movl(vmslots_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
  7896   } else {
  7897     Register temp2_reg = vmslots_reg;
  7898     load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
  7899     load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
  7900     movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
  7905 // registers on entry:
  7906 //  - rcx: method handle
  7907 //  - rdx: killable temp (interpreted only)
  7908 //  - rax: killable temp (compiled only)
  7909 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  7910   assert(mh_reg == rcx, "caller must put MH object in rcx");
  7911   assert_different_registers(mh_reg, temp_reg);
  7913   // pick out the interpreted side of the handler
  7914   // NOTE: vmentry is not an oop!
  7915   movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
  7917   // off we go...
  7918   jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
  7920   // for the various stubs which take control at this point,
  7921   // see MethodHandles::generate_method_handle_stub
  7925 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
  7926                                          int extra_slot_offset) {
  7927   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  7928   int stackElementSize = Interpreter::stackElementSize;
  7929   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
  7930 #ifdef ASSERT
  7931   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  7932   assert(offset1 - offset == stackElementSize, "correct arithmetic");
  7933 #endif
  7934   Register             scale_reg    = noreg;
  7935   Address::ScaleFactor scale_factor = Address::no_scale;
  7936   if (arg_slot.is_constant()) {
  7937     offset += arg_slot.as_constant() * stackElementSize;
  7938   } else {
  7939     scale_reg    = arg_slot.as_register();
  7940     scale_factor = Address::times(stackElementSize);
  7942   offset += wordSize;           // return PC is on stack
  7943   return Address(rsp, scale_reg, scale_factor, offset);
  7947 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  7948   if (!VerifyOops) return;
  7950   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  7951   // Pass register number to verify_oop_subroutine
  7952   char* b = new char[strlen(s) + 50];
  7953   sprintf(b, "verify_oop_addr: %s", s);
  7955 #ifdef _LP64
  7956   push(rscratch1);                    // save r10, trashed by movptr()
  7957 #endif
  7958   push(rax);                          // save rax
  7959   // addr may contain rsp, so we will have to adjust it based on the push
  7960   // we just did.
  7961   // NOTE: the 64-bit code seems to have had a bug here: it did movq(addr, rax), which
  7962   // stores rax into addr -- the reverse of what was intended.
  7963   if (addr.uses(rsp)) {
  7964     lea(rax, addr);
  7965     pushptr(Address(rax, BytesPerWord));
  7966   } else {
  7967     pushptr(addr);
  7970   ExternalAddress buffer((address) b);
  7971   // pass msg argument
  7972   // avoid using pushptr, as it modifies scratch registers
  7973   // and our contract is not to modify anything
  7974   movptr(rax, buffer.addr());
  7975   push(rax);
  7977   // call indirectly to solve generation ordering problem
  7978   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  7979   call(rax);
  7980   // Caller pops the arguments (addr, message) and restores rax, r10.
  7983 void MacroAssembler::verify_tlab() {
  7984 #ifdef ASSERT
  7985   if (UseTLAB && VerifyOops) {
  7986     Label next, ok;
  7987     Register t1 = rsi;
  7988     Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
  7990     push(t1);
  7991     NOT_LP64(push(thread_reg));
  7992     NOT_LP64(get_thread(thread_reg));
  7994     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  7995     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  7996     jcc(Assembler::aboveEqual, next);
  7997     stop("assert(top >= start)");
  7998     should_not_reach_here();
  8000     bind(next);
  8001     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
  8002     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  8003     jcc(Assembler::aboveEqual, ok);
  8004     stop("assert(top <= end)");
  8005     should_not_reach_here();
  8007     bind(ok);
  8008     NOT_LP64(pop(thread_reg));
  8009     pop(t1);
  8011 #endif
  8014 class ControlWord {
  8015  public:
  8016   int32_t _value;
  8018   int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  8019   int  precision_control() const       { return  (_value >>  8) & 3      ; }
  8020   bool precision() const               { return ((_value >>  5) & 1) != 0; }
  8021   bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  8022   bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  8023   bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  8024   bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  8025   bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  8027   void print() const {
  8028     // rounding control
  8029     const char* rc;
  8030     switch (rounding_control()) {
  8031       case 0: rc = "round near"; break;
  8032       case 1: rc = "round down"; break;
  8033       case 2: rc = "round up  "; break;
  8034       case 3: rc = "chop      "; break;
  8035     };
  8036     // precision control
  8037     const char* pc;
  8038     switch (precision_control()) {
  8039       case 0: pc = "24 bits "; break;
  8040       case 1: pc = "reserved"; break;
  8041       case 2: pc = "53 bits "; break;
  8042       case 3: pc = "64 bits "; break;
  8043     };
  8044     // flags
  8045     char f[9];
  8046     f[0] = ' ';
  8047     f[1] = ' ';
  8048     f[2] = (precision   ()) ? 'P' : 'p';
  8049     f[3] = (underflow   ()) ? 'U' : 'u';
  8050     f[4] = (overflow    ()) ? 'O' : 'o';
  8051     f[5] = (zero_divide ()) ? 'Z' : 'z';
  8052     f[6] = (denormalized()) ? 'D' : 'd';
  8053     f[7] = (invalid     ()) ? 'I' : 'i';
  8054     f[8] = '\x0';
  8055     // output
  8056     printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  8059 };
  8061 class StatusWord {
  8062  public:
  8063   int32_t _value;
  8065   bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  8066   bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  8067   bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  8068   bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  8069   bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  8070   int  top() const                     { return  (_value >> 11) & 7      ; }
  8071   bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  8072   bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  8073   bool precision() const               { return ((_value >>  5) & 1) != 0; }
  8074   bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  8075   bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  8076   bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  8077   bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  8078   bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
  8080   void print() const {
  8081     // condition codes
  8082     char c[5];
  8083     c[0] = (C3()) ? '3' : '-';
  8084     c[1] = (C2()) ? '2' : '-';
  8085     c[2] = (C1()) ? '1' : '-';
  8086     c[3] = (C0()) ? '0' : '-';
  8087     c[4] = '\x0';
  8088     // flags
  8089     char f[9];
  8090     f[0] = (error_status()) ? 'E' : '-';
  8091     f[1] = (stack_fault ()) ? 'S' : '-';
  8092     f[2] = (precision   ()) ? 'P' : '-';
  8093     f[3] = (underflow   ()) ? 'U' : '-';
  8094     f[4] = (overflow    ()) ? 'O' : '-';
  8095     f[5] = (zero_divide ()) ? 'Z' : '-';
  8096     f[6] = (denormalized()) ? 'D' : '-';
  8097     f[7] = (invalid     ()) ? 'I' : '-';
  8098     f[8] = '\x0';
  8099     // output
  8100     printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  8103 };
  8105 class TagWord {
  8106  public:
  8107   int32_t _value;
  8109   int tag_at(int i) const              { return (_value >> (i*2)) & 3; }
  8111   void print() const {
  8112     printf("%04x", _value & 0xFFFF);
  8115 };
  8117 class FPU_Register {
  8118  public:
  8119   int32_t _m0;
  8120   int32_t _m1;
  8121   int16_t _ex;
  8123   bool is_indefinite() const           {
  8124     return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  8127   void print() const {
  8128     char  sign = (_ex < 0) ? '-' : '+';
  8129     const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
  8130     printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  8131   };
  8133 };
  8135 class FPU_State {
  8136  public:
  8137   enum {
  8138     register_size       = 10,
  8139     number_of_registers =  8,
  8140     register_mask       =  7
  8141   };
  8143   ControlWord  _control_word;
  8144   StatusWord   _status_word;
  8145   TagWord      _tag_word;
  8146   int32_t      _error_offset;
  8147   int32_t      _error_selector;
  8148   int32_t      _data_offset;
  8149   int32_t      _data_selector;
  8150   int8_t       _register[register_size * number_of_registers];
  8152   int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  8153   FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
  8155   const char* tag_as_string(int tag) const {
  8156     switch (tag) {
  8157       case 0: return "valid";
  8158       case 1: return "zero";
  8159       case 2: return "special";
  8160       case 3: return "empty";
  8162     ShouldNotReachHere();
  8163     return NULL;
  8166   void print() const {
  8167     // print computation registers
  8168     { int t = _status_word.top();
  8169       for (int i = 0; i < number_of_registers; i++) {
  8170         int j = (i - t) & register_mask;
  8171         printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
  8172         st(j)->print();
  8173         printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
  8176     printf("\n");
  8177     // print control registers
  8178     printf("ctrl = "); _control_word.print(); printf("\n");
  8179     printf("stat = "); _status_word .print(); printf("\n");
  8180     printf("tags = "); _tag_word    .print(); printf("\n");
  8183 };
  8185 class Flag_Register {
  8186  public:
  8187   int32_t _value;
  8189   bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  8190   bool direction() const               { return ((_value >> 10) & 1) != 0; }
  8191   bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  8192   bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  8193   bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  8194   bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  8195   bool carry() const                   { return ((_value >>  0) & 1) != 0; }
  8197   void print() const {
  8198     // flags
  8199     char f[8];
  8200     f[0] = (overflow       ()) ? 'O' : '-';
  8201     f[1] = (direction      ()) ? 'D' : '-';
  8202     f[2] = (sign           ()) ? 'S' : '-';
  8203     f[3] = (zero           ()) ? 'Z' : '-';
  8204     f[4] = (auxiliary_carry()) ? 'A' : '-';
  8205     f[5] = (parity         ()) ? 'P' : '-';
  8206     f[6] = (carry          ()) ? 'C' : '-';
  8207     f[7] = '\x0';
  8208     // output
  8209     printf("%08x  flags = %s", _value, f);
  8212 };
  8214 class IU_Register {
  8215  public:
  8216   int32_t _value;
  8218   void print() const {
  8219     printf("%08x  %11d", _value, _value);
  8222 };
  8224 class IU_State {
  8225  public:
  8226   Flag_Register _eflags;
  8227   IU_Register   _rdi;
  8228   IU_Register   _rsi;
  8229   IU_Register   _rbp;
  8230   IU_Register   _rsp;
  8231   IU_Register   _rbx;
  8232   IU_Register   _rdx;
  8233   IU_Register   _rcx;
  8234   IU_Register   _rax;
  8236   void print() const {
  8237     // computation registers
  8238     printf("rax,  = "); _rax.print(); printf("\n");
  8239     printf("rbx,  = "); _rbx.print(); printf("\n");
  8240     printf("rcx  = "); _rcx.print(); printf("\n");
  8241     printf("rdx  = "); _rdx.print(); printf("\n");
  8242     printf("rdi  = "); _rdi.print(); printf("\n");
  8243     printf("rsi  = "); _rsi.print(); printf("\n");
  8244     printf("rbp,  = "); _rbp.print(); printf("\n");
  8245     printf("rsp  = "); _rsp.print(); printf("\n");
  8246     printf("\n");
  8247     // control registers
  8248     printf("flgs = "); _eflags.print(); printf("\n");
  8250 };
  8253 class CPU_State {
  8254  public:
  8255   FPU_State _fpu_state;
  8256   IU_State  _iu_state;
  8258   void print() const {
  8259     printf("--------------------------------------------------\n");
  8260     _iu_state .print();
  8261     printf("\n");
  8262     _fpu_state.print();
  8263     printf("--------------------------------------------------\n");
  8266 };
  8269 static void _print_CPU_state(CPU_State* state) {
  8270   state->print();
  8271 };
  8274 void MacroAssembler::print_CPU_state() {
  8275   push_CPU_state();
  8276   push(rsp);                // pass CPU state
  8277   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  8278   addptr(rsp, wordSize);       // discard argument
  8279   pop_CPU_state();
  8283 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  8284   static int counter = 0;
  8285   FPU_State* fs = &state->_fpu_state;
  8286   counter++;
  8287   // For leaf calls, only verify that the top few elements remain empty.
  8288   // We only need 1 empty at the top for C2 code.
  8289   if( stack_depth < 0 ) {
  8290     if( fs->tag_for_st(7) != 3 ) {
  8291       printf("FPR7 not empty\n");
  8292       state->print();
  8293       assert(false, "error");
  8294       return false;
  8296     return true;                // All other stack states do not matter
  8299   assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
  8300          "bad FPU control word");
  8302   // compute stack depth
  8303   int i = 0;
  8304   while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  8305   int d = i;
  8306   while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  8307   // verify findings
  8308   if (i != FPU_State::number_of_registers) {
  8309     // stack not contiguous
  8310     printf("%s: stack not contiguous at ST%d\n", s, i);
  8311     state->print();
  8312     assert(false, "error");
  8313     return false;
  8315   // check if computed stack depth corresponds to expected stack depth
  8316   if (stack_depth < 0) {
  8317     // expected stack depth is -stack_depth or less
  8318     if (d > -stack_depth) {
  8319       // too many elements on the stack
  8320       printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
  8321       state->print();
  8322       assert(false, "error");
  8323       return false;
  8325   } else {
  8326     // expected stack depth is stack_depth
  8327     if (d != stack_depth) {
  8328       // wrong stack depth
  8329       printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
  8330       state->print();
  8331       assert(false, "error");
  8332       return false;
  8335   // everything is cool
  8336   return true;
  8340 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  8341   if (!VerifyFPU) return;
  8342   push_CPU_state();
  8343   push(rsp);                // pass CPU state
  8344   ExternalAddress msg((address) s);
  8345   // pass message string s
  8346   pushptr(msg.addr());
  8347   push(stack_depth);        // pass stack depth
  8348   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  8349   addptr(rsp, 3 * wordSize);   // discard arguments
  8350   // check for error
  8351   { Label L;
  8352     testl(rax, rax);
  8353     jcc(Assembler::notZero, L);
  8354     int3();                  // break if error condition
  8355     bind(L);
  8357   pop_CPU_state();
  8360 void MacroAssembler::load_klass(Register dst, Register src) {
  8361 #ifdef _LP64
  8362   if (UseCompressedOops) {
  8363     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  8364     decode_heap_oop_not_null(dst);
  8365   } else
  8366 #endif
  8367     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  8370 void MacroAssembler::load_prototype_header(Register dst, Register src) {
  8371 #ifdef _LP64
  8372   if (UseCompressedOops) {
  8373     assert (Universe::heap() != NULL, "java heap should be initialized");
  8374     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  8375     if (Universe::narrow_oop_shift() != 0) {
  8376       assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  8377       if (LogMinObjAlignmentInBytes == Address::times_8) {
  8378         movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  8379       } else {
  8380         // OK to use shift since we don't need to preserve flags.
  8381         shlq(dst, LogMinObjAlignmentInBytes);
  8382         movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  8384     } else {
  8385       movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  8387   } else
  8388 #endif
  8390     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  8391     movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  8395 void MacroAssembler::store_klass(Register dst, Register src) {
  8396 #ifdef _LP64
  8397   if (UseCompressedOops) {
  8398     encode_heap_oop_not_null(src);
  8399     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  8400   } else
  8401 #endif
  8402     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  8405 void MacroAssembler::load_heap_oop(Register dst, Address src) {
  8406 #ifdef _LP64
  8407   if (UseCompressedOops) {
  8408     movl(dst, src);
  8409     decode_heap_oop(dst);
  8410   } else
  8411 #endif
  8412     movptr(dst, src);
  8415 void MacroAssembler::store_heap_oop(Address dst, Register src) {
  8416 #ifdef _LP64
  8417   if (UseCompressedOops) {
  8418     assert(!dst.uses(src), "not enough registers");
  8419     encode_heap_oop(src);
  8420     movl(dst, src);
  8421   } else
  8422 #endif
  8423     movptr(dst, src);
  8426 // Used for storing NULLs.
  8427 void MacroAssembler::store_heap_oop_null(Address dst) {
  8428 #ifdef _LP64
  8429   if (UseCompressedOops) {
  8430     movl(dst, (int32_t)NULL_WORD);
  8431   } else {
  8432     movslq(dst, (int32_t)NULL_WORD);
  8434 #else
  8435   movl(dst, (int32_t)NULL_WORD);
  8436 #endif
  8439 #ifdef _LP64
  8440 void MacroAssembler::store_klass_gap(Register dst, Register src) {
  8441   if (UseCompressedOops) {
  8442     // Store to klass gap in destination
  8443     movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  8447 #ifdef ASSERT
  8448 void MacroAssembler::verify_heapbase(const char* msg) {
  8449   assert (UseCompressedOops, "should be compressed");
  8450   assert (Universe::heap() != NULL, "java heap should be initialized");
  8451   if (CheckCompressedOops) {
  8452     Label ok;
  8453     push(rscratch1); // cmpptr trashes rscratch1
  8454     cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  8455     jcc(Assembler::equal, ok);
  8456     stop(msg);
  8457     bind(ok);
  8458     pop(rscratch1);
  8461 #endif
  8463 // Algorithm must match oop.inline.hpp encode_heap_oop.
  8464 void MacroAssembler::encode_heap_oop(Register r) {
  8465 #ifdef ASSERT
  8466   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
  8467 #endif
  8468   verify_oop(r, "broken oop in encode_heap_oop");
  8469   if (Universe::narrow_oop_base() == NULL) {
  8470     if (Universe::narrow_oop_shift() != 0) {
  8471       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  8472       shrq(r, LogMinObjAlignmentInBytes);
  8474     return;
  8476   testq(r, r);
  8477   cmovq(Assembler::equal, r, r12_heapbase);
  8478   subq(r, r12_heapbase);
  8479   shrq(r, LogMinObjAlignmentInBytes);
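         // Rough C-like sketch of the sequence above for a non-zero heap base
         // (illustrative only, not part of the emitted code):
         //   narrow = (oop == NULL) ? 0
         //          : (uint32_t)((oop - narrow_oop_base) >> LogMinObjAlignmentInBytes);
         // the cmovq substitutes the heap base for NULL so that the subtraction
         // yields 0 for a NULL oop.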
  8482 void MacroAssembler::encode_heap_oop_not_null(Register r) {
  8483 #ifdef ASSERT
  8484   verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  8485   if (CheckCompressedOops) {
  8486     Label ok;
  8487     testq(r, r);
  8488     jcc(Assembler::notEqual, ok);
  8489     stop("null oop passed to encode_heap_oop_not_null");
  8490     bind(ok);
  8492 #endif
  8493   verify_oop(r, "broken oop in encode_heap_oop_not_null");
  8494   if (Universe::narrow_oop_base() != NULL) {
  8495     subq(r, r12_heapbase);
  8497   if (Universe::narrow_oop_shift() != 0) {
  8498     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  8499     shrq(r, LogMinObjAlignmentInBytes);
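         // Illustrative contrast with encode_heap_oop above: the oop is known to
         // be non-NULL here, so the test/cmov NULL handling is dropped and the
         // encoding reduces to  narrow = (oop - narrow_oop_base) >> shift,  with
         // each step emitted only when the base or the shift is actually non-zero.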
  8503 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  8504 #ifdef ASSERT
  8505   verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  8506   if (CheckCompressedOops) {
  8507     Label ok;
  8508     testq(src, src);
  8509     jcc(Assembler::notEqual, ok);
  8510     stop("null oop passed to encode_heap_oop_not_null2");
  8511     bind(ok);
  8513 #endif
  8514   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  8515   if (dst != src) {
  8516     movq(dst, src);
  8518   if (Universe::narrow_oop_base() != NULL) {
  8519     subq(dst, r12_heapbase);
  8521   if (Universe::narrow_oop_shift() != 0) {
  8522     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  8523     shrq(dst, LogMinObjAlignmentInBytes);
  8527 void  MacroAssembler::decode_heap_oop(Register r) {
  8528 #ifdef ASSERT
  8529   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
  8530 #endif
  8531   if (Universe::narrow_oop_base() == NULL) {
  8532     if (Universe::narrow_oop_shift() != 0) {
  8533       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  8534       shlq(r, LogMinObjAlignmentInBytes);
  8536   } else {
  8537     Label done;
  8538     shlq(r, LogMinObjAlignmentInBytes);
  8539     jccb(Assembler::equal, done);
  8540     addq(r, r12_heapbase);
  8541     bind(done);
  8543   verify_oop(r, "broken oop in decode_heap_oop");
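         // Rough C-like sketch of the non-zero-base path above (illustrative only):
         //   oop = (narrow == 0) ? NULL
         //       : narrow_oop_base + ((uintptr_t)narrow << LogMinObjAlignmentInBytes);
         // the jccb(equal, done) uses the flags left by the shift to keep a zero
         // narrow oop decoded as NULL instead of adding the heap base to it.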
  8546 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  8547   // Note: it will change flags
  8548   assert (UseCompressedOops, "should only be used for compressed headers");
  8549   assert (Universe::heap() != NULL, "java heap should be initialized");
  8550   // Cannot assert, unverified entry point counts instructions (see .ad file)
  8551   // vtableStubs also counts instructions in pd_code_size_limit.
  8552   // Also do not verify_oop as this is called by verify_oop.
  8553   if (Universe::narrow_oop_shift() != 0) {
  8554     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  8555     shlq(r, LogMinObjAlignmentInBytes);
  8556     if (Universe::narrow_oop_base() != NULL) {
  8557       addq(r, r12_heapbase);
  8559   } else {
  8560     assert (Universe::narrow_oop_base() == NULL, "sanity");
  8564 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  8565   // Note: it will change flags
  8566   assert (UseCompressedOops, "should only be used for compressed headers");
  8567   assert (Universe::heap() != NULL, "java heap should be initialized");
  8568   // Cannot assert, unverified entry point counts instructions (see .ad file)
  8569   // vtableStubs also counts instructions in pd_code_size_limit.
  8570   // Also do not verify_oop as this is called by verify_oop.
  8571   if (Universe::narrow_oop_shift() != 0) {
  8572     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  8573     if (LogMinObjAlignmentInBytes == Address::times_8) {
  8574       leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  8575     } else {
  8576       if (dst != src) {
  8577         movq(dst, src);
  8579       shlq(dst, LogMinObjAlignmentInBytes);
  8580       if (Universe::narrow_oop_base() != NULL) {
  8581         addq(dst, r12_heapbase);
  8584   } else {
  8585     assert (Universe::narrow_oop_base() == NULL, "sanity");
  8586     if (dst != src) {
  8587       movq(dst, src);
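         // Illustrative note: with 8-byte object alignment the decode above folds
         // into a single  leaq dst, [r12_heapbase + src*8],  combining the shift by
         // LogMinObjAlignmentInBytes and the heap-base add into one instruction.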
  8592 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  8593   assert (UseCompressedOops, "should only be used for compressed headers");
  8594   assert (Universe::heap() != NULL, "java heap should be initialized");
  8595   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  8596   int oop_index = oop_recorder()->find_index(obj);
  8597   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  8598   mov_narrow_oop(dst, oop_index, rspec);
  8601 void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  8602   assert (UseCompressedOops, "should only be used for compressed headers");
  8603   assert (Universe::heap() != NULL, "java heap should be initialized");
  8604   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  8605   int oop_index = oop_recorder()->find_index(obj);
  8606   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  8607   mov_narrow_oop(dst, oop_index, rspec);
  8610 void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  8611   assert (UseCompressedOops, "should only be used for compressed headers");
  8612   assert (Universe::heap() != NULL, "java heap should be initialized");
  8613   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  8614   int oop_index = oop_recorder()->find_index(obj);
  8615   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  8616   Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  8619 void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  8620   assert (UseCompressedOops, "should only be used for compressed headers");
  8621   assert (Universe::heap() != NULL, "java heap should be initialized");
  8622   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  8623   int oop_index = oop_recorder()->find_index(obj);
  8624   RelocationHolder rspec = oop_Relocation::spec(oop_index);
  8625   Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  8628 void MacroAssembler::reinit_heapbase() {
  8629   if (UseCompressedOops) {
  8630     movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  8633 #endif // _LP64
  8635 // IndexOf for constant substrings with size >= 8 chars
  8636 // which don't need to be loaded through the stack.
  8637 void MacroAssembler::string_indexofC8(Register str1, Register str2,
  8638                                       Register cnt1, Register cnt2,
  8639                                       int int_cnt2,  Register result,
  8640                                       XMMRegister vec, Register tmp) {
  8641   assert(UseSSE42Intrinsics, "SSE4.2 is required");
  8643   // This method uses the pcmpestri instruction with bound registers
  8644   //   inputs:
  8645   //     xmm - substring
  8646   //     rax - substring length (elements count)
  8647   //     mem - scanned string
  8648   //     rdx - string length (elements count)
  8649   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  8650   //   outputs:
  8651   //     rcx - matched index in string
  8652   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
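         // Rough sketch of the main scan loop below (illustrative only; it relies
         // on the pcmpestri behaviour documented above, where CF reports a
         // candidate match inside the 8-char window and rcx its offset). The scan
         // pointer is kept in 'result', initialized to str1:
         //   for (p = result; cnt1 > 0; p += 8 chars, cnt1 -= 8)
         //     if (pcmpestri(substr_head, p) sets CF) goto FOUND_CANDIDATE;
         //   return -1;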
  8654   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
  8655         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
  8656         MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
  8658   // Note, inline_string_indexOf() generates checks:
  8659   // if (substr.count > string.count) return -1;
  8660   // if (substr.count == 0) return 0;
  8661   assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");
  8663   // Load substring.
  8664   movdqu(vec, Address(str2, 0));
  8665   movl(cnt2, int_cnt2);
  8666   movptr(result, str1); // string addr
  8668   if (int_cnt2 > 8) {
  8669     jmpb(SCAN_TO_SUBSTR);
  8671     // Reload substr for rescan; this code
  8672     // is executed only for large substrings (> 8 chars)
  8673     bind(RELOAD_SUBSTR);
  8674     movdqu(vec, Address(str2, 0));
  8675     negptr(cnt2); // Jumped here with negative cnt2, convert to positive
  8677     bind(RELOAD_STR);
  8678     // We came here after the beginning of the substring was
  8679     // matched but the rest of it was not, so we need to search
  8680     // again. Start from the next element after the previous match.
  8682     // cnt2 is the number of remaining substring elements and
  8683     // cnt1 is the number of remaining string elements when the compare failed.
  8684     // Restored cnt1 = cnt1 - cnt2 + int_cnt2
  8685     subl(cnt1, cnt2);
  8686     addl(cnt1, int_cnt2);
  8687     movl(cnt2, int_cnt2); // Now restore cnt2
  8689     decrementl(cnt1);     // Shift to next element
  8690     cmpl(cnt1, cnt2);
  8691     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  8693     addptr(result, 2);
  8695   } // (int_cnt2 > 8)
  8697   // Scan string for start of substr in 16-byte vectors
  8698   bind(SCAN_TO_SUBSTR);
  8699   pcmpestri(vec, Address(result, 0), 0x0d);
  8700   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  8701   subl(cnt1, 8);
  8702   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  8703   cmpl(cnt1, cnt2);
  8704   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  8705   addptr(result, 16);
  8706   jmpb(SCAN_TO_SUBSTR);
  8708   // Found a potential substr
  8709   bind(FOUND_CANDIDATE);
  8710   // Matched whole vector if first element matched (tmp(rcx) == 0).
  8711   if (int_cnt2 == 8) {
  8712     jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  8713   } else { // int_cnt2 > 8
  8714     jccb(Assembler::overflow, FOUND_SUBSTR);
  8716   // After pcmpestri tmp(rcx) contains matched element index
  8717   // Compute start addr of substr
  8718   lea(result, Address(result, tmp, Address::times_2));
  8720   // Make sure string is still long enough
  8721   subl(cnt1, tmp);
  8722   cmpl(cnt1, cnt2);
  8723   if (int_cnt2 == 8) {
  8724     jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  8725   } else { // int_cnt2 > 8
  8726     jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  8728   // Left less than substring.
  8730   bind(RET_NOT_FOUND);
  8731   movl(result, -1);
  8732   jmpb(EXIT);
  8734   if (int_cnt2 > 8) {
  8735     // This code is optimized for the case when whole substring
  8736     // is matched if its head is matched.
  8737     bind(MATCH_SUBSTR_HEAD);
  8738     pcmpestri(vec, Address(result, 0), 0x0d);
  8739     // Reload only the string if it does not match
  8740     jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
  8742     Label CONT_SCAN_SUBSTR;
  8743     // Compare the rest of substring (> 8 chars).
  8744     bind(FOUND_SUBSTR);
  8745     // First 8 chars are already matched.
  8746     negptr(cnt2);
  8747     addptr(cnt2, 8);
  8749     bind(SCAN_SUBSTR);
  8750     subl(cnt1, 8);
  8751     cmpl(cnt2, -8); // Do not read beyond substring
  8752     jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
  8753     // Back-up strings to avoid reading beyond substring:
  8754     // cnt1 = cnt1 - cnt2 + 8
  8755     addl(cnt1, cnt2); // cnt2 is negative
  8756     addl(cnt1, 8);
  8757     movl(cnt2, 8); negptr(cnt2);
  8758     bind(CONT_SCAN_SUBSTR);
  8759     if (int_cnt2 < (int)G) {
  8760       movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
  8761       pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
  8762     } else {
  8763       // calculate index in register to avoid integer overflow (int_cnt2*2)
  8764       movl(tmp, int_cnt2);
  8765       addptr(tmp, cnt2);
  8766       movdqu(vec, Address(str2, tmp, Address::times_2, 0));
  8767       pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
  8769     // Need to reload string pointers if the whole vector did not match
  8770     jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  8771     addptr(cnt2, 8);
  8772     jccb(Assembler::negative, SCAN_SUBSTR);
  8773     // Fall through if found full substring
  8775   } // (int_cnt2 > 8)
  8777   bind(RET_FOUND);
  8778   // Found result if we matched full small substring.
  8779   // Compute substr offset
  8780   subptr(result, str1);
  8781   shrl(result, 1); // index
  8782   bind(EXIT);
  8784 } // string_indexofC8
  8786 // Small strings are loaded through the stack if they cross a page boundary.
  8787 void MacroAssembler::string_indexof(Register str1, Register str2,
  8788                                     Register cnt1, Register cnt2,
  8789                                     int int_cnt2,  Register result,
  8790                                     XMMRegister vec, Register tmp) {
  8791   assert(UseSSE42Intrinsics, "SSE4.2 is required");
  8792   //
  8793   // int_cnt2 is length of small (< 8 chars) constant substring
  8794   // or (-1) for non constant substring in which case its length
  8795   // is in cnt2 register.
  8796   //
  8797   // Note, inline_string_indexOf() generates checks:
  8798   // if (substr.count > string.count) return -1;
  8799   // if (substr.count == 0) return 0;
  8800   //
  8801   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
  8803   // This method uses the pcmpestri instruction with bound registers
  8804   //   inputs:
  8805   //     xmm - substring
  8806   //     rax - substring length (elements count)
  8807   //     mem - scanned string
  8808   //     rdx - string length (elements count)
  8809   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  8810   //   outputs:
  8811   //     rcx - matched index in string
  8812   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  8814   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
  8815         RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
  8816         FOUND_CANDIDATE;
  8818   { //========================================================
  8819     // We don't know where these strings are located
  8820     // and we can't read beyond them. Load them through stack.
  8821     Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
  8823     movptr(tmp, rsp); // save old SP
  8825     if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
  8826       if (int_cnt2 == 1) {  // One char
  8827         load_unsigned_short(result, Address(str2, 0));
  8828         movdl(vec, result); // move 32 bits
  8829       } else if (int_cnt2 == 2) { // Two chars
  8830         movdl(vec, Address(str2, 0)); // move 32 bits
  8831       } else if (int_cnt2 == 4) { // Four chars
  8832         movq(vec, Address(str2, 0));  // move 64 bits
  8833       } else { // cnt2 = { 3, 5, 6, 7 }
  8834         // Array header size is 12 bytes in 32-bit VM
  8835         // + 6 bytes for 3 chars == 18 bytes,
  8836         // enough space to load vec and shift.
  8837         assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
  8838         movdqu(vec, Address(str2, (int_cnt2*2)-16));
  8839         psrldq(vec, 16-(int_cnt2*2));
  8841     } else { // not constant substring
  8842       cmpl(cnt2, 8);
  8843       jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
  8845       // We can read beyond the string if str+16 does not cross a page boundary
  8846       // since heaps are aligned and mapped by pages.
  8847       assert(os::vm_page_size() < (int)G, "default page should be small");
  8848       movl(result, str2); // We need only low 32 bits
  8849       andl(result, (os::vm_page_size()-1));
  8850       cmpl(result, (os::vm_page_size()-16));
  8851       jccb(Assembler::belowEqual, CHECK_STR);
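             // Illustrative arithmetic (assuming a 4K page): the check above passes
             // when (str2 & 0xfff) <= 0xff0, i.e. str2..str2+15 lie within one page,
             // so the 16-byte movdqu at BIG_STRINGS cannot fault even though it may
             // read past the end of a short substring.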
  8853       // Move small strings to the stack to allow loading 16 bytes into vec.
  8854       subptr(rsp, 16);
  8855       int stk_offset = wordSize-2;
  8856       push(cnt2);
  8858       bind(COPY_SUBSTR);
  8859       load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
  8860       movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
  8861       decrement(cnt2);
  8862       jccb(Assembler::notZero, COPY_SUBSTR);
  8864       pop(cnt2);
  8865       movptr(str2, rsp);  // New substring address
  8866     } // non constant
  8868     bind(CHECK_STR);
  8869     cmpl(cnt1, 8);
  8870     jccb(Assembler::aboveEqual, BIG_STRINGS);
  8872     // Check cross page boundary.
  8873     movl(result, str1); // We need only low 32 bits
  8874     andl(result, (os::vm_page_size()-1));
  8875     cmpl(result, (os::vm_page_size()-16));
  8876     jccb(Assembler::belowEqual, BIG_STRINGS);
  8878     subptr(rsp, 16);
  8879     int stk_offset = -2;
  8880     if (int_cnt2 < 0) { // not constant
  8881       push(cnt2);
  8882       stk_offset += wordSize;
  8884     movl(cnt2, cnt1);
  8886     bind(COPY_STR);
  8887     load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
  8888     movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
  8889     decrement(cnt2);
  8890     jccb(Assembler::notZero, COPY_STR);
  8892     if (int_cnt2 < 0) { // not constant
  8893       pop(cnt2);
  8895     movptr(str1, rsp);  // New string address
  8897     bind(BIG_STRINGS);
  8898     // Load substring.
  8899     if (int_cnt2 < 0) { // -1
  8900       movdqu(vec, Address(str2, 0));
  8901       push(cnt2);       // substr count
  8902       push(str2);       // substr addr
  8903       push(str1);       // string addr
  8904     } else {
  8905       // Small (< 8 chars) constant substrings are loaded already.
  8906       movl(cnt2, int_cnt2);
  8908     push(tmp);  // original SP
  8910   } // Finished loading
  8912   //========================================================
  8913   // Start search
  8914   //
  8916   movptr(result, str1); // string addr
  8918   if (int_cnt2  < 0) {  // Only for non constant substring
  8919     jmpb(SCAN_TO_SUBSTR);
  8921     // SP saved at sp+0
  8922     // String saved at sp+1*wordSize
  8923     // Substr saved at sp+2*wordSize
  8924     // Substr count saved at sp+3*wordSize
  8926     // Reload substr for rescan; this code
  8927     // is executed only for large substrings (> 8 chars)
  8928     bind(RELOAD_SUBSTR);
  8929     movptr(str2, Address(rsp, 2*wordSize));
  8930     movl(cnt2, Address(rsp, 3*wordSize));
  8931     movdqu(vec, Address(str2, 0));
  8932     // We came here after the beginning of the substring was
  8933     // matched but the rest of it was not, so we need to search
  8934     // again. Start from the next element after the previous match.
  8935     subptr(str1, result); // Restore counter
  8936     shrl(str1, 1);
  8937     addl(cnt1, str1);
  8938     decrementl(cnt1);   // Shift to next element
  8939     cmpl(cnt1, cnt2);
  8940     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  8942     addptr(result, 2);
  8943   } // non constant
  8945   // Scan string for start of substr in 16-byte vectors
  8946   bind(SCAN_TO_SUBSTR);
  8947   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  8948   pcmpestri(vec, Address(result, 0), 0x0d);
  8949   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  8950   subl(cnt1, 8);
  8951   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  8952   cmpl(cnt1, cnt2);
  8953   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  8954   addptr(result, 16);
  8956   bind(ADJUST_STR);
  8957   cmpl(cnt1, 8); // Do not read beyond string
  8958   jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  8959   // Back-up string to avoid reading beyond string.
  8960   lea(result, Address(result, cnt1, Address::times_2, -16));
  8961   movl(cnt1, 8);
  8962   jmpb(SCAN_TO_SUBSTR);
  8964   // Found a potential substr
  8965   bind(FOUND_CANDIDATE);
  8966   // After pcmpestri tmp(rcx) contains matched element index
  8968   // Make sure string is still long enough
  8969   subl(cnt1, tmp);
  8970   cmpl(cnt1, cnt2);
  8971   jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  8972   // Left less than substring.
  8974   bind(RET_NOT_FOUND);
  8975   movl(result, -1);
  8976   jmpb(CLEANUP);
  8978   bind(FOUND_SUBSTR);
  8979   // Compute start addr of substr
  8980   lea(result, Address(result, tmp, Address::times_2));
  8982   if (int_cnt2 > 0) { // Constant substring
  8983     // Repeat search for small substring (< 8 chars)
  8984     // from new point without reloading substring.
  8985     // Have to check that we don't read beyond string.
  8986     cmpl(tmp, 8-int_cnt2);
  8987     jccb(Assembler::greater, ADJUST_STR);
  8988     // Fall through if matched whole substring.
  8989   } else { // non constant
  8990     assert(int_cnt2 == -1, "should be != 0");
  8992     addl(tmp, cnt2);
  8993     // Found result if we matched whole substring.
  8994     cmpl(tmp, 8);
  8995     jccb(Assembler::lessEqual, RET_FOUND);
  8997     // Repeat search for small substring (<= 8 chars)
  8998     // from new point 'str1' without reloading substring.
  8999     cmpl(cnt2, 8);
  9000     // Have to check that we don't read beyond string.
  9001     jccb(Assembler::lessEqual, ADJUST_STR);
  9003     Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
  9004     // Compare the rest of substring (> 8 chars).
  9005     movptr(str1, result);
  9007     cmpl(tmp, cnt2);
  9008     // First 8 chars are already matched.
  9009     jccb(Assembler::equal, CHECK_NEXT);
  9011     bind(SCAN_SUBSTR);
  9012     pcmpestri(vec, Address(str1, 0), 0x0d);
  9013     // Need to reload string pointers if the whole vector did not match
  9014     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  9016     bind(CHECK_NEXT);
  9017     subl(cnt2, 8);
  9018     jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
  9019     addptr(str1, 16);
  9020     addptr(str2, 16);
  9021     subl(cnt1, 8);
  9022     cmpl(cnt2, 8); // Do not read beyond substring
  9023     jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
  9024     // Back-up strings to avoid reading beyond substring.
  9025     lea(str2, Address(str2, cnt2, Address::times_2, -16));
  9026     lea(str1, Address(str1, cnt2, Address::times_2, -16));
  9027     subl(cnt1, cnt2);
  9028     movl(cnt2, 8);
  9029     addl(cnt1, 8);
  9030     bind(CONT_SCAN_SUBSTR);
  9031     movdqu(vec, Address(str2, 0));
  9032     jmpb(SCAN_SUBSTR);
  9034     bind(RET_FOUND_LONG);
  9035     movptr(str1, Address(rsp, wordSize));
  9036   } // non constant
  9038   bind(RET_FOUND);
  9039   // Compute substr offset
  9040   subptr(result, str1);
  9041   shrl(result, 1); // index
  9043   bind(CLEANUP);
  9044   pop(rsp); // restore SP
  9046 } // string_indexof
  9048 // Compare strings.
  9049 void MacroAssembler::string_compare(Register str1, Register str2,
  9050                                     Register cnt1, Register cnt2, Register result,
  9051                                     XMMRegister vec1) {
  9052   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
  9054   // Compute the minimum of the string lengths and the
  9055   // difference of the string lengths (stack).
  9056   // Do the conditional move stuff
  9057   movl(result, cnt1);
  9058   subl(cnt1, cnt2);
  9059   push(cnt1);
  9060   if (VM_Version::supports_cmov()) {
  9061     cmovl(Assembler::lessEqual, cnt2, result);
  9062   } else {
  9063     Label GT_LABEL;
  9064     jccb(Assembler::greater, GT_LABEL);
  9065     movl(cnt2, result);
  9066     bind(GT_LABEL);
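         // C-like sketch of the block above (illustrative only): after it
         //   [rsp] = cnt1 - cnt2        (length difference, returned when one
         //                               string is a prefix of the other)
         //   cnt2  = min(cnt1, cnt2)    (number of chars actually compared)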
  9069   // Is the minimum length zero?
  9070   testl(cnt2, cnt2);
  9071   jcc(Assembler::zero, LENGTH_DIFF_LABEL);
  9073   // Load first characters
  9074   load_unsigned_short(result, Address(str1, 0));
  9075   load_unsigned_short(cnt1, Address(str2, 0));
  9077   // Compare first characters
  9078   subl(result, cnt1);
  9079   jcc(Assembler::notZero,  POP_LABEL);
  9080   decrementl(cnt2);
  9081   jcc(Assembler::zero, LENGTH_DIFF_LABEL);
  9084     // Check after comparing first character to see if strings are equivalent
  9085     Label LSkip2;
  9086     // Check if the strings start at same location
  9087     cmpptr(str1, str2);
  9088     jccb(Assembler::notEqual, LSkip2);
  9090     // Check if the length difference is zero (from stack)
  9091     cmpl(Address(rsp, 0), 0x0);
  9092     jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
  9094     // Strings might not be equivalent
  9095     bind(LSkip2);
  9098   Address::ScaleFactor scale = Address::times_2;
  9099   int stride = 8;
  9101   // Advance to next element
  9102   addptr(str1, 16/stride);
  9103   addptr(str2, 16/stride);
  9105   if (UseSSE42Intrinsics) {
  9106     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
  9107     int pcmpmask = 0x19;
  9108     // Setup to compare 16-byte vectors
  9109     movl(result, cnt2);
  9110     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
  9111     jccb(Assembler::zero, COMPARE_TAIL);
  9113     lea(str1, Address(str1, result, scale));
  9114     lea(str2, Address(str2, result, scale));
  9115     negptr(result);
  9117     // pcmpestri
  9118     //   inputs:
  9119     //     vec1- substring
  9120     //     rax - negative string length (elements count)
  9121     //     mem - scanned string
  9122     //     rdx - string length (elements count)
  9123     //     pcmpmask - cmp mode: 11000 (string compare with negated result)
  9124     //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
  9125     //   outputs:
  9126     //     rcx - first mismatched element index
  9127     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
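           // Rough sketch of the wide-vector loop below (illustrative only): with
           // the "negated result" mode, pcmpestri sets CF when any of the 8 chars
           // differ and leaves the index of the first mismatch in rcx, so the loop
           // is roughly:  do { if (chunk differs) goto VECTOR_NOT_EQUAL; } while (more);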
  9129     bind(COMPARE_WIDE_VECTORS);
  9130     movdqu(vec1, Address(str1, result, scale));
  9131     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
  9132     // After pcmpestri cnt1(rcx) contains mismatched element index
  9134     jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
  9135     addptr(result, stride);
  9136     subptr(cnt2, stride);
  9137     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
  9139     // compare wide vectors tail
  9140     testl(result, result);
  9141     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
  9143     movl(cnt2, stride);
  9144     movl(result, stride);
  9145     negptr(result);
  9146     movdqu(vec1, Address(str1, result, scale));
  9147     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
  9148     jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
  9150     // Mismatched characters in the vectors
  9151     bind(VECTOR_NOT_EQUAL);
  9152     addptr(result, cnt1);
  9153     movptr(cnt2, result);
  9154     load_unsigned_short(result, Address(str1, cnt2, scale));
  9155     load_unsigned_short(cnt1, Address(str2, cnt2, scale));
  9156     subl(result, cnt1);
  9157     jmpb(POP_LABEL);
  9159     bind(COMPARE_TAIL); // limit is zero
  9160     movl(cnt2, result);
  9161     // Fallthru to tail compare
  9164   // Shift str2 and str1 to the end of the arrays, negate min
  9165   lea(str1, Address(str1, cnt2, scale, 0));
  9166   lea(str2, Address(str2, cnt2, scale, 0));
  9167   negptr(cnt2);
  9169   // Compare the rest of the elements
  9170   bind(WHILE_HEAD_LABEL);
  9171   load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  9172   load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  9173   subl(result, cnt1);
  9174   jccb(Assembler::notZero, POP_LABEL);
  9175   increment(cnt2);
  9176   jccb(Assembler::notZero, WHILE_HEAD_LABEL);
  9178   // Strings are equal up to min length.  Return the length difference.
  9179   bind(LENGTH_DIFF_LABEL);
  9180   pop(result);
  9181   jmpb(DONE_LABEL);
  9183   // Discard the stored length difference
  9184   bind(POP_LABEL);
  9185   pop(cnt1);
  9187   // That's it
  9188   bind(DONE_LABEL);
  9191 // Compare char[] arrays aligned to 4 bytes or substrings.
  9192 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
  9193                                         Register limit, Register result, Register chr,
  9194                                         XMMRegister vec1, XMMRegister vec2) {
  9195   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
  9197   int length_offset  = arrayOopDesc::length_offset_in_bytes();
  9198   int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
  9200   // Check the input args
  9201   cmpptr(ary1, ary2);
  9202   jcc(Assembler::equal, TRUE_LABEL);
  9204   if (is_array_equ) {
  9205     // Need additional checks for arrays_equals.
  9206     testptr(ary1, ary1);
  9207     jcc(Assembler::zero, FALSE_LABEL);
  9208     testptr(ary2, ary2);
  9209     jcc(Assembler::zero, FALSE_LABEL);
  9211     // Check the lengths
  9212     movl(limit, Address(ary1, length_offset));
  9213     cmpl(limit, Address(ary2, length_offset));
  9214     jcc(Assembler::notEqual, FALSE_LABEL);
  9217   // count == 0
  9218   testl(limit, limit);
  9219   jcc(Assembler::zero, TRUE_LABEL);
  9221   if (is_array_equ) {
  9222     // Load array address
  9223     lea(ary1, Address(ary1, base_offset));
  9224     lea(ary2, Address(ary2, base_offset));
  9227   shll(limit, 1);      // byte count != 0
  9228   movl(result, limit); // copy
  9230   if (UseSSE42Intrinsics) {
  9231     // With SSE4.2, use double quad vector compare
  9232     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
  9234     // Compare 16-byte vectors
  9235     andl(result, 0x0000000e);  //   tail count (in bytes)
  9236     andl(limit, 0xfffffff0);   // vector count (in bytes)
  9237     jccb(Assembler::zero, COMPARE_TAIL);
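           // Worked example (illustrative): comparing 13 chars gives limit = 26
           // bytes; the two masks above split that into limit = 16 (one full
           // 16-byte vector) and result = 10 (tail), and the tail is later handled
           // by re-reading the final 16 bytes, overlapping bytes already compared.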
  9239     lea(ary1, Address(ary1, limit, Address::times_1));
  9240     lea(ary2, Address(ary2, limit, Address::times_1));
  9241     negptr(limit);
  9243     bind(COMPARE_WIDE_VECTORS);
  9244     movdqu(vec1, Address(ary1, limit, Address::times_1));
  9245     movdqu(vec2, Address(ary2, limit, Address::times_1));
  9246     pxor(vec1, vec2);
  9248     ptest(vec1, vec1);
  9249     jccb(Assembler::notZero, FALSE_LABEL);
  9250     addptr(limit, 16);
  9251     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
  9253     testl(result, result);
  9254     jccb(Assembler::zero, TRUE_LABEL);
  9256     movdqu(vec1, Address(ary1, result, Address::times_1, -16));
  9257     movdqu(vec2, Address(ary2, result, Address::times_1, -16));
  9258     pxor(vec1, vec2);
  9260     ptest(vec1, vec1);
  9261     jccb(Assembler::notZero, FALSE_LABEL);
  9262     jmpb(TRUE_LABEL);
  9264     bind(COMPARE_TAIL); // limit is zero
  9265     movl(limit, result);
  9266     // Fallthru to tail compare
  9269   // Compare 4-byte vectors
  9270   andl(limit, 0xfffffffc); // vector count (in bytes)
  9271   jccb(Assembler::zero, COMPARE_CHAR);
  9273   lea(ary1, Address(ary1, limit, Address::times_1));
  9274   lea(ary2, Address(ary2, limit, Address::times_1));
  9275   negptr(limit);
  9277   bind(COMPARE_VECTORS);
  9278   movl(chr, Address(ary1, limit, Address::times_1));
  9279   cmpl(chr, Address(ary2, limit, Address::times_1));
  9280   jccb(Assembler::notEqual, FALSE_LABEL);
  9281   addptr(limit, 4);
  9282   jcc(Assembler::notZero, COMPARE_VECTORS);
  9284   // Compare trailing char (final 2 bytes), if any
  9285   bind(COMPARE_CHAR);
  9286   testl(result, 0x2);   // tail  char
  9287   jccb(Assembler::zero, TRUE_LABEL);
  9288   load_unsigned_short(chr, Address(ary1, 0));
  9289   load_unsigned_short(limit, Address(ary2, 0));
  9290   cmpl(chr, limit);
  9291   jccb(Assembler::notEqual, FALSE_LABEL);
  9293   bind(TRUE_LABEL);
  9294   movl(result, 1);   // return true
  9295   jmpb(DONE);
  9297   bind(FALSE_LABEL);
  9298   xorl(result, result); // return false
  9300   // That's it
  9301   bind(DONE);
  9304 #ifdef PRODUCT
  9305 #define BLOCK_COMMENT(str) /* nothing */
  9306 #else
  9307 #define BLOCK_COMMENT(str) block_comment(str)
  9308 #endif
  9310 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  9311 void MacroAssembler::generate_fill(BasicType t, bool aligned,
  9312                                    Register to, Register value, Register count,
  9313                                    Register rtmp, XMMRegister xtmp) {
  9314   assert_different_registers(to, value, count, rtmp);
  9315   Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  9316   Label L_fill_2_bytes, L_fill_4_bytes;
  9318   int shift = -1;
  9319   switch (t) {
  9320     case T_BYTE:
  9321       shift = 2;
  9322       break;
  9323     case T_SHORT:
  9324       shift = 1;
  9325       break;
  9326     case T_INT:
  9327       shift = 0;
  9328       break;
  9329     default: ShouldNotReachHere();
  9332   if (t == T_BYTE) {
  9333     andl(value, 0xff);
  9334     movl(rtmp, value);
  9335     shll(rtmp, 8);
  9336     orl(value, rtmp);
  9338   if (t == T_SHORT) {
  9339     andl(value, 0xffff);
  9341   if (t == T_BYTE || t == T_SHORT) {
  9342     movl(rtmp, value);
  9343     shll(rtmp, 16);
  9344     orl(value, rtmp);
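         // Illustrative example (not in the source): for T_BYTE a fill value of
         // 0x41 is widened by the two shift/or steps above to 0x4141 and then
         // 0x41414141, so the 4-byte (and wider) stores below replicate the byte
         // across every position.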
  9347   cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  9348   jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  9349   if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
  9350     // align the destination address to a 4-byte boundary
  9351     if (t == T_BYTE) {
  9352       // One byte misalignment happens only for byte arrays
  9353       testptr(to, 1);
  9354       jccb(Assembler::zero, L_skip_align1);
  9355       movb(Address(to, 0), value);
  9356       increment(to);
  9357       decrement(count);
  9358       BIND(L_skip_align1);
  9360     // Two bytes misalignment happens only for byte and short (char) arrays
  9361     testptr(to, 2);
  9362     jccb(Assembler::zero, L_skip_align2);
  9363     movw(Address(to, 0), value);
  9364     addptr(to, 2);
  9365     subl(count, 1<<(shift-1));
  9366     BIND(L_skip_align2);
  9368   if (UseSSE < 2) {
  9369     Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
  9370     // Fill 32-byte chunks
  9371     subl(count, 8 << shift);
  9372     jcc(Assembler::less, L_check_fill_8_bytes);
  9373     align(16);
  9375     BIND(L_fill_32_bytes_loop);
  9377     for (int i = 0; i < 32; i += 4) {
  9378       movl(Address(to, i), value);
  9381     addptr(to, 32);
  9382     subl(count, 8 << shift);
  9383     jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
  9384     BIND(L_check_fill_8_bytes);
  9385     addl(count, 8 << shift);
  9386     jccb(Assembler::zero, L_exit);
  9387     jmpb(L_fill_8_bytes);
  9389     //
  9390     // length is too short, just fill qwords
  9391     //
  9392     BIND(L_fill_8_bytes_loop);
  9393     movl(Address(to, 0), value);
  9394     movl(Address(to, 4), value);
  9395     addptr(to, 8);
  9396     BIND(L_fill_8_bytes);
  9397     subl(count, 1 << (shift + 1));
  9398     jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
  9399     // fall through to fill 4 bytes
  9400   } else {
  9401     Label L_fill_32_bytes;
  9402     if (!UseUnalignedLoadStores) {
  9403       // align to 8 bytes, we know we are 4 byte aligned to start
  9404       testptr(to, 4);
  9405       jccb(Assembler::zero, L_fill_32_bytes);
  9406       movl(Address(to, 0), value);
  9407       addptr(to, 4);
  9408       subl(count, 1<<shift);
  9410     BIND(L_fill_32_bytes);
  9412       assert( UseSSE >= 2, "supported cpu only" );
  9413       Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
  9414       // Fill 32-byte chunks
  9415       movdl(xtmp, value);
  9416       pshufd(xtmp, xtmp, 0);
  9418       subl(count, 8 << shift);
  9419       jcc(Assembler::less, L_check_fill_8_bytes);
  9420       align(16);
  9422       BIND(L_fill_32_bytes_loop);
  9424       if (UseUnalignedLoadStores) {
  9425         movdqu(Address(to, 0), xtmp);
  9426         movdqu(Address(to, 16), xtmp);
  9427       } else {
  9428         movq(Address(to, 0), xtmp);
  9429         movq(Address(to, 8), xtmp);
  9430         movq(Address(to, 16), xtmp);
  9431         movq(Address(to, 24), xtmp);
  9434       addptr(to, 32);
  9435       subl(count, 8 << shift);
  9436       jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
  9437       BIND(L_check_fill_8_bytes);
  9438       addl(count, 8 << shift);
  9439       jccb(Assembler::zero, L_exit);
  9440       jmpb(L_fill_8_bytes);
  9442       //
  9443       // length is too short, just fill qwords
  9444       //
  9445       BIND(L_fill_8_bytes_loop);
  9446       movq(Address(to, 0), xtmp);
  9447       addptr(to, 8);
  9448       BIND(L_fill_8_bytes);
  9449       subl(count, 1 << (shift + 1));
  9450       jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
  9453   // fill trailing 4 bytes
  9454   BIND(L_fill_4_bytes);
  9455   testl(count, 1<<shift);
  9456   jccb(Assembler::zero, L_fill_2_bytes);
  9457   movl(Address(to, 0), value);
  9458   if (t == T_BYTE || t == T_SHORT) {
  9459     addptr(to, 4);
  9460     BIND(L_fill_2_bytes);
  9461     // fill trailing 2 bytes
  9462     testl(count, 1<<(shift-1));
  9463     jccb(Assembler::zero, L_fill_byte);
  9464     movw(Address(to, 0), value);
  9465     if (t == T_BYTE) {
  9466       addptr(to, 2);
  9467       BIND(L_fill_byte);
  9468       // fill trailing byte
  9469       testl(count, 1);
  9470       jccb(Assembler::zero, L_exit);
  9471       movb(Address(to, 0), value);
  9472     } else {
  9473       BIND(L_fill_byte);
  9475   } else {
  9476     BIND(L_fill_2_bytes);
  9478   BIND(L_exit);
  9480 #undef BIND
  9481 #undef BLOCK_COMMENT
  9484 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  9485   switch (cond) {
  9486     // Note some conditions are synonyms for others
  9487     case Assembler::zero:         return Assembler::notZero;
  9488     case Assembler::notZero:      return Assembler::zero;
  9489     case Assembler::less:         return Assembler::greaterEqual;
  9490     case Assembler::lessEqual:    return Assembler::greater;
  9491     case Assembler::greater:      return Assembler::lessEqual;
  9492     case Assembler::greaterEqual: return Assembler::less;
  9493     case Assembler::below:        return Assembler::aboveEqual;
  9494     case Assembler::belowEqual:   return Assembler::above;
  9495     case Assembler::above:        return Assembler::belowEqual;
  9496     case Assembler::aboveEqual:   return Assembler::below;
  9497     case Assembler::overflow:     return Assembler::noOverflow;
  9498     case Assembler::noOverflow:   return Assembler::overflow;
  9499     case Assembler::negative:     return Assembler::positive;
  9500     case Assembler::positive:     return Assembler::negative;
  9501     case Assembler::parity:       return Assembler::noParity;
  9502     case Assembler::noParity:     return Assembler::parity;
  9504   ShouldNotReachHere(); return Assembler::overflow;
  9507 SkipIfEqual::SkipIfEqual(
  9508     MacroAssembler* masm, const bool* flag_addr, bool value) {
  9509   _masm = masm;
  9510   _masm->cmp8(ExternalAddress((address)flag_addr), value);
  9511   _masm->jcc(Assembler::equal, _label);
  9514 SkipIfEqual::~SkipIfEqual() {
  9515   _masm->bind(_label);
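       // Typical use of SkipIfEqual (illustrative only; the flag name is hypothetical):
       //   { SkipIfEqual skip(masm, &SomeVerboseFlag, false);
       //     // code emitted here is executed only when SomeVerboseFlag is true;
       //     // the constructor emits the cmp8/jcc and the destructor binds the
       //     // branch target when 'skip' goes out of scope.
       //   }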
