src/cpu/x86/vm/assembler_x86_64.cpp

Wed, 19 Mar 2008 15:33:25 -0700

author
kvn
date
Wed, 19 Mar 2008 15:33:25 -0700
changeset 506
3d62cb85208d
parent 435
a61af66fc99e
child 511
d6fe2e4959d6
permissions
-rw-r--r--

6662967: Optimize I2D conversion on new x86
Summary: Use CVTDQ2PS and CVTDQ2PD for integer values conversions to float and double values on new AMD cpu.
Reviewed-by: sgoldman, never

     1 /*
     2  * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_assembler_x86_64.cpp.incl"
    28 // Implementation of AddressLiteral
    30 // Builds a literal for 'target' and installs the relocation spec implied by
    31 // 'rtype'.  The literal is flagged as not being an lvalue.
    32 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
    33   _target  = target;
    34   _is_lval = false;
    35   switch (rtype) {
    36   // Word relocations take the target itself.
    37   case relocInfo::external_word_type: _rspec = external_word_Relocation::spec(target); break;
    38   case relocInfo::internal_word_type: _rspec = internal_word_Relocation::spec(target); break;
    39   // Call relocations are built without it.
    40   case relocInfo::opt_virtual_call_type: _rspec = opt_virtual_call_Relocation::spec(); break;
    41   case relocInfo::static_call_type:      _rspec = static_call_Relocation::spec();      break;
    42   case relocInfo::runtime_call_type:     _rspec = runtime_call_Relocation::spec();     break;
    43   case relocInfo::oop_type:
    44     // Oops are a special case. Normally they would be their own section
    45     // but in cases like icBuffer they are literals in the code stream that
    46     // we don't have a section for. We use none so that we get a literal address
    47     // which is always patchable.
    48   case relocInfo::none:
    49     // _rspec is not assigned for either of these two types.
    50     break;
    51   default:
    52     ShouldNotReachHere();
    53     break;
    54   }
    55 }
    63 // Implementation of Address
    65 Address Address::make_array(ArrayAddress adr) {
    66 #ifdef _LP64
    67   // Not implementable on 64bit machines
    68   // Should have been handled higher up the call chain.
    69   ShouldNotReachHere();
    70   return Address();
    71 #else
    72   AddressLiteral base = adr.base();
    73   Address index = adr.index();
    74   assert(index._disp == 0, "must not have disp"); // maybe it can?
    75   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
    76   array._rspec = base._rspec;
    77   return array;
    78 #endif // _LP64
    79 }
    81 // exceedingly dangerous constructor
    82 // Builds an Address with no base and no index, only 'disp', and picks the
    83 // relocation spec from 'rtype' ('loc' feeds the word-type specs).
    84 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
    85   _disp  = disp;
    86   _scale = no_scale;
    87   _index = noreg;
    88   _base  = noreg;
    89   switch (rtype) {
    90     case relocInfo::none:
    91       break;
    92     case relocInfo::internal_word_type: _rspec = internal_word_Relocation::spec(loc); break;
    93     case relocInfo::external_word_type: _rspec = external_word_Relocation::spec(loc); break;
    94     case relocInfo::runtime_call_type:  _rspec = runtime_call_Relocation::spec();     break; // HMM
    95     default:
    96       ShouldNotReachHere();
    97   }
    98 }
   105 // Convert the raw encoding form into the form expected by the constructor for
   106 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
   107 // that to noreg for the Address constructor.
   108 Address Address::make_raw(int base, int index, int scale, int disp) {
   109   // rsp's encoding in the index slot stands for "no index": hand the
   110   // constructor noreg and no_scale in that case.
   111   if (index == rsp->encoding()) {
   112     return Address(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
   113   }
   114   return Address(as_Register(base), as_Register(index),
   115                  (Address::ScaleFactor)scale, in_ByteSize(disp));
   116 }
   120 // Implementation of Assembler
   121 // Returns the code fill byte: 0xF4, the encoding of hlt.
   122 int AbstractAssembler::code_fill_byte() {
   123   return 0xF4; // hlt
   124 }
   125 // This should only be used by 64bit instructions that can use rip-relative
   126 // addressing; it cannot be used by instructions that want an immediate value.
   128 // Tells whether 'adr' may be addressed with a 32-bit rip-relative
   129 // displacement, judged by its relocation type and its distance to the code.
   130 bool Assembler::reachable(AddressLiteral adr) {
   131   // None will force a 64bit literal to the code stream. Likely a placeholder
   132   // for something that will be patched later and we need to be certain it
   133   // will always be reachable.
   134   if (adr.reloc() == relocInfo::none) return false;
   135   // Internal words should be rip relative and easily reachable.
   136   if (adr.reloc() == relocInfo::internal_word_type) return true;
   137   // Of the remaining types only external words and runtime calls qualify.
   138   const bool candidate = adr.reloc() == relocInfo::external_word_type ||
   139                          adr.reloc() == relocInfo::runtime_call_type;
   140   if (!candidate) return false;
   141   // Stress the correction code: with ForceUnreachable, a target outside the
   142   // codecache is reported unreachable.  Targets inside it are left alone
   143   // because flipping stuff in the codecache to be unreachable causes issues
   144   // with things like inline caches where the additional instructions
   145   // are not handled.
   146   if (ForceUnreachable && CodeCache::find_blob(adr._target) == NULL) {
   147     return false;
   148   }
   149   // For external_word_type/runtime_call_type: if the target is in range of
   150   // both bounds of the codeCache and of where we are now (possibly a temp
   151   // buffer), it is reachable from wherever the code might end up.
   152   // This would have to change if we ever save/restore shared code
   153   // to be more pessimistic.
   154   const int64_t target = (int64_t)adr._target;
   155   const int64_t from_low = target - ((int64_t)CodeCache::low_bound() + sizeof(int));
   156   if (!is_simm32(from_low)) return false;
   157   const int64_t from_high = target - ((int64_t)CodeCache::high_bound() + sizeof(int));
   158   if (!is_simm32(from_high)) return false;
   159   const int64_t disp = target - ((int64_t)_code_pos + sizeof(int));
   160   // rip relative is disp + address_of_next_instruction, and the latter is not
   161   // known here, so the displacement is padded away from zero by a fudge factor
   162   // that covers any instruction size.  The bound checks above are already
   163   // worst case and need no padding.
   164   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, 4-byte literal
   165   // + 4 because better safe than sorry.
   166   const int fudge = 12 + 4;
   167   return is_simm32(disp < 0 ? disp - fudge : disp + fudge);
   168 }
   185 // make this go away eventually
   186 void Assembler::emit_data(jint data,
   187                           relocInfo::relocType rtype,
   188                           int format) {
   189   // Without a relocation type the word goes out as plain data.
   190   if (rtype == relocInfo::none) {
   191     emit_long(data);
   192     return;
   193   }
   194   emit_data(data, Relocation::spec_simple(rtype), format);
   195 }
   196 // Emits the 32-bit word 'data'.  When 'rspec' carries a relocation, it is
   197 // first recorded against the enclosing instruction (inst_mark()).
   198 void Assembler::emit_data(jint data,
   199                           RelocationHolder const& rspec,
   200                           int format) {
   201   assert(imm64_operand == 0, "default format must be imm64 in this file");
   202   assert(imm64_operand != format, "must not be imm64");
   203   assert(inst_mark() != NULL, "must be inside InstructionMark");
   204   if (rspec.type() != relocInfo::none) {
   205 #ifdef ASSERT
   206     check_relocation(rspec, format);
   207 #endif
   208     // Do not use AbstractAssembler::relocate, which is not intended for
   209     // embedded words.  Instead, relocate to the enclosing instruction.
   210     // hack. call32 is too wide for mask so use disp32
   211     const int reloc_format = (format == call32_operand) ? disp32_operand : format;
   212     code_section()->relocate(inst_mark(), rspec, reloc_format);
   213   }
   214   emit_long(data);
   215 }
   218 void Assembler::emit_data64(jlong data,
   219                             relocInfo::relocType rtype,
   220                             int format) {
   221   if (rtype != relocInfo::none) {
   222     // Wrap the bare type in a simple spec and use the relocating overload.
   223     emit_data64(data, Relocation::spec_simple(rtype), format);
   224     return;
   225   }
   226   emit_long64(data);
   227 }
   228 void Assembler::emit_data64(jlong data,
   229                             RelocationHolder const& rspec,
   230                             int format) {
         // Emits the 64-bit word 'data' after recording 'rspec' against the
         // enclosing instruction.  'format' must be imm64_operand.
         // NOTE(review): unlike emit_data(jint, RelocationHolder const&, int),
         // the relocation is recorded without first testing rspec.type() against
         // relocInfo::none - confirm that this is intended.
   231   assert(imm64_operand == 0, "default format must be imm64 in this file");
   232   assert(imm64_operand == format, "must be imm64");
   233   assert(inst_mark() != NULL, "must be inside InstructionMark");
   234   // Do not use AbstractAssembler::relocate, which is not intended for
   235   // embedded words.  Instead, relocate to the enclosing instruction.
   236   code_section()->relocate(inst_mark(), rspec, format);
   237 #ifdef ASSERT
   238   check_relocation(rspec, format);
   239 #endif
   240   emit_long64(data);
   241 }
   243 // Emits op1, then op2 combined with the destination register bits, then the
   244 // one-byte immediate.
   245 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
   246   assert(isByte(op1) && isByte(op2), "wrong opcode");
   247   assert(isByte(imm8), "not a byte");
   248   assert((op1 & 0x01) == 0, "should be 8bit operation");
   249   // Encodings 8 and above are reduced by 8 before being or'ed into op2.
   250   const int enc = dst->encoding();
   251   const int rm  = (enc >= 8) ? enc - 8 : enc;
   252   emit_byte(op1);
   253   emit_byte(op2 | rm);
   254   emit_byte(imm8);
   255 }
   256 // Register/immediate arithmetic.  An immediate that passes is8bit() gets the
   257 // short form: sign bit set in the opcode and a single immediate byte.
   258 void Assembler::emit_arith(int op1, int op2, Register dst, int imm32) {
   259   assert(isByte(op1) && isByte(op2), "wrong opcode");
   260   assert((op1 & 0x01) == 1, "should be 32bit operation");
   261   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   262   // Encodings 8 and above are reduced by 8 before being or'ed into op2.
   263   const int enc = dst->encoding();
   264   const int rm  = (enc >= 8) ? enc - 8 : enc;
   265   const bool short_form = is8bit(imm32);
   266   emit_byte(short_form ? (op1 | 0x02) : op1); // 0x02 == sign bit
   267   emit_byte(op2 | rm);
   268   if (short_form) {
   269     emit_byte(imm32 & 0xFF);
   270   } else {
   271     emit_long(imm32);
   272   }
   273 }
   275 // immediate-to-memory forms
   276 void Assembler::emit_arith_operand(int op1,
   277                                    Register rm, Address adr,
   278                                    int imm32) {
   279   assert((op1 & 0x01) == 1, "should be 32bit operation");
   280   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   281   if (!is8bit(imm32)) {
   282     // Long form: opcode as given, operand, then the 32-bit immediate.
   283     emit_byte(op1);
   284     emit_operand(rm, adr, 4);
   285     emit_long(imm32);
   286     return;
   287   }
   288   // Short form: sign bit set, operand, then a single immediate byte.
   289   emit_byte(op1 | 0x02); // set sign bit
   290   emit_operand(rm, adr, 1);
   291   emit_byte(imm32 & 0xFF);
   292 }
   293 // Register/register arithmetic: emits op1, then op2 with dst packed into
   294 // bits 3 and up and src into the low bits.
   295 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
   296   assert(isByte(op1) && isByte(op2), "wrong opcode");
   297   // Encodings 8 and above are reduced by 8 before being packed below.
   298   const int dst_raw  = dst->encoding();
   299   const int src_raw  = src->encoding();
   300   const int dst_bits = (dst_raw >= 8) ? dst_raw - 8 : dst_raw;
   301   const int src_bits = (src_raw >= 8) ? src_raw - 8 : src_raw;
   302   emit_byte(op1);
   303   emit_byte(op2 | (dst_bits << 3) | src_bits);
   304 }
   307 void Assembler::emit_operand(Register reg, Register base, Register index,
   308                              Address::ScaleFactor scale, int disp,
   309                              RelocationHolder const& rspec,
   310                              int rip_relative_correction) {
         // Emits the ModRM byte, the SIB byte where one is needed, and the
         // displacement for the memory operand described by base/index/scale/disp,
         // with 'reg' placed in the reg field of the ModRM byte.
         // The no-displacement and disp8 forms are used only when rspec carries no
         // relocation; otherwise the displacement goes out as a 32-bit word through
         // emit_data() so that the relocation is recorded.
         // rip_relative_correction is added to the next-instruction address used
         // by the RIP-relative form.
   311   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
         // Encodings 8 and above are reduced by 8 before being packed into the
         // bytes below.
         // NOTE(review): the dropped high bit is presumably supplied by a REX
         // prefix that the caller has already emitted - confirm.
   312   int regenc = reg->encoding();
   313   if (regenc >= 8) {
   314     regenc -= 8;
   315   }
   316   if (base->is_valid()) {
   317     if (index->is_valid()) {
   318       assert(scale != Address::no_scale, "inconsistent address");
   319       int indexenc = index->encoding();
   320       if (indexenc >= 8) {
   321         indexenc -= 8;
   322       }
   323       int baseenc = base->encoding();
   324       if (baseenc >= 8) {
   325         baseenc -= 8;
   326       }
   327       // [base + index*scale + disp]
             // rbp and r13 are kept out of the no-displacement form: with mod bits
             // 00, base bits 101 are taken by the base-less disp32 encodings that
             // are emitted further down.
   328       if (disp == 0 && rtype == relocInfo::none  &&
   329           base != rbp && base != r13) {
   330         // [base + index*scale]
   331         // [00 reg 100][ss index base]
   332         assert(index != rsp, "illegal addressing mode");
   333         emit_byte(0x04 | regenc << 3);
   334         emit_byte(scale << 6 | indexenc << 3 | baseenc);
   335       } else if (is8bit(disp) && rtype == relocInfo::none) {
   336         // [base + index*scale + disp8]
   337         // [01 reg 100][ss index base] disp8
   338         assert(index != rsp, "illegal addressing mode");
   339         emit_byte(0x44 | regenc << 3);
   340         emit_byte(scale << 6 | indexenc << 3 | baseenc);
   341         emit_byte(disp & 0xFF);
   342       } else {
   343         // [base + index*scale + disp32]
   344         // [10 reg 100][ss index base] disp32
   345         assert(index != rsp, "illegal addressing mode");
   346         emit_byte(0x84 | regenc << 3);
   347         emit_byte(scale << 6 | indexenc << 3 | baseenc);
   348         emit_data(disp, rspec, disp32_operand);
   349       }
   350     } else if (base == rsp || base == r12) {
   351       // [rsp + disp] (r12 takes the same path; both get the fixed SIB byte 0x24)
   352       if (disp == 0 && rtype == relocInfo::none) {
   353         // [rsp]
   354         // [00 reg 100][00 100 100]
   355         emit_byte(0x04 | regenc << 3);
   356         emit_byte(0x24);
   357       } else if (is8bit(disp) && rtype == relocInfo::none) {
   358         // [rsp + disp8]
   359         // [01 reg 100][00 100 100] disp8
   360         emit_byte(0x44 | regenc << 3);
   361         emit_byte(0x24);
   362         emit_byte(disp & 0xFF);
   363       } else {
   364         // [rsp + disp32]
   365         // [10 reg 100][00 100 100] disp32
   366         emit_byte(0x84 | regenc << 3);
   367         emit_byte(0x24);
   368         emit_data(disp, rspec, disp32_operand);
   369       }
   370     } else {
   371       // [base + disp]
   372       assert(base != rsp && base != r12, "illegal addressing mode");
   373       int baseenc = base->encoding();
   374       if (baseenc >= 8) {
   375         baseenc -= 8;
   376       }
             // As above, rbp and r13 always get an explicit displacement.
   377       if (disp == 0 && rtype == relocInfo::none &&
   378           base != rbp && base != r13) {
   379         // [base]
   380         // [00 reg base]
   381         emit_byte(0x00 | regenc << 3 | baseenc);
   382       } else if (is8bit(disp) && rtype == relocInfo::none) {
   383         // [base + disp8]
   384         // [01 reg base] disp8
   385         emit_byte(0x40 | regenc << 3 | baseenc);
   386         emit_byte(disp & 0xFF);
   387       } else {
   388         // [base + disp32]
   389         // [10 reg base] disp32
   390         emit_byte(0x80 | regenc << 3 | baseenc);
   391         emit_data(disp, rspec, disp32_operand);
   392       }
   393     }
   394   } else {
           // No base register: every remaining form carries a 32-bit displacement.
   395     if (index->is_valid()) {
   396       assert(scale != Address::no_scale, "inconsistent address");
   397       int indexenc = index->encoding();
   398       if (indexenc >= 8) {
   399         indexenc -= 8;
   400       }
   401       // [index*scale + disp]
   402       // [00 reg 100][ss index 101] disp32
   403       assert(index != rsp, "illegal addressing mode");
   404       emit_byte(0x04 | regenc << 3);
   405       emit_byte(scale << 6 | indexenc << 3 | 0x05);
   406       emit_data(disp, rspec, disp32_operand);
   407 #ifdef _LP64
           // Only compiled when _LP64 is defined: a relocated displacement with
           // neither base nor index is emitted in RIP-relative form.
   408     } else if (rtype != relocInfo::none ) {
   409       // [disp] RIP-RELATIVE
   410       // [00 reg 101] disp32
   412       emit_byte(0x05 | regenc << 3);
   413       // Note that the RIP-rel. correction applies to the generated
   414       // disp field, but _not_ to the target address in the rspec.
   416       // disp was created by converting the target address minus the pc
   417       // at the start of the instruction. That needs more correction here.
   418       // intptr_t disp = target - next_ip;
   419       assert(inst_mark() != NULL, "must be inside InstructionMark");
             // next_ip is the pc after the 4-byte displacement plus
             // rip_relative_correction (presumably the size of any immediate
             // bytes the caller emits after the operand - confirm).
   420       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
   421       int64_t adjusted = (int64_t) disp -  (next_ip - inst_mark());
   422       assert(is_simm32(adjusted),
   423              "must be 32bit offset (RIP relative address)");
   424       emit_data((int) adjusted, rspec, disp32_operand);
   426 #endif // _LP64
   427     } else {
   428       // [disp] ABSOLUTE
   429       // [00 reg 100][00 100 101] disp32
   430       emit_byte(0x04 | regenc << 3);
   431       emit_byte(0x25);
   432       emit_data(disp, rspec, disp32_operand);
   433     }
   434   }
   435 }
   437 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
   438                              Address::ScaleFactor scale, int disp,
   439                              RelocationHolder const& rspec,
   440                              int rip_relative_correction) {
         // XMM-register variant: emits the ModRM byte, the SIB byte where one is
         // needed, and the displacement for the memory operand described by
         // base/index/scale/disp, with 'reg' placed in the reg field of the ModRM
         // byte.
         // The no-displacement and disp8 forms are used only when rspec carries no
         // relocation; otherwise the displacement goes out as a 32-bit word through
         // emit_data() so that the relocation is recorded.
         // rip_relative_correction is added to the next-instruction address used
         // by the RIP-relative form.
   441   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
         // Encodings 8 and above are reduced by 8 before being packed into the
         // bytes below.
         // NOTE(review): the dropped high bit is presumably supplied by a REX
         // prefix that the caller has already emitted - confirm.
   442   int regenc = reg->encoding();
   443   if (regenc >= 8) {
   444     regenc -= 8;
   445   }
   446   if (base->is_valid()) {
   447     if (index->is_valid()) {
   448       assert(scale != Address::no_scale, "inconsistent address");
   449       int indexenc = index->encoding();
   450       if (indexenc >= 8) {
   451         indexenc -= 8;
   452       }
   453       int baseenc = base->encoding();
   454       if (baseenc >= 8) {
   455         baseenc -= 8;
   456       }
   457       // [base + index*scale + disp]
             // rbp and r13 are kept out of the no-displacement form: with mod bits
             // 00, base bits 101 are taken by the base-less disp32 encodings that
             // are emitted further down.
   458       if (disp == 0 && rtype == relocInfo::none  &&
   459           base != rbp && base != r13) {
   460         // [base + index*scale]
   461         // [00 reg 100][ss index base]
   462         assert(index != rsp, "illegal addressing mode");
   463         emit_byte(0x04 | regenc << 3);
   464         emit_byte(scale << 6 | indexenc << 3 | baseenc);
   465       } else if (is8bit(disp) && rtype == relocInfo::none) {
   466         // [base + index*scale + disp8]
   467         // [01 reg 100][ss index base] disp8
   468         assert(index != rsp, "illegal addressing mode");
   469         emit_byte(0x44 | regenc << 3);
   470         emit_byte(scale << 6 | indexenc << 3 | baseenc);
   471         emit_byte(disp & 0xFF);
   472       } else {
   473         // [base + index*scale + disp32]
   474         // [10 reg 100][ss index base] disp32
   475         assert(index != rsp, "illegal addressing mode");
   476         emit_byte(0x84 | regenc << 3);
   477         emit_byte(scale << 6 | indexenc << 3 | baseenc);
   478         emit_data(disp, rspec, disp32_operand);
   479       }
   480     } else if (base == rsp || base == r12) {
   481       // [rsp + disp] (r12 takes the same path; both get the fixed SIB byte 0x24)
   482       if (disp == 0 && rtype == relocInfo::none) {
   483         // [rsp]
   484         // [00 reg 100][00 100 100]
   485         emit_byte(0x04 | regenc << 3);
   486         emit_byte(0x24);
   487       } else if (is8bit(disp) && rtype == relocInfo::none) {
   488         // [rsp + disp8]
   489         // [01 reg 100][00 100 100] disp8
   490         emit_byte(0x44 | regenc << 3);
   491         emit_byte(0x24);
   492         emit_byte(disp & 0xFF);
   493       } else {
   494         // [rsp + disp32]
   495         // [10 reg 100][00 100 100] disp32
   496         emit_byte(0x84 | regenc << 3);
   497         emit_byte(0x24);
   498         emit_data(disp, rspec, disp32_operand);
   499       }
   500     } else {
   501       // [base + disp]
   502       assert(base != rsp && base != r12, "illegal addressing mode");
   503       int baseenc = base->encoding();
   504       if (baseenc >= 8) {
   505         baseenc -= 8;
   506       }
             // As above, rbp and r13 always get an explicit displacement.
   507       if (disp == 0 && rtype == relocInfo::none &&
   508           base != rbp && base != r13) {
   509         // [base]
   510         // [00 reg base]
   511         emit_byte(0x00 | regenc << 3 | baseenc);
   512       } else if (is8bit(disp) && rtype == relocInfo::none) {
   513         // [base + disp8]
   514         // [01 reg base] disp8
   515         emit_byte(0x40 | regenc << 3 | baseenc);
   516         emit_byte(disp & 0xFF);
   517       } else {
   518         // [base + disp32]
   519         // [10 reg base] disp32
   520         emit_byte(0x80 | regenc << 3 | baseenc);
   521         emit_data(disp, rspec, disp32_operand);
   522       }
   523     }
   524   } else {
           // No base register: every remaining form carries a 32-bit displacement.
   525     if (index->is_valid()) {
   526       assert(scale != Address::no_scale, "inconsistent address");
   527       int indexenc = index->encoding();
   528       if (indexenc >= 8) {
   529         indexenc -= 8;
   530       }
   531       // [index*scale + disp]
   532       // [00 reg 100][ss index 101] disp32
   533       assert(index != rsp, "illegal addressing mode");
   534       emit_byte(0x04 | regenc << 3);
   535       emit_byte(scale << 6 | indexenc << 3 | 0x05);
   536       emit_data(disp, rspec, disp32_operand);
   537 #ifdef _LP64
           // Only compiled when _LP64 is defined: a relocated displacement with
           // neither base nor index is emitted in RIP-relative form.
   538     } else if ( rtype != relocInfo::none ) {
   539       // [disp] RIP-RELATIVE
   540       // [00 reg 101] disp32
   541       emit_byte(0x05 | regenc << 3);
   542       // Note that the RIP-rel. correction applies to the generated
   543       // disp field, but _not_ to the target address in the rspec.
   545       // disp was created by converting the target address minus the pc
   546       // at the start of the instruction. That needs more correction here.
   547       // intptr_t disp = target - next_ip;
   549       assert(inst_mark() != NULL, "must be inside InstructionMark");
             // next_ip is the pc after the 4-byte displacement plus
             // rip_relative_correction (presumably the size of any immediate
             // bytes the caller emits after the operand - confirm).
   550       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
   552       int64_t adjusted = (int64_t) disp -  (next_ip - inst_mark());
   553       assert(is_simm32(adjusted),
   554              "must be 32bit offset (RIP relative address)");
   555       emit_data((int) adjusted, rspec, disp32_operand);
   556 #endif // _LP64
   557     } else {
   558       // [disp] ABSOLUTE
   559       // [00 reg 100][00 100 101] disp32
   560       emit_byte(0x04 | regenc << 3);
   561       emit_byte(0x25);
   562       emit_data(disp, rspec, disp32_operand);
   563     }
   564   }
   565 }
   567 // Secret local extension to Assembler::WhichOperand: a pseudo-operand that
       // asks locate_operand() for the end of the instruction rather than for an
       // embedded operand field.
   568 #define end_pc_operand (_WhichOperand_limit)
   570 address Assembler::locate_operand(address inst, WhichOperand which) {
   571   // Decode the given instruction, and return the address of
   572   // an embedded 32-bit operand word.
   574   // If "which" is disp32_operand, selects the displacement portion
   575   // of an effective address specifier.
   576   // If "which" is imm64_operand, selects the trailing immediate constant.
   577   // If "which" is call32_operand, selects the displacement of a call or jump.
   578   // Caller is responsible for ensuring that there is such an operand,
   579   // and that it is 32/64 bits wide.
   581   // If "which" is end_pc_operand, find the end of the instruction.
   583   address ip = inst;
   584   bool is_64bit = false;
   586   debug_only(bool has_disp32 = false);
   587   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
   589   again_after_prefix:
   590   switch (0xFF & *ip++) {
   592   // These convenience macros generate groups of "case" labels for the switch.
   593 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
   594 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
   595              case (x)+4: case (x)+5: case (x)+6: case (x)+7
   596 #define REP16(x) REP8((x)+0): \
   597               case REP8((x)+8)
   599   case CS_segment:
   600   case SS_segment:
   601   case DS_segment:
   602   case ES_segment:
   603   case FS_segment:
   604   case GS_segment:
   605     assert(0, "shouldn't have that prefix");
   606     assert(ip == inst + 1 || ip == inst + 2, "only two prefixes allowed");
   607     goto again_after_prefix;
   609   case 0x67:
   610   case REX:
   611   case REX_B:
   612   case REX_X:
   613   case REX_XB:
   614   case REX_R:
   615   case REX_RB:
   616   case REX_RX:
   617   case REX_RXB:
   618 //     assert(ip == inst + 1, "only one prefix allowed");
   619     goto again_after_prefix;
   621   case REX_W:
   622   case REX_WB:
   623   case REX_WX:
   624   case REX_WXB:
   625   case REX_WR:
   626   case REX_WRB:
   627   case REX_WRX:
   628   case REX_WRXB:
   629     is_64bit = true;
   630 //     assert(ip == inst + 1, "only one prefix allowed");
   631     goto again_after_prefix;
   633   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
   634   case 0x88: // movb a, r
   635   case 0x89: // movl a, r
   636   case 0x8A: // movb r, a
   637   case 0x8B: // movl r, a
   638   case 0x8F: // popl a
   639     debug_only(has_disp32 = true);
   640     break;
   642   case 0x68: // pushq #32
   643     if (which == end_pc_operand) {
   644       return ip + 4;
   645     }
   646     assert(0, "pushq has no disp32 or imm64");
   647     ShouldNotReachHere();
   649   case 0x66: // movw ... (size prefix)
   650     again_after_size_prefix2:
   651     switch (0xFF & *ip++) {
   652     case REX:
   653     case REX_B:
   654     case REX_X:
   655     case REX_XB:
   656     case REX_R:
   657     case REX_RB:
   658     case REX_RX:
   659     case REX_RXB:
   660     case REX_W:
   661     case REX_WB:
   662     case REX_WX:
   663     case REX_WXB:
   664     case REX_WR:
   665     case REX_WRB:
   666     case REX_WRX:
   667     case REX_WRXB:
   668       goto again_after_size_prefix2;
   669     case 0x8B: // movw r, a
   670     case 0x89: // movw a, r
   671       break;
   672     case 0xC7: // movw a, #16
   673       tail_size = 2;  // the imm16
   674       break;
   675     case 0x0F: // several SSE/SSE2 variants
   676       ip--;    // reparse the 0x0F
   677       goto again_after_prefix;
   678     default:
   679       ShouldNotReachHere();
   680     }
   681     break;
   683   case REP8(0xB8): // movl/q r, #32/#64(oop?)
   684     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
   685     assert((which == call32_operand || which == imm64_operand) && is_64bit, "");
   686     return ip;
   688   case 0x69: // imul r, a, #32
   689   case 0xC7: // movl a, #32(oop?)
   690     tail_size = 4;
   691     debug_only(has_disp32 = true); // has both kinds of operands!
   692     break;
   694   case 0x0F: // movx..., etc.
   695     switch (0xFF & *ip++) {
   696     case 0x12: // movlps
   697     case 0x28: // movaps
   698     case 0x2E: // ucomiss
   699     case 0x2F: // comiss
   700     case 0x54: // andps
   701     case 0x57: // xorps
   702     case 0x6E: // movd
   703     case 0x7E: // movd
   704     case 0xAE: // ldmxcsr   a
   705       debug_only(has_disp32 = true); // has both kinds of operands!
   706       break;
   707     case 0xAD: // shrd r, a, %cl
   708     case 0xAF: // imul r, a
   709     case 0xBE: // movsbl r, a
   710     case 0xBF: // movswl r, a
   711     case 0xB6: // movzbl r, a
   712     case 0xB7: // movzwl r, a
   713     case REP16(0x40): // cmovl cc, r, a
   714     case 0xB0: // cmpxchgb
   715     case 0xB1: // cmpxchg
   716     case 0xC1: // xaddl
   717     case 0xC7: // cmpxchg8
   718     case REP16(0x90): // setcc a
   719       debug_only(has_disp32 = true);
   720       // fall out of the switch to decode the address
   721       break;
   722     case 0xAC: // shrd r, a, #8
   723       debug_only(has_disp32 = true);
   724       tail_size = 1;  // the imm8
   725       break;
   726     case REP16(0x80): // jcc rdisp32
   727       if (which == end_pc_operand)  return ip + 4;
   728       assert(which == call32_operand, "jcc has no disp32 or imm64");
   729       return ip;
   730     default:
   731       ShouldNotReachHere();
   732     }
   733     break;
   735   case 0x81: // addl a, #32; addl r, #32
   736     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
   737     tail_size = 4;
   738     debug_only(has_disp32 = true); // has both kinds of operands!
   739     break;
   741   case 0x83: // addl a, #8; addl r, #8
   742     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
   743     debug_only(has_disp32 = true); // has both kinds of operands!
   744     tail_size = 1;
   745     break;
   747   case 0x9B:
   748     switch (0xFF & *ip++) {
   749     case 0xD9: // fnstcw a
   750       debug_only(has_disp32 = true);
   751       break;
   752     default:
   753       ShouldNotReachHere();
   754     }
   755     break;
   757   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
   758   case REP4(0x10): // adc...
   759   case REP4(0x20): // and...
   760   case REP4(0x30): // xor...
   761   case REP4(0x08): // or...
   762   case REP4(0x18): // sbb...
   763   case REP4(0x28): // sub...
   764   case 0xF7: // mull a
   765   case 0x87: // xchg r, a
   766     debug_only(has_disp32 = true);
   767     break;
   768   case REP4(0x38): // cmp...
   769   case 0x8D: // lea r, a
   770   case 0x85: // test r, a
   771     debug_only(has_disp32 = true); // has both kinds of operands!
   772     break;
   774   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
   775   case 0xC6: // movb a, #8
   776   case 0x80: // cmpb a, #8
   777   case 0x6B: // imul r, a, #8
   778     debug_only(has_disp32 = true); // has both kinds of operands!
   779     tail_size = 1; // the imm8
   780     break;
   782   case 0xE8: // call rdisp32
   783   case 0xE9: // jmp  rdisp32
   784     if (which == end_pc_operand)  return ip + 4;
   785     assert(which == call32_operand, "call has no disp32 or imm32");
   786     return ip;
   788   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
   789   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
   790   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
   791   case 0xDD: // fld_d a; fst_d a; fstp_d a
   792   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
   793   case 0xDF: // fild_d a; fistp_d a
   794   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
   795   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
   796   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
   797     debug_only(has_disp32 = true);
   798     break;
   800   case 0xF3:                    // For SSE
   801   case 0xF2:                    // For SSE2
   802     switch (0xFF & *ip++) {
   803     case REX:
   804     case REX_B:
   805     case REX_X:
   806     case REX_XB:
   807     case REX_R:
   808     case REX_RB:
   809     case REX_RX:
   810     case REX_RXB:
   811     case REX_W:
   812     case REX_WB:
   813     case REX_WX:
   814     case REX_WXB:
   815     case REX_WR:
   816     case REX_WRB:
   817     case REX_WRX:
   818     case REX_WRXB:
   819       ip++;
   820     default:
   821       ip++;
   822     }
   823     debug_only(has_disp32 = true); // has both kinds of operands!
   824     break;
   826   default:
   827     ShouldNotReachHere();
   829 #undef REP8
   830 #undef REP16
   831   }
   833   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
   834   assert(which != imm64_operand, "instruction is not a movq reg, imm64");
   835   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
   837   // parse the output of emit_operand
   838   int op2 = 0xFF & *ip++;
   839   int base = op2 & 0x07;
   840   int op3 = -1;
   841   const int b100 = 4;
   842   const int b101 = 5;
   843   if (base == b100 && (op2 >> 6) != 3) {
   844     op3 = 0xFF & *ip++;
   845     base = op3 & 0x07;   // refetch the base
   846   }
   847   // now ip points at the disp (if any)
   849   switch (op2 >> 6) {
   850   case 0:
   851     // [00 reg  100][ss index base]
   852     // [00 reg  100][00   100  esp]
   853     // [00 reg base]
   854     // [00 reg  100][ss index  101][disp32]
   855     // [00 reg  101]               [disp32]
   857     if (base == b101) {
   858       if (which == disp32_operand)
   859         return ip;              // caller wants the disp32
   860       ip += 4;                  // skip the disp32
   861     }
   862     break;
   864   case 1:
   865     // [01 reg  100][ss index base][disp8]
   866     // [01 reg  100][00   100  esp][disp8]
   867     // [01 reg base]               [disp8]
   868     ip += 1;                    // skip the disp8
   869     break;
   871   case 2:
   872     // [10 reg  100][ss index base][disp32]
   873     // [10 reg  100][00   100  esp][disp32]
   874     // [10 reg base]               [disp32]
   875     if (which == disp32_operand)
   876       return ip;                // caller wants the disp32
   877     ip += 4;                    // skip the disp32
   878     break;
   880   case 3:
   881     // [11 reg base]  (not a memory addressing mode)
   882     break;
   883   }
   885   if (which == end_pc_operand) {
   886     return ip + tail_size;
   887   }
   889   assert(0, "fix locate_operand");
   890   return ip;
   891 }
   893 address Assembler::locate_next_instruction(address inst) {
   894   // Secretly share code with locate_operand:
   895   return locate_operand(inst, end_pc_operand);
   896 }
   898 #ifdef ASSERT
   899 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
   900   address inst = inst_mark();
   901   assert(inst != NULL && inst < pc(),
   902          "must point to beginning of instruction");
   903   address opnd;
   905   Relocation* r = rspec.reloc();
   906   if (r->type() == relocInfo::none) {
   907     return;
   908   } else if (r->is_call() || format == call32_operand) {
   909     opnd = locate_operand(inst, call32_operand);
   910   } else if (r->is_data()) {
   911     assert(format == imm64_operand || format == disp32_operand, "format ok");
   912     opnd = locate_operand(inst, (WhichOperand) format);
   913   } else {
   914     assert(format == 0, "cannot specify a format");
   915     return;
   916   }
   917   assert(opnd == pc(), "must put operand where relocs can find it");
   918 }
   919 #endif
   921 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
   922   if (reg_enc >= 8) {
   923     prefix(REX_B);
   924     reg_enc -= 8;
   925   } else if (byteinst && reg_enc >= 4) {
   926     prefix(REX);
   927   }
   928   return reg_enc;
   929 }
   931 int Assembler::prefixq_and_encode(int reg_enc) {
   932   if (reg_enc < 8) {
   933     prefix(REX_W);
   934   } else {
   935     prefix(REX_WB);
   936     reg_enc -= 8;
   937   }
   938   return reg_enc;
   939 }
   941 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
   942   if (dst_enc < 8) {
   943     if (src_enc >= 8) {
   944       prefix(REX_B);
   945       src_enc -= 8;
   946     } else if (byteinst && src_enc >= 4) {
   947       prefix(REX);
   948     }
   949   } else {
   950     if (src_enc < 8) {
   951       prefix(REX_R);
   952     } else {
   953       prefix(REX_RB);
   954       src_enc -= 8;
   955     }
   956     dst_enc -= 8;
   957   }
   958   return dst_enc << 3 | src_enc;
   959 }
   961 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
   962   if (dst_enc < 8) {
   963     if (src_enc < 8) {
   964       prefix(REX_W);
   965     } else {
   966       prefix(REX_WB);
   967       src_enc -= 8;
   968     }
   969   } else {
   970     if (src_enc < 8) {
   971       prefix(REX_WR);
   972     } else {
   973       prefix(REX_WRB);
   974       src_enc -= 8;
   975     }
   976     dst_enc -= 8;
   977   }
   978   return dst_enc << 3 | src_enc;
   979 }
   981 void Assembler::prefix(Register reg) {
   982   if (reg->encoding() >= 8) {
   983     prefix(REX_B);
   984   }
   985 }
   987 void Assembler::prefix(Address adr) {
   988   if (adr.base_needs_rex()) {
   989     if (adr.index_needs_rex()) {
   990       prefix(REX_XB);
   991     } else {
   992       prefix(REX_B);
   993     }
   994   } else {
   995     if (adr.index_needs_rex()) {
   996       prefix(REX_X);
   997     }
   998   }
   999 }
  1001 void Assembler::prefixq(Address adr) {
  1002   if (adr.base_needs_rex()) {
  1003     if (adr.index_needs_rex()) {
  1004       prefix(REX_WXB);
  1005     } else {
  1006       prefix(REX_WB);
  1008   } else {
  1009     if (adr.index_needs_rex()) {
  1010       prefix(REX_WX);
  1011     } else {
  1012       prefix(REX_W);
  1018 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  1019   if (reg->encoding() < 8) {
  1020     if (adr.base_needs_rex()) {
  1021       if (adr.index_needs_rex()) {
  1022         prefix(REX_XB);
  1023       } else {
  1024         prefix(REX_B);
  1026     } else {
  1027       if (adr.index_needs_rex()) {
  1028         prefix(REX_X);
  1029       } else if (reg->encoding() >= 4 ) {
  1030         prefix(REX);
  1033   } else {
  1034     if (adr.base_needs_rex()) {
  1035       if (adr.index_needs_rex()) {
  1036         prefix(REX_RXB);
  1037       } else {
  1038         prefix(REX_RB);
  1040     } else {
  1041       if (adr.index_needs_rex()) {
  1042         prefix(REX_RX);
  1043       } else {
  1044         prefix(REX_R);
  1050 void Assembler::prefixq(Address adr, Register src) {
  1051   if (src->encoding() < 8) {
  1052     if (adr.base_needs_rex()) {
  1053       if (adr.index_needs_rex()) {
  1054         prefix(REX_WXB);
  1055       } else {
  1056         prefix(REX_WB);
  1058     } else {
  1059       if (adr.index_needs_rex()) {
  1060         prefix(REX_WX);
  1061       } else {
  1062         prefix(REX_W);
  1065   } else {
  1066     if (adr.base_needs_rex()) {
  1067       if (adr.index_needs_rex()) {
  1068         prefix(REX_WRXB);
  1069       } else {
  1070         prefix(REX_WRB);
  1072     } else {
  1073       if (adr.index_needs_rex()) {
  1074         prefix(REX_WRX);
  1075       } else {
  1076         prefix(REX_WR);
  1082 void Assembler::prefix(Address adr, XMMRegister reg) {
  1083   if (reg->encoding() < 8) {
  1084     if (adr.base_needs_rex()) {
  1085       if (adr.index_needs_rex()) {
  1086         prefix(REX_XB);
  1087       } else {
  1088         prefix(REX_B);
  1090     } else {
  1091       if (adr.index_needs_rex()) {
  1092         prefix(REX_X);
  1095   } else {
  1096     if (adr.base_needs_rex()) {
  1097       if (adr.index_needs_rex()) {
  1098         prefix(REX_RXB);
  1099       } else {
  1100         prefix(REX_RB);
  1102     } else {
  1103       if (adr.index_needs_rex()) {
  1104         prefix(REX_RX);
  1105       } else {
  1106         prefix(REX_R);
  1112 void Assembler::emit_operand(Register reg, Address adr,
  1113                              int rip_relative_correction) {
  1114   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
  1115                adr._rspec,
  1116                rip_relative_correction);
  1119 void Assembler::emit_operand(XMMRegister reg, Address adr,
  1120                              int rip_relative_correction) {
  1121   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
  1122                adr._rspec,
  1123                rip_relative_correction);
  1126 void Assembler::emit_farith(int b1, int b2, int i) {
  1127   assert(isByte(b1) && isByte(b2), "wrong opcode");
  1128   assert(0 <= i &&  i < 8, "illegal stack offset");
  1129   emit_byte(b1);
  1130   emit_byte(b2 + i);
  1133 // pushad is invalid, use this instead.
  1134 // NOTE: Kills flags!!
  1135 void Assembler::pushaq() {
  1136   // we have to store original rsp.  ABI says that 128 bytes
  1137   // below rsp are local scratch.
  1138   movq(Address(rsp, -5 * wordSize), rsp);
  1140   subq(rsp, 16 * wordSize);
  1142   movq(Address(rsp, 15 * wordSize), rax);
  1143   movq(Address(rsp, 14 * wordSize), rcx);
  1144   movq(Address(rsp, 13 * wordSize), rdx);
  1145   movq(Address(rsp, 12 * wordSize), rbx);
  1146   // skip rsp
  1147   movq(Address(rsp, 10 * wordSize), rbp);
  1148   movq(Address(rsp, 9 * wordSize), rsi);
  1149   movq(Address(rsp, 8 * wordSize), rdi);
  1150   movq(Address(rsp, 7 * wordSize), r8);
  1151   movq(Address(rsp, 6 * wordSize), r9);
  1152   movq(Address(rsp, 5 * wordSize), r10);
  1153   movq(Address(rsp, 4 * wordSize), r11);
  1154   movq(Address(rsp, 3 * wordSize), r12);
  1155   movq(Address(rsp, 2 * wordSize), r13);
  1156   movq(Address(rsp, wordSize), r14);
  1157   movq(Address(rsp, 0), r15);
  1160 // popad is invalid, use this instead
  1161 // NOTE: Kills flags!!
  1162 void Assembler::popaq() {
  1163   movq(r15, Address(rsp, 0));
  1164   movq(r14, Address(rsp, wordSize));
  1165   movq(r13, Address(rsp, 2 * wordSize));
  1166   movq(r12, Address(rsp, 3 * wordSize));
  1167   movq(r11, Address(rsp, 4 * wordSize));
  1168   movq(r10, Address(rsp, 5 * wordSize));
  1169   movq(r9,  Address(rsp, 6 * wordSize));
  1170   movq(r8,  Address(rsp, 7 * wordSize));
  1171   movq(rdi, Address(rsp, 8 * wordSize));
  1172   movq(rsi, Address(rsp, 9 * wordSize));
  1173   movq(rbp, Address(rsp, 10 * wordSize));
  1174   // skip rsp
  1175   movq(rbx, Address(rsp, 12 * wordSize));
  1176   movq(rdx, Address(rsp, 13 * wordSize));
  1177   movq(rcx, Address(rsp, 14 * wordSize));
  1178   movq(rax, Address(rsp, 15 * wordSize));
  1180   addq(rsp, 16 * wordSize);
  1183 void Assembler::pushfq() {
  1184   emit_byte(0x9C);
  1187 void Assembler::popfq() {
  1188   emit_byte(0x9D);
  1191 void Assembler::pushq(int imm32) {
  1192   emit_byte(0x68);
  1193   emit_long(imm32);
  1196 void Assembler::pushq(Register src) {
  1197   int encode = prefix_and_encode(src->encoding());
  1199   emit_byte(0x50 | encode);
  1202 void Assembler::pushq(Address src) {
  1203   InstructionMark im(this);
  1204   prefix(src);
  1205   emit_byte(0xFF);
  1206   emit_operand(rsi, src);
  1209 void Assembler::popq(Register dst) {
  1210   int encode = prefix_and_encode(dst->encoding());
  1211   emit_byte(0x58 | encode);
  1214 void Assembler::popq(Address dst) {
  1215   InstructionMark im(this);
  1216   prefix(dst);
  1217   emit_byte(0x8F);
  1218   emit_operand(rax, dst);
  1221 void Assembler::prefix(Prefix p) {
  1222   a_byte(p);
  1225 void Assembler::movb(Register dst, Address src) {
  1226   InstructionMark im(this);
  1227   prefix(src, dst, true);
  1228   emit_byte(0x8A);
  1229   emit_operand(dst, src);
  1232 void Assembler::movb(Address dst, int imm8) {
  1233   InstructionMark im(this);
  1234   prefix(dst);
  1235   emit_byte(0xC6);
  1236   emit_operand(rax, dst, 1);
  1237   emit_byte(imm8);
  1240 void Assembler::movb(Address dst, Register src) {
  1241   InstructionMark im(this);
  1242   prefix(dst, src, true);
  1243   emit_byte(0x88);
  1244   emit_operand(src, dst);
  1247 void Assembler::movw(Address dst, int imm16) {
  1248   InstructionMark im(this);
  1249   emit_byte(0x66); // switch to 16-bit mode
  1250   prefix(dst);
  1251   emit_byte(0xC7);
  1252   emit_operand(rax, dst, 2);
  1253   emit_word(imm16);
  1256 void Assembler::movw(Register dst, Address src) {
  1257   InstructionMark im(this);
  1258   emit_byte(0x66);
  1259   prefix(src, dst);
  1260   emit_byte(0x8B);
  1261   emit_operand(dst, src);
  1264 void Assembler::movw(Address dst, Register src) {
  1265   InstructionMark im(this);
  1266   emit_byte(0x66);
  1267   prefix(dst, src);
  1268   emit_byte(0x89);
  1269   emit_operand(src, dst);
  1272 // Uses zero extension.
  1273 void Assembler::movl(Register dst, int imm32) {
  1274   int encode = prefix_and_encode(dst->encoding());
  1275   emit_byte(0xB8 | encode);
  1276   emit_long(imm32);
  1279 void Assembler::movl(Register dst, Register src) {
  1280   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1281   emit_byte(0x8B);
  1282   emit_byte(0xC0 | encode);
  1285 void Assembler::movl(Register dst, Address src) {
  1286   InstructionMark im(this);
  1287   prefix(src, dst);
  1288   emit_byte(0x8B);
  1289   emit_operand(dst, src);
  1292 void Assembler::movl(Address dst, int imm32) {
  1293   InstructionMark im(this);
  1294   prefix(dst);
  1295   emit_byte(0xC7);
  1296   emit_operand(rax, dst, 4);
  1297   emit_long(imm32);
  1300 void Assembler::movl(Address dst, Register src) {
  1301   InstructionMark im(this);
  1302   prefix(dst, src);
  1303   emit_byte(0x89);
  1304   emit_operand(src, dst);
  1307 void Assembler::mov64(Register dst, int64_t imm64) {
  1308   InstructionMark im(this);
  1309   int encode = prefixq_and_encode(dst->encoding());
  1310   emit_byte(0xB8 | encode);
  1311   emit_long64(imm64);
  1314 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  1315   InstructionMark im(this);
  1316   int encode = prefixq_and_encode(dst->encoding());
  1317   emit_byte(0xB8 | encode);
  1318   emit_data64(imm64, rspec);
  1321 void Assembler::movq(Register dst, Register src) {
  1322   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  1323   emit_byte(0x8B);
  1324   emit_byte(0xC0 | encode);
  1327 void Assembler::movq(Register dst, Address src) {
  1328   InstructionMark im(this);
  1329   prefixq(src, dst);
  1330   emit_byte(0x8B);
  1331   emit_operand(dst, src);
  1334 void Assembler::mov64(Address dst, int64_t imm32) {
  1335   assert(is_simm32(imm32), "lost bits");
  1336   InstructionMark im(this);
  1337   prefixq(dst);
  1338   emit_byte(0xC7);
  1339   emit_operand(rax, dst, 4);
  1340   emit_long(imm32);
  1343 void Assembler::movq(Address dst, Register src) {
  1344   InstructionMark im(this);
  1345   prefixq(dst, src);
  1346   emit_byte(0x89);
  1347   emit_operand(src, dst);
  1350 void Assembler::movsbl(Register dst, Address src) {
  1351   InstructionMark im(this);
  1352   prefix(src, dst);
  1353   emit_byte(0x0F);
  1354   emit_byte(0xBE);
  1355   emit_operand(dst, src);
  1358 void Assembler::movsbl(Register dst, Register src) {
  1359   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  1360   emit_byte(0x0F);
  1361   emit_byte(0xBE);
  1362   emit_byte(0xC0 | encode);
  1365 void Assembler::movswl(Register dst, Address src) {
  1366   InstructionMark im(this);
  1367   prefix(src, dst);
  1368   emit_byte(0x0F);
  1369   emit_byte(0xBF);
  1370   emit_operand(dst, src);
  1373 void Assembler::movswl(Register dst, Register src) {
  1374   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1375   emit_byte(0x0F);
  1376   emit_byte(0xBF);
  1377   emit_byte(0xC0 | encode);
  1380 void Assembler::movslq(Register dst, Address src) {
  1381   InstructionMark im(this);
  1382   prefixq(src, dst);
  1383   emit_byte(0x63);
  1384   emit_operand(dst, src);
  1387 void Assembler::movslq(Register dst, Register src) {
  1388   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  1389   emit_byte(0x63);
  1390   emit_byte(0xC0 | encode);
  1393 void Assembler::movzbl(Register dst, Address src) {
  1394   InstructionMark im(this);
  1395   prefix(src, dst);
  1396   emit_byte(0x0F);
  1397   emit_byte(0xB6);
  1398   emit_operand(dst, src);
  1401 void Assembler::movzbl(Register dst, Register src) {
  1402   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  1403   emit_byte(0x0F);
  1404   emit_byte(0xB6);
  1405   emit_byte(0xC0 | encode);
  1408 void Assembler::movzwl(Register dst, Address src) {
  1409   InstructionMark im(this);
  1410   prefix(src, dst);
  1411   emit_byte(0x0F);
  1412   emit_byte(0xB7);
  1413   emit_operand(dst, src);
  1416 void Assembler::movzwl(Register dst, Register src) {
  1417   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1418   emit_byte(0x0F);
  1419   emit_byte(0xB7);
  1420   emit_byte(0xC0 | encode);
  1423 void Assembler::movss(XMMRegister dst, XMMRegister src) {
  1424   emit_byte(0xF3);
  1425   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1426   emit_byte(0x0F);
  1427   emit_byte(0x10);
  1428   emit_byte(0xC0 | encode);
  1431 void Assembler::movss(XMMRegister dst, Address src) {
  1432   InstructionMark im(this);
  1433   emit_byte(0xF3);
  1434   prefix(src, dst);
  1435   emit_byte(0x0F);
  1436   emit_byte(0x10);
  1437   emit_operand(dst, src);
  1440 void Assembler::movss(Address dst, XMMRegister src) {
  1441   InstructionMark im(this);
  1442   emit_byte(0xF3);
  1443   prefix(dst, src);
  1444   emit_byte(0x0F);
  1445   emit_byte(0x11);
  1446   emit_operand(src, dst);
  1449 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  1450   emit_byte(0xF2);
  1451   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1452   emit_byte(0x0F);
  1453   emit_byte(0x10);
  1454   emit_byte(0xC0 | encode);
  1457 void Assembler::movsd(XMMRegister dst, Address src) {
  1458   InstructionMark im(this);
  1459   emit_byte(0xF2);
  1460   prefix(src, dst);
  1461   emit_byte(0x0F);
  1462   emit_byte(0x10);
  1463   emit_operand(dst, src);
  1466 void Assembler::movsd(Address dst, XMMRegister src) {
  1467   InstructionMark im(this);
  1468   emit_byte(0xF2);
  1469   prefix(dst, src);
  1470   emit_byte(0x0F);
  1471   emit_byte(0x11);
  1472   emit_operand(src, dst);
  1475 // New cpus require to use movsd and movss to avoid partial register stall
  1476 // when loading from memory. But for old Opteron use movlpd instead of movsd.
  1477 // The selection is done in MacroAssembler::movdbl() and movflt().
  1478 void Assembler::movlpd(XMMRegister dst, Address src) {
  1479   InstructionMark im(this);
  1480   emit_byte(0x66);
  1481   prefix(src, dst);
  1482   emit_byte(0x0F);
  1483   emit_byte(0x12);
  1484   emit_operand(dst, src);
  1487 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  1488   int dstenc = dst->encoding();
  1489   int srcenc = src->encoding();
  1490   emit_byte(0x66);
  1491   if (dstenc < 8) {
  1492     if (srcenc >= 8) {
  1493       prefix(REX_B);
  1494       srcenc -= 8;
  1496   } else {
  1497     if (srcenc < 8) {
  1498       prefix(REX_R);
  1499     } else {
  1500       prefix(REX_RB);
  1501       srcenc -= 8;
  1503     dstenc -= 8;
  1505   emit_byte(0x0F);
  1506   emit_byte(0x28);
  1507   emit_byte(0xC0 | dstenc << 3 | srcenc);
  1510 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  1511   int dstenc = dst->encoding();
  1512   int srcenc = src->encoding();
  1513   if (dstenc < 8) {
  1514     if (srcenc >= 8) {
  1515       prefix(REX_B);
  1516       srcenc -= 8;
  1518   } else {
  1519     if (srcenc < 8) {
  1520       prefix(REX_R);
  1521     } else {
  1522       prefix(REX_RB);
  1523       srcenc -= 8;
  1525     dstenc -= 8;
  1527   emit_byte(0x0F);
  1528   emit_byte(0x28);
  1529   emit_byte(0xC0 | dstenc << 3 | srcenc);
  1532 void Assembler::movdl(XMMRegister dst, Register src) {
  1533   emit_byte(0x66);
  1534   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1535   emit_byte(0x0F);
  1536   emit_byte(0x6E);
  1537   emit_byte(0xC0 | encode);
  1540 void Assembler::movdl(Register dst, XMMRegister src) {
  1541   emit_byte(0x66);
  1542   // swap src/dst to get correct prefix
  1543   int encode = prefix_and_encode(src->encoding(), dst->encoding());
  1544   emit_byte(0x0F);
  1545   emit_byte(0x7E);
  1546   emit_byte(0xC0 | encode);
  1549 void Assembler::movdq(XMMRegister dst, Register src) {
  1550   emit_byte(0x66);
  1551   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  1552   emit_byte(0x0F);
  1553   emit_byte(0x6E);
  1554   emit_byte(0xC0 | encode);
  1557 void Assembler::movdq(Register dst, XMMRegister src) {
  1558   emit_byte(0x66);
  1559   // swap src/dst to get correct prefix
  1560   int encode = prefixq_and_encode(src->encoding(), dst->encoding());
  1561   emit_byte(0x0F);
  1562   emit_byte(0x7E);
  1563   emit_byte(0xC0 | encode);
  1566 void Assembler::pxor(XMMRegister dst, Address src) {
  1567   InstructionMark im(this);
  1568   emit_byte(0x66);
  1569   prefix(src, dst);
  1570   emit_byte(0x0F);
  1571   emit_byte(0xEF);
  1572   emit_operand(dst, src);
  1575 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  1576   InstructionMark im(this);
  1577   emit_byte(0x66);
  1578   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1579   emit_byte(0x0F);
  1580   emit_byte(0xEF);
  1581   emit_byte(0xC0 | encode);
  1584 void Assembler::movdqa(XMMRegister dst, Address src) {
  1585   InstructionMark im(this);
  1586   emit_byte(0x66);
  1587   prefix(src, dst);
  1588   emit_byte(0x0F);
  1589   emit_byte(0x6F);
  1590   emit_operand(dst, src);
  1593 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  1594   emit_byte(0x66);
  1595   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  1596   emit_byte(0x0F);
  1597   emit_byte(0x6F);
  1598   emit_byte(0xC0 | encode);
  1601 void Assembler::movdqa(Address dst, XMMRegister src) {
  1602   InstructionMark im(this);
  1603   emit_byte(0x66);
  1604   prefix(dst, src);
  1605   emit_byte(0x0F);
  1606   emit_byte(0x7F);
  1607   emit_operand(src, dst);
  1610 void Assembler::movq(XMMRegister dst, Address src) {
  1611   InstructionMark im(this);
  1612   emit_byte(0xF3);
  1613   prefix(src, dst);
  1614   emit_byte(0x0F);
  1615   emit_byte(0x7E);
  1616   emit_operand(dst, src);
  1619 void Assembler::movq(Address dst, XMMRegister src) {
  1620   InstructionMark im(this);
  1621   emit_byte(0x66);
  1622   prefix(dst, src);
  1623   emit_byte(0x0F);
  1624   emit_byte(0xD6);
  1625   emit_operand(src, dst);
  1628 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  1629   assert(isByte(mode), "invalid value");
  1630   emit_byte(0x66);
  1631   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1632   emit_byte(0x0F);
  1633   emit_byte(0x70);
  1634   emit_byte(0xC0 | encode);
  1635   emit_byte(mode & 0xFF);
  1638 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  1639   assert(isByte(mode), "invalid value");
  1640   InstructionMark im(this);
  1641   emit_byte(0x66);
  1642   emit_byte(0x0F);
  1643   emit_byte(0x70);
  1644   emit_operand(dst, src);
  1645   emit_byte(mode & 0xFF);
  1648 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  1649   assert(isByte(mode), "invalid value");
  1650   emit_byte(0xF2);
  1651   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1652   emit_byte(0x0F);
  1653   emit_byte(0x70);
  1654   emit_byte(0xC0 | encode);
  1655   emit_byte(mode & 0xFF);
  1658 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  1659   assert(isByte(mode), "invalid value");
  1660   InstructionMark im(this);
  1661   emit_byte(0xF2);
  1662   emit_byte(0x0F);
  1663   emit_byte(0x70);
  1664   emit_operand(dst, src);
  1665   emit_byte(mode & 0xFF);
  1668 void Assembler::cmovl(Condition cc, Register dst, Register src) {
  1669   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1670   emit_byte(0x0F);
  1671   emit_byte(0x40 | cc);
  1672   emit_byte(0xC0 | encode);
  1675 void Assembler::cmovl(Condition cc, Register dst, Address src) {
  1676   InstructionMark im(this);
  1677   prefix(src, dst);
  1678   emit_byte(0x0F);
  1679   emit_byte(0x40 | cc);
  1680   emit_operand(dst, src);
  1683 void Assembler::cmovq(Condition cc, Register dst, Register src) {
  1684   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  1685   emit_byte(0x0F);
  1686   emit_byte(0x40 | cc);
  1687   emit_byte(0xC0 | encode);
  1690 void Assembler::cmovq(Condition cc, Register dst, Address src) {
  1691   InstructionMark im(this);
  1692   prefixq(src, dst);
  1693   emit_byte(0x0F);
  1694   emit_byte(0x40 | cc);
  1695   emit_operand(dst, src);
  1698 void Assembler::prefetch_prefix(Address src) {
  1699   prefix(src);
  1700   emit_byte(0x0F);
  1703 void Assembler::prefetcht0(Address src) {
  1704   InstructionMark im(this);
  1705   prefetch_prefix(src);
  1706   emit_byte(0x18);
  1707   emit_operand(rcx, src); // 1, src
  1710 void Assembler::prefetcht1(Address src) {
  1711   InstructionMark im(this);
  1712   prefetch_prefix(src);
  1713   emit_byte(0x18);
  1714   emit_operand(rdx, src); // 2, src
  1717 void Assembler::prefetcht2(Address src) {
  1718   InstructionMark im(this);
  1719   prefetch_prefix(src);
  1720   emit_byte(0x18);
  1721   emit_operand(rbx, src); // 3, src
  1724 void Assembler::prefetchnta(Address src) {
  1725   InstructionMark im(this);
  1726   prefetch_prefix(src);
  1727   emit_byte(0x18);
  1728   emit_operand(rax, src); // 0, src
  1731 void Assembler::prefetchw(Address src) {
  1732   InstructionMark im(this);
  1733   prefetch_prefix(src);
  1734   emit_byte(0x0D);
  1735   emit_operand(rcx, src); // 1, src
  1738 void Assembler::adcl(Register dst, int imm32) {
  1739   prefix(dst);
  1740   emit_arith(0x81, 0xD0, dst, imm32);
  1743 void Assembler::adcl(Register dst, Address src) {
  1744   InstructionMark im(this);
  1745   prefix(src, dst);
  1746   emit_byte(0x13);
  1747   emit_operand(dst, src);
  1750 void Assembler::adcl(Register dst, Register src) {
  1751   (void) prefix_and_encode(dst->encoding(), src->encoding());
  1752   emit_arith(0x13, 0xC0, dst, src);
  1755 void Assembler::adcq(Register dst, int imm32) {
  1756   (void) prefixq_and_encode(dst->encoding());
  1757   emit_arith(0x81, 0xD0, dst, imm32);
  1760 void Assembler::adcq(Register dst, Address src) {
  1761   InstructionMark im(this);
  1762   prefixq(src, dst);
  1763   emit_byte(0x13);
  1764   emit_operand(dst, src);
  1767 void Assembler::adcq(Register dst, Register src) {
  1768   (int) prefixq_and_encode(dst->encoding(), src->encoding());
  1769   emit_arith(0x13, 0xC0, dst, src);
  1772 void Assembler::addl(Address dst, int imm32) {
  1773   InstructionMark im(this);
  1774   prefix(dst);
  1775   emit_arith_operand(0x81, rax, dst,imm32);
  1778 void Assembler::addl(Address dst, Register src) {
  1779   InstructionMark im(this);
  1780   prefix(dst, src);
  1781   emit_byte(0x01);
  1782   emit_operand(src, dst);
  1785 void Assembler::addl(Register dst, int imm32) {
  1786   prefix(dst);
  1787   emit_arith(0x81, 0xC0, dst, imm32);
  1790 void Assembler::addl(Register dst, Address src) {
  1791   InstructionMark im(this);
  1792   prefix(src, dst);
  1793   emit_byte(0x03);
  1794   emit_operand(dst, src);
  1797 void Assembler::addl(Register dst, Register src) {
  1798   (void) prefix_and_encode(dst->encoding(), src->encoding());
  1799   emit_arith(0x03, 0xC0, dst, src);
  1802 void Assembler::addq(Address dst, int imm32) {
  1803   InstructionMark im(this);
  1804   prefixq(dst);
  1805   emit_arith_operand(0x81, rax, dst,imm32);
  1808 void Assembler::addq(Address dst, Register src) {
  1809   InstructionMark im(this);
  1810   prefixq(dst, src);
  1811   emit_byte(0x01);
  1812   emit_operand(src, dst);
  1815 void Assembler::addq(Register dst, int imm32) {
  1816   (void) prefixq_and_encode(dst->encoding());
  1817   emit_arith(0x81, 0xC0, dst, imm32);
  1820 void Assembler::addq(Register dst, Address src) {
  1821   InstructionMark im(this);
  1822   prefixq(src, dst);
  1823   emit_byte(0x03);
  1824   emit_operand(dst, src);
  1827 void Assembler::addq(Register dst, Register src) {
  1828   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  1829   emit_arith(0x03, 0xC0, dst, src);
  1832 void Assembler::andl(Register dst, int imm32) {
  1833   prefix(dst);
  1834   emit_arith(0x81, 0xE0, dst, imm32);
  1837 void Assembler::andl(Register dst, Address src) {
  1838   InstructionMark im(this);
  1839   prefix(src, dst);
  1840   emit_byte(0x23);
  1841   emit_operand(dst, src);
  1844 void Assembler::andl(Register dst, Register src) {
  1845   (void) prefix_and_encode(dst->encoding(), src->encoding());
  1846   emit_arith(0x23, 0xC0, dst, src);
  1849 void Assembler::andq(Register dst, int imm32) {
  1850   (void) prefixq_and_encode(dst->encoding());
  1851   emit_arith(0x81, 0xE0, dst, imm32);
  1854 void Assembler::andq(Register dst, Address src) {
  1855   InstructionMark im(this);
  1856   prefixq(src, dst);
  1857   emit_byte(0x23);
  1858   emit_operand(dst, src);
  1861 void Assembler::andq(Register dst, Register src) {
  1862   (int) prefixq_and_encode(dst->encoding(), src->encoding());
  1863   emit_arith(0x23, 0xC0, dst, src);
  1866 void Assembler::cmpb(Address dst, int imm8) {
  1867   InstructionMark im(this);
  1868   prefix(dst);
  1869   emit_byte(0x80);
  1870   emit_operand(rdi, dst, 1);
  1871   emit_byte(imm8);
  1874 void Assembler::cmpl(Address dst, int imm32) {
  1875   InstructionMark im(this);
  1876   prefix(dst);
  1877   emit_byte(0x81);
  1878   emit_operand(rdi, dst, 4);
  1879   emit_long(imm32);
  1882 void Assembler::cmpl(Register dst, int imm32) {
  1883   prefix(dst);
  1884   emit_arith(0x81, 0xF8, dst, imm32);
  1887 void Assembler::cmpl(Register dst, Register src) {
  1888   (void) prefix_and_encode(dst->encoding(), src->encoding());
  1889   emit_arith(0x3B, 0xC0, dst, src);
  1892 void Assembler::cmpl(Register dst, Address src) {
  1893   InstructionMark im(this);
  1894   prefix(src, dst);
  1895   emit_byte(0x3B);
  1896   emit_operand(dst, src);
  1899 void Assembler::cmpq(Address dst, int imm32) {
  1900   InstructionMark im(this);
  1901   prefixq(dst);
  1902   emit_byte(0x81);
  1903   emit_operand(rdi, dst, 4);
  1904   emit_long(imm32);
  1907 void Assembler::cmpq(Register dst, int imm32) {
  1908   (void) prefixq_and_encode(dst->encoding());
  1909   emit_arith(0x81, 0xF8, dst, imm32);
  1912 void Assembler::cmpq(Address dst, Register src) {
  1913   prefixq(dst, src);
  1914   emit_byte(0x3B);
  1915   emit_operand(src, dst);
  1918 void Assembler::cmpq(Register dst, Register src) {
  1919   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  1920   emit_arith(0x3B, 0xC0, dst, src);
  1923 void Assembler::cmpq(Register dst, Address  src) {
  1924   InstructionMark im(this);
  1925   prefixq(src, dst);
  1926   emit_byte(0x3B);
  1927   emit_operand(dst, src);
  1930 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  1931   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1932   emit_byte(0x0F);
  1933   emit_byte(0x2E);
  1934   emit_byte(0xC0 | encode);
  1937 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  1938   emit_byte(0x66);
  1939   ucomiss(dst, src);
  1942 void Assembler::decl(Register dst) {
  1943   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  1944   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  1945   int encode = prefix_and_encode(dst->encoding());
  1946   emit_byte(0xFF);
  1947   emit_byte(0xC8 | encode);
  1950 void Assembler::decl(Address dst) {
  1951   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  1952   InstructionMark im(this);
  1953   prefix(dst);
  1954   emit_byte(0xFF);
  1955   emit_operand(rcx, dst);
  1958 void Assembler::decq(Register dst) {
  1959   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  1960   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  1961   int encode = prefixq_and_encode(dst->encoding());
  1962   emit_byte(0xFF);
  1963   emit_byte(0xC8 | encode);
  1966 void Assembler::decq(Address dst) {
  1967   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  1968   InstructionMark im(this);
  1969   prefixq(dst);
  1970   emit_byte(0xFF);
  1971   emit_operand(rcx, dst);
  1974 void Assembler::idivl(Register src) {
  1975   int encode = prefix_and_encode(src->encoding());
  1976   emit_byte(0xF7);
  1977   emit_byte(0xF8 | encode);
  1980 void Assembler::idivq(Register src) {
  1981   int encode = prefixq_and_encode(src->encoding());
  1982   emit_byte(0xF7);
  1983   emit_byte(0xF8 | encode);
  1986 void Assembler::cdql() {
  1987   emit_byte(0x99);
  1990 void Assembler::cdqq() {
  1991   prefix(REX_W);
  1992   emit_byte(0x99);
  1995 void Assembler::imull(Register dst, Register src) {
  1996   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  1997   emit_byte(0x0F);
  1998   emit_byte(0xAF);
  1999   emit_byte(0xC0 | encode);
  2002 void Assembler::imull(Register dst, Register src, int value) {
  2003   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2004   if (is8bit(value)) {
  2005     emit_byte(0x6B);
  2006     emit_byte(0xC0 | encode);
  2007     emit_byte(value);
  2008   } else {
  2009     emit_byte(0x69);
  2010     emit_byte(0xC0 | encode);
  2011     emit_long(value);
  2015 void Assembler::imulq(Register dst, Register src) {
  2016   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  2017   emit_byte(0x0F);
  2018   emit_byte(0xAF);
  2019   emit_byte(0xC0 | encode);
  2022 void Assembler::imulq(Register dst, Register src, int value) {
  2023   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  2024   if (is8bit(value)) {
  2025     emit_byte(0x6B);
  2026     emit_byte(0xC0 | encode);
  2027     emit_byte(value);
  2028   } else {
  2029     emit_byte(0x69);
  2030     emit_byte(0xC0 | encode);
  2031     emit_long(value);
  2035 void Assembler::incl(Register dst) {
  2036   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  2037   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  2038   int encode = prefix_and_encode(dst->encoding());
  2039   emit_byte(0xFF);
  2040   emit_byte(0xC0 | encode);
  2043 void Assembler::incl(Address dst) {
  2044   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  2045   InstructionMark im(this);
  2046   prefix(dst);
  2047   emit_byte(0xFF);
  2048   emit_operand(rax, dst);
  2051 void Assembler::incq(Register dst) {
  2052   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  2053   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  2054   int encode = prefixq_and_encode(dst->encoding());
  2055   emit_byte(0xFF);
  2056   emit_byte(0xC0 | encode);
  2059 void Assembler::incq(Address dst) {
  2060   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  2061   InstructionMark im(this);
  2062   prefixq(dst);
  2063   emit_byte(0xFF);
  2064   emit_operand(rax, dst);
  2067 void Assembler::leal(Register dst, Address src) {
  2068   InstructionMark im(this);
  2069   emit_byte(0x67); // addr32
  2070   prefix(src, dst);
  2071   emit_byte(0x8D);
  2072   emit_operand(dst, src);
  2075 void Assembler::leaq(Register dst, Address src) {
  2076   InstructionMark im(this);
  2077   prefixq(src, dst);
  2078   emit_byte(0x8D);
  2079   emit_operand(dst, src);
  2082 void Assembler::mull(Address src) {
  2083   InstructionMark im(this);
  2084   // was missing
  2085   prefix(src);
  2086   emit_byte(0xF7);
  2087   emit_operand(rsp, src);
  2090 void Assembler::mull(Register src) {
  2091   // was missing
  2092   int encode = prefix_and_encode(src->encoding());
  2093   emit_byte(0xF7);
  2094   emit_byte(0xE0 | encode);
  2097 void Assembler::negl(Register dst) {
  2098   int encode = prefix_and_encode(dst->encoding());
  2099   emit_byte(0xF7);
  2100   emit_byte(0xD8 | encode);
  2103 void Assembler::negq(Register dst) {
  2104   int encode = prefixq_and_encode(dst->encoding());
  2105   emit_byte(0xF7);
  2106   emit_byte(0xD8 | encode);
  2109 void Assembler::notl(Register dst) {
  2110   int encode = prefix_and_encode(dst->encoding());
  2111   emit_byte(0xF7);
  2112   emit_byte(0xD0 | encode);
  2115 void Assembler::notq(Register dst) {
  2116   int encode = prefixq_and_encode(dst->encoding());
  2117   emit_byte(0xF7);
  2118   emit_byte(0xD0 | encode);
  2121 void Assembler::orl(Address dst, int imm32) {
  2122   InstructionMark im(this);
  2123   prefix(dst);
  2124   emit_byte(0x81);
  2125   emit_operand(rcx, dst, 4);
  2126   emit_long(imm32);
  2129 void Assembler::orl(Register dst, int imm32) {
  2130   prefix(dst);
  2131   emit_arith(0x81, 0xC8, dst, imm32);
  2134 void Assembler::orl(Register dst, Address src) {
  2135   InstructionMark im(this);
  2136   prefix(src, dst);
  2137   emit_byte(0x0B);
  2138   emit_operand(dst, src);
  2141 void Assembler::orl(Register dst, Register src) {
  2142   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2143   emit_arith(0x0B, 0xC0, dst, src);
  2146 void Assembler::orq(Address dst, int imm32) {
  2147   InstructionMark im(this);
  2148   prefixq(dst);
  2149   emit_byte(0x81);
  2150   emit_operand(rcx, dst, 4);
  2151   emit_long(imm32);
  2154 void Assembler::orq(Register dst, int imm32) {
  2155   (void) prefixq_and_encode(dst->encoding());
  2156   emit_arith(0x81, 0xC8, dst, imm32);
  2159 void Assembler::orq(Register dst, Address src) {
  2160   InstructionMark im(this);
  2161   prefixq(src, dst);
  2162   emit_byte(0x0B);
  2163   emit_operand(dst, src);
  2166 void Assembler::orq(Register dst, Register src) {
  2167   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  2168   emit_arith(0x0B, 0xC0, dst, src);
  2171 void Assembler::rcll(Register dst, int imm8) {
  2172   assert(isShiftCount(imm8), "illegal shift count");
  2173   int encode = prefix_and_encode(dst->encoding());
  2174   if (imm8 == 1) {
  2175     emit_byte(0xD1);
  2176     emit_byte(0xD0 | encode);
  2177   } else {
  2178     emit_byte(0xC1);
  2179     emit_byte(0xD0 | encode);
  2180     emit_byte(imm8);
  2184 void Assembler::rclq(Register dst, int imm8) {
  2185   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  2186   int encode = prefixq_and_encode(dst->encoding());
  2187   if (imm8 == 1) {
  2188     emit_byte(0xD1);
  2189     emit_byte(0xD0 | encode);
  2190   } else {
  2191     emit_byte(0xC1);
  2192     emit_byte(0xD0 | encode);
  2193     emit_byte(imm8);
  2197 void Assembler::sarl(Register dst, int imm8) {
  2198   int encode = prefix_and_encode(dst->encoding());
  2199   assert(isShiftCount(imm8), "illegal shift count");
  2200   if (imm8 == 1) {
  2201     emit_byte(0xD1);
  2202     emit_byte(0xF8 | encode);
  2203   } else {
  2204     emit_byte(0xC1);
  2205     emit_byte(0xF8 | encode);
  2206     emit_byte(imm8);
  2210 void Assembler::sarl(Register dst) {
  2211   int encode = prefix_and_encode(dst->encoding());
  2212   emit_byte(0xD3);
  2213   emit_byte(0xF8 | encode);
  2216 void Assembler::sarq(Register dst, int imm8) {
  2217   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  2218   int encode = prefixq_and_encode(dst->encoding());
  2219   if (imm8 == 1) {
  2220     emit_byte(0xD1);
  2221     emit_byte(0xF8 | encode);
  2222   } else {
  2223     emit_byte(0xC1);
  2224     emit_byte(0xF8 | encode);
  2225     emit_byte(imm8);
  2229 void Assembler::sarq(Register dst) {
  2230   int encode = prefixq_and_encode(dst->encoding());
  2231   emit_byte(0xD3);
  2232   emit_byte(0xF8 | encode);
  2235 void Assembler::sbbl(Address dst, int imm32) {
  2236   InstructionMark im(this);
  2237   prefix(dst);
  2238   emit_arith_operand(0x81, rbx, dst, imm32);
  2241 void Assembler::sbbl(Register dst, int imm32) {
  2242   prefix(dst);
  2243   emit_arith(0x81, 0xD8, dst, imm32);
  2246 void Assembler::sbbl(Register dst, Address src) {
  2247   InstructionMark im(this);
  2248   prefix(src, dst);
  2249   emit_byte(0x1B);
  2250   emit_operand(dst, src);
  2253 void Assembler::sbbl(Register dst, Register src) {
  2254   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2255   emit_arith(0x1B, 0xC0, dst, src);
  2258 void Assembler::sbbq(Address dst, int imm32) {
  2259   InstructionMark im(this);
  2260   prefixq(dst);
  2261   emit_arith_operand(0x81, rbx, dst, imm32);
  2264 void Assembler::sbbq(Register dst, int imm32) {
  2265   (void) prefixq_and_encode(dst->encoding());
  2266   emit_arith(0x81, 0xD8, dst, imm32);
  2269 void Assembler::sbbq(Register dst, Address src) {
  2270   InstructionMark im(this);
  2271   prefixq(src, dst);
  2272   emit_byte(0x1B);
  2273   emit_operand(dst, src);
  2276 void Assembler::sbbq(Register dst, Register src) {
  2277   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  2278   emit_arith(0x1B, 0xC0, dst, src);
  2281 void Assembler::shll(Register dst, int imm8) {
  2282   assert(isShiftCount(imm8), "illegal shift count");
  2283   int encode = prefix_and_encode(dst->encoding());
  2284   if (imm8 == 1 ) {
  2285     emit_byte(0xD1);
  2286     emit_byte(0xE0 | encode);
  2287   } else {
  2288     emit_byte(0xC1);
  2289     emit_byte(0xE0 | encode);
  2290     emit_byte(imm8);
  2294 void Assembler::shll(Register dst) {
  2295   int encode = prefix_and_encode(dst->encoding());
  2296   emit_byte(0xD3);
  2297   emit_byte(0xE0 | encode);
  2300 void Assembler::shlq(Register dst, int imm8) {
  2301   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  2302   int encode = prefixq_and_encode(dst->encoding());
  2303   if (imm8 == 1) {
  2304     emit_byte(0xD1);
  2305     emit_byte(0xE0 | encode);
  2306   } else {
  2307     emit_byte(0xC1);
  2308     emit_byte(0xE0 | encode);
  2309     emit_byte(imm8);
  2313 void Assembler::shlq(Register dst) {
  2314   int encode = prefixq_and_encode(dst->encoding());
  2315   emit_byte(0xD3);
  2316   emit_byte(0xE0 | encode);
  2319 void Assembler::shrl(Register dst, int imm8) {
  2320   assert(isShiftCount(imm8), "illegal shift count");
  2321   int encode = prefix_and_encode(dst->encoding());
  2322   emit_byte(0xC1);
  2323   emit_byte(0xE8 | encode);
  2324   emit_byte(imm8);
  2327 void Assembler::shrl(Register dst) {
  2328   int encode = prefix_and_encode(dst->encoding());
  2329   emit_byte(0xD3);
  2330   emit_byte(0xE8 | encode);
  2333 void Assembler::shrq(Register dst, int imm8) {
  2334   assert(isShiftCount(imm8 >> 1), "illegal shift count");
  2335   int encode = prefixq_and_encode(dst->encoding());
  2336   emit_byte(0xC1);
  2337   emit_byte(0xE8 | encode);
  2338   emit_byte(imm8);
  2341 void Assembler::shrq(Register dst) {
  2342   int encode = prefixq_and_encode(dst->encoding());
  2343   emit_byte(0xD3);
  2344   emit_byte(0xE8 | encode);
  2347 void Assembler::subl(Address dst, int imm32) {
  2348   InstructionMark im(this);
  2349   prefix(dst);
  2350   if (is8bit(imm32)) {
  2351     emit_byte(0x83);
  2352     emit_operand(rbp, dst, 1);
  2353     emit_byte(imm32 & 0xFF);
  2354   } else {
  2355     emit_byte(0x81);
  2356     emit_operand(rbp, dst, 4);
  2357     emit_long(imm32);
  2361 void Assembler::subl(Register dst, int imm32) {
  2362   prefix(dst);
  2363   emit_arith(0x81, 0xE8, dst, imm32);
  2366 void Assembler::subl(Address dst, Register src) {
  2367   InstructionMark im(this);
  2368   prefix(dst, src);
  2369   emit_byte(0x29);
  2370   emit_operand(src, dst);
  2373 void Assembler::subl(Register dst, Address src) {
  2374   InstructionMark im(this);
  2375   prefix(src, dst);
  2376   emit_byte(0x2B);
  2377   emit_operand(dst, src);
  2380 void Assembler::subl(Register dst, Register src) {
  2381   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2382   emit_arith(0x2B, 0xC0, dst, src);
  2385 void Assembler::subq(Address dst, int imm32) {
  2386   InstructionMark im(this);
  2387   prefixq(dst);
  2388   if (is8bit(imm32)) {
  2389     emit_byte(0x83);
  2390     emit_operand(rbp, dst, 1);
  2391     emit_byte(imm32 & 0xFF);
  2392   } else {
  2393     emit_byte(0x81);
  2394     emit_operand(rbp, dst, 4);
  2395     emit_long(imm32);
  2399 void Assembler::subq(Register dst, int imm32) {
  2400   (void) prefixq_and_encode(dst->encoding());
  2401   emit_arith(0x81, 0xE8, dst, imm32);
  2404 void Assembler::subq(Address dst, Register src) {
  2405   InstructionMark im(this);
  2406   prefixq(dst, src);
  2407   emit_byte(0x29);
  2408   emit_operand(src, dst);
  2411 void Assembler::subq(Register dst, Address src) {
  2412   InstructionMark im(this);
  2413   prefixq(src, dst);
  2414   emit_byte(0x2B);
  2415   emit_operand(dst, src);
  2418 void Assembler::subq(Register dst, Register src) {
  2419   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  2420   emit_arith(0x2B, 0xC0, dst, src);
  2423 void Assembler::testb(Register dst, int imm8) {
  2424   (void) prefix_and_encode(dst->encoding(), true);
  2425   emit_arith_b(0xF6, 0xC0, dst, imm8);
  2428 void Assembler::testl(Register dst, int imm32) {
  2429   // not using emit_arith because test
  2430   // doesn't support sign-extension of
  2431   // 8bit operands
  2432   int encode = dst->encoding();
  2433   if (encode == 0) {
  2434     emit_byte(0xA9);
  2435   } else {
  2436     encode = prefix_and_encode(encode);
  2437     emit_byte(0xF7);
  2438     emit_byte(0xC0 | encode);
  2440   emit_long(imm32);
  2443 void Assembler::testl(Register dst, Register src) {
  2444   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2445   emit_arith(0x85, 0xC0, dst, src);
  2448 void Assembler::testq(Register dst, int imm32) {
  2449   // not using emit_arith because test
  2450   // doesn't support sign-extension of
  2451   // 8bit operands
  2452   int encode = dst->encoding();
  2453   if (encode == 0) {
  2454     prefix(REX_W);
  2455     emit_byte(0xA9);
  2456   } else {
  2457     encode = prefixq_and_encode(encode);
  2458     emit_byte(0xF7);
  2459     emit_byte(0xC0 | encode);
  2461   emit_long(imm32);
  2464 void Assembler::testq(Register dst, Register src) {
  2465   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  2466   emit_arith(0x85, 0xC0, dst, src);
  2469 void Assembler::xaddl(Address dst, Register src) {
  2470   InstructionMark im(this);
  2471   prefix(dst, src);
  2472   emit_byte(0x0F);
  2473   emit_byte(0xC1);
  2474   emit_operand(src, dst);
  2477 void Assembler::xaddq(Address dst, Register src) {
  2478   InstructionMark im(this);
  2479   prefixq(dst, src);
  2480   emit_byte(0x0F);
  2481   emit_byte(0xC1);
  2482   emit_operand(src, dst);
  2485 void Assembler::xorl(Register dst, int imm32) {
  2486   prefix(dst);
  2487   emit_arith(0x81, 0xF0, dst, imm32);
  2490 void Assembler::xorl(Register dst, Register src) {
  2491   (void) prefix_and_encode(dst->encoding(), src->encoding());
  2492   emit_arith(0x33, 0xC0, dst, src);
  2495 void Assembler::xorl(Register dst, Address src) {
  2496   InstructionMark im(this);
  2497   prefix(src, dst);
  2498   emit_byte(0x33);
  2499   emit_operand(dst, src);
  2502 void Assembler::xorq(Register dst, int imm32) {
  2503   (void) prefixq_and_encode(dst->encoding());
  2504   emit_arith(0x81, 0xF0, dst, imm32);
  2507 void Assembler::xorq(Register dst, Register src) {
  2508   (void) prefixq_and_encode(dst->encoding(), src->encoding());
  2509   emit_arith(0x33, 0xC0, dst, src);
  2512 void Assembler::xorq(Register dst, Address src) {
  2513   InstructionMark im(this);
  2514   prefixq(src, dst);
  2515   emit_byte(0x33);
  2516   emit_operand(dst, src);
  2519 void Assembler::bswapl(Register reg) {
  2520   int encode = prefix_and_encode(reg->encoding());
  2521   emit_byte(0x0F);
  2522   emit_byte(0xC8 | encode);
  2525 void Assembler::bswapq(Register reg) {
  2526   int encode = prefixq_and_encode(reg->encoding());
  2527   emit_byte(0x0F);
  2528   emit_byte(0xC8 | encode);
  2531 void Assembler::lock() {
  2532   emit_byte(0xF0);
  2535 void Assembler::xchgl(Register dst, Address src) {
  2536   InstructionMark im(this);
  2537   prefix(src, dst);
  2538   emit_byte(0x87);
  2539   emit_operand(dst, src);
  2542 void Assembler::xchgl(Register dst, Register src) {
  2543   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  2544   emit_byte(0x87);
  2545   emit_byte(0xc0 | encode);
  2548 void Assembler::xchgq(Register dst, Address src) {
  2549   InstructionMark im(this);
  2550   prefixq(src, dst);
  2551   emit_byte(0x87);
  2552   emit_operand(dst, src);
  2555 void Assembler::xchgq(Register dst, Register src) {
  2556   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  2557   emit_byte(0x87);
  2558   emit_byte(0xc0 | encode);
  2561 void Assembler::cmpxchgl(Register reg, Address adr) {
  2562   InstructionMark im(this);
  2563   prefix(adr, reg);
  2564   emit_byte(0x0F);
  2565   emit_byte(0xB1);
  2566   emit_operand(reg, adr);
  2569 void Assembler::cmpxchgq(Register reg, Address adr) {
  2570   InstructionMark im(this);
  2571   prefixq(adr, reg);
  2572   emit_byte(0x0F);
  2573   emit_byte(0xB1);
  2574   emit_operand(reg, adr);
  2577 void Assembler::hlt() {
  2578   emit_byte(0xF4);
  2582 void Assembler::addr_nop_4() {
  2583   // 4 bytes: NOP DWORD PTR [EAX+0]
  2584   emit_byte(0x0F);
  2585   emit_byte(0x1F);
  2586   emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  2587   emit_byte(0);    // 8-bits offset (1 byte)
  2590 void Assembler::addr_nop_5() {
  2591   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  2592   emit_byte(0x0F);
  2593   emit_byte(0x1F);
  2594   emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  2595   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  2596   emit_byte(0);    // 8-bits offset (1 byte)
  2599 void Assembler::addr_nop_7() {
  2600   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  2601   emit_byte(0x0F);
  2602   emit_byte(0x1F);
  2603   emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  2604   emit_long(0);    // 32-bits offset (4 bytes)
  2607 void Assembler::addr_nop_8() {
  2608   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  2609   emit_byte(0x0F);
  2610   emit_byte(0x1F);
  2611   emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  2612   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  2613   emit_long(0);    // 32-bits offset (4 bytes)
  2616 void Assembler::nop(int i) {
  2617   assert(i > 0, " ");
  2618   if (UseAddressNop && VM_Version::is_intel()) {
  2619     //
  2620     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
  2621     //  1: 0x90
  2622     //  2: 0x66 0x90
  2623     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
  2624     //  4: 0x0F 0x1F 0x40 0x00
  2625     //  5: 0x0F 0x1F 0x44 0x00 0x00
  2626     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
  2627     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  2628     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2629     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2630     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2631     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2633     // The rest coding is Intel specific - don't use consecutive address nops
  2635     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  2636     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  2637     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  2638     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  2640     while(i >= 15) {
  2641       // For Intel don't generate consecutive addess nops (mix with regular nops)
  2642       i -= 15;
  2643       emit_byte(0x66);   // size prefix
  2644       emit_byte(0x66);   // size prefix
  2645       emit_byte(0x66);   // size prefix
  2646       addr_nop_8();
  2647       emit_byte(0x66);   // size prefix
  2648       emit_byte(0x66);   // size prefix
  2649       emit_byte(0x66);   // size prefix
  2650       emit_byte(0x90);   // nop
  2652     switch (i) {
  2653       case 14:
  2654         emit_byte(0x66); // size prefix
  2655       case 13:
  2656         emit_byte(0x66); // size prefix
  2657       case 12:
  2658         addr_nop_8();
  2659         emit_byte(0x66); // size prefix
  2660         emit_byte(0x66); // size prefix
  2661         emit_byte(0x66); // size prefix
  2662         emit_byte(0x90); // nop
  2663         break;
  2664       case 11:
  2665         emit_byte(0x66); // size prefix
  2666       case 10:
  2667         emit_byte(0x66); // size prefix
  2668       case 9:
  2669         emit_byte(0x66); // size prefix
  2670       case 8:
  2671         addr_nop_8();
  2672         break;
  2673       case 7:
  2674         addr_nop_7();
  2675         break;
  2676       case 6:
  2677         emit_byte(0x66); // size prefix
  2678       case 5:
  2679         addr_nop_5();
  2680         break;
  2681       case 4:
  2682         addr_nop_4();
  2683         break;
  2684       case 3:
  2685         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
  2686         emit_byte(0x66); // size prefix
  2687       case 2:
  2688         emit_byte(0x66); // size prefix
  2689       case 1:
  2690         emit_byte(0x90); // nop
  2691         break;
  2692       default:
  2693         assert(i == 0, " ");
  2695     return;
  2697   if (UseAddressNop && VM_Version::is_amd()) {
  2698     //
  2699     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
  2700     //  1: 0x90
  2701     //  2: 0x66 0x90
  2702     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
  2703     //  4: 0x0F 0x1F 0x40 0x00
  2704     //  5: 0x0F 0x1F 0x44 0x00 0x00
  2705     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
  2706     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  2707     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2708     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2709     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2710     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2712     // The rest coding is AMD specific - use consecutive address nops
  2714     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
  2715     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
  2716     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  2717     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
  2718     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  2719     //     Size prefixes (0x66) are added for larger sizes
  2721     while(i >= 22) {
  2722       i -= 11;
  2723       emit_byte(0x66); // size prefix
  2724       emit_byte(0x66); // size prefix
  2725       emit_byte(0x66); // size prefix
  2726       addr_nop_8();
  2728     // Generate first nop for size between 21-12
  2729     switch (i) {
  2730       case 21:
  2731         i -= 1;
  2732         emit_byte(0x66); // size prefix
  2733       case 20:
  2734       case 19:
  2735         i -= 1;
  2736         emit_byte(0x66); // size prefix
  2737       case 18:
  2738       case 17:
  2739         i -= 1;
  2740         emit_byte(0x66); // size prefix
  2741       case 16:
  2742       case 15:
  2743         i -= 8;
  2744         addr_nop_8();
  2745         break;
  2746       case 14:
  2747       case 13:
  2748         i -= 7;
  2749         addr_nop_7();
  2750         break;
  2751       case 12:
  2752         i -= 6;
  2753         emit_byte(0x66); // size prefix
  2754         addr_nop_5();
  2755         break;
  2756       default:
  2757         assert(i < 12, " ");
  2760     // Generate second nop for size between 11-1
  2761     switch (i) {
  2762       case 11:
  2763         emit_byte(0x66); // size prefix
  2764       case 10:
  2765         emit_byte(0x66); // size prefix
  2766       case 9:
  2767         emit_byte(0x66); // size prefix
  2768       case 8:
  2769         addr_nop_8();
  2770         break;
  2771       case 7:
  2772         addr_nop_7();
  2773         break;
  2774       case 6:
  2775         emit_byte(0x66); // size prefix
  2776       case 5:
  2777         addr_nop_5();
  2778         break;
  2779       case 4:
  2780         addr_nop_4();
  2781         break;
  2782       case 3:
  2783         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
  2784         emit_byte(0x66); // size prefix
  2785       case 2:
  2786         emit_byte(0x66); // size prefix
  2787       case 1:
  2788         emit_byte(0x90); // nop
  2789         break;
  2790       default:
  2791         assert(i == 0, " ");
  2793     return;
  2796   // Using nops with size prefixes "0x66 0x90".
  2797   // From AMD Optimization Guide:
  2798   //  1: 0x90
  2799   //  2: 0x66 0x90
  2800   //  3: 0x66 0x66 0x90
  2801   //  4: 0x66 0x66 0x66 0x90
  2802   //  5: 0x66 0x66 0x90 0x66 0x90
  2803   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  2804   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  2805   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  2806   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  2807   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  2808   //
  2809   while(i > 12) {
  2810     i -= 4;
  2811     emit_byte(0x66); // size prefix
  2812     emit_byte(0x66);
  2813     emit_byte(0x66);
  2814     emit_byte(0x90); // nop
  2816   // 1 - 12 nops
  2817   if(i > 8) {
  2818     if(i > 9) {
  2819       i -= 1;
  2820       emit_byte(0x66);
  2822     i -= 3;
  2823     emit_byte(0x66);
  2824     emit_byte(0x66);
  2825     emit_byte(0x90);
  2827   // 1 - 8 nops
  2828   if(i > 4) {
  2829     if(i > 6) {
  2830       i -= 1;
  2831       emit_byte(0x66);
  2833     i -= 3;
  2834     emit_byte(0x66);
  2835     emit_byte(0x66);
  2836     emit_byte(0x90);
  2838   switch (i) {
  2839     case 4:
  2840       emit_byte(0x66);
  2841     case 3:
  2842       emit_byte(0x66);
  2843     case 2:
  2844       emit_byte(0x66);
  2845     case 1:
  2846       emit_byte(0x90);
  2847       break;
  2848     default:
  2849       assert(i == 0, " ");
  2853 void Assembler::ret(int imm16) {
  2854   if (imm16 == 0) {
  2855     emit_byte(0xC3);
  2856   } else {
  2857     emit_byte(0xC2);
  2858     emit_word(imm16);
  2862 // copies a single word from [esi] to [edi]
  2863 void Assembler::smovl() {
  2864   emit_byte(0xA5);
  2867 // copies data from [rsi] to [rdi] using rcx words (m32)
  2868 void Assembler::rep_movl() {
  2869   // REP
  2870   emit_byte(0xF3);
  2871   // MOVSL
  2872   emit_byte(0xA5);
  2875 // copies data from [rsi] to [rdi] using rcx double words (m64)
  2876 void Assembler::rep_movq() {
  2877   // REP
  2878   emit_byte(0xF3);
  2879   // MOVSQ
  2880   prefix(REX_W);
  2881   emit_byte(0xA5);
  2884 // sets rcx double words (m64) with rax value at [rdi]
  2885 void Assembler::rep_set() {
  2886   // REP
  2887   emit_byte(0xF3);
  2888   // STOSQ
  2889   prefix(REX_W);
  2890   emit_byte(0xAB);
  2893 // scans rcx double words (m64) at [rdi] for occurance of rax
  2894 void Assembler::repne_scan() {
  2895   // REPNE/REPNZ
  2896   emit_byte(0xF2);
  2897   // SCASQ
  2898   prefix(REX_W);
  2899   emit_byte(0xAF);
  2902 void Assembler::setb(Condition cc, Register dst) {
  2903   assert(0 <= cc && cc < 16, "illegal cc");
  2904   int encode = prefix_and_encode(dst->encoding(), true);
  2905   emit_byte(0x0F);
  2906   emit_byte(0x90 | cc);
  2907   emit_byte(0xC0 | encode);
  2910 void Assembler::clflush(Address adr) {
  2911   prefix(adr);
  2912   emit_byte(0x0F);
  2913   emit_byte(0xAE);
  2914   emit_operand(rdi, adr);
  2917 void Assembler::call(Label& L, relocInfo::relocType rtype) {
  2918   if (L.is_bound()) {
  2919     const int long_size = 5;
  2920     int offs = (int)( target(L) - pc() );
  2921     assert(offs <= 0, "assembler error");
  2922     InstructionMark im(this);
  2923     // 1110 1000 #32-bit disp
  2924     emit_byte(0xE8);
  2925     emit_data(offs - long_size, rtype, disp32_operand);
  2926   } else {
  2927     InstructionMark im(this);
  2928     // 1110 1000 #32-bit disp
  2929     L.add_patch_at(code(), locator());
  2931     emit_byte(0xE8);
  2932     emit_data(int(0), rtype, disp32_operand);
  2936 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  2937   assert(entry != NULL, "call most probably wrong");
  2938   InstructionMark im(this);
  2939   emit_byte(0xE8);
  2940   intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  2941   assert(is_simm32(disp), "must be 32bit offset (call2)");
  2942   // Technically, should use call32_operand, but this format is
  2943   // implied by the fact that we're emitting a call instruction.
  2944   emit_data((int) disp, rspec, disp32_operand);
  2948 void Assembler::call(Register dst) {
  2949   // This was originally using a 32bit register encoding
  2950   // and surely we want 64bit!
  2951   // this is a 32bit encoding but in 64bit mode the default
  2952   // operand size is 64bit so there is no need for the
  2953   // wide prefix. So prefix only happens if we use the
  2954   // new registers. Much like push/pop.
  2955   int encode = prefixq_and_encode(dst->encoding());
  2956   emit_byte(0xFF);
  2957   emit_byte(0xD0 | encode);
  2960 void Assembler::call(Address adr) {
  2961   InstructionMark im(this);
  2962   prefix(adr);
  2963   emit_byte(0xFF);
  2964   emit_operand(rdx, adr);
  2967 void Assembler::jmp(Register reg) {
  2968   int encode = prefix_and_encode(reg->encoding());
  2969   emit_byte(0xFF);
  2970   emit_byte(0xE0 | encode);
  2973 void Assembler::jmp(Address adr) {
  2974   InstructionMark im(this);
  2975   prefix(adr);
  2976   emit_byte(0xFF);
  2977   emit_operand(rsp, adr);
  2980 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  2981   InstructionMark im(this);
  2982   emit_byte(0xE9);
  2983   assert(dest != NULL, "must have a target");
  2984   intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  2985   assert(is_simm32(disp), "must be 32bit offset (jmp)");
  2986   emit_data(disp, rspec.reloc(), call32_operand);
  2989 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  2990   if (L.is_bound()) {
  2991     address entry = target(L);
  2992     assert(entry != NULL, "jmp most probably wrong");
  2993     InstructionMark im(this);
  2994     const int short_size = 2;
  2995     const int long_size = 5;
  2996     intptr_t offs = entry - _code_pos;
  2997     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
  2998       emit_byte(0xEB);
  2999       emit_byte((offs - short_size) & 0xFF);
  3000     } else {
  3001       emit_byte(0xE9);
  3002       emit_long(offs - long_size);
  3004   } else {
  3005     // By default, forward jumps are always 32-bit displacements, since
  3006     // we can't yet know where the label will be bound.  If you're sure that
  3007     // the forward jump will not run beyond 256 bytes, use jmpb to
  3008     // force an 8-bit displacement.
  3009     InstructionMark im(this);
  3010     relocate(rtype);
  3011     L.add_patch_at(code(), locator());
  3012     emit_byte(0xE9);
  3013     emit_long(0);
  3017 void Assembler::jmpb(Label& L) {
  3018   if (L.is_bound()) {
  3019     const int short_size = 2;
  3020     address entry = target(L);
  3021     assert(is8bit((entry - _code_pos) + short_size),
  3022            "Dispacement too large for a short jmp");
  3023     assert(entry != NULL, "jmp most probably wrong");
  3024     intptr_t offs = entry - _code_pos;
  3025     emit_byte(0xEB);
  3026     emit_byte((offs - short_size) & 0xFF);
  3027   } else {
  3028     InstructionMark im(this);
  3029     L.add_patch_at(code(), locator());
  3030     emit_byte(0xEB);
  3031     emit_byte(0);
  3035 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  3036   InstructionMark im(this);
  3037   relocate(rtype);
  3038   assert((0 <= cc) && (cc < 16), "illegal cc");
  3039   if (L.is_bound()) {
  3040     address dst = target(L);
  3041     assert(dst != NULL, "jcc most probably wrong");
  3043     const int short_size = 2;
  3044     const int long_size = 6;
  3045     intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
  3046     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
  3047       // 0111 tttn #8-bit disp
  3048       emit_byte(0x70 | cc);
  3049       emit_byte((offs - short_size) & 0xFF);
  3050     } else {
  3051       // 0000 1111 1000 tttn #32-bit disp
  3052       assert(is_simm32(offs - long_size),
  3053              "must be 32bit offset (call4)");
  3054       emit_byte(0x0F);
  3055       emit_byte(0x80 | cc);
  3056       emit_long(offs - long_size);
  3058   } else {
  3059     // Note: could eliminate cond. jumps to this jump if condition
  3060     //       is the same however, seems to be rather unlikely case.
  3061     // Note: use jccb() if label to be bound is very close to get
  3062     //       an 8-bit displacement
  3063     L.add_patch_at(code(), locator());
  3064     emit_byte(0x0F);
  3065     emit_byte(0x80 | cc);
  3066     emit_long(0);
  3070 void Assembler::jccb(Condition cc, Label& L) {
  3071   if (L.is_bound()) {
  3072     const int short_size = 2;
  3073     const int long_size = 6;
  3074     address entry = target(L);
  3075     assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
  3076            "Dispacement too large for a short jmp");
  3077     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
  3078     // 0111 tttn #8-bit disp
  3079     emit_byte(0x70 | cc);
  3080     emit_byte((offs - short_size) & 0xFF);
  3081   } else {
  3082     InstructionMark im(this);
  3083     L.add_patch_at(code(), locator());
  3084     emit_byte(0x70 | cc);
  3085     emit_byte(0);
  3089 // FP instructions
  3091 void Assembler::fxsave(Address dst) {
  3092   prefixq(dst);
  3093   emit_byte(0x0F);
  3094   emit_byte(0xAE);
  3095   emit_operand(as_Register(0), dst);
  3098 void Assembler::fxrstor(Address src) {
  3099   prefixq(src);
  3100   emit_byte(0x0F);
  3101   emit_byte(0xAE);
  3102   emit_operand(as_Register(1), src);
  3105 void Assembler::ldmxcsr(Address src) {
  3106   InstructionMark im(this);
  3107   prefix(src);
  3108   emit_byte(0x0F);
  3109   emit_byte(0xAE);
  3110   emit_operand(as_Register(2), src);
  3113 void Assembler::stmxcsr(Address dst) {
  3114   InstructionMark im(this);
  3115   prefix(dst);
  3116   emit_byte(0x0F);
  3117   emit_byte(0xAE);
  3118   emit_operand(as_Register(3), dst);
  3121 void Assembler::addss(XMMRegister dst, XMMRegister src) {
  3122   emit_byte(0xF3);
  3123   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3124   emit_byte(0x0F);
  3125   emit_byte(0x58);
  3126   emit_byte(0xC0 | encode);
  3129 void Assembler::addss(XMMRegister dst, Address src) {
  3130   InstructionMark im(this);
  3131   emit_byte(0xF3);
  3132   prefix(src, dst);
  3133   emit_byte(0x0F);
  3134   emit_byte(0x58);
  3135   emit_operand(dst, src);
  3138 void Assembler::subss(XMMRegister dst, XMMRegister src) {
  3139   emit_byte(0xF3);
  3140   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3141   emit_byte(0x0F);
  3142   emit_byte(0x5C);
  3143   emit_byte(0xC0 | encode);
  3146 void Assembler::subss(XMMRegister dst, Address src) {
  3147   InstructionMark im(this);
  3148   emit_byte(0xF3);
  3149   prefix(src, dst);
  3150   emit_byte(0x0F);
  3151   emit_byte(0x5C);
  3152   emit_operand(dst, src);
  3155 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  3156   emit_byte(0xF3);
  3157   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3158   emit_byte(0x0F);
  3159   emit_byte(0x59);
  3160   emit_byte(0xC0 | encode);
  3163 void Assembler::mulss(XMMRegister dst, Address src) {
  3164   InstructionMark im(this);
  3165   emit_byte(0xF3);
  3166   prefix(src, dst);
  3167   emit_byte(0x0F);
  3168   emit_byte(0x59);
  3169   emit_operand(dst, src);
  3172 void Assembler::divss(XMMRegister dst, XMMRegister src) {
  3173   emit_byte(0xF3);
  3174   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3175   emit_byte(0x0F);
  3176   emit_byte(0x5E);
  3177   emit_byte(0xC0 | encode);
  3180 void Assembler::divss(XMMRegister dst, Address src) {
  3181   InstructionMark im(this);
  3182   emit_byte(0xF3);
  3183   prefix(src, dst);
  3184   emit_byte(0x0F);
  3185   emit_byte(0x5E);
  3186   emit_operand(dst, src);
  3189 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  3190   emit_byte(0xF2);
  3191   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3192   emit_byte(0x0F);
  3193   emit_byte(0x58);
  3194   emit_byte(0xC0 | encode);
  3197 void Assembler::addsd(XMMRegister dst, Address src) {
  3198   InstructionMark im(this);
  3199   emit_byte(0xF2);
  3200   prefix(src, dst);
  3201   emit_byte(0x0F);
  3202   emit_byte(0x58);
  3203   emit_operand(dst, src);
  3206 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  3207   emit_byte(0xF2);
  3208   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3209   emit_byte(0x0F);
  3210   emit_byte(0x5C);
  3211   emit_byte(0xC0 | encode);
  3214 void Assembler::subsd(XMMRegister dst, Address src) {
  3215   InstructionMark im(this);
  3216   emit_byte(0xF2);
  3217   prefix(src, dst);
  3218   emit_byte(0x0F);
  3219   emit_byte(0x5C);
  3220   emit_operand(dst, src);
  3223 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  3224   emit_byte(0xF2);
  3225   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3226   emit_byte(0x0F);
  3227   emit_byte(0x59);
  3228   emit_byte(0xC0 | encode);
  3231 void Assembler::mulsd(XMMRegister dst, Address src) {
  3232   InstructionMark im(this);
  3233   emit_byte(0xF2);
  3234   prefix(src, dst);
  3235   emit_byte(0x0F);
  3236   emit_byte(0x59);
  3237   emit_operand(dst, src);
  3240 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  3241   emit_byte(0xF2);
  3242   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3243   emit_byte(0x0F);
  3244   emit_byte(0x5E);
  3245   emit_byte(0xC0 | encode);
  3248 void Assembler::divsd(XMMRegister dst, Address src) {
  3249   InstructionMark im(this);
  3250   emit_byte(0xF2);
  3251   prefix(src, dst);
  3252   emit_byte(0x0F);
  3253   emit_byte(0x5E);
  3254   emit_operand(dst, src);
  3257 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  3258   emit_byte(0xF2);
  3259   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3260   emit_byte(0x0F);
  3261   emit_byte(0x51);
  3262   emit_byte(0xC0 | encode);
  3265 void Assembler::sqrtsd(XMMRegister dst, Address src) {
  3266   InstructionMark im(this);
  3267   emit_byte(0xF2);
  3268   prefix(src, dst);
  3269   emit_byte(0x0F);
  3270   emit_byte(0x51);
  3271   emit_operand(dst, src);
  3274 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  3275   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3276   emit_byte(0x0F);
  3277   emit_byte(0x57);
  3278   emit_byte(0xC0 | encode);
  3281 void Assembler::xorps(XMMRegister dst, Address src) {
  3282   InstructionMark im(this);
  3283   prefix(src, dst);
  3284   emit_byte(0x0F);
  3285   emit_byte(0x57);
  3286   emit_operand(dst, src);
  3289 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  3290   emit_byte(0x66);
  3291   xorps(dst, src);
  3294 void Assembler::xorpd(XMMRegister dst, Address src) {
  3295   InstructionMark im(this);
  3296   emit_byte(0x66);
  3297   prefix(src, dst);
  3298   emit_byte(0x0F);
  3299   emit_byte(0x57);
  3300   emit_operand(dst, src);
  3303 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  3304   emit_byte(0xF3);
  3305   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3306   emit_byte(0x0F);
  3307   emit_byte(0x2A);
  3308   emit_byte(0xC0 | encode);
  3311 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  3312   emit_byte(0xF3);
  3313   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3314   emit_byte(0x0F);
  3315   emit_byte(0x2A);
  3316   emit_byte(0xC0 | encode);
  3319 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  3320   emit_byte(0xF2);
  3321   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3322   emit_byte(0x0F);
  3323   emit_byte(0x2A);
  3324   emit_byte(0xC0 | encode);
  3327 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  3328   emit_byte(0xF2);
  3329   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3330   emit_byte(0x0F);
  3331   emit_byte(0x2A);
  3332   emit_byte(0xC0 | encode);
  3335 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  3336   emit_byte(0xF3);
  3337   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3338   emit_byte(0x0F);
  3339   emit_byte(0x2C);
  3340   emit_byte(0xC0 | encode);
  3343 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  3344   emit_byte(0xF3);
  3345   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3346   emit_byte(0x0F);
  3347   emit_byte(0x2C);
  3348   emit_byte(0xC0 | encode);
  3351 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  3352   emit_byte(0xF2);
  3353   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3354   emit_byte(0x0F);
  3355   emit_byte(0x2C);
  3356   emit_byte(0xC0 | encode);
  3359 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  3360   emit_byte(0xF2);
  3361   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  3362   emit_byte(0x0F);
  3363   emit_byte(0x2C);
  3364   emit_byte(0xC0 | encode);
  3367 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  3368   emit_byte(0xF3);
  3369   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3370   emit_byte(0x0F);
  3371   emit_byte(0x5A);
  3372   emit_byte(0xC0 | encode);
  3375 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  3376   emit_byte(0xF3);
  3377   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3378   emit_byte(0x0F);
  3379   emit_byte(0xE6);
  3380   emit_byte(0xC0 | encode);
  3383 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  3384   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3385   emit_byte(0x0F);
  3386   emit_byte(0x5B);
  3387   emit_byte(0xC0 | encode);
  3390 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  3391   emit_byte(0xF2);
  3392   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3393   emit_byte(0x0F);
  3394   emit_byte(0x5A);
  3395   emit_byte(0xC0 | encode);
  3398 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  3399   emit_byte(0x66);
  3400   int encode = prefix_and_encode(dst->encoding(), src->encoding());
  3401   emit_byte(0x0F);
  3402   emit_byte(0x60);
  3403   emit_byte(0xC0 | encode);
  3406 // Implementation of MacroAssembler
  3408 // On 32 bit it returns a vanilla displacement on 64 bit is a rip relative displacement
  3409 Address MacroAssembler::as_Address(AddressLiteral adr) {
  3410   assert(!adr.is_lval(), "must be rval");
  3411   assert(reachable(adr), "must be");
  3412   return Address((int)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
  3415 Address MacroAssembler::as_Address(ArrayAddress adr) {
  3416 #ifdef _LP64
  3417   AddressLiteral base = adr.base();
  3418   lea(rscratch1, base);
  3419   Address index = adr.index();
  3420   assert(index._disp == 0, "must not have disp"); // maybe it can?
  3421   Address array(rscratch1, index._index, index._scale, index._disp);
  3422   return array;
  3423 #else
  3424   return Address::make_array(adr);
  3425 #endif // _LP64
  3429 void MacroAssembler::fat_nop() {
  3430   // A 5 byte nop that is safe for patching (see patch_verified_entry)
  3431   // Recommened sequence from 'Software Optimization Guide for the AMD
  3432   // Hammer Processor'
  3433   emit_byte(0x66);
  3434   emit_byte(0x66);
  3435   emit_byte(0x90);
  3436   emit_byte(0x66);
  3437   emit_byte(0x90);
  3440 static Assembler::Condition reverse[] = {
  3441     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  3442     Assembler::overflow       /* noOverflow    = 0x1 */ ,
  3443     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
  3444     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
  3445     Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
  3446     Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
  3447     Assembler::above          /* belowEqual    = 0x6 */ ,
  3448     Assembler::belowEqual     /* above         = 0x7 */ ,
  3449     Assembler::positive       /* negative      = 0x8 */ ,
  3450     Assembler::negative       /* positive      = 0x9 */ ,
  3451     Assembler::noParity       /* parity        = 0xa */ ,
  3452     Assembler::parity         /* noParity      = 0xb */ ,
  3453     Assembler::greaterEqual   /* less          = 0xc */ ,
  3454     Assembler::less           /* greaterEqual  = 0xd */ ,
  3455     Assembler::greater        /* lessEqual     = 0xe */ ,
  3456     Assembler::lessEqual      /* greater       = 0xf, */
  3458 };
  3460 // 32bit can do a case table jump in one instruction but we no longer allow the base
  3461 // to be installed in the Address class
  3462 void MacroAssembler::jump(ArrayAddress entry) {
  3463 #ifdef _LP64
  3464   lea(rscratch1, entry.base());
  3465   Address dispatch = entry.index();
  3466   assert(dispatch._base == noreg, "must be");
  3467   dispatch._base = rscratch1;
  3468   jmp(dispatch);
  3469 #else
  3470   jmp(as_Address(entry));
  3471 #endif // _LP64
  3474 void MacroAssembler::jump(AddressLiteral dst) {
  3475   if (reachable(dst)) {
  3476     jmp_literal(dst.target(), dst.rspec());
  3477   } else {
  3478     lea(rscratch1, dst);
  3479     jmp(rscratch1);
  3483 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  3484   if (reachable(dst)) {
  3485     InstructionMark im(this);
  3486     relocate(dst.reloc());
  3487     const int short_size = 2;
  3488     const int long_size = 6;
  3489     int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
  3490     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
  3491       // 0111 tttn #8-bit disp
  3492       emit_byte(0x70 | cc);
  3493       emit_byte((offs - short_size) & 0xFF);
  3494     } else {
  3495       // 0000 1111 1000 tttn #32-bit disp
  3496       emit_byte(0x0F);
  3497       emit_byte(0x80 | cc);
  3498       emit_long(offs - long_size);
  3500   } else {
  3501 #ifdef ASSERT
  3502     warning("reversing conditional branch");
  3503 #endif /* ASSERT */
  3504     Label skip;
  3505     jccb(reverse[cc], skip);
  3506     lea(rscratch1, dst);
  3507     Assembler::jmp(rscratch1);
  3508     bind(skip);
  3512 // Wouldn't need if AddressLiteral version had new name
  3513 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  3514   Assembler::call(L, rtype);
  3517 // Wouldn't need if AddressLiteral version had new name
  3518 void MacroAssembler::call(Register entry) {
  3519   Assembler::call(entry);
  3522 void MacroAssembler::call(AddressLiteral entry) {
  3523   if (reachable(entry)) {
  3524     Assembler::call_literal(entry.target(), entry.rspec());
  3525   } else {
  3526     lea(rscratch1, entry);
  3527     Assembler::call(rscratch1);
  3531 void MacroAssembler::cmp8(AddressLiteral src1, int8_t src2) {
  3532   if (reachable(src1)) {
  3533     cmpb(as_Address(src1), src2);
  3534   } else {
  3535     lea(rscratch1, src1);
  3536     cmpb(Address(rscratch1, 0), src2);
  3540 void MacroAssembler::cmp32(AddressLiteral src1, int32_t src2) {
  3541   if (reachable(src1)) {
  3542     cmpl(as_Address(src1), src2);
  3543   } else {
  3544     lea(rscratch1, src1);
  3545     cmpl(Address(rscratch1, 0), src2);
  3549 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  3550   if (reachable(src2)) {
  3551     cmpl(src1, as_Address(src2));
  3552   } else {
  3553     lea(rscratch1, src2);
  3554     cmpl(src1, Address(rscratch1, 0));
  3558 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
  3559 #ifdef _LP64
  3560   if (src2.is_lval()) {
  3561     movptr(rscratch1, src2);
  3562     Assembler::cmpq(src1, rscratch1);
  3563   } else if (reachable(src2)) {
  3564     cmpq(src1, as_Address(src2));
  3565   } else {
  3566     lea(rscratch1, src2);
  3567     Assembler::cmpq(src1, Address(rscratch1, 0));
  3569 #else
  3570   if (src2.is_lval()) {
  3571     cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  3572   } else {
  3573     cmpl(src1, as_Address(src2));
  3575 #endif // _LP64
  3578 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  3579   assert(src2.is_lval(), "not a mem-mem compare");
  3580 #ifdef _LP64
  3581   // moves src2's literal address
  3582   movptr(rscratch1, src2);
  3583   Assembler::cmpq(src1, rscratch1);
  3584 #else
  3585   cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  3586 #endif // _LP64
  3589 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  3590   assert(!src2.is_lval(), "should use cmpptr");
  3592   if (reachable(src2)) {
  3593 #ifdef _LP64
  3594     cmpq(src1, as_Address(src2));
  3595 #else
  3596     ShouldNotReachHere();
  3597 #endif // _LP64
  3598   } else {
  3599     lea(rscratch1, src2);
  3600     Assembler::cmpq(src1, Address(rscratch1, 0));
  3604 void MacroAssembler::cmpxchgptr(Register reg, AddressLiteral adr) {
  3605   if (reachable(adr)) {
  3606 #ifdef _LP64
  3607     cmpxchgq(reg, as_Address(adr));
  3608 #else
  3609     cmpxchgl(reg, as_Address(adr));
  3610 #endif // _LP64
  3611   } else {
  3612     lea(rscratch1, adr);
  3613     cmpxchgq(reg, Address(rscratch1, 0));
  3617 void MacroAssembler::incrementl(AddressLiteral dst) {
  3618   if (reachable(dst)) {
  3619     incrementl(as_Address(dst));
  3620   } else {
  3621     lea(rscratch1, dst);
  3622     incrementl(Address(rscratch1, 0));
  3626 void MacroAssembler::incrementl(ArrayAddress dst) {
  3627   incrementl(as_Address(dst));
  3630 void MacroAssembler::lea(Register dst, Address src) {
  3631 #ifdef _LP64
  3632   leaq(dst, src);
  3633 #else
  3634   leal(dst, src);
  3635 #endif // _LP64
  3638 void MacroAssembler::lea(Register dst, AddressLiteral src) {
  3639 #ifdef _LP64
  3640     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  3641 #else
  3642     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  3643 #endif // _LP64
  3646 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  3647   if (reachable(dst)) {
  3648     movl(as_Address(dst), src);
  3649   } else {
  3650     lea(rscratch1, dst);
  3651     movl(Address(rscratch1, 0), src);
  3655 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  3656   if (reachable(src)) {
  3657     movl(dst, as_Address(src));
  3658   } else {
  3659     lea(rscratch1, src);
  3660     movl(dst, Address(rscratch1, 0));
  3664 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  3665   if (reachable(src)) {
  3666     if (UseXmmLoadAndClearUpper) {
  3667       movsd (dst, as_Address(src));
  3668     } else {
  3669       movlpd(dst, as_Address(src));
  3671   } else {
  3672     lea(rscratch1, src);
  3673     if (UseXmmLoadAndClearUpper) {
  3674       movsd (dst, Address(rscratch1, 0));
  3675     } else {
  3676       movlpd(dst, Address(rscratch1, 0));
  3681 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  3682   if (reachable(src)) {
  3683     movss(dst, as_Address(src));
  3684   } else {
  3685     lea(rscratch1, src);
  3686     movss(dst, Address(rscratch1, 0));
  3690 void MacroAssembler::movoop(Register dst, jobject obj) {
  3691   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  3694 void MacroAssembler::movoop(Address dst, jobject obj) {
  3695   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  3696   movq(dst, rscratch1);
  3699 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  3700 #ifdef _LP64
  3701   if (src.is_lval()) {
  3702     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  3703   } else {
  3704     if (reachable(src)) {
  3705       movq(dst, as_Address(src));
  3706     } else {
  3707       lea(rscratch1, src);
  3708       movq(dst, Address(rscratch1,0));
  3711 #else
  3712   if (src.is_lval()) {
  3713     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  3714   } else {
  3715     movl(dst, as_Address(src));
  3717 #endif // LP64
  3720 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  3721 #ifdef _LP64
  3722   movq(as_Address(dst), src);
  3723 #else
  3724   movl(as_Address(dst), src);
  3725 #endif // _LP64
  3728 void MacroAssembler::pushoop(jobject obj) {
  3729 #ifdef _LP64
  3730   movoop(rscratch1, obj);
  3731   pushq(rscratch1);
  3732 #else
  3733   push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
  3734 #endif // _LP64
  3737 void MacroAssembler::pushptr(AddressLiteral src) {
  3738 #ifdef _LP64
  3739   lea(rscratch1, src);
  3740   if (src.is_lval()) {
  3741     pushq(rscratch1);
  3742   } else {
  3743     pushq(Address(rscratch1, 0));
  3745 #else
  3746   if (src.is_lval()) {
  3747     push_literal((int32_t)src.target(), src.rspec());
  3748   else {
  3749     pushl(as_Address(src));
  3751 #endif // _LP64
  3754 void MacroAssembler::ldmxcsr(AddressLiteral src) {
  3755   if (reachable(src)) {
  3756     Assembler::ldmxcsr(as_Address(src));
  3757   } else {
  3758     lea(rscratch1, src);
  3759     Assembler::ldmxcsr(Address(rscratch1, 0));
  3763 void MacroAssembler::movlpd(XMMRegister dst, AddressLiteral src) {
  3764   if (reachable(src)) {
  3765     movlpd(dst, as_Address(src));
  3766   } else {
  3767     lea(rscratch1, src);
  3768     movlpd(dst, Address(rscratch1, 0));
  3772 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  3773   if (reachable(src)) {
  3774     movss(dst, as_Address(src));
  3775   } else {
  3776     lea(rscratch1, src);
  3777     movss(dst, Address(rscratch1, 0));
  3780 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  3781   if (reachable(src)) {
  3782     xorpd(dst, as_Address(src));
  3783   } else {
  3784     lea(rscratch1, src);
  3785     xorpd(dst, Address(rscratch1, 0));
  3789 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  3790   if (reachable(src)) {
  3791     xorps(dst, as_Address(src));
  3792   } else {
  3793     lea(rscratch1, src);
  3794     xorps(dst, Address(rscratch1, 0));
  3798 void MacroAssembler::null_check(Register reg, int offset) {
  3799   if (needs_explicit_null_check(offset)) {
  3800     // provoke OS NULL exception if reg = NULL by
  3801     // accessing M[reg] w/o changing any (non-CC) registers
  3802     cmpq(rax, Address(reg, 0));
  3803     // Note: should probably use testl(rax, Address(reg, 0));
  3804     //       may be shorter code (however, this version of
  3805     //       testl needs to be implemented first)
  3806   } else {
  3807     // nothing to do, (later) access of M[reg + offset]
  3808     // will provoke OS NULL exception if reg = NULL
  3812 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  3813   int off = offset();
  3814   movzbl(dst, src);
  3815   return off;
  3818 int MacroAssembler::load_unsigned_word(Register dst, Address src) {
  3819   int off = offset();
  3820   movzwl(dst, src);
  3821   return off;
  3824 int MacroAssembler::load_signed_byte(Register dst, Address src) {
  3825   int off = offset();
  3826   movsbl(dst, src);
  3827   return off;
  3830 int MacroAssembler::load_signed_word(Register dst, Address src) {
  3831   int off = offset();
  3832   movswl(dst, src);
  3833   return off;
  3836 void MacroAssembler::incrementl(Register reg, int value) {
  3837   if (value == min_jint) { addl(reg, value); return; }
  3838   if (value <  0) { decrementl(reg, -value); return; }
  3839   if (value == 0) {                        ; return; }
  3840   if (value == 1 && UseIncDec) { incl(reg) ; return; }
  3841   /* else */      { addl(reg, value)       ; return; }
  3844 void MacroAssembler::decrementl(Register reg, int value) {
  3845   if (value == min_jint) { subl(reg, value); return; }
  3846   if (value <  0) { incrementl(reg, -value); return; }
  3847   if (value == 0) {                        ; return; }
  3848   if (value == 1 && UseIncDec) { decl(reg) ; return; }
  3849   /* else */      { subl(reg, value)       ; return; }
  3852 void MacroAssembler::incrementq(Register reg, int value) {
  3853   if (value == min_jint) { addq(reg, value); return; }
  3854   if (value <  0) { decrementq(reg, -value); return; }
  3855   if (value == 0) {                        ; return; }
  3856   if (value == 1 && UseIncDec) { incq(reg) ; return; }
  3857   /* else */      { addq(reg, value)       ; return; }
  3860 void MacroAssembler::decrementq(Register reg, int value) {
  3861   if (value == min_jint) { subq(reg, value); return; }
  3862   if (value <  0) { incrementq(reg, -value); return; }
  3863   if (value == 0) {                        ; return; }
  3864   if (value == 1 && UseIncDec) { decq(reg) ; return; }
  3865   /* else */      { subq(reg, value)       ; return; }
  3868 void MacroAssembler::incrementl(Address dst, int value) {
  3869   if (value == min_jint) { addl(dst, value); return; }
  3870   if (value <  0) { decrementl(dst, -value); return; }
  3871   if (value == 0) {                        ; return; }
  3872   if (value == 1 && UseIncDec) { incl(dst) ; return; }
  3873   /* else */      { addl(dst, value)       ; return; }
  3876 void MacroAssembler::decrementl(Address dst, int value) {
  3877   if (value == min_jint) { subl(dst, value); return; }
  3878   if (value <  0) { incrementl(dst, -value); return; }
  3879   if (value == 0) {                        ; return; }
  3880   if (value == 1 && UseIncDec) { decl(dst) ; return; }
  3881   /* else */      { subl(dst, value)       ; return; }
  3884 void MacroAssembler::incrementq(Address dst, int value) {
  3885   if (value == min_jint) { addq(dst, value); return; }
  3886   if (value <  0) { decrementq(dst, -value); return; }
  3887   if (value == 0) {                        ; return; }
  3888   if (value == 1 && UseIncDec) { incq(dst) ; return; }
  3889   /* else */      { addq(dst, value)       ; return; }
  3892 void MacroAssembler::decrementq(Address dst, int value) {
  3893   if (value == min_jint) { subq(dst, value); return; }
  3894   if (value <  0) { incrementq(dst, -value); return; }
  3895   if (value == 0) {                        ; return; }
  3896   if (value == 1 && UseIncDec) { decq(dst) ; return; }
  3897   /* else */      { subq(dst, value)       ; return; }
  3900 void MacroAssembler::align(int modulus) {
  3901   if (offset() % modulus != 0) {
  3902     nop(modulus - (offset() % modulus));
  3906 void MacroAssembler::enter() {
  3907   pushq(rbp);
  3908   movq(rbp, rsp);
  3911 void MacroAssembler::leave() {
  3912   emit_byte(0xC9); // LEAVE
  3915 // C++ bool manipulation
  3917 void MacroAssembler::movbool(Register dst, Address src) {
  3918   if(sizeof(bool) == 1)
  3919     movb(dst, src);
  3920   else if(sizeof(bool) == 2)
  3921     movw(dst, src);
  3922   else if(sizeof(bool) == 4)
  3923     movl(dst, src);
  3924   else {
  3925     // unsupported
  3926     ShouldNotReachHere();
  3930 void MacroAssembler::movbool(Address dst, bool boolconst) {
  3931   if(sizeof(bool) == 1)
  3932     movb(dst, (int) boolconst);
  3933   else if(sizeof(bool) == 2)
  3934     movw(dst, (int) boolconst);
  3935   else if(sizeof(bool) == 4)
  3936     movl(dst, (int) boolconst);
  3937   else {
  3938     // unsupported
  3939     ShouldNotReachHere();
  3943 void MacroAssembler::movbool(Address dst, Register src) {
  3944   if(sizeof(bool) == 1)
  3945     movb(dst, src);
  3946   else if(sizeof(bool) == 2)
  3947     movw(dst, src);
  3948   else if(sizeof(bool) == 4)
  3949     movl(dst, src);
  3950   else {
  3951     // unsupported
  3952     ShouldNotReachHere();
  3956 void MacroAssembler::testbool(Register dst) {
  3957   if(sizeof(bool) == 1)
  3958     testb(dst, (int) 0xff);
  3959   else if(sizeof(bool) == 2) {
  3960     // need testw impl
  3961     ShouldNotReachHere();
  3962   } else if(sizeof(bool) == 4)
  3963     testl(dst, dst);
  3964   else {
  3965     // unsupported
  3966     ShouldNotReachHere();
  3970 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
  3971                                          Register last_java_fp,
  3972                                          address  last_java_pc) {
  3973   // determine last_java_sp register
  3974   if (!last_java_sp->is_valid()) {
  3975     last_java_sp = rsp;
  3978   // last_java_fp is optional
  3979   if (last_java_fp->is_valid()) {
  3980     movq(Address(r15_thread, JavaThread::last_Java_fp_offset()),
  3981          last_java_fp);
  3984   // last_java_pc is optional
  3985   if (last_java_pc != NULL) {
  3986     Address java_pc(r15_thread,
  3987                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  3988     lea(rscratch1, InternalAddress(last_java_pc));
  3989     movq(java_pc, rscratch1);
  3992   movq(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
  3995 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
  3996                                            bool clear_pc) {
  3997   // we must set sp to zero to clear frame
  3998   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  3999   // must clear fp, so that compiled frames are not confused; it is
  4000   // possible that we need it only for debugging
  4001   if (clear_fp) {
  4002     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  4005   if (clear_pc) {
  4006     movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  4011 // Implementation of call_VM versions
  4013 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  4014   Label L, E;
  4016 #ifdef _WIN64
  4017   // Windows always allocates space for it's register args
  4018   assert(num_args <= 4, "only register arguments supported");
  4019   subq(rsp,  frame::arg_reg_save_area_bytes);
  4020 #endif
  4022   // Align stack if necessary
  4023   testl(rsp, 15);
  4024   jcc(Assembler::zero, L);
  4026   subq(rsp, 8);
  4028     call(RuntimeAddress(entry_point));
  4030   addq(rsp, 8);
  4031   jmp(E);
  4033   bind(L);
  4035     call(RuntimeAddress(entry_point));
  4038   bind(E);
  4040 #ifdef _WIN64
  4041   // restore stack pointer
  4042   addq(rsp, frame::arg_reg_save_area_bytes);
  4043 #endif
  4048 void MacroAssembler::call_VM_base(Register oop_result,
  4049                                   Register java_thread,
  4050                                   Register last_java_sp,
  4051                                   address entry_point,
  4052                                   int num_args,
  4053                                   bool check_exceptions) {
  4054   // determine last_java_sp register
  4055   if (!last_java_sp->is_valid()) {
  4056     last_java_sp = rsp;
  4059   // debugging support
  4060   assert(num_args >= 0, "cannot have negative number of arguments");
  4061   assert(r15_thread != oop_result,
  4062          "cannot use the same register for java_thread & oop_result");
  4063   assert(r15_thread != last_java_sp,
  4064          "cannot use the same register for java_thread & last_java_sp");
  4066   // set last Java frame before call
  4068   // This sets last_Java_fp which is only needed from interpreted frames
  4069   // and should really be done only from the interp_masm version before
  4070   // calling the underlying call_VM. That doesn't happen yet so we set
  4071   // last_Java_fp here even though some callers don't need it and
  4072   // also clear it below.
  4073   set_last_Java_frame(last_java_sp, rbp, NULL);
  4076     Label L, E;
  4078     // Align stack if necessary
  4079 #ifdef _WIN64
  4080     assert(num_args <= 4, "only register arguments supported");
  4081     // Windows always allocates space for it's register args
  4082     subq(rsp, frame::arg_reg_save_area_bytes);
  4083 #endif
  4084     testl(rsp, 15);
  4085     jcc(Assembler::zero, L);
  4087     subq(rsp, 8);
  4089       call(RuntimeAddress(entry_point));
  4091     addq(rsp, 8);
  4092     jmp(E);
  4095     bind(L);
  4097       call(RuntimeAddress(entry_point));
  4100     bind(E);
  4102 #ifdef _WIN64
  4103     // restore stack pointer
  4104     addq(rsp, frame::arg_reg_save_area_bytes);
  4105 #endif
  4108 #ifdef ASSERT
  4109   pushq(rax);
  4111     Label L;
  4112     get_thread(rax);
  4113     cmpq(r15_thread, rax);
  4114     jcc(Assembler::equal, L);
  4115     stop("MacroAssembler::call_VM_base: register not callee saved?");
  4116     bind(L);
  4118   popq(rax);
  4119 #endif
  4121   // reset last Java frame
  4122   // This really shouldn't have to clear fp set note above at the
  4123   // call to set_last_Java_frame
  4124   reset_last_Java_frame(true, false);
  4126   check_and_handle_popframe(noreg);
  4127   check_and_handle_earlyret(noreg);
  4129   if (check_exceptions) {
  4130     cmpq(Address(r15_thread, Thread::pending_exception_offset()), (int) NULL);
  4131     // This used to conditionally jump to forward_exception however it is
  4132     // possible if we relocate that the branch will not reach. So we must jump
  4133     // around so we can always reach
  4134     Label ok;
  4135     jcc(Assembler::equal, ok);
  4136     jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  4137     bind(ok);
  4140   // get oop result if there is one and reset the value in the thread
  4141   if (oop_result->is_valid()) {
  4142     movq(oop_result, Address(r15_thread, JavaThread::vm_result_offset()));
  4143     movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD);
  4144     verify_oop(oop_result);
  4148 void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
  4149 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
  4151 void MacroAssembler::call_VM_helper(Register oop_result,
  4152                                     address entry_point,
  4153                                     int num_args,
  4154                                     bool check_exceptions) {
  4155   // Java thread becomes first argument of C function
  4156   movq(c_rarg0, r15_thread);
  4158   // We've pushed one address, correct last_Java_sp
  4159   leaq(rax, Address(rsp, wordSize));
  4161   call_VM_base(oop_result, noreg, rax, entry_point, num_args,
  4162                check_exceptions);
  4166 void MacroAssembler::call_VM(Register oop_result,
  4167                              address entry_point,
  4168                              bool check_exceptions) {
  4169   Label C, E;
  4170   Assembler::call(C, relocInfo::none);
  4171   jmp(E);
  4173   bind(C);
  4174   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  4175   ret(0);
  4177   bind(E);
  4181 void MacroAssembler::call_VM(Register oop_result,
  4182                              address entry_point,
  4183                              Register arg_1,
  4184                              bool check_exceptions) {
  4185   assert(rax != arg_1, "smashed argument");
  4186   assert(c_rarg0 != arg_1, "smashed argument");
  4188   Label C, E;
  4189   Assembler::call(C, relocInfo::none);
  4190   jmp(E);
  4192   bind(C);
  4193   // c_rarg0 is reserved for thread
  4194   if (c_rarg1 != arg_1) {
  4195     movq(c_rarg1, arg_1);
  4197   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  4198   ret(0);
  4200   bind(E);
  4203 void MacroAssembler::call_VM(Register oop_result,
  4204                              address entry_point,
  4205                              Register arg_1,
  4206                              Register arg_2,
  4207                              bool check_exceptions) {
  4208   assert(rax != arg_1, "smashed argument");
  4209   assert(rax != arg_2, "smashed argument");
  4210   assert(c_rarg0 != arg_1, "smashed argument");
  4211   assert(c_rarg0 != arg_2, "smashed argument");
  4212   assert(c_rarg1 != arg_2, "smashed argument");
  4213   assert(c_rarg2 != arg_1, "smashed argument");
  4215   Label C, E;
  4216   Assembler::call(C, relocInfo::none);
  4217   jmp(E);
  4219   bind(C);
  4220   // c_rarg0 is reserved for thread
  4221   if (c_rarg1 != arg_1) {
  4222     movq(c_rarg1, arg_1);
  4224   if (c_rarg2 != arg_2) {
  4225     movq(c_rarg2, arg_2);
  4227   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  4228   ret(0);
  4230   bind(E);
  4234 void MacroAssembler::call_VM(Register oop_result,
  4235                              address entry_point,
  4236                              Register arg_1,
  4237                              Register arg_2,
  4238                              Register arg_3,
  4239                              bool check_exceptions) {
  4240   assert(rax != arg_1, "smashed argument");
  4241   assert(rax != arg_2, "smashed argument");
  4242   assert(rax != arg_3, "smashed argument");
  4243   assert(c_rarg0 != arg_1, "smashed argument");
  4244   assert(c_rarg0 != arg_2, "smashed argument");
  4245   assert(c_rarg0 != arg_3, "smashed argument");
  4246   assert(c_rarg1 != arg_2, "smashed argument");
  4247   assert(c_rarg1 != arg_3, "smashed argument");
  4248   assert(c_rarg2 != arg_1, "smashed argument");
  4249   assert(c_rarg2 != arg_3, "smashed argument");
  4250   assert(c_rarg3 != arg_1, "smashed argument");
  4251   assert(c_rarg3 != arg_2, "smashed argument");
  4253   Label C, E;
  4254   Assembler::call(C, relocInfo::none);
  4255   jmp(E);
  4257   bind(C);
  4258   // c_rarg0 is reserved for thread
  4259   if (c_rarg1 != arg_1) {
  4260     movq(c_rarg1, arg_1);
  4262   if (c_rarg2 != arg_2) {
  4263     movq(c_rarg2, arg_2);
  4265   if (c_rarg3 != arg_3) {
  4266     movq(c_rarg3, arg_3);
  4268   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  4269   ret(0);
  4271   bind(E);
  4274 void MacroAssembler::call_VM(Register oop_result,
  4275                              Register last_java_sp,
  4276                              address entry_point,
  4277                              int num_args,
  4278                              bool check_exceptions) {
  4279   call_VM_base(oop_result, noreg, last_java_sp, entry_point, num_args,
  4280                check_exceptions);
  4283 void MacroAssembler::call_VM(Register oop_result,
  4284                              Register last_java_sp,
  4285                              address entry_point,
  4286                              Register arg_1,
  4287                              bool check_exceptions) {
  4288   assert(c_rarg0 != arg_1, "smashed argument");
  4289   assert(c_rarg1 != last_java_sp, "smashed argument");
  4290   // c_rarg0 is reserved for thread
  4291   if (c_rarg1 != arg_1) {
  4292     movq(c_rarg1, arg_1);
  4294   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
  4297 void MacroAssembler::call_VM(Register oop_result,
  4298                              Register last_java_sp,
  4299                              address entry_point,
  4300                              Register arg_1,
  4301                              Register arg_2,
  4302                              bool check_exceptions) {
  4303   assert(c_rarg0 != arg_1, "smashed argument");
  4304   assert(c_rarg0 != arg_2, "smashed argument");
  4305   assert(c_rarg1 != arg_2, "smashed argument");
  4306   assert(c_rarg1 != last_java_sp, "smashed argument");
  4307   assert(c_rarg2 != arg_1, "smashed argument");
  4308   assert(c_rarg2 != last_java_sp, "smashed argument");
  4309   // c_rarg0 is reserved for thread
  4310   if (c_rarg1 != arg_1) {
  4311     movq(c_rarg1, arg_1);
  4313   if (c_rarg2 != arg_2) {
  4314     movq(c_rarg2, arg_2);
  4316   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
  4320 void MacroAssembler::call_VM(Register oop_result,
  4321                              Register last_java_sp,
  4322                              address entry_point,
  4323                              Register arg_1,
  4324                              Register arg_2,
  4325                              Register arg_3,
  4326                              bool check_exceptions) {
  4327   assert(c_rarg0 != arg_1, "smashed argument");
  4328   assert(c_rarg0 != arg_2, "smashed argument");
  4329   assert(c_rarg0 != arg_3, "smashed argument");
  4330   assert(c_rarg1 != arg_2, "smashed argument");
  4331   assert(c_rarg1 != arg_3, "smashed argument");
  4332   assert(c_rarg1 != last_java_sp, "smashed argument");
  4333   assert(c_rarg2 != arg_1, "smashed argument");
  4334   assert(c_rarg2 != arg_3, "smashed argument");
  4335   assert(c_rarg2 != last_java_sp, "smashed argument");
  4336   assert(c_rarg3 != arg_1, "smashed argument");
  4337   assert(c_rarg3 != arg_2, "smashed argument");
  4338   assert(c_rarg3 != last_java_sp, "smashed argument");
  4339   // c_rarg0 is reserved for thread
  4340   if (c_rarg1 != arg_1) {
  4341     movq(c_rarg1, arg_1);
  4343   if (c_rarg2 != arg_2) {
  4344     movq(c_rarg2, arg_2);
  4346   if (c_rarg3 != arg_3) {
  4347     movq(c_rarg2, arg_3);
  4349   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
  4352 void MacroAssembler::call_VM_leaf(address entry_point, int num_args) {
  4353   call_VM_leaf_base(entry_point, num_args);
  4356 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  4357   if (c_rarg0 != arg_1) {
  4358     movq(c_rarg0, arg_1);
  4360   call_VM_leaf(entry_point, 1);
  4363 void MacroAssembler::call_VM_leaf(address entry_point,
  4364                                   Register arg_1,
  4365                                   Register arg_2) {
  4366   assert(c_rarg0 != arg_2, "smashed argument");
  4367   assert(c_rarg1 != arg_1, "smashed argument");
  4368   if (c_rarg0 != arg_1) {
  4369     movq(c_rarg0, arg_1);
  4371   if (c_rarg1 != arg_2) {
  4372     movq(c_rarg1, arg_2);
  4374   call_VM_leaf(entry_point, 2);
  4377 void MacroAssembler::call_VM_leaf(address entry_point,
  4378                                   Register arg_1,
  4379                                   Register arg_2,
  4380                                   Register arg_3) {
  4381   assert(c_rarg0 != arg_2, "smashed argument");
  4382   assert(c_rarg0 != arg_3, "smashed argument");
  4383   assert(c_rarg1 != arg_1, "smashed argument");
  4384   assert(c_rarg1 != arg_3, "smashed argument");
  4385   assert(c_rarg2 != arg_1, "smashed argument");
  4386   assert(c_rarg2 != arg_2, "smashed argument");
  4387   if (c_rarg0 != arg_1) {
  4388     movq(c_rarg0, arg_1);
  4390   if (c_rarg1 != arg_2) {
  4391     movq(c_rarg1, arg_2);
  4393   if (c_rarg2 != arg_3) {
  4394     movq(c_rarg2, arg_3);
  4396   call_VM_leaf(entry_point, 3);
  4400 // Calls to C land
  4401 //
  4402 // When entering C land, the rbp & rsp of the last Java frame have to
  4403 // be recorded in the (thread-local) JavaThread object. When leaving C
  4404 // land, the last Java fp has to be reset to 0. This is required to
  4405 // allow proper stack traversal.
  4406 void MacroAssembler::store_check(Register obj) {
  4407   // Does a store check for the oop in register obj. The content of
  4408   // register obj is destroyed afterwards.
  4409   store_check_part_1(obj);
  4410   store_check_part_2(obj);
  4413 void MacroAssembler::store_check(Register obj, Address dst) {
  4414   store_check(obj);
  4417 // split the store check operation so that other instructions can be
  4418 // scheduled inbetween
  4419 void MacroAssembler::store_check_part_1(Register obj) {
  4420   BarrierSet* bs = Universe::heap()->barrier_set();
  4421   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  4422   shrq(obj, CardTableModRefBS::card_shift);
  4425 void MacroAssembler::store_check_part_2(Register obj) {
  4426   BarrierSet* bs = Universe::heap()->barrier_set();
  4427   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  4428   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  4429   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  4430   ExternalAddress cardtable((address)ct->byte_map_base);
  4431   Address index(noreg, obj, Address::times_1);
  4432   movb(as_Address(ArrayAddress(cardtable, index)), 0);
  4435 void MacroAssembler::c2bool(Register x) {
  4436   // implements x == 0 ? 0 : 1
  4437   // note: must only look at least-significant byte of x
  4438   //       since C-style booleans are stored in one byte
  4439   //       only! (was bug)
  4440   andl(x, 0xFF);
  4441   setb(Assembler::notZero, x);
  4444 int MacroAssembler::corrected_idivl(Register reg) {
  4445   // Full implementation of Java idiv and irem; checks for special
  4446   // case as described in JVM spec., p.243 & p.271.  The function
  4447   // returns the (pc) offset of the idivl instruction - may be needed
  4448   // for implicit exceptions.
  4449   //
  4450   //         normal case                           special case
  4451   //
  4452   // input : eax: dividend                         min_int
  4453   //         reg: divisor   (may not be eax/edx)   -1
  4454   //
  4455   // output: eax: quotient  (= eax idiv reg)       min_int
  4456   //         edx: remainder (= eax irem reg)       0
  4457   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  4458   const int min_int = 0x80000000;
  4459   Label normal_case, special_case;
  4461   // check for special case
  4462   cmpl(rax, min_int);
  4463   jcc(Assembler::notEqual, normal_case);
  4464   xorl(rdx, rdx); // prepare edx for possible special case (where
  4465                   // remainder = 0)
  4466   cmpl(reg, -1);
  4467   jcc(Assembler::equal, special_case);
  4469   // handle normal case
  4470   bind(normal_case);
  4471   cdql();
  4472   int idivl_offset = offset();
  4473   idivl(reg);
  4475   // normal and special case exit
  4476   bind(special_case);
  4478   return idivl_offset;
  4481 int MacroAssembler::corrected_idivq(Register reg) {
  4482   // Full implementation of Java ldiv and lrem; checks for special
  4483   // case as described in JVM spec., p.243 & p.271.  The function
  4484   // returns the (pc) offset of the idivl instruction - may be needed
  4485   // for implicit exceptions.
  4486   //
  4487   //         normal case                           special case
  4488   //
  4489   // input : rax: dividend                         min_long
  4490   //         reg: divisor   (may not be eax/edx)   -1
  4491   //
  4492   // output: rax: quotient  (= rax idiv reg)       min_long
  4493   //         rdx: remainder (= rax irem reg)       0
  4494   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  4495   static const int64_t min_long = 0x8000000000000000;
  4496   Label normal_case, special_case;
  4498   // check for special case
  4499   cmp64(rax, ExternalAddress((address) &min_long));
  4500   jcc(Assembler::notEqual, normal_case);
  4501   xorl(rdx, rdx); // prepare rdx for possible special case (where
  4502                   // remainder = 0)
  4503   cmpq(reg, -1);
  4504   jcc(Assembler::equal, special_case);
  4506   // handle normal case
  4507   bind(normal_case);
  4508   cdqq();
  4509   int idivq_offset = offset();
  4510   idivq(reg);
  4512   // normal and special case exit
  4513   bind(special_case);
  4515   return idivq_offset;
  4518 void MacroAssembler::push_IU_state() {
  4519   pushfq();     // Push flags first because pushaq kills them
  4520   subq(rsp, 8); // Make sure rsp stays 16-byte aligned
  4521   pushaq();
  4524 void MacroAssembler::pop_IU_state() {
  4525   popaq();
  4526   addq(rsp, 8);
  4527   popfq();
  4530 void MacroAssembler::push_FPU_state() {
  4531   subq(rsp, FPUStateSizeInWords * wordSize);
  4532   fxsave(Address(rsp, 0));
  4535 void MacroAssembler::pop_FPU_state() {
  4536   fxrstor(Address(rsp, 0));
  4537   addq(rsp, FPUStateSizeInWords * wordSize);
  4540 // Save Integer and Float state
  4541 // Warning: Stack must be 16 byte aligned
  4542 void MacroAssembler::push_CPU_state() {
  4543   push_IU_state();
  4544   push_FPU_state();
  4547 void MacroAssembler::pop_CPU_state() {
  4548   pop_FPU_state();
  4549   pop_IU_state();
  4552 void MacroAssembler::sign_extend_short(Register reg) {
  4553   movswl(reg, reg);
  4556 void MacroAssembler::sign_extend_byte(Register reg) {
  4557   movsbl(reg, reg);
  4560 void MacroAssembler::division_with_shift(Register reg, int shift_value) {
  4561   assert (shift_value > 0, "illegal shift value");
  4562   Label _is_positive;
  4563   testl (reg, reg);
  4564   jcc (Assembler::positive, _is_positive);
  4565   int offset = (1 << shift_value) - 1 ;
  4567   if (offset == 1) {
  4568     incrementl(reg);
  4569   } else {
  4570     addl(reg, offset);
  4573   bind (_is_positive);
  4574   sarl(reg, shift_value);
  4577 void MacroAssembler::round_to_l(Register reg, int modulus) {
  4578   addl(reg, modulus - 1);
  4579   andl(reg, -modulus);
  4582 void MacroAssembler::round_to_q(Register reg, int modulus) {
  4583   addq(reg, modulus - 1);
  4584   andq(reg, -modulus);
  4587 void MacroAssembler::verify_oop(Register reg, const char* s) {
  4588   if (!VerifyOops) {
  4589     return;
  4592   // Pass register number to verify_oop_subroutine
  4593   char* b = new char[strlen(s) + 50];
  4594   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  4596   pushq(rax); // save rax, restored by receiver
  4598   // pass args on stack, only touch rax
  4599   pushq(reg);
  4601   // avoid using pushptr, as it modifies scratch registers
  4602   // and our contract is not to modify anything
  4603   ExternalAddress buffer((address)b);
  4604   movptr(rax, buffer.addr());
  4605   pushq(rax);
  4607   // call indirectly to solve generation ordering problem
  4608   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  4609   call(rax); // no alignment requirement
  4610   // everything popped by receiver
  4613 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  4614   if (!VerifyOops) return;
  4615   // Pass register number to verify_oop_subroutine
  4616   char* b = new char[strlen(s) + 50];
  4617   sprintf(b, "verify_oop_addr: %s", s);
  4618   pushq(rax);                          // save rax
  4619   movq(addr, rax);
  4620   pushq(rax);                          // pass register argument
  4623   // avoid using pushptr, as it modifies scratch registers
  4624   // and our contract is not to modify anything
  4625   ExternalAddress buffer((address)b);
  4626   movptr(rax, buffer.addr());
  4627   pushq(rax);
  4629   // call indirectly to solve generation ordering problem
  4630   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  4631   call(rax); // no alignment requirement
  4632   // everything popped by receiver
  4636 void MacroAssembler::stop(const char* msg) {
  4637   address rip = pc();
  4638   pushaq(); // get regs on stack
  4639   lea(c_rarg0, ExternalAddress((address) msg));
  4640   lea(c_rarg1, InternalAddress(rip));
  4641   movq(c_rarg2, rsp); // pass pointer to regs array
  4642   andq(rsp, -16); // align stack as required by ABI
  4643   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug)));
  4644   hlt();
  4647 void MacroAssembler::warn(const char* msg) {
  4648   pushq(r12);
  4649   movq(r12, rsp);
  4650   andq(rsp, -16);     // align stack as required by push_CPU_state and call
  4652   push_CPU_state();   // keeps alignment at 16 bytes
  4653   lea(c_rarg0, ExternalAddress((address) msg));
  4654   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  4655   pop_CPU_state();
  4657   movq(rsp, r12);
  4658   popq(r12);
  4661 void MacroAssembler::debug(char* msg, int64_t pc, int64_t regs[]) {
  4662   // In order to get locks to work, we need to fake a in_VM state
  4663   if (ShowMessageBoxOnError ) {
  4664     JavaThread* thread = JavaThread::current();
  4665     JavaThreadState saved_state = thread->thread_state();
  4666     thread->set_thread_state(_thread_in_vm);
  4667     ttyLocker ttyl;
  4668 #ifndef PRODUCT
  4669     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
  4670       BytecodeCounter::print();
  4672 #endif
  4673     // To see where a verify_oop failed, get $ebx+40/X for this frame.
  4674     // XXX correct this offset for amd64
  4675     // This is the value of eip which points to where verify_oop will return.
  4676     if (os::message_box(msg, "Execution stopped, print registers?")) {
  4677       tty->print_cr("rip = 0x%016lx", pc);
  4678       tty->print_cr("rax = 0x%016lx", regs[15]);
  4679       tty->print_cr("rbx = 0x%016lx", regs[12]);
  4680       tty->print_cr("rcx = 0x%016lx", regs[14]);
  4681       tty->print_cr("rdx = 0x%016lx", regs[13]);
  4682       tty->print_cr("rdi = 0x%016lx", regs[8]);
  4683       tty->print_cr("rsi = 0x%016lx", regs[9]);
  4684       tty->print_cr("rbp = 0x%016lx", regs[10]);
  4685       tty->print_cr("rsp = 0x%016lx", regs[11]);
  4686       tty->print_cr("r8  = 0x%016lx", regs[7]);
  4687       tty->print_cr("r9  = 0x%016lx", regs[6]);
  4688       tty->print_cr("r10 = 0x%016lx", regs[5]);
  4689       tty->print_cr("r11 = 0x%016lx", regs[4]);
  4690       tty->print_cr("r12 = 0x%016lx", regs[3]);
  4691       tty->print_cr("r13 = 0x%016lx", regs[2]);
  4692       tty->print_cr("r14 = 0x%016lx", regs[1]);
  4693       tty->print_cr("r15 = 0x%016lx", regs[0]);
  4694       BREAKPOINT;
  4696     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  4697   } else {
  4698     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
  4699                     msg);
  4703 void MacroAssembler::os_breakpoint() {
  4704   // instead of directly emitting a breakpoint, call os:breakpoint for
  4705   // better debugability
  4706   // This shouldn't need alignment, it's an empty function
  4707   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
  4710 // Write serialization page so VM thread can do a pseudo remote membar.
  4711 // We use the current thread pointer to calculate a thread specific
  4712 // offset to write to within the page. This minimizes bus traffic
  4713 // due to cache line collision.
  4714 void MacroAssembler::serialize_memory(Register thread,
  4715                                       Register tmp) {
  4717   movl(tmp, thread);
  4718   shrl(tmp, os::get_serialize_page_shift_count());
  4719   andl(tmp, (os::vm_page_size() - sizeof(int)));
  4721   Address index(noreg, tmp, Address::times_1);
  4722   ExternalAddress page(os::get_memory_serialize_page());
  4724   movptr(ArrayAddress(page, index), tmp);
  4727 void MacroAssembler::verify_tlab() {
  4728 #ifdef ASSERT
  4729   if (UseTLAB) {
  4730     Label next, ok;
  4731     Register t1 = rsi;
  4733     pushq(t1);
  4735     movq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
  4736     cmpq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_start_offset())));
  4737     jcc(Assembler::aboveEqual, next);
  4738     stop("assert(top >= start)");
  4739     should_not_reach_here();
  4741     bind(next);
  4742     movq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
  4743     cmpq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
  4744     jcc(Assembler::aboveEqual, ok);
  4745     stop("assert(top <= end)");
  4746     should_not_reach_here();
  4748     bind(ok);
  4750     popq(t1);
  4752 #endif
  4755 // Defines obj, preserves var_size_in_bytes
  4756 void MacroAssembler::eden_allocate(Register obj,
  4757                                    Register var_size_in_bytes,
  4758                                    int con_size_in_bytes,
  4759                                    Register t1,
  4760                                    Label& slow_case) {
  4761   assert(obj == rax, "obj must be in rax for cmpxchg");
  4762   assert_different_registers(obj, var_size_in_bytes, t1);
  4763   Register end = t1;
  4764   Label retry;
  4765   bind(retry);
  4766   ExternalAddress heap_top((address) Universe::heap()->top_addr());
  4767   movptr(obj, heap_top);
  4768   if (var_size_in_bytes == noreg) {
  4769     leaq(end, Address(obj, con_size_in_bytes));
  4770   } else {
  4771     leaq(end, Address(obj, var_size_in_bytes, Address::times_1));
  4773   // if end < obj then we wrapped around => object too long => slow case
  4774   cmpq(end, obj);
  4775   jcc(Assembler::below, slow_case);
  4776   cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
  4778   jcc(Assembler::above, slow_case);
  4779   // Compare obj with the top addr, and if still equal, store the new
  4780   // top addr in end at the address of the top addr pointer. Sets ZF
  4781   // if was equal, and clears it otherwise. Use lock prefix for
  4782   // atomicity on MPs.
  4783   if (os::is_MP()) {
  4784     lock();
  4786   cmpxchgptr(end, heap_top);
  4787   // if someone beat us on the allocation, try again, otherwise continue
  4788   jcc(Assembler::notEqual, retry);
  4791 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  4792 void MacroAssembler::tlab_allocate(Register obj,
  4793                                    Register var_size_in_bytes,
  4794                                    int con_size_in_bytes,
  4795                                    Register t1,
  4796                                    Register t2,
  4797                                    Label& slow_case) {
  4798   assert_different_registers(obj, t1, t2);
  4799   assert_different_registers(obj, var_size_in_bytes, t1);
  4800   Register end = t2;
  4802   verify_tlab();
  4804   movq(obj, Address(r15_thread, JavaThread::tlab_top_offset()));
  4805   if (var_size_in_bytes == noreg) {
  4806     leaq(end, Address(obj, con_size_in_bytes));
  4807   } else {
  4808     leaq(end, Address(obj, var_size_in_bytes, Address::times_1));
  4810   cmpq(end, Address(r15_thread, JavaThread::tlab_end_offset()));
  4811   jcc(Assembler::above, slow_case);
  4813   // update the tlab top pointer
  4814   movq(Address(r15_thread, JavaThread::tlab_top_offset()), end);
  4816   // recover var_size_in_bytes if necessary
  4817   if (var_size_in_bytes == end) {
  4818     subq(var_size_in_bytes, obj);
  4820   verify_tlab();
  4823 // Preserves rbx and rdx.
  4824 void MacroAssembler::tlab_refill(Label& retry,
  4825                                  Label& try_eden,
  4826                                  Label& slow_case) {
  4827   Register top = rax;
  4828   Register t1 = rcx;
  4829   Register t2 = rsi;
  4830   Register t3 = r10;
  4831   Register thread_reg = r15_thread;
  4832   assert_different_registers(top, thread_reg, t1, t2, t3,
  4833                              /* preserve: */ rbx, rdx);
  4834   Label do_refill, discard_tlab;
  4836   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  4837     // No allocation in the shared eden.
  4838     jmp(slow_case);
  4841   movq(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  4842   movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
  4844   // calculate amount of free space
  4845   subq(t1, top);
  4846   shrq(t1, LogHeapWordSize);
  4848   // Retain tlab and allocate object in shared space if
  4849   // the amount free in the tlab is too large to discard.
  4850   cmpq(t1, Address(thread_reg, // size_t
  4851                    in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  4852   jcc(Assembler::lessEqual, discard_tlab);
  4854   // Retain
  4855   mov64(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  4856   addq(Address(thread_reg,  // size_t
  4857                in_bytes(JavaThread::tlab_refill_waste_limit_offset())),
  4858        t2);
  4859   if (TLABStats) {
  4860     // increment number of slow_allocations
  4861     addl(Address(thread_reg, // unsigned int
  4862                  in_bytes(JavaThread::tlab_slow_allocations_offset())),
  4863          1);
  4865   jmp(try_eden);
  4867   bind(discard_tlab);
  4868   if (TLABStats) {
  4869     // increment number of refills
  4870     addl(Address(thread_reg, // unsigned int
  4871                  in_bytes(JavaThread::tlab_number_of_refills_offset())),
  4872          1);
  4873     // accumulate wastage -- t1 is amount free in tlab
  4874     addl(Address(thread_reg, // unsigned int
  4875                  in_bytes(JavaThread::tlab_fast_refill_waste_offset())),
  4876          t1);
  4879   // if tlab is currently allocated (top or end != null) then
  4880   // fill [top, end + alignment_reserve) with array object
  4881   testq(top, top);
  4882   jcc(Assembler::zero, do_refill);
  4884   // set up the mark word
  4885   mov64(t3, (int64_t) markOopDesc::prototype()->copy_set_hash(0x2));
  4886   movq(Address(top, oopDesc::mark_offset_in_bytes()), t3);
  4887   // set the length to the remaining space
  4888   subq(t1, typeArrayOopDesc::header_size(T_INT));
  4889   addq(t1, (int)ThreadLocalAllocBuffer::alignment_reserve());
  4890   shlq(t1, log2_intptr(HeapWordSize / sizeof(jint)));
  4891   movq(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  4892   // set klass to intArrayKlass
  4893   movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  4894   movq(Address(top, oopDesc::klass_offset_in_bytes()), t1);
  4896   // refill the tlab with an eden allocation
  4897   bind(do_refill);
  4898   movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  4899   shlq(t1, LogHeapWordSize);
  4900   // add object_size ??
  4901   eden_allocate(top, t1, 0, t2, slow_case);
  4903   // Check that t1 was preserved in eden_allocate.
  4904 #ifdef ASSERT
  4905   if (UseTLAB) {
  4906     Label ok;
  4907     Register tsize = rsi;
  4908     assert_different_registers(tsize, thread_reg, t1);
  4909     pushq(tsize);
  4910     movq(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  4911     shlq(tsize, LogHeapWordSize);
  4912     cmpq(t1, tsize);
  4913     jcc(Assembler::equal, ok);
  4914     stop("assert(t1 != tlab size)");
  4915     should_not_reach_here();
  4917     bind(ok);
  4918     popq(tsize);
  4920 #endif
  4921   movq(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  4922   movq(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  4923   addq(top, t1);
  4924   subq(top, (int)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  4925   movq(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  4926   verify_tlab();
  4927   jmp(retry);
  4931 int MacroAssembler::biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg,
  4932                                          bool swap_reg_contains_mark,
  4933                                          Label& done, Label* slow_case,
  4934                                          BiasedLockingCounters* counters) {
  4935   assert(UseBiasedLocking, "why call this otherwise?");
  4936   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  4937   assert(tmp_reg != noreg, "tmp_reg must be supplied");
  4938   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  4939   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  4940   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  4941   Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  4942   Address saved_mark_addr(lock_reg, 0);
  4944   if (PrintBiasedLockingStatistics && counters == NULL)
  4945     counters = BiasedLocking::counters();
  4947   // Biased locking
  4948   // See whether the lock is currently biased toward our thread and
  4949   // whether the epoch is still valid
  4950   // Note that the runtime guarantees sufficient alignment of JavaThread
  4951   // pointers to allow age to be placed into low bits
  4952   // First check to see whether biasing is even enabled for this object
  4953   Label cas_label;
  4954   int null_check_offset = -1;
  4955   if (!swap_reg_contains_mark) {
  4956     null_check_offset = offset();
  4957     movq(swap_reg, mark_addr);
  4959   movq(tmp_reg, swap_reg);
  4960   andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  4961   cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  4962   jcc(Assembler::notEqual, cas_label);
  4963   // The bias pattern is present in the object's header. Need to check
  4964   // whether the bias owner and the epoch are both still current.
  4965   movq(tmp_reg, klass_addr);
  4966   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  4967   orq(tmp_reg, r15_thread);
  4968   xorq(tmp_reg, swap_reg);
  4969   andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  4970   if (counters != NULL) {
  4971     cond_inc32(Assembler::zero,
  4972                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  4974   jcc(Assembler::equal, done);
  4976   Label try_revoke_bias;
  4977   Label try_rebias;
  4979   // At this point we know that the header has the bias pattern and
  4980   // that we are not the bias owner in the current epoch. We need to
  4981   // figure out more details about the state of the header in order to
  4982   // know what operations can be legally performed on the object's
  4983   // header.
  4985   // If the low three bits in the xor result aren't clear, that means
  4986   // the prototype header is no longer biased and we have to revoke
  4987   // the bias on this object.
  4988   testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  4989   jcc(Assembler::notZero, try_revoke_bias);
  4991   // Biasing is still enabled for this data type. See whether the
  4992   // epoch of the current bias is still valid, meaning that the epoch
  4993   // bits of the mark word are equal to the epoch bits of the
  4994   // prototype header. (Note that the prototype header's epoch bits
  4995   // only change at a safepoint.) If not, attempt to rebias the object
  4996   // toward the current thread. Note that we must be absolutely sure
  4997   // that the current epoch is invalid in order to do this because
  4998   // otherwise the manipulations it performs on the mark word are
  4999   // illegal.
  5000   testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  5001   jcc(Assembler::notZero, try_rebias);
  5003   // The epoch of the current bias is still valid but we know nothing
  5004   // about the owner; it might be set or it might be clear. Try to
  5005   // acquire the bias of the object using an atomic operation. If this
  5006   // fails we will go in to the runtime to revoke the object's bias.
  5007   // Note that we first construct the presumed unbiased header so we
  5008   // don't accidentally blow away another thread's valid bias.
  5009   andq(swap_reg,
  5010        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  5011   movq(tmp_reg, swap_reg);
  5012   orq(tmp_reg, r15_thread);
  5013   if (os::is_MP()) {
  5014     lock();
  5016   cmpxchgq(tmp_reg, Address(obj_reg, 0));
  5017   // If the biasing toward our thread failed, this means that
  5018   // another thread succeeded in biasing it toward itself and we
  5019   // need to revoke that bias. The revocation will occur in the
  5020   // interpreter runtime in the slow case.
  5021   if (counters != NULL) {
  5022     cond_inc32(Assembler::zero,
  5023                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  5025   if (slow_case != NULL) {
  5026     jcc(Assembler::notZero, *slow_case);
  5028   jmp(done);
  5030   bind(try_rebias);
  5031   // At this point we know the epoch has expired, meaning that the
  5032   // current "bias owner", if any, is actually invalid. Under these
  5033   // circumstances _only_, we are allowed to use the current header's
  5034   // value as the comparison value when doing the cas to acquire the
  5035   // bias in the current epoch. In other words, we allow transfer of
  5036   // the bias from one thread to another directly in this situation.
  5037   //
  5038   // FIXME: due to a lack of registers we currently blow away the age
  5039   // bits in this situation. Should attempt to preserve them.
  5040   movq(tmp_reg, klass_addr);
  5041   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  5042   orq(tmp_reg, r15_thread);
  5043   if (os::is_MP()) {
  5044     lock();
  5046   cmpxchgq(tmp_reg, Address(obj_reg, 0));
  5047   // If the biasing toward our thread failed, then another thread
  5048   // succeeded in biasing it toward itself and we need to revoke that
  5049   // bias. The revocation will occur in the runtime in the slow case.
  5050   if (counters != NULL) {
  5051     cond_inc32(Assembler::zero,
  5052                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  5054   if (slow_case != NULL) {
  5055     jcc(Assembler::notZero, *slow_case);
  5057   jmp(done);
  5059   bind(try_revoke_bias);
  5060   // The prototype mark in the klass doesn't have the bias bit set any
  5061   // more, indicating that objects of this data type are not supposed
  5062   // to be biased any more. We are going to try to reset the mark of
  5063   // this object to the prototype value and fall through to the
  5064   // CAS-based locking scheme. Note that if our CAS fails, it means
  5065   // that another thread raced us for the privilege of revoking the
  5066   // bias of this particular object, so it's okay to continue in the
  5067   // normal locking code.
  5068   //
  5069   // FIXME: due to a lack of registers we currently blow away the age
  5070   // bits in this situation. Should attempt to preserve them.
  5071   movq(tmp_reg, klass_addr);
  5072   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  5073   if (os::is_MP()) {
  5074     lock();
  5076   cmpxchgq(tmp_reg, Address(obj_reg, 0));
  5077   // Fall through to the normal CAS-based lock, because no matter what
  5078   // the result of the above CAS, some thread must have succeeded in
  5079   // removing the bias bit from the object's header.
  5080   if (counters != NULL) {
  5081     cond_inc32(Assembler::zero,
  5082                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  5085   bind(cas_label);
  5087   return null_check_offset;
  5091 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  5092   assert(UseBiasedLocking, "why call this otherwise?");
  5094   // Check for biased locking unlock case, which is a no-op
  5095   // Note: we do not have to check the thread ID for two reasons.
  5096   // First, the interpreter checks for IllegalMonitorStateException at
  5097   // a higher level. Second, if the bias was revoked while we held the
  5098   // lock, the object could not be rebiased toward another thread, so
  5099   // the bias bit would be clear.
  5100   movq(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  5101   andq(temp_reg, markOopDesc::biased_lock_mask_in_place);
  5102   cmpq(temp_reg, markOopDesc::biased_lock_pattern);
  5103   jcc(Assembler::equal, done);
  5107 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  5108   switch (cond) {
  5109     // Note some conditions are synonyms for others
  5110     case Assembler::zero:         return Assembler::notZero;
  5111     case Assembler::notZero:      return Assembler::zero;
  5112     case Assembler::less:         return Assembler::greaterEqual;
  5113     case Assembler::lessEqual:    return Assembler::greater;
  5114     case Assembler::greater:      return Assembler::lessEqual;
  5115     case Assembler::greaterEqual: return Assembler::less;
  5116     case Assembler::below:        return Assembler::aboveEqual;
  5117     case Assembler::belowEqual:   return Assembler::above;
  5118     case Assembler::above:        return Assembler::belowEqual;
  5119     case Assembler::aboveEqual:   return Assembler::below;
  5120     case Assembler::overflow:     return Assembler::noOverflow;
  5121     case Assembler::noOverflow:   return Assembler::overflow;
  5122     case Assembler::negative:     return Assembler::positive;
  5123     case Assembler::positive:     return Assembler::negative;
  5124     case Assembler::parity:       return Assembler::noParity;
  5125     case Assembler::noParity:     return Assembler::parity;
  5127   ShouldNotReachHere(); return Assembler::overflow;
  5131 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  5132   Condition negated_cond = negate_condition(cond);
  5133   Label L;
  5134   jcc(negated_cond, L);
  5135   atomic_incl(counter_addr);
  5136   bind(L);
  5139 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  5140   pushfq();
  5141   if (os::is_MP())
  5142     lock();
  5143   incrementl(counter_addr);
  5144   popfq();
  5147 SkipIfEqual::SkipIfEqual(
  5148     MacroAssembler* masm, const bool* flag_addr, bool value) {
  5149   _masm = masm;
  5150   _masm->cmp8(ExternalAddress((address)flag_addr), value);
  5151   _masm->jcc(Assembler::equal, _label);
  5154 SkipIfEqual::~SkipIfEqual() {
  5155   _masm->bind(_label);
  5158 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  5159   movq(tmp, rsp);
  5160   // Bang stack for total size given plus shadow page size.
  5161   // Bang one page at a time because large size can bang beyond yellow and
  5162   // red zones.
  5163   Label loop;
  5164   bind(loop);
  5165   movl(Address(tmp, (-os::vm_page_size())), size );
  5166   subq(tmp, os::vm_page_size());
  5167   subl(size, os::vm_page_size());
  5168   jcc(Assembler::greater, loop);
  5170   // Bang down shadow pages too.
  5171   // The -1 because we already subtracted 1 page.
  5172   for (int i = 0; i< StackShadowPages-1; i++) {
  5173     movq(Address(tmp, (-i*os::vm_page_size())), size );

mercurial