jdk8-mips64-public/hotspot: src/cpu/x86/vm/assembler

7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
Summary: Moved sizeof(klassOopDesc), changed the return type to ByteSize and removed the _in_bytes suffix.
Reviewed-by: never, bdelsart, coleenp, jrose

     1 /*

     2  * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  *

    23  */

    25 #include "precompiled.hpp"

    26 #include "assembler_x86.inline.hpp"

    27 #include "gc_interface/collectedHeap.inline.hpp"

    28 #include "interpreter/interpreter.hpp"

    29 #include "memory/cardTableModRefBS.hpp"

    30 #include "memory/resourceArea.hpp"

    31 #include "prims/methodHandles.hpp"

    32 #include "runtime/biasedLocking.hpp"

    33 #include "runtime/interfaceSupport.hpp"

    34 #include "runtime/objectMonitor.hpp"

    35 #include "runtime/os.hpp"

    36 #include "runtime/sharedRuntime.hpp"

    37 #include "runtime/stubRoutines.hpp"

    38 #ifndef SERIALGC

    39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"

    40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"

    41 #include "gc_implementation/g1/heapRegion.hpp"

    42 #endif

    44 // Implementation of AddressLiteral

    46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {

    47   _is_lval = false;

    48   _target = target;

    49   switch (rtype) {

    50   case relocInfo::oop_type:

    51     // Oops are a special case. Normally they would be their own section

    52     // but in cases like icBuffer they are literals in the code stream that

    53     // we don't have a section for. We use none so that we get a literal address

    54     // which is always patchable.

    55     break;

    56   case relocInfo::external_word_type:

    57     _rspec = external_word_Relocation::spec(target);

    58     break;

    59   case relocInfo::internal_word_type:

    60     _rspec = internal_word_Relocation::spec(target);

    61     break;

    62   case relocInfo::opt_virtual_call_type:

    63     _rspec = opt_virtual_call_Relocation::spec();

    64     break;

    65   case relocInfo::static_call_type:

    66     _rspec = static_call_Relocation::spec();

    67     break;

    68   case relocInfo::runtime_call_type:

    69     _rspec = runtime_call_Relocation::spec();

    70     break;

    71   case relocInfo::poll_type:

    72   case relocInfo::poll_return_type:

    73     _rspec = Relocation::spec_simple(rtype);

    74     break;

    75   case relocInfo::none:

    76     break;

    77   default:

    78     ShouldNotReachHere();

    79     break;

    80   }

    81 }

    83 // Implementation of Address

    85 #ifdef _LP64

    87 Address Address::make_array(ArrayAddress adr) {

    88   // Not implementable on 64bit machines

    89   // Should have been handled higher up the call chain.

    90   ShouldNotReachHere();

    91   return Address();

    92 }

    94 // exceedingly dangerous constructor

    95 Address::Address(int disp, address loc, relocInfo::relocType rtype) {

    96   _base  = noreg;

    97   _index = noreg;

    98   _scale = no_scale;

    99   _disp  = disp;

   100   switch (rtype) {

   101     case relocInfo::external_word_type:

   102       _rspec = external_word_Relocation::spec(loc);

   103       break;

   104     case relocInfo::internal_word_type:

   105       _rspec = internal_word_Relocation::spec(loc);

   106       break;

   107     case relocInfo::runtime_call_type:

   108       // HMM

   109       _rspec = runtime_call_Relocation::spec();

   110       break;

   111     case relocInfo::poll_type:

   112     case relocInfo::poll_return_type:

   113       _rspec = Relocation::spec_simple(rtype);

   114       break;

   115     case relocInfo::none:

   116       break;

   117     default:

   118       ShouldNotReachHere();

   119   }

   120 }

   121 #else // LP64

   123 Address Address::make_array(ArrayAddress adr) {

   124   AddressLiteral base = adr.base();

   125   Address index = adr.index();

   126   assert(index._disp == 0, "must not have disp"); // maybe it can?

   127   Address array(index._base, index._index, index._scale, (intptr_t) base.target());

   128   array._rspec = base._rspec;

   129   return array;

   130 }

   132 // exceedingly dangerous constructor

   133 Address::Address(address loc, RelocationHolder spec) {

   134   _base  = noreg;

   135   _index = noreg;

   136   _scale = no_scale;

   137   _disp  = (intptr_t) loc;

   138   _rspec = spec;

   139 }

   141 #endif // _LP64

   145 // Convert the raw encoding form into the form expected by the constructor for

   146 // Address.  An index of 4 (rsp) corresponds to having no index, so convert

   147 // that to noreg for the Address constructor.

   148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {

   149   RelocationHolder rspec;

   150   if (disp_is_oop) {

   151     rspec = Relocation::spec_simple(relocInfo::oop_type);

   152   }

   153   bool valid_index = index != rsp->encoding();

   154   if (valid_index) {

   155     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));

   156     madr._rspec = rspec;

   157     return madr;

   158   } else {

   159     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));

   160     madr._rspec = rspec;

   161     return madr;

   162   }

   163 }

   165 // Implementation of Assembler

   167 int AbstractAssembler::code_fill_byte() {

   168   return (u_char)'\xF4'; // hlt

   169 }

   171 // make this go away someday

   172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {

   173   if (rtype == relocInfo::none)

   174         emit_long(data);

   175   else  emit_data(data, Relocation::spec_simple(rtype), format);

   176 }

   178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {

   179   assert(imm_operand == 0, "default format must be immediate in this file");

   180   assert(inst_mark() != NULL, "must be inside InstructionMark");

   181   if (rspec.type() !=  relocInfo::none) {

   182     #ifdef ASSERT

   183       check_relocation(rspec, format);

   184     #endif

   185     // Do not use AbstractAssembler::relocate, which is not intended for

   186     // embedded words.  Instead, relocate to the enclosing instruction.

   188     // hack. call32 is too wide for mask so use disp32

   189     if (format == call32_operand)

   190       code_section()->relocate(inst_mark(), rspec, disp32_operand);

   191     else

   192       code_section()->relocate(inst_mark(), rspec, format);

   193   }

   194   emit_long(data);

   195 }

   197 static int encode(Register r) {

   198   int enc = r->encoding();

   199   if (enc >= 8) {

   200     enc -= 8;

   201   }

   202   return enc;

   203 }

   205 static int encode(XMMRegister r) {

   206   int enc = r->encoding();

   207   if (enc >= 8) {

   208     enc -= 8;

   209   }

   210   return enc;

   211 }

   213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {

   214   assert(dst->has_byte_register(), "must have byte register");

   215   assert(isByte(op1) && isByte(op2), "wrong opcode");

   216   assert(isByte(imm8), "not a byte");

   217   assert((op1 & 0x01) == 0, "should be 8bit operation");

   218   emit_byte(op1);

   219   emit_byte(op2 | encode(dst));

   220   emit_byte(imm8);

   221 }

   224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {

   225   assert(isByte(op1) && isByte(op2), "wrong opcode");

   226   assert((op1 & 0x01) == 1, "should be 32bit operation");

   227   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");

   228   if (is8bit(imm32)) {

   229     emit_byte(op1 | 0x02); // set sign bit

   230     emit_byte(op2 | encode(dst));

   231     emit_byte(imm32 & 0xFF);

   232   } else {

   233     emit_byte(op1);

   234     emit_byte(op2 | encode(dst));

   235     emit_long(imm32);

   236   }

   237 }

   239 // immediate-to-memory forms

   240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {

   241   assert((op1 & 0x01) == 1, "should be 32bit operation");

   242   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");

   243   if (is8bit(imm32)) {

   244     emit_byte(op1 | 0x02); // set sign bit

   245     emit_operand(rm, adr, 1);

   246     emit_byte(imm32 & 0xFF);

   247   } else {

   248     emit_byte(op1);

   249     emit_operand(rm, adr, 4);

   250     emit_long(imm32);

   251   }

   252 }

   254 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {

   255   LP64_ONLY(ShouldNotReachHere());

   256   assert(isByte(op1) && isByte(op2), "wrong opcode");

   257   assert((op1 & 0x01) == 1, "should be 32bit operation");

   258   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");

   259   InstructionMark im(this);

   260   emit_byte(op1);

   261   emit_byte(op2 | encode(dst));

   262   emit_data((intptr_t)obj, relocInfo::oop_type, 0);

   263 }

   266 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {

   267   assert(isByte(op1) && isByte(op2), "wrong opcode");

   268   emit_byte(op1);

   269   emit_byte(op2 | encode(dst) << 3 | encode(src));

   270 }

   273 void Assembler::emit_operand(Register reg, Register base, Register index,

   274                              Address::ScaleFactor scale, int disp,

   275                              RelocationHolder const& rspec,

   276                              int rip_relative_correction) {

   277   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

   279   // Encode the registers as needed in the fields they are used in

   281   int regenc = encode(reg) << 3;

   282   int indexenc = index->is_valid() ? encode(index) << 3 : 0;

   283   int baseenc = base->is_valid() ? encode(base) : 0;

   285   if (base->is_valid()) {

   286     if (index->is_valid()) {

   287       assert(scale != Address::no_scale, "inconsistent address");

   288       // [base + index*scale + disp]

   289       if (disp == 0 && rtype == relocInfo::none  &&

   290           base != rbp LP64_ONLY(&& base != r13)) {

   291         // [base + index*scale]

   292         // [00 reg 100][ss index base]

   293         assert(index != rsp, "illegal addressing mode");

   294         emit_byte(0x04 | regenc);

   295         emit_byte(scale << 6 | indexenc | baseenc);

   296       } else if (is8bit(disp) && rtype == relocInfo::none) {

   297         // [base + index*scale + imm8]

   298         // [01 reg 100][ss index base] imm8

   299         assert(index != rsp, "illegal addressing mode");

   300         emit_byte(0x44 | regenc);

   301         emit_byte(scale << 6 | indexenc | baseenc);

   302         emit_byte(disp & 0xFF);

   303       } else {

   304         // [base + index*scale + disp32]

   305         // [10 reg 100][ss index base] disp32

   306         assert(index != rsp, "illegal addressing mode");

   307         emit_byte(0x84 | regenc);

   308         emit_byte(scale << 6 | indexenc | baseenc);

   309         emit_data(disp, rspec, disp32_operand);

   310       }

   311     } else if (base == rsp LP64_ONLY(|| base == r12)) {

   312       // [rsp + disp]

   313       if (disp == 0 && rtype == relocInfo::none) {

   314         // [rsp]

   315         // [00 reg 100][00 100 100]

   316         emit_byte(0x04 | regenc);

   317         emit_byte(0x24);

   318       } else if (is8bit(disp) && rtype == relocInfo::none) {

   319         // [rsp + imm8]

   320         // [01 reg 100][00 100 100] disp8

   321         emit_byte(0x44 | regenc);

   322         emit_byte(0x24);

   323         emit_byte(disp & 0xFF);

   324       } else {

   325         // [rsp + imm32]

   326         // [10 reg 100][00 100 100] disp32

   327         emit_byte(0x84 | regenc);

   328         emit_byte(0x24);

   329         emit_data(disp, rspec, disp32_operand);

   330       }

   331     } else {

   332       // [base + disp]

   333       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");

   334       if (disp == 0 && rtype == relocInfo::none &&

   335           base != rbp LP64_ONLY(&& base != r13)) {

   336         // [base]

   337         // [00 reg base]

   338         emit_byte(0x00 | regenc | baseenc);

   339       } else if (is8bit(disp) && rtype == relocInfo::none) {

   340         // [base + disp8]

   341         // [01 reg base] disp8

   342         emit_byte(0x40 | regenc | baseenc);

   343         emit_byte(disp & 0xFF);

   344       } else {

   345         // [base + disp32]

   346         // [10 reg base] disp32

   347         emit_byte(0x80 | regenc | baseenc);

   348         emit_data(disp, rspec, disp32_operand);

   349       }

   350     }

   351   } else {

   352     if (index->is_valid()) {

   353       assert(scale != Address::no_scale, "inconsistent address");

   354       // [index*scale + disp]

   355       // [00 reg 100][ss index 101] disp32

   356       assert(index != rsp, "illegal addressing mode");

   357       emit_byte(0x04 | regenc);

   358       emit_byte(scale << 6 | indexenc | 0x05);

   359       emit_data(disp, rspec, disp32_operand);

   360     } else if (rtype != relocInfo::none ) {

   361       // [disp] (64bit) RIP-RELATIVE (32bit) abs

   362       // [00 000 101] disp32

   364       emit_byte(0x05 | regenc);

   365       // Note that the RIP-rel. correction applies to the generated

   366       // disp field, but _not_ to the target address in the rspec.

   368       // disp was created by converting the target address minus the pc

   369       // at the start of the instruction. That needs more correction here.

   370       // intptr_t disp = target - next_ip;

   371       assert(inst_mark() != NULL, "must be inside InstructionMark");

   372       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;

   373       int64_t adjusted = disp;

   374       // Do rip-rel adjustment for 64bit

   375       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));

   376       assert(is_simm32(adjusted),

   377              "must be 32bit offset (RIP relative address)");

   378       emit_data((int32_t) adjusted, rspec, disp32_operand);

   380     } else {

   381       // 32bit never did this, did everything as the rip-rel/disp code above

   382       // [disp] ABSOLUTE

   383       // [00 reg 100][00 100 101] disp32

   384       emit_byte(0x04 | regenc);

   385       emit_byte(0x25);

   386       emit_data(disp, rspec, disp32_operand);

   387     }

   388   }

   389 }

   391 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,

   392                              Address::ScaleFactor scale, int disp,

   393                              RelocationHolder const& rspec) {

   394   emit_operand((Register)reg, base, index, scale, disp, rspec);

   395 }

   397 // Secret local extension to Assembler::WhichOperand:

   398 #define end_pc_operand (_WhichOperand_limit)

   400 address Assembler::locate_operand(address inst, WhichOperand which) {

   401   // Decode the given instruction, and return the address of

   402   // an embedded 32-bit operand word.

   404   // If "which" is disp32_operand, selects the displacement portion

   405   // of an effective address specifier.

   406   // If "which" is imm64_operand, selects the trailing immediate constant.

   407   // If "which" is call32_operand, selects the displacement of a call or jump.

   408   // Caller is responsible for ensuring that there is such an operand,

   409   // and that it is 32/64 bits wide.

   411   // If "which" is end_pc_operand, find the end of the instruction.

   413   address ip = inst;

   414   bool is_64bit = false;

   416   debug_only(bool has_disp32 = false);

   417   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

   419   again_after_prefix:

   420   switch (0xFF & *ip++) {

   422   // These convenience macros generate groups of "case" labels for the switch.

   423 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3

   424 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \

   425              case (x)+4: case (x)+5: case (x)+6: case (x)+7

   426 #define REP16(x) REP8((x)+0): \

   427               case REP8((x)+8)

   429   case CS_segment:

   430   case SS_segment:

   431   case DS_segment:

   432   case ES_segment:

   433   case FS_segment:

   434   case GS_segment:

   435     // Seems dubious

   436     LP64_ONLY(assert(false, "shouldn't have that prefix"));

   437     assert(ip == inst+1, "only one prefix allowed");

   438     goto again_after_prefix;

   440   case 0x67:

   441   case REX:

   442   case REX_B:

   443   case REX_X:

   444   case REX_XB:

   445   case REX_R:

   446   case REX_RB:

   447   case REX_RX:

   448   case REX_RXB:

   449     NOT_LP64(assert(false, "64bit prefixes"));

   450     goto again_after_prefix;

   452   case REX_W:

   453   case REX_WB:

   454   case REX_WX:

   455   case REX_WXB:

   456   case REX_WR:

   457   case REX_WRB:

   458   case REX_WRX:

   459   case REX_WRXB:

   460     NOT_LP64(assert(false, "64bit prefixes"));

   461     is_64bit = true;

   462     goto again_after_prefix;

   464   case 0xFF: // pushq a; decl a; incl a; call a; jmp a

   465   case 0x88: // movb a, r

   466   case 0x89: // movl a, r

   467   case 0x8A: // movb r, a

   468   case 0x8B: // movl r, a

   469   case 0x8F: // popl a

   470     debug_only(has_disp32 = true);

   471     break;

   473   case 0x68: // pushq #32

   474     if (which == end_pc_operand) {

   475       return ip + 4;

   476     }

   477     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");

   478     return ip;                  // not produced by emit_operand

   480   case 0x66: // movw ... (size prefix)

   481     again_after_size_prefix2:

   482     switch (0xFF & *ip++) {

   483     case REX:

   484     case REX_B:

   485     case REX_X:

   486     case REX_XB:

   487     case REX_R:

   488     case REX_RB:

   489     case REX_RX:

   490     case REX_RXB:

   491     case REX_W:

   492     case REX_WB:

   493     case REX_WX:

   494     case REX_WXB:

   495     case REX_WR:

   496     case REX_WRB:

   497     case REX_WRX:

   498     case REX_WRXB:

   499       NOT_LP64(assert(false, "64bit prefix found"));

   500       goto again_after_size_prefix2;

   501     case 0x8B: // movw r, a

   502     case 0x89: // movw a, r

   503       debug_only(has_disp32 = true);

   504       break;

   505     case 0xC7: // movw a, #16

   506       debug_only(has_disp32 = true);

   507       tail_size = 2;  // the imm16

   508       break;

   509     case 0x0F: // several SSE/SSE2 variants

   510       ip--;    // reparse the 0x0F

   511       goto again_after_prefix;

   512     default:

   513       ShouldNotReachHere();

   514     }

   515     break;

   517   case REP8(0xB8): // movl/q r, #32/#64(oop?)

   518     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);

   519     // these asserts are somewhat nonsensical

   520 #ifndef _LP64

   521     assert(which == imm_operand || which == disp32_operand, "");

   522 #else

   523     assert((which == call32_operand || which == imm_operand) && is_64bit ||

   524            which == narrow_oop_operand && !is_64bit, "");

   525 #endif // _LP64

   526     return ip;

   528   case 0x69: // imul r, a, #32

   529   case 0xC7: // movl a, #32(oop?)

   530     tail_size = 4;

   531     debug_only(has_disp32 = true); // has both kinds of operands!

   532     break;

   534   case 0x0F: // movx..., etc.

   535     switch (0xFF & *ip++) {

   536     case 0x3A: // pcmpestri

   537       tail_size = 1;

   538     case 0x38: // ptest, pmovzxbw

   539       ip++; // skip opcode

   540       debug_only(has_disp32 = true); // has both kinds of operands!

   541       break;

   543     case 0x70: // pshufd r, r/a, #8

   544       debug_only(has_disp32 = true); // has both kinds of operands!

   545     case 0x73: // psrldq r, #8

   546       tail_size = 1;

   547       break;

   549     case 0x12: // movlps

   550     case 0x28: // movaps

   551     case 0x2E: // ucomiss

   552     case 0x2F: // comiss

   553     case 0x54: // andps

   554     case 0x55: // andnps

   555     case 0x56: // orps

   556     case 0x57: // xorps

   557     case 0x6E: // movd

   558     case 0x7E: // movd

   559     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush

   560       debug_only(has_disp32 = true);

   561       break;

   563     case 0xAD: // shrd r, a, %cl

   564     case 0xAF: // imul r, a

   565     case 0xBE: // movsbl r, a (movsxb)

   566     case 0xBF: // movswl r, a (movsxw)

   567     case 0xB6: // movzbl r, a (movzxb)

   568     case 0xB7: // movzwl r, a (movzxw)

   569     case REP16(0x40): // cmovl cc, r, a

   570     case 0xB0: // cmpxchgb

   571     case 0xB1: // cmpxchg

   572     case 0xC1: // xaddl

   573     case 0xC7: // cmpxchg8

   574     case REP16(0x90): // setcc a

   575       debug_only(has_disp32 = true);

   576       // fall out of the switch to decode the address

   577       break;

   579     case 0xC4: // pinsrw r, a, #8

   580       debug_only(has_disp32 = true);

   581     case 0xC5: // pextrw r, r, #8

   582       tail_size = 1;  // the imm8

   583       break;

   585     case 0xAC: // shrd r, a, #8

   586       debug_only(has_disp32 = true);

   587       tail_size = 1;  // the imm8

   588       break;

   590     case REP16(0x80): // jcc rdisp32

   591       if (which == end_pc_operand)  return ip + 4;

   592       assert(which == call32_operand, "jcc has no disp32 or imm");

   593       return ip;

   594     default:

   595       ShouldNotReachHere();

   596     }

   597     break;

   599   case 0x81: // addl a, #32; addl r, #32

   600     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl

   601     // on 32bit in the case of cmpl, the imm might be an oop

   602     tail_size = 4;

   603     debug_only(has_disp32 = true); // has both kinds of operands!

   604     break;

   606   case 0x83: // addl a, #8; addl r, #8

   607     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl

   608     debug_only(has_disp32 = true); // has both kinds of operands!

   609     tail_size = 1;

   610     break;

   612   case 0x9B:

   613     switch (0xFF & *ip++) {

   614     case 0xD9: // fnstcw a

   615       debug_only(has_disp32 = true);

   616       break;

   617     default:

   618       ShouldNotReachHere();

   619     }

   620     break;

   622   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a

   623   case REP4(0x10): // adc...

   624   case REP4(0x20): // and...

   625   case REP4(0x30): // xor...

   626   case REP4(0x08): // or...

   627   case REP4(0x18): // sbb...

   628   case REP4(0x28): // sub...

   629   case 0xF7: // mull a

   630   case 0x8D: // lea r, a

   631   case 0x87: // xchg r, a

   632   case REP4(0x38): // cmp...

   633   case 0x85: // test r, a

   634     debug_only(has_disp32 = true); // has both kinds of operands!

   635     break;

   637   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8

   638   case 0xC6: // movb a, #8

   639   case 0x80: // cmpb a, #8

   640   case 0x6B: // imul r, a, #8

   641     debug_only(has_disp32 = true); // has both kinds of operands!

   642     tail_size = 1; // the imm8

   643     break;

   645   case 0xC4: // VEX_3bytes

   646   case 0xC5: // VEX_2bytes

   647     assert((UseAVX > 0), "shouldn't have VEX prefix");

   648     assert(ip == inst+1, "no prefixes allowed");

   649     // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions

   650     // but they have prefix 0x0F and processed when 0x0F processed above.

   651     //

   652     // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES

   653     // instructions (these instructions are not supported in 64-bit mode).

   654     // To distinguish them bits [7:6] are set in the VEX second byte since

   655     // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set

   656     // those VEX bits REX and vvvv bits are inverted.

   657     //

   658     // Fortunately C2 doesn't generate these instructions so we don't need

   659     // to check for them in product version.

   661     // Check second byte

   662     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

   664     // First byte

   665     if ((0xFF & *inst) == VEX_3bytes) {

   666       ip++; // third byte

   667       is_64bit = ((VEX_W & *ip) == VEX_W);

   668     }

   669     ip++; // opcode

   670     // To find the end of instruction (which == end_pc_operand).

   671     switch (0xFF & *ip) {

   672     case 0x61: // pcmpestri r, r/a, #8

   673     case 0x70: // pshufd r, r/a, #8

   674     case 0x73: // psrldq r, #8

   675       tail_size = 1;  // the imm8

   676       break;

   677     default:

   678       break;

   679     }

   680     ip++; // skip opcode

   681     debug_only(has_disp32 = true); // has both kinds of operands!

   682     break;

   684   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1

   685   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl

   686   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a

   687   case 0xDD: // fld_d a; fst_d a; fstp_d a

   688   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a

   689   case 0xDF: // fild_d a; fistp_d a

   690   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a

   691   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a

   692   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a

   693     debug_only(has_disp32 = true);

   694     break;

   696   case 0xE8: // call rdisp32

   697   case 0xE9: // jmp  rdisp32

   698     if (which == end_pc_operand)  return ip + 4;

   699     assert(which == call32_operand, "call has no disp32 or imm");

   700     return ip;

   702   case 0xF0:                    // Lock

   703     assert(os::is_MP(), "only on MP");

   704     goto again_after_prefix;

   706   case 0xF3:                    // For SSE

   707   case 0xF2:                    // For SSE2

   708     switch (0xFF & *ip++) {

   709     case REX:

   710     case REX_B:

   711     case REX_X:

   712     case REX_XB:

   713     case REX_R:

   714     case REX_RB:

   715     case REX_RX:

   716     case REX_RXB:

   717     case REX_W:

   718     case REX_WB:

   719     case REX_WX:

   720     case REX_WXB:

   721     case REX_WR:

   722     case REX_WRB:

   723     case REX_WRX:

   724     case REX_WRXB:

   725       NOT_LP64(assert(false, "found 64bit prefix"));

   726       ip++;

   727     default:

   728       ip++;

   729     }

   730     debug_only(has_disp32 = true); // has both kinds of operands!

   731     break;

   733   default:

   734     ShouldNotReachHere();

   736 #undef REP8

   737 #undef REP16

   738   }

   740   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");

   741 #ifdef _LP64

   742   assert(which != imm_operand, "instruction is not a movq reg, imm64");

   743 #else

   744   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");

   745   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");

   746 #endif // LP64

   747   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

   749   // parse the output of emit_operand

   750   int op2 = 0xFF & *ip++;

   751   int base = op2 & 0x07;

   752   int op3 = -1;

   753   const int b100 = 4;

   754   const int b101 = 5;

   755   if (base == b100 && (op2 >> 6) != 3) {

   756     op3 = 0xFF & *ip++;

   757     base = op3 & 0x07;   // refetch the base

   758   }

   759   // now ip points at the disp (if any)

   761   switch (op2 >> 6) {

   762   case 0:

   763     // [00 reg  100][ss index base]

   764     // [00 reg  100][00   100  esp]

   765     // [00 reg base]

   766     // [00 reg  100][ss index  101][disp32]

   767     // [00 reg  101]               [disp32]

   769     if (base == b101) {

   770       if (which == disp32_operand)

   771         return ip;              // caller wants the disp32

   772       ip += 4;                  // skip the disp32

   773     }

   774     break;

   776   case 1:

   777     // [01 reg  100][ss index base][disp8]

   778     // [01 reg  100][00   100  esp][disp8]

   779     // [01 reg base]               [disp8]

   780     ip += 1;                    // skip the disp8

   781     break;

   783   case 2:

   784     // [10 reg  100][ss index base][disp32]

   785     // [10 reg  100][00   100  esp][disp32]

   786     // [10 reg base]               [disp32]

   787     if (which == disp32_operand)

   788       return ip;                // caller wants the disp32

   789     ip += 4;                    // skip the disp32

   790     break;

   792   case 3:

   793     // [11 reg base]  (not a memory addressing mode)

   794     break;

   795   }

   797   if (which == end_pc_operand) {

   798     return ip + tail_size;

   799   }

   801 #ifdef _LP64

   802   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");

   803 #else

   804   assert(which == imm_operand, "instruction has only an imm field");

   805 #endif // LP64

   806   return ip;

   807 }

   809 address Assembler::locate_next_instruction(address inst) {

   810   // Secretly share code with locate_operand:

   811   return locate_operand(inst, end_pc_operand);

   812 }

   #ifdef ASSERT
   // Debug-only check that the operand about to be emitted sits where the
   // relocation described by `rspec`/`format` expects to find it.
   //   rspec  - relocation holder for the operand being emitted
   //   format - operand selector (imm_operand, disp32_operand, call32_operand, ...)
   void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
     address insn_start = inst_mark();
     assert(insn_start != NULL && insn_start < pc(), "must point to beginning of instruction");

     Relocation* reloc = rspec.reloc();
     if (reloc->type() == relocInfo::none) {
       return;  // no relocation, nothing to verify
     }

     address operand_pos;
     if (reloc->is_call() || format == call32_operand) {
       // The format is deliberately not asserted for calls; the original
       // check (format == imm32_operand) is disabled.
       operand_pos = locate_operand(insn_start, call32_operand);
     } else if (reloc->is_data()) {
       assert(format == imm_operand || format == disp32_operand
              LP64_ONLY(|| format == narrow_oop_operand), "format ok");
       operand_pos = locate_operand(insn_start, (WhichOperand)format);
     } else {
       // Any other relocation kind must use the default format and has no
       // operand position to verify.
       assert(format == imm_operand, "cannot specify a format");
       return;
     }

     // The located operand must be exactly at the current emit position.
     assert(operand_pos == pc(), "must put operand where relocs can find it");
   }
   #endif // ASSERT

   839 void Assembler::emit_operand32(Register reg, Address adr) {

   840   assert(reg->encoding() < 8, "no extended registers");

   841   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");

   842   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,

   843                adr._rspec);

   844 }

   846 void Assembler::emit_operand(Register reg, Address adr,

   847                              int rip_relative_correction) {

   848   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,

   849                adr._rspec,

   850                rip_relative_correction);

   851 }

   853 void Assembler::emit_operand(XMMRegister reg, Address adr) {

   854   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,

   855                adr._rspec);

   856 }

   858 // MMX operations

   859 void Assembler::emit_operand(MMXRegister reg, Address adr) {

   860   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");

   861   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);

   862 }

   864 // work around gcc (3.2.1-7a) bug

   865 void Assembler::emit_operand(Address adr, MMXRegister reg) {

   866   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");

   867   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);

   868 }

   871 void Assembler::emit_farith(int b1, int b2, int i) {

   872   assert(isByte(b1) && isByte(b2), "wrong opcode");

   873   assert(0 <= i &&  i < 8, "illegal stack offset");

   874   emit_byte(b1);

   875   emit_byte(b2 + i);

   876 }

   879 // Now the Assembler instructions (identical for 32/64 bits)

   881 void Assembler::adcl(Address dst, int32_t imm32) {

   882   InstructionMark im(this);

   883   prefix(dst);

   884   emit_arith_operand(0x81, rdx, dst, imm32);

   885 }

   887 void Assembler::adcl(Address dst, Register src) {

   888   InstructionMark im(this);

   889   prefix(dst, src);

   890   emit_byte(0x11);

   891   emit_operand(src, dst);

   892 }

   894 void Assembler::adcl(Register dst, int32_t imm32) {

   895   prefix(dst);

   896   emit_arith(0x81, 0xD0, dst, imm32);

   897 }

   899 void Assembler::adcl(Register dst, Address src) {

   900   InstructionMark im(this);

   901   prefix(src, dst);

   902   emit_byte(0x13);

   903   emit_operand(dst, src);

   904 }

   906 void Assembler::adcl(Register dst, Register src) {

   907   (void) prefix_and_encode(dst->encoding(), src->encoding());

   908   emit_arith(0x13, 0xC0, dst, src);

   909 }

   911 void Assembler::addl(Address dst, int32_t imm32) {

   912   InstructionMark im(this);

   913   prefix(dst);

   914   emit_arith_operand(0x81, rax, dst, imm32);

   915 }

   917 void Assembler::addl(Address dst, Register src) {

   918   InstructionMark im(this);

   919   prefix(dst, src);

   920   emit_byte(0x01);

   921   emit_operand(src, dst);

   922 }

   924 void Assembler::addl(Register dst, int32_t imm32) {

   925   prefix(dst);

   926   emit_arith(0x81, 0xC0, dst, imm32);

   927 }

   929 void Assembler::addl(Register dst, Address src) {

   930   InstructionMark im(this);

   931   prefix(src, dst);

   932   emit_byte(0x03);

   933   emit_operand(dst, src);

   934 }

   936 void Assembler::addl(Register dst, Register src) {

   937   (void) prefix_and_encode(dst->encoding(), src->encoding());

   938   emit_arith(0x03, 0xC0, dst, src);

   939 }

   941 void Assembler::addr_nop_4() {

   942   // 4 bytes: NOP DWORD PTR [EAX+0]

   943   emit_byte(0x0F);

   944   emit_byte(0x1F);

   945   emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);

   946   emit_byte(0);    // 8-bits offset (1 byte)

   947 }

   949 void Assembler::addr_nop_5() {

   950   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset

   951   emit_byte(0x0F);

   952   emit_byte(0x1F);

   953   emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);

   954   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);

   955   emit_byte(0);    // 8-bits offset (1 byte)

   956 }

   958 void Assembler::addr_nop_7() {

   959   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset

   960   emit_byte(0x0F);

   961   emit_byte(0x1F);

   962   emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);

   963   emit_long(0);    // 32-bits offset (4 bytes)

   964 }

   966 void Assembler::addr_nop_8() {

   967   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset

   968   emit_byte(0x0F);

   969   emit_byte(0x1F);

   970   emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);

   971   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);

   972   emit_long(0);    // 32-bits offset (4 bytes)

   973 }

   975 void Assembler::addsd(XMMRegister dst, XMMRegister src) {

   976   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

   977   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

   978   emit_byte(0x58);

   979   emit_byte(0xC0 | encode);

   980 }

   982 void Assembler::addsd(XMMRegister dst, Address src) {

   983   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

   984   InstructionMark im(this);

   985   simd_prefix(dst, dst, src, VEX_SIMD_F2);

   986   emit_byte(0x58);

   987   emit_operand(dst, src);

   988 }

   990 void Assembler::addss(XMMRegister dst, XMMRegister src) {

   991   NOT_LP64(assert(VM_Version::supports_sse(), ""));

   992   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

   993   emit_byte(0x58);

   994   emit_byte(0xC0 | encode);

   995 }

   997 void Assembler::addss(XMMRegister dst, Address src) {

   998   NOT_LP64(assert(VM_Version::supports_sse(), ""));

   999   InstructionMark im(this);

  1000   simd_prefix(dst, dst, src, VEX_SIMD_F3);

  1001   emit_byte(0x58);

  1002   emit_operand(dst, src);

  1003 }

  1005 void Assembler::andl(Address dst, int32_t imm32) {

  1006   InstructionMark im(this);

  1007   prefix(dst);

  1008   emit_byte(0x81);

  1009   emit_operand(rsp, dst, 4);

  1010   emit_long(imm32);

  1011 }

  1013 void Assembler::andl(Register dst, int32_t imm32) {

  1014   prefix(dst);

  1015   emit_arith(0x81, 0xE0, dst, imm32);

  1016 }

  1018 void Assembler::andl(Register dst, Address src) {

  1019   InstructionMark im(this);

  1020   prefix(src, dst);

  1021   emit_byte(0x23);

  1022   emit_operand(dst, src);

  1023 }

  1025 void Assembler::andl(Register dst, Register src) {

  1026   (void) prefix_and_encode(dst->encoding(), src->encoding());

  1027   emit_arith(0x23, 0xC0, dst, src);

  1028 }

  1030 void Assembler::andpd(XMMRegister dst, Address src) {

  1031   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1032   InstructionMark im(this);

  1033   simd_prefix(dst, dst, src, VEX_SIMD_66);

  1034   emit_byte(0x54);

  1035   emit_operand(dst, src);

  1036 }

  1038 void Assembler::andpd(XMMRegister dst, XMMRegister src) {

  1039   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1040   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);

  1041   emit_byte(0x54);

  1042   emit_byte(0xC0 | encode);

  1043 }

  1045 void Assembler::andps(XMMRegister dst, Address src) {

  1046   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1047   InstructionMark im(this);

  1048   simd_prefix(dst, dst, src, VEX_SIMD_NONE);

  1049   emit_byte(0x54);

  1050   emit_operand(dst, src);

  1051 }

  1053 void Assembler::andps(XMMRegister dst, XMMRegister src) {

  1054   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1055   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);

  1056   emit_byte(0x54);

  1057   emit_byte(0xC0 | encode);

  1058 }

  1060 void Assembler::bsfl(Register dst, Register src) {

  1061   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1062   emit_byte(0x0F);

  1063   emit_byte(0xBC);

  1064   emit_byte(0xC0 | encode);

  1065 }

  1067 void Assembler::bsrl(Register dst, Register src) {

  1068   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");

  1069   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1070   emit_byte(0x0F);

  1071   emit_byte(0xBD);

  1072   emit_byte(0xC0 | encode);

  1073 }

  1075 void Assembler::bswapl(Register reg) { // bswap

  1076   int encode = prefix_and_encode(reg->encoding());

  1077   emit_byte(0x0F);

  1078   emit_byte(0xC8 | encode);

  1079 }

  1081 void Assembler::call(Label& L, relocInfo::relocType rtype) {

  1082   // suspect disp32 is always good

  1083   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  1085   if (L.is_bound()) {

  1086     const int long_size = 5;

  1087     int offs = (int)( target(L) - pc() );

  1088     assert(offs <= 0, "assembler error");

  1089     InstructionMark im(this);

  1090     // 1110 1000 #32-bit disp

  1091     emit_byte(0xE8);

  1092     emit_data(offs - long_size, rtype, operand);

  1093   } else {

  1094     InstructionMark im(this);

  1095     // 1110 1000 #32-bit disp

  1096     L.add_patch_at(code(), locator());

  1098     emit_byte(0xE8);

  1099     emit_data(int(0), rtype, operand);

  1100   }

  1101 }

  1103 void Assembler::call(Register dst) {

  1104   int encode = prefix_and_encode(dst->encoding());

  1105   emit_byte(0xFF);

  1106   emit_byte(0xD0 | encode);

  1107 }

  1110 void Assembler::call(Address adr) {

  1111   InstructionMark im(this);

  1112   prefix(adr);

  1113   emit_byte(0xFF);

  1114   emit_operand(rdx, adr);

  1115 }

  1117 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {

  1118   assert(entry != NULL, "call most probably wrong");

  1119   InstructionMark im(this);

  1120   emit_byte(0xE8);

  1121   intptr_t disp = entry - (_code_pos + sizeof(int32_t));

  1122   assert(is_simm32(disp), "must be 32bit offset (call2)");

  1123   // Technically, should use call32_operand, but this format is

  1124   // implied by the fact that we're emitting a call instruction.

  1126   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);

  1127   emit_data((int) disp, rspec, operand);

  1128 }

  1130 void Assembler::cdql() {

  1131   emit_byte(0x99);

  1132 }

  1134 void Assembler::cmovl(Condition cc, Register dst, Register src) {

  1135   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));

  1136   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1137   emit_byte(0x0F);

  1138   emit_byte(0x40 | cc);

  1139   emit_byte(0xC0 | encode);

  1140 }

  1143 void Assembler::cmovl(Condition cc, Register dst, Address src) {

  1144   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));

  1145   prefix(src, dst);

  1146   emit_byte(0x0F);

  1147   emit_byte(0x40 | cc);

  1148   emit_operand(dst, src);

  1149 }

  1151 void Assembler::cmpb(Address dst, int imm8) {

  1152   InstructionMark im(this);

  1153   prefix(dst);

  1154   emit_byte(0x80);

  1155   emit_operand(rdi, dst, 1);

  1156   emit_byte(imm8);

  1157 }

  1159 void Assembler::cmpl(Address dst, int32_t imm32) {

  1160   InstructionMark im(this);

  1161   prefix(dst);

  1162   emit_byte(0x81);

  1163   emit_operand(rdi, dst, 4);

  1164   emit_long(imm32);

  1165 }

  1167 void Assembler::cmpl(Register dst, int32_t imm32) {

  1168   prefix(dst);

  1169   emit_arith(0x81, 0xF8, dst, imm32);

  1170 }

  1172 void Assembler::cmpl(Register dst, Register src) {

  1173   (void) prefix_and_encode(dst->encoding(), src->encoding());

  1174   emit_arith(0x3B, 0xC0, dst, src);

  1175 }

  1178 void Assembler::cmpl(Register dst, Address  src) {

  1179   InstructionMark im(this);

  1180   prefix(src, dst);

  1181   emit_byte(0x3B);

  1182   emit_operand(dst, src);

  1183 }

  1185 void Assembler::cmpw(Address dst, int imm16) {

  1186   InstructionMark im(this);

  1187   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");

  1188   emit_byte(0x66);

  1189   emit_byte(0x81);

  1190   emit_operand(rdi, dst, 2);

  1191   emit_word(imm16);

  1192 }

  1194 // The 32-bit cmpxchg compares the value at adr with the contents of rax,

  1195 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.

  1196 // The ZF is set if the compared values were equal, and cleared otherwise.

  1197 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg

  1198   if (Atomics & 2) {

  1199      // caveat: no instructionmark, so this isn't relocatable.

  1200      // Emit a synthetic, non-atomic, CAS equivalent.

  1201      // Beware.  The synthetic form sets all ICCs, not just ZF.

  1202      // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)

  1203      cmpl(rax, adr);

  1204      movl(rax, adr);

  1205      if (reg != rax) {

  1206         Label L ;

  1207         jcc(Assembler::notEqual, L);

  1208         movl(adr, reg);

  1209         bind(L);

  1210      }

  1211   } else {

  1212      InstructionMark im(this);

  1213      prefix(adr, reg);

  1214      emit_byte(0x0F);

  1215      emit_byte(0xB1);

  1216      emit_operand(reg, adr);

  1217   }

  1218 }

  1220 void Assembler::comisd(XMMRegister dst, Address src) {

  1221   // NOTE: dbx seems to decode this as comiss even though the

  1222   // 0x66 is there. Strangly ucomisd comes out correct

  1223   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1224   InstructionMark im(this);

  1225   simd_prefix(dst, src, VEX_SIMD_66);

  1226   emit_byte(0x2F);

  1227   emit_operand(dst, src);

  1228 }

  1230 void Assembler::comisd(XMMRegister dst, XMMRegister src) {

  1231   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1232   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);

  1233   emit_byte(0x2F);

  1234   emit_byte(0xC0 | encode);

  1235 }

  1237 void Assembler::comiss(XMMRegister dst, Address src) {

  1238   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1239   InstructionMark im(this);

  1240   simd_prefix(dst, src, VEX_SIMD_NONE);

  1241   emit_byte(0x2F);

  1242   emit_operand(dst, src);

  1243 }

  1245 void Assembler::comiss(XMMRegister dst, XMMRegister src) {

  1246   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1247   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);

  1248   emit_byte(0x2F);

  1249   emit_byte(0xC0 | encode);

  1250 }

  1252 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {

  1253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1254   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);

  1255   emit_byte(0xE6);

  1256   emit_byte(0xC0 | encode);

  1257 }

  1259 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {

  1260   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1261   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);

  1262   emit_byte(0x5B);

  1263   emit_byte(0xC0 | encode);

  1264 }

  1266 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {

  1267   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1268   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

  1269   emit_byte(0x5A);

  1270   emit_byte(0xC0 | encode);

  1271 }

  1273 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {

  1274   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1275   InstructionMark im(this);

  1276   simd_prefix(dst, dst, src, VEX_SIMD_F2);

  1277   emit_byte(0x5A);

  1278   emit_operand(dst, src);

  1279 }

  1281 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {

  1282   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1283   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

  1284   emit_byte(0x2A);

  1285   emit_byte(0xC0 | encode);

  1286 }

  1288 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {

  1289   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1290   InstructionMark im(this);

  1291   simd_prefix(dst, dst, src, VEX_SIMD_F2);

  1292   emit_byte(0x2A);

  1293   emit_operand(dst, src);

  1294 }

  1296 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {

  1297   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1298   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

  1299   emit_byte(0x2A);

  1300   emit_byte(0xC0 | encode);

  1301 }

  1303 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {

  1304   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1305   InstructionMark im(this);

  1306   simd_prefix(dst, dst, src, VEX_SIMD_F3);

  1307   emit_byte(0x2A);

  1308   emit_operand(dst, src);

  1309 }

  1311 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {

  1312   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1313   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

  1314   emit_byte(0x5A);

  1315   emit_byte(0xC0 | encode);

  1316 }

  1318 void Assembler::cvtss2sd(XMMRegister dst, Address src) {

  1319   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1320   InstructionMark im(this);

  1321   simd_prefix(dst, dst, src, VEX_SIMD_F3);

  1322   emit_byte(0x5A);

  1323   emit_operand(dst, src);

  1324 }

  1327 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {

  1328   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1329   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);

  1330   emit_byte(0x2C);

  1331   emit_byte(0xC0 | encode);

  1332 }

  1334 void Assembler::cvttss2sil(Register dst, XMMRegister src) {

  1335   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1336   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);

  1337   emit_byte(0x2C);

  1338   emit_byte(0xC0 | encode);

  1339 }

  1341 void Assembler::decl(Address dst) {

  1342   // Don't use it directly. Use MacroAssembler::decrement() instead.

  1343   InstructionMark im(this);

  1344   prefix(dst);

  1345   emit_byte(0xFF);

  1346   emit_operand(rcx, dst);

  1347 }

  1349 void Assembler::divsd(XMMRegister dst, Address src) {

  1350   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1351   InstructionMark im(this);

  1352   simd_prefix(dst, dst, src, VEX_SIMD_F2);

  1353   emit_byte(0x5E);

  1354   emit_operand(dst, src);

  1355 }

  1357 void Assembler::divsd(XMMRegister dst, XMMRegister src) {

  1358   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1359   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

  1360   emit_byte(0x5E);

  1361   emit_byte(0xC0 | encode);

  1362 }

  1364 void Assembler::divss(XMMRegister dst, Address src) {

  1365   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1366   InstructionMark im(this);

  1367   simd_prefix(dst, dst, src, VEX_SIMD_F3);

  1368   emit_byte(0x5E);

  1369   emit_operand(dst, src);

  1370 }

  1372 void Assembler::divss(XMMRegister dst, XMMRegister src) {

  1373   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1374   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

  1375   emit_byte(0x5E);

  1376   emit_byte(0xC0 | encode);

  1377 }

  1379 void Assembler::emms() {

  1380   NOT_LP64(assert(VM_Version::supports_mmx(), ""));

  1381   emit_byte(0x0F);

  1382   emit_byte(0x77);

  1383 }

  1385 void Assembler::hlt() {

  1386   emit_byte(0xF4);

  1387 }

  1389 void Assembler::idivl(Register src) {

  1390   int encode = prefix_and_encode(src->encoding());

  1391   emit_byte(0xF7);

  1392   emit_byte(0xF8 | encode);

  1393 }

  1395 void Assembler::divl(Register src) { // Unsigned

  1396   int encode = prefix_and_encode(src->encoding());

  1397   emit_byte(0xF7);

  1398   emit_byte(0xF0 | encode);

  1399 }

  1401 void Assembler::imull(Register dst, Register src) {

  1402   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1403   emit_byte(0x0F);

  1404   emit_byte(0xAF);

  1405   emit_byte(0xC0 | encode);

  1406 }

  1409 void Assembler::imull(Register dst, Register src, int value) {

  1410   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1411   if (is8bit(value)) {

  1412     emit_byte(0x6B);

  1413     emit_byte(0xC0 | encode);

  1414     emit_byte(value & 0xFF);

  1415   } else {

  1416     emit_byte(0x69);

  1417     emit_byte(0xC0 | encode);

  1418     emit_long(value);

  1419   }

  1420 }

  1422 void Assembler::incl(Address dst) {

  1423   // Don't use it directly. Use MacroAssembler::increment() instead.

  1424   InstructionMark im(this);

  1425   prefix(dst);

  1426   emit_byte(0xFF);

  1427   emit_operand(rax, dst);

  1428 }

  1430 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {

  1431   InstructionMark im(this);

  1432   assert((0 <= cc) && (cc < 16), "illegal cc");

  1433   if (L.is_bound()) {

  1434     address dst = target(L);

  1435     assert(dst != NULL, "jcc most probably wrong");

  1437     const int short_size = 2;

  1438     const int long_size = 6;

  1439     intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;

  1440     if (maybe_short && is8bit(offs - short_size)) {

  1441       // 0111 tttn #8-bit disp

  1442       emit_byte(0x70 | cc);

  1443       emit_byte((offs - short_size) & 0xFF);

  1444     } else {

  1445       // 0000 1111 1000 tttn #32-bit disp

  1446       assert(is_simm32(offs - long_size),

  1447              "must be 32bit offset (call4)");

  1448       emit_byte(0x0F);

  1449       emit_byte(0x80 | cc);

  1450       emit_long(offs - long_size);

  1451     }

  1452   } else {

  1453     // Note: could eliminate cond. jumps to this jump if condition

  1454     //       is the same however, seems to be rather unlikely case.

  1455     // Note: use jccb() if label to be bound is very close to get

  1456     //       an 8-bit displacement

  1457     L.add_patch_at(code(), locator());

  1458     emit_byte(0x0F);

  1459     emit_byte(0x80 | cc);

  1460     emit_long(0);

  1461   }

  1462 }

  1464 void Assembler::jccb(Condition cc, Label& L) {

  1465   if (L.is_bound()) {

  1466     const int short_size = 2;

  1467     address entry = target(L);

  1468     assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),

  1469            "Dispacement too large for a short jmp");

  1470     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;

  1471     // 0111 tttn #8-bit disp

  1472     emit_byte(0x70 | cc);

  1473     emit_byte((offs - short_size) & 0xFF);

  1474   } else {

  1475     InstructionMark im(this);

  1476     L.add_patch_at(code(), locator());

  1477     emit_byte(0x70 | cc);

  1478     emit_byte(0);

  1479   }

  1480 }

  1482 void Assembler::jmp(Address adr) {

  1483   InstructionMark im(this);

  1484   prefix(adr);

  1485   emit_byte(0xFF);

  1486   emit_operand(rsp, adr);

  1487 }

  1489 void Assembler::jmp(Label& L, bool maybe_short) {

  1490   if (L.is_bound()) {

  1491     address entry = target(L);

  1492     assert(entry != NULL, "jmp most probably wrong");

  1493     InstructionMark im(this);

  1494     const int short_size = 2;

  1495     const int long_size = 5;

  1496     intptr_t offs = entry - _code_pos;

  1497     if (maybe_short && is8bit(offs - short_size)) {

  1498       emit_byte(0xEB);

  1499       emit_byte((offs - short_size) & 0xFF);

  1500     } else {

  1501       emit_byte(0xE9);

  1502       emit_long(offs - long_size);

  1503     }

  1504   } else {

  1505     // By default, forward jumps are always 32-bit displacements, since

  1506     // we can't yet know where the label will be bound.  If you're sure that

  1507     // the forward jump will not run beyond 256 bytes, use jmpb to

  1508     // force an 8-bit displacement.

  1509     InstructionMark im(this);

  1510     L.add_patch_at(code(), locator());

  1511     emit_byte(0xE9);

  1512     emit_long(0);

  1513   }

  1514 }

  1516 void Assembler::jmp(Register entry) {

  1517   int encode = prefix_and_encode(entry->encoding());

  1518   emit_byte(0xFF);

  1519   emit_byte(0xE0 | encode);

  1520 }

  1522 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {

  1523   InstructionMark im(this);

  1524   emit_byte(0xE9);

  1525   assert(dest != NULL, "must have a target");

  1526   intptr_t disp = dest - (_code_pos + sizeof(int32_t));

  1527   assert(is_simm32(disp), "must be 32bit offset (jmp)");

  1528   emit_data(disp, rspec.reloc(), call32_operand);

  1529 }

  1531 void Assembler::jmpb(Label& L) {

  1532   if (L.is_bound()) {

  1533     const int short_size = 2;

  1534     address entry = target(L);

  1535     assert(is8bit((entry - _code_pos) + short_size),

  1536            "Dispacement too large for a short jmp");

  1537     assert(entry != NULL, "jmp most probably wrong");

  1538     intptr_t offs = entry - _code_pos;

  1539     emit_byte(0xEB);

  1540     emit_byte((offs - short_size) & 0xFF);

  1541   } else {

  1542     InstructionMark im(this);

  1543     L.add_patch_at(code(), locator());

  1544     emit_byte(0xEB);

  1545     emit_byte(0);

  1546   }

  1547 }

  1549 void Assembler::ldmxcsr( Address src) {

  1550   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1551   InstructionMark im(this);

  1552   prefix(src);

  1553   emit_byte(0x0F);

  1554   emit_byte(0xAE);

  1555   emit_operand(as_Register(2), src);

  1556 }

  1558 void Assembler::leal(Register dst, Address src) {

  1559   InstructionMark im(this);

  1560 #ifdef _LP64

  1561   emit_byte(0x67); // addr32

  1562   prefix(src, dst);

  1563 #endif // LP64

  1564   emit_byte(0x8D);

  1565   emit_operand(dst, src);

  1566 }

  1568 void Assembler::lock() {

  1569   if (Atomics & 1) {

  1570      // Emit either nothing, a NOP, or a NOP: prefix

  1571      emit_byte(0x90) ;

  1572   } else {

  1573      emit_byte(0xF0);

  1574   }

  1575 }

  1577 void Assembler::lzcntl(Register dst, Register src) {

  1578   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");

  1579   emit_byte(0xF3);

  1580   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1581   emit_byte(0x0F);

  1582   emit_byte(0xBD);

  1583   emit_byte(0xC0 | encode);

  1584 }

  1586 // Emit mfence instruction

  1587 void Assembler::mfence() {

  1588   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)

  1589   emit_byte( 0x0F );

  1590   emit_byte( 0xAE );

  1591   emit_byte( 0xF0 );

  1592 }

  1594 void Assembler::mov(Register dst, Register src) {

  1595   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));

  1596 }

  1598 void Assembler::movapd(XMMRegister dst, XMMRegister src) {

  1599   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1600   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);

  1601   emit_byte(0x28);

  1602   emit_byte(0xC0 | encode);

  1603 }

  1605 void Assembler::movaps(XMMRegister dst, XMMRegister src) {

  1606   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1607   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);

  1608   emit_byte(0x28);

  1609   emit_byte(0xC0 | encode);

  1610 }

  1612 void Assembler::movb(Register dst, Address src) {

  1613   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));

  1614   InstructionMark im(this);

  1615   prefix(src, dst, true);

  1616   emit_byte(0x8A);

  1617   emit_operand(dst, src);

  1618 }

  1621 void Assembler::movb(Address dst, int imm8) {

  1622   InstructionMark im(this);

  1623    prefix(dst);

  1624   emit_byte(0xC6);

  1625   emit_operand(rax, dst, 1);

  1626   emit_byte(imm8);

  1627 }

  1630 void Assembler::movb(Address dst, Register src) {

  1631   assert(src->has_byte_register(), "must have byte register");

  1632   InstructionMark im(this);

  1633   prefix(dst, src, true);

  1634   emit_byte(0x88);

  1635   emit_operand(src, dst);

  1636 }

  1638 void Assembler::movdl(XMMRegister dst, Register src) {

  1639   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1640   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);

  1641   emit_byte(0x6E);

  1642   emit_byte(0xC0 | encode);

  1643 }

  1645 void Assembler::movdl(Register dst, XMMRegister src) {

  1646   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1647   // swap src/dst to get correct prefix

  1648   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);

  1649   emit_byte(0x7E);

  1650   emit_byte(0xC0 | encode);

  1651 }

  1653 void Assembler::movdl(XMMRegister dst, Address src) {

  1654   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1655   InstructionMark im(this);

  1656   simd_prefix(dst, src, VEX_SIMD_66);

  1657   emit_byte(0x6E);

  1658   emit_operand(dst, src);

  1659 }

  1661 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {

  1662   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1663   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);

  1664   emit_byte(0x6F);

  1665   emit_byte(0xC0 | encode);

  1666 }

  1668 void Assembler::movdqu(XMMRegister dst, Address src) {

  1669   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1670   InstructionMark im(this);

  1671   simd_prefix(dst, src, VEX_SIMD_F3);

  1672   emit_byte(0x6F);

  1673   emit_operand(dst, src);

  1674 }

  1676 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {

  1677   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1678   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);

  1679   emit_byte(0x6F);

  1680   emit_byte(0xC0 | encode);

  1681 }

  1683 void Assembler::movdqu(Address dst, XMMRegister src) {

  1684   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1685   InstructionMark im(this);

  1686   simd_prefix(dst, src, VEX_SIMD_F3);

  1687   emit_byte(0x7F);

  1688   emit_operand(src, dst);

  1689 }

  1691 // Uses zero extension on 64bit

  1693 void Assembler::movl(Register dst, int32_t imm32) {

  1694   int encode = prefix_and_encode(dst->encoding());

  1695   emit_byte(0xB8 | encode);

  1696   emit_long(imm32);

  1697 }

  1699 void Assembler::movl(Register dst, Register src) {

  1700   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1701   emit_byte(0x8B);

  1702   emit_byte(0xC0 | encode);

  1703 }

  1705 void Assembler::movl(Register dst, Address src) {

  1706   InstructionMark im(this);

  1707   prefix(src, dst);

  1708   emit_byte(0x8B);

  1709   emit_operand(dst, src);

  1710 }

  1712 void Assembler::movl(Address dst, int32_t imm32) {

  1713   InstructionMark im(this);

  1714   prefix(dst);

  1715   emit_byte(0xC7);

  1716   emit_operand(rax, dst, 4);

  1717   emit_long(imm32);

  1718 }

  1720 void Assembler::movl(Address dst, Register src) {

  1721   InstructionMark im(this);

  1722   prefix(dst, src);

  1723   emit_byte(0x89);

  1724   emit_operand(src, dst);

  1725 }

  1727 // New cpus require to use movsd and movss to avoid partial register stall

  1728 // when loading from memory. But for old Opteron use movlpd instead of movsd.

  1729 // The selection is done in MacroAssembler::movdbl() and movflt().

  1730 void Assembler::movlpd(XMMRegister dst, Address src) {

  1731   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1732   InstructionMark im(this);

  1733   simd_prefix(dst, dst, src, VEX_SIMD_66);

  1734   emit_byte(0x12);

  1735   emit_operand(dst, src);

  1736 }

  1738 void Assembler::movq( MMXRegister dst, Address src ) {

  1739   assert( VM_Version::supports_mmx(), "" );

  1740   emit_byte(0x0F);

  1741   emit_byte(0x6F);

  1742   emit_operand(dst, src);

  1743 }

  1745 void Assembler::movq( Address dst, MMXRegister src ) {

  1746   assert( VM_Version::supports_mmx(), "" );

  1747   emit_byte(0x0F);

  1748   emit_byte(0x7F);

  1749   // workaround gcc (3.2.1-7a) bug

  1750   // In that version of gcc with only an emit_operand(MMX, Address)

  1751   // gcc will tail jump and try and reverse the parameters completely

  1752   // obliterating dst in the process. By having a version available

  1753   // that doesn't need to swap the args at the tail jump the bug is

  1754   // avoided.

  1755   emit_operand(dst, src);

  1756 }

  1758 void Assembler::movq(XMMRegister dst, Address src) {

  1759   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1760   InstructionMark im(this);

  1761   simd_prefix(dst, src, VEX_SIMD_F3);

  1762   emit_byte(0x7E);

  1763   emit_operand(dst, src);

  1764 }

  1766 void Assembler::movq(Address dst, XMMRegister src) {

  1767   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1768   InstructionMark im(this);

  1769   simd_prefix(dst, src, VEX_SIMD_66);

  1770   emit_byte(0xD6);

  1771   emit_operand(src, dst);

  1772 }

  1774 void Assembler::movsbl(Register dst, Address src) { // movsxb

  1775   InstructionMark im(this);

  1776   prefix(src, dst);

  1777   emit_byte(0x0F);

  1778   emit_byte(0xBE);

  1779   emit_operand(dst, src);

  1780 }

  1782 void Assembler::movsbl(Register dst, Register src) { // movsxb

  1783   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));

  1784   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);

  1785   emit_byte(0x0F);

  1786   emit_byte(0xBE);

  1787   emit_byte(0xC0 | encode);

  1788 }

  1790 void Assembler::movsd(XMMRegister dst, XMMRegister src) {

  1791   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1792   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

  1793   emit_byte(0x10);

  1794   emit_byte(0xC0 | encode);

  1795 }

  1797 void Assembler::movsd(XMMRegister dst, Address src) {

  1798   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1799   InstructionMark im(this);

  1800   simd_prefix(dst, src, VEX_SIMD_F2);

  1801   emit_byte(0x10);

  1802   emit_operand(dst, src);

  1803 }

  1805 void Assembler::movsd(Address dst, XMMRegister src) {

  1806   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1807   InstructionMark im(this);

  1808   simd_prefix(dst, src, VEX_SIMD_F2);

  1809   emit_byte(0x11);

  1810   emit_operand(src, dst);

  1811 }

  1813 void Assembler::movss(XMMRegister dst, XMMRegister src) {

  1814   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1815   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

  1816   emit_byte(0x10);

  1817   emit_byte(0xC0 | encode);

  1818 }

  1820 void Assembler::movss(XMMRegister dst, Address src) {

  1821   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1822   InstructionMark im(this);

  1823   simd_prefix(dst, src, VEX_SIMD_F3);

  1824   emit_byte(0x10);

  1825   emit_operand(dst, src);

  1826 }

  1828 void Assembler::movss(Address dst, XMMRegister src) {

  1829   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1830   InstructionMark im(this);

  1831   simd_prefix(dst, src, VEX_SIMD_F3);

  1832   emit_byte(0x11);

  1833   emit_operand(src, dst);

  1834 }

  1836 void Assembler::movswl(Register dst, Address src) { // movsxw

  1837   InstructionMark im(this);

  1838   prefix(src, dst);

  1839   emit_byte(0x0F);

  1840   emit_byte(0xBF);

  1841   emit_operand(dst, src);

  1842 }

  1844 void Assembler::movswl(Register dst, Register src) { // movsxw

  1845   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1846   emit_byte(0x0F);

  1847   emit_byte(0xBF);

  1848   emit_byte(0xC0 | encode);

  1849 }

  1851 void Assembler::movw(Address dst, int imm16) {

  1852   InstructionMark im(this);

  1854   emit_byte(0x66); // switch to 16-bit mode

  1855   prefix(dst);

  1856   emit_byte(0xC7);

  1857   emit_operand(rax, dst, 2);

  1858   emit_word(imm16);

  1859 }

  1861 void Assembler::movw(Register dst, Address src) {

  1862   InstructionMark im(this);

  1863   emit_byte(0x66);

  1864   prefix(src, dst);

  1865   emit_byte(0x8B);

  1866   emit_operand(dst, src);

  1867 }

  1869 void Assembler::movw(Address dst, Register src) {

  1870   InstructionMark im(this);

  1871   emit_byte(0x66);

  1872   prefix(dst, src);

  1873   emit_byte(0x89);

  1874   emit_operand(src, dst);

  1875 }

  1877 void Assembler::movzbl(Register dst, Address src) { // movzxb

  1878   InstructionMark im(this);

  1879   prefix(src, dst);

  1880   emit_byte(0x0F);

  1881   emit_byte(0xB6);

  1882   emit_operand(dst, src);

  1883 }

  1885 void Assembler::movzbl(Register dst, Register src) { // movzxb

  1886   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));

  1887   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);

  1888   emit_byte(0x0F);

  1889   emit_byte(0xB6);

  1890   emit_byte(0xC0 | encode);

  1891 }

  1893 void Assembler::movzwl(Register dst, Address src) { // movzxw

  1894   InstructionMark im(this);

  1895   prefix(src, dst);

  1896   emit_byte(0x0F);

  1897   emit_byte(0xB7);

  1898   emit_operand(dst, src);

  1899 }

  1901 void Assembler::movzwl(Register dst, Register src) { // movzxw

  1902   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  1903   emit_byte(0x0F);

  1904   emit_byte(0xB7);

  1905   emit_byte(0xC0 | encode);

  1906 }

  1908 void Assembler::mull(Address src) {

  1909   InstructionMark im(this);

  1910   prefix(src);

  1911   emit_byte(0xF7);

  1912   emit_operand(rsp, src);

  1913 }

  1915 void Assembler::mull(Register src) {

  1916   int encode = prefix_and_encode(src->encoding());

  1917   emit_byte(0xF7);

  1918   emit_byte(0xE0 | encode);

  1919 }

  1921 void Assembler::mulsd(XMMRegister dst, Address src) {

  1922   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1923   InstructionMark im(this);

  1924   simd_prefix(dst, dst, src, VEX_SIMD_F2);

  1925   emit_byte(0x59);

  1926   emit_operand(dst, src);

  1927 }

  1929 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {

  1930   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  1931   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

  1932   emit_byte(0x59);

  1933   emit_byte(0xC0 | encode);

  1934 }

  1936 void Assembler::mulss(XMMRegister dst, Address src) {

  1937   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1938   InstructionMark im(this);

  1939   simd_prefix(dst, dst, src, VEX_SIMD_F3);

  1940   emit_byte(0x59);

  1941   emit_operand(dst, src);

  1942 }

  1944 void Assembler::mulss(XMMRegister dst, XMMRegister src) {

  1945   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  1946   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

  1947   emit_byte(0x59);

  1948   emit_byte(0xC0 | encode);

  1949 }

  1951 void Assembler::negl(Register dst) {

  1952   int encode = prefix_and_encode(dst->encoding());

  1953   emit_byte(0xF7);

  1954   emit_byte(0xD8 | encode);

  1955 }

  1957 void Assembler::nop(int i) {

  1958 #ifdef ASSERT

  1959   assert(i > 0, " ");

  1960   // The fancy nops aren't currently recognized by debuggers making it a

  1961   // pain to disassemble code while debugging. If asserts are on clearly

  1962   // speed is not an issue so simply use the single byte traditional nop

  1963   // to do alignment.

  1965   for (; i > 0 ; i--) emit_byte(0x90);

  1966   return;

  1968 #endif // ASSERT

  1970   if (UseAddressNop && VM_Version::is_intel()) {

  1971     //

  1972     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel

  1973     //  1: 0x90

  1974     //  2: 0x66 0x90

  1975     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)

  1976     //  4: 0x0F 0x1F 0x40 0x00

  1977     //  5: 0x0F 0x1F 0x44 0x00 0x00

  1978     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00

  1979     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00

  1980     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  1981     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  1982     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  1983     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  1985     // The rest coding is Intel specific - don't use consecutive address nops

  1987     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

  1988     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

  1989     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

  1990     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

  1992     while(i >= 15) {

  1993       // For Intel don't generate consecutive addess nops (mix with regular nops)

  1994       i -= 15;

  1995       emit_byte(0x66);   // size prefix

  1996       emit_byte(0x66);   // size prefix

  1997       emit_byte(0x66);   // size prefix

  1998       addr_nop_8();

  1999       emit_byte(0x66);   // size prefix

  2000       emit_byte(0x66);   // size prefix

  2001       emit_byte(0x66);   // size prefix

  2002       emit_byte(0x90);   // nop

  2003     }

  2004     switch (i) {

  2005       case 14:

  2006         emit_byte(0x66); // size prefix

  2007       case 13:

  2008         emit_byte(0x66); // size prefix

  2009       case 12:

  2010         addr_nop_8();

  2011         emit_byte(0x66); // size prefix

  2012         emit_byte(0x66); // size prefix

  2013         emit_byte(0x66); // size prefix

  2014         emit_byte(0x90); // nop

  2015         break;

  2016       case 11:

  2017         emit_byte(0x66); // size prefix

  2018       case 10:

  2019         emit_byte(0x66); // size prefix

  2020       case 9:

  2021         emit_byte(0x66); // size prefix

  2022       case 8:

  2023         addr_nop_8();

  2024         break;

  2025       case 7:

  2026         addr_nop_7();

  2027         break;

  2028       case 6:

  2029         emit_byte(0x66); // size prefix

  2030       case 5:

  2031         addr_nop_5();

  2032         break;

  2033       case 4:

  2034         addr_nop_4();

  2035         break;

  2036       case 3:

  2037         // Don't use "0x0F 0x1F 0x00" - need patching safe padding

  2038         emit_byte(0x66); // size prefix

  2039       case 2:

  2040         emit_byte(0x66); // size prefix

  2041       case 1:

  2042         emit_byte(0x90); // nop

  2043         break;

  2044       default:

  2045         assert(i == 0, " ");

  2046     }

  2047     return;

  2048   }

  2049   if (UseAddressNop && VM_Version::is_amd()) {

  2050     //

  2051     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.

  2052     //  1: 0x90

  2053     //  2: 0x66 0x90

  2054     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)

  2055     //  4: 0x0F 0x1F 0x40 0x00

  2056     //  5: 0x0F 0x1F 0x44 0x00 0x00

  2057     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00

  2058     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00

  2059     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  2060     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  2061     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  2062     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  2064     // The rest coding is AMD specific - use consecutive address nops

  2066     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00

  2067     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00

  2068     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00

  2069     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00

  2070     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

  2071     //     Size prefixes (0x66) are added for larger sizes

  2073     while(i >= 22) {

  2074       i -= 11;

  2075       emit_byte(0x66); // size prefix

  2076       emit_byte(0x66); // size prefix

  2077       emit_byte(0x66); // size prefix

  2078       addr_nop_8();

  2079     }

  2080     // Generate first nop for size between 21-12

  2081     switch (i) {

  2082       case 21:

  2083         i -= 1;

  2084         emit_byte(0x66); // size prefix

  2085       case 20:

  2086       case 19:

  2087         i -= 1;

  2088         emit_byte(0x66); // size prefix

  2089       case 18:

  2090       case 17:

  2091         i -= 1;

  2092         emit_byte(0x66); // size prefix

  2093       case 16:

  2094       case 15:

  2095         i -= 8;

  2096         addr_nop_8();

  2097         break;

  2098       case 14:

  2099       case 13:

  2100         i -= 7;

  2101         addr_nop_7();

  2102         break;

  2103       case 12:

  2104         i -= 6;

  2105         emit_byte(0x66); // size prefix

  2106         addr_nop_5();

  2107         break;

  2108       default:

  2109         assert(i < 12, " ");

  2110     }

  2112     // Generate second nop for size between 11-1

  2113     switch (i) {

  2114       case 11:

  2115         emit_byte(0x66); // size prefix

  2116       case 10:

  2117         emit_byte(0x66); // size prefix

  2118       case 9:

  2119         emit_byte(0x66); // size prefix

  2120       case 8:

  2121         addr_nop_8();

  2122         break;

  2123       case 7:

  2124         addr_nop_7();

  2125         break;

  2126       case 6:

  2127         emit_byte(0x66); // size prefix

  2128       case 5:

  2129         addr_nop_5();

  2130         break;

  2131       case 4:

  2132         addr_nop_4();

  2133         break;

  2134       case 3:

  2135         // Don't use "0x0F 0x1F 0x00" - need patching safe padding

  2136         emit_byte(0x66); // size prefix

  2137       case 2:

  2138         emit_byte(0x66); // size prefix

  2139       case 1:

  2140         emit_byte(0x90); // nop

  2141         break;

  2142       default:

  2143         assert(i == 0, " ");

  2144     }

  2145     return;

  2146   }

  2148   // Using nops with size prefixes "0x66 0x90".

  2149   // From AMD Optimization Guide:

  2150   //  1: 0x90

  2151   //  2: 0x66 0x90

  2152   //  3: 0x66 0x66 0x90

  2153   //  4: 0x66 0x66 0x66 0x90

  2154   //  5: 0x66 0x66 0x90 0x66 0x90

  2155   //  6: 0x66 0x66 0x90 0x66 0x66 0x90

  2156   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90

  2157   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90

  2158   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90

  2159   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90

  2160   //

  2161   while(i > 12) {

  2162     i -= 4;

  2163     emit_byte(0x66); // size prefix

  2164     emit_byte(0x66);

  2165     emit_byte(0x66);

  2166     emit_byte(0x90); // nop

  2167   }

  2168   // 1 - 12 nops

  2169   if(i > 8) {

  2170     if(i > 9) {

  2171       i -= 1;

  2172       emit_byte(0x66);

  2173     }

  2174     i -= 3;

  2175     emit_byte(0x66);

  2176     emit_byte(0x66);

  2177     emit_byte(0x90);

  2178   }

  2179   // 1 - 8 nops

  2180   if(i > 4) {

  2181     if(i > 6) {

  2182       i -= 1;

  2183       emit_byte(0x66);

  2184     }

  2185     i -= 3;

  2186     emit_byte(0x66);

  2187     emit_byte(0x66);

  2188     emit_byte(0x90);

  2189   }

  2190   switch (i) {

  2191     case 4:

  2192       emit_byte(0x66);

  2193     case 3:

  2194       emit_byte(0x66);

  2195     case 2:

  2196       emit_byte(0x66);

  2197     case 1:

  2198       emit_byte(0x90);

  2199       break;

  2200     default:

  2201       assert(i == 0, " ");

  2202   }

  2203 }

  2205 void Assembler::notl(Register dst) {

  2206   int encode = prefix_and_encode(dst->encoding());

  2207   emit_byte(0xF7);

  2208   emit_byte(0xD0 | encode );

  2209 }

  2211 void Assembler::orl(Address dst, int32_t imm32) {

  2212   InstructionMark im(this);

  2213   prefix(dst);

  2214   emit_arith_operand(0x81, rcx, dst, imm32);

  2215 }

  2217 void Assembler::orl(Register dst, int32_t imm32) {

  2218   prefix(dst);

  2219   emit_arith(0x81, 0xC8, dst, imm32);

  2220 }

  2222 void Assembler::orl(Register dst, Address src) {

  2223   InstructionMark im(this);

  2224   prefix(src, dst);

  2225   emit_byte(0x0B);

  2226   emit_operand(dst, src);

  2227 }

  2229 void Assembler::orl(Register dst, Register src) {

  2230   (void) prefix_and_encode(dst->encoding(), src->encoding());

  2231   emit_arith(0x0B, 0xC0, dst, src);

  2232 }

  2234 void Assembler::packuswb(XMMRegister dst, Address src) {

  2235   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2236   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2237   InstructionMark im(this);

  2238   simd_prefix(dst, dst, src, VEX_SIMD_66);

  2239   emit_byte(0x67);

  2240   emit_operand(dst, src);

  2241 }

  2243 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {

  2244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2245   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);

  2246   emit_byte(0x67);

  2247   emit_byte(0xC0 | encode);

  2248 }

  2250 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {

  2251   assert(VM_Version::supports_sse4_2(), "");

  2252   InstructionMark im(this);

  2253   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);

  2254   emit_byte(0x61);

  2255   emit_operand(dst, src);

  2256   emit_byte(imm8);

  2257 }

  2259 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {

  2260   assert(VM_Version::supports_sse4_2(), "");

  2261   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);

  2262   emit_byte(0x61);

  2263   emit_byte(0xC0 | encode);

  2264   emit_byte(imm8);

  2265 }

  2267 void Assembler::pmovzxbw(XMMRegister dst, Address src) {

  2268   assert(VM_Version::supports_sse4_1(), "");

  2269   InstructionMark im(this);

  2270   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

  2271   emit_byte(0x30);

  2272   emit_operand(dst, src);

  2273 }

  2275 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {

  2276   assert(VM_Version::supports_sse4_1(), "");

  2277   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

  2278   emit_byte(0x30);

  2279   emit_byte(0xC0 | encode);

  2280 }

  2282 // generic

  2283 void Assembler::pop(Register dst) {

  2284   int encode = prefix_and_encode(dst->encoding());

  2285   emit_byte(0x58 | encode);

  2286 }

  2288 void Assembler::popcntl(Register dst, Address src) {

  2289   assert(VM_Version::supports_popcnt(), "must support");

  2290   InstructionMark im(this);

  2291   emit_byte(0xF3);

  2292   prefix(src, dst);

  2293   emit_byte(0x0F);

  2294   emit_byte(0xB8);

  2295   emit_operand(dst, src);

  2296 }

  2298 void Assembler::popcntl(Register dst, Register src) {

  2299   assert(VM_Version::supports_popcnt(), "must support");

  2300   emit_byte(0xF3);

  2301   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  2302   emit_byte(0x0F);

  2303   emit_byte(0xB8);

  2304   emit_byte(0xC0 | encode);

  2305 }

  2307 void Assembler::popf() {

  2308   emit_byte(0x9D);

  2309 }

  #ifndef _LP64 // no 32bit push/pop on amd64
  // Pop the top of stack into a memory location: opcode 0x8F with rax in the
  // register slot of the operand.
  void Assembler::popl(Address dst) {
    // NOTE: this will adjust stack by 8byte on 64bits
    InstructionMark mark(this);
    prefix(dst);
    emit_byte(0x8F);
    emit_operand(rax, dst);
  }
  #endif

  2321 void Assembler::prefetch_prefix(Address src) {

  2322   prefix(src);

  2323   emit_byte(0x0F);

  2324 }

  2326 void Assembler::prefetchnta(Address src) {

  2327   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));

  2328   InstructionMark im(this);

  2329   prefetch_prefix(src);

  2330   emit_byte(0x18);

  2331   emit_operand(rax, src); // 0, src

  2332 }

  2334 void Assembler::prefetchr(Address src) {

  2335   assert(VM_Version::supports_3dnow_prefetch(), "must support");

  2336   InstructionMark im(this);

  2337   prefetch_prefix(src);

  2338   emit_byte(0x0D);

  2339   emit_operand(rax, src); // 0, src

  2340 }

  2342 void Assembler::prefetcht0(Address src) {

  2343   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));

  2344   InstructionMark im(this);

  2345   prefetch_prefix(src);

  2346   emit_byte(0x18);

  2347   emit_operand(rcx, src); // 1, src

  2348 }

  2350 void Assembler::prefetcht1(Address src) {

  2351   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));

  2352   InstructionMark im(this);

  2353   prefetch_prefix(src);

  2354   emit_byte(0x18);

  2355   emit_operand(rdx, src); // 2, src

  2356 }

  2358 void Assembler::prefetcht2(Address src) {

  2359   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));

  2360   InstructionMark im(this);

  2361   prefetch_prefix(src);

  2362   emit_byte(0x18);

  2363   emit_operand(rbx, src); // 3, src

  2364 }

  2366 void Assembler::prefetchw(Address src) {

  2367   assert(VM_Version::supports_3dnow_prefetch(), "must support");

  2368   InstructionMark im(this);

  2369   prefetch_prefix(src);

  2370   emit_byte(0x0D);

  2371   emit_operand(rcx, src); // 1, src

  2372 }

  2374 void Assembler::prefix(Prefix p) {

  2375   a_byte(p);

  2376 }

  2378 void Assembler::por(XMMRegister dst, XMMRegister src) {

  2379   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2380   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);

  2381   emit_byte(0xEB);

  2382   emit_byte(0xC0 | encode);

  2383 }

  2385 void Assembler::por(XMMRegister dst, Address src) {

  2386   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2387   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2388   InstructionMark im(this);

  2389   simd_prefix(dst, dst, src, VEX_SIMD_66);

  2390   emit_byte(0xEB);

  2391   emit_operand(dst, src);

  2392 }

  2394 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {

  2395   assert(isByte(mode), "invalid value");

  2396   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2397   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);

  2398   emit_byte(0x70);

  2399   emit_byte(0xC0 | encode);

  2400   emit_byte(mode & 0xFF);

  2402 }

  2404 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {

  2405   assert(isByte(mode), "invalid value");

  2406   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2407   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2408   InstructionMark im(this);

  2409   simd_prefix(dst, src, VEX_SIMD_66);

  2410   emit_byte(0x70);

  2411   emit_operand(dst, src);

  2412   emit_byte(mode & 0xFF);

  2413 }

  2415 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {

  2416   assert(isByte(mode), "invalid value");

  2417   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2418   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);

  2419   emit_byte(0x70);

  2420   emit_byte(0xC0 | encode);

  2421   emit_byte(mode & 0xFF);

  2422 }

  2424 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {

  2425   assert(isByte(mode), "invalid value");

  2426   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2427   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2428   InstructionMark im(this);

  2429   simd_prefix(dst, src, VEX_SIMD_F2);

  2430   emit_byte(0x70);

  2431   emit_operand(dst, src);

  2432   emit_byte(mode & 0xFF);

  2433 }

  2435 void Assembler::psrlq(XMMRegister dst, int shift) {

  2436   // Shift 64 bit value logically right by specified number of bits.

  2437   // HMM Table D-1 says sse2 or mmx.

  2438   // Do not confuse it with psrldq SSE2 instruction which

  2439   // shifts 128 bit value in xmm register by number of bytes.

  2440   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2441   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);

  2442   emit_byte(0x73);

  2443   emit_byte(0xC0 | encode);

  2444   emit_byte(shift);

  2445 }

  2447 void Assembler::psrldq(XMMRegister dst, int shift) {

  2448   // Shift 128 bit value in xmm register by number of bytes.

  2449   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2450   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);

  2451   emit_byte(0x73);

  2452   emit_byte(0xC0 | encode);

  2453   emit_byte(shift);

  2454 }

  2456 void Assembler::ptest(XMMRegister dst, Address src) {

  2457   assert(VM_Version::supports_sse4_1(), "");

  2458   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2459   InstructionMark im(this);

  2460   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

  2461   emit_byte(0x17);

  2462   emit_operand(dst, src);

  2463 }

  2465 void Assembler::ptest(XMMRegister dst, XMMRegister src) {

  2466   assert(VM_Version::supports_sse4_1(), "");

  2467   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

  2468   emit_byte(0x17);

  2469   emit_byte(0xC0 | encode);

  2470 }

  2472 void Assembler::punpcklbw(XMMRegister dst, Address src) {

  2473   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2474   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2475   InstructionMark im(this);

  2476   simd_prefix(dst, dst, src, VEX_SIMD_66);

  2477   emit_byte(0x60);

  2478   emit_operand(dst, src);

  2479 }

  2481 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {

  2482   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2483   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);

  2484   emit_byte(0x60);

  2485   emit_byte(0xC0 | encode);

  2486 }

  2488 void Assembler::punpckldq(XMMRegister dst, Address src) {

  2489   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2490   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2491   InstructionMark im(this);

  2492   simd_prefix(dst, dst, src, VEX_SIMD_66);

  2493   emit_byte(0x62);

  2494   emit_operand(dst, src);

  2495 }

  2497 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {

  2498   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2499   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);

  2500   emit_byte(0x62);

  2501   emit_byte(0xC0 | encode);

  2502 }

  2504 void Assembler::push(int32_t imm32) {

  2505   // in 64bits we push 64bits onto the stack but only

  2506   // take a 32bit immediate

  2507   emit_byte(0x68);

  2508   emit_long(imm32);

  2509 }

  2511 void Assembler::push(Register src) {

  2512   int encode = prefix_and_encode(src->encoding());

  2514   emit_byte(0x50 | encode);

  2515 }

  2517 void Assembler::pushf() {

  2518   emit_byte(0x9C);

  2519 }

  #ifndef _LP64 // no 32bit push/pop on amd64
  // Push a memory operand: opcode 0xFF with rsi placed in the register slot
  // of the operand (it acts as the opcode extension here).
  void Assembler::pushl(Address src) {
    // Note this will push 64bit on 64bit
    InstructionMark mark(this);
    prefix(src);
    emit_byte(0xFF);
    emit_operand(rsi, src);
  }
  #endif

  2531 void Assembler::pxor(XMMRegister dst, Address src) {

  2532   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2533   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");

  2534   InstructionMark im(this);

  2535   simd_prefix(dst, dst, src, VEX_SIMD_66);

  2536   emit_byte(0xEF);

  2537   emit_operand(dst, src);

  2538 }

  2540 void Assembler::pxor(XMMRegister dst, XMMRegister src) {

  2541   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2542   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);

  2543   emit_byte(0xEF);

  2544   emit_byte(0xC0 | encode);

  2545 }

  2547 void Assembler::rcll(Register dst, int imm8) {

  2548   assert(isShiftCount(imm8), "illegal shift count");

  2549   int encode = prefix_and_encode(dst->encoding());

  2550   if (imm8 == 1) {

  2551     emit_byte(0xD1);

  2552     emit_byte(0xD0 | encode);

  2553   } else {

  2554     emit_byte(0xC1);

  2555     emit_byte(0xD0 | encode);

  2556     emit_byte(imm8);

  2557   }

  2558 }

  2560 // copies data from [esi] to [edi] using rcx pointer sized words

  2561 // generic

  2562 void Assembler::rep_mov() {

  2563   emit_byte(0xF3);

  2564   // MOVSQ

  2565   LP64_ONLY(prefix(REX_W));

  2566   emit_byte(0xA5);

  2567 }

  2569 // sets rcx pointer sized words with rax, value at [edi]

  2570 // generic

  2571 void Assembler::rep_set() { // rep_set

  2572   emit_byte(0xF3);

  2573   // STOSQ

  2574   LP64_ONLY(prefix(REX_W));

  2575   emit_byte(0xAB);

  2576 }

  2578 // scans rcx pointer sized words at [edi] for occurance of rax,

  2579 // generic

  2580 void Assembler::repne_scan() { // repne_scan

  2581   emit_byte(0xF2);

  2582   // SCASQ

  2583   LP64_ONLY(prefix(REX_W));

  2584   emit_byte(0xAF);

  2585 }

  2587 #ifdef _LP64

  2588 // scans rcx 4 byte words at [edi] for occurance of rax,

  2589 // generic

  2590 void Assembler::repne_scanl() { // repne_scan

  2591   emit_byte(0xF2);

  2592   // SCASL

  2593   emit_byte(0xAF);

  2594 }

  2595 #endif

  2597 void Assembler::ret(int imm16) {

  2598   if (imm16 == 0) {

  2599     emit_byte(0xC3);

  2600   } else {

  2601     emit_byte(0xC2);

  2602     emit_word(imm16);

  2603   }

  2604 }

  2606 void Assembler::sahf() {

  2607 #ifdef _LP64

  2608   // Not supported in 64bit mode

  2609   ShouldNotReachHere();

  2610 #endif

  2611   emit_byte(0x9E);

  2612 }

  2614 void Assembler::sarl(Register dst, int imm8) {

  2615   int encode = prefix_and_encode(dst->encoding());

  2616   assert(isShiftCount(imm8), "illegal shift count");

  2617   if (imm8 == 1) {

  2618     emit_byte(0xD1);

  2619     emit_byte(0xF8 | encode);

  2620   } else {

  2621     emit_byte(0xC1);

  2622     emit_byte(0xF8 | encode);

  2623     emit_byte(imm8);

  2624   }

  2625 }

  2627 void Assembler::sarl(Register dst) {

  2628   int encode = prefix_and_encode(dst->encoding());

  2629   emit_byte(0xD3);

  2630   emit_byte(0xF8 | encode);

  2631 }

  2633 void Assembler::sbbl(Address dst, int32_t imm32) {

  2634   InstructionMark im(this);

  2635   prefix(dst);

  2636   emit_arith_operand(0x81, rbx, dst, imm32);

  2637 }

  2639 void Assembler::sbbl(Register dst, int32_t imm32) {

  2640   prefix(dst);

  2641   emit_arith(0x81, 0xD8, dst, imm32);

  2642 }

  2645 void Assembler::sbbl(Register dst, Address src) {

  2646   InstructionMark im(this);

  2647   prefix(src, dst);

  2648   emit_byte(0x1B);

  2649   emit_operand(dst, src);

  2650 }

  2652 void Assembler::sbbl(Register dst, Register src) {

  2653   (void) prefix_and_encode(dst->encoding(), src->encoding());

  2654   emit_arith(0x1B, 0xC0, dst, src);

  2655 }

  2657 void Assembler::setb(Condition cc, Register dst) {

  2658   assert(0 <= cc && cc < 16, "illegal cc");

  2659   int encode = prefix_and_encode(dst->encoding(), true);

  2660   emit_byte(0x0F);

  2661   emit_byte(0x90 | cc);

  2662   emit_byte(0xC0 | encode);

  2663 }

  2665 void Assembler::shll(Register dst, int imm8) {

  2666   assert(isShiftCount(imm8), "illegal shift count");

  2667   int encode = prefix_and_encode(dst->encoding());

  2668   if (imm8 == 1 ) {

  2669     emit_byte(0xD1);

  2670     emit_byte(0xE0 | encode);

  2671   } else {

  2672     emit_byte(0xC1);

  2673     emit_byte(0xE0 | encode);

  2674     emit_byte(imm8);

  2675   }

  2676 }

  2678 void Assembler::shll(Register dst) {

  2679   int encode = prefix_and_encode(dst->encoding());

  2680   emit_byte(0xD3);

  2681   emit_byte(0xE0 | encode);

  2682 }

  2684 void Assembler::shrl(Register dst, int imm8) {

  2685   assert(isShiftCount(imm8), "illegal shift count");

  2686   int encode = prefix_and_encode(dst->encoding());

  2687   emit_byte(0xC1);

  2688   emit_byte(0xE8 | encode);

  2689   emit_byte(imm8);

  2690 }

  2692 void Assembler::shrl(Register dst) {

  2693   int encode = prefix_and_encode(dst->encoding());

  2694   emit_byte(0xD3);

  2695   emit_byte(0xE8 | encode);

  2696 }

// Emits the string-move opcode 0xA5 (MOVS):
// copies a single word from [esi] to [edi]
void Assembler::smovl() {
  emit_byte(0xA5);
}

  2703 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {

  2704   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2705   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

  2706   emit_byte(0x51);

  2707   emit_byte(0xC0 | encode);

  2708 }

  2710 void Assembler::sqrtsd(XMMRegister dst, Address src) {

  2711   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2712   InstructionMark im(this);

  2713   simd_prefix(dst, dst, src, VEX_SIMD_F2);

  2714   emit_byte(0x51);

  2715   emit_operand(dst, src);

  2716 }

  2718 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {

  2719   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2720   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

  2721   emit_byte(0x51);

  2722   emit_byte(0xC0 | encode);

  2723 }

  2725 void Assembler::sqrtss(XMMRegister dst, Address src) {

  2726   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2727   InstructionMark im(this);

  2728   simd_prefix(dst, dst, src, VEX_SIMD_F3);

  2729   emit_byte(0x51);

  2730   emit_operand(dst, src);

  2731 }

  2733 void Assembler::stmxcsr( Address dst) {

  2734   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2735   InstructionMark im(this);

  2736   prefix(dst);

  2737   emit_byte(0x0F);

  2738   emit_byte(0xAE);

  2739   emit_operand(as_Register(3), dst);

  2740 }

  2742 void Assembler::subl(Address dst, int32_t imm32) {

  2743   InstructionMark im(this);

  2744   prefix(dst);

  2745   emit_arith_operand(0x81, rbp, dst, imm32);

  2746 }

  2748 void Assembler::subl(Address dst, Register src) {

  2749   InstructionMark im(this);

  2750   prefix(dst, src);

  2751   emit_byte(0x29);

  2752   emit_operand(src, dst);

  2753 }

// SUB r32, imm: group-1 opcode 0x81 with ModRM base 0xE8 (mod=11, /5).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

  2760 void Assembler::subl(Register dst, Address src) {

  2761   InstructionMark im(this);

  2762   prefix(src, dst);

  2763   emit_byte(0x2B);

  2764   emit_operand(dst, src);

  2765 }

// SUB r32, r32 (opcode 0x2B, register-direct ModRM base 0xC0).
void Assembler::subl(Register dst, Register src) {
  // Called only for the prefix it emits; emit_arith builds the ModRM byte itself.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

  2772 void Assembler::subsd(XMMRegister dst, XMMRegister src) {

  2773   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2774   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);

  2775   emit_byte(0x5C);

  2776   emit_byte(0xC0 | encode);

  2777 }

  2779 void Assembler::subsd(XMMRegister dst, Address src) {

  2780   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2781   InstructionMark im(this);

  2782   simd_prefix(dst, dst, src, VEX_SIMD_F2);

  2783   emit_byte(0x5C);

  2784   emit_operand(dst, src);

  2785 }

  2787 void Assembler::subss(XMMRegister dst, XMMRegister src) {

  2788   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2789   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);

  2790   emit_byte(0x5C);

  2791   emit_byte(0xC0 | encode);

  2792 }

  2794 void Assembler::subss(XMMRegister dst, Address src) {

  2795   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2796   InstructionMark im(this);

  2797   simd_prefix(dst, dst, src, VEX_SIMD_F3);

  2798   emit_byte(0x5C);

  2799   emit_operand(dst, src);

  2800 }

// TEST r8, imm8 (opcode F6 /0). On 32-bit builds dst must be a register that
// has a byte-sized form.
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  // Called only for the prefix it emits; emit_arith_b builds the ModRM byte itself.
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

  2808 void Assembler::testl(Register dst, int32_t imm32) {

  2809   // not using emit_arith because test

  2810   // doesn't support sign-extension of

  2811   // 8bit operands

  2812   int encode = dst->encoding();

  2813   if (encode == 0) {

  2814     emit_byte(0xA9);

  2815   } else {

  2816     encode = prefix_and_encode(encode);

  2817     emit_byte(0xF7);

  2818     emit_byte(0xC0 | encode);

  2819   }

  2820   emit_long(imm32);

  2821 }

// TEST r32, r32 (opcode 0x85, register-direct ModRM base 0xC0).
void Assembler::testl(Register dst, Register src) {
  // Called only for the prefix it emits; emit_arith builds the ModRM byte itself.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

  2828 void Assembler::testl(Register dst, Address  src) {

  2829   InstructionMark im(this);

  2830   prefix(src, dst);

  2831   emit_byte(0x85);

  2832   emit_operand(dst, src);

  2833 }

  2835 void Assembler::ucomisd(XMMRegister dst, Address src) {

  2836   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2837   InstructionMark im(this);

  2838   simd_prefix(dst, src, VEX_SIMD_66);

  2839   emit_byte(0x2E);

  2840   emit_operand(dst, src);

  2841 }

  2843 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {

  2844   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2845   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);

  2846   emit_byte(0x2E);

  2847   emit_byte(0xC0 | encode);

  2848 }

  2850 void Assembler::ucomiss(XMMRegister dst, Address src) {

  2851   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2852   InstructionMark im(this);

  2853   simd_prefix(dst, src, VEX_SIMD_NONE);

  2854   emit_byte(0x2E);

  2855   emit_operand(dst, src);

  2856 }

  2858 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {

  2859   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2860   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);

  2861   emit_byte(0x2E);

  2862   emit_byte(0xC0 | encode);

  2863 }

  2866 void Assembler::xaddl(Address dst, Register src) {

  2867   InstructionMark im(this);

  2868   prefix(dst, src);

  2869   emit_byte(0x0F);

  2870   emit_byte(0xC1);

  2871   emit_operand(src, dst);

  2872 }

  2874 void Assembler::xchgl(Register dst, Address src) { // xchg

  2875   InstructionMark im(this);

  2876   prefix(src, dst);

  2877   emit_byte(0x87);

  2878   emit_operand(dst, src);

  2879 }

  2881 void Assembler::xchgl(Register dst, Register src) {

  2882   int encode = prefix_and_encode(dst->encoding(), src->encoding());

  2883   emit_byte(0x87);

  2884   emit_byte(0xc0 | encode);

  2885 }

// XOR r32, imm: group-1 opcode 0x81 with ModRM base 0xF0 (mod=11, /6).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

  2892 void Assembler::xorl(Register dst, Address src) {

  2893   InstructionMark im(this);

  2894   prefix(src, dst);

  2895   emit_byte(0x33);

  2896   emit_operand(dst, src);

  2897 }

// XOR r32, r32 (opcode 0x33, register-direct ModRM base 0xC0).
void Assembler::xorl(Register dst, Register src) {
  // Called only for the prefix it emits; emit_arith builds the ModRM byte itself.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

  2904 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {

  2905   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2906   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);

  2907   emit_byte(0x57);

  2908   emit_byte(0xC0 | encode);

  2909 }

  2911 void Assembler::xorpd(XMMRegister dst, Address src) {

  2912   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  2913   InstructionMark im(this);

  2914   simd_prefix(dst, dst, src, VEX_SIMD_66);

  2915   emit_byte(0x57);

  2916   emit_operand(dst, src);

  2917 }

  2920 void Assembler::xorps(XMMRegister dst, XMMRegister src) {

  2921   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2922   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);

  2923   emit_byte(0x57);

  2924   emit_byte(0xC0 | encode);

  2925 }

  2927 void Assembler::xorps(XMMRegister dst, Address src) {

  2928   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  2929   InstructionMark im(this);

  2930   simd_prefix(dst, dst, src, VEX_SIMD_NONE);

  2931   emit_byte(0x57);

  2932   emit_operand(dst, src);

  2933 }

// AVX three-operand instructions with a non-destructive source (encoded with a VEX prefix)

  2937 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {

  2938   assert(VM_Version::supports_avx(), "");

  2939   InstructionMark im(this);

  2940   vex_prefix(dst, nds, src, VEX_SIMD_F2);

  2941   emit_byte(0x58);

  2942   emit_operand(dst, src);

  2943 }

  2945 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  2946   assert(VM_Version::supports_avx(), "");

  2947   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);

  2948   emit_byte(0x58);

  2949   emit_byte(0xC0 | encode);

  2950 }

  2952 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {

  2953   assert(VM_Version::supports_avx(), "");

  2954   InstructionMark im(this);

  2955   vex_prefix(dst, nds, src, VEX_SIMD_F3);

  2956   emit_byte(0x58);

  2957   emit_operand(dst, src);

  2958 }

  2960 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  2961   assert(VM_Version::supports_avx(), "");

  2962   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);

  2963   emit_byte(0x58);

  2964   emit_byte(0xC0 | encode);

  2965 }

  2967 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {

  2968   assert(VM_Version::supports_avx(), "");

  2969   InstructionMark im(this);

  2970   vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector

  2971   emit_byte(0x54);

  2972   emit_operand(dst, src);

  2973 }

  2975 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {

  2976   assert(VM_Version::supports_avx(), "");

  2977   InstructionMark im(this);

  2978   vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector

  2979   emit_byte(0x54);

  2980   emit_operand(dst, src);

  2981 }

  2983 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {

  2984   assert(VM_Version::supports_avx(), "");

  2985   InstructionMark im(this);

  2986   vex_prefix(dst, nds, src, VEX_SIMD_F2);

  2987   emit_byte(0x5E);

  2988   emit_operand(dst, src);

  2989 }

  2991 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  2992   assert(VM_Version::supports_avx(), "");

  2993   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);

  2994   emit_byte(0x5E);

  2995   emit_byte(0xC0 | encode);

  2996 }

  2998 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {

  2999   assert(VM_Version::supports_avx(), "");

  3000   InstructionMark im(this);

  3001   vex_prefix(dst, nds, src, VEX_SIMD_F3);

  3002   emit_byte(0x5E);

  3003   emit_operand(dst, src);

  3004 }

  3006 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  3007   assert(VM_Version::supports_avx(), "");

  3008   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);

  3009   emit_byte(0x5E);

  3010   emit_byte(0xC0 | encode);

  3011 }

  3013 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {

  3014   assert(VM_Version::supports_avx(), "");

  3015   InstructionMark im(this);

  3016   vex_prefix(dst, nds, src, VEX_SIMD_F2);

  3017   emit_byte(0x59);

  3018   emit_operand(dst, src);

  3019 }

  3021 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  3022   assert(VM_Version::supports_avx(), "");

  3023   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);

  3024   emit_byte(0x59);

  3025   emit_byte(0xC0 | encode);

  3026 }

  3028 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {

  3029   InstructionMark im(this);

  3030   vex_prefix(dst, nds, src, VEX_SIMD_F3);

  3031   emit_byte(0x59);

  3032   emit_operand(dst, src);

  3033 }

  3035 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  3036   assert(VM_Version::supports_avx(), "");

  3037   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);

  3038   emit_byte(0x59);

  3039   emit_byte(0xC0 | encode);

  3040 }

  3043 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {

  3044   assert(VM_Version::supports_avx(), "");

  3045   InstructionMark im(this);

  3046   vex_prefix(dst, nds, src, VEX_SIMD_F2);

  3047   emit_byte(0x5C);

  3048   emit_operand(dst, src);

  3049 }

  3051 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  3052   assert(VM_Version::supports_avx(), "");

  3053   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);

  3054   emit_byte(0x5C);

  3055   emit_byte(0xC0 | encode);

  3056 }

  3058 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {

  3059   assert(VM_Version::supports_avx(), "");

  3060   InstructionMark im(this);

  3061   vex_prefix(dst, nds, src, VEX_SIMD_F3);

  3062   emit_byte(0x5C);

  3063   emit_operand(dst, src);

  3064 }

  3066 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {

  3067   assert(VM_Version::supports_avx(), "");

  3068   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);

  3069   emit_byte(0x5C);

  3070   emit_byte(0xC0 | encode);

  3071 }

  3073 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {

  3074   assert(VM_Version::supports_avx(), "");

  3075   InstructionMark im(this);

  3076   vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector

  3077   emit_byte(0x57);

  3078   emit_operand(dst, src);

  3079 }

  3081 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {

  3082   assert(VM_Version::supports_avx(), "");

  3083   InstructionMark im(this);

  3084   vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector

  3085   emit_byte(0x57);

  3086   emit_operand(dst, src);

  3087 }

  3090 #ifndef _LP64

  3091 // 32bit only pieces of the assembler

  3093 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {

  3094   // NO PREFIX AS NEVER 64BIT

  3095   InstructionMark im(this);

  3096   emit_byte(0x81);

  3097   emit_byte(0xF8 | src1->encoding());

  3098   emit_data(imm32, rspec, 0);

  3099 }

  3101 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {

  3102   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs

  3103   InstructionMark im(this);

  3104   emit_byte(0x81);

  3105   emit_operand(rdi, src1);

  3106   emit_data(imm32, rspec, 0);

  3107 }

  3109 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,

  3110 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded

  3111 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.

  3112 void Assembler::cmpxchg8(Address adr) {

  3113   InstructionMark im(this);

  3114   emit_byte(0x0F);

  3115   emit_byte(0xc7);

  3116   emit_operand(rcx, adr);

  3117 }

// DEC r32, one-byte form (0x48 + register encoding). 32-bit only.
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_byte(0x48 | dst->encoding());
}

  3124 #endif // _LP64

  3126 // 64bit typically doesn't use the x87 but needs to for the trig funcs

// FABS (D9 E1): replaces ST(0) with its absolute value.
void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}

// FADD ST(0), ST(i): opcode pair D8 C0+i, assembled by emit_farith.
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}

  3137 void Assembler::fadd_d(Address src) {

  3138   InstructionMark im(this);

  3139   emit_byte(0xDC);

  3140   emit_operand32(rax, src);

  3141 }

  3143 void Assembler::fadd_s(Address src) {

  3144   InstructionMark im(this);

  3145   emit_byte(0xD8);

  3146   emit_operand32(rax, src);

  3147 }

// FADD ST(i), ST(0): opcode pair DC C0+i, assembled by emit_farith.
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}

// FADDP ST(i), ST(0): opcode pair DE C0+i, assembled by emit_farith.
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}

// FCHS (D9 E0): negates ST(0).
void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}

// FCOM ST(i): opcode pair D8 D0+i, assembled by emit_farith.
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}

// FCOMP ST(i): opcode pair D8 D8+i, assembled by emit_farith.
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}

  3170 void Assembler::fcomp_d(Address src) {

  3171   InstructionMark im(this);

  3172   emit_byte(0xDC);

  3173   emit_operand32(rbx, src);

  3174 }

  3176 void Assembler::fcomp_s(Address src) {

  3177   InstructionMark im(this);

  3178   emit_byte(0xD8);

  3179   emit_operand32(rbx, src);

  3180 }

// FCOMPP (DE D9): compares ST(0) with ST(1) and pops both.
void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}

// FCOS (D9 FF): replaces ST(0) with its cosine.
void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}

// FDECSTP (D9 F6): decrements the x87 stack-top pointer.
void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}

// FDIV ST(0), ST(i): opcode pair D8 F0+i, assembled by emit_farith.
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}

  3201 void Assembler::fdiv_d(Address src) {

  3202   InstructionMark im(this);

  3203   emit_byte(0xDC);

  3204   emit_operand32(rsi, src);

  3205 }

  3207 void Assembler::fdiv_s(Address src) {

  3208   InstructionMark im(this);

  3209   emit_byte(0xD8);

  3210   emit_operand32(rsi, src);

  3211 }

// FDIV ST(i), ST(0): opcode pair DC F8+i, assembled by emit_farith.
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}

  3217 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)

  3218 //       is erroneous for some of the floating-point instructions below.

// FDIVP: opcode pair DE F8+i, assembled by emit_farith.
void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

// FDIVR ST(0), ST(i): opcode pair D8 F8+i, assembled by emit_farith.
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

  3228 void Assembler::fdivr_d(Address src) {

  3229   InstructionMark im(this);

  3230   emit_byte(0xDC);

  3231   emit_operand32(rdi, src);

  3232 }

  3234 void Assembler::fdivr_s(Address src) {

  3235   InstructionMark im(this);

  3236   emit_byte(0xD8);

  3237   emit_operand32(rdi, src);

  3238 }

// FDIVR ST(i), ST(0): opcode pair DC F0+i, assembled by emit_farith.
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

// FDIVRP: opcode pair DE F0+i, assembled by emit_farith.
void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

// FFREE ST(i): opcode pair DD C0+i, assembled by emit_farith.
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

  3252 void Assembler::fild_d(Address adr) {

  3253   InstructionMark im(this);

  3254   emit_byte(0xDF);

  3255   emit_operand32(rbp, adr);

  3256 }

  3258 void Assembler::fild_s(Address adr) {

  3259   InstructionMark im(this);

  3260   emit_byte(0xDB);

  3261   emit_operand32(rax, adr);

  3262 }

// FINCSTP (D9 F7): increments the x87 stack-top pointer.
void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

// FINIT (9B DB E3): the 0x9B wait opcode followed by DB E3.
void Assembler::finit() {
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

  3275 void Assembler::fist_s(Address adr) {

  3276   InstructionMark im(this);

  3277   emit_byte(0xDB);

  3278   emit_operand32(rdx, adr);

  3279 }

  3281 void Assembler::fistp_d(Address adr) {

  3282   InstructionMark im(this);

  3283   emit_byte(0xDF);

  3284   emit_operand32(rdi, adr);

  3285 }

  3287 void Assembler::fistp_s(Address adr) {

  3288   InstructionMark im(this);

  3289   emit_byte(0xDB);

  3290   emit_operand32(rbx, adr);

  3291 }

// FLD1 (D9 E8): pushes the constant +1.0 onto the x87 stack.
void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

  3298 void Assembler::fld_d(Address adr) {

  3299   InstructionMark im(this);

  3300   emit_byte(0xDD);

  3301   emit_operand32(rax, adr);

  3302 }

  3304 void Assembler::fld_s(Address adr) {

  3305   InstructionMark im(this);

  3306   emit_byte(0xD9);

  3307   emit_operand32(rax, adr);

  3308 }

// FLD ST(index): opcode pair D9 C0+index, assembled by emit_farith.
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}

  3315 void Assembler::fld_x(Address adr) {

  3316   InstructionMark im(this);

  3317   emit_byte(0xDB);

  3318   emit_operand32(rbp, adr);

  3319 }

  3321 void Assembler::fldcw(Address src) {

  3322   InstructionMark im(this);

  3323   emit_byte(0xd9);

  3324   emit_operand32(rbp, src);

  3325 }

  3327 void Assembler::fldenv(Address src) {

  3328   InstructionMark im(this);

  3329   emit_byte(0xD9);

  3330   emit_operand32(rsp, src);

  3331 }

// FLDLG2 (D9 EC): pushes log10(2) onto the x87 stack.
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

// FLDLN2 (D9 ED): pushes ln(2) onto the x87 stack.
void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

// FLDZ (D9 EE): pushes +0.0 onto the x87 stack.
void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// Natural logarithm of ST(0), emitted as a three-instruction sequence:
// push ln(2), swap it beneath the operand, then FYL2X forms ln(2) * log2(x).
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// Base-10 logarithm of ST(0), emitted as a three-instruction sequence:
// push log10(2), swap it beneath the operand, then FYL2X forms log10(2) * log2(x).
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

// FMUL ST(0), ST(i): opcode pair D8 C8+i, assembled by emit_farith.
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

  3364 void Assembler::fmul_d(Address src) {

  3365   InstructionMark im(this);

  3366   emit_byte(0xDC);

  3367   emit_operand32(rcx, src);

  3368 }

  3370 void Assembler::fmul_s(Address src) {

  3371   InstructionMark im(this);

  3372   emit_byte(0xD8);

  3373   emit_operand32(rcx, src);

  3374 }

// FMUL ST(i), ST(0): opcode pair DC C8+i, assembled by emit_farith.
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

// FMULP ST(i), ST(0): opcode pair DE C8+i, assembled by emit_farith.
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

  3384 void Assembler::fnsave(Address dst) {

  3385   InstructionMark im(this);

  3386   emit_byte(0xDD);

  3387   emit_operand32(rsi, dst);

  3388 }

  3390 void Assembler::fnstcw(Address src) {

  3391   InstructionMark im(this);

  3392   emit_byte(0x9B);

  3393   emit_byte(0xD9);

  3394   emit_operand32(rdi, src);

  3395 }

// FNSTSW AX (DF E0): stores the x87 status word into AX.
void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

// FPREM (D9 F8): partial remainder of ST(0) / ST(1).
void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

// FPREM1 (D9 F5): IEEE partial remainder of ST(0) / ST(1).
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

  3412 void Assembler::frstor(Address src) {

  3413   InstructionMark im(this);

  3414   emit_byte(0xDD);

  3415   emit_operand32(rsp, src);

  3416 }

// FSIN (D9 FE): replaces ST(0) with its sine.
void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

// FSQRT (D9 FA): replaces ST(0) with its square root.
void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

  3428 void Assembler::fst_d(Address adr) {

  3429   InstructionMark im(this);

  3430   emit_byte(0xDD);

  3431   emit_operand32(rdx, adr);

  3432 }

  3434 void Assembler::fst_s(Address adr) {

  3435   InstructionMark im(this);

  3436   emit_byte(0xD9);

  3437   emit_operand32(rdx, adr);

  3438 }

  3440 void Assembler::fstp_d(Address adr) {

  3441   InstructionMark im(this);

  3442   emit_byte(0xDD);

  3443   emit_operand32(rbx, adr);

  3444 }

// FSTP ST(index): opcode pair DD D8+index, assembled by emit_farith.
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}

  3450 void Assembler::fstp_s(Address adr) {

  3451   InstructionMark im(this);

  3452   emit_byte(0xD9);

  3453   emit_operand32(rbx, adr);

  3454 }

  3456 void Assembler::fstp_x(Address adr) {

  3457   InstructionMark im(this);

  3458   emit_byte(0xDB);

  3459   emit_operand32(rdi, adr);

  3460 }

// FSUB ST(0), ST(i): opcode pair D8 E0+i, assembled by emit_farith.
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

  3466 void Assembler::fsub_d(Address src) {

  3467   InstructionMark im(this);

  3468   emit_byte(0xDC);

  3469   emit_operand32(rsp, src);

  3470 }

  3472 void Assembler::fsub_s(Address src) {

  3473   InstructionMark im(this);

  3474   emit_byte(0xD8);

  3475   emit_operand32(rsp, src);

  3476 }

// FSUB ST(i), ST(0): opcode pair DC E8+i, assembled by emit_farith.
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

// FSUBP: opcode pair DE E8+i, assembled by emit_farith.
void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

// FSUBR ST(0), ST(i): opcode pair D8 E8+i, assembled by emit_farith.
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

  3490 void Assembler::fsubr_d(Address src) {

  3491   InstructionMark im(this);

  3492   emit_byte(0xDC);

  3493   emit_operand32(rbp, src);

  3494 }

  3496 void Assembler::fsubr_s(Address src) {

  3497   InstructionMark im(this);

  3498   emit_byte(0xD8);

  3499   emit_operand32(rbp, src);

  3500 }

// FSUBR ST(i), ST(0): opcode pair DC E0+i, assembled by emit_farith.
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

// FSUBRP: opcode pair DE E0+i, assembled by emit_farith.
void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// Tangent of ST(0), emitted as two instructions.
void Assembler::ftan() {
  // D9 F2: FPTAN, which leaves the tangent and then pushes 1.0 on top of it.
  emit_byte(0xD9);
  emit_byte(0xF2);
  // DD D8: same bytes as fstp_d(0); pops the 1.0 so the tangent is left in ST(0).
  emit_byte(0xDD);
  emit_byte(0xD8);
}

// FTST (D9 E4): compares ST(0) with 0.0.
void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

// FUCOMI ST(0), ST(i): opcode pair DB E8+i, assembled by emit_farith.
// The CPU check is a guarantee(), so it is enforced in product builds too.
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

// FUCOMIP ST(0), ST(i): opcode pair DF E8+i, assembled by emit_farith.
// The CPU check is a guarantee(), so it is enforced in product builds too.
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

// FWAIT/WAIT: single opcode byte 0x9B.
void Assembler::fwait() {
  emit_byte(0x9B);
}

// FXCH ST(i): opcode pair D9 C8+i, assembled by emit_farith.
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

// FYL2X (D9 F1): computes ST(1) * log2(ST(0)) and pops.
void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}

  3547 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.

  3548 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };

  3549 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.

  3550 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };

  3552 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.

  3553 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {

  3554   if (pre > 0) {

  3555     emit_byte(simd_pre[pre]);

  3556   }

  3557   if (rex_w) {

  3558     prefixq(adr, xreg);

  3559   } else {

  3560     prefix(adr, xreg);

  3561   }

  3562   if (opc > 0) {

  3563     emit_byte(0x0F);

  3564     int opc2 = simd_opc[opc];

  3565     if (opc2 > 0) {

  3566       emit_byte(opc2);

  3567     }

  3568   }

  3569 }

  3571 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {

  3572   if (pre > 0) {

  3573     emit_byte(simd_pre[pre]);

  3574   }

  3575   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :

  3576                           prefix_and_encode(dst_enc, src_enc);

  3577   if (opc > 0) {

  3578     emit_byte(0x0F);

  3579     int opc2 = simd_opc[opc];

  3580     if (opc2 > 0) {

  3581       emit_byte(opc2);

  3582     }

  3583   }

  3584   return encode;

  3585 }

  3588 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {

  3589   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {

  3590     prefix(VEX_3bytes);

  3592     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);

  3593     byte1 = (~byte1) & 0xE0;

  3594     byte1 |= opc;

  3595     a_byte(byte1);

  3597     int byte2 = ((~nds_enc) & 0xf) << 3;

  3598     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;

  3599     emit_byte(byte2);

  3600   } else {

  3601     prefix(VEX_2bytes);

  3603     int byte1 = vex_r ? VEX_R : 0;

  3604     byte1 = (~byte1) & 0x80;

  3605     byte1 |= ((~nds_enc) & 0xf) << 3;

  3606     byte1 |= (vector256 ? 4 : 0) | pre;

  3607     emit_byte(byte1);

  3608   }

  3609 }

  3611 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){

  3612   bool vex_r = (xreg_enc >= 8);

  3613   bool vex_b = adr.base_needs_rex();

  3614   bool vex_x = adr.index_needs_rex();

  3615   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);

  3616 }

  3618 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {

  3619   bool vex_r = (dst_enc >= 8);

  3620   bool vex_b = (src_enc >= 8);

  3621   bool vex_x = false;

  3622   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);

  3623   return (((dst_enc & 7) << 3) | (src_enc & 7));

  3624 }

  3627 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {

  3628   if (UseAVX > 0) {

  3629     int xreg_enc = xreg->encoding();

  3630     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;

  3631     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);

  3632   } else {

  3633     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");

  3634     rex_prefix(adr, xreg, pre, opc, rex_w);

  3635   }

  3636 }

  3638 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {

  3639   int dst_enc = dst->encoding();

  3640   int src_enc = src->encoding();

  3641   if (UseAVX > 0) {

  3642     int nds_enc = nds->is_valid() ? nds->encoding() : 0;

  3643     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);

  3644   } else {

  3645     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");

  3646     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);

  3647   }

  3648 }

  3650 #ifndef _LP64

// INC r32, one-byte form (0x40 + register encoding). 32-bit only.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());
}

// On 32-bit builds lea simply forwards to the 32-bit leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

  3661 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {

  3662   InstructionMark im(this);

  3663   emit_byte(0xC7);

  3664   emit_operand(rax, dst);

  3665   emit_data((int)imm32, rspec, 0);

  3666 }

  3668 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {

  3669   InstructionMark im(this);

  3670   int encode = prefix_and_encode(dst->encoding());

  3671   emit_byte(0xB8 | encode);

  3672   emit_data((int)imm32, rspec, 0);

  3673 }

// POPA/POPAD: single opcode byte 0x61.
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

  3679 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {

  3680   InstructionMark im(this);

  3681   emit_byte(0x68);

  3682   emit_data(imm32, rspec, 0);

  3683 }

// PUSHA/PUSHAD: single opcode byte 0x60.
void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// SETNE/SETNZ r8 (0F 95), 32-bit variant: no prefix is emitted.
// NOTE(review): the ModRM base is 0xE0 rather than 0xC0, i.e. the reg field is
// non-zero; SETcc is documented as ignoring that field -- confirm.
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

  3695 void Assembler::shldl(Register dst, Register src) {

  3696   emit_byte(0x0F);

  3697   emit_byte(0xA5);

  3698   emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());

  3699 }

  3701 void Assembler::shrdl(Register dst, Register src) {

  3702   emit_byte(0x0F);

  3703   emit_byte(0xAD);

  3704   emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());

  3705 }

  3707 #else // LP64

  3709 void Assembler::set_byte_if_not_zero(Register dst) {

  3710   int enc = prefix_and_encode(dst->encoding(), true);

  3711   emit_byte(0x0F);

  3712   emit_byte(0x95);

  3713   emit_byte(0xE0 | enc);

  3714 }

  3716 // 64bit only pieces of the assembler

  // This should only be used by 64bit instructions that can use rip-relative
  // addressing; it cannot be used by instructions that want an immediate value.

  3720 bool Assembler::reachable(AddressLiteral adr) {

  3721   int64_t disp;

  3722   // None will force a 64bit literal to the code stream. Likely a placeholder

  3723   // for something that will be patched later and we need to certain it will

  3724   // always be reachable.

  3725   if (adr.reloc() == relocInfo::none) {

  3726     return false;

  3727   }

  3728   if (adr.reloc() == relocInfo::internal_word_type) {

  3729     // This should be rip relative and easily reachable.

  3730     return true;

  3731   }

  3732   if (adr.reloc() == relocInfo::virtual_call_type ||

  3733       adr.reloc() == relocInfo::opt_virtual_call_type ||

  3734       adr.reloc() == relocInfo::static_call_type ||

  3735       adr.reloc() == relocInfo::static_stub_type ) {

  3736     // This should be rip relative within the code cache and easily

  3737     // reachable until we get huge code caches. (At which point

  3738     // ic code is going to have issues).

  3739     return true;

  3740   }

  3741   if (adr.reloc() != relocInfo::external_word_type &&

  3742       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special

  3743       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them

  3744       adr.reloc() != relocInfo::runtime_call_type ) {

  3745     return false;

  3746   }

  3748   // Stress the correction code

  3749   if (ForceUnreachable) {

  3750     // Must be runtimecall reloc, see if it is in the codecache

  3751     // Flipping stuff in the codecache to be unreachable causes issues

  3752     // with things like inline caches where the additional instructions

  3753     // are not handled.

  3754     if (CodeCache::find_blob(adr._target) == NULL) {

  3755       return false;

  3756     }

  3757   }

  3758   // For external_word_type/runtime_call_type if it is reachable from where we

  3759   // are now (possibly a temp buffer) and where we might end up

  3760   // anywhere in the codeCache then we are always reachable.

  3761   // This would have to change if we ever save/restore shared code

  3762   // to be more pessimistic.

  3763   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));

  3764   if (!is_simm32(disp)) return false;

  3765   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));

  3766   if (!is_simm32(disp)) return false;

  3768   disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  3770   // Because rip relative is a disp + address_of_next_instruction and we

  3771   // don't know the value of address_of_next_instruction we apply a fudge factor

  3772   // to make sure we will be ok no matter the size of the instruction we get placed into.

  3773   // We don't have to fudge the checks above here because they are already worst case.

  3775   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal

  3776   // + 4 because better safe than sorry.

  3777   const int fudge = 12 + 4;

  3778   if (disp < 0) {

  3779     disp -= fudge;

  3780   } else {

  3781     disp += fudge;

  3782   }

  3783   return is_simm32(disp);

  3784 }

  3786 // Check if the polling page is not reachable from the code cache using rip-relative

  3787 // addressing.

  3788 bool Assembler::is_polling_page_far() {

  3789   intptr_t addr = (intptr_t)os::get_polling_page();

  3790   return ForceUnreachable ||

  3791          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||

  3792          !is_simm32(addr - (intptr_t)CodeCache::high_bound());

  3793 }

  3795 void Assembler::emit_data64(jlong data,

  3796                             relocInfo::relocType rtype,

  3797                             int format) {

  3798   if (rtype == relocInfo::none) {

  3799     emit_long64(data);

  3800   } else {

  3801     emit_data64(data, Relocation::spec_simple(rtype), format);

  3802   }

  3803 }

  3805 void Assembler::emit_data64(jlong data,

  3806                             RelocationHolder const& rspec,

  3807                             int format) {

  3808   assert(imm_operand == 0, "default format must be immediate in this file");

  3809   assert(imm_operand == format, "must be immediate");

  3810   assert(inst_mark() != NULL, "must be inside InstructionMark");

  3811   // Do not use AbstractAssembler::relocate, which is not intended for

  3812   // embedded words.  Instead, relocate to the enclosing instruction.

  3813   code_section()->relocate(inst_mark(), rspec, format);

  3814 #ifdef ASSERT

  3815   check_relocation(rspec, format);

  3816 #endif

  3817   emit_long64(data);

  3818 }

  3820 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {

  3821   if (reg_enc >= 8) {

  3822     prefix(REX_B);

  3823     reg_enc -= 8;

  3824   } else if (byteinst && reg_enc >= 4) {

  3825     prefix(REX);

  3826   }

  3827   return reg_enc;

  3828 }

  3830 int Assembler::prefixq_and_encode(int reg_enc) {

  3831   if (reg_enc < 8) {

  3832     prefix(REX_W);

  3833   } else {

  3834     prefix(REX_WB);

  3835     reg_enc -= 8;

  3836   }

  3837   return reg_enc;

  3838 }

  3840 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {

  3841   if (dst_enc < 8) {

  3842     if (src_enc >= 8) {

  3843       prefix(REX_B);

  3844       src_enc -= 8;

  3845     } else if (byteinst && src_enc >= 4) {

  3846       prefix(REX);

  3847     }

  3848   } else {

  3849     if (src_enc < 8) {

  3850       prefix(REX_R);

  3851     } else {

  3852       prefix(REX_RB);

  3853       src_enc -= 8;

  3854     }

  3855     dst_enc -= 8;

  3856   }

  3857   return dst_enc << 3 | src_enc;

  3858 }

  3860 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {

  3861   if (dst_enc < 8) {

  3862     if (src_enc < 8) {

  3863       prefix(REX_W);

  3864     } else {

  3865       prefix(REX_WB);

  3866       src_enc -= 8;

  3867     }

  3868   } else {

  3869     if (src_enc < 8) {

  3870       prefix(REX_WR);

  3871     } else {

  3872       prefix(REX_WRB);

  3873       src_enc -= 8;

  3874     }

  3875     dst_enc -= 8;

  3876   }

  3877   return dst_enc << 3 | src_enc;

  3878 }

  3880 void Assembler::prefix(Register reg) {

  3881   if (reg->encoding() >= 8) {

  3882     prefix(REX_B);

  3883   }

  3884 }

  3886 void Assembler::prefix(Address adr) {

  3887   if (adr.base_needs_rex()) {

  3888     if (adr.index_needs_rex()) {

  3889       prefix(REX_XB);

  3890     } else {

  3891       prefix(REX_B);

  3892     }

  3893   } else {

  3894     if (adr.index_needs_rex()) {

  3895       prefix(REX_X);

  3896     }

  3897   }

  3898 }

  3900 void Assembler::prefixq(Address adr) {

  3901   if (adr.base_needs_rex()) {

  3902     if (adr.index_needs_rex()) {

  3903       prefix(REX_WXB);

  3904     } else {

  3905       prefix(REX_WB);

  3906     }

  3907   } else {

  3908     if (adr.index_needs_rex()) {

  3909       prefix(REX_WX);

  3910     } else {

  3911       prefix(REX_W);

  3912     }

  3913   }

  3914 }

  3917 void Assembler::prefix(Address adr, Register reg, bool byteinst) {

  3918   if (reg->encoding() < 8) {

  3919     if (adr.base_needs_rex()) {

  3920       if (adr.index_needs_rex()) {

  3921         prefix(REX_XB);

  3922       } else {

  3923         prefix(REX_B);

  3924       }

  3925     } else {

  3926       if (adr.index_needs_rex()) {

  3927         prefix(REX_X);

  3928       } else if (byteinst && reg->encoding() >= 4 ) {

  3929         prefix(REX);

  3930       }

  3931     }

  3932   } else {

  3933     if (adr.base_needs_rex()) {

  3934       if (adr.index_needs_rex()) {

  3935         prefix(REX_RXB);

  3936       } else {

  3937         prefix(REX_RB);

  3938       }

  3939     } else {

  3940       if (adr.index_needs_rex()) {

  3941         prefix(REX_RX);

  3942       } else {

  3943         prefix(REX_R);

  3944       }

  3945     }

  3946   }

  3947 }

  3949 void Assembler::prefixq(Address adr, Register src) {

  3950   if (src->encoding() < 8) {

  3951     if (adr.base_needs_rex()) {

  3952       if (adr.index_needs_rex()) {

  3953         prefix(REX_WXB);

  3954       } else {

  3955         prefix(REX_WB);

  3956       }

  3957     } else {

  3958       if (adr.index_needs_rex()) {

  3959         prefix(REX_WX);

  3960       } else {

  3961         prefix(REX_W);

  3962       }

  3963     }

  3964   } else {

  3965     if (adr.base_needs_rex()) {

  3966       if (adr.index_needs_rex()) {

  3967         prefix(REX_WRXB);

  3968       } else {

  3969         prefix(REX_WRB);

  3970       }

  3971     } else {

  3972       if (adr.index_needs_rex()) {

  3973         prefix(REX_WRX);

  3974       } else {

  3975         prefix(REX_WR);

  3976       }

  3977     }

  3978   }

  3979 }

  3981 void Assembler::prefix(Address adr, XMMRegister reg) {

  3982   if (reg->encoding() < 8) {

  3983     if (adr.base_needs_rex()) {

  3984       if (adr.index_needs_rex()) {

  3985         prefix(REX_XB);

  3986       } else {

  3987         prefix(REX_B);

  3988       }

  3989     } else {

  3990       if (adr.index_needs_rex()) {

  3991         prefix(REX_X);

  3992       }

  3993     }

  3994   } else {

  3995     if (adr.base_needs_rex()) {

  3996       if (adr.index_needs_rex()) {

  3997         prefix(REX_RXB);

  3998       } else {

  3999         prefix(REX_RB);

  4000       }

  4001     } else {

  4002       if (adr.index_needs_rex()) {

  4003         prefix(REX_RX);

  4004       } else {

  4005         prefix(REX_R);

  4006       }

  4007     }

  4008   }

  4009 }

  4011 void Assembler::prefixq(Address adr, XMMRegister src) {

  4012   if (src->encoding() < 8) {

  4013     if (adr.base_needs_rex()) {

  4014       if (adr.index_needs_rex()) {

  4015         prefix(REX_WXB);

  4016       } else {

  4017         prefix(REX_WB);

  4018       }

  4019     } else {

  4020       if (adr.index_needs_rex()) {

  4021         prefix(REX_WX);

  4022       } else {

  4023         prefix(REX_W);

  4024       }

  4025     }

  4026   } else {

  4027     if (adr.base_needs_rex()) {

  4028       if (adr.index_needs_rex()) {

  4029         prefix(REX_WRXB);

  4030       } else {

  4031         prefix(REX_WRB);

  4032       }

  4033     } else {

  4034       if (adr.index_needs_rex()) {

  4035         prefix(REX_WRX);

  4036       } else {

  4037         prefix(REX_WR);

  4038       }

  4039     }

  4040   }

  4041 }

  4043 void Assembler::adcq(Register dst, int32_t imm32) {

  4044   (void) prefixq_and_encode(dst->encoding());

  4045   emit_arith(0x81, 0xD0, dst, imm32);

  4046 }

  4048 void Assembler::adcq(Register dst, Address src) {

  4049   InstructionMark im(this);

  4050   prefixq(src, dst);

  4051   emit_byte(0x13);

  4052   emit_operand(dst, src);

  4053 }

  4055 void Assembler::adcq(Register dst, Register src) {

  4056   (int) prefixq_and_encode(dst->encoding(), src->encoding());

  4057   emit_arith(0x13, 0xC0, dst, src);

  4058 }

  4060 void Assembler::addq(Address dst, int32_t imm32) {

  4061   InstructionMark im(this);

  4062   prefixq(dst);

  4063   emit_arith_operand(0x81, rax, dst,imm32);

  4064 }

  4066 void Assembler::addq(Address dst, Register src) {

  4067   InstructionMark im(this);

  4068   prefixq(dst, src);

  4069   emit_byte(0x01);

  4070   emit_operand(src, dst);

  4071 }

  4073 void Assembler::addq(Register dst, int32_t imm32) {

  4074   (void) prefixq_and_encode(dst->encoding());

  4075   emit_arith(0x81, 0xC0, dst, imm32);

  4076 }

  4078 void Assembler::addq(Register dst, Address src) {

  4079   InstructionMark im(this);

  4080   prefixq(src, dst);

  4081   emit_byte(0x03);

  4082   emit_operand(dst, src);

  4083 }

  4085 void Assembler::addq(Register dst, Register src) {

  4086   (void) prefixq_and_encode(dst->encoding(), src->encoding());

  4087   emit_arith(0x03, 0xC0, dst, src);

  4088 }

  4090 void Assembler::andq(Address dst, int32_t imm32) {

  4091   InstructionMark im(this);

  4092   prefixq(dst);

  4093   emit_byte(0x81);

  4094   emit_operand(rsp, dst, 4);

  4095   emit_long(imm32);

  4096 }

  4098 void Assembler::andq(Register dst, int32_t imm32) {

  4099   (void) prefixq_and_encode(dst->encoding());

  4100   emit_arith(0x81, 0xE0, dst, imm32);

  4101 }

  4103 void Assembler::andq(Register dst, Address src) {

  4104   InstructionMark im(this);

  4105   prefixq(src, dst);

  4106   emit_byte(0x23);

  4107   emit_operand(dst, src);

  4108 }

  4110 void Assembler::andq(Register dst, Register src) {

  4111   (int) prefixq_and_encode(dst->encoding(), src->encoding());

  4112   emit_arith(0x23, 0xC0, dst, src);

  4113 }

  4115 void Assembler::bsfq(Register dst, Register src) {

  4116   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4117   emit_byte(0x0F);

  4118   emit_byte(0xBC);

  4119   emit_byte(0xC0 | encode);

  4120 }

  4122 void Assembler::bsrq(Register dst, Register src) {

  4123   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");

  4124   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4125   emit_byte(0x0F);

  4126   emit_byte(0xBD);

  4127   emit_byte(0xC0 | encode);

  4128 }

  4130 void Assembler::bswapq(Register reg) {

  4131   int encode = prefixq_and_encode(reg->encoding());

  4132   emit_byte(0x0F);

  4133   emit_byte(0xC8 | encode);

  4134 }

  4136 void Assembler::cdqq() {

  4137   prefix(REX_W);

  4138   emit_byte(0x99);

  4139 }

  4141 void Assembler::clflush(Address adr) {

  4142   prefix(adr);

  4143   emit_byte(0x0F);

  4144   emit_byte(0xAE);

  4145   emit_operand(rdi, adr);

  4146 }

  4148 void Assembler::cmovq(Condition cc, Register dst, Register src) {

  4149   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4150   emit_byte(0x0F);

  4151   emit_byte(0x40 | cc);

  4152   emit_byte(0xC0 | encode);

  4153 }

  4155 void Assembler::cmovq(Condition cc, Register dst, Address src) {

  4156   InstructionMark im(this);

  4157   prefixq(src, dst);

  4158   emit_byte(0x0F);

  4159   emit_byte(0x40 | cc);

  4160   emit_operand(dst, src);

  4161 }

  4163 void Assembler::cmpq(Address dst, int32_t imm32) {

  4164   InstructionMark im(this);

  4165   prefixq(dst);

  4166   emit_byte(0x81);

  4167   emit_operand(rdi, dst, 4);

  4168   emit_long(imm32);

  4169 }

  4171 void Assembler::cmpq(Register dst, int32_t imm32) {

  4172   (void) prefixq_and_encode(dst->encoding());

  4173   emit_arith(0x81, 0xF8, dst, imm32);

  4174 }

  4176 void Assembler::cmpq(Address dst, Register src) {

  4177   InstructionMark im(this);

  4178   prefixq(dst, src);

  4179   emit_byte(0x3B);

  4180   emit_operand(src, dst);

  4181 }

  4183 void Assembler::cmpq(Register dst, Register src) {

  4184   (void) prefixq_and_encode(dst->encoding(), src->encoding());

  4185   emit_arith(0x3B, 0xC0, dst, src);

  4186 }

  4188 void Assembler::cmpq(Register dst, Address  src) {

  4189   InstructionMark im(this);

  4190   prefixq(src, dst);

  4191   emit_byte(0x3B);

  4192   emit_operand(dst, src);

  4193 }

  4195 void Assembler::cmpxchgq(Register reg, Address adr) {

  4196   InstructionMark im(this);

  4197   prefixq(adr, reg);

  4198   emit_byte(0x0F);

  4199   emit_byte(0xB1);

  4200   emit_operand(reg, adr);

  4201 }

  4203 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {

  4204   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  4205   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);

  4206   emit_byte(0x2A);

  4207   emit_byte(0xC0 | encode);

  4208 }

  4210 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {

  4211   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  4212   InstructionMark im(this);

  4213   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);

  4214   emit_byte(0x2A);

  4215   emit_operand(dst, src);

  4216 }

  4218 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {

  4219   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  4220   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);

  4221   emit_byte(0x2A);

  4222   emit_byte(0xC0 | encode);

  4223 }

  4225 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {

  4226   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  4227   InstructionMark im(this);

  4228   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);

  4229   emit_byte(0x2A);

  4230   emit_operand(dst, src);

  4231 }

  4233 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {

  4234   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  4235   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);

  4236   emit_byte(0x2C);

  4237   emit_byte(0xC0 | encode);

  4238 }

  4240 void Assembler::cvttss2siq(Register dst, XMMRegister src) {

  4241   NOT_LP64(assert(VM_Version::supports_sse(), ""));

  4242   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);

  4243   emit_byte(0x2C);

  4244   emit_byte(0xC0 | encode);

  4245 }

  4247 void Assembler::decl(Register dst) {

  4248   // Don't use it directly. Use MacroAssembler::decrementl() instead.

  4249   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)

  4250   int encode = prefix_and_encode(dst->encoding());

  4251   emit_byte(0xFF);

  4252   emit_byte(0xC8 | encode);

  4253 }

  4255 void Assembler::decq(Register dst) {

  4256   // Don't use it directly. Use MacroAssembler::decrementq() instead.

  4257   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)

  4258   int encode = prefixq_and_encode(dst->encoding());

  4259   emit_byte(0xFF);

  4260   emit_byte(0xC8 | encode);

  4261 }

  4263 void Assembler::decq(Address dst) {

  4264   // Don't use it directly. Use MacroAssembler::decrementq() instead.

  4265   InstructionMark im(this);

  4266   prefixq(dst);

  4267   emit_byte(0xFF);

  4268   emit_operand(rcx, dst);

  4269 }

  4271 void Assembler::fxrstor(Address src) {

  4272   prefixq(src);

  4273   emit_byte(0x0F);

  4274   emit_byte(0xAE);

  4275   emit_operand(as_Register(1), src);

  4276 }

  4278 void Assembler::fxsave(Address dst) {

  4279   prefixq(dst);

  4280   emit_byte(0x0F);

  4281   emit_byte(0xAE);

  4282   emit_operand(as_Register(0), dst);

  4283 }

  4285 void Assembler::idivq(Register src) {

  4286   int encode = prefixq_and_encode(src->encoding());

  4287   emit_byte(0xF7);

  4288   emit_byte(0xF8 | encode);

  4289 }

  4291 void Assembler::imulq(Register dst, Register src) {

  4292   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4293   emit_byte(0x0F);

  4294   emit_byte(0xAF);

  4295   emit_byte(0xC0 | encode);

  4296 }

  4298 void Assembler::imulq(Register dst, Register src, int value) {

  4299   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4300   if (is8bit(value)) {

  4301     emit_byte(0x6B);

  4302     emit_byte(0xC0 | encode);

  4303     emit_byte(value & 0xFF);

  4304   } else {

  4305     emit_byte(0x69);

  4306     emit_byte(0xC0 | encode);

  4307     emit_long(value);

  4308   }

  4309 }

  4311 void Assembler::incl(Register dst) {

  4312   // Don't use it directly. Use MacroAssembler::incrementl() instead.

  4313   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)

  4314   int encode = prefix_and_encode(dst->encoding());

  4315   emit_byte(0xFF);

  4316   emit_byte(0xC0 | encode);

  4317 }

  4319 void Assembler::incq(Register dst) {

  4320   // Don't use it directly. Use MacroAssembler::incrementq() instead.

  4321   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)

  4322   int encode = prefixq_and_encode(dst->encoding());

  4323   emit_byte(0xFF);

  4324   emit_byte(0xC0 | encode);

  4325 }

  4327 void Assembler::incq(Address dst) {

  4328   // Don't use it directly. Use MacroAssembler::incrementq() instead.

  4329   InstructionMark im(this);

  4330   prefixq(dst);

  4331   emit_byte(0xFF);

  4332   emit_operand(rax, dst);

  4333 }

  4335 void Assembler::lea(Register dst, Address src) {

  4336   leaq(dst, src);

  4337 }

  4339 void Assembler::leaq(Register dst, Address src) {

  4340   InstructionMark im(this);

  4341   prefixq(src, dst);

  4342   emit_byte(0x8D);

  4343   emit_operand(dst, src);

  4344 }

  4346 void Assembler::mov64(Register dst, int64_t imm64) {

  4347   InstructionMark im(this);

  4348   int encode = prefixq_and_encode(dst->encoding());

  4349   emit_byte(0xB8 | encode);

  4350   emit_long64(imm64);

  4351 }

  4353 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {

  4354   InstructionMark im(this);

  4355   int encode = prefixq_and_encode(dst->encoding());

  4356   emit_byte(0xB8 | encode);

  4357   emit_data64(imm64, rspec);

  4358 }

  4360 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {

  4361   InstructionMark im(this);

  4362   int encode = prefix_and_encode(dst->encoding());

  4363   emit_byte(0xB8 | encode);

  4364   emit_data((int)imm32, rspec, narrow_oop_operand);

  4365 }

  4367 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {

  4368   InstructionMark im(this);

  4369   prefix(dst);

  4370   emit_byte(0xC7);

  4371   emit_operand(rax, dst, 4);

  4372   emit_data((int)imm32, rspec, narrow_oop_operand);

  4373 }

  4375 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {

  4376   InstructionMark im(this);

  4377   int encode = prefix_and_encode(src1->encoding());

  4378   emit_byte(0x81);

  4379   emit_byte(0xF8 | encode);

  4380   emit_data((int)imm32, rspec, narrow_oop_operand);

  4381 }

  4383 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {

  4384   InstructionMark im(this);

  4385   prefix(src1);

  4386   emit_byte(0x81);

  4387   emit_operand(rax, src1, 4);

  4388   emit_data((int)imm32, rspec, narrow_oop_operand);

  4389 }

  4391 void Assembler::lzcntq(Register dst, Register src) {

  4392   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");

  4393   emit_byte(0xF3);

  4394   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4395   emit_byte(0x0F);

  4396   emit_byte(0xBD);

  4397   emit_byte(0xC0 | encode);

  4398 }

  4400 void Assembler::movdq(XMMRegister dst, Register src) {

  4401   // table D-1 says MMX/SSE2

  4402   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  4403   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);

  4404   emit_byte(0x6E);

  4405   emit_byte(0xC0 | encode);

  4406 }

  4408 void Assembler::movdq(Register dst, XMMRegister src) {

  4409   // table D-1 says MMX/SSE2

  4410   NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  4411   // swap src/dst to get correct prefix

  4412   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);

  4413   emit_byte(0x7E);

  4414   emit_byte(0xC0 | encode);

  4415 }

  4417 void Assembler::movq(Register dst, Register src) {

  4418   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4419   emit_byte(0x8B);

  4420   emit_byte(0xC0 | encode);

  4421 }

  4423 void Assembler::movq(Register dst, Address src) {

  4424   InstructionMark im(this);

  4425   prefixq(src, dst);

  4426   emit_byte(0x8B);

  4427   emit_operand(dst, src);

  4428 }

  4430 void Assembler::movq(Address dst, Register src) {

  4431   InstructionMark im(this);

  4432   prefixq(dst, src);

  4433   emit_byte(0x89);

  4434   emit_operand(src, dst);

  4435 }

  4437 void Assembler::movsbq(Register dst, Address src) {

  4438   InstructionMark im(this);

  4439   prefixq(src, dst);

  4440   emit_byte(0x0F);

  4441   emit_byte(0xBE);

  4442   emit_operand(dst, src);

  4443 }

  4445 void Assembler::movsbq(Register dst, Register src) {

  4446   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4447   emit_byte(0x0F);

  4448   emit_byte(0xBE);

  4449   emit_byte(0xC0 | encode);

  4450 }

  4452 void Assembler::movslq(Register dst, int32_t imm32) {

  4453   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)

  4454   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)

  4455   // as a result we shouldn't use until tested at runtime...

  4456   ShouldNotReachHere();

  4457   InstructionMark im(this);

  4458   int encode = prefixq_and_encode(dst->encoding());

  4459   emit_byte(0xC7 | encode);

  4460   emit_long(imm32);

  4461 }

  4463 void Assembler::movslq(Address dst, int32_t imm32) {

  4464   assert(is_simm32(imm32), "lost bits");

  4465   InstructionMark im(this);

  4466   prefixq(dst);

  4467   emit_byte(0xC7);

  4468   emit_operand(rax, dst, 4);

  4469   emit_long(imm32);

  4470 }

  4472 void Assembler::movslq(Register dst, Address src) {

  4473   InstructionMark im(this);

  4474   prefixq(src, dst);

  4475   emit_byte(0x63);

  4476   emit_operand(dst, src);

  4477 }

  4479 void Assembler::movslq(Register dst, Register src) {

  4480   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4481   emit_byte(0x63);

  4482   emit_byte(0xC0 | encode);

  4483 }

  4485 void Assembler::movswq(Register dst, Address src) {

  4486   InstructionMark im(this);

  4487   prefixq(src, dst);

  4488   emit_byte(0x0F);

  4489   emit_byte(0xBF);

  4490   emit_operand(dst, src);

  4491 }

  4493 void Assembler::movswq(Register dst, Register src) {

  4494   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4495   emit_byte(0x0F);

  4496   emit_byte(0xBF);

  4497   emit_byte(0xC0 | encode);

  4498 }

  4500 void Assembler::movzbq(Register dst, Address src) {

  4501   InstructionMark im(this);

  4502   prefixq(src, dst);

  4503   emit_byte(0x0F);

  4504   emit_byte(0xB6);

  4505   emit_operand(dst, src);

  4506 }

  4508 void Assembler::movzbq(Register dst, Register src) {

  4509   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4510   emit_byte(0x0F);

  4511   emit_byte(0xB6);

  4512   emit_byte(0xC0 | encode);

  4513 }

  4515 void Assembler::movzwq(Register dst, Address src) {

  4516   InstructionMark im(this);

  4517   prefixq(src, dst);

  4518   emit_byte(0x0F);

  4519   emit_byte(0xB7);

  4520   emit_operand(dst, src);

  4521 }

  4523 void Assembler::movzwq(Register dst, Register src) {

  4524   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4525   emit_byte(0x0F);

  4526   emit_byte(0xB7);

  4527   emit_byte(0xC0 | encode);

  4528 }

  4530 void Assembler::negq(Register dst) {

  4531   int encode = prefixq_and_encode(dst->encoding());

  4532   emit_byte(0xF7);

  4533   emit_byte(0xD8 | encode);

  4534 }

  4536 void Assembler::notq(Register dst) {

  4537   int encode = prefixq_and_encode(dst->encoding());

  4538   emit_byte(0xF7);

  4539   emit_byte(0xD0 | encode);

  4540 }

  4542 void Assembler::orq(Address dst, int32_t imm32) {

  4543   InstructionMark im(this);

  4544   prefixq(dst);

  4545   emit_byte(0x81);

  4546   emit_operand(rcx, dst, 4);

  4547   emit_long(imm32);

  4548 }

  4550 void Assembler::orq(Register dst, int32_t imm32) {

  4551   (void) prefixq_and_encode(dst->encoding());

  4552   emit_arith(0x81, 0xC8, dst, imm32);

  4553 }

  4555 void Assembler::orq(Register dst, Address src) {

  4556   InstructionMark im(this);

  4557   prefixq(src, dst);

  4558   emit_byte(0x0B);

  4559   emit_operand(dst, src);

  4560 }

  4562 void Assembler::orq(Register dst, Register src) {

  4563   (void) prefixq_and_encode(dst->encoding(), src->encoding());

  4564   emit_arith(0x0B, 0xC0, dst, src);

  4565 }

  4567 void Assembler::popa() { // 64bit

  4568   movq(r15, Address(rsp, 0));

  4569   movq(r14, Address(rsp, wordSize));

  4570   movq(r13, Address(rsp, 2 * wordSize));

  4571   movq(r12, Address(rsp, 3 * wordSize));

  4572   movq(r11, Address(rsp, 4 * wordSize));

  4573   movq(r10, Address(rsp, 5 * wordSize));

  4574   movq(r9,  Address(rsp, 6 * wordSize));

  4575   movq(r8,  Address(rsp, 7 * wordSize));

  4576   movq(rdi, Address(rsp, 8 * wordSize));

  4577   movq(rsi, Address(rsp, 9 * wordSize));

  4578   movq(rbp, Address(rsp, 10 * wordSize));

  4579   // skip rsp

  4580   movq(rbx, Address(rsp, 12 * wordSize));

  4581   movq(rdx, Address(rsp, 13 * wordSize));

  4582   movq(rcx, Address(rsp, 14 * wordSize));

  4583   movq(rax, Address(rsp, 15 * wordSize));

  4585   addq(rsp, 16 * wordSize);

  4586 }

  4588 void Assembler::popcntq(Register dst, Address src) {

  4589   assert(VM_Version::supports_popcnt(), "must support");

  4590   InstructionMark im(this);

  4591   emit_byte(0xF3);

  4592   prefixq(src, dst);

  4593   emit_byte(0x0F);

  4594   emit_byte(0xB8);

  4595   emit_operand(dst, src);

  4596 }

  4598 void Assembler::popcntq(Register dst, Register src) {

  4599   assert(VM_Version::supports_popcnt(), "must support");

  4600   emit_byte(0xF3);

  4601   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4602   emit_byte(0x0F);

  4603   emit_byte(0xB8);

  4604   emit_byte(0xC0 | encode);

  4605 }

  4607 void Assembler::popq(Address dst) {

  4608   InstructionMark im(this);

  4609   prefixq(dst);

  4610   emit_byte(0x8F);

  4611   emit_operand(rax, dst);

  4612 }

  4614 void Assembler::pusha() { // 64bit

  4615   // we have to store original rsp.  ABI says that 128 bytes

  4616   // below rsp are local scratch.

  4617   movq(Address(rsp, -5 * wordSize), rsp);

  4619   subq(rsp, 16 * wordSize);

  4621   movq(Address(rsp, 15 * wordSize), rax);

  4622   movq(Address(rsp, 14 * wordSize), rcx);

  4623   movq(Address(rsp, 13 * wordSize), rdx);

  4624   movq(Address(rsp, 12 * wordSize), rbx);

  4625   // skip rsp

  4626   movq(Address(rsp, 10 * wordSize), rbp);

  4627   movq(Address(rsp, 9 * wordSize), rsi);

  4628   movq(Address(rsp, 8 * wordSize), rdi);

  4629   movq(Address(rsp, 7 * wordSize), r8);

  4630   movq(Address(rsp, 6 * wordSize), r9);

  4631   movq(Address(rsp, 5 * wordSize), r10);

  4632   movq(Address(rsp, 4 * wordSize), r11);

  4633   movq(Address(rsp, 3 * wordSize), r12);

  4634   movq(Address(rsp, 2 * wordSize), r13);

  4635   movq(Address(rsp, wordSize), r14);

  4636   movq(Address(rsp, 0), r15);

  4637 }

  4639 void Assembler::pushq(Address src) {

  4640   InstructionMark im(this);

  4641   prefixq(src);

  4642   emit_byte(0xFF);

  4643   emit_operand(rsi, src);

  4644 }

  4646 void Assembler::rclq(Register dst, int imm8) {

  4647   assert(isShiftCount(imm8 >> 1), "illegal shift count");

  4648   int encode = prefixq_and_encode(dst->encoding());

  4649   if (imm8 == 1) {

  4650     emit_byte(0xD1);

  4651     emit_byte(0xD0 | encode);

  4652   } else {

  4653     emit_byte(0xC1);

  4654     emit_byte(0xD0 | encode);

  4655     emit_byte(imm8);

  4656   }

  4657 }

  4658 void Assembler::sarq(Register dst, int imm8) {

  4659   assert(isShiftCount(imm8 >> 1), "illegal shift count");

  4660   int encode = prefixq_and_encode(dst->encoding());

  4661   if (imm8 == 1) {

  4662     emit_byte(0xD1);

  4663     emit_byte(0xF8 | encode);

  4664   } else {

  4665     emit_byte(0xC1);

  4666     emit_byte(0xF8 | encode);

  4667     emit_byte(imm8);

  4668   }

  4669 }

  4671 void Assembler::sarq(Register dst) {

  4672   int encode = prefixq_and_encode(dst->encoding());

  4673   emit_byte(0xD3);

  4674   emit_byte(0xF8 | encode);

  4675 }

  4677 void Assembler::sbbq(Address dst, int32_t imm32) {

  4678   InstructionMark im(this);

  4679   prefixq(dst);

  4680   emit_arith_operand(0x81, rbx, dst, imm32);

  4681 }

  4683 void Assembler::sbbq(Register dst, int32_t imm32) {

  4684   (void) prefixq_and_encode(dst->encoding());

  4685   emit_arith(0x81, 0xD8, dst, imm32);

  4686 }

  4688 void Assembler::sbbq(Register dst, Address src) {

  4689   InstructionMark im(this);

  4690   prefixq(src, dst);

  4691   emit_byte(0x1B);

  4692   emit_operand(dst, src);

  4693 }

  4695 void Assembler::sbbq(Register dst, Register src) {

  4696   (void) prefixq_and_encode(dst->encoding(), src->encoding());

  4697   emit_arith(0x1B, 0xC0, dst, src);

  4698 }

  4700 void Assembler::shlq(Register dst, int imm8) {

  4701   assert(isShiftCount(imm8 >> 1), "illegal shift count");

  4702   int encode = prefixq_and_encode(dst->encoding());

  4703   if (imm8 == 1) {

  4704     emit_byte(0xD1);

  4705     emit_byte(0xE0 | encode);

  4706   } else {

  4707     emit_byte(0xC1);

  4708     emit_byte(0xE0 | encode);

  4709     emit_byte(imm8);

  4710   }

  4711 }

  4713 void Assembler::shlq(Register dst) {

  4714   int encode = prefixq_and_encode(dst->encoding());

  4715   emit_byte(0xD3);

  4716   emit_byte(0xE0 | encode);

  4717 }

  4719 void Assembler::shrq(Register dst, int imm8) {

  4720   assert(isShiftCount(imm8 >> 1), "illegal shift count");

  4721   int encode = prefixq_and_encode(dst->encoding());

  4722   emit_byte(0xC1);

  4723   emit_byte(0xE8 | encode);

  4724   emit_byte(imm8);

  4725 }

  4727 void Assembler::shrq(Register dst) {

  4728   int encode = prefixq_and_encode(dst->encoding());

  4729   emit_byte(0xD3);

  4730   emit_byte(0xE8 | encode);

  4731 }

  4733 void Assembler::subq(Address dst, int32_t imm32) {

  4734   InstructionMark im(this);

  4735   prefixq(dst);

  4736   emit_arith_operand(0x81, rbp, dst, imm32);

  4737 }

  4739 void Assembler::subq(Address dst, Register src) {

  4740   InstructionMark im(this);

  4741   prefixq(dst, src);

  4742   emit_byte(0x29);

  4743   emit_operand(src, dst);

  4744 }

  4746 void Assembler::subq(Register dst, int32_t imm32) {

  4747   (void) prefixq_and_encode(dst->encoding());

  4748   emit_arith(0x81, 0xE8, dst, imm32);

  4749 }

  4751 void Assembler::subq(Register dst, Address src) {

  4752   InstructionMark im(this);

  4753   prefixq(src, dst);

  4754   emit_byte(0x2B);

  4755   emit_operand(dst, src);

  4756 }

  4758 void Assembler::subq(Register dst, Register src) {

  4759   (void) prefixq_and_encode(dst->encoding(), src->encoding());

  4760   emit_arith(0x2B, 0xC0, dst, src);

  4761 }

  4763 void Assembler::testq(Register dst, int32_t imm32) {

  4764   // not using emit_arith because test

  4765   // doesn't support sign-extension of

  4766   // 8bit operands

  4767   int encode = dst->encoding();

  4768   if (encode == 0) {

  4769     prefix(REX_W);

  4770     emit_byte(0xA9);

  4771   } else {

  4772     encode = prefixq_and_encode(encode);

  4773     emit_byte(0xF7);

  4774     emit_byte(0xC0 | encode);

  4775   }

  4776   emit_long(imm32);

  4777 }

  4779 void Assembler::testq(Register dst, Register src) {

  4780   (void) prefixq_and_encode(dst->encoding(), src->encoding());

  4781   emit_arith(0x85, 0xC0, dst, src);

  4782 }

  4784 void Assembler::xaddq(Address dst, Register src) {

  4785   InstructionMark im(this);

  4786   prefixq(dst, src);

  4787   emit_byte(0x0F);

  4788   emit_byte(0xC1);

  4789   emit_operand(src, dst);

  4790 }

  4792 void Assembler::xchgq(Register dst, Address src) {

  4793   InstructionMark im(this);

  4794   prefixq(src, dst);

  4795   emit_byte(0x87);

  4796   emit_operand(dst, src);

  4797 }

  4799 void Assembler::xchgq(Register dst, Register src) {

  4800   int encode = prefixq_and_encode(dst->encoding(), src->encoding());

  4801   emit_byte(0x87);

  4802   emit_byte(0xc0 | encode);

  4803 }

  4805 void Assembler::xorq(Register dst, Register src) {

  4806   (void) prefixq_and_encode(dst->encoding(), src->encoding());

  4807   emit_arith(0x33, 0xC0, dst, src);

  4808 }

  4810 void Assembler::xorq(Register dst, Address src) {

  4811   InstructionMark im(this);

  4812   prefixq(src, dst);

  4813   emit_byte(0x33);

  4814   emit_operand(dst, src);

  4815 }

  4817 #endif // !LP64

  4819 static Assembler::Condition reverse[] = {

  4820     Assembler::noOverflow     /* overflow      = 0x0 */ ,

  4821     Assembler::overflow       /* noOverflow    = 0x1 */ ,

  4822     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,

  4823     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,

  4824     Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,

  4825     Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,

  4826     Assembler::above          /* belowEqual    = 0x6 */ ,

  4827     Assembler::belowEqual     /* above         = 0x7 */ ,

  4828     Assembler::positive       /* negative      = 0x8 */ ,

  4829     Assembler::negative       /* positive      = 0x9 */ ,

  4830     Assembler::noParity       /* parity        = 0xa */ ,

  4831     Assembler::parity         /* noParity      = 0xb */ ,

  4832     Assembler::greaterEqual   /* less          = 0xc */ ,

  4833     Assembler::less           /* greaterEqual  = 0xd */ ,

  4834     Assembler::greater        /* lessEqual     = 0xe */ ,

  4835     Assembler::lessEqual      /* greater       = 0xf, */

  4837 };

  4840 // Implementation of MacroAssembler

  4842 // First all the versions that have distinct versions depending on 32/64 bit

  4843 // Unless the difference is trivial (1 line or so).

  4845 #ifndef _LP64

  4847 // 32bit versions

  4849 Address MacroAssembler::as_Address(AddressLiteral adr) {

  4850   return Address(adr.target(), adr.rspec());

  4851 }

  4853 Address MacroAssembler::as_Address(ArrayAddress adr) {

  4854   return Address::make_array(adr);

  4855 }

  // 32-bit version. Emits the biased-locking entry sequence that runs ahead of
  // the normal CAS-based lock code.
  //
  //   lock_reg  - base of the slot (offset 0) in which the mark word is stashed;
  //               also serves as the scratch register when tmp_reg == noreg, in
  //               which case it is preserved with push/pop around every use
  //   obj_reg   - the object being locked
  //   swap_reg  - must be rax (asserted, for cmpxchg); holds the mark word
  //   tmp_reg   - scratch register, or noreg
  //   swap_reg_contains_mark - when false, the mark word is loaded into swap_reg
  //   done      - jumped to when the bias path completes
  //   slow_case - optional; jumped to when a bias-acquiring CAS fails
  //   counters  - optional statistics; taken from BiasedLocking::counters()
  //               when NULL and PrintBiasedLockingStatistics is set
  //
  // Returns the code offset recorded immediately before the first load through
  // obj_reg: the mark-word load, or the klass load when the caller supplied the
  // mark (presumably for implicit null-check bookkeeping - confirm with
  // callers). The emitted code falls through at cas_label when the mark does
  // not carry the bias pattern, or after a bias revocation attempt.
  4857 int MacroAssembler::biased_locking_enter(Register lock_reg,

  4858                                          Register obj_reg,

  4859                                          Register swap_reg,

  4860                                          Register tmp_reg,

  4861                                          bool swap_reg_contains_mark,

  4862                                          Label& done,

  4863                                          Label* slow_case,

  4864                                          BiasedLockingCounters* counters) {

  4865   assert(UseBiasedLocking, "why call this otherwise?");

  4866   assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");

  4867   assert_different_registers(lock_reg, obj_reg, swap_reg);

  4869   if (PrintBiasedLockingStatistics && counters == NULL)

  4870     counters = BiasedLocking::counters();

  // With no dedicated scratch register, borrow lock_reg; need_tmp_reg makes
  // every scratch use below bracket itself with push/pop.
  4872   bool need_tmp_reg = false;

  4873   if (tmp_reg == noreg) {

  4874     need_tmp_reg = true;

  4875     tmp_reg = lock_reg;

  4876   } else {

  4877     assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);

  4878   }

  4879   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");

  4880   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());

  4881   Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());

  4882   Address saved_mark_addr(lock_reg, 0);

  4884   // Biased locking

  4885   // See whether the lock is currently biased toward our thread and

  4886   // whether the epoch is still valid

  4887   // Note that the runtime guarantees sufficient alignment of JavaThread

  4888   // pointers to allow age to be placed into low bits

  4889   // First check to see whether biasing is even enabled for this object

  4890   Label cas_label;

  4891   int null_check_offset = -1;

  4892   if (!swap_reg_contains_mark) {

  4893     null_check_offset = offset();

  4894     movl(swap_reg, mark_addr);

  4895   }

  4896   if (need_tmp_reg) {

  4897     push(tmp_reg);

  4898   }

  4899   movl(tmp_reg, swap_reg);

  4900   andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);

  4901   cmpl(tmp_reg, markOopDesc::biased_lock_pattern);

  // The pop sits between the cmpl and the jcc that consumes its flags.
  4902   if (need_tmp_reg) {

  4903     pop(tmp_reg);

  4904   }

  4905   jcc(Assembler::notEqual, cas_label);

  4906   // The bias pattern is present in the object's header. Need to check

  4907   // whether the bias owner and the epoch are both still current.

  4908   // Note that because there is no current thread register on x86 we

  4909   // need to store off the mark word we read out of the object to

  4910   // avoid reloading it and needing to recheck invariants below. This

  4911   // store is unfortunate but it makes the overall code shorter and

  4912   // simpler.

  4913   movl(saved_mark_addr, swap_reg);

  4914   if (need_tmp_reg) {

  4915     push(tmp_reg);

  4916   }

  4917   get_thread(tmp_reg);

  4918   xorl(swap_reg, tmp_reg);

  4919   if (swap_reg_contains_mark) {

  4920     null_check_offset = offset();

  4921   }

  4922   movl(tmp_reg, klass_addr);

  4923   xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));

  4924   andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));

  4925   if (need_tmp_reg) {

  4926     pop(tmp_reg);

  4927   }

  4928   if (counters != NULL) {

  4929     cond_inc32(Assembler::zero,

  4930                ExternalAddress((address)counters->biased_lock_entry_count_addr()));

  4931   }

  4932   jcc(Assembler::equal, done);

  4934   Label try_revoke_bias;

  4935   Label try_rebias;

  4937   // At this point we know that the header has the bias pattern and

  4938   // that we are not the bias owner in the current epoch. We need to

  4939   // figure out more details about the state of the header in order to

  4940   // know what operations can be legally performed on the object's

  4941   // header.

  4943   // If the low three bits in the xor result aren't clear, that means

  4944   // the prototype header is no longer biased and we have to revoke

  4945   // the bias on this object.

  4946   testl(swap_reg, markOopDesc::biased_lock_mask_in_place);

  4947   jcc(Assembler::notZero, try_revoke_bias);

  4949   // Biasing is still enabled for this data type. See whether the

  4950   // epoch of the current bias is still valid, meaning that the epoch

  4951   // bits of the mark word are equal to the epoch bits of the

  4952   // prototype header. (Note that the prototype header's epoch bits

  4953   // only change at a safepoint.) If not, attempt to rebias the object

  4954   // toward the current thread. Note that we must be absolutely sure

  4955   // that the current epoch is invalid in order to do this because

  4956   // otherwise the manipulations it performs on the mark word are

  4957   // illegal.

  4958   testl(swap_reg, markOopDesc::epoch_mask_in_place);

  4959   jcc(Assembler::notZero, try_rebias);

  4961   // The epoch of the current bias is still valid but we know nothing

  4962   // about the owner; it might be set or it might be clear. Try to

  4963   // acquire the bias of the object using an atomic operation. If this

  4964   // fails we will go in to the runtime to revoke the object's bias.

  4965   // Note that we first construct the presumed unbiased header so we

  4966   // don't accidentally blow away another thread's valid bias.

  4967   movl(swap_reg, saved_mark_addr);

  4968   andl(swap_reg,

  4969        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);

  4970   if (need_tmp_reg) {

  4971     push(tmp_reg);

  4972   }

  4973   get_thread(tmp_reg);

  4974   orl(tmp_reg, swap_reg);

  4975   if (os::is_MP()) {

  4976     lock();

  4977   }

  4978   cmpxchgptr(tmp_reg, Address(obj_reg, 0));

  4979   if (need_tmp_reg) {

  4980     pop(tmp_reg);

  4981   }

  4982   // If the biasing toward our thread failed, this means that

  4983   // another thread succeeded in biasing it toward itself and we

  4984   // need to revoke that bias. The revocation will occur in the

  4985   // interpreter runtime in the slow case.

  4986   if (counters != NULL) {

  4987     cond_inc32(Assembler::zero,

  4988                ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));

  4989   }

  4990   if (slow_case != NULL) {

  4991     jcc(Assembler::notZero, *slow_case);

  4992   }

  4993   jmp(done);

  4995   bind(try_rebias);

  4996   // At this point we know the epoch has expired, meaning that the

  4997   // current "bias owner", if any, is actually invalid. Under these

  4998   // circumstances _only_, we are allowed to use the current header's

  4999   // value as the comparison value when doing the cas to acquire the

  5000   // bias in the current epoch. In other words, we allow transfer of

  5001   // the bias from one thread to another directly in this situation.

  5002   //

  5003   // FIXME: due to a lack of registers we currently blow away the age

  5004   // bits in this situation. Should attempt to preserve them.

  5005   if (need_tmp_reg) {

  5006     push(tmp_reg);

  5007   }

  5008   get_thread(tmp_reg);

  5009   movl(swap_reg, klass_addr);

  5010   orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));

  5011   movl(swap_reg, saved_mark_addr);

  5012   if (os::is_MP()) {

  5013     lock();

  5014   }

  5015   cmpxchgptr(tmp_reg, Address(obj_reg, 0));

  5016   if (need_tmp_reg) {

  5017     pop(tmp_reg);

  5018   }

  5019   // If the biasing toward our thread failed, then another thread

  5020   // succeeded in biasing it toward itself and we need to revoke that

  5021   // bias. The revocation will occur in the runtime in the slow case.

  5022   if (counters != NULL) {

  5023     cond_inc32(Assembler::zero,

  5024                ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));

  5025   }

  5026   if (slow_case != NULL) {

  5027     jcc(Assembler::notZero, *slow_case);

  5028   }

  5029   jmp(done);

  5031   bind(try_revoke_bias);

  5032   // The prototype mark in the klass doesn't have the bias bit set any

  5033   // more, indicating that objects of this data type are not supposed

  5034   // to be biased any more. We are going to try to reset the mark of

  5035   // this object to the prototype value and fall through to the

  5036   // CAS-based locking scheme. Note that if our CAS fails, it means

  5037   // that another thread raced us for the privilege of revoking the

  5038   // bias of this particular object, so it's okay to continue in the

  5039   // normal locking code.

  5040   //

  5041   // FIXME: due to a lack of registers we currently blow away the age

  5042   // bits in this situation. Should attempt to preserve them.

  5043   movl(swap_reg, saved_mark_addr);

  5044   if (need_tmp_reg) {

  5045     push(tmp_reg);

  5046   }

  5047   movl(tmp_reg, klass_addr);

  5048   movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));

  5049   if (os::is_MP()) {

  5050     lock();

  5051   }

  5052   cmpxchgptr(tmp_reg, Address(obj_reg, 0));

  5053   if (need_tmp_reg) {

  5054     pop(tmp_reg);

  5055   }

  5056   // Fall through to the normal CAS-based lock, because no matter what

  5057   // the result of the above CAS, some thread must have succeeded in

  5058   // removing the bias bit from the object's header.

  5059   if (counters != NULL) {

  5060     cond_inc32(Assembler::zero,

  5061                ExternalAddress((address)counters->revoked_lock_entry_count_addr()));

  5062   }

  5064   bind(cas_label);

  5066   return null_check_offset;

  5067 }

  5068 void MacroAssembler::call_VM_leaf_base(address entry_point,

  5069                                        int number_of_arguments) {

  5070   call(RuntimeAddress(entry_point));

  5071   increment(rsp, number_of_arguments * wordSize);

  5072 }

  5074 void MacroAssembler::cmpoop(Address src1, jobject obj) {

  5075   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());

  5076 }

  5078 void MacroAssembler::cmpoop(Register src1, jobject obj) {

  5079   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());

  5080 }

  5082 void MacroAssembler::extend_sign(Register hi, Register lo) {

  5083   // According to Intel Doc. AP-526, "Integer Divide", p.18.

  5084   if (VM_Version::is_P6() && hi == rdx && lo == rax) {

  5085     cdql();

  5086   } else {

  5087     movl(hi, lo);

  5088     sarl(hi, 31);

  5089   }

  5090 }

  5092 void MacroAssembler::fat_nop() {

  5093   // A 5 byte nop that is safe for patching (see patch_verified_entry)

  5094   emit_byte(0x26); // es:

  5095   emit_byte(0x2e); // cs:

  5096   emit_byte(0x64); // fs:

  5097   emit_byte(0x65); // gs:

  5098   emit_byte(0x90);

  5099 }

  5101 void MacroAssembler::jC2(Register tmp, Label& L) {

  5102   // set parity bit if FPU flag C2 is set (via rax)

  5103   save_rax(tmp);

  5104   fwait(); fnstsw_ax();

  5105   sahf();

  5106   restore_rax(tmp);

  5107   // branch

  5108   jcc(Assembler::parity, L);

  5109 }

  5111 void MacroAssembler::jnC2(Register tmp, Label& L) {

  5112   // set parity bit if FPU flag C2 is set (via rax)

  5113   save_rax(tmp);

  5114   fwait(); fnstsw_ax();

  5115   sahf();

  5116   restore_rax(tmp);

  5117   // branch

  5118   jcc(Assembler::noParity, L);

  5119 }

  5121 // 32bit can do a case table jump in one instruction but we no longer allow the base

  5122 // to be installed in the Address class

  5123 void MacroAssembler::jump(ArrayAddress entry) {

  5124   jmp(as_Address(entry));

  5125 }

  5127 // Note: y_lo will be destroyed

  5128 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {

  5129   // Long compare for Java (semantics as described in JVM spec.)

  5130   Label high, low, done;

  5132   cmpl(x_hi, y_hi);

  5133   jcc(Assembler::less, low);

  5134   jcc(Assembler::greater, high);

  5135   // x_hi is the return register

  5136   xorl(x_hi, x_hi);

  5137   cmpl(x_lo, y_lo);

  5138   jcc(Assembler::below, low);

  5139   jcc(Assembler::equal, done);

  5141   bind(high);

  5142   xorl(x_hi, x_hi);

  5143   increment(x_hi);

  5144   jmp(done);

  5146   bind(low);

  5147   xorl(x_hi, x_hi);

  5148   decrementl(x_hi);

  5150   bind(done);

  5151 }

  5153 void MacroAssembler::lea(Register dst, AddressLiteral src) {

  5154     mov_literal32(dst, (int32_t)src.target(), src.rspec());

  5155 }

  5157 void MacroAssembler::lea(Address dst, AddressLiteral adr) {

  5158   // leal(dst, as_Address(adr));

  5159   // see note in movl as to why we must use a move

  5160   mov_literal32(dst, (int32_t) adr.target(), adr.rspec());

  5161 }

  5163 void MacroAssembler::leave() {

  5164   mov(rsp, rbp);

  5165   pop(rbp);

  5166 }

  5168 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {

  5169   // Multiplication of two Java long values stored on the stack

  5170   // as illustrated below. Result is in rdx:rax.

  5171   //

  5172   // rsp ---> [  ??  ] \               \

  5173   //            ....    | y_rsp_offset  |

  5174   //          [ y_lo ] /  (in bytes)    | x_rsp_offset

  5175   //          [ y_hi ]                  | (in bytes)

  5176   //            ....                    |

  5177   //          [ x_lo ]                 /

  5178   //          [ x_hi ]

  5179   //            ....

  5180   //

  5181   // Basic idea: lo(result) = lo(x_lo * y_lo)

  5182   //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)

  5183   Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);

  5184   Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);

  5185   Label quick;

  5186   // load x_hi, y_hi and check if quick

  5187   // multiplication is possible

  5188   movl(rbx, x_hi);

  5189   movl(rcx, y_hi);

  5190   movl(rax, rbx);

  5191   orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0

  5192   jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply

  5193   // do full multiplication

  5194   // 1st step

  5195   mull(y_lo);                                    // x_hi * y_lo

  5196   movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,

  5197   // 2nd step

  5198   movl(rax, x_lo);

  5199   mull(rcx);                                     // x_lo * y_hi

  5200   addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,

  5201   // 3rd step

  5202   bind(quick);                                   // note: rbx, = 0 if quick multiply!

  5203   movl(rax, x_lo);

  5204   mull(y_lo);                                    // x_lo * y_lo

  5205   addl(rdx, rbx);                                // correct hi(x_lo * y_lo)

  5206 }

  5208 void MacroAssembler::lneg(Register hi, Register lo) {

  5209   negl(lo);

  5210   adcl(hi, 0);

  5211   negl(hi);

  5212 }

  5214 void MacroAssembler::lshl(Register hi, Register lo) {

  5215   // Java shift left long support (semantics as described in JVM spec., p.305)

  5216   // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))

  5217   // shift value is in rcx !

  5218   assert(hi != rcx, "must not use rcx");

  5219   assert(lo != rcx, "must not use rcx");

  5220   const Register s = rcx;                        // shift count

  5221   const int      n = BitsPerWord;

  5222   Label L;

  5223   andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)

  5224   cmpl(s, n);                                    // if (s < n)

  5225   jcc(Assembler::less, L);                       // else (s >= n)

  5226   movl(hi, lo);                                  // x := x << n

  5227   xorl(lo, lo);

  5228   // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!

  5229   bind(L);                                       // s (mod n) < n

  5230   shldl(hi, lo);                                 // x := x << s

  5231   shll(lo);

  5232 }

  5235 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {

  5236   // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)

  5237   // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))

  5238   assert(hi != rcx, "must not use rcx");

  5239   assert(lo != rcx, "must not use rcx");

  5240   const Register s = rcx;                        // shift count

  5241   const int      n = BitsPerWord;

  5242   Label L;

  5243   andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)

  5244   cmpl(s, n);                                    // if (s < n)

  5245   jcc(Assembler::less, L);                       // else (s >= n)

  5246   movl(lo, hi);                                  // x := x >> n

  5247   if (sign_extension) sarl(hi, 31);

  5248   else                xorl(hi, hi);

  5249   // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!

  5250   bind(L);                                       // s (mod n) < n

  5251   shrdl(lo, hi);                                 // x := x >> s

  5252   if (sign_extension) sarl(hi);

  5253   else                shrl(hi);

  5254 }

  5256 void MacroAssembler::movoop(Register dst, jobject obj) {

  5257   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());

  5258 }

  5260 void MacroAssembler::movoop(Address dst, jobject obj) {

  5261   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());

  5262 }

  5264 void MacroAssembler::movptr(Register dst, AddressLiteral src) {

  5265   if (src.is_lval()) {

  5266     mov_literal32(dst, (intptr_t)src.target(), src.rspec());

  5267   } else {

  5268     movl(dst, as_Address(src));

  5269   }

  5270 }

  5272 void MacroAssembler::movptr(ArrayAddress dst, Register src) {

  5273   movl(as_Address(dst), src);

  5274 }

  5276 void MacroAssembler::movptr(Register dst, ArrayAddress src) {

  5277   movl(dst, as_Address(src));

  5278 }

  5280 // src should NEVER be a real pointer. Use AddressLiteral for true pointers

  5281 void MacroAssembler::movptr(Address dst, intptr_t src) {

  5282   movl(dst, src);

  5283 }

  5286 void MacroAssembler::pop_callee_saved_registers() {

  5287   pop(rcx);

  5288   pop(rdx);

  5289   pop(rdi);

  5290   pop(rsi);

  5291 }

  5293 void MacroAssembler::pop_fTOS() {

  5294   fld_d(Address(rsp, 0));

  5295   addl(rsp, 2 * wordSize);

  5296 }

  5298 void MacroAssembler::push_callee_saved_registers() {

  5299   push(rsi);

  5300   push(rdi);

  5301   push(rdx);

  5302   push(rcx);

  5303 }

  5305 void MacroAssembler::push_fTOS() {

  5306   subl(rsp, 2 * wordSize);

  5307   fstp_d(Address(rsp, 0));

  5308 }

  5311 void MacroAssembler::pushoop(jobject obj) {

  5312   push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());

  5313 }

  5316 void MacroAssembler::pushptr(AddressLiteral src) {

  5317   if (src.is_lval()) {

  5318     push_literal32((int32_t)src.target(), src.rspec());

  5319   } else {

  5320     pushl(as_Address(src));

  5321   }

  5322 }

  5324 void MacroAssembler::set_word_if_not_zero(Register dst) {

  5325   xorl(dst, dst);

  5326   set_byte_if_not_zero(dst);

  5327 }

  5329 static void pass_arg0(MacroAssembler* masm, Register arg) {

  5330   masm->push(arg);

  5331 }

  5333 static void pass_arg1(MacroAssembler* masm, Register arg) {

  5334   masm->push(arg);

  5335 }

  5337 static void pass_arg2(MacroAssembler* masm, Register arg) {

  5338   masm->push(arg);

  5339 }

  5341 static void pass_arg3(MacroAssembler* masm, Register arg) {

  5342   masm->push(arg);

  5343 }

  // Declaration of the findpc helper, which has C linkage and is defined
  // outside this section; only declared when PRODUCT is not defined. Called
  // from debug32 with the reported eip.
  5345 #ifndef PRODUCT

  5346 extern "C" void findpc(intptr_t x);

  5347 #endif

  5349 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {

  5350   // In order to get locks to work, we need to fake a in_VM state

  5351   JavaThread* thread = JavaThread::current();

  5352   JavaThreadState saved_state = thread->thread_state();

  5353   thread->set_thread_state(_thread_in_vm);

  5354   if (ShowMessageBoxOnError) {

  5355     JavaThread* thread = JavaThread::current();

  5356     JavaThreadState saved_state = thread->thread_state();

  5357     thread->set_thread_state(_thread_in_vm);

  5358     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {

  5359       ttyLocker ttyl;

  5360       BytecodeCounter::print();

  5361     }

  5362     // To see where a verify_oop failed, get $ebx+40/X for this frame.

  5363     // This is the value of eip which points to where verify_oop will return.

  5364     if (os::message_box(msg, "Execution stopped, print registers?")) {

  5365       ttyLocker ttyl;

  5366       tty->print_cr("eip = 0x%08x", eip);

  5367 #ifndef PRODUCT

  5368       if ((WizardMode || Verbose) && PrintMiscellaneous) {

  5369         tty->cr();

  5370         findpc(eip);

  5371         tty->cr();

  5372       }

  5373 #endif

  5374       tty->print_cr("rax = 0x%08x", rax);

  5375       tty->print_cr("rbx = 0x%08x", rbx);

  5376       tty->print_cr("rcx = 0x%08x", rcx);

  5377       tty->print_cr("rdx = 0x%08x", rdx);

  5378       tty->print_cr("rdi = 0x%08x", rdi);

  5379       tty->print_cr("rsi = 0x%08x", rsi);

  5380       tty->print_cr("rbp = 0x%08x", rbp);

  5381       tty->print_cr("rsp = 0x%08x", rsp);

  5382       BREAKPOINT;

  5383       assert(false, "start up GDB");

  5384     }

  5385   } else {

  5386     ttyLocker ttyl;

  5387     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);

  5388     assert(false, err_msg("DEBUG MESSAGE: %s", msg));

  5389   }

  5390   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);

  5391 }

  5393 void MacroAssembler::stop(const char* msg) {

  5394   ExternalAddress message((address)msg);

  5395   // push address of message

  5396   pushptr(message.addr());

  5397   { Label L; call(L, relocInfo::none); bind(L); }     // push eip

  5398   pusha();                                           // push registers

  5399   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));

  5400   hlt();

  5401 }

  5403 void MacroAssembler::warn(const char* msg) {

  5404   push_CPU_state();

  5406   ExternalAddress message((address) msg);

  5407   // push address of message

  5408   pushptr(message.addr());

  5410   call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));

  5411   addl(rsp, wordSize);       // discard argument

  5412   pop_CPU_state();

  5413 }

  5415 #else // _LP64

  5417 // 64 bit versions

  5419 Address MacroAssembler::as_Address(AddressLiteral adr) {

  5420   // amd64 always does this as a pc-rel

  5421   // we can be absolute or disp based on the instruction type

  5422   // jmp/call are displacements others are absolute

  5423   assert(!adr.is_lval(), "must be rval");

  5424   assert(reachable(adr), "must be");

  5425   return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

  5427 }

  5429 Address MacroAssembler::as_Address(ArrayAddress adr) {

  5430   AddressLiteral base = adr.base();

  5431   lea(rscratch1, base);

  5432   Address index = adr.index();

  5433   assert(index._disp == 0, "must not have disp"); // maybe it can?

  5434   Address array(rscratch1, index._index, index._scale, index._disp);

  5435   return array;

  5436 }

  5438 int MacroAssembler::biased_locking_enter(Register lock_reg,

  5439                                          Register obj_reg,

  5440                                          Register swap_reg,

  5441                                          Register tmp_reg,

  5442                                          bool swap_reg_contains_mark,

  5443                                          Label& done,

  5444                                          Label* slow_case,

  5445                                          BiasedLockingCounters* counters) {

  5446   assert(UseBiasedLocking, "why call this otherwise?");

  5447   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");

  5448   assert(tmp_reg != noreg, "tmp_reg must be supplied");

  5449   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);

  5450   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");

  5451   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());

  5452   Address saved_mark_addr(lock_reg, 0);

  5454   if (PrintBiasedLockingStatistics && counters == NULL)

  5455     counters = BiasedLocking::counters();

  5457   // Biased locking

  5458   // See whether the lock is currently biased toward our thread and

  5459   // whether the epoch is still valid

  5460   // Note that the runtime guarantees sufficient alignment of JavaThread

  5461   // pointers to allow age to be placed into low bits

  5462   // First check to see whether biasing is even enabled for this object

  5463   Label cas_label;

  5464   int null_check_offset = -1;

  5465   if (!swap_reg_contains_mark) {

  5466     null_check_offset = offset();

  5467     movq(swap_reg, mark_addr);

  5468   }

  5469   movq(tmp_reg, swap_reg);

  5470   andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);

  5471   cmpq(tmp_reg, markOopDesc::biased_lock_pattern);

  5472   jcc(Assembler::notEqual, cas_label);

  5473   // The bias pattern is present in the object's header. Need to check

  5474   // whether the bias owner and the epoch are both still current.

  5475   load_prototype_header(tmp_reg, obj_reg);

  5476   orq(tmp_reg, r15_thread);

  5477   xorq(tmp_reg, swap_reg);

  5478   andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));

  5479   if (counters != NULL) {

  5480     cond_inc32(Assembler::zero,

  5481                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));

  5482   }

  5483   jcc(Assembler::equal, done);

  5485   Label try_revoke_bias;

  5486   Label try_rebias;

  5488   // At this point we know that the header has the bias pattern and

  5489   // that we are not the bias owner in the current epoch. We need to

  5490   // figure out more details about the state of the header in order to

  5491   // know what operations can be legally performed on the object's

  5492   // header.

  5494   // If the low three bits in the xor result aren't clear, that means

  5495   // the prototype header is no longer biased and we have to revoke

  5496   // the bias on this object.

  5497   testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);

  5498   jcc(Assembler::notZero, try_revoke_bias);

  5500   // Biasing is still enabled for this data type. See whether the

  5501   // epoch of the current bias is still valid, meaning that the epoch

  5502   // bits of the mark word are equal to the epoch bits of the

  5503   // prototype header. (Note that the prototype header's epoch bits

  5504   // only change at a safepoint.) If not, attempt to rebias the object

  5505   // toward the current thread. Note that we must be absolutely sure

  5506   // that the current epoch is invalid in order to do this because

  5507   // otherwise the manipulations it performs on the mark word are

  5508   // illegal.

  5509   testq(tmp_reg, markOopDesc::epoch_mask_in_place);

  5510   jcc(Assembler::notZero, try_rebias);

  5512   // The epoch of the current bias is still valid but we know nothing

  5513   // about the owner; it might be set or it might be clear. Try to

  5514   // acquire the bias of the object using an atomic operation. If this

  5515   // fails we will go in to the runtime to revoke the object's bias.

  5516   // Note that we first construct the presumed unbiased header so we

  5517   // don't accidentally blow away another thread's valid bias.

  5518   andq(swap_reg,

  5519        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);

  5520   movq(tmp_reg, swap_reg);

  5521   orq(tmp_reg, r15_thread);

  5522   if (os::is_MP()) {

  5523     lock();

  5524   }

  5525   cmpxchgq(tmp_reg, Address(obj_reg, 0));

  5526   // If the biasing toward our thread failed, this means that

  5527   // another thread succeeded in biasing it toward itself and we

  5528   // need to revoke that bias. The revocation will occur in the

  5529   // interpreter runtime in the slow case.

  5530   if (counters != NULL) {

  5531     cond_inc32(Assembler::zero,

  5532                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));

  5533   }

  5534   if (slow_case != NULL) {

  5535     jcc(Assembler::notZero, *slow_case);

  5536   }

  5537   jmp(done);

  5539   bind(try_rebias);

  5540   // At this point we know the epoch has expired, meaning that the

  5541   // current "bias owner", if any, is actually invalid. Under these

  5542   // circumstances _only_, we are allowed to use the current header's

  5543   // value as the comparison value when doing the cas to acquire the

  5544   // bias in the current epoch. In other words, we allow transfer of

  5545   // the bias from one thread to another directly in this situation.

  5546   //

  5547   // FIXME: due to a lack of registers we currently blow away the age

  5548   // bits in this situation. Should attempt to preserve them.

  5549   load_prototype_header(tmp_reg, obj_reg);

  5550   orq(tmp_reg, r15_thread);

  5551   if (os::is_MP()) {

  5552     lock();

  5553   }

  5554   cmpxchgq(tmp_reg, Address(obj_reg, 0));

  5555   // If the biasing toward our thread failed, then another thread

  5556   // succeeded in biasing it toward itself and we need to revoke that

  5557   // bias. The revocation will occur in the runtime in the slow case.

  5558   if (counters != NULL) {

  5559     cond_inc32(Assembler::zero,

  5560                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));

  5561   }

  5562   if (slow_case != NULL) {

  5563     jcc(Assembler::notZero, *slow_case);

  5564   }

  5565   jmp(done);

  5567   bind(try_revoke_bias);

  5568   // The prototype mark in the klass doesn't have the bias bit set any

  5569   // more, indicating that objects of this data type are not supposed

  5570   // to be biased any more. We are going to try to reset the mark of

  5571   // this object to the prototype value and fall through to the

  5572   // CAS-based locking scheme. Note that if our CAS fails, it means

  5573   // that another thread raced us for the privilege of revoking the

  5574   // bias of this particular object, so it's okay to continue in the

  5575   // normal locking code.

  5576   //

  5577   // FIXME: due to a lack of registers we currently blow away the age

  5578   // bits in this situation. Should attempt to preserve them.

  5579   load_prototype_header(tmp_reg, obj_reg);

  5580   if (os::is_MP()) {

  5581     lock();

  5582   }

  5583   cmpxchgq(tmp_reg, Address(obj_reg, 0));

  5584   // Fall through to the normal CAS-based lock, because no matter what

  5585   // the result of the above CAS, some thread must have succeeded in

  5586   // removing the bias bit from the object's header.

  5587   if (counters != NULL) {

  5588     cond_inc32(Assembler::zero,

  5589                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));

  5590   }

  5592   bind(cas_label);

  5594   return null_check_offset;

  5595 }

  5597 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {

  5598   Label L, E;

  5600 #ifdef _WIN64

  5601   // Windows always allocates space for it's register args

  5602   assert(num_args <= 4, "only register arguments supported");

  5603   subq(rsp,  frame::arg_reg_save_area_bytes);

  5604 #endif

  5606   // Align stack if necessary

  5607   testl(rsp, 15);

  5608   jcc(Assembler::zero, L);

  5610   subq(rsp, 8);

  5611   {

  5612     call(RuntimeAddress(entry_point));

  5613   }

  5614   addq(rsp, 8);

  5615   jmp(E);

  5617   bind(L);

  5618   {

  5619     call(RuntimeAddress(entry_point));

  5620   }

  5622   bind(E);

  5624 #ifdef _WIN64

  5625   // restore stack pointer

  5626   addq(rsp, frame::arg_reg_save_area_bytes);

  5627 #endif

  5629 }

  5631 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {

  5632   assert(!src2.is_lval(), "should use cmpptr");

  5634   if (reachable(src2)) {

  5635     cmpq(src1, as_Address(src2));

  5636   } else {

  5637     lea(rscratch1, src2);

  5638     Assembler::cmpq(src1, Address(rscratch1, 0));

  5639   }

  5640 }

  5642 int MacroAssembler::corrected_idivq(Register reg) {

  5643   // Full implementation of Java ldiv and lrem; checks for special

  5644   // case as described in JVM spec., p.243 & p.271.  The function

  5645   // returns the (pc) offset of the idivl instruction - may be needed

  5646   // for implicit exceptions.

  5647   //

  5648   //         normal case                           special case

  5649   //

  5650   // input : rax: dividend                         min_long

  5651   //         reg: divisor   (may not be eax/edx)   -1

  5652   //

  5653   // output: rax: quotient  (= rax idiv reg)       min_long

  5654   //         rdx: remainder (= rax irem reg)       0

  5655   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");

  5656   static const int64_t min_long = 0x8000000000000000;

  5657   Label normal_case, special_case;

  5659   // check for special case

  5660   cmp64(rax, ExternalAddress((address) &min_long));

  5661   jcc(Assembler::notEqual, normal_case);

  5662   xorl(rdx, rdx); // prepare rdx for possible special case (where

  5663                   // remainder = 0)

  5664   cmpq(reg, -1);

  5665   jcc(Assembler::equal, special_case);

  5667   // handle normal case

  5668   bind(normal_case);

  5669   cdqq();

  5670   int idivq_offset = offset();

  5671   idivq(reg);

  5673   // normal and special case exit

  5674   bind(special_case);

  5676   return idivq_offset;

  5677 }

  5679 void MacroAssembler::decrementq(Register reg, int value) {

  5680   if (value == min_jint) { subq(reg, value); return; }

  5681   if (value <  0) { incrementq(reg, -value); return; }

  5682   if (value == 0) {                        ; return; }

  5683   if (value == 1 && UseIncDec) { decq(reg) ; return; }

  5684   /* else */      { subq(reg, value)       ; return; }

  5685 }

  5687 void MacroAssembler::decrementq(Address dst, int value) {

  5688   if (value == min_jint) { subq(dst, value); return; }

  5689   if (value <  0) { incrementq(dst, -value); return; }

  5690   if (value == 0) {                        ; return; }

  5691   if (value == 1 && UseIncDec) { decq(dst) ; return; }

  5692   /* else */      { subq(dst, value)       ; return; }

  5693 }

  5695 void MacroAssembler::fat_nop() {

  5696   // A 5 byte nop that is safe for patching (see patch_verified_entry)

  5697   // Recommened sequence from 'Software Optimization Guide for the AMD

  5698   // Hammer Processor'

  5699   emit_byte(0x66);

  5700   emit_byte(0x66);

  5701   emit_byte(0x90);

  5702   emit_byte(0x66);

  5703   emit_byte(0x90);

  5704 }

  5706 void MacroAssembler::incrementq(Register reg, int value) {

  5707   if (value == min_jint) { addq(reg, value); return; }

  5708   if (value <  0) { decrementq(reg, -value); return; }

  5709   if (value == 0) {                        ; return; }

  5710   if (value == 1 && UseIncDec) { incq(reg) ; return; }

  5711   /* else */      { addq(reg, value)       ; return; }

  5712 }

  5714 void MacroAssembler::incrementq(Address dst, int value) {

  5715   if (value == min_jint) { addq(dst, value); return; }

  5716   if (value <  0) { decrementq(dst, -value); return; }

  5717   if (value == 0) {                        ; return; }

  5718   if (value == 1 && UseIncDec) { incq(dst) ; return; }

  5719   /* else */      { addq(dst, value)       ; return; }

  5720 }

  5722 // 32bit can do a case table jump in one instruction but we no longer allow the base

  5723 // to be installed in the Address class

  5724 void MacroAssembler::jump(ArrayAddress entry) {

  5725   lea(rscratch1, entry.base());

  5726   Address dispatch = entry.index();

  5727   assert(dispatch._base == noreg, "must be");

  5728   dispatch._base = rscratch1;

  5729   jmp(dispatch);

  5730 }

  5732 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {

  5733   ShouldNotReachHere(); // 64bit doesn't use two regs

  5734   cmpq(x_lo, y_lo);

  5735 }

  5737 void MacroAssembler::lea(Register dst, AddressLiteral src) {

  5738     mov_literal64(dst, (intptr_t)src.target(), src.rspec());

  5739 }

  5741 void MacroAssembler::lea(Address dst, AddressLiteral adr) {

  5742   mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());

  5743   movptr(dst, rscratch1);

  5744 }

  5746 void MacroAssembler::leave() {

  5747   // %%% is this really better? Why not on 32bit too?

  5748   emit_byte(0xC9); // LEAVE

  5749 }

  5751 void MacroAssembler::lneg(Register hi, Register lo) {

  5752   ShouldNotReachHere(); // 64bit doesn't use two regs

  5753   negq(lo);

  5754 }

  5756 void MacroAssembler::movoop(Register dst, jobject obj) {

  5757   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());

  5758 }

  5760 void MacroAssembler::movoop(Address dst, jobject obj) {

  5761   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());

  5762   movq(dst, rscratch1);

  5763 }

  5765 void MacroAssembler::movptr(Register dst, AddressLiteral src) {

  5766   if (src.is_lval()) {

  5767     mov_literal64(dst, (intptr_t)src.target(), src.rspec());

  5768   } else {

  5769     if (reachable(src)) {

  5770       movq(dst, as_Address(src));

  5771     } else {

  5772       lea(rscratch1, src);

  5773       movq(dst, Address(rscratch1,0));

  5774     }

  5775   }

  5776 }

  5778 void MacroAssembler::movptr(ArrayAddress dst, Register src) {

  5779   movq(as_Address(dst), src);

  5780 }

  5782 void MacroAssembler::movptr(Register dst, ArrayAddress src) {

  5783   movq(dst, as_Address(src));

  5784 }

  5786 // src should NEVER be a real pointer. Use AddressLiteral for true pointers

  5787 void MacroAssembler::movptr(Address dst, intptr_t src) {

  5788   mov64(rscratch1, src);

  5789   movq(dst, rscratch1);

  5790 }

  5792 // These are mostly for initializing NULL

  5793 void MacroAssembler::movptr(Address dst, int32_t src) {

  5794   movslq(dst, src);

  5795 }

  5797 void MacroAssembler::movptr(Register dst, int32_t src) {

  5798   mov64(dst, (intptr_t)src);

  5799 }

  5801 void MacroAssembler::pushoop(jobject obj) {

  5802   movoop(rscratch1, obj);

  5803   push(rscratch1);

  5804 }

  5806 void MacroAssembler::pushptr(AddressLiteral src) {

  5807   lea(rscratch1, src);

  5808   if (src.is_lval()) {

  5809     push(rscratch1);

  5810   } else {

  5811     pushq(Address(rscratch1, 0));

  5812   }

  5813 }

  5815 void MacroAssembler::reset_last_Java_frame(bool clear_fp,

  5816                                            bool clear_pc) {

  5817   // we must set sp to zero to clear frame

  5818   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);

  5819   // must clear fp, so that compiled frames are not confused; it is

  5820   // possible that we need it only for debugging

  5821   if (clear_fp) {

  5822     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);

  5823   }

  5825   if (clear_pc) {

  5826     movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

  5827   }

  5828 }

  5830 void MacroAssembler::set_last_Java_frame(Register last_java_sp,

  5831                                          Register last_java_fp,

  5832                                          address  last_java_pc) {

  5833   // determine last_java_sp register

  5834   if (!last_java_sp->is_valid()) {

  5835     last_java_sp = rsp;

  5836   }

  5838   // last_java_fp is optional

  5839   if (last_java_fp->is_valid()) {

  5840     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),

  5841            last_java_fp);

  5842   }

  5844   // last_java_pc is optional

  5845   if (last_java_pc != NULL) {

  5846     Address java_pc(r15_thread,

  5847                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());

  5848     lea(rscratch1, InternalAddress(last_java_pc));

  5849     movptr(java_pc, rscratch1);

  5850   }

  5852   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);

  5853 }

  5855 static void pass_arg0(MacroAssembler* masm, Register arg) {

  5856   if (c_rarg0 != arg ) {

  5857     masm->mov(c_rarg0, arg);

  5858   }

  5859 }

  5861 static void pass_arg1(MacroAssembler* masm, Register arg) {

  5862   if (c_rarg1 != arg ) {

  5863     masm->mov(c_rarg1, arg);

  5864   }

  5865 }

  5867 static void pass_arg2(MacroAssembler* masm, Register arg) {

  5868   if (c_rarg2 != arg ) {

  5869     masm->mov(c_rarg2, arg);

  5870   }

  5871 }

  5873 static void pass_arg3(MacroAssembler* masm, Register arg) {

  5874   if (c_rarg3 != arg ) {

  5875     masm->mov(c_rarg3, arg);

  5876   }

  5877 }

  5879 void MacroAssembler::stop(const char* msg) {

  5880   address rip = pc();

  5881   pusha(); // get regs on stack

  5882   lea(c_rarg0, ExternalAddress((address) msg));

  5883   lea(c_rarg1, InternalAddress(rip));

  5884   movq(c_rarg2, rsp); // pass pointer to regs array

  5885   andq(rsp, -16); // align stack as required by ABI

  5886   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));

  5887   hlt();

  5888 }

  5890 void MacroAssembler::warn(const char* msg) {

  5891   push(rsp);

  5892   andq(rsp, -16);     // align stack as required by push_CPU_state and call

  5894   push_CPU_state();   // keeps alignment at 16 bytes

  5895   lea(c_rarg0, ExternalAddress((address) msg));

  5896   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);

  5897   pop_CPU_state();

  5898   pop(rsp);

  5899 }

  5901 #ifndef PRODUCT

  5902 extern "C" void findpc(intptr_t x);

  5903 #endif

  5905 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {

  5906   // In order to get locks to work, we need to fake a in_VM state

  5907   if (ShowMessageBoxOnError ) {

  5908     JavaThread* thread = JavaThread::current();

  5909     JavaThreadState saved_state = thread->thread_state();

  5910     thread->set_thread_state(_thread_in_vm);

  5911 #ifndef PRODUCT

  5912     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {

  5913       ttyLocker ttyl;

  5914       BytecodeCounter::print();

  5915     }

  5916 #endif

  5917     // To see where a verify_oop failed, get $ebx+40/X for this frame.

  5918     // XXX correct this offset for amd64

  5919     // This is the value of eip which points to where verify_oop will return.

  5920     if (os::message_box(msg, "Execution stopped, print registers?")) {

  5921       ttyLocker ttyl;

  5922       tty->print_cr("rip = 0x%016lx", pc);

  5923 #ifndef PRODUCT

  5924       tty->cr();

  5925       findpc(pc);

  5926       tty->cr();

  5927 #endif

  5928       tty->print_cr("rax = 0x%016lx", regs[15]);

  5929       tty->print_cr("rbx = 0x%016lx", regs[12]);

  5930       tty->print_cr("rcx = 0x%016lx", regs[14]);

  5931       tty->print_cr("rdx = 0x%016lx", regs[13]);

  5932       tty->print_cr("rdi = 0x%016lx", regs[8]);

  5933       tty->print_cr("rsi = 0x%016lx", regs[9]);

  5934       tty->print_cr("rbp = 0x%016lx", regs[10]);

  5935       tty->print_cr("rsp = 0x%016lx", regs[11]);

  5936       tty->print_cr("r8  = 0x%016lx", regs[7]);

  5937       tty->print_cr("r9  = 0x%016lx", regs[6]);

  5938       tty->print_cr("r10 = 0x%016lx", regs[5]);

  5939       tty->print_cr("r11 = 0x%016lx", regs[4]);

  5940       tty->print_cr("r12 = 0x%016lx", regs[3]);

  5941       tty->print_cr("r13 = 0x%016lx", regs[2]);

  5942       tty->print_cr("r14 = 0x%016lx", regs[1]);

  5943       tty->print_cr("r15 = 0x%016lx", regs[0]);

  5944       BREAKPOINT;

  5945     }

  5946     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);

  5947   } else {

  5948     ttyLocker ttyl;

  5949     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",

  5950                     msg);

  5951     assert(false, err_msg("DEBUG MESSAGE: %s", msg));

  5952   }

  5953 }

  5955 #endif // _LP64

  5957 // Now versions that are common to 32/64 bit

  5959 void MacroAssembler::addptr(Register dst, int32_t imm32) {

  5960   LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));

  5961 }

  5963 void MacroAssembler::addptr(Register dst, Register src) {

  5964   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));

  5965 }

  5967 void MacroAssembler::addptr(Address dst, Register src) {

  5968   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));

  5969 }

  5971 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {

  5972   if (reachable(src)) {

  5973     Assembler::addsd(dst, as_Address(src));

  5974   } else {

  5975     lea(rscratch1, src);

  5976     Assembler::addsd(dst, Address(rscratch1, 0));

  5977   }

  5978 }

  5980 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {

  5981   if (reachable(src)) {

  5982     addss(dst, as_Address(src));

  5983   } else {

  5984     lea(rscratch1, src);

  5985     addss(dst, Address(rscratch1, 0));

  5986   }

  5987 }

  5989 void MacroAssembler::align(int modulus) {

  5990   if (offset() % modulus != 0) {

  5991     nop(modulus - (offset() % modulus));

  5992   }

  5993 }

  5995 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {

  5996   // Used in sign-masking with aligned address.

  5997   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");

  5998   if (reachable(src)) {

  5999     Assembler::andpd(dst, as_Address(src));

  6000   } else {

  6001     lea(rscratch1, src);

  6002     Assembler::andpd(dst, Address(rscratch1, 0));

  6003   }

  6004 }

  6006 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {

  6007   // Used in sign-masking with aligned address.

  6008   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");

  6009   if (reachable(src)) {

  6010     Assembler::andps(dst, as_Address(src));

  6011   } else {

  6012     lea(rscratch1, src);

  6013     Assembler::andps(dst, Address(rscratch1, 0));

  6014   }

  6015 }

  6017 void MacroAssembler::andptr(Register dst, int32_t imm32) {

  6018   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));

  6019 }

  6021 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {

  6022   pushf();

  6023   if (os::is_MP())

  6024     lock();

  6025   incrementl(counter_addr);

  6026   popf();

  6027 }

  6029 // Writes to stack successive pages until offset reached to check for

  6030 // stack overflow + shadow pages.  This clobbers tmp.

  6031 void MacroAssembler::bang_stack_size(Register size, Register tmp) {

  6032   movptr(tmp, rsp);

  6033   // Bang stack for total size given plus shadow page size.

  6034   // Bang one page at a time because large size can bang beyond yellow and

  6035   // red zones.

  6036   Label loop;

  6037   bind(loop);

  6038   movl(Address(tmp, (-os::vm_page_size())), size );

  6039   subptr(tmp, os::vm_page_size());

  6040   subl(size, os::vm_page_size());

  6041   jcc(Assembler::greater, loop);

  6043   // Bang down shadow pages too.

  6044   // The -1 because we already subtracted 1 page.

  6045   for (int i = 0; i< StackShadowPages-1; i++) {

  6046     // this could be any sized move but this is can be a debugging crumb

  6047     // so the bigger the better.

  6048     movptr(Address(tmp, (-i*os::vm_page_size())), size );

  6049   }

  6050 }

  6052 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {

  6053   assert(UseBiasedLocking, "why call this otherwise?");

  6055   // Check for biased locking unlock case, which is a no-op

  6056   // Note: we do not have to check the thread ID for two reasons.

  6057   // First, the interpreter checks for IllegalMonitorStateException at

  6058   // a higher level. Second, if the bias was revoked while we held the

  6059   // lock, the object could not be rebiased toward another thread, so

  6060   // the bias bit would be clear.

  6061   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));

  6062   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);

  6063   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);

  6064   jcc(Assembler::equal, done);

  6065 }

  6067 void MacroAssembler::c2bool(Register x) {

  6068   // implements x == 0 ? 0 : 1

  6069   // note: must only look at least-significant byte of x

  6070   //       since C-style booleans are stored in one byte

  6071   //       only! (was bug)

  6072   andl(x, 0xFF);

  6073   setb(Assembler::notZero, x);

  6074 }

  6076 // Wouldn't need if AddressLiteral version had new name

  6077 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {

  6078   Assembler::call(L, rtype);

  6079 }

  6081 void MacroAssembler::call(Register entry) {

  6082   Assembler::call(entry);

  6083 }

  6085 void MacroAssembler::call(AddressLiteral entry) {

  6086   if (reachable(entry)) {

  6087     Assembler::call_literal(entry.target(), entry.rspec());

  6088   } else {

  6089     lea(rscratch1, entry);

  6090     Assembler::call(rscratch1);

  6091   }

  6092 }

  6094 // Implementation of call_VM versions

  6096 void MacroAssembler::call_VM(Register oop_result,

  6097                              address entry_point,

  6098                              bool check_exceptions) {

  6099   Label C, E;

  6100   call(C, relocInfo::none);

  6101   jmp(E);

  6103   bind(C);

  6104   call_VM_helper(oop_result, entry_point, 0, check_exceptions);

  6105   ret(0);

  6107   bind(E);

  6108 }

  6110 void MacroAssembler::call_VM(Register oop_result,

  6111                              address entry_point,

  6112                              Register arg_1,

  6113                              bool check_exceptions) {

  6114   Label C, E;

  6115   call(C, relocInfo::none);

  6116   jmp(E);

  6118   bind(C);

  6119   pass_arg1(this, arg_1);

  6120   call_VM_helper(oop_result, entry_point, 1, check_exceptions);

  6121   ret(0);

  6123   bind(E);

  6124 }

  6126 void MacroAssembler::call_VM(Register oop_result,

  6127                              address entry_point,

  6128                              Register arg_1,

  6129                              Register arg_2,

  6130                              bool check_exceptions) {

  6131   Label C, E;

  6132   call(C, relocInfo::none);

  6133   jmp(E);

  6135   bind(C);

  6137   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6139   pass_arg2(this, arg_2);

  6140   pass_arg1(this, arg_1);

  6141   call_VM_helper(oop_result, entry_point, 2, check_exceptions);

  6142   ret(0);

  6144   bind(E);

  6145 }

  6147 void MacroAssembler::call_VM(Register oop_result,

  6148                              address entry_point,

  6149                              Register arg_1,

  6150                              Register arg_2,

  6151                              Register arg_3,

  6152                              bool check_exceptions) {

  6153   Label C, E;

  6154   call(C, relocInfo::none);

  6155   jmp(E);

  6157   bind(C);

  6159   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));

  6160   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));

  6161   pass_arg3(this, arg_3);

  6163   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6164   pass_arg2(this, arg_2);

  6166   pass_arg1(this, arg_1);

  6167   call_VM_helper(oop_result, entry_point, 3, check_exceptions);

  6168   ret(0);

  6170   bind(E);

  6171 }

  6173 void MacroAssembler::call_VM(Register oop_result,

  6174                              Register last_java_sp,

  6175                              address entry_point,

  6176                              int number_of_arguments,

  6177                              bool check_exceptions) {

  6178   Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);

  6179   call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);

  6180 }

  6182 void MacroAssembler::call_VM(Register oop_result,

  6183                              Register last_java_sp,

  6184                              address entry_point,

  6185                              Register arg_1,

  6186                              bool check_exceptions) {

  6187   pass_arg1(this, arg_1);

  6188   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);

  6189 }

  6191 void MacroAssembler::call_VM(Register oop_result,

  6192                              Register last_java_sp,

  6193                              address entry_point,

  6194                              Register arg_1,

  6195                              Register arg_2,

  6196                              bool check_exceptions) {

  6198   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6199   pass_arg2(this, arg_2);

  6200   pass_arg1(this, arg_1);

  6201   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);

  6202 }

  6204 void MacroAssembler::call_VM(Register oop_result,

  6205                              Register last_java_sp,

  6206                              address entry_point,

  6207                              Register arg_1,

  6208                              Register arg_2,

  6209                              Register arg_3,

  6210                              bool check_exceptions) {

  6211   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));

  6212   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));

  6213   pass_arg3(this, arg_3);

  6214   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6215   pass_arg2(this, arg_2);

  6216   pass_arg1(this, arg_1);

  6217   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);

  6218 }

  6220 void MacroAssembler::super_call_VM(Register oop_result,

  6221                                    Register last_java_sp,

  6222                                    address entry_point,

  6223                                    int number_of_arguments,

  6224                                    bool check_exceptions) {

  6225   Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);

  6226   MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);

  6227 }

  6229 void MacroAssembler::super_call_VM(Register oop_result,

  6230                                    Register last_java_sp,

  6231                                    address entry_point,

  6232                                    Register arg_1,

  6233                                    bool check_exceptions) {

  6234   pass_arg1(this, arg_1);

  6235   super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);

  6236 }

  6238 void MacroAssembler::super_call_VM(Register oop_result,

  6239                                    Register last_java_sp,

  6240                                    address entry_point,

  6241                                    Register arg_1,

  6242                                    Register arg_2,

  6243                                    bool check_exceptions) {

  6245   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6246   pass_arg2(this, arg_2);

  6247   pass_arg1(this, arg_1);

  6248   super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);

  6249 }

  6251 void MacroAssembler::super_call_VM(Register oop_result,

  6252                                    Register last_java_sp,

  6253                                    address entry_point,

  6254                                    Register arg_1,

  6255                                    Register arg_2,

  6256                                    Register arg_3,

  6257                                    bool check_exceptions) {

  6258   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));

  6259   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));

  6260   pass_arg3(this, arg_3);

  6261   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6262   pass_arg2(this, arg_2);

  6263   pass_arg1(this, arg_1);

  6264   super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);

  6265 }

  // Emits the common sequence for calling a VM entry point from generated code:
  //   1. picks the thread and last_java_sp registers when the caller passed
  //      invalid ones,
  //   2. makes the thread the first C argument (pushed on 32-bit, moved into
  //      c_rarg0 on 64-bit),
  //   3. records the last Java frame, performs the call through
  //      call_VM_leaf_base, and resets the last Java frame,
  //   4. unless CC_INTERP is defined, runs the popframe / earlyret hooks,
  //   5. if check_exceptions is set, jumps to
  //      StubRoutines::forward_exception_entry() when the thread has a pending
  //      exception,
  //   6. if oop_result is valid, loads JavaThread::vm_result into it and clears
  //      the slot in the thread.
  //
  // oop_result          - receives the oop result when valid
  // java_thread         - thread register; when invalid, r15_thread (64-bit) or
  //                       rdi loaded via get_thread (32-bit) is used
  // last_java_sp        - last Java sp register; rsp is used when invalid
  // entry_point         - address of the VM function to call
  // number_of_arguments - argument count; incremented on 32-bit for the pushed
  //                       thread argument
  // check_exceptions    - whether to emit the pending-exception check
  6267 void MacroAssembler::call_VM_base(Register oop_result,

  6268                                   Register java_thread,

  6269                                   Register last_java_sp,

  6270                                   address  entry_point,

  6271                                   int      number_of_arguments,

  6272                                   bool     check_exceptions) {

  6273   // determine java_thread register

  6274   if (!java_thread->is_valid()) {

  6275 #ifdef _LP64

  6276     java_thread = r15_thread;

  6277 #else

  6278     java_thread = rdi;

  6279     get_thread(java_thread);

  6280 #endif // LP64

  6281   }

  6282   // determine last_java_sp register

  6283   if (!last_java_sp->is_valid()) {

  6284     last_java_sp = rsp;

  6285   }

  6286   // debugging support

  6287   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");

  6288   LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));

  6289 #ifdef ASSERT

         // debug builds, 64-bit with compressed oops only: emit a heap-base check
  6290   LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)

  6291 #endif // ASSERT

  6293   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");

  6294   assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  6296   // push java thread (becomes first argument of C function)

         // 32-bit: the thread is pushed as an extra stack argument, so the count grows by one
  6298   NOT_LP64(push(java_thread); number_of_arguments++);

         // 64-bit: the thread is passed in the first argument register instead
  6299   LP64_ONLY(mov(c_rarg0, r15_thread));

  6301   // set last Java frame before call

  6302   assert(last_java_sp != rbp, "can't use ebp/rbp");

  6304   // Only interpreter should have to set fp

  6305   set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  6307   // do the call, remove parameters

         // explicitly qualified, so this always uses MacroAssembler's own call_VM_leaf_base
  6308   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  6310   // restore the thread (cannot use the pushed argument since arguments

  6311   // may be overwritten by C code generated by an optimizing compiler);

  6312   // however can use the register value directly if it is callee saved.

         // on 64-bit the condition is always true (LP64_ONLY(true ||))
  6313   if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {

  6314     // rdi & rsi (also r15) are callee saved -> nothing to do

  6315 #ifdef ASSERT

           // debug builds: re-fetch the thread into rax and stop if it differs from java_thread
  6316     guarantee(java_thread != rax, "change this code");

  6317     push(rax);

  6318     { Label L;

  6319       get_thread(rax);

  6320       cmpptr(java_thread, rax);

  6321       jcc(Assembler::equal, L);

  6322       stop("MacroAssembler::call_VM_base: rdi not callee saved?");

  6323       bind(L);

  6324     }

  6325     pop(rax);

  6326 #endif

  6327   } else {

           // any other register: reload the thread after the call
  6328     get_thread(java_thread);

  6329   }

  6330   // reset last Java frame

  6331   // Only interpreter should have to clear fp

  6332   reset_last_Java_frame(java_thread, true, false);

  6334 #ifndef CC_INTERP

  6335    // C++ interp handles this in the interpreter

  6336   check_and_handle_popframe(java_thread);

  6337   check_and_handle_earlyret(java_thread);

  6338 #endif /* CC_INTERP */

  6340   if (check_exceptions) {

  6341     // check for pending exceptions (java_thread is set upon return)

  6342     cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);

  6343 #ifndef _LP64

  6344     jump_cc(Assembler::notEqual,

  6345             RuntimeAddress(StubRoutines::forward_exception_entry()));

  6346 #else

  6347     // This used to conditionally jump to forward_exception however it is

  6348     // possible if we relocate that the branch will not reach. So we must jump

  6349     // around so we can always reach

  6351     Label ok;

  6352     jcc(Assembler::equal, ok);

  6353     jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  6354     bind(ok);

  6355 #endif // LP64

  6356   }

  6358   // get oop result if there is one and reset the value in the thread

  6359   if (oop_result->is_valid()) {

  6360     movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));

  6361     movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);

  6362     verify_oop(oop_result, "broken oop in call_VM_base");

  6363   }

  6364 }

  6366 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  6368   // Calculate the value for last_Java_sp

  6369   // somewhat subtle. call_VM does an intermediate call

  6370   // which places a return address on the stack just under the

  6371   // stack pointer as the user finsihed with it. This allows

  6372   // use to retrieve last_Java_pc from last_Java_sp[-1].

  6373   // On 32bit we then have to push additional args on the stack to accomplish

  6374   // the actual requested call. On 64bit call_VM only can use register args

  6375   // so the only extra space is the return address that call_VM created.

  6376   // This hopefully explains the calculations here.

  6378 #ifdef _LP64

  6379   // We've pushed one address, correct last_Java_sp

  6380   lea(rax, Address(rsp, wordSize));

  6381 #else

  6382   lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));

  6383 #endif // LP64

  6385   call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

  6387 }

  6389 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {

  6390   call_VM_leaf_base(entry_point, number_of_arguments);

  6391 }

  6393 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {

  6394   pass_arg0(this, arg_0);

  6395   call_VM_leaf(entry_point, 1);

  6396 }

  6398 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  6400   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));

  6401   pass_arg1(this, arg_1);

  6402   pass_arg0(this, arg_0);

  6403   call_VM_leaf(entry_point, 2);

  6404 }

  6406 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {

  6407   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));

  6408   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6409   pass_arg2(this, arg_2);

  6410   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));

  6411   pass_arg1(this, arg_1);

  6412   pass_arg0(this, arg_0);

  6413   call_VM_leaf(entry_point, 3);

  6414 }

  6416 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {

  6417   pass_arg0(this, arg_0);

  6418   MacroAssembler::call_VM_leaf_base(entry_point, 1);

  6419 }

  6421 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  6423   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));

  6424   pass_arg1(this, arg_1);

  6425   pass_arg0(this, arg_0);

  6426   MacroAssembler::call_VM_leaf_base(entry_point, 2);

  6427 }

  6429 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {

  6430   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));

  6431   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6432   pass_arg2(this, arg_2);

  6433   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));

  6434   pass_arg1(this, arg_1);

  6435   pass_arg0(this, arg_0);

  6436   MacroAssembler::call_VM_leaf_base(entry_point, 3);

  6437 }

  6439 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {

  6440   LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));

  6441   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));

  6442   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));

  6443   pass_arg3(this, arg_3);

  6444   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));

  6445   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  6446   pass_arg2(this, arg_2);

  6447   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));

  6448   pass_arg1(this, arg_1);

  6449   pass_arg0(this, arg_0);

  6450   MacroAssembler::call_VM_leaf_base(entry_point, 4);

  6451 }

  6453 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {

  6454 }

  6456 void MacroAssembler::check_and_handle_popframe(Register java_thread) {

  6457 }

  6459 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {

  6460   if (reachable(src1)) {

  6461     cmpl(as_Address(src1), imm);

  6462   } else {

  6463     lea(rscratch1, src1);

  6464     cmpl(Address(rscratch1, 0), imm);

  6465   }

  6466 }

  6468 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {

  6469   assert(!src2.is_lval(), "use cmpptr");

  6470   if (reachable(src2)) {

  6471     cmpl(src1, as_Address(src2));

  6472   } else {

  6473     lea(rscratch1, src2);

  6474     cmpl(src1, Address(rscratch1, 0));

  6475   }

  6476 }

  6478 void MacroAssembler::cmp32(Register src1, int32_t imm) {

  6479   Assembler::cmpl(src1, imm);

  6480 }

  6482 void MacroAssembler::cmp32(Register src1, Address src2) {

  6483   Assembler::cmpl(src1, src2);

  6484 }

  6486 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {

  6487   ucomisd(opr1, opr2);

  6489   Label L;

  6490   if (unordered_is_less) {

  6491     movl(dst, -1);

  6492     jcc(Assembler::parity, L);

  6493     jcc(Assembler::below , L);

  6494     movl(dst, 0);

  6495     jcc(Assembler::equal , L);

  6496     increment(dst);

  6497   } else { // unordered is greater

  6498     movl(dst, 1);

  6499     jcc(Assembler::parity, L);

  6500     jcc(Assembler::above , L);

  6501     movl(dst, 0);

  6502     jcc(Assembler::equal , L);

  6503     decrementl(dst);

  6504   }

  6505   bind(L);

  6506 }

  6508 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {

  6509   ucomiss(opr1, opr2);

  6511   Label L;

  6512   if (unordered_is_less) {

  6513     movl(dst, -1);

  6514     jcc(Assembler::parity, L);

  6515     jcc(Assembler::below , L);

  6516     movl(dst, 0);

  6517     jcc(Assembler::equal , L);

  6518     increment(dst);

  6519   } else { // unordered is greater

  6520     movl(dst, 1);

  6521     jcc(Assembler::parity, L);

  6522     jcc(Assembler::above , L);

  6523     movl(dst, 0);

  6524     jcc(Assembler::equal , L);

  6525     decrementl(dst);

  6526   }

  6527   bind(L);

  6528 }

  6531 void MacroAssembler::cmp8(AddressLiteral src1, int imm) {

  6532   if (reachable(src1)) {

  6533     cmpb(as_Address(src1), imm);

  6534   } else {

  6535     lea(rscratch1, src1);

  6536     cmpb(Address(rscratch1, 0), imm);

  6537   }

  6538 }

  6540 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {

  6541 #ifdef _LP64

  6542   if (src2.is_lval()) {

  6543     movptr(rscratch1, src2);

  6544     Assembler::cmpq(src1, rscratch1);

  6545   } else if (reachable(src2)) {

  6546     cmpq(src1, as_Address(src2));

  6547   } else {

  6548     lea(rscratch1, src2);

  6549     Assembler::cmpq(src1, Address(rscratch1, 0));

  6550   }

  6551 #else

  6552   if (src2.is_lval()) {

  6553     cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());

  6554   } else {

  6555     cmpl(src1, as_Address(src2));

  6556   }

  6557 #endif // _LP64

  6558 }

  6560 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {

  6561   assert(src2.is_lval(), "not a mem-mem compare");

  6562 #ifdef _LP64

  6563   // moves src2's literal address

  6564   movptr(rscratch1, src2);

  6565   Assembler::cmpq(src1, rscratch1);

  6566 #else

  6567   cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());

  6568 #endif // _LP64

  6569 }

  6571 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {

  6572   if (reachable(adr)) {

  6573     if (os::is_MP())

  6574       lock();

  6575     cmpxchgptr(reg, as_Address(adr));

  6576   } else {

  6577     lea(rscratch1, adr);

  6578     if (os::is_MP())

  6579       lock();

  6580     cmpxchgptr(reg, Address(rscratch1, 0));

  6581   }

  6582 }

  6584 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {

  6585   LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));

  6586 }

  6588 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {

  6589   if (reachable(src)) {

  6590     Assembler::comisd(dst, as_Address(src));

  6591   } else {

  6592     lea(rscratch1, src);

  6593     Assembler::comisd(dst, Address(rscratch1, 0));

  6594   }

  6595 }

  6597 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {

  6598   if (reachable(src)) {

  6599     Assembler::comiss(dst, as_Address(src));

  6600   } else {

  6601     lea(rscratch1, src);

  6602     Assembler::comiss(dst, Address(rscratch1, 0));

  6603   }

  6604 }

  6607 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {

  6608   Condition negated_cond = negate_condition(cond);

  6609   Label L;

  6610   jcc(negated_cond, L);

  6611   atomic_incl(counter_addr);

  6612   bind(L);

  6613 }

  // Emits a 32-bit signed divide of rax by `reg` that handles the
  // min_int / -1 case separately, and returns the code offset at which the
  // idivl instruction itself was emitted.
  6615 int MacroAssembler::corrected_idivl(Register reg) {

  6616   // Full implementation of Java idiv and irem; checks for

  6617   // special case as described in JVM spec., p.243 & p.271.

  6618   // The function returns the (pc) offset of the idivl

  6619   // instruction - may be needed for implicit exceptions.

  6620   //

  6621   //         normal case                           special case

  6622   //

  6623   // input : rax: dividend                          min_int

  6624   //         reg: divisor   (may not be rax/rdx)    -1

  6625   //

  6626   // output: rax: quotient  (= rax idiv reg)        min_int

  6627   //         rdx: remainder (= rax irem reg)        0

  6628   assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");

  6629   const int min_int = 0x80000000;

  6630   Label normal_case, special_case;

  6632   // check for special case

  6633   cmpl(rax, min_int);

  6634   jcc(Assembler::notEqual, normal_case);

  6635   xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)

  6636   cmpl(reg, -1);

         // min_int / -1: skip the idivl; rax (min_int) and rdx (0) already hold the results
  6637   jcc(Assembler::equal, special_case);

  6639   // handle normal case

  6640   bind(normal_case);

  6641   cdql();

         // capture the offset immediately before emitting idivl so it identifies that instruction
  6642   int idivl_offset = offset();

  6643   idivl(reg);

  6645   // normal and special case exit

  6646   bind(special_case);

  6648   return idivl_offset;

  6649 }

  6653 void MacroAssembler::decrementl(Register reg, int value) {

  6654   if (value == min_jint) {subl(reg, value) ; return; }

  6655   if (value <  0) { incrementl(reg, -value); return; }

  6656   if (value == 0) {                        ; return; }

  6657   if (value == 1 && UseIncDec) { decl(reg) ; return; }

  6658   /* else */      { subl(reg, value)       ; return; }

  6659 }

  6661 void MacroAssembler::decrementl(Address dst, int value) {

  6662   if (value == min_jint) {subl(dst, value) ; return; }

  6663   if (value <  0) { incrementl(dst, -value); return; }

  6664   if (value == 0) {                        ; return; }

  6665   if (value == 1 && UseIncDec) { decl(dst) ; return; }

  6666   /* else */      { subl(dst, value)       ; return; }

  6667 }

  6669 void MacroAssembler::division_with_shift (Register reg, int shift_value) {

  6670   assert (shift_value > 0, "illegal shift value");

  6671   Label _is_positive;

  6672   testl (reg, reg);

  6673   jcc (Assembler::positive, _is_positive);

  6674   int offset = (1 << shift_value) - 1 ;

  6676   if (offset == 1) {

  6677     incrementl(reg);

  6678   } else {

  6679     addl(reg, offset);

  6680   }

  6682   bind (_is_positive);

  6683   sarl(reg, shift_value);

  6684 }

  6686 void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {

  6687   if (reachable(src)) {

  6688     Assembler::divsd(dst, as_Address(src));

  6689   } else {

  6690     lea(rscratch1, src);

  6691     Assembler::divsd(dst, Address(rscratch1, 0));

  6692   }

  6693 }

  6695 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {

  6696   if (reachable(src)) {

  6697     Assembler::divss(dst, as_Address(src));

  6698   } else {

  6699     lea(rscratch1, src);

  6700     Assembler::divss(dst, Address(rscratch1, 0));

  6701   }

  6702 }

  6704 // !defined(COMPILER2) is because of stupid core builds

  6705 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)

  6706 void MacroAssembler::empty_FPU_stack() {

  6707   if (VM_Version::supports_mmx()) {

  6708     emms();

  6709   } else {

  6710     for (int i = 8; i-- > 0; ) ffree(i);

  6711   }

  6712 }

  6713 #endif // !LP64 || C1 || !C2

  // Emits an inline allocation attempt against the heap's top pointer.
  //
  // obj               - receives the start of the new object; must be rax
  // var_size_in_bytes - register holding the size, or noreg to use the constant
  // con_size_in_bytes - constant size, used only when var_size_in_bytes is noreg
  // t1                - temporary; holds the prospective new top ("end")
  // slow_case         - target when inline allocation is unavailable or fails
  //                     a bounds check
  //
  // When CMSIncrementalMode is set, or the heap does not support inline
  // contiguous allocation, only a jump to slow_case is emitted.
  6716 // Defines obj, preserves var_size_in_bytes

  6717 void MacroAssembler::eden_allocate(Register obj,

  6718                                    Register var_size_in_bytes,

  6719                                    int con_size_in_bytes,

  6720                                    Register t1,

  6721                                    Label& slow_case) {

  6722   assert(obj == rax, "obj must be in rax, for cmpxchg");

  6723   assert_different_registers(obj, var_size_in_bytes, t1);

  6724   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {

  6725     jmp(slow_case);

  6726   } else {

  6727     Register end = t1;

  6728     Label retry;

           // retry point: re-read the current top and recompute end after a failed exchange
  6729     bind(retry);

  6730     ExternalAddress heap_top((address) Universe::heap()->top_addr());

  6731     movptr(obj, heap_top);

           // end = obj + size (constant or register form)
  6732     if (var_size_in_bytes == noreg) {

  6733       lea(end, Address(obj, con_size_in_bytes));

  6734     } else {

  6735       lea(end, Address(obj, var_size_in_bytes, Address::times_1));

  6736     }

  6737     // if end < obj then we wrapped around => object too long => slow case

  6738     cmpptr(end, obj);

  6739     jcc(Assembler::below, slow_case);

           // end beyond the value stored at the heap's end address => slow case
  6740     cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));

  6741     jcc(Assembler::above, slow_case);

  6742     // Compare obj with the top addr, and if still equal, store the new top addr in

  6743     // end at the address of the top addr pointer. Sets ZF if was equal, and clears

  6744     // it otherwise. Use lock prefix for atomicity on MPs.

  6745     locked_cmpxchgptr(end, heap_top);

  6746     jcc(Assembler::notEqual, retry);

  6747   }

  6748 }

  6750 void MacroAssembler::enter() {

  6751   push(rbp);

  6752   mov(rbp, rsp);

  6753 }

  6755 void MacroAssembler::fcmp(Register tmp) {

  6756   fcmp(tmp, 1, true, true);

  6757 }

  6759 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {

  6760   assert(!pop_right || pop_left, "usage error");

  6761   if (VM_Version::supports_cmov()) {

  6762     assert(tmp == noreg, "unneeded temp");

  6763     if (pop_left) {

  6764       fucomip(index);

  6765     } else {

  6766       fucomi(index);

  6767     }

  6768     if (pop_right) {

  6769       fpop();

  6770     }

  6771   } else {

  6772     assert(tmp != noreg, "need temp");

  6773     if (pop_left) {

  6774       if (pop_right) {

  6775         fcompp();

  6776       } else {

  6777         fcomp(index);

  6778       }

  6779     } else {

  6780       fcom(index);

  6781     }

  6782     // convert FPU condition into eflags condition via rax,

  6783     save_rax(tmp);

  6784     fwait(); fnstsw_ax();

  6785     sahf();

  6786     restore_rax(tmp);

  6787   }

  6788   // condition codes set as follows:

  6789   //

  6790   // CF (corresponds to C0) if x < y

  6791   // PF (corresponds to C2) if unordered

  6792   // ZF (corresponds to C3) if x = y

  6793 }

  6795 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {

  6796   fcmp2int(dst, unordered_is_less, 1, true, true);

  6797 }

  6799 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {

  6800   fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);

  6801   Label L;

  6802   if (unordered_is_less) {

  6803     movl(dst, -1);

  6804     jcc(Assembler::parity, L);

  6805     jcc(Assembler::below , L);

  6806     movl(dst, 0);

  6807     jcc(Assembler::equal , L);

  6808     increment(dst);

  6809   } else { // unordered is greater

  6810     movl(dst, 1);

  6811     jcc(Assembler::parity, L);

  6812     jcc(Assembler::above , L);

  6813     movl(dst, 0);

  6814     jcc(Assembler::equal , L);

  6815     decrementl(dst);

  6816   }

  6817   bind(L);

  6818 }

  6820 void MacroAssembler::fld_d(AddressLiteral src) {

  6821   fld_d(as_Address(src));

  6822 }

  6824 void MacroAssembler::fld_s(AddressLiteral src) {

  6825   fld_s(as_Address(src));

  6826 }

  6828 void MacroAssembler::fld_x(AddressLiteral src) {

  6829   Assembler::fld_x(as_Address(src));

  6830 }

  6832 void MacroAssembler::fldcw(AddressLiteral src) {

  6833   Assembler::fldcw(as_Address(src));

  6834 }

  6836 void MacroAssembler::fpop() {

  6837   ffree();

  6838   fincstp();

  6839 }

  // Emits an x87 remainder loop: fprem is repeated until the FPU status word
  // no longer signals "keep going", then the ST1 operand is discarded so the
  // result is left in ST0. rax is used to read the status word and is
  // preserved through save_rax/restore_rax with tmp.
  6841 void MacroAssembler::fremr(Register tmp) {

  6842   save_rax(tmp);

  6843   { Label L;

           // loop head: each iteration performs one fprem step
  6844     bind(L);

  6845     fprem();

  6846     fwait(); fnstsw_ax();

  6847 #ifdef _LP64

           // 64-bit: test bit 0x400 of the status word in rax directly
  6848     testl(rax, 0x400);

  6849     jcc(Assembler::notEqual, L);

  6850 #else

           // 32-bit: copy the status flags into eflags and loop while the parity flag is set
  6851     sahf();

  6852     jcc(Assembler::parity, L);

  6853 #endif // _LP64

  6854   }

  6855   restore_rax(tmp);

  6856   // Result is in ST0.

  6857   // Note: fxch & fpop to get rid of ST1

  6858   // (otherwise FPU stack could overflow eventually)

  6859   fxch(1);

  6860   fpop();

  6861 }

  6864 void MacroAssembler::incrementl(AddressLiteral dst) {

  6865   if (reachable(dst)) {

  6866     incrementl(as_Address(dst));

  6867   } else {

  6868     lea(rscratch1, dst);

  6869     incrementl(Address(rscratch1, 0));

  6870   }

  6871 }

  6873 void MacroAssembler::incrementl(ArrayAddress dst) {

  6874   incrementl(as_Address(dst));

  6875 }

  6877 void MacroAssembler::incrementl(Register reg, int value) {

  6878   if (value == min_jint) {addl(reg, value) ; return; }

  6879   if (value <  0) { decrementl(reg, -value); return; }

  6880   if (value == 0) {                        ; return; }

  6881   if (value == 1 && UseIncDec) { incl(reg) ; return; }

  6882   /* else */      { addl(reg, value)       ; return; }

  6883 }

  6885 void MacroAssembler::incrementl(Address dst, int value) {

  6886   if (value == min_jint) {addl(dst, value) ; return; }

  6887   if (value <  0) { decrementl(dst, -value); return; }

  6888   if (value == 0) {                        ; return; }

  6889   if (value == 1 && UseIncDec) { incl(dst) ; return; }

  6890   /* else */      { addl(dst, value)       ; return; }

  6891 }

  6893 void MacroAssembler::jump(AddressLiteral dst) {

  6894   if (reachable(dst)) {

  6895     jmp_literal(dst.target(), dst.rspec());

  6896   } else {

  6897     lea(rscratch1, dst);

  6898     jmp(rscratch1);

  6899   }

  6900 }

  // Conditional jump to the target of an address literal.
  //
  // Reachable target: the jcc is encoded by hand, using the 2-byte short form
  // when the literal carries no relocation and the displacement fits in 8
  // bits, and the 6-byte long form otherwise.
  // Unreachable target: a short branch on reverse[cc] skips over an indirect
  // jump through rscratch1.
  6902 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {

  6903   if (reachable(dst)) {

  6904     InstructionMark im(this);

  6905     relocate(dst.reloc());

           // total instruction lengths; displacements are relative to the end of the instruction
  6906     const int short_size = 2;

  6907     const int long_size = 6;

           // distance from the current code position (start of this instruction) to the target
  6908     int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);

  6909     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {

  6910       // 0111 tttn #8-bit disp

  6911       emit_byte(0x70 | cc);

  6912       emit_byte((offs - short_size) & 0xFF);

  6913     } else {

  6914       // 0000 1111 1000 tttn #32-bit disp

  6915       emit_byte(0x0F);

  6916       emit_byte(0x80 | cc);

  6917       emit_long(offs - long_size);

  6918     }

  6919   } else {

  6920 #ifdef ASSERT

  6921     warning("reversing conditional branch");

  6922 #endif /* ASSERT */

           // branch around the indirect jump when reverse[cc] holds
  6923     Label skip;

  6924     jccb(reverse[cc], skip);

  6925     lea(rscratch1, dst);

  6926     Assembler::jmp(rscratch1);

  6927     bind(skip);

  6928   }

  6929 }

  6931 void MacroAssembler::ldmxcsr(AddressLiteral src) {

  6932   if (reachable(src)) {

  6933     Assembler::ldmxcsr(as_Address(src));

  6934   } else {

  6935     lea(rscratch1, src);

  6936     Assembler::ldmxcsr(Address(rscratch1, 0));

  6937   }

  6938 }

  6940 int MacroAssembler::load_signed_byte(Register dst, Address src) {

  6941   int off;

  6942   if (LP64_ONLY(true ||) VM_Version::is_P6()) {

  6943     off = offset();

  6944     movsbl(dst, src); // movsxb

  6945   } else {

  6946     off = load_unsigned_byte(dst, src);

  6947     shll(dst, 24);

  6948     sarl(dst, 24);

  6949   }

  6950   return off;

  6951 }

  6953 // Note: load_signed_short used to be called load_signed_word.

  6954 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler

  6955 // manual, which means 16 bits, that usage is found nowhere in HotSpot code.

  6956 // The term "word" in HotSpot means a 32- or 64-bit machine word.

  6957 int MacroAssembler::load_signed_short(Register dst, Address src) {

  6958   int off;

  6959   if (LP64_ONLY(true ||) VM_Version::is_P6()) {

  6960     // This is dubious to me since it seems safe to do a signed 16 => 64 bit

  6961     // version but this is what 64bit has always done. This seems to imply

  6962     // that users are only using 32bits worth.

  6963     off = offset();

  6964     movswl(dst, src); // movsxw

  6965   } else {

  6966     off = load_unsigned_short(dst, src);

  6967     shll(dst, 16);

  6968     sarl(dst, 16);

  6969   }

  6970   return off;

  6971 }

  6973 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {

  6974   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,

  6975   // and "3.9 Partial Register Penalties", p. 22).

  6976   int off;

  6977   if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {

  6978     off = offset();

  6979     movzbl(dst, src); // movzxb

  6980   } else {

  6981     xorl(dst, dst);

  6982     off = offset();

  6983     movb(dst, src);

  6984   }

  6985   return off;

  6986 }

  6988 // Note: load_unsigned_short used to be called load_unsigned_word.

  6989 int MacroAssembler::load_unsigned_short(Register dst, Address src) {

  6990   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,

  6991   // and "3.9 Partial Register Penalties", p. 22).

  6992   int off;

  6993   if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {

  6994     off = offset();

  6995     movzwl(dst, src); // movzxw

  6996   } else {

  6997     xorl(dst, dst);

  6998     off = offset();

  6999     movw(dst, src);

  7000   }

  7001   return off;

  7002 }

  7004 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {

  7005   switch (size_in_bytes) {

  7006 #ifndef _LP64

  7007   case  8:

  7008     assert(dst2 != noreg, "second dest register required");

  7009     movl(dst,  src);

  7010     movl(dst2, src.plus_disp(BytesPerInt));

  7011     break;

  7012 #else

  7013   case  8:  movq(dst, src); break;

  7014 #endif

  7015   case  4:  movl(dst, src); break;

  7016   case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;

  7017   case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;

  7018   default:  ShouldNotReachHere();

  7019   }

  7020 }

  7022 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {

  7023   switch (size_in_bytes) {

  7024 #ifndef _LP64

  7025   case  8:

  7026     assert(src2 != noreg, "second source register required");

  7027     movl(dst,                        src);

  7028     movl(dst.plus_disp(BytesPerInt), src2);

  7029     break;

  7030 #else

  7031   case  8:  movq(dst, src); break;

  7032 #endif

  7033   case  4:  movl(dst, src); break;

  7034   case  2:  movw(dst, src); break;

  7035   case  1:  movb(dst, src); break;

  7036   default:  ShouldNotReachHere();

  7037   }

  7038 }

  7040 void MacroAssembler::mov32(AddressLiteral dst, Register src) {

  7041   if (reachable(dst)) {

  7042     movl(as_Address(dst), src);

  7043   } else {

  7044     lea(rscratch1, dst);

  7045     movl(Address(rscratch1, 0), src);

  7046   }

  7047 }

  7049 void MacroAssembler::mov32(Register dst, AddressLiteral src) {

  7050   if (reachable(src)) {

  7051     movl(dst, as_Address(src));

  7052   } else {

  7053     lea(rscratch1, src);

  7054     movl(dst, Address(rscratch1, 0));

  7055   }

  7056 }

  7058 // C++ bool manipulation

  7060 void MacroAssembler::movbool(Register dst, Address src) {

  7061   if(sizeof(bool) == 1)

  7062     movb(dst, src);

  7063   else if(sizeof(bool) == 2)

  7064     movw(dst, src);

  7065   else if(sizeof(bool) == 4)

  7066     movl(dst, src);

  7067   else

  7068     // unsupported

  7069     ShouldNotReachHere();

  7070 }

  7072 void MacroAssembler::movbool(Address dst, bool boolconst) {

  7073   if(sizeof(bool) == 1)

  7074     movb(dst, (int) boolconst);

  7075   else if(sizeof(bool) == 2)

  7076     movw(dst, (int) boolconst);

  7077   else if(sizeof(bool) == 4)

  7078     movl(dst, (int) boolconst);

  7079   else

  7080     // unsupported

  7081     ShouldNotReachHere();

  7082 }

  7084 void MacroAssembler::movbool(Address dst, Register src) {

  7085   if(sizeof(bool) == 1)

  7086     movb(dst, src);

  7087   else if(sizeof(bool) == 2)

  7088     movw(dst, src);

  7089   else if(sizeof(bool) == 4)

  7090     movl(dst, src);

  7091   else

  7092     // unsupported

  7093     ShouldNotReachHere();

  7094 }

  7096 void MacroAssembler::movbyte(ArrayAddress dst, int src) {

  7097   movb(as_Address(dst), src);

  7098 }

  7100 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {

  7101   if (reachable(src)) {

  7102     if (UseXmmLoadAndClearUpper) {

  7103       movsd (dst, as_Address(src));

  7104     } else {

  7105       movlpd(dst, as_Address(src));

  7106     }

  7107   } else {

  7108     lea(rscratch1, src);

  7109     if (UseXmmLoadAndClearUpper) {

  7110       movsd (dst, Address(rscratch1, 0));

  7111     } else {

  7112       movlpd(dst, Address(rscratch1, 0));

  7113     }

  7114   }

  7115 }

  7117 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {

  7118   if (reachable(src)) {

  7119     movss(dst, as_Address(src));

  7120   } else {

  7121     lea(rscratch1, src);

  7122     movss(dst, Address(rscratch1, 0));

  7123   }

  7124 }

  7126 void MacroAssembler::movptr(Register dst, Register src) {

  7127   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));

  7128 }

  7130 void MacroAssembler::movptr(Register dst, Address src) {

  7131   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));

  7132 }

  7134 // src should NEVER be a real pointer. Use AddressLiteral for true pointers

  7135 void MacroAssembler::movptr(Register dst, intptr_t src) {

  7136   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));

  7137 }

  7139 void MacroAssembler::movptr(Address dst, Register src) {

  7140   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));

  7141 }

  7143 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {

  7144   if (reachable(src)) {

  7145     Assembler::movsd(dst, as_Address(src));

  7146   } else {

  7147     lea(rscratch1, src);

  7148     Assembler::movsd(dst, Address(rscratch1, 0));

  7149   }

  7150 }

  7152 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {

  7153   if (reachable(src)) {

  7154     Assembler::movss(dst, as_Address(src));

  7155   } else {

  7156     lea(rscratch1, src);

  7157     Assembler::movss(dst, Address(rscratch1, 0));

  7158   }

  7159 }

  7161 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {

  7162   if (reachable(src)) {

  7163     Assembler::mulsd(dst, as_Address(src));

  7164   } else {

  7165     lea(rscratch1, src);

  7166     Assembler::mulsd(dst, Address(rscratch1, 0));

  7167   }

  7168 }

  7170 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {

  7171   if (reachable(src)) {

  7172     Assembler::mulss(dst, as_Address(src));

  7173   } else {

  7174     lea(rscratch1, src);

  7175     Assembler::mulss(dst, Address(rscratch1, 0));

  7176   }

  7177 }

  7179 void MacroAssembler::null_check(Register reg, int offset) {

  7180   if (needs_explicit_null_check(offset)) {

  7181     // provoke OS NULL exception if reg = NULL by

  7182     // accessing M[reg] w/o changing any (non-CC) registers

  7183     // NOTE: cmpl is plenty here to provoke a segv

  7184     cmpptr(rax, Address(reg, 0));

  7185     // Note: should probably use testl(rax, Address(reg, 0));

  7186     //       may be shorter code (however, this version of

  7187     //       testl needs to be implemented first)

  7188   } else {

  7189     // nothing to do, (later) access of M[reg + offset]

  7190     // will provoke OS NULL exception if reg = NULL

  7191   }

  7192 }

  7194 void MacroAssembler::os_breakpoint() {

  7195   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability

  7196   // (e.g., MSVC can't call ps() otherwise)

  7197   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));

  7198 }

  7200 void MacroAssembler::pop_CPU_state() {

  7201   pop_FPU_state();

  7202   pop_IU_state();

  7203 }

  7205 void MacroAssembler::pop_FPU_state() {

  7206   NOT_LP64(frstor(Address(rsp, 0));)

  7207   LP64_ONLY(fxrstor(Address(rsp, 0));)

  7208   addptr(rsp, FPUStateSizeInWords * wordSize);

  7209 }

  7211 void MacroAssembler::pop_IU_state() {

  7212   popa();

  7213   LP64_ONLY(addq(rsp, 8));

  7214   popf();

  7215 }

  7217 // Save Integer and Float state

  7218 // Warning: Stack must be 16 byte aligned (64bit)

  7219 void MacroAssembler::push_CPU_state() {

  7220   push_IU_state();

  7221   push_FPU_state();

  7222 }

  7224 void MacroAssembler::push_FPU_state() {

  7225   subptr(rsp, FPUStateSizeInWords * wordSize);

  7226 #ifndef _LP64

  7227   fnsave(Address(rsp, 0));

  7228   fwait();

  7229 #else

  7230   fxsave(Address(rsp, 0));

  7231 #endif // LP64

  7232 }

  7234 void MacroAssembler::push_IU_state() {

  7235   // Push flags first because pusha kills them

  7236   pushf();

  7237   // Make sure rsp stays 16-byte aligned

  7238   LP64_ONLY(subq(rsp, 8));

  7239   pusha();

  7240 }

  7242 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {

  7243   // determine java_thread register

  7244   if (!java_thread->is_valid()) {

  7245     java_thread = rdi;

  7246     get_thread(java_thread);

  7247   }

  7248   // we must set sp to zero to clear frame

  7249   movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);

  7250   if (clear_fp) {

  7251     movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);

  7252   }

  7254   if (clear_pc)

  7255     movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

  7257 }

  7259 void MacroAssembler::restore_rax(Register tmp) {

  7260   if (tmp == noreg) pop(rax);

  7261   else if (tmp != rax) mov(rax, tmp);

  7262 }

  7264 void MacroAssembler::round_to(Register reg, int modulus) {

  7265   addptr(reg, modulus - 1);

  7266   andptr(reg, -modulus);

  7267 }

  7269 void MacroAssembler::save_rax(Register tmp) {

  7270   if (tmp == noreg) push(rax);

  7271   else if (tmp != rax) mov(tmp, rax);

  7272 }

  7274 // Write serialization page so VM thread can do a pseudo remote membar.

  7275 // We use the current thread pointer to calculate a thread specific

  7276 // offset to write to within the page. This minimizes bus traffic

  7277 // due to cache line collision.

  7278 void MacroAssembler::serialize_memory(Register thread, Register tmp) {

  7279   movl(tmp, thread);

  7280   shrl(tmp, os::get_serialize_page_shift_count());

  7281   andl(tmp, (os::vm_page_size() - sizeof(int)));

  7283   Address index(noreg, tmp, Address::times_1);

  7284   ExternalAddress page(os::get_memory_serialize_page());

  7286   // Size of store must match masking code above

  7287   movl(as_Address(ArrayAddress(page, index)), tmp);

  7288 }

  7290 // Calls to C land

  7291 //

  7292 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded

  7293 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp

  7294 // has to be reset to 0. This is required to allow proper stack traversal.

  7295 void MacroAssembler::set_last_Java_frame(Register java_thread,

  7296                                          Register last_java_sp,

  7297                                          Register last_java_fp,

  7298                                          address  last_java_pc) {

  7299   // determine java_thread register

  7300   if (!java_thread->is_valid()) {

  7301     java_thread = rdi;

  7302     get_thread(java_thread);

  7303   }

  7304   // determine last_java_sp register

  7305   if (!last_java_sp->is_valid()) {

  7306     last_java_sp = rsp;

  7307   }

  7309   // last_java_fp is optional

  7311   if (last_java_fp->is_valid()) {

  7312     movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);

  7313   }

  7315   // last_java_pc is optional

  7317   if (last_java_pc != NULL) {

  7318     lea(Address(java_thread,

  7319                  JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),

  7320         InternalAddress(last_java_pc));

  7322   }

  7323   movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);

  7324 }

  7326 void MacroAssembler::shlptr(Register dst, int imm8) {

  7327   LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));

  7328 }

  7330 void MacroAssembler::shrptr(Register dst, int imm8) {

  7331   LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));

  7332 }

  7334 void MacroAssembler::sign_extend_byte(Register reg) {

  7335   if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {

  7336     movsbl(reg, reg); // movsxb

  7337   } else {

  7338     shll(reg, 24);

  7339     sarl(reg, 24);

  7340   }

  7341 }

  7343 void MacroAssembler::sign_extend_short(Register reg) {

  7344   if (LP64_ONLY(true ||) VM_Version::is_P6()) {

  7345     movswl(reg, reg); // movsxw

  7346   } else {

  7347     shll(reg, 16);

  7348     sarl(reg, 16);

  7349   }

  7350 }

  7352 void MacroAssembler::testl(Register dst, AddressLiteral src) {

  7353   assert(reachable(src), "Address should be reachable");

  7354   testl(dst, as_Address(src));

  7355 }

  7357 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {

  7358   if (reachable(src)) {

  7359     Assembler::sqrtsd(dst, as_Address(src));

  7360   } else {

  7361     lea(rscratch1, src);

  7362     Assembler::sqrtsd(dst, Address(rscratch1, 0));

  7363   }

  7364 }

  7366 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {

  7367   if (reachable(src)) {

  7368     Assembler::sqrtss(dst, as_Address(src));

  7369   } else {

  7370     lea(rscratch1, src);

  7371     Assembler::sqrtss(dst, Address(rscratch1, 0));

  7372   }

  7373 }

  7375 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {

  7376   if (reachable(src)) {

  7377     Assembler::subsd(dst, as_Address(src));

  7378   } else {

  7379     lea(rscratch1, src);

  7380     Assembler::subsd(dst, Address(rscratch1, 0));

  7381   }

  7382 }

  7384 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {

  7385   if (reachable(src)) {

  7386     Assembler::subss(dst, as_Address(src));

  7387   } else {

  7388     lea(rscratch1, src);

  7389     Assembler::subss(dst, Address(rscratch1, 0));

  7390   }

  7391 }

  7393 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {

  7394   if (reachable(src)) {

  7395     Assembler::ucomisd(dst, as_Address(src));

  7396   } else {

  7397     lea(rscratch1, src);

  7398     Assembler::ucomisd(dst, Address(rscratch1, 0));

  7399   }

  7400 }

  7402 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {

  7403   if (reachable(src)) {

  7404     Assembler::ucomiss(dst, as_Address(src));

  7405   } else {

  7406     lea(rscratch1, src);

  7407     Assembler::ucomiss(dst, Address(rscratch1, 0));

  7408   }

  7409 }

  7411 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {

  7412   // Used in sign-bit flipping with aligned address.

  7413   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");

  7414   if (reachable(src)) {

  7415     Assembler::xorpd(dst, as_Address(src));

  7416   } else {

  7417     lea(rscratch1, src);

  7418     Assembler::xorpd(dst, Address(rscratch1, 0));

  7419   }

  7420 }

  7422 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {

  7423   // Used in sign-bit flipping with aligned address.

  7424   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");

  7425   if (reachable(src)) {

  7426     Assembler::xorps(dst, as_Address(src));

  7427   } else {

  7428     lea(rscratch1, src);

  7429     Assembler::xorps(dst, Address(rscratch1, 0));

  7430   }

  7431 }

  7433 // AVX 3-operands instructions

  7435 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7436   if (reachable(src)) {

  7437     vaddsd(dst, nds, as_Address(src));

  7438   } else {

  7439     lea(rscratch1, src);

  7440     vaddsd(dst, nds, Address(rscratch1, 0));

  7441   }

  7442 }

  7444 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7445   if (reachable(src)) {

  7446     vaddss(dst, nds, as_Address(src));

  7447   } else {

  7448     lea(rscratch1, src);

  7449     vaddss(dst, nds, Address(rscratch1, 0));

  7450   }

  7451 }

  7453 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7454   if (reachable(src)) {

  7455     vandpd(dst, nds, as_Address(src));

  7456   } else {

  7457     lea(rscratch1, src);

  7458     vandpd(dst, nds, Address(rscratch1, 0));

  7459   }

  7460 }

  7462 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7463   if (reachable(src)) {

  7464     vandps(dst, nds, as_Address(src));

  7465   } else {

  7466     lea(rscratch1, src);

  7467     vandps(dst, nds, Address(rscratch1, 0));

  7468   }

  7469 }

  7471 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7472   if (reachable(src)) {

  7473     vdivsd(dst, nds, as_Address(src));

  7474   } else {

  7475     lea(rscratch1, src);

  7476     vdivsd(dst, nds, Address(rscratch1, 0));

  7477   }

  7478 }

  7480 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7481   if (reachable(src)) {

  7482     vdivss(dst, nds, as_Address(src));

  7483   } else {

  7484     lea(rscratch1, src);

  7485     vdivss(dst, nds, Address(rscratch1, 0));

  7486   }

  7487 }

  7489 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7490   if (reachable(src)) {

  7491     vmulsd(dst, nds, as_Address(src));

  7492   } else {

  7493     lea(rscratch1, src);

  7494     vmulsd(dst, nds, Address(rscratch1, 0));

  7495   }

  7496 }

  7498 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7499   if (reachable(src)) {

  7500     vmulss(dst, nds, as_Address(src));

  7501   } else {

  7502     lea(rscratch1, src);

  7503     vmulss(dst, nds, Address(rscratch1, 0));

  7504   }

  7505 }

  7507 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7508   if (reachable(src)) {

  7509     vsubsd(dst, nds, as_Address(src));

  7510   } else {

  7511     lea(rscratch1, src);

  7512     vsubsd(dst, nds, Address(rscratch1, 0));

  7513   }

  7514 }

  7516 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7517   if (reachable(src)) {

  7518     vsubss(dst, nds, as_Address(src));

  7519   } else {

  7520     lea(rscratch1, src);

  7521     vsubss(dst, nds, Address(rscratch1, 0));

  7522   }

  7523 }

  7525 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7526   if (reachable(src)) {

  7527     vxorpd(dst, nds, as_Address(src));

  7528   } else {

  7529     lea(rscratch1, src);

  7530     vxorpd(dst, nds, Address(rscratch1, 0));

  7531   }

  7532 }

  7534 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {

  7535   if (reachable(src)) {

  7536     vxorps(dst, nds, as_Address(src));

  7537   } else {

  7538     lea(rscratch1, src);

  7539     vxorps(dst, nds, Address(rscratch1, 0));

  7540   }

  7541 }

  7544 //////////////////////////////////////////////////////////////////////////////////

  7545 #ifndef SERIALGC

  7547 void MacroAssembler::g1_write_barrier_pre(Register obj,

  7548                                           Register pre_val,

  7549                                           Register thread,

  7550                                           Register tmp,

  7551                                           bool tosca_live,

  7552                                           bool expand_call) {

  7554   // If expand_call is true then we expand the call_VM_leaf macro

  7555   // directly to skip generating the check by

  7556   // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

  7558 #ifdef _LP64

  7559   assert(thread == r15_thread, "must be");

  7560 #endif // _LP64

  7562   Label done;

  7563   Label runtime;

  7565   assert(pre_val != noreg, "check this code");

  7567   if (obj != noreg) {

  7568     assert_different_registers(obj, pre_val, tmp);

  7569     assert(pre_val != rax, "check this code");

  7570   }

  7572   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +

  7573                                        PtrQueue::byte_offset_of_active()));

  7574   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +

  7575                                        PtrQueue::byte_offset_of_index()));

  7576   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +

  7577                                        PtrQueue::byte_offset_of_buf()));

  7580   // Is marking active?

  7581   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {

  7582     cmpl(in_progress, 0);

  7583   } else {

  7584     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");

  7585     cmpb(in_progress, 0);

  7586   }

  7587   jcc(Assembler::equal, done);

  7589   // Do we need to load the previous value?

  7590   if (obj != noreg) {

  7591     load_heap_oop(pre_val, Address(obj, 0));

  7592   }

  7594   // Is the previous value null?

  7595   cmpptr(pre_val, (int32_t) NULL_WORD);

  7596   jcc(Assembler::equal, done);

  7598   // Can we store original value in the thread's buffer?

  7599   // Is index == 0?

  7600   // (The index field is typed as size_t.)

  7602   movptr(tmp, index);                   // tmp := *index_adr

  7603   cmpptr(tmp, 0);                       // tmp == 0?

  7604   jcc(Assembler::equal, runtime);       // If yes, goto runtime

  7606   subptr(tmp, wordSize);                // tmp := tmp - wordSize

  7607   movptr(index, tmp);                   // *index_adr := tmp

  7608   addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  7610   // Record the previous value

  7611   movptr(Address(tmp, 0), pre_val);

  7612   jmp(done);

  7614   bind(runtime);

  7615   // save the live input values

  7616   if(tosca_live) push(rax);

  7618   if (obj != noreg && obj != rax)

  7619     push(obj);

  7621   if (pre_val != rax)

  7622     push(pre_val);

  7624   // Calling the runtime using the regular call_VM_leaf mechanism generates

  7625   // code (generated by InterpreterMacroAssember::call_VM_leaf_base)

  7626   // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.

  7627   //

  7628   // If we care generating the pre-barrier without a frame (e.g. in the

  7629   // intrinsified Reference.get() routine) then ebp might be pointing to

  7630   // the caller frame and so this check will most likely fail at runtime.

  7631   //

  7632   // Expanding the call directly bypasses the generation of the check.

  7633   // So when we do not have have a full interpreter frame on the stack

  7634   // expand_call should be passed true.

  7636   NOT_LP64( push(thread); )

  7638   if (expand_call) {

  7639     LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )

  7640     pass_arg1(this, thread);

  7641     pass_arg0(this, pre_val);

  7642     MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);

  7643   } else {

  7644     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);

  7645   }

  7647   NOT_LP64( pop(thread); )

  7649   // save the live input values

  7650   if (pre_val != rax)

  7651     pop(pre_val);

  7653   if (obj != noreg && obj != rax)

  7654     pop(obj);

  7656   if(tosca_live) pop(rax);

  7658   bind(done);

  7659 }

  7661 void MacroAssembler::g1_write_barrier_post(Register store_addr,

  7662                                            Register new_val,

  7663                                            Register thread,

  7664                                            Register tmp,

  7665                                            Register tmp2) {

  7666 #ifdef _LP64

  7667   assert(thread == r15_thread, "must be");

  7668 #endif // _LP64

  7670   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +

  7671                                        PtrQueue::byte_offset_of_index()));

  7672   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +

  7673                                        PtrQueue::byte_offset_of_buf()));

  7675   BarrierSet* bs = Universe::heap()->barrier_set();

  7676   CardTableModRefBS* ct = (CardTableModRefBS*)bs;

  7677   Label done;

  7678   Label runtime;

  7680   // Does store cross heap regions?

  7682   movptr(tmp, store_addr);

  7683   xorptr(tmp, new_val);

  7684   shrptr(tmp, HeapRegion::LogOfHRGrainBytes);

  7685   jcc(Assembler::equal, done);

  7687   // crosses regions, storing NULL?

  7689   cmpptr(new_val, (int32_t) NULL_WORD);

  7690   jcc(Assembler::equal, done);

  7692   // storing region crossing non-NULL, is card already dirty?

  7694   ExternalAddress cardtable((address) ct->byte_map_base);

  7695   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  7696 #ifdef _LP64

  7697   const Register card_addr = tmp;

  7699   movq(card_addr, store_addr);

  7700   shrq(card_addr, CardTableModRefBS::card_shift);

  7702   lea(tmp2, cardtable);

  7704   // get the address of the card

  7705   addq(card_addr, tmp2);

  7706 #else

  7707   const Register card_index = tmp;

  7709   movl(card_index, store_addr);

  7710   shrl(card_index, CardTableModRefBS::card_shift);

  7712   Address index(noreg, card_index, Address::times_1);

  7713   const Register card_addr = tmp;

  7714   lea(card_addr, as_Address(ArrayAddress(cardtable, index)));

  7715 #endif

  7716   cmpb(Address(card_addr, 0), 0);

  7717   jcc(Assembler::equal, done);

  7719   // storing a region crossing, non-NULL oop, card is clean.

  7720   // dirty card and log.

  7722   movb(Address(card_addr, 0), 0);

  7724   cmpl(queue_index, 0);

  7725   jcc(Assembler::equal, runtime);

  7726   subl(queue_index, wordSize);

  7727   movptr(tmp2, buffer);

  7728 #ifdef _LP64

  7729   movslq(rscratch1, queue_index);

  7730   addq(tmp2, rscratch1);

  7731   movq(Address(tmp2, 0), card_addr);

  7732 #else

  7733   addl(tmp2, queue_index);

  7734   movl(Address(tmp2, 0), card_index);

  7735 #endif

  7736   jmp(done);

  7738   bind(runtime);

  7739   // save the live input values

  7740   push(store_addr);

  7741   push(new_val);

  7742 #ifdef _LP64

  7743   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);

  7744 #else

  7745   push(thread);

  7746   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);

  7747   pop(thread);

  7748 #endif

  7749   pop(new_val);

  7750   pop(store_addr);

  7752   bind(done);

  7753 }

  7755 #endif // SERIALGC

  7756 //////////////////////////////////////////////////////////////////////////////////

  7759 void MacroAssembler::store_check(Register obj) {

  7760   // Does a store check for the oop in register obj. The content of

  7761   // register obj is destroyed afterwards.

  7762   store_check_part_1(obj);

  7763   store_check_part_2(obj);

  7764 }

  7766 void MacroAssembler::store_check(Register obj, Address dst) {

  7767   store_check(obj);

  7768 }

  7771 // split the store check operation so that other instructions can be scheduled inbetween

  7772 void MacroAssembler::store_check_part_1(Register obj) {

  7773   BarrierSet* bs = Universe::heap()->barrier_set();

  7774   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");

  7775   shrptr(obj, CardTableModRefBS::card_shift);

  7776 }

  7778 void MacroAssembler::store_check_part_2(Register obj) {

  7779   BarrierSet* bs = Universe::heap()->barrier_set();

  7780   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");

  7781   CardTableModRefBS* ct = (CardTableModRefBS*)bs;

  7782   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  7784   // The calculation for byte_map_base is as follows:

  7785   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);

  7786   // So this essentially converts an address to a displacement and

  7787   // it will never need to be relocated. On 64bit however the value may be too

  7788   // large for a 32bit displacement

  7790   intptr_t disp = (intptr_t) ct->byte_map_base;

  7791   if (is_simm32(disp)) {

  7792     Address cardtable(noreg, obj, Address::times_1, disp);

  7793     movb(cardtable, 0);

  7794   } else {

  7795     // By doing it as an ExternalAddress disp could be converted to a rip-relative

  7796     // displacement and done in a single instruction given favorable mapping and

  7797     // a smarter version of as_Address. Worst case it is two instructions which

  7798     // is no worse off then loading disp into a register and doing as a simple

  7799     // Address() as above.

  7800     // We can't do as ExternalAddress as the only style since if disp == 0 we'll

  7801     // assert since NULL isn't acceptable in a reloci (see 6644928). In any case

  7802     // in some cases we'll get a single instruction version.

  7804     ExternalAddress cardtable((address)disp);

  7805     Address index(noreg, obj, Address::times_1);

  7806     movb(as_Address(ArrayAddress(cardtable, index)), 0);

  7807   }

  7808 }

  7810 void MacroAssembler::subptr(Register dst, int32_t imm32) {

  7811   LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));

  7812 }

  7814 void MacroAssembler::subptr(Register dst, Register src) {

  7815   LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));

  7816 }

  7818 // C++ bool manipulation

  7819 void MacroAssembler::testbool(Register dst) {

  7820   if(sizeof(bool) == 1)

  7821     testb(dst, 0xff);

  7822   else if(sizeof(bool) == 2) {

  7823     // testw implementation needed for two byte bools

  7824     ShouldNotReachHere();

  7825   } else if(sizeof(bool) == 4)

  7826     testl(dst, dst);

  7827   else

  7828     // unsupported

  7829     ShouldNotReachHere();

  7830 }

  7832 void MacroAssembler::testptr(Register dst, Register src) {

  7833   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));

  7834 }

  7836 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.

  7837 void MacroAssembler::tlab_allocate(Register obj,

  7838                                    Register var_size_in_bytes,

  7839                                    int con_size_in_bytes,

  7840                                    Register t1,

  7841                                    Register t2,

  7842                                    Label& slow_case) {

  7843   assert_different_registers(obj, t1, t2);

  7844   assert_different_registers(obj, var_size_in_bytes, t1);

  7845   Register end = t2;

  7846   Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  7848   verify_tlab();

  7850   NOT_LP64(get_thread(thread));

  7852   movptr(obj, Address(thread, JavaThread::tlab_top_offset()));

  7853   if (var_size_in_bytes == noreg) {

  7854     lea(end, Address(obj, con_size_in_bytes));

  7855   } else {

  7856     lea(end, Address(obj, var_size_in_bytes, Address::times_1));

  7857   }

  7858   cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));

  7859   jcc(Assembler::above, slow_case);

  7861   // update the tlab top pointer

  7862   movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  7864   // recover var_size_in_bytes if necessary

  7865   if (var_size_in_bytes == end) {

  7866     subptr(var_size_in_bytes, obj);

  7867   }

  7868   verify_tlab();

  7869 }

  7871 // Preserves rbx, and rdx.

  7872 Register MacroAssembler::tlab_refill(Label& retry,

  7873                                      Label& try_eden,

  7874                                      Label& slow_case) {

  7875   Register top = rax;

  7876   Register t1  = rcx;

  7877   Register t2  = rsi;

  7878   Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);

  7879   assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);

  7880   Label do_refill, discard_tlab;

  7882   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {

  7883     // No allocation in the shared eden.

  7884     jmp(slow_case);

  7885   }

  7887   NOT_LP64(get_thread(thread_reg));

  7889   movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));

  7890   movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  7892   // calculate amount of free space

  7893   subptr(t1, top);

  7894   shrptr(t1, LogHeapWordSize);

  7896   // Retain tlab and allocate object in shared space if

  7897   // the amount free in the tlab is too large to discard.

  7898   cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));

  7899   jcc(Assembler::lessEqual, discard_tlab);

  7901   // Retain

  7902   // %%% yuck as movptr...

  7903   movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());

  7904   addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);

  7905   if (TLABStats) {

  7906     // increment number of slow_allocations

  7907     addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);

  7908   }

  7909   jmp(try_eden);

  7911   bind(discard_tlab);

  7912   if (TLABStats) {

  7913     // increment number of refills

  7914     addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);

  7915     // accumulate wastage -- t1 is amount free in tlab

  7916     addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);

  7917   }

  7919   // if tlab is currently allocated (top or end != null) then

  7920   // fill [top, end + alignment_reserve) with array object

  7921   testptr(top, top);

  7922   jcc(Assembler::zero, do_refill);

  7924   // set up the mark word

  7925   movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));

  7926   // set the length to the remaining space

  7927   subptr(t1, typeArrayOopDesc::header_size(T_INT));

  7928   addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());

  7929   shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));

  7930   movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);

  7931   // set klass to intArrayKlass

  7932   // dubious reloc why not an oop reloc?

  7933   movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));

  7934   // store klass last.  concurrent gcs assumes klass length is valid if

  7935   // klass field is not null.

  7936   store_klass(top, t1);

  7938   movptr(t1, top);

  7939   subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));

  7940   incr_allocated_bytes(thread_reg, t1, 0);

  7942   // refill the tlab with an eden allocation

  7943   bind(do_refill);

  7944   movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));

  7945   shlptr(t1, LogHeapWordSize);

  7946   // allocate new tlab, address returned in top

  7947   eden_allocate(top, t1, 0, t2, slow_case);

  7949   // Check that t1 was preserved in eden_allocate.

  7950 #ifdef ASSERT

  7951   if (UseTLAB) {

  7952     Label ok;

  7953     Register tsize = rsi;

  7954     assert_different_registers(tsize, thread_reg, t1);

  7955     push(tsize);

  7956     movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));

  7957     shlptr(tsize, LogHeapWordSize);

  7958     cmpptr(t1, tsize);

  7959     jcc(Assembler::equal, ok);

  7960     stop("assert(t1 != tlab size)");

  7961     should_not_reach_here();

  7963     bind(ok);

  7964     pop(tsize);

  7965   }

  7966 #endif

  7967   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);

  7968   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);

  7969   addptr(top, t1);

  7970   subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());

  7971   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);

  7972   verify_tlab();

  7973   jmp(retry);

  7975   return thread_reg; // for use by caller

  7976 }

  7978 void MacroAssembler::incr_allocated_bytes(Register thread,

  7979                                           Register var_size_in_bytes,

  7980                                           int con_size_in_bytes,

  7981                                           Register t1) {

  7982 #ifdef _LP64

  7983   if (var_size_in_bytes->is_valid()) {

  7984     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);

  7985   } else {

  7986     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);

  7987   }

  7988 #else

  7989   if (!thread->is_valid()) {

  7990     assert(t1->is_valid(), "need temp reg");

  7991     thread = t1;

  7992     get_thread(thread);

  7993   }

  7995   if (var_size_in_bytes->is_valid()) {

  7996     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);

  7997   } else {

  7998     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);

  7999   }

  8000   adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);

  8001 #endif

  8002 }

  // pi/4: trigfunc() compares |x| against this bound to choose between the
  // inline x87 instruction and the runtime call.
  8004 static const double     pi_4 =  0.7853981633974483;

  // Emits code for sin/cos/tan of the value on top of the x87 stack.
  //   trig                - 's', 'c' or 't' selects fsin/fcos/ftan on the fast
  //                         path and SharedRuntime::dsin/dcos/dtan on the slow
  //                         path; anything else trips the "bad intrinsic" assert
  //   num_fpu_regs_in_use - when > 1, that many x87 registers are spilled to
  //                         the stack around the runtime call and reloaded
  // The result is left in F-TOS.  General registers are preserved with
  // pusha()/popa() around the runtime call; rbx is saved/restored separately
  // when it is needed as the fcmp temporary (no cmov support).
  8006 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {

  8007   // A hand-coded argument reduction for values in fabs(pi/4, pi/2)

  8008   // was attempted in this code; unfortunately it appears that the

  8009   // switch to 80-bit precision and back causes this to be

  8010   // unprofitable compared with simply performing a runtime call if

  8011   // the argument is out of the (-pi/4, pi/4) range.

  8013   Register tmp = noreg;

  8014   if (!VM_Version::supports_cmov()) {

  8015     // fcmp needs a temporary so preserve rbx,

  8016     tmp = rbx;

  8017     push(tmp);

  8018   }

  8020   Label slow_case, done;

  8022   ExternalAddress pi4_adr = (address)&pi_4;

         // The inline fast path is only emitted when pi_4 is reachable;
         // otherwise the generated code consists of the slow path alone.
  8023   if (reachable(pi4_adr)) {

  8024     // x ?<= pi/4

  8025     fld_d(pi4_adr);

  8026     fld_s(1);                // Stack:  X  PI/4  X

  8027     fabs();                  // Stack: |X| PI/4  X

  8028     fcmp(tmp);

  8029     jcc(Assembler::above, slow_case);

  8031     // fastest case: -pi/4 <= x <= pi/4

  8032     switch(trig) {

  8033     case 's':

  8034       fsin();

  8035       break;

  8036     case 'c':

  8037       fcos();

  8038       break;

  8039     case 't':

  8040       ftan();

  8041       break;

  8042     default:

  8043       assert(false, "bad intrinsic");

  8044       break;

  8045     }

  8046     jmp(done);

  8047   }

  8049   // slow case: runtime call

  8050   bind(slow_case);

  8051   // Preserve registers across runtime call

  8052   pusha();

         // Stays -1 unless the FPU stack is spilled below.
  8053   int incoming_argument_and_return_value_offset = -1;

  8054   if (num_fpu_regs_in_use > 1) {

  8055     // Must preserve all other FPU regs (could alternatively convert

  8056     // SharedRuntime::dsin and dcos into assembly routines known not to trash

  8057     // FPU state, but can not trust C compiler)

  8058     NEEDS_CLEANUP;

  8059     // NOTE that in this case we also push the incoming argument to

  8060     // the stack and restore it later; we also use this stack slot to

  8061     // hold the return value from dsin or dcos.

  8062     for (int i = 0; i < num_fpu_regs_in_use; i++) {

  8063       subptr(rsp, sizeof(jdouble));

  8064       fstp_d(Address(rsp, 0));

  8065     }

           // The argument was stored first, so it sits in the highest slot.
  8066     incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);

  8067     fld_d(Address(rsp, incoming_argument_and_return_value_offset));

  8068   }

         // Store the argument from F-TOS into a fresh stack slot for the call.
  8069   subptr(rsp, sizeof(jdouble));

  8070   fstp_d(Address(rsp, 0));

  8071 #ifdef _LP64

  8072   movdbl(xmm0, Address(rsp, 0));

  8073 #endif // _LP64

  8075   // NOTE: we must not use call_VM_leaf here because that requires a

  8076   // complete interpreter frame in debug mode -- same bug as 4387334

  8077   // MacroAssembler::call_VM_leaf_base is perfectly safe and will

  8078   // do proper 64bit abi

  8080   NEEDS_CLEANUP;

  8081   // Need to add stack banging before this runtime call if it needs to

  8082   // be taken; however, there is no generic stack banging routine at

  8083   // the MacroAssembler level

  8084   switch(trig) {

  8085   case 's':

  8086     {

  8087       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);

  8088     }

  8089     break;

  8090   case 'c':

  8091     {

  8092       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);

  8093     }

  8094     break;

  8095   case 't':

  8096     {

  8097       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);

  8098     }

  8099     break;

  8100   default:

  8101     assert(false, "bad intrinsic");

  8102     break;

  8103   }

         // LP64: move the xmm0 result back onto the x87 stack by way of the
         // argument's stack slot.
  8104 #ifdef _LP64

  8105     movsd(Address(rsp, 0), xmm0);

  8106     fld_d(Address(rsp, 0));

  8107 #endif // _LP64

  8108   addptr(rsp, sizeof(jdouble));

  8109   if (num_fpu_regs_in_use > 1) {

  8110     // Must save return value to stack and then restore entire FPU stack

  8111     fstp_d(Address(rsp, incoming_argument_and_return_value_offset));

  8112     for (int i = 0; i < num_fpu_regs_in_use; i++) {

  8113       fld_d(Address(rsp, 0));

  8114       addptr(rsp, sizeof(jdouble));

  8115     }

  8116   }

  8117   popa();

  8119   // Come here with result in F-TOS

  8120   bind(done);

         // Restore rbx if it was pushed above as the fcmp temporary.
  8122   if (tmp != noreg) {

  8123     pop(tmp);

  8124   }

  8125 }

  8128 // Look up the method for a megamorphic invokeinterface call.

  8129 // The target method is determined by <intf_klass, itable_index>.

  8130 // The receiver klass is in recv_klass.

  8131 // On success, the result will be in method_result, and execution falls through.

  8132 // On failure, execution transfers to the given label.

  // Register effects visible in this routine: scan_temp is overwritten (vtable
  // length, then the itable scan pointer, then the matched entry's offset),
  // recv_klass is advanced by the scaled itable_index plus itentry_off, and
  // method_result is used as a scratch for each scanned interface entry.
  // A non-constant itable_index must live in method_result (asserted below).
  8133 void MacroAssembler::lookup_interface_method(Register recv_klass,

  8134                                              Register intf_klass,

  8135                                              RegisterOrConstant itable_index,

  8136                                              Register method_result,

  8137                                              Register scan_temp,

  8138                                              Label& L_no_such_interface) {

  8139   assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);

  8140   assert(itable_index.is_constant() || itable_index.as_register() == method_result,

  8141          "caller must use same register for non-constant itable index as for method");

  8143   // Compute start of first itableOffsetEntry (which is at the end of the vtable)

  8144   int vtable_base = instanceKlass::vtable_start_offset() * wordSize;

  8145   int itentry_off = itableMethodEntry::method_offset_in_bytes();

  8146   int scan_step   = itableOffsetEntry::size() * wordSize;

  8147   int vte_size    = vtableEntry::size() * wordSize;

  8148   Address::ScaleFactor times_vte_scale = Address::times_ptr;

  8149   assert(vte_size == wordSize, "else adjust times_vte_scale");

         // scan_temp = vtable length (32-bit load) ...
  8151   movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  8153   // %%% Could store the aligned, prescaled offset in the klassoop.

         // ... then scan_temp = recv_klass + vtable_base + length * entry size.
  8154   lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));

  8155   if (HeapWordsPerLong > 1) {

  8156     // Round up to align_object_offset boundary

  8157     // see code for instanceKlass::start_of_itable!

  8158     round_to(scan_temp, BytesPerLong);

  8159   }

  8161   // Adjust recv_klass by scaled itable_index, so we can free itable_index.

  8162   assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");

  8163   lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  8165   // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {

  8166   //   if (scan->interface() == intf) {

  8167   //     result = (klass + scan->offset() + itable_index);

  8168   //   }

  8169   // }

  8170   Label search, found_method;

         // The compare is emitted twice.  The first copy (peel == 1) jumps to
         // found_method on a hit and otherwise falls into the loop body bound at
         // 'search'.  The second copy (peel == 0) jumps back to 'search' on a
         // miss and falls through to found_method on a hit.
  8172   for (int peel = 1; peel >= 0; peel--) {

  8173     movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));

  8174     cmpptr(intf_klass, method_result);

  8176     if (peel) {

  8177       jccb(Assembler::equal, found_method);

  8178     } else {

  8179       jccb(Assembler::notEqual, search);

  8180       // (invert the test to fall through to found_method...)

  8181     }

  8183     if (!peel)  break;

  8185     bind(search);

  8187     // Check that the previous entry is non-null.  A null entry means that

  8188     // the receiver class doesn't implement the interface, and wasn't the

  8189     // same as when the caller was compiled.

  8190     testptr(method_result, method_result);

  8191     jcc(Assembler::zero, L_no_such_interface);

  8192     addptr(scan_temp, scan_step);

  8193   }

  8195   bind(found_method);

  8197   // Got a hit.

         // Load the matched entry's 32-bit offset and use it to index from the
         // already-adjusted recv_klass to fetch the method.
  8198   movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));

  8199   movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));

  8200 }

  8203 void MacroAssembler::check_klass_subtype(Register sub_klass,

  8204                            Register super_klass,

  8205                            Register temp_reg,

  8206                            Label& L_success) {

  8207   Label L_failure;

  8208   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);

  8209   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);

  8210   bind(L_failure);

  8211 }

  // Emits the fast part of a subtype check of sub_klass against super_klass.
  //   temp_reg           - receives the super check offset when it has to be
  //                        loaded from super_klass (see must_load_sco)
  //   L_success          - target when the check succeeds
  //   L_failure          - target when the check definitely fails
  //   L_slow_path        - target when the fast path cannot decide
  //   super_check_offset - register or constant; a constant of -1 means the
  //                        offset is loaded from super_klass into temp_reg
  // At most one of the three labels may be NULL; a NULL label means "fall
  // through to the end of the emitted code" for that outcome.
  8214 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,

  8215                                                    Register super_klass,

  8216                                                    Register temp_reg,

  8217                                                    Label* L_success,

  8218                                                    Label* L_failure,

  8219                                                    Label* L_slow_path,

  8220                                         RegisterOrConstant super_check_offset) {

  8221   assert_different_registers(sub_klass, super_klass, temp_reg);

  8222   bool must_load_sco = (super_check_offset.constant_or_zero() == -1);

  8223   if (super_check_offset.is_register()) {

  8224     assert_different_registers(sub_klass, super_klass,

  8225                                super_check_offset.as_register());

  8226   } else if (must_load_sco) {

  8227     assert(temp_reg != noreg, "supply either a temp or a register offset");

  8228   }

         // A NULL label is redirected to L_fallthrough, which is bound at the
         // very end of this routine.
  8230   Label L_fallthrough;

  8231   int label_nulls = 0;

  8232   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }

  8233   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }

  8234   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }

  8235   assert(label_nulls <= 1, "at most one NULL in the batch");

  8237   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());

  8238   int sco_offset = in_bytes(Klass::super_check_offset_offset());

  8239   Address super_check_offset_addr(super_klass, sco_offset);

  8241   // Hacked jcc, which "knows" that L_fallthrough, at least, is in

  8242   // range of a jccb.  If this routine grows larger, reconsider at

  8243   // least some of these.

  8244 #define local_jcc(assembler_cond, label)                                \

  8245   if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \

  8246   else                             jcc( assembler_cond, label) /*omit semi*/

  8248   // Hacked jmp, which may only be used just before L_fallthrough.

  8249 #define final_jmp(label)                                                \

  8250   if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \

  8251   else                            jmp(label)                /*omit semi*/

  8253   // If the pointers are equal, we are done (e.g., String[] elements).

  8254   // This self-check enables sharing of secondary supertype arrays among

  8255   // non-primary types such as array-of-interface.  Otherwise, each such

  8256   // type would need its own customized SSA.

  8257   // We move this check to the front of the fast path because many

  8258   // type checks are in fact trivially successful in this manner,

  8259   // so we get a nicely predicted branch right at the start of the check.

  8260   cmpptr(sub_klass, super_klass);

  8261   local_jcc(Assembler::equal, *L_success);

  8263   // Check the supertype display:

  8264   if (must_load_sco) {

  8265     // Positive movl does right thing on LP64.

  8266     movl(temp_reg, super_check_offset_addr);

  8267     super_check_offset = RegisterOrConstant(temp_reg);

  8268   }

  8269   Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);

  8270   cmpptr(super_klass, super_check_addr); // load displayed supertype

  8272   // This check has worked decisively for primary supers.

  8273   // Secondary supers are sought in the super_cache ('super_cache_addr').

  8274   // (Secondary supers are interfaces and very deeply nested subtypes.)

  8275   // This works in the same check above because of a tricky aliasing

  8276   // between the super_cache and the primary super display elements.

  8277   // (The 'super_check_addr' can address either, as the case requires.)

  8278   // Note that the cache is updated below if it does not help us find

  8279   // what we need immediately.

  8280   // So if it was a primary super, we can just fail immediately.

  8281   // Otherwise, it's the slow path for us (no success at this point).

         // Three code shapes follow, chosen at code-generation time: offset in a
         // register (decided by a runtime compare against sc_offset), constant
         // offset equal to sc_offset, and any other constant offset.
  8283   if (super_check_offset.is_register()) {

  8284     local_jcc(Assembler::equal, *L_success);

  8285     cmpl(super_check_offset.as_register(), sc_offset);

  8286     if (L_failure == &L_fallthrough) {

  8287       local_jcc(Assembler::equal, *L_slow_path);

  8288     } else {

  8289       local_jcc(Assembler::notEqual, *L_failure);

  8290       final_jmp(*L_slow_path);

  8291     }

  8292   } else if (super_check_offset.as_constant() == sc_offset) {

  8293     // Need a slow path; fast failure is impossible.

  8294     if (L_slow_path == &L_fallthrough) {

  8295       local_jcc(Assembler::equal, *L_success);

  8296     } else {

  8297       local_jcc(Assembler::notEqual, *L_slow_path);

  8298       final_jmp(*L_success);

  8299     }

  8300   } else {

  8301     // No slow path; it's a fast decision.

  8302     if (L_failure == &L_fallthrough) {

  8303       local_jcc(Assembler::equal, *L_success);

  8304     } else {

  8305       local_jcc(Assembler::notEqual, *L_failure);

  8306       final_jmp(*L_success);

  8307     }

  8308   }

  8310   bind(L_fallthrough);

  8312 #undef local_jcc

  8313 #undef final_jmp

  8314 }

  // Emits the slow part of a subtype check: a linear scan of sub_klass's
  // secondary supers array for super_klass.
  //   temp_reg, temp2_reg - temporaries; rax/rcx/rdi are used by the scan and
  //                         are pushed/popped here unless they are one of the
  //                         temps (temp2_reg may be noreg)
  //   L_success, L_failure - outcome targets; at most one may be NULL, meaning
  //                          "fall through" for that outcome
  //   set_cond_codes      - when true, asserts that rdi was not spilled (see
  //                         the AD-file note in the body)
  // On success super_klass is also stored into sub_klass's secondary super
  // cache.  sub_klass must not be rax or rcx (asserted below).
  8317 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,

  8318                                                    Register super_klass,

  8319                                                    Register temp_reg,

  8320                                                    Register temp2_reg,

  8321                                                    Label* L_success,

  8322                                                    Label* L_failure,

  8323                                                    bool set_cond_codes) {

  8324   assert_different_registers(sub_klass, super_klass, temp_reg);

  8325   if (temp2_reg != noreg)

  8326     assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);

  8327 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

         // A NULL label is redirected to L_fallthrough, bound at the end.
  8329   Label L_fallthrough;

  8330   int label_nulls = 0;

  8331   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }

  8332   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }

  8333   assert(label_nulls <= 1, "at most one NULL in the batch");

  8335   // a couple of useful fields in sub_klass:

  8336   int ss_offset = in_bytes(Klass::secondary_supers_offset());

  8337   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());

  8338   Address secondary_supers_addr(sub_klass, ss_offset);

  8339   Address super_cache_addr(     sub_klass, sc_offset);

  8341   // Do a linear scan of the secondary super-klass chain.

  8342   // This code is rarely used, so simplicity is a virtue here.

  8343   // The repne_scan instruction uses fixed registers, which we must spill.

  8344   // Don't worry too much about pre-existing connections with the input regs.

  8346   assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)

  8347   assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  8349   // Get super_klass value into rax (even if it was in rdi or rcx).

  8350   bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;

         // With UseCompressedOops the copy is made even when super_klass is
         // already rax, because rax is encoded in place further down.
  8351   if (super_klass != rax || UseCompressedOops) {

  8352     if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }

  8353     mov(rax, super_klass);

  8354   }

  8355   if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }

  8356   if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

         // Non-product builds bump SharedRuntime::_partial_subtype_ctr.
  8358 #ifndef PRODUCT

  8359   int* pst_counter = &SharedRuntime::_partial_subtype_ctr;

  8360   ExternalAddress pst_counter_addr((address) pst_counter);

  8361   NOT_LP64(  incrementl(pst_counter_addr) );

  8362   LP64_ONLY( lea(rcx, pst_counter_addr) );

  8363   LP64_ONLY( incrementl(Address(rcx, 0)) );

  8364 #endif //PRODUCT

  8366   // We will consult the secondary-super array.

  8367   movptr(rdi, secondary_supers_addr);

  8368   // Load the array length.  (Positive movl does right thing on LP64.)

  8369   movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));

  8370   // Skip to start of data.

  8371   addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  8373   // Scan RCX words at [RDI] for an occurrence of RAX.

  8374   // Set NZ/Z based on last compare.

  8375   // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does

  8376   // not change flags (only scas instruction which is repeated sets flags).

  8377   // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  8378 #ifdef _LP64

  8379   // This part is tricky, as values in supers array could be 32 or 64 bit wide

  8380   // and we store values in objArrays always encoded, thus we need to encode

  8381   // the value of rax before repne.  Note that rax is dead after the repne.

  8382   if (UseCompressedOops) {

  8383     encode_heap_oop_not_null(rax); // Changes flags.

  8384     // The superclass is never null; it would be a basic system error if a null

  8385     // pointer were to sneak in here.  Note that we have already loaded the

  8386     // Klass::super_check_offset from the super_klass in the fast path,

  8387     // so if there is a null in that register, we are already in the afterlife.

  8388     testl(rax,rax); // Set Z = 0

  8389     repne_scanl();

  8390   } else

  8391 #endif // _LP64

  8392   {

  8393     testptr(rax,rax); // Set Z = 0

  8394     repne_scan();

  8395   }

  8396   // Unspill the temp. registers:

         // Popped in reverse push order; the branch below still tests the
         // outcome of the scan.
  8397   if (pushed_rdi)  pop(rdi);

  8398   if (pushed_rcx)  pop(rcx);

  8399   if (pushed_rax)  pop(rax);

  8401   if (set_cond_codes) {

  8402     // Special hack for the AD files:  rdi is guaranteed non-zero.

  8403     assert(!pushed_rdi, "rdi must be left non-NULL");

  8404     // Also, the condition codes are properly set Z/NZ on succeed/failure.

  8405   }

         // Short branch when failure simply falls through to the end.
  8407   if (L_failure == &L_fallthrough)

  8408         jccb(Assembler::notEqual, *L_failure);

  8409   else  jcc(Assembler::notEqual, *L_failure);

  8411   // Success.  Cache the super we found and proceed in triumph.

  8412   movptr(super_cache_addr, super_klass);

  8414   if (L_success != &L_fallthrough) {

  8415     jmp(*L_success);

  8416   }

  8418 #undef IS_A_TEMP

  8420   bind(L_fallthrough);

  8421 }

  8424 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {

  8425   if (VM_Version::supports_cmov()) {

  8426     cmovl(cc, dst, src);

  8427   } else {

  8428     Label L;

  8429     jccb(negate_condition(cc), L);

  8430     movl(dst, src);

  8431     bind(L);

  8432   }

  8433 }

  8435 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {

  8436   if (VM_Version::supports_cmov()) {

  8437     cmovl(cc, dst, src);

  8438   } else {

  8439     Label L;

  8440     jccb(negate_condition(cc), L);

  8441     movl(dst, src);

  8442     bind(L);

  8443   }

  8444 }

  8446 void MacroAssembler::verify_oop(Register reg, const char* s) {

  8447   if (!VerifyOops) return;

  8449   // Pass register number to verify_oop_subroutine

  8450   char* b = new char[strlen(s) + 50];

  8451   sprintf(b, "verify_oop: %s: %s", reg->name(), s);

  8452 #ifdef _LP64

  8453   push(rscratch1);                    // save r10, trashed by movptr()

  8454 #endif

  8455   push(rax);                          // save rax,

  8456   push(reg);                          // pass register argument

  8457   ExternalAddress buffer((address) b);

  8458   // avoid using pushptr, as it modifies scratch registers

  8459   // and our contract is not to modify anything

  8460   movptr(rax, buffer.addr());

  8461   push(rax);

  8462   // call indirectly to solve generation ordering problem

  8463   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));

  8464   call(rax);

  8465   // Caller pops the arguments (oop, message) and restores rax, r10

  8466 }

  8469 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,

  8470                                                       Register tmp,

  8471                                                       int offset) {

  8472   intptr_t value = *delayed_value_addr;

  8473   if (value != 0)

  8474     return RegisterOrConstant(value + offset);

  8476   // load indirectly to solve generation ordering problem

  8477   movptr(tmp, ExternalAddress((address) delayed_value_addr));

  8479 #ifdef ASSERT

  8480   { Label L;

  8481     testptr(tmp, tmp);

  8482     if (WizardMode) {

  8483       jcc(Assembler::notZero, L);

  8484       char* buf = new char[40];

  8485       sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);

  8486       stop(buf);

  8487     } else {

  8488       jccb(Assembler::notZero, L);

  8489       hlt();

  8490     }

  8491     bind(L);

  8492   }

  8493 #endif

  8495   if (offset != 0)

  8496     addptr(tmp, offset);

  8498   return RegisterOrConstant(tmp);

  8499 }

  8502 // registers on entry:

  8503 //  - rax ('check' register): required MethodType

  8504 //  - rcx: method handle

  8505 //  - rdx, rsi, or ?: killable temp

  8506 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,

  8507                                               Register temp_reg,

  8508                                               Label& wrong_method_type) {

  8509   Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));

  8510   // compare method type against that of the receiver

  8511   if (UseCompressedOops) {

  8512     load_heap_oop(temp_reg, type_addr);

  8513     cmpptr(mtype_reg, temp_reg);

  8514   } else {

  8515     cmpptr(mtype_reg, type_addr);

  8516   }

  8517   jcc(Assembler::notEqual, wrong_method_type);

  8518 }

  8521 // A method handle has a "vmslots" field which gives the size of its

  8522 // argument list in JVM stack slots.  This field is either located directly

  8523 // in every method handle, or else is indirectly accessed through the

  8524 // method handle's MethodType.  This macro hides the distinction.

  8525 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,

  8526                                                 Register temp_reg) {

  8527   assert_different_registers(vmslots_reg, mh_reg, temp_reg);

  8528   // load mh.type.form.vmslots

  8529   Register temp2_reg = vmslots_reg;

  8530   load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));

  8531   load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));

  8532   movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));

  8533 }

  8536 // registers on entry:

  8537 //  - rcx: method handle

  8538 //  - rdx: killable temp (interpreted only)

  8539 //  - rax: killable temp (compiled only)

  8540 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {

  8541   assert(mh_reg == rcx, "caller must put MH object in rcx");

  8542   assert_different_registers(mh_reg, temp_reg);

  8544   // pick out the interpreted side of the handler

  8545   // NOTE: vmentry is not an oop!

  8546   movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  8548   // off we go...

  8549   jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  8551   // for the various stubs which take control at this point,

  8552   // see MethodHandles::generate_method_handle_stub

  8553 }

  8556 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,

  8557                                          int extra_slot_offset) {

  8558   // cf. TemplateTable::prepare_invoke(), if (load_receiver).

  8559   int stackElementSize = Interpreter::stackElementSize;

  8560   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);

  8561 #ifdef ASSERT

  8562   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);

  8563   assert(offset1 - offset == stackElementSize, "correct arithmetic");

  8564 #endif

  8565   Register             scale_reg    = noreg;

  8566   Address::ScaleFactor scale_factor = Address::no_scale;

  8567   if (arg_slot.is_constant()) {

  8568     offset += arg_slot.as_constant() * stackElementSize;

  8569   } else {

  8570     scale_reg    = arg_slot.as_register();

  8571     scale_factor = Address::times(stackElementSize);

  8572   }

  8573   offset += wordSize;           // return PC is on stack

  8574   return Address(rsp, scale_reg, scale_factor, offset);

  8575 }

  8578 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {

  8579   if (!VerifyOops) return;

  8581   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);

  8582   // Pass register number to verify_oop_subroutine

  8583   char* b = new char[strlen(s) + 50];

  8584   sprintf(b, "verify_oop_addr: %s", s);

  8586 #ifdef _LP64

  8587   push(rscratch1);                    // save r10, trashed by movptr()

  8588 #endif

  8589   push(rax);                          // save rax,

  8590   // addr may contain rsp so we will have to adjust it based on the push

  8591   // we just did (and on 64 bit we do two pushes)

  8592   // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which

  8593   // stores rax into addr which is backwards of what was intended.

  8594   if (addr.uses(rsp)) {

  8595     lea(rax, addr);

  8596     pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));

  8597   } else {

  8598     pushptr(addr);

  8599   }

  8601   ExternalAddress buffer((address) b);

  8602   // pass msg argument

  8603   // avoid using pushptr, as it modifies scratch registers

  8604   // and our contract is not to modify anything

  8605   movptr(rax, buffer.addr());

  8606   push(rax);

  8608   // call indirectly to solve generation ordering problem

  8609   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));

  8610   call(rax);

  8611   // Caller pops the arguments (addr, message) and restores rax, r10.

  8612 }

  8614 void MacroAssembler::verify_tlab() {

  8615 #ifdef ASSERT

  8616   if (UseTLAB && VerifyOops) {

  8617     Label next, ok;

  8618     Register t1 = rsi;

  8619     Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

  8621     push(t1);

  8622     NOT_LP64(push(thread_reg));

  8623     NOT_LP64(get_thread(thread_reg));

  8625     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));

  8626     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));

  8627     jcc(Assembler::aboveEqual, next);

  8628     stop("assert(top >= start)");

  8629     should_not_reach_here();

  8631     bind(next);

  8632     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  8633     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));

  8634     jcc(Assembler::aboveEqual, ok);

  8635     stop("assert(top <= end)");

  8636     should_not_reach_here();

  8638     bind(ok);

  8639     NOT_LP64(pop(thread_reg));

  8640     pop(t1);

  8641   }

  8642 #endif

  8643 }

  // Holds a control-word value in _value and decodes it: two 2-bit fields
  // (rounding control at bit 10, precision control at bit 8) and six
  // single-bit flags in bits 5..0.  print() writes a one-line summary with
  // printf.
  class ControlWord {
   public:
    int32_t _value;

    // 2-bit fields
    int  rounding_control() const        { return field2(10); }
    int  precision_control() const       { return field2(8);  }

    // single-bit flags
    bool precision() const               { return bit(5); }
    bool underflow() const               { return bit(4); }
    bool overflow() const                { return bit(3); }
    bool zero_divide() const             { return bit(2); }
    bool denormalized() const            { return bit(1); }
    bool invalid() const                 { return bit(0); }

    // Prints the low 16 bits in hex followed by the flag letters (upper case
    // when the bit is set, lower case otherwise) and the names of the
    // rounding and precision settings.
    void print() const {
      static const char* const rounding_names[4] = {
        "round near", "round down", "round up  ", "chop      "
      };
      static const char* const precision_names[4] = {
        "24 bits ", "reserved", "53 bits ", "64 bits "
      };
      const char* rc = rounding_names[rounding_control()];
      const char* pc = precision_names[precision_control()];

      const char flags[9] = {
        ' ',
        ' ',
        precision()    ? 'P' : 'p',
        underflow()    ? 'U' : 'u',
        overflow()     ? 'O' : 'o',
        zero_divide()  ? 'Z' : 'z',
        denormalized() ? 'D' : 'd',
        invalid()      ? 'I' : 'i',
        '\x0'
      };

      printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, flags, rc, pc);
    }

   private:
    // Bit n of _value as a bool.
    bool bit(int n) const                { return ((_value >> n) & 1) != 0; }
    // The two bits of _value starting at bit n.
    int  field2(int n) const             { return (_value >> n) & 3; }
  };

  // Holds a status-word value in _value and decodes it: the busy bit (15),
  // condition codes C3 (14), C2 (10), C1 (9), C0 (8), the 3-bit top field
  // (bits 13..11) and eight single-bit flags in bits 7..0.  print() writes a
  // one-line summary with printf.
  class StatusWord {
   public:
    int32_t _value;

    bool busy() const                    { return bit(15); }

    // condition codes
    bool C3() const                      { return bit(14); }
    bool C2() const                      { return bit(10); }
    bool C1() const                      { return bit(9);  }
    bool C0() const                      { return bit(8);  }

    // 3-bit field starting at bit 11
    int  top() const                     { return (_value >> 11) & 7; }

    // single-bit flags
    bool error_status() const            { return bit(7); }
    bool stack_fault() const             { return bit(6); }
    bool precision() const               { return bit(5); }
    bool underflow() const               { return bit(4); }
    bool overflow() const                { return bit(3); }
    bool zero_divide() const             { return bit(2); }
    bool denormalized() const            { return bit(1); }
    bool invalid() const                 { return bit(0); }

    // Prints the low 16 bits in hex, the flag letters ('-' for a clear bit),
    // the condition codes and the top field.
    void print() const {
      const char cc[5] = {
        C3() ? '3' : '-',
        C2() ? '2' : '-',
        C1() ? '1' : '-',
        C0() ? '0' : '-',
        '\x0'
      };

      const char flags[9] = {
        error_status() ? 'E' : '-',
        stack_fault()  ? 'S' : '-',
        precision()    ? 'P' : '-',
        underflow()    ? 'U' : '-',
        overflow()     ? 'O' : '-',
        zero_divide()  ? 'Z' : '-',
        denormalized() ? 'D' : '-',
        invalid()      ? 'I' : '-',
        '\x0'
      };

      printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, flags, cc, top());
    }

   private:
    // Bit n of _value as a bool.
    bool bit(int n) const                { return ((_value >> n) & 1) != 0; }
  };

  // Holds a tag-word value in _value, read as a sequence of 2-bit tags.
  class TagWord {
   public:
    int32_t _value;

    // Returns the 2-bit tag number i (tag 0 is the lowest two bits).
    int tag_at(int i) const {
      const int shift = i * 2;
      return (_value >> shift) & 3;
    }

    // Prints the low 16 bits as four hex digits.
    void print() const {
      printf("%04x", _value & 0xFFFF);
    }
  };

  // One register image made of two 32-bit words (_m0, then _m1) followed by
  // a 16-bit word (_ex).  The field order is part of the layout and must not
  // change.
  class FPU_Register {
   public:
    int32_t _m0;
    int32_t _m1;
    int16_t _ex;

    // True only for the exact pattern _ex == -1, _m1 == 0xC0000000, _m0 == 0.
    bool is_indefinite() const {
      if (_ex != -1)                   return false;
      if (_m1 != (int32_t)0xC0000000)  return false;
      return _m0 == 0;
    }

    // Prints a sign character taken from the sign of _ex, then _ex, _m1 and
    // _m0 in hex, then "NaN" when _ex is 0x7FFF or -1 (blank padding
    // otherwise).
    void print() const {
      char sign = '+';
      if (_ex < 0) {
        sign = '-';
      }

      const char* kind = "   ";
      if (_ex == 0x7FFF || _ex == (int16_t)-1) {
        kind = "NaN";
      }

      printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
    }
  };

  8766 class FPU_State {

  8767  public:

  8768   enum {

  8769     register_size       = 10,

  8770     number_of_registers =  8,

  8771     register_mask       =  7

  8772   };

  8774   ControlWord  _control_word;

  8775   StatusWord   _status_word;

  8776   TagWord      _tag_word;

  8777   int32_t      _error_offset;

  8778   int32_t      _error_selector;

  8779   int32_t      _data_offset;

  8780   int32_t      _data_selector;

  8781   int8_t       _register[register_size * number_of_registers];

  8783   int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }

  8784   FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }

  8786   const char* tag_as_string(int tag) const {

  8787     switch (tag) {

  8788       case 0: return "valid";

  8789       case 1: return "zero";

  8790       case 2: return "special";

  8791       case 3: return "empty";

  8792     }

  8793     ShouldNotReachHere();

  8794     return NULL;

  8795   }

  8797   void print() const {

  8798     // print computation registers

  8799     { int t = _status_word.top();

  8800       for (int i = 0; i < number_of_registers; i++) {

  8801         int j = (i - t) & register_mask;

  8802         printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);

  8803         st(j)->print();

  8804         printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));

  8805       }

  8806     }

  8807     printf("\n");

  8808     // print control registers

  8809     printf("ctrl = "); _control_word.print(); printf("\n");

  8810     printf("stat = "); _status_word .print(); printf("\n");

  8811     printf("tags = "); _tag_word    .print(); printf("\n");

  8812   }

  8814 };

  // Image of the CPU flags register with one accessor per decoded bit.
  class Flag_Register {
   public:
    int32_t _value;

    bool overflow() const        { return (_value & (1 << 11)) != 0; }
    bool direction() const       { return (_value & (1 << 10)) != 0; }
    bool sign() const            { return (_value & (1 <<  7)) != 0; }
    bool zero() const            { return (_value & (1 <<  6)) != 0; }
    bool auxiliary_carry() const { return (_value & (1 <<  4)) != 0; }
    bool parity() const          { return (_value & (1 <<  2)) != 0; }
    bool carry() const           { return (_value & (1 <<  0)) != 0; }

    // Prints the raw value in hex followed by one letter per set flag
    // ('-' for a clear flag), in the order O D S Z A P C.
    void print() const {
      const char flags[8] = {
        overflow()        ? 'O' : '-',
        direction()       ? 'D' : '-',
        sign()            ? 'S' : '-',
        zero()            ? 'Z' : '-',
        auxiliary_carry() ? 'A' : '-',
        parity()          ? 'P' : '-',
        carry()           ? 'C' : '-',
        '\x0'
      };
      printf("%08x  flags = %s", _value, flags);
    }

  };

  // Image of one 32-bit integer register value.
  class IU_Register {
   public:
    int32_t _value;

    // Prints the value as 8 hex digits and as a right-aligned decimal.
    void print() const {
      const int32_t v = _value;
      printf("%08x  %11d", v, v);
    }

  };

  8855 class IU_State {

  8856  public:

  8857   Flag_Register _eflags;

  8858   IU_Register   _rdi;

  8859   IU_Register   _rsi;

  8860   IU_Register   _rbp;

  8861   IU_Register   _rsp;

  8862   IU_Register   _rbx;

  8863   IU_Register   _rdx;

  8864   IU_Register   _rcx;

  8865   IU_Register   _rax;

  8867   void print() const {

  8868     // computation registers

  8869     printf("rax,  = "); _rax.print(); printf("\n");

  8870     printf("rbx,  = "); _rbx.print(); printf("\n");

  8871     printf("rcx  = "); _rcx.print(); printf("\n");

  8872     printf("rdx  = "); _rdx.print(); printf("\n");

  8873     printf("rdi  = "); _rdi.print(); printf("\n");

  8874     printf("rsi  = "); _rsi.print(); printf("\n");

  8875     printf("rbp,  = "); _rbp.print(); printf("\n");

  8876     printf("rsp  = "); _rsp.print(); printf("\n");

  8877     printf("\n");

  8878     // control registers

  8879     printf("flgs = "); _eflags.print(); printf("\n");

  8880   }

  8881 };

  8884 class CPU_State {

  8885  public:

  8886   FPU_State _fpu_state;

  8887   IU_State  _iu_state;

  8889   void print() const {

  8890     printf("--------------------------------------------------\n");

  8891     _iu_state .print();

  8892     printf("\n");

  8893     _fpu_state.print();

  8894     printf("--------------------------------------------------\n");

  8895   }

  8897 };

  8900 static void _print_CPU_state(CPU_State* state) {

  8901   state->print();

  8902 };

  8905 void MacroAssembler::print_CPU_state() {

  8906   push_CPU_state();

  8907   push(rsp);                // pass CPU state

  8908   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));

  8909   addptr(rsp, wordSize);       // discard argument

  8910   pop_CPU_state();

  8911 }

  8914 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {

  8915   static int counter = 0;

  8916   FPU_State* fs = &state->_fpu_state;

  8917   counter++;

  8918   // For leaf calls, only verify that the top few elements remain empty.

  8919   // We only need 1 empty at the top for C2 code.

  8920   if( stack_depth < 0 ) {

  8921     if( fs->tag_for_st(7) != 3 ) {

  8922       printf("FPR7 not empty\n");

  8923       state->print();

  8924       assert(false, "error");

  8925       return false;

  8926     }

  8927     return true;                // All other stack states do not matter

  8928   }

  8930   assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,

  8931          "bad FPU control word");

  8933   // compute stack depth

  8934   int i = 0;

  8935   while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;

  8936   int d = i;

  8937   while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;

  8938   // verify findings

  8939   if (i != FPU_State::number_of_registers) {

  8940     // stack not contiguous

  8941     printf("%s: stack not contiguous at ST%d\n", s, i);

  8942     state->print();

  8943     assert(false, "error");

  8944     return false;

  8945   }

  8946   // check if computed stack depth corresponds to expected stack depth

  8947   if (stack_depth < 0) {

  8948     // expected stack depth is -stack_depth or less

  8949     if (d > -stack_depth) {

  8950       // too many elements on the stack

  8951       printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);

  8952       state->print();

  8953       assert(false, "error");

  8954       return false;

  8955     }

  8956   } else {

  8957     // expected stack depth is stack_depth

  8958     if (d != stack_depth) {

  8959       // wrong stack depth

  8960       printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);

  8961       state->print();

  8962       assert(false, "error");

  8963       return false;

  8964     }

  8965   }

  8966   // everything is cool

  8967   return true;

  8968 }

  8971 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {

  8972   if (!VerifyFPU) return;

  8973   push_CPU_state();

  8974   push(rsp);                // pass CPU state

  8975   ExternalAddress msg((address) s);

  8976   // pass message string s

  8977   pushptr(msg.addr());

  8978   push(stack_depth);        // pass stack depth

  8979   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));

  8980   addptr(rsp, 3 * wordSize);   // discard arguments

  8981   // check for error

  8982   { Label L;

  8983     testl(rax, rax);

  8984     jcc(Assembler::notZero, L);

  8985     int3();                  // break if error condition

  8986     bind(L);

  8987   }

  8988   pop_CPU_state();

  8989 }

  8991 void MacroAssembler::load_klass(Register dst, Register src) {

  8992 #ifdef _LP64

  8993   if (UseCompressedOops) {

  8994     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));

  8995     decode_heap_oop_not_null(dst);

  8996   } else

  8997 #endif

  8998     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));

  8999 }

  9001 void MacroAssembler::load_prototype_header(Register dst, Register src) {

  9002 #ifdef _LP64

  9003   if (UseCompressedOops) {

  9004     assert (Universe::heap() != NULL, "java heap should be initialized");

  9005     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));

  9006     if (Universe::narrow_oop_shift() != 0) {

  9007       assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");

  9008       if (LogMinObjAlignmentInBytes == Address::times_8) {

  9009         movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));

  9010       } else {

  9011         // OK to use shift since we don't need to preserve flags.

  9012         shlq(dst, LogMinObjAlignmentInBytes);

  9013         movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset()));

  9014       }

  9015     } else {

  9016       movq(dst, Address(dst, Klass::prototype_header_offset()));

  9017     }

  9018   } else

  9019 #endif

  9020   {

  9021     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));

  9022     movptr(dst, Address(dst, Klass::prototype_header_offset()));

  9023   }

  9024 }

  9026 void MacroAssembler::store_klass(Register dst, Register src) {

  9027 #ifdef _LP64

  9028   if (UseCompressedOops) {

  9029     encode_heap_oop_not_null(src);

  9030     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);

  9031   } else

  9032 #endif

  9033     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);

  9034 }

  9036 void MacroAssembler::load_heap_oop(Register dst, Address src) {

  9037 #ifdef _LP64

  9038   if (UseCompressedOops) {

  9039     movl(dst, src);

  9040     decode_heap_oop(dst);

  9041   } else

  9042 #endif

  9043     movptr(dst, src);

  9044 }

  9046 // Doesn't do verfication, generates fixed size code

  9047 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {

  9048 #ifdef _LP64

  9049   if (UseCompressedOops) {

  9050     movl(dst, src);

  9051     decode_heap_oop_not_null(dst);

  9052   } else

  9053 #endif

  9054     movptr(dst, src);

  9055 }

  9057 void MacroAssembler::store_heap_oop(Address dst, Register src) {

  9058 #ifdef _LP64

  9059   if (UseCompressedOops) {

  9060     assert(!dst.uses(src), "not enough registers");

  9061     encode_heap_oop(src);

  9062     movl(dst, src);

  9063   } else

  9064 #endif

  9065     movptr(dst, src);

  9066 }

  9068 // Used for storing NULLs.

  9069 void MacroAssembler::store_heap_oop_null(Address dst) {

  9070 #ifdef _LP64

  9071   if (UseCompressedOops) {

  9072     movl(dst, (int32_t)NULL_WORD);

  9073   } else {

  9074     movslq(dst, (int32_t)NULL_WORD);

  9075   }

  9076 #else

  9077   movl(dst, (int32_t)NULL_WORD);

  9078 #endif

  9079 }

  9081 #ifdef _LP64

  9082 void MacroAssembler::store_klass_gap(Register dst, Register src) {

  9083   if (UseCompressedOops) {

  9084     // Store to klass gap in destination

  9085     movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);

  9086   }

  9087 }

  9089 #ifdef ASSERT

  9090 void MacroAssembler::verify_heapbase(const char* msg) {

  9091   assert (UseCompressedOops, "should be compressed");

  9092   assert (Universe::heap() != NULL, "java heap should be initialized");

  9093   if (CheckCompressedOops) {

  9094     Label ok;

  9095     push(rscratch1); // cmpptr trashes rscratch1

  9096     cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));

  9097     jcc(Assembler::equal, ok);

  9098     stop(msg);

  9099     bind(ok);

  9100     pop(rscratch1);

  9101   }

  9102 }

  9103 #endif

  9105 // Algorithm must match oop.inline.hpp encode_heap_oop.

  9106 void MacroAssembler::encode_heap_oop(Register r) {

  9107 #ifdef ASSERT

  9108   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");

  9109 #endif

  9110   verify_oop(r, "broken oop in encode_heap_oop");

  9111   if (Universe::narrow_oop_base() == NULL) {

  9112     if (Universe::narrow_oop_shift() != 0) {

  9113       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");

  9114       shrq(r, LogMinObjAlignmentInBytes);

  9115     }

  9116     return;

  9117   }

  9118   testq(r, r);

  9119   cmovq(Assembler::equal, r, r12_heapbase);

  9120   subq(r, r12_heapbase);

  9121   shrq(r, LogMinObjAlignmentInBytes);

  9122 }

  9124 void MacroAssembler::encode_heap_oop_not_null(Register r) {

  9125 #ifdef ASSERT

  9126   verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");

  9127   if (CheckCompressedOops) {

  9128     Label ok;

  9129     testq(r, r);

  9130     jcc(Assembler::notEqual, ok);

  9131     stop("null oop passed to encode_heap_oop_not_null");

  9132     bind(ok);

  9133   }

  9134 #endif

  9135   verify_oop(r, "broken oop in encode_heap_oop_not_null");

  9136   if (Universe::narrow_oop_base() != NULL) {

  9137     subq(r, r12_heapbase);

  9138   }

  9139   if (Universe::narrow_oop_shift() != 0) {

  9140     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");

  9141     shrq(r, LogMinObjAlignmentInBytes);

  9142   }

  9143 }

  9145 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {

  9146 #ifdef ASSERT

  9147   verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");

  9148   if (CheckCompressedOops) {

  9149     Label ok;

  9150     testq(src, src);

  9151     jcc(Assembler::notEqual, ok);

  9152     stop("null oop passed to encode_heap_oop_not_null2");

  9153     bind(ok);

  9154   }

  9155 #endif

  9156   verify_oop(src, "broken oop in encode_heap_oop_not_null2");

  9157   if (dst != src) {

  9158     movq(dst, src);

  9159   }

  9160   if (Universe::narrow_oop_base() != NULL) {

  9161     subq(dst, r12_heapbase);

  9162   }

  9163   if (Universe::narrow_oop_shift() != 0) {

  9164     assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");

  9165     shrq(dst, LogMinObjAlignmentInBytes);

  9166   }

  9167 }

  9169 void  MacroAssembler::decode_heap_oop(Register r) {

  9170 #ifdef ASSERT

  9171   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");

  9172 #endif

  9173   if (Universe::narrow_oop_base() == NULL) {

  9174     if (Universe::narrow_oop_shift() != 0) {

  9175       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");

  9176       shlq(r, LogMinObjAlignmentInBytes);

  9177     }

  9178   } else {

  9179     Label done;

  9180     shlq(r, LogMinObjAlignmentInBytes);

  9181     jccb(Assembler::equal, done);

  9182     addq(r, r12_heapbase);

  9183     bind(done);

  9184   }

  9185   verify_oop(r, "broken oop in decode_heap_oop");

  9186 }

  9188 void  MacroAssembler::decode_heap_oop_not_null(Register r) {

  9189   // Note: it will change flags

  9190   assert (UseCompressedOops, "should only be used for compressed headers");

  9191   assert (Universe::heap() != NULL, "java heap should be initialized");

  9192   // Cannot assert, unverified entry point counts instructions (see .ad file)

  9193   // vtableStubs also counts instructions in pd_code_size_limit.

  9194   // Also do not verify_oop as this is called by verify_oop.

  9195   if (Universe::narrow_oop_shift() != 0) {

  9196     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");

  9197     shlq(r, LogMinObjAlignmentInBytes);

  9198     if (Universe::narrow_oop_base() != NULL) {

  9199       addq(r, r12_heapbase);

  9200     }

  9201   } else {

  9202     assert (Universe::narrow_oop_base() == NULL, "sanity");

  9203   }

  9204 }

  9206 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {

  9207   // Note: it will change flags

  9208   assert (UseCompressedOops, "should only be used for compressed headers");

  9209   assert (Universe::heap() != NULL, "java heap should be initialized");

  9210   // Cannot assert, unverified entry point counts instructions (see .ad file)

  9211   // vtableStubs also counts instructions in pd_code_size_limit.

  9212   // Also do not verify_oop as this is called by verify_oop.

  9213   if (Universe::narrow_oop_shift() != 0) {

  9214     assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");

  9215     if (LogMinObjAlignmentInBytes == Address::times_8) {

  9216       leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));

  9217     } else {

  9218       if (dst != src) {

  9219         movq(dst, src);

  9220       }

  9221       shlq(dst, LogMinObjAlignmentInBytes);

  9222       if (Universe::narrow_oop_base() != NULL) {

  9223         addq(dst, r12_heapbase);

  9224       }

  9225     }

  9226   } else {

  9227     assert (Universe::narrow_oop_base() == NULL, "sanity");

  9228     if (dst != src) {

  9229       movq(dst, src);

  9230     }

  9231   }

  9232 }

  9234 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {

  9235   assert (UseCompressedOops, "should only be used for compressed headers");

  9236   assert (Universe::heap() != NULL, "java heap should be initialized");

  9237   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");

  9238   int oop_index = oop_recorder()->find_index(obj);

  9239   RelocationHolder rspec = oop_Relocation::spec(oop_index);

  9240   mov_narrow_oop(dst, oop_index, rspec);

  9241 }

  9243 void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {

  9244   assert (UseCompressedOops, "should only be used for compressed headers");

  9245   assert (Universe::heap() != NULL, "java heap should be initialized");

  9246   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");

  9247   int oop_index = oop_recorder()->find_index(obj);

  9248   RelocationHolder rspec = oop_Relocation::spec(oop_index);

  9249   mov_narrow_oop(dst, oop_index, rspec);

  9250 }

  9252 void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {

  9253   assert (UseCompressedOops, "should only be used for compressed headers");

  9254   assert (Universe::heap() != NULL, "java heap should be initialized");

  9255   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");

  9256   int oop_index = oop_recorder()->find_index(obj);

  9257   RelocationHolder rspec = oop_Relocation::spec(oop_index);

  9258   Assembler::cmp_narrow_oop(dst, oop_index, rspec);

  9259 }

  9261 void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {

  9262   assert (UseCompressedOops, "should only be used for compressed headers");

  9263   assert (Universe::heap() != NULL, "java heap should be initialized");

  9264   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");

  9265   int oop_index = oop_recorder()->find_index(obj);

  9266   RelocationHolder rspec = oop_Relocation::spec(oop_index);

  9267   Assembler::cmp_narrow_oop(dst, oop_index, rspec);

  9268 }

  9270 void MacroAssembler::reinit_heapbase() {

  9271   if (UseCompressedOops) {

  9272     movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));

  9273   }

  9274 }

  9275 #endif // _LP64

  9277 // IndexOf for constant substrings with size >= 8 chars

  9278 // which don't need to be loaded through stack.

  9279 void MacroAssembler::string_indexofC8(Register str1, Register str2,

  9280                                       Register cnt1, Register cnt2,

  9281                                       int int_cnt2,  Register result,

  9282                                       XMMRegister vec, Register tmp) {

  9283   assert(UseSSE42Intrinsics, "SSE4.2 is required");

  9285   // This method uses pcmpestri inxtruction with bound registers

  9286   //   inputs:

  9287   //     xmm - substring

  9288   //     rax - substring length (elements count)

  9289   //     mem - scanned string

  9290   //     rdx - string length (elements count)

  9291   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)

  9292   //   outputs:

  9293   //     rcx - matched index in string

  9294   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  9296   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,

  9297         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,

  9298         MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  9300   // Note, inline_string_indexOf() generates checks:

  9301   // if (substr.count > string.count) return -1;

  9302   // if (substr.count == 0) return 0;

  9303   assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");

  9305   // Load substring.

  9306   movdqu(vec, Address(str2, 0));

  9307   movl(cnt2, int_cnt2);

  9308   movptr(result, str1); // string addr

  9310   if (int_cnt2 > 8) {

  9311     jmpb(SCAN_TO_SUBSTR);

  9313     // Reload substr for rescan, this code

  9314     // is executed only for large substrings (> 8 chars)

  9315     bind(RELOAD_SUBSTR);

  9316     movdqu(vec, Address(str2, 0));

  9317     negptr(cnt2); // Jumped here with negative cnt2, convert to positive

  9319     bind(RELOAD_STR);

  9320     // We came here after the beginning of the substring was

  9321     // matched but the rest of it was not so we need to search

  9322     // again. Start from the next element after the previous match.

  9324     // cnt2 is number of substring reminding elements and

  9325     // cnt1 is number of string reminding elements when cmp failed.

  9326     // Restored cnt1 = cnt1 - cnt2 + int_cnt2

  9327     subl(cnt1, cnt2);

  9328     addl(cnt1, int_cnt2);

  9329     movl(cnt2, int_cnt2); // Now restore cnt2

  9331     decrementl(cnt1);     // Shift to next element

  9332     cmpl(cnt1, cnt2);

  9333     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring

  9335     addptr(result, 2);

  9337   } // (int_cnt2 > 8)

  9339   // Scan string for start of substr in 16-byte vectors

  9340   bind(SCAN_TO_SUBSTR);

  9341   pcmpestri(vec, Address(result, 0), 0x0d);

  9342   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1

  9343   subl(cnt1, 8);

  9344   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string

  9345   cmpl(cnt1, cnt2);

  9346   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring

  9347   addptr(result, 16);

  9348   jmpb(SCAN_TO_SUBSTR);

  9350   // Found a potential substr

  9351   bind(FOUND_CANDIDATE);

  9352   // Matched whole vector if first element matched (tmp(rcx) == 0).

  9353   if (int_cnt2 == 8) {

  9354     jccb(Assembler::overflow, RET_FOUND);    // OF == 1

  9355   } else { // int_cnt2 > 8

  9356     jccb(Assembler::overflow, FOUND_SUBSTR);

  9357   }

  9358   // After pcmpestri tmp(rcx) contains matched element index

  9359   // Compute start addr of substr

  9360   lea(result, Address(result, tmp, Address::times_2));

  9362   // Make sure string is still long enough

  9363   subl(cnt1, tmp);

  9364   cmpl(cnt1, cnt2);

  9365   if (int_cnt2 == 8) {

  9366     jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);

  9367   } else { // int_cnt2 > 8

  9368     jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);

  9369   }

  9370   // Left less then substring.

  9372   bind(RET_NOT_FOUND);

  9373   movl(result, -1);

  9374   jmpb(EXIT);

  9376   if (int_cnt2 > 8) {

  9377     // This code is optimized for the case when whole substring

  9378     // is matched if its head is matched.

  9379     bind(MATCH_SUBSTR_HEAD);

  9380     pcmpestri(vec, Address(result, 0), 0x0d);

  9381     // Reload only string if does not match

  9382     jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

  9384     Label CONT_SCAN_SUBSTR;

  9385     // Compare the rest of substring (> 8 chars).

  9386     bind(FOUND_SUBSTR);

  9387     // First 8 chars are already matched.

  9388     negptr(cnt2);

  9389     addptr(cnt2, 8);

  9391     bind(SCAN_SUBSTR);

  9392     subl(cnt1, 8);

  9393     cmpl(cnt2, -8); // Do not read beyond substring

  9394     jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);

  9395     // Back-up strings to avoid reading beyond substring:

  9396     // cnt1 = cnt1 - cnt2 + 8

  9397     addl(cnt1, cnt2); // cnt2 is negative

  9398     addl(cnt1, 8);

  9399     movl(cnt2, 8); negptr(cnt2);

  9400     bind(CONT_SCAN_SUBSTR);

  9401     if (int_cnt2 < (int)G) {

  9402       movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));

  9403       pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);

  9404     } else {

  9405       // calculate index in register to avoid integer overflow (int_cnt2*2)

  9406       movl(tmp, int_cnt2);

  9407       addptr(tmp, cnt2);

  9408       movdqu(vec, Address(str2, tmp, Address::times_2, 0));

  9409       pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);

  9410     }

  9411     // Need to reload strings pointers if not matched whole vector

  9412     jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

  9413     addptr(cnt2, 8);

  9414     jccb(Assembler::negative, SCAN_SUBSTR);

  9415     // Fall through if found full substring

  9417   } // (int_cnt2 > 8)

  9419   bind(RET_FOUND);

  9420   // Found result if we matched full small substring.

  9421   // Compute substr offset

  9422   subptr(result, str1);

  9423   shrl(result, 1); // index

  9424   bind(EXIT);

  9426 } // string_indexofC8

  9428 // Small strings are loaded through stack if they cross page boundary.

  9429 void MacroAssembler::string_indexof(Register str1, Register str2,

  9430                                     Register cnt1, Register cnt2,

  9431                                     int int_cnt2,  Register result,

  9432                                     XMMRegister vec, Register tmp) {

  9433   assert(UseSSE42Intrinsics, "SSE4.2 is required");

  9434   //

  9435   // int_cnt2 is length of small (< 8 chars) constant substring

  9436   // or (-1) for non constant substring in which case its length

  9437   // is in cnt2 register.

  9438   //

  9439   // Note, inline_string_indexOf() generates checks:

  9440   // if (substr.count > string.count) return -1;

  9441   // if (substr.count == 0) return 0;

  9442   //

  9443   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  9445   // This method uses the pcmpestri instruction with bound registers

  9446   //   inputs:

  9447   //     xmm - substring

  9448   //     rax - substring length (elements count)

  9449   //     mem - scanned string

  9450   //     rdx - string length (elements count)

  9451   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)

  9452   //   outputs:

  9453   //     rcx - matched index in string

  9454   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  9456   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,

  9457         RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,

  9458         FOUND_CANDIDATE;

  9460   { //========================================================

  9461     // We don't know where these strings are located

  9462     // and we can't read beyond them. Load them through stack.

  9463     Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

  9465     movptr(tmp, rsp); // save old SP

  9467     if (int_cnt2 > 0) {     // small (< 8 chars) constant substring

  9468       if (int_cnt2 == 1) {  // One char

  9469         load_unsigned_short(result, Address(str2, 0));

  9470         movdl(vec, result); // move 32 bits

  9471       } else if (int_cnt2 == 2) { // Two chars

  9472         movdl(vec, Address(str2, 0)); // move 32 bits

  9473       } else if (int_cnt2 == 4) { // Four chars

  9474         movq(vec, Address(str2, 0));  // move 64 bits

  9475       } else { // cnt2 = { 3, 5, 6, 7 }

  9476         // Array header size is 12 bytes in 32-bit VM

  9477         // + 6 bytes for 3 chars == 18 bytes,

  9478         // enough space to load vec and shift.

  9479         assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");

  9480         movdqu(vec, Address(str2, (int_cnt2*2)-16));

  9481         psrldq(vec, 16-(int_cnt2*2));

  9482       }

  9483     } else { // not constant substring

  9484       cmpl(cnt2, 8);

  9485       jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

  9487       // We can read beyond string if str+16 does not cross page boundary

  9488       // since heaps are aligned and mapped by pages.

  9489       assert(os::vm_page_size() < (int)G, "default page should be small");

  9490       movl(result, str2); // We need only low 32 bits

  9491       andl(result, (os::vm_page_size()-1));

  9492       cmpl(result, (os::vm_page_size()-16));

  9493       jccb(Assembler::belowEqual, CHECK_STR);

  9495       // Move small strings to stack to allow load 16 bytes into vec.

  9496       subptr(rsp, 16);

  9497       int stk_offset = wordSize-2;

  9498       push(cnt2);

  9500       bind(COPY_SUBSTR);

  9501       load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));

  9502       movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);

  9503       decrement(cnt2);

  9504       jccb(Assembler::notZero, COPY_SUBSTR);

  9506       pop(cnt2);

  9507       movptr(str2, rsp);  // New substring address

  9508     } // non constant

  9510     bind(CHECK_STR);

  9511     cmpl(cnt1, 8);

  9512     jccb(Assembler::aboveEqual, BIG_STRINGS);

  9514     // Check cross page boundary.

  9515     movl(result, str1); // We need only low 32 bits

  9516     andl(result, (os::vm_page_size()-1));

  9517     cmpl(result, (os::vm_page_size()-16));

  9518     jccb(Assembler::belowEqual, BIG_STRINGS);

  9520     subptr(rsp, 16);

  9521     int stk_offset = -2;

  9522     if (int_cnt2 < 0) { // not constant

  9523       push(cnt2);

  9524       stk_offset += wordSize;

  9525     }

  9526     movl(cnt2, cnt1);

  9528     bind(COPY_STR);

  9529     load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));

  9530     movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);

  9531     decrement(cnt2);

  9532     jccb(Assembler::notZero, COPY_STR);

  9534     if (int_cnt2 < 0) { // not constant

  9535       pop(cnt2);

  9536     }

  9537     movptr(str1, rsp);  // New string address

  9539     bind(BIG_STRINGS);

  9540     // Load substring.

  9541     if (int_cnt2 < 0) { // -1

  9542       movdqu(vec, Address(str2, 0));

  9543       push(cnt2);       // substr count

  9544       push(str2);       // substr addr

  9545       push(str1);       // string addr

  9546     } else {

  9547       // Small (< 8 chars) constant substrings are loaded already.

  9548       movl(cnt2, int_cnt2);

  9549     }

  9550     push(tmp);  // original SP

  9552   } // Finished loading

  9554   //========================================================

  9555   // Start search

  9556   //

  9558   movptr(result, str1); // string addr

  9560   if (int_cnt2  < 0) {  // Only for non constant substring

  9561     jmpb(SCAN_TO_SUBSTR);

  9563     // SP saved at sp+0

  9564     // String saved at sp+1*wordSize

  9565     // Substr saved at sp+2*wordSize

  9566     // Substr count saved at sp+3*wordSize

  9568     // Reload substr for rescan, this code

  9569     // is executed only for large substrings (> 8 chars)

  9570     bind(RELOAD_SUBSTR);

  9571     movptr(str2, Address(rsp, 2*wordSize));

  9572     movl(cnt2, Address(rsp, 3*wordSize));

  9573     movdqu(vec, Address(str2, 0));

  9574     // We came here after the beginning of the substring was

  9575     // matched but the rest of it was not so we need to search

  9576     // again. Start from the next element after the previous match.

  9577     subptr(str1, result); // Restore counter

  9578     shrl(str1, 1);

  9579     addl(cnt1, str1);

  9580     decrementl(cnt1);   // Shift to next element

  9581     cmpl(cnt1, cnt2);

  9582     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring

  9584     addptr(result, 2);

  9585   } // non constant

  9587   // Scan string for start of substr in 16-byte vectors

  9588   bind(SCAN_TO_SUBSTR);

  9589   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  9590   pcmpestri(vec, Address(result, 0), 0x0d);

  9591   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1

  9592   subl(cnt1, 8);

  9593   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string

  9594   cmpl(cnt1, cnt2);

  9595   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring

  9596   addptr(result, 16);

  9598   bind(ADJUST_STR);

  9599   cmpl(cnt1, 8); // Do not read beyond string

  9600   jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);

  9601   // Back-up string to avoid reading beyond string.

  9602   lea(result, Address(result, cnt1, Address::times_2, -16));

  9603   movl(cnt1, 8);

  9604   jmpb(SCAN_TO_SUBSTR);

  9606   // Found a potential substr

  9607   bind(FOUND_CANDIDATE);

  9608   // After pcmpestri tmp(rcx) contains matched element index

  9610   // Make sure string is still long enough

  9611   subl(cnt1, tmp);

  9612   cmpl(cnt1, cnt2);

  9613   jccb(Assembler::greaterEqual, FOUND_SUBSTR);

  9614   // Left less then substring.

  9616   bind(RET_NOT_FOUND);

  9617   movl(result, -1);

  9618   jmpb(CLEANUP);

  9620   bind(FOUND_SUBSTR);

  9621   // Compute start addr of substr

  9622   lea(result, Address(result, tmp, Address::times_2));

  9624   if (int_cnt2 > 0) { // Constant substring

  9625     // Repeat search for small substring (< 8 chars)

  9626     // from new point without reloading substring.

  9627     // Have to check that we don't read beyond string.

  9628     cmpl(tmp, 8-int_cnt2);

  9629     jccb(Assembler::greater, ADJUST_STR);

  9630     // Fall through if matched whole substring.

  9631   } else { // non constant

  9632     assert(int_cnt2 == -1, "should be != 0");

  9634     addl(tmp, cnt2);

  9635     // Found result if we matched whole substring.

  9636     cmpl(tmp, 8);

  9637     jccb(Assembler::lessEqual, RET_FOUND);

  9639     // Repeat search for small substring (<= 8 chars)

  9640     // from new point 'str1' without reloading substring.

  9641     cmpl(cnt2, 8);

  9642     // Have to check that we don't read beyond string.

  9643     jccb(Assembler::lessEqual, ADJUST_STR);

  9645     Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;

  9646     // Compare the rest of substring (> 8 chars).

  9647     movptr(str1, result);

  9649     cmpl(tmp, cnt2);

  9650     // First 8 chars are already matched.

  9651     jccb(Assembler::equal, CHECK_NEXT);

  9653     bind(SCAN_SUBSTR);

  9654     pcmpestri(vec, Address(str1, 0), 0x0d);

  9655     // Need to reload strings pointers if not matched whole vector

  9656     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

  9658     bind(CHECK_NEXT);

  9659     subl(cnt2, 8);

  9660     jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring

  9661     addptr(str1, 16);

  9662     addptr(str2, 16);

  9663     subl(cnt1, 8);

  9664     cmpl(cnt2, 8); // Do not read beyond substring

  9665     jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);

  9666     // Back-up strings to avoid reading beyond substring.

  9667     lea(str2, Address(str2, cnt2, Address::times_2, -16));

  9668     lea(str1, Address(str1, cnt2, Address::times_2, -16));

  9669     subl(cnt1, cnt2);

  9670     movl(cnt2, 8);

  9671     addl(cnt1, 8);

  9672     bind(CONT_SCAN_SUBSTR);

  9673     movdqu(vec, Address(str2, 0));

  9674     jmpb(SCAN_SUBSTR);

  9676     bind(RET_FOUND_LONG);

  9677     movptr(str1, Address(rsp, wordSize));

  9678   } // non constant

  9680   bind(RET_FOUND);

  9681   // Compute substr offset

  9682   subptr(result, str1);

  9683   shrl(result, 1); // index

  9685   bind(CLEANUP);

  9686   pop(rsp); // restore SP

  9688 } // string_indexof

  9690 // Compare strings.
       //
       // Emits code that compares two sequences of 16-bit chars: str1 with cnt1
       // elements and str2 with cnt2 elements. On exit 'result' holds either
       //   - the difference (str1 char - str2 char) of the first pair of chars
       //     that differ, or
       //   - cnt1 - cnt2 when the sequences agree over the shorter length.
       //
       // str1, str2, cnt1 and cnt2 are overwritten. vec1 is only used on the
       // UseSSE42Intrinsics path, which also requires result == rax,
       // cnt2 == rdx and cnt1 == rcx (asserted below). One word is pushed on
       // the stack at entry and popped again on every exit path.

  9691 void MacroAssembler::string_compare(Register str1, Register str2,

  9692                                     Register cnt1, Register cnt2, Register result,

  9693                                     XMMRegister vec1) {

  9694   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  9696   // Compute the minimum of the string lengths and the

  9697   // difference of the string lengths (stack).

  9698   // Do the conditional move stuff

  9699   movl(result, cnt1);

  9700   subl(cnt1, cnt2);

  9701   push(cnt1);

         // The flags tested here are still the ones set by the subl above:
         // if cnt1 - cnt2 <= 0 then cnt2 = original cnt1, i.e. cnt2 = min length.
  9702   cmov32(Assembler::lessEqual, cnt2, result);

  9704   // Is the minimum length zero?

  9705   testl(cnt2, cnt2);

  9706   jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  9708   // Load first characters

  9709   load_unsigned_short(result, Address(str1, 0));

         // cnt1 is free to be used as scratch: the length difference it held
         // has already been saved on the stack.
  9710   load_unsigned_short(cnt1, Address(str2, 0));

  9712   // Compare first characters

  9713   subl(result, cnt1);

  9714   jcc(Assembler::notZero,  POP_LABEL);

  9715   decrementl(cnt2);

  9716   jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  9718   {

  9719     // Check after comparing first character to see if strings are equivalent

  9720     Label LSkip2;

  9721     // Check if the strings start at same location

  9722     cmpptr(str1, str2);

  9723     jccb(Assembler::notEqual, LSkip2);

  9725     // Check if the length difference is zero (from stack)

  9726     cmpl(Address(rsp, 0), 0x0);

  9727     jcc(Assembler::equal,  LENGTH_DIFF_LABEL);

  9729     // Strings might not be equivalent

  9730     bind(LSkip2);

  9731   }

         // Elements are 2 bytes wide, so indexed addresses scale by 2.
  9733   Address::ScaleFactor scale = Address::times_2;

         // Number of 2-byte elements in one 16-byte vector.
  9734   int stride = 8;

  9736   // Advance to next element

         // 16/stride == 2 bytes == one element (the first one was compared above).
  9737   addptr(str1, 16/stride);

  9738   addptr(str2, 16/stride);

  9740   if (UseSSE42Intrinsics) {

  9741     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;

  9742     int pcmpmask = 0x19;

  9743     // Setup to compare 16-byte vectors

           // result keeps the full remaining element count; cnt2 is rounded
           // down to a multiple of stride.
  9744     movl(result, cnt2);

  9745     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count

  9746     jccb(Assembler::zero, COMPARE_TAIL);

           // Point both strings past their last remaining element and index
           // backwards from there with a negative element index in result.
  9748     lea(str1, Address(str1, result, scale));

  9749     lea(str2, Address(str2, result, scale));

  9750     negptr(result);

  9752     // pcmpestri

  9753     //   inputs:

  9754     //     vec1- substring

  9755     //     rax - negative string length (elements count)

  9756     //     mem - scanned string

  9757     //     rdx - string length (elements count)

  9758     //     pcmpmask - cmp mode: 11000 (string compare with negated result)

  9759     //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)

  9760     //   outputs:

  9761     //     rcx - first mismatched element index

  9762     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

  9764     bind(COMPARE_WIDE_VECTORS);

  9765     movdqu(vec1, Address(str1, result, scale));

  9766     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);

  9767     // After pcmpestri cnt1(rcx) contains mismatched element index

  9769     jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1

           // No mismatch in this vector: step the index forward by one vector
           // and loop while whole vectors remain.
  9770     addptr(result, stride);

  9771     subptr(cnt2, stride);

  9772     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

  9774     // compare wide vectors tail

           // result == 0 means the whole vectors covered every remaining element.
  9775     testl(result, result);

  9776     jccb(Assembler::zero, LENGTH_DIFF_LABEL);

           // Otherwise compare the final 'stride' elements of both strings in
           // one more vector; this overlaps elements already compared.
  9778     movl(cnt2, stride);

  9779     movl(result, stride);

  9780     negptr(result);

  9781     movdqu(vec1, Address(str1, result, scale));

  9782     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);

           // CF == 0: no mismatch in the last vector either.
  9783     jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

  9785     // Mismatched characters in the vectors

  9786     bind(VECTOR_NOT_EQUAL);

           // result (index of the vector start) + cnt1 (mismatch index inside
           // the vector) is the index of the mismatching element.
  9787     addptr(result, cnt1);

  9788     movptr(cnt2, result);

  9789     load_unsigned_short(result, Address(str1, cnt2, scale));

  9790     load_unsigned_short(cnt1, Address(str2, cnt2, scale));

  9791     subl(result, cnt1);

  9792     jmpb(POP_LABEL);

  9794     bind(COMPARE_TAIL); // limit is zero

           // Fewer than 'stride' elements remain: restore the full count that
           // the masking above cleared and use the scalar loop below.
  9795     movl(cnt2, result);

  9796     // Fallthru to tail compare

  9797   }

  9799   // Shift str2 and str1 to the end of the arrays, negate min

  9800   lea(str1, Address(str1, cnt2, scale, 0));

  9801   lea(str2, Address(str2, cnt2, scale, 0));

  9802   negptr(cnt2);

  9804   // Compare the rest of the elements

         // cnt2 is a negative element index that counts up to zero.
  9805   bind(WHILE_HEAD_LABEL);

  9806   load_unsigned_short(result, Address(str1, cnt2, scale, 0));

  9807   load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));

  9808   subl(result, cnt1);

  9809   jccb(Assembler::notZero, POP_LABEL);

  9810   increment(cnt2);

  9811   jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  9813   // Strings are equal up to min length.  Return the length difference.

  9814   bind(LENGTH_DIFF_LABEL);

         // Pops the cnt1 - cnt2 value pushed at entry.
  9815   pop(result);

  9816   jmpb(DONE_LABEL);

  9818   // Discard the stored length difference

  9819   bind(POP_LABEL);

         // result already holds the char difference; the pop into cnt1 only
         // rebalances the stack.
  9820   pop(cnt1);

  9822   // That's it

  9823   bind(DONE_LABEL);

  9824 }

  9826 // Compare char[] arrays aligned to 4 bytes or substrings.
       //
       // Emits code that sets 'result' to 1 when the two char sequences are
       // equal and to 0 otherwise.
       //
       // is_array_equ == true:  ary1 and ary2 are treated as array references.
       //   Each is tested for zero, the lengths are read at length_offset and
       //   compared, and the element data is taken to start at base_offset.
       //   'limit' is loaded from ary1's length field.
       // is_array_equ == false: ary1 and ary2 are used directly as the
       //   addresses of the first element and 'limit' must already hold the
       //   element count.
       //
       // ary1, ary2, limit and chr are overwritten; vec1 and vec2 are only
       // used on the UseSSE42Intrinsics path.

  9827 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,

  9828                                         Register limit, Register result, Register chr,

  9829                                         XMMRegister vec1, XMMRegister vec2) {

  9830   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  9832   int length_offset  = arrayOopDesc::length_offset_in_bytes();

  9833   int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  9835   // Check the input args

         // Identical addresses compare equal without looking at the contents.
  9836   cmpptr(ary1, ary2);

  9837   jcc(Assembler::equal, TRUE_LABEL);

  9839   if (is_array_equ) {

  9840     // Need additional checks for arrays_equals.

           // A zero reference on either side (they are known to differ here)
           // means "not equal".
  9841     testptr(ary1, ary1);

  9842     jcc(Assembler::zero, FALSE_LABEL);

  9843     testptr(ary2, ary2);

  9844     jcc(Assembler::zero, FALSE_LABEL);

  9846     // Check the lengths

  9847     movl(limit, Address(ary1, length_offset));

  9848     cmpl(limit, Address(ary2, length_offset));

  9849     jcc(Assembler::notEqual, FALSE_LABEL);

  9850   }

  9852   // count == 0

  9853   testl(limit, limit);

  9854   jcc(Assembler::zero, TRUE_LABEL);

  9856   if (is_array_equ) {

  9857     // Load array address

  9858     lea(ary1, Address(ary1, base_offset));

  9859     lea(ary2, Address(ary2, base_offset));

  9860   }

         // From here on limit and result are byte counts (2 bytes per element).
  9862   shll(limit, 1);      // byte count != 0

  9863   movl(result, limit); // copy

  9865   if (UseSSE42Intrinsics) {

  9866     // With SSE4.2, use double quad vector compare

  9867     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

  9869     // Compare 16-byte vectors

  9870     andl(result, 0x0000000e);  //   tail count (in bytes)

  9871     andl(limit, 0xfffffff0);   // vector count (in bytes)

  9872     jccb(Assembler::zero, COMPARE_TAIL);

           // Point both arrays at the end of the whole-vector region and walk
           // it with a negative byte offset in limit that counts up to zero.
  9874     lea(ary1, Address(ary1, limit, Address::times_1));

  9875     lea(ary2, Address(ary2, limit, Address::times_1));

  9876     negptr(limit);

  9878     bind(COMPARE_WIDE_VECTORS);

  9879     movdqu(vec1, Address(ary1, limit, Address::times_1));

  9880     movdqu(vec2, Address(ary2, limit, Address::times_1));

           // The xor is all-zero exactly when the two 16-byte vectors match.
  9881     pxor(vec1, vec2);

  9883     ptest(vec1, vec1);

  9884     jccb(Assembler::notZero, FALSE_LABEL);

  9885     addptr(limit, 16);

  9886     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

           // No tail bytes left over: everything matched.
  9888     testl(result, result);

  9889     jccb(Assembler::zero, TRUE_LABEL);

           // Compare the 16 bytes that end at the last tail byte; this
           // overlaps bytes already compared by the loop above.
  9891     movdqu(vec1, Address(ary1, result, Address::times_1, -16));

  9892     movdqu(vec2, Address(ary2, result, Address::times_1, -16));

  9893     pxor(vec1, vec2);

  9895     ptest(vec1, vec1);

  9896     jccb(Assembler::notZero, FALSE_LABEL);

  9897     jmpb(TRUE_LABEL);

  9899     bind(COMPARE_TAIL); // limit is zero

           // Fewer than 16 bytes in total: hand the tail byte count to the
           // 4-byte / 2-byte code below.
  9900     movl(limit, result);

  9901     // Fallthru to tail compare

  9902   }

  9904   // Compare 4-byte vectors

  9905   andl(limit, 0xfffffffc); // vector count (in bytes)

  9906   jccb(Assembler::zero, COMPARE_CHAR);

         // Same end-pointer / negative-offset scheme as above, 4 bytes a step.
  9908   lea(ary1, Address(ary1, limit, Address::times_1));

  9909   lea(ary2, Address(ary2, limit, Address::times_1));

  9910   negptr(limit);

  9912   bind(COMPARE_VECTORS);

  9913   movl(chr, Address(ary1, limit, Address::times_1));

  9914   cmpl(chr, Address(ary2, limit, Address::times_1));

  9915   jccb(Assembler::notEqual, FALSE_LABEL);

  9916   addptr(limit, 4);

  9917   jcc(Assembler::notZero, COMPARE_VECTORS);

  9919   // Compare trailing char (final 2 bytes), if any

  9920   bind(COMPARE_CHAR);

         // Bit 1 of the byte count is set when one element is left over after
         // the 4-byte groups; ary1/ary2 point at it at this point.
  9921   testl(result, 0x2);   // tail  char

  9922   jccb(Assembler::zero, TRUE_LABEL);

  9923   load_unsigned_short(chr, Address(ary1, 0));

         // limit is no longer needed as a counter and serves as scratch.
  9924   load_unsigned_short(limit, Address(ary2, 0));

  9925   cmpl(chr, limit);

  9926   jccb(Assembler::notEqual, FALSE_LABEL);

  9928   bind(TRUE_LABEL);

  9929   movl(result, 1);   // return true

  9930   jmpb(DONE);

  9932   bind(FALSE_LABEL);

  9933   xorl(result, result); // return false

  9935   // That's it

  9936   bind(DONE);

  9937 }

  9939 #ifdef PRODUCT
       // BLOCK_COMMENT expands to nothing in PRODUCT builds and to a
       // block_comment() call otherwise.

  9940 #define BLOCK_COMMENT(str) /* nothing */

  9941 #else

  9942 #define BLOCK_COMMENT(str) block_comment(str)

  9943 #endif

       // BIND binds the label and passes the label's name followed by ':' to
       // BLOCK_COMMENT. Both macros are undefined again after generate_fill.
  9945 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

       // Emits code that stores 'value' into 'count' consecutive elements of
       // type t (T_BYTE, T_SHORT or T_INT) starting at address 'to'.
       //
       // When 'aligned' is true the 1-byte / 2-byte alignment prologue is not
       // emitted (presumably the caller guarantees 'to' is already aligned;
       // confirm against callers).
       //
       // to, value, count and rtmp must be different registers and are all
       // overwritten; xtmp is only used when UseSSE >= 2.
  9946 void MacroAssembler::generate_fill(BasicType t, bool aligned,

  9947                                    Register to, Register value, Register count,

  9948                                    Register rtmp, XMMRegister xtmp) {

  9949   assert_different_registers(to, value, count, rtmp);

  9950   Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;

  9951   Label L_fill_2_bytes, L_fill_4_bytes;

         // 1 << shift is the number of elements in 4 bytes, so
         // (2 << shift) elements are 8 bytes and (8 << shift) elements 32 bytes.
  9953   int shift = -1;

  9954   switch (t) {

  9955     case T_BYTE:

  9956       shift = 2;

  9957       break;

  9958     case T_SHORT:

  9959       shift = 1;

  9960       break;

  9961     case T_INT:

  9962       shift = 0;

  9963       break;

  9964     default: ShouldNotReachHere();

  9965   }

         // Replicate the element across all 32 bits of 'value':
         // byte -> 16 bits here, then 16 -> 32 bits for bytes and shorts below.
  9967   if (t == T_BYTE) {

  9968     andl(value, 0xff);

  9969     movl(rtmp, value);

  9970     shll(rtmp, 8);

  9971     orl(value, rtmp);

  9972   }

  9973   if (t == T_SHORT) {

  9974     andl(value, 0xffff);

  9975   }

  9976   if (t == T_BYTE || t == T_SHORT) {

  9977     movl(rtmp, value);

  9978     shll(rtmp, 16);

  9979     orl(value, rtmp);

  9980   }

  9982   cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element

  9983   jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp

  9984   if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {

  9985     // align destination address ('to') at 4 bytes address boundary

  9986     if (t == T_BYTE) {

  9987       // One byte misalignment happens only for byte arrays

  9988       testptr(to, 1);

  9989       jccb(Assembler::zero, L_skip_align1);

  9990       movb(Address(to, 0), value);

  9991       increment(to);

  9992       decrement(count);

  9993       BIND(L_skip_align1);

  9994     }

  9995     // Two bytes misalignment happens only for byte and short (char) arrays

  9996     testptr(to, 2);

  9997     jccb(Assembler::zero, L_skip_align2);

  9998     movw(Address(to, 0), value);

  9999     addptr(to, 2);

           // 1<<(shift-1) is the number of elements in 2 bytes; shift >= 1 here
           // because this code is only generated for T_BYTE and T_SHORT.
 10000     subl(count, 1<<(shift-1));

 10001     BIND(L_skip_align2);

 10002   }

 10003   if (UseSSE < 2) {

 10004     Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;

 10005     // Fill 32-byte chunks

           // count is biased by one 32-byte chunk so the loop can branch on
           // the sign of the subtraction.
 10006     subl(count, 8 << shift);

 10007     jcc(Assembler::less, L_check_fill_8_bytes);

 10008     align(16);

 10010     BIND(L_fill_32_bytes_loop);

           // Eight 4-byte stores make up one 32-byte chunk.
 10012     for (int i = 0; i < 32; i += 4) {

 10013       movl(Address(to, i), value);

 10014     }

 10016     addptr(to, 32);

 10017     subl(count, 8 << shift);

 10018     jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);

 10019     BIND(L_check_fill_8_bytes);

           // Undo the bias: count is the number of elements still to fill.
 10020     addl(count, 8 << shift);

 10021     jccb(Assembler::zero, L_exit);

 10022     jmpb(L_fill_8_bytes);

 10024     //

 10025     // length is too short, just fill qwords

 10026     //

 10027     BIND(L_fill_8_bytes_loop);

 10028     movl(Address(to, 0), value);

 10029     movl(Address(to, 4), value);

 10030     addptr(to, 8);

 10031     BIND(L_fill_8_bytes);

           // 1 << (shift + 1) is the number of elements in 8 bytes.
 10032     subl(count, 1 << (shift + 1));

 10033     jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);

 10034     // fall through to fill 4 bytes

 10035   } else {

 10036     Label L_fill_32_bytes;

 10037     if (!UseUnalignedLoadStores) {

 10038       // align to 8 bytes, we know we are 4 byte aligned to start

 10039       testptr(to, 4);

 10040       jccb(Assembler::zero, L_fill_32_bytes);

 10041       movl(Address(to, 0), value);

 10042       addptr(to, 4);

 10043       subl(count, 1<<shift);

 10044     }

 10045     BIND(L_fill_32_bytes);

 10046     {

 10047       assert( UseSSE >= 2, "supported cpu only" );

 10048       Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;

 10049       // Fill 32-byte chunks

             // Copy the 32-bit pattern into xtmp and replicate it across the
             // whole XMM register.
 10050       movdl(xtmp, value);

 10051       pshufd(xtmp, xtmp, 0);

 10053       subl(count, 8 << shift);

 10054       jcc(Assembler::less, L_check_fill_8_bytes);

 10055       align(16);

 10057       BIND(L_fill_32_bytes_loop);

             // Two 16-byte stores when unaligned stores are allowed, otherwise
             // four 8-byte stores.
 10059       if (UseUnalignedLoadStores) {

 10060         movdqu(Address(to, 0), xtmp);

 10061         movdqu(Address(to, 16), xtmp);

 10062       } else {

 10063         movq(Address(to, 0), xtmp);

 10064         movq(Address(to, 8), xtmp);

 10065         movq(Address(to, 16), xtmp);

 10066         movq(Address(to, 24), xtmp);

 10067       }

 10069       addptr(to, 32);

 10070       subl(count, 8 << shift);

 10071       jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);

 10072       BIND(L_check_fill_8_bytes);

             // Undo the bias: count is the number of elements still to fill.
 10073       addl(count, 8 << shift);

 10074       jccb(Assembler::zero, L_exit);

 10075       jmpb(L_fill_8_bytes);

 10077       //

 10078       // length is too short, just fill qwords

 10079       //

 10080       BIND(L_fill_8_bytes_loop);

 10081       movq(Address(to, 0), xtmp);

 10082       addptr(to, 8);

 10083       BIND(L_fill_8_bytes);

 10084       subl(count, 1 << (shift + 1));

 10085       jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);

 10086     }

 10087   }

 10088   // fill trailing 4 bytes

         // count may be negative when coming from an 8-byte loop, but only
         // multiples of (2 << shift) were subtracted, so the low bits tested
         // from here on still describe the remaining tail.
 10089   BIND(L_fill_4_bytes);

 10090   testl(count, 1<<shift);

 10091   jccb(Assembler::zero, L_fill_2_bytes);

 10092   movl(Address(to, 0), value);

         // The nesting below binds each label that is jumped to for type t
         // exactly once; labels with no tail code for t are bound just
         // before L_exit.
 10093   if (t == T_BYTE || t == T_SHORT) {

 10094     addptr(to, 4);

 10095     BIND(L_fill_2_bytes);

 10096     // fill trailing 2 bytes

 10097     testl(count, 1<<(shift-1));

 10098     jccb(Assembler::zero, L_fill_byte);

 10099     movw(Address(to, 0), value);

 10100     if (t == T_BYTE) {

 10101       addptr(to, 2);

 10102       BIND(L_fill_byte);

 10103       // fill trailing byte

 10104       testl(count, 1);

 10105       jccb(Assembler::zero, L_exit);

 10106       movb(Address(to, 0), value);

 10107     } else {

 10108       BIND(L_fill_byte);

 10109     }

 10110   } else {

 10111     BIND(L_fill_2_bytes);

 10112   }

 10113   BIND(L_exit);

 10114 }

 10115 #undef BIND

 10116 #undef BLOCK_COMMENT

 10119 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {

 10120   switch (cond) {

 10121     // Note some conditions are synonyms for others

 10122     case Assembler::zero:         return Assembler::notZero;

 10123     case Assembler::notZero:      return Assembler::zero;

 10124     case Assembler::less:         return Assembler::greaterEqual;

 10125     case Assembler::lessEqual:    return Assembler::greater;

 10126     case Assembler::greater:      return Assembler::lessEqual;

 10127     case Assembler::greaterEqual: return Assembler::less;

 10128     case Assembler::below:        return Assembler::aboveEqual;

 10129     case Assembler::belowEqual:   return Assembler::above;

 10130     case Assembler::above:        return Assembler::belowEqual;

 10131     case Assembler::aboveEqual:   return Assembler::below;

 10132     case Assembler::overflow:     return Assembler::noOverflow;

 10133     case Assembler::noOverflow:   return Assembler::overflow;

 10134     case Assembler::negative:     return Assembler::positive;

 10135     case Assembler::positive:     return Assembler::negative;

 10136     case Assembler::parity:       return Assembler::noParity;

 10137     case Assembler::noParity:     return Assembler::parity;

 10138   }

 10139   ShouldNotReachHere(); return Assembler::overflow;

 10140 }

 10142 SkipIfEqual::SkipIfEqual(

 10143     MacroAssembler* masm, const bool* flag_addr, bool value) {

 10144   _masm = masm;

 10145   _masm->cmp8(ExternalAddress((address)flag_addr), value);

 10146   _masm->jcc(Assembler::equal, _label);

 10147 }

 10149 SkipIfEqual::~SkipIfEqual() {

 10150   _masm->bind(_label);

 10151 }

src/cpu/x86/vm/assembler_x86.cpp@069ab3f976d3

src/cpu/x86/vm/assembler_x86.cpp

Mercurial > jdk8-mips64-public > hotspot / file revision

src/cpu/x86/vm/assembler_x86.cpp@069ab3f976d3

src/cpu/x86/vm/assembler_x86.cpp