src/cpu/x86/vm/templateTable_x86_32.cpp

changeset 0: f90c822e73f8
child 6876: 710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,3689 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "precompiled.hpp"
    1.29 +#include "asm/macroAssembler.hpp"
    1.30 +#include "interpreter/interpreter.hpp"
    1.31 +#include "interpreter/interpreterRuntime.hpp"
    1.32 +#include "interpreter/templateTable.hpp"
    1.33 +#include "memory/universe.inline.hpp"
    1.34 +#include "oops/methodData.hpp"
    1.35 +#include "oops/objArrayKlass.hpp"
    1.36 +#include "oops/oop.inline.hpp"
    1.37 +#include "prims/methodHandles.hpp"
    1.38 +#include "runtime/sharedRuntime.hpp"
    1.39 +#include "runtime/stubRoutines.hpp"
    1.40 +#include "runtime/synchronizer.hpp"
    1.41 +#include "utilities/macros.hpp"
    1.42 +
    1.43 +#ifndef CC_INTERP
    1.44 +#define __ _masm->
    1.45 +
    1.46 +//----------------------------------------------------------------------------------------------------
    1.47 +// Platform-dependent initialization
    1.48 +
    1.49 +void TemplateTable::pd_initialize() {
    1.50 +  // No i486 specific initialization
    1.51 +}
    1.52 +
    1.53 +//----------------------------------------------------------------------------------------------------
    1.54 +// Address computation
    1.55 +
    1.56 +// local variables
    1.57 +static inline Address iaddress(int n)            {
    1.58 +  return Address(rdi, Interpreter::local_offset_in_bytes(n));
    1.59 +}
    1.60 +
    1.61 +static inline Address laddress(int n)            { return iaddress(n + 1); }
    1.62 +static inline Address haddress(int n)            { return iaddress(n + 0); }
    1.63 +static inline Address faddress(int n)            { return iaddress(n); }
    1.64 +static inline Address daddress(int n)            { return laddress(n); }
    1.65 +static inline Address aaddress(int n)            { return iaddress(n); }
    1.66 +
    1.67 +static inline Address iaddress(Register r)       {
    1.68 +  return Address(rdi, r, Interpreter::stackElementScale());
    1.69 +}
    1.70 +static inline Address laddress(Register r)       {
    1.71 +  return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(1));
    1.72 +}
    1.73 +static inline Address haddress(Register r)       {
    1.74 +  return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0));
    1.75 +}
    1.76 +
    1.77 +static inline Address faddress(Register r)       { return iaddress(r); }
    1.78 +static inline Address daddress(Register r)       { return laddress(r); }
    1.79 +static inline Address aaddress(Register r)       { return iaddress(r); }
    1.80 +
    1.81 +// expression stack
    1.82 +// (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store
    1.83 +// data beyond the rsp which is potentially unsafe in an MT environment;
    1.84 +// an interrupt may overwrite that data.)
    1.85 +static inline Address at_rsp   () {
    1.86 +  return Address(rsp, 0);
    1.87 +}
    1.88 +
    1.89 +// At the top of the Java expression stack, which may be different from rsp().
    1.90 +// (It is not different for category 1 values.)
    1.91 +static inline Address at_tos   () {
    1.92 +  Address tos = Address(rsp,  Interpreter::expr_offset_in_bytes(0));
    1.93 +  return tos;
    1.94 +}
    1.95 +
    1.96 +static inline Address at_tos_p1() {
    1.97 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
    1.98 +}
    1.99 +
   1.100 +static inline Address at_tos_p2() {
   1.101 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
   1.102 +}
   1.103 +
   1.104 +// Condition conversion
   1.105 +static Assembler::Condition j_not(TemplateTable::Condition cc) {
   1.106 +  switch (cc) {
   1.107 +    case TemplateTable::equal        : return Assembler::notEqual;
   1.108 +    case TemplateTable::not_equal    : return Assembler::equal;
   1.109 +    case TemplateTable::less         : return Assembler::greaterEqual;
   1.110 +    case TemplateTable::less_equal   : return Assembler::greater;
   1.111 +    case TemplateTable::greater      : return Assembler::lessEqual;
   1.112 +    case TemplateTable::greater_equal: return Assembler::less;
   1.113 +  }
   1.114 +  ShouldNotReachHere();
   1.115 +  return Assembler::zero;
   1.116 +}
   1.117 +
   1.118 +
   1.119 +//----------------------------------------------------------------------------------------------------
   1.120 +// Miscellaneous helper routines
   1.121 +
   1.122 +// Store an oop (or NULL) at the address described by obj.
   1.123 +// If val == noreg this means store a NULL
   1.124 +
   1.125 +static void do_oop_store(InterpreterMacroAssembler* _masm,
   1.126 +                         Address obj,
   1.127 +                         Register val,
   1.128 +                         BarrierSet::Name barrier,
   1.129 +                         bool precise) {
   1.130 +  assert(val == noreg || val == rax, "parameter is just for looks");
   1.131 +  switch (barrier) {
   1.132 +#if INCLUDE_ALL_GCS
   1.133 +    case BarrierSet::G1SATBCT:
   1.134 +    case BarrierSet::G1SATBCTLogging:
   1.135 +      {
   1.136 +        // flatten object address if needed
   1.137 +        // We do it regardless of precise because we need the registers
   1.138 +        if (obj.index() == noreg && obj.disp() == 0) {
   1.139 +          if (obj.base() != rdx) {
   1.140 +            __ movl(rdx, obj.base());
   1.141 +          }
   1.142 +        } else {
   1.143 +          __ leal(rdx, obj);
   1.144 +        }
   1.145 +        __ get_thread(rcx);
   1.146 +        __ save_bcp();
   1.147 +        __ g1_write_barrier_pre(rdx /* obj */,
   1.148 +                                rbx /* pre_val */,
   1.149 +                                rcx /* thread */,
   1.150 +                                rsi /* tmp */,
   1.151 +                                val != noreg /* tosca_live */,
   1.152 +                                false /* expand_call */);
   1.153 +
   1.154 +        // Do the actual store
   1.155 +        // noreg means NULL
   1.156 +        if (val == noreg) {
   1.157 +          __ movptr(Address(rdx, 0), NULL_WORD);
   1.158 +          // No post barrier for NULL
   1.159 +        } else {
   1.160 +          __ movl(Address(rdx, 0), val);
   1.161 +          __ g1_write_barrier_post(rdx /* store_adr */,
   1.162 +                                   val /* new_val */,
   1.163 +                                   rcx /* thread */,
   1.164 +                                   rbx /* tmp */,
   1.165 +                                   rsi /* tmp2 */);
   1.166 +        }
   1.167 +        __ restore_bcp();
   1.168 +
   1.169 +      }
   1.170 +      break;
   1.171 +#endif // INCLUDE_ALL_GCS
   1.172 +    case BarrierSet::CardTableModRef:
   1.173 +    case BarrierSet::CardTableExtension:
   1.174 +      {
   1.175 +        if (val == noreg) {
   1.176 +          __ movptr(obj, NULL_WORD);
   1.177 +        } else {
   1.178 +          __ movl(obj, val);
   1.179 +          // flatten object address if needed
   1.180 +          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
   1.181 +            __ store_check(obj.base());
   1.182 +          } else {
   1.183 +            __ leal(rdx, obj);
   1.184 +            __ store_check(rdx);
   1.185 +          }
   1.186 +        }
   1.187 +      }
   1.188 +      break;
   1.189 +    case BarrierSet::ModRef:
   1.190 +    case BarrierSet::Other:
   1.191 +      if (val == noreg) {
   1.192 +        __ movptr(obj, NULL_WORD);
   1.193 +      } else {
   1.194 +        __ movl(obj, val);
   1.195 +      }
   1.196 +      break;
   1.197 +    default      :
   1.198 +      ShouldNotReachHere();
   1.199 +
   1.200 +  }
   1.201 +}
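// Usage note (illustrative; these are the call shapes used by the aastore
// template further down in this file): callers pass rax as the value to
// store, or noreg to mean "store NULL", and _bs->kind() selects which barrier
// branch above gets emitted, e.g.
//
//   do_oop_store(_masm, Address(rdx, 0), rax,   _bs->kind(), true);   // store an oop
//   do_oop_store(_masm, element_address, noreg, _bs->kind(), true);   // store NULL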
   1.202 +
   1.203 +Address TemplateTable::at_bcp(int offset) {
   1.204 +  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   1.205 +  return Address(rsi, offset);
   1.206 +}
   1.207 +
   1.208 +
   1.209 +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
   1.210 +                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
   1.211 +                                   int byte_no) {
   1.212 +  if (!RewriteBytecodes)  return;
   1.213 +  Label L_patch_done;
   1.214 +
   1.215 +  switch (bc) {
   1.216 +  case Bytecodes::_fast_aputfield:
   1.217 +  case Bytecodes::_fast_bputfield:
   1.218 +  case Bytecodes::_fast_cputfield:
   1.219 +  case Bytecodes::_fast_dputfield:
   1.220 +  case Bytecodes::_fast_fputfield:
   1.221 +  case Bytecodes::_fast_iputfield:
   1.222 +  case Bytecodes::_fast_lputfield:
   1.223 +  case Bytecodes::_fast_sputfield:
   1.224 +    {
   1.225 +      // We skip bytecode quickening for putfield instructions when
   1.226 +      // the put_code written to the constant pool cache is zero.
   1.227 +      // This is required so that every execution of this instruction
   1.228 +      // calls out to InterpreterRuntime::resolve_get_put to do
   1.229 +      // additional, required work.
   1.230 +      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
   1.231 +      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
   1.232 +      __ get_cache_and_index_and_bytecode_at_bcp(bc_reg, temp_reg, temp_reg, byte_no, 1);
   1.233 +      __ movl(bc_reg, bc);
   1.234 +      __ cmpl(temp_reg, (int) 0);
   1.235 +      __ jcc(Assembler::zero, L_patch_done);  // don't patch
   1.236 +    }
   1.237 +    break;
   1.238 +  default:
   1.239 +    assert(byte_no == -1, "sanity");
   1.240 +    // the pair bytecodes have already done the load.
   1.241 +    if (load_bc_into_bc_reg) {
   1.242 +      __ movl(bc_reg, bc);
   1.243 +    }
   1.244 +  }
   1.245 +
   1.246 +  if (JvmtiExport::can_post_breakpoint()) {
   1.247 +    Label L_fast_patch;
   1.248 +    // if a breakpoint is present we can't rewrite the stream directly
   1.249 +    __ movzbl(temp_reg, at_bcp(0));
   1.250 +    __ cmpl(temp_reg, Bytecodes::_breakpoint);
   1.251 +    __ jcc(Assembler::notEqual, L_fast_patch);
   1.252 +    __ get_method(temp_reg);
   1.253 +    // Let breakpoint table handling rewrite to quicker bytecode
   1.254 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rsi, bc_reg);
   1.255 +#ifndef ASSERT
   1.256 +    __ jmpb(L_patch_done);
   1.257 +#else
   1.258 +    __ jmp(L_patch_done);
   1.259 +#endif
   1.260 +    __ bind(L_fast_patch);
   1.261 +  }
   1.262 +
   1.263 +#ifdef ASSERT
   1.264 +  Label L_okay;
   1.265 +  __ load_unsigned_byte(temp_reg, at_bcp(0));
   1.266 +  __ cmpl(temp_reg, (int)Bytecodes::java_code(bc));
   1.267 +  __ jccb(Assembler::equal, L_okay);
   1.268 +  __ cmpl(temp_reg, bc_reg);
   1.269 +  __ jcc(Assembler::equal, L_okay);
   1.270 +  __ stop("patching the wrong bytecode");
   1.271 +  __ bind(L_okay);
   1.272 +#endif
   1.273 +
   1.274 +  // patch bytecode
   1.275 +  __ movb(at_bcp(0), bc_reg);
   1.276 +  __ bind(L_patch_done);
   1.277 +}
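// Illustrative note (not VM code): once the checks above pass, the patch is
// just the one-byte store "__ movb(at_bcp(0), bc_reg)"; the opcode at the
// current bcp is overwritten with its fast variant, so every later execution
// of this bytecode dispatches straight to the quick template.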
   1.278 +
   1.279 +//----------------------------------------------------------------------------------------------------
   1.280 +// Individual instructions
   1.281 +
   1.282 +void TemplateTable::nop() {
   1.283 +  transition(vtos, vtos);
   1.284 +  // nothing to do
   1.285 +}
   1.286 +
   1.287 +void TemplateTable::shouldnotreachhere() {
   1.288 +  transition(vtos, vtos);
   1.289 +  __ stop("shouldnotreachhere bytecode");
   1.290 +}
   1.291 +
   1.292 +
   1.293 +
   1.294 +void TemplateTable::aconst_null() {
   1.295 +  transition(vtos, atos);
   1.296 +  __ xorptr(rax, rax);
   1.297 +}
   1.298 +
   1.299 +
   1.300 +void TemplateTable::iconst(int value) {
   1.301 +  transition(vtos, itos);
   1.302 +  if (value == 0) {
   1.303 +    __ xorptr(rax, rax);
   1.304 +  } else {
   1.305 +    __ movptr(rax, value);
   1.306 +  }
   1.307 +}
   1.308 +
   1.309 +
   1.310 +void TemplateTable::lconst(int value) {
   1.311 +  transition(vtos, ltos);
   1.312 +  if (value == 0) {
   1.313 +    __ xorptr(rax, rax);
   1.314 +  } else {
   1.315 +    __ movptr(rax, value);
   1.316 +  }
   1.317 +  assert(value >= 0, "check this code");
   1.318 +  __ xorptr(rdx, rdx);
   1.319 +}
   1.320 +
   1.321 +
   1.322 +void TemplateTable::fconst(int value) {
   1.323 +  transition(vtos, ftos);
   1.324 +         if (value == 0) { __ fldz();
   1.325 +  } else if (value == 1) { __ fld1();
   1.326 +  } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should find a better way to load 2.0 here
   1.327 +  } else                 { ShouldNotReachHere();
   1.328 +  }
   1.329 +}
   1.330 +
   1.331 +
   1.332 +void TemplateTable::dconst(int value) {
   1.333 +  transition(vtos, dtos);
   1.334 +         if (value == 0) { __ fldz();
   1.335 +  } else if (value == 1) { __ fld1();
   1.336 +  } else                 { ShouldNotReachHere();
   1.337 +  }
   1.338 +}
   1.339 +
   1.340 +
   1.341 +void TemplateTable::bipush() {
   1.342 +  transition(vtos, itos);
   1.343 +  __ load_signed_byte(rax, at_bcp(1));
   1.344 +}
   1.345 +
   1.346 +
   1.347 +void TemplateTable::sipush() {
   1.348 +  transition(vtos, itos);
   1.349 +  __ load_unsigned_short(rax, at_bcp(1));
   1.350 +  __ bswapl(rax);
   1.351 +  __ sarl(rax, 16);
   1.352 +}
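// A minimal standalone sketch of the decode above (assumption: GCC/Clang's
// __builtin_bswap32 stands in for the bswapl instruction). The operand bytes
// are big-endian in the bytecode stream, so after the little-endian 16-bit
// load and a 32-bit byte swap the value sits in the upper half word, and an
// arithmetic right shift by 16 sign-extends it, as sarl does.

#include <cassert>
#include <cstdint>

// b1 = byte at bcp+1 (high operand byte), b2 = byte at bcp+2 (low operand byte)
static int32_t decode_sipush(uint8_t b1, uint8_t b2) {
  uint32_t loaded  = (uint32_t)b2 << 8 | b1;     // load_unsigned_short at bcp+1
  uint32_t swapped = __builtin_bswap32(loaded);  // bswapl
  return (int32_t)swapped >> 16;                 // sarl 16: sign-extend
}

int main() {
  assert(decode_sipush(0xFF, 0xFE) == -2);   // 0xFFFE as a signed short
  assert(decode_sipush(0x01, 0x00) == 256);
  return 0;
}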
   1.353 +
   1.354 +void TemplateTable::ldc(bool wide) {
   1.355 +  transition(vtos, vtos);
   1.356 +  Label call_ldc, notFloat, notClass, Done;
   1.357 +
   1.358 +  if (wide) {
   1.359 +    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.360 +  } else {
   1.361 +    __ load_unsigned_byte(rbx, at_bcp(1));
   1.362 +  }
   1.363 +  __ get_cpool_and_tags(rcx, rax);
   1.364 +  const int base_offset = ConstantPool::header_size() * wordSize;
   1.365 +  const int tags_offset = Array<u1>::base_offset_in_bytes();
   1.366 +
   1.367 +  // get type
   1.368 +  __ xorptr(rdx, rdx);
   1.369 +  __ movb(rdx, Address(rax, rbx, Address::times_1, tags_offset));
   1.370 +
   1.371 +  // unresolved class - get the resolved class
   1.372 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
   1.373 +  __ jccb(Assembler::equal, call_ldc);
   1.374 +
   1.375 +  // unresolved class in error (resolution failed) - call into runtime
   1.376 +  // so that the same error from first resolution attempt is thrown.
   1.377 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
   1.378 +  __ jccb(Assembler::equal, call_ldc);
   1.379 +
   1.380 +  // resolved class - need to call vm to get java mirror of the class
   1.381 +  __ cmpl(rdx, JVM_CONSTANT_Class);
   1.382 +  __ jcc(Assembler::notEqual, notClass);
   1.383 +
   1.384 +  __ bind(call_ldc);
   1.385 +  __ movl(rcx, wide);
   1.386 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rcx);
   1.387 +  __ push(atos);
   1.388 +  __ jmp(Done);
   1.389 +
   1.390 +  __ bind(notClass);
   1.391 +  __ cmpl(rdx, JVM_CONSTANT_Float);
   1.392 +  __ jccb(Assembler::notEqual, notFloat);
   1.393 +  // ftos
   1.394 +  __ fld_s(    Address(rcx, rbx, Address::times_ptr, base_offset));
   1.395 +  __ push(ftos);
   1.396 +  __ jmp(Done);
   1.397 +
   1.398 +  __ bind(notFloat);
   1.399 +#ifdef ASSERT
   1.400 +  { Label L;
   1.401 +    __ cmpl(rdx, JVM_CONSTANT_Integer);
   1.402 +    __ jcc(Assembler::equal, L);
   1.403 +    // String and Object are rewritten to fast_aldc
   1.404 +    __ stop("unexpected tag type in ldc");
   1.405 +    __ bind(L);
   1.406 +  }
   1.407 +#endif
   1.408 +  // itos JVM_CONSTANT_Integer only
   1.409 +  __ movl(rax, Address(rcx, rbx, Address::times_ptr, base_offset));
   1.410 +  __ push(itos);
   1.411 +  __ bind(Done);
   1.412 +}
   1.413 +
   1.414 +// Fast path for caching oop constants.
   1.415 +void TemplateTable::fast_aldc(bool wide) {
   1.416 +  transition(vtos, atos);
   1.417 +
   1.418 +  Register result = rax;
   1.419 +  Register tmp = rdx;
   1.420 +  int index_size = wide ? sizeof(u2) : sizeof(u1);
   1.421 +
   1.422 +  Label resolved;
   1.423 +
   1.424 +  // We are resolved if the resolved reference cache entry contains a
   1.425 +  // non-null object (String, MethodType, etc.)
   1.426 +  assert_different_registers(result, tmp);
   1.427 +  __ get_cache_index_at_bcp(tmp, 1, index_size);
   1.428 +  __ load_resolved_reference_at_index(result, tmp);
   1.429 +  __ testl(result, result);
   1.430 +  __ jcc(Assembler::notZero, resolved);
   1.431 +
   1.432 +  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
   1.433 +
   1.434 +  // first time invocation - must resolve first
   1.435 +  __ movl(tmp, (int)bytecode());
   1.436 +  __ call_VM(result, entry, tmp);
   1.437 +
   1.438 +  __ bind(resolved);
   1.439 +
   1.440 +  if (VerifyOops) {
   1.441 +    __ verify_oop(result);
   1.442 +  }
   1.443 +}
   1.444 +
   1.445 +void TemplateTable::ldc2_w() {
   1.446 +  transition(vtos, vtos);
   1.447 +  Label Long, Done;
   1.448 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.449 +
   1.450 +  __ get_cpool_and_tags(rcx, rax);
   1.451 +  const int base_offset = ConstantPool::header_size() * wordSize;
   1.452 +  const int tags_offset = Array<u1>::base_offset_in_bytes();
   1.453 +
   1.454 +  // get type
   1.455 +  __ cmpb(Address(rax, rbx, Address::times_1, tags_offset), JVM_CONSTANT_Double);
   1.456 +  __ jccb(Assembler::notEqual, Long);
   1.457 +  // dtos
   1.458 +  __ fld_d(    Address(rcx, rbx, Address::times_ptr, base_offset));
   1.459 +  __ push(dtos);
   1.460 +  __ jmpb(Done);
   1.461 +
   1.462 +  __ bind(Long);
   1.463 +  // ltos
   1.464 +  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset + 0 * wordSize));
   1.465 +  NOT_LP64(__ movptr(rdx, Address(rcx, rbx, Address::times_ptr, base_offset + 1 * wordSize)));
   1.466 +
   1.467 +  __ push(ltos);
   1.468 +
   1.469 +  __ bind(Done);
   1.470 +}
   1.471 +
   1.472 +
   1.473 +void TemplateTable::locals_index(Register reg, int offset) {
   1.474 +  __ load_unsigned_byte(reg, at_bcp(offset));
   1.475 +  __ negptr(reg);
   1.476 +}
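// Note on the negptr above (illustrative): locals are addressed downward from
// rdi, so the constant-index helpers (iaddress(int) and friends) encode the
// offset via Interpreter::local_offset_in_bytes(n), while a register index is
// negated here so that Address(rdi, reg, stackElementScale()) scales it in
// the same, downward direction.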
   1.477 +
   1.478 +
   1.479 +void TemplateTable::iload() {
   1.480 +  transition(vtos, itos);
   1.481 +  if (RewriteFrequentPairs) {
   1.482 +    Label rewrite, done;
   1.483 +
   1.484 +    // get next byte
   1.485 +    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
   1.486 +    // if the next bytecode is _iload, wait to rewrite: we only want to
   1.487 +    // rewrite the last two iloads of a run as a pair.  If the next bytecode
   1.488 +    // is already _fast_iload, the current and next iload form such a pair,
   1.489 +    // so the current one is rewritten to _fast_iload2 below.
   1.490 +    __ cmpl(rbx, Bytecodes::_iload);
   1.491 +    __ jcc(Assembler::equal, done);
   1.492 +
   1.493 +    __ cmpl(rbx, Bytecodes::_fast_iload);
   1.494 +    __ movl(rcx, Bytecodes::_fast_iload2);
   1.495 +    __ jccb(Assembler::equal, rewrite);
   1.496 +
   1.497 +    // if _caload, rewrite to fast_icaload
   1.498 +    __ cmpl(rbx, Bytecodes::_caload);
   1.499 +    __ movl(rcx, Bytecodes::_fast_icaload);
   1.500 +    __ jccb(Assembler::equal, rewrite);
   1.501 +
   1.502 +    // rewrite so iload doesn't check again.
   1.503 +    __ movl(rcx, Bytecodes::_fast_iload);
   1.504 +
   1.505 +    // rewrite
   1.506 +    // rcx: fast bytecode
   1.507 +    __ bind(rewrite);
   1.508 +    patch_bytecode(Bytecodes::_iload, rcx, rbx, false);
   1.509 +    __ bind(done);
   1.510 +  }
   1.511 +
   1.512 +  // Get the local value into tos
   1.513 +  locals_index(rbx);
   1.514 +  __ movl(rax, iaddress(rbx));
   1.515 +}
   1.516 +
   1.517 +
   1.518 +void TemplateTable::fast_iload2() {
   1.519 +  transition(vtos, itos);
   1.520 +  locals_index(rbx);
   1.521 +  __ movl(rax, iaddress(rbx));
   1.522 +  __ push(itos);
   1.523 +  locals_index(rbx, 3);
   1.524 +  __ movl(rax, iaddress(rbx));
   1.525 +}
   1.526 +
   1.527 +void TemplateTable::fast_iload() {
   1.528 +  transition(vtos, itos);
   1.529 +  locals_index(rbx);
   1.530 +  __ movl(rax, iaddress(rbx));
   1.531 +}
   1.532 +
   1.533 +
   1.534 +void TemplateTable::lload() {
   1.535 +  transition(vtos, ltos);
   1.536 +  locals_index(rbx);
   1.537 +  __ movptr(rax, laddress(rbx));
   1.538 +  NOT_LP64(__ movl(rdx, haddress(rbx)));
   1.539 +}
   1.540 +
   1.541 +
   1.542 +void TemplateTable::fload() {
   1.543 +  transition(vtos, ftos);
   1.544 +  locals_index(rbx);
   1.545 +  __ fld_s(faddress(rbx));
   1.546 +}
   1.547 +
   1.548 +
   1.549 +void TemplateTable::dload() {
   1.550 +  transition(vtos, dtos);
   1.551 +  locals_index(rbx);
   1.552 +  __ fld_d(daddress(rbx));
   1.553 +}
   1.554 +
   1.555 +
   1.556 +void TemplateTable::aload() {
   1.557 +  transition(vtos, atos);
   1.558 +  locals_index(rbx);
   1.559 +  __ movptr(rax, aaddress(rbx));
   1.560 +}
   1.561 +
   1.562 +
   1.563 +void TemplateTable::locals_index_wide(Register reg) {
   1.564 +  __ load_unsigned_short(reg, at_bcp(2));
   1.565 +  __ bswapl(reg);
   1.566 +  __ shrl(reg, 16);
   1.567 +  __ negptr(reg);
   1.568 +}
   1.569 +
   1.570 +
   1.571 +void TemplateTable::wide_iload() {
   1.572 +  transition(vtos, itos);
   1.573 +  locals_index_wide(rbx);
   1.574 +  __ movl(rax, iaddress(rbx));
   1.575 +}
   1.576 +
   1.577 +
   1.578 +void TemplateTable::wide_lload() {
   1.579 +  transition(vtos, ltos);
   1.580 +  locals_index_wide(rbx);
   1.581 +  __ movptr(rax, laddress(rbx));
   1.582 +  NOT_LP64(__ movl(rdx, haddress(rbx)));
   1.583 +}
   1.584 +
   1.585 +
   1.586 +void TemplateTable::wide_fload() {
   1.587 +  transition(vtos, ftos);
   1.588 +  locals_index_wide(rbx);
   1.589 +  __ fld_s(faddress(rbx));
   1.590 +}
   1.591 +
   1.592 +
   1.593 +void TemplateTable::wide_dload() {
   1.594 +  transition(vtos, dtos);
   1.595 +  locals_index_wide(rbx);
   1.596 +  __ fld_d(daddress(rbx));
   1.597 +}
   1.598 +
   1.599 +
   1.600 +void TemplateTable::wide_aload() {
   1.601 +  transition(vtos, atos);
   1.602 +  locals_index_wide(rbx);
   1.603 +  __ movptr(rax, aaddress(rbx));
   1.604 +}
   1.605 +
   1.606 +void TemplateTable::index_check(Register array, Register index) {
   1.607 +  // Pop ptr into array
   1.608 +  __ pop_ptr(array);
   1.609 +  index_check_without_pop(array, index);
   1.610 +}
   1.611 +
   1.612 +void TemplateTable::index_check_without_pop(Register array, Register index) {
   1.613 +  // destroys rbx
   1.614 +  // check array
   1.615 +  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
   1.616 +  LP64_ONLY(__ movslq(index, index));
   1.617 +  // check index
   1.618 +  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
   1.619 +  if (index != rbx) {
   1.620 +    // ??? convention: move aberrant index into rbx for the exception message
   1.621 +    assert(rbx != array, "different registers");
   1.622 +    __ mov(rbx, index);
   1.623 +  }
   1.624 +  __ jump_cc(Assembler::aboveEqual,
   1.625 +             ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
   1.626 +}
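// A minimal sketch of the bounds check above: a single unsigned comparison
// (jcc aboveEqual is the failure path) covers both "index < 0" and
// "index >= length", because a negative index reinterpreted as unsigned is
// larger than any legal array length.

#include <cassert>

static bool index_in_bounds(int index, int length) {
  return (unsigned)index < (unsigned)length;  // one compare, one branch
}

int main() {
  assert(!index_in_bounds(-1, 10));
  assert( index_in_bounds( 9, 10));
  assert(!index_in_bounds(10, 10));
  return 0;
}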
   1.627 +
   1.628 +
   1.629 +void TemplateTable::iaload() {
   1.630 +  transition(itos, itos);
   1.631 +  // rdx: array
   1.632 +  index_check(rdx, rax);  // kills rbx
   1.633 +  // rax: index
   1.634 +  __ movl(rax, Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_INT)));
   1.635 +}
   1.636 +
   1.637 +
   1.638 +void TemplateTable::laload() {
   1.639 +  transition(itos, ltos);
   1.640 +  // rax: index
   1.641 +  // rdx: array
   1.642 +  index_check(rdx, rax);
   1.643 +  __ mov(rbx, rax);
   1.644 +  // rbx: index
   1.645 +  __ movptr(rax, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize));
   1.646 +  NOT_LP64(__ movl(rdx, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize)));
   1.647 +}
   1.648 +
   1.649 +
   1.650 +void TemplateTable::faload() {
   1.651 +  transition(itos, ftos);
   1.652 +  // rdx: array
   1.653 +  index_check(rdx, rax);  // kills rbx
   1.654 +  // rax: index
   1.655 +  __ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
   1.656 +}
   1.657 +
   1.658 +
   1.659 +void TemplateTable::daload() {
   1.660 +  transition(itos, dtos);
   1.661 +  // rdx: array
   1.662 +  index_check(rdx, rax);  // kills rbx
   1.663 +  // rax: index
   1.664 +  __ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
   1.665 +}
   1.666 +
   1.667 +
   1.668 +void TemplateTable::aaload() {
   1.669 +  transition(itos, atos);
   1.670 +  // rdx: array
   1.671 +  index_check(rdx, rax);  // kills rbx
   1.672 +  // rax: index
   1.673 +  __ movptr(rax, Address(rdx, rax, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.674 +}
   1.675 +
   1.676 +
   1.677 +void TemplateTable::baload() {
   1.678 +  transition(itos, itos);
   1.679 +  // rdx: array
   1.680 +  index_check(rdx, rax);  // kills rbx
   1.681 +  // rax: index
   1.682 +  // can do better code for P5 - fix this at some point
   1.683 +  __ load_signed_byte(rbx, Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
   1.684 +  __ mov(rax, rbx);
   1.685 +}
   1.686 +
   1.687 +
   1.688 +void TemplateTable::caload() {
   1.689 +  transition(itos, itos);
   1.690 +  // rdx: array
   1.691 +  index_check(rdx, rax);  // kills rbx
   1.692 +  // rax: index
   1.693 +  // can do better code for P5 - may want to improve this at some point
   1.694 +  __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.695 +  __ mov(rax, rbx);
   1.696 +}
   1.697 +
   1.698 +// iload followed by caload frequent pair
   1.699 +void TemplateTable::fast_icaload() {
   1.700 +  transition(vtos, itos);
   1.701 +  // load index out of locals
   1.702 +  locals_index(rbx);
   1.703 +  __ movl(rax, iaddress(rbx));
   1.704 +
   1.705 +  // rdx: array
   1.706 +  index_check(rdx, rax);
   1.707 +  // rax: index
   1.708 +  __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.709 +  __ mov(rax, rbx);
   1.710 +}
   1.711 +
   1.712 +void TemplateTable::saload() {
   1.713 +  transition(itos, itos);
   1.714 +  // rdx: array
   1.715 +  index_check(rdx, rax);  // kills rbx
   1.716 +  // rax: index
   1.717 +  // can do better code for P5 - may want to improve this at some point
   1.718 +  __ load_signed_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
   1.719 +  __ mov(rax, rbx);
   1.720 +}
   1.721 +
   1.722 +
   1.723 +void TemplateTable::iload(int n) {
   1.724 +  transition(vtos, itos);
   1.725 +  __ movl(rax, iaddress(n));
   1.726 +}
   1.727 +
   1.728 +
   1.729 +void TemplateTable::lload(int n) {
   1.730 +  transition(vtos, ltos);
   1.731 +  __ movptr(rax, laddress(n));
   1.732 +  NOT_LP64(__ movptr(rdx, haddress(n)));
   1.733 +}
   1.734 +
   1.735 +
   1.736 +void TemplateTable::fload(int n) {
   1.737 +  transition(vtos, ftos);
   1.738 +  __ fld_s(faddress(n));
   1.739 +}
   1.740 +
   1.741 +
   1.742 +void TemplateTable::dload(int n) {
   1.743 +  transition(vtos, dtos);
   1.744 +  __ fld_d(daddress(n));
   1.745 +}
   1.746 +
   1.747 +
   1.748 +void TemplateTable::aload(int n) {
   1.749 +  transition(vtos, atos);
   1.750 +  __ movptr(rax, aaddress(n));
   1.751 +}
   1.752 +
   1.753 +
   1.754 +void TemplateTable::aload_0() {
   1.755 +  transition(vtos, atos);
   1.756 +  // According to bytecode histograms, the pairs:
   1.757 +  //
   1.758 +  // _aload_0, _fast_igetfield
   1.759 +  // _aload_0, _fast_agetfield
   1.760 +  // _aload_0, _fast_fgetfield
   1.761 +  //
   1.762 +  // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
   1.763 +  // bytecode checks if the next bytecode is either _fast_igetfield,
   1.764 +  // _fast_agetfield or _fast_fgetfield and then rewrites the
   1.765 +  // current bytecode into a pair bytecode; otherwise it rewrites the current
   1.766 +  // bytecode into _fast_aload_0 that doesn't do the pair check anymore.
   1.767 +  //
   1.768 +  // Note: If the next bytecode is _getfield, the rewrite must be delayed,
   1.769 +  //       otherwise we may miss an opportunity for a pair.
   1.770 +  //
   1.771 +  // Also rewrite frequent pairs
   1.772 +  //   aload_0, aload_1
   1.773 +  //   aload_0, iload_1
   1.774 +  // These bytecodes are the most profitable to rewrite since they need only a small amount of code
   1.775 +  if (RewriteFrequentPairs) {
   1.776 +    Label rewrite, done;
   1.777 +    // get next byte
   1.778 +    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   1.779 +
   1.780 +    // do actual aload_0
   1.781 +    aload(0);
   1.782 +
   1.783 +    // if _getfield then wait with rewrite
   1.784 +    __ cmpl(rbx, Bytecodes::_getfield);
   1.785 +    __ jcc(Assembler::equal, done);
   1.786 +
   1.787 +    // if _igetfield then rewrite to _fast_iaccess_0
   1.788 +    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.789 +    __ cmpl(rbx, Bytecodes::_fast_igetfield);
   1.790 +    __ movl(rcx, Bytecodes::_fast_iaccess_0);
   1.791 +    __ jccb(Assembler::equal, rewrite);
   1.792 +
   1.793 +    // if _agetfield then rewrite to _fast_aaccess_0
   1.794 +    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.795 +    __ cmpl(rbx, Bytecodes::_fast_agetfield);
   1.796 +    __ movl(rcx, Bytecodes::_fast_aaccess_0);
   1.797 +    __ jccb(Assembler::equal, rewrite);
   1.798 +
   1.799 +    // if _fgetfield then rewrite to _fast_faccess_0
   1.800 +    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.801 +    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
   1.802 +    __ movl(rcx, Bytecodes::_fast_faccess_0);
   1.803 +    __ jccb(Assembler::equal, rewrite);
   1.804 +
   1.805 +    // else rewrite to _fast_aload0
   1.806 +    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.807 +    __ movl(rcx, Bytecodes::_fast_aload_0);
   1.808 +
   1.809 +    // rewrite
   1.810 +    // rcx: fast bytecode
   1.811 +    __ bind(rewrite);
   1.812 +    patch_bytecode(Bytecodes::_aload_0, rcx, rbx, false);
   1.813 +
   1.814 +    __ bind(done);
   1.815 +  } else {
   1.816 +    aload(0);
   1.817 +  }
   1.818 +}
   1.819 +
   1.820 +void TemplateTable::istore() {
   1.821 +  transition(itos, vtos);
   1.822 +  locals_index(rbx);
   1.823 +  __ movl(iaddress(rbx), rax);
   1.824 +}
   1.825 +
   1.826 +
   1.827 +void TemplateTable::lstore() {
   1.828 +  transition(ltos, vtos);
   1.829 +  locals_index(rbx);
   1.830 +  __ movptr(laddress(rbx), rax);
   1.831 +  NOT_LP64(__ movptr(haddress(rbx), rdx));
   1.832 +}
   1.833 +
   1.834 +
   1.835 +void TemplateTable::fstore() {
   1.836 +  transition(ftos, vtos);
   1.837 +  locals_index(rbx);
   1.838 +  __ fstp_s(faddress(rbx));
   1.839 +}
   1.840 +
   1.841 +
   1.842 +void TemplateTable::dstore() {
   1.843 +  transition(dtos, vtos);
   1.844 +  locals_index(rbx);
   1.845 +  __ fstp_d(daddress(rbx));
   1.846 +}
   1.847 +
   1.848 +
   1.849 +void TemplateTable::astore() {
   1.850 +  transition(vtos, vtos);
   1.851 +  __ pop_ptr(rax);
   1.852 +  locals_index(rbx);
   1.853 +  __ movptr(aaddress(rbx), rax);
   1.854 +}
   1.855 +
   1.856 +
   1.857 +void TemplateTable::wide_istore() {
   1.858 +  transition(vtos, vtos);
   1.859 +  __ pop_i(rax);
   1.860 +  locals_index_wide(rbx);
   1.861 +  __ movl(iaddress(rbx), rax);
   1.862 +}
   1.863 +
   1.864 +
   1.865 +void TemplateTable::wide_lstore() {
   1.866 +  transition(vtos, vtos);
   1.867 +  __ pop_l(rax, rdx);
   1.868 +  locals_index_wide(rbx);
   1.869 +  __ movptr(laddress(rbx), rax);
   1.870 +  NOT_LP64(__ movl(haddress(rbx), rdx));
   1.871 +}
   1.872 +
   1.873 +
   1.874 +void TemplateTable::wide_fstore() {
   1.875 +  wide_istore();
   1.876 +}
   1.877 +
   1.878 +
   1.879 +void TemplateTable::wide_dstore() {
   1.880 +  wide_lstore();
   1.881 +}
   1.882 +
   1.883 +
   1.884 +void TemplateTable::wide_astore() {
   1.885 +  transition(vtos, vtos);
   1.886 +  __ pop_ptr(rax);
   1.887 +  locals_index_wide(rbx);
   1.888 +  __ movptr(aaddress(rbx), rax);
   1.889 +}
   1.890 +
   1.891 +
   1.892 +void TemplateTable::iastore() {
   1.893 +  transition(itos, vtos);
   1.894 +  __ pop_i(rbx);
   1.895 +  // rax: value
   1.896 +  // rdx: array
   1.897 +  index_check(rdx, rbx);  // prefer index in rbx
   1.898 +  // rbx: index
   1.899 +  __ movl(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_INT)), rax);
   1.900 +}
   1.901 +
   1.902 +
   1.903 +void TemplateTable::lastore() {
   1.904 +  transition(ltos, vtos);
   1.905 +  __ pop_i(rbx);
   1.906 +  // rax: low(value)
   1.907 +  // rcx: array
   1.908 +  // rdx: high(value)
   1.909 +  index_check(rcx, rbx);  // prefer index in rbx
   1.910 +  // rbx: index
   1.911 +  __ movptr(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize), rax);
   1.912 +  NOT_LP64(__ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize), rdx));
   1.913 +}
   1.914 +
   1.915 +
   1.916 +void TemplateTable::fastore() {
   1.917 +  transition(ftos, vtos);
   1.918 +  __ pop_i(rbx);
   1.919 +  // rdx: array
   1.920 +  // st0: value
   1.921 +  index_check(rdx, rbx);  // prefer index in rbx
   1.922 +  // rbx: index
   1.923 +  __ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
   1.924 +}
   1.925 +
   1.926 +
   1.927 +void TemplateTable::dastore() {
   1.928 +  transition(dtos, vtos);
   1.929 +  __ pop_i(rbx);
   1.930 +  // rdx: array
   1.931 +  // st0: value
   1.932 +  index_check(rdx, rbx);  // prefer index in rbx
   1.933 +  // rbx: index
   1.934 +  __ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
   1.935 +}
   1.936 +
   1.937 +
   1.938 +void TemplateTable::aastore() {
   1.939 +  Label is_null, ok_is_subtype, done;
   1.940 +  transition(vtos, vtos);
   1.941 +  // stack: ..., array, index, value
   1.942 +  __ movptr(rax, at_tos());     // Value
   1.943 +  __ movl(rcx, at_tos_p1());  // Index
   1.944 +  __ movptr(rdx, at_tos_p2());  // Array
   1.945 +
   1.946 +  Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   1.947 +  index_check_without_pop(rdx, rcx);      // kills rbx,
   1.948 +  // do array store check - check for NULL value first
   1.949 +  __ testptr(rax, rax);
   1.950 +  __ jcc(Assembler::zero, is_null);
   1.951 +
   1.952 +  // Move subklass into EBX
   1.953 +  __ load_klass(rbx, rax);
   1.954 +  // Move superklass into EAX
   1.955 +  __ load_klass(rax, rdx);
   1.956 +  __ movptr(rax, Address(rax, ObjArrayKlass::element_klass_offset()));
   1.957 +  // Compress array+index*wordSize+12 into a single register.  Frees ECX.
   1.958 +  __ lea(rdx, element_address);
   1.959 +
   1.960 +  // Generate subtype check.  Blows ECX.  Resets EDI to locals.
   1.961 +  // Superklass in EAX.  Subklass in EBX.
   1.962 +  __ gen_subtype_check( rbx, ok_is_subtype );
   1.963 +
   1.964 +  // Come here on failure
   1.965 +  // object is at TOS
   1.966 +  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
   1.967 +
   1.968 +  // Come here on success
   1.969 +  __ bind(ok_is_subtype);
   1.970 +
   1.971 +  // Get the value to store
   1.972 +  __ movptr(rax, at_rsp());
   1.973 +  // and store it with appropriate barrier
   1.974 +  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
   1.975 +
   1.976 +  __ jmp(done);
   1.977 +
   1.978 +  // Have a NULL in EAX, EDX=array, ECX=index.  Store NULL at ary[idx]
   1.979 +  __ bind(is_null);
   1.980 +  __ profile_null_seen(rbx);
   1.981 +
   1.982 +  // Store NULL, (noreg means NULL to do_oop_store)
   1.983 +  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
   1.984 +
   1.985 +  // Pop stack arguments
   1.986 +  __ bind(done);
   1.987 +  __ addptr(rsp, 3 * Interpreter::stackElementSize);
   1.988 +}
   1.989 +
   1.990 +
   1.991 +void TemplateTable::bastore() {
   1.992 +  transition(itos, vtos);
   1.993 +  __ pop_i(rbx);
   1.994 +  // rax: value
   1.995 +  // rdx: array
   1.996 +  index_check(rdx, rbx);  // prefer index in rbx
   1.997 +  // rbx: index
   1.998 +  __ movb(Address(rdx, rbx, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)), rax);
   1.999 +}
  1.1000 +
  1.1001 +
  1.1002 +void TemplateTable::castore() {
  1.1003 +  transition(itos, vtos);
  1.1004 +  __ pop_i(rbx);
  1.1005 +  // rax: value
  1.1006 +  // rdx: array
  1.1007 +  index_check(rdx, rbx);  // prefer index in rbx
  1.1008 +  // rbx: index
  1.1009 +  __ movw(Address(rdx, rbx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), rax);
  1.1010 +}
  1.1011 +
  1.1012 +
  1.1013 +void TemplateTable::sastore() {
  1.1014 +  castore();
  1.1015 +}
  1.1016 +
  1.1017 +
  1.1018 +void TemplateTable::istore(int n) {
  1.1019 +  transition(itos, vtos);
  1.1020 +  __ movl(iaddress(n), rax);
  1.1021 +}
  1.1022 +
  1.1023 +
  1.1024 +void TemplateTable::lstore(int n) {
  1.1025 +  transition(ltos, vtos);
  1.1026 +  __ movptr(laddress(n), rax);
  1.1027 +  NOT_LP64(__ movptr(haddress(n), rdx));
  1.1028 +}
  1.1029 +
  1.1030 +
  1.1031 +void TemplateTable::fstore(int n) {
  1.1032 +  transition(ftos, vtos);
  1.1033 +  __ fstp_s(faddress(n));
  1.1034 +}
  1.1035 +
  1.1036 +
  1.1037 +void TemplateTable::dstore(int n) {
  1.1038 +  transition(dtos, vtos);
  1.1039 +  __ fstp_d(daddress(n));
  1.1040 +}
  1.1041 +
  1.1042 +
  1.1043 +void TemplateTable::astore(int n) {
  1.1044 +  transition(vtos, vtos);
  1.1045 +  __ pop_ptr(rax);
  1.1046 +  __ movptr(aaddress(n), rax);
  1.1047 +}
  1.1048 +
  1.1049 +
  1.1050 +void TemplateTable::pop() {
  1.1051 +  transition(vtos, vtos);
  1.1052 +  __ addptr(rsp, Interpreter::stackElementSize);
  1.1053 +}
  1.1054 +
  1.1055 +
  1.1056 +void TemplateTable::pop2() {
  1.1057 +  transition(vtos, vtos);
  1.1058 +  __ addptr(rsp, 2*Interpreter::stackElementSize);
  1.1059 +}
  1.1060 +
  1.1061 +
  1.1062 +void TemplateTable::dup() {
  1.1063 +  transition(vtos, vtos);
  1.1064 +  // stack: ..., a
  1.1065 +  __ load_ptr(0, rax);
  1.1066 +  __ push_ptr(rax);
  1.1067 +  // stack: ..., a, a
  1.1068 +}
  1.1069 +
  1.1070 +
  1.1071 +void TemplateTable::dup_x1() {
  1.1072 +  transition(vtos, vtos);
  1.1073 +  // stack: ..., a, b
  1.1074 +  __ load_ptr( 0, rax);  // load b
  1.1075 +  __ load_ptr( 1, rcx);  // load a
  1.1076 +  __ store_ptr(1, rax);  // store b
  1.1077 +  __ store_ptr(0, rcx);  // store a
  1.1078 +  __ push_ptr(rax);      // push b
  1.1079 +  // stack: ..., b, a, b
  1.1080 +}
  1.1081 +
  1.1082 +
  1.1083 +void TemplateTable::dup_x2() {
  1.1084 +  transition(vtos, vtos);
  1.1085 +  // stack: ..., a, b, c
  1.1086 +  __ load_ptr( 0, rax);  // load c
  1.1087 +  __ load_ptr( 2, rcx);  // load a
  1.1088 +  __ store_ptr(2, rax);  // store c in a
  1.1089 +  __ push_ptr(rax);      // push c
  1.1090 +  // stack: ..., c, b, c, c
  1.1091 +  __ load_ptr( 2, rax);  // load b
  1.1092 +  __ store_ptr(2, rcx);  // store a in b
  1.1093 +  // stack: ..., c, a, c, c
  1.1094 +  __ store_ptr(1, rax);  // store b in c
  1.1095 +  // stack: ..., c, a, b, c
  1.1096 +}
  1.1097 +
  1.1098 +
  1.1099 +void TemplateTable::dup2() {
  1.1100 +  transition(vtos, vtos);
  1.1101 +  // stack: ..., a, b
  1.1102 +  __ load_ptr(1, rax);  // load a
  1.1103 +  __ push_ptr(rax);     // push a
  1.1104 +  __ load_ptr(1, rax);  // load b
  1.1105 +  __ push_ptr(rax);     // push b
  1.1106 +  // stack: ..., a, b, a, b
  1.1107 +}
  1.1108 +
  1.1109 +
  1.1110 +void TemplateTable::dup2_x1() {
  1.1111 +  transition(vtos, vtos);
  1.1112 +  // stack: ..., a, b, c
  1.1113 +  __ load_ptr( 0, rcx);  // load c
  1.1114 +  __ load_ptr( 1, rax);  // load b
  1.1115 +  __ push_ptr(rax);      // push b
  1.1116 +  __ push_ptr(rcx);      // push c
  1.1117 +  // stack: ..., a, b, c, b, c
  1.1118 +  __ store_ptr(3, rcx);  // store c in b
  1.1119 +  // stack: ..., a, c, c, b, c
  1.1120 +  __ load_ptr( 4, rcx);  // load a
  1.1121 +  __ store_ptr(2, rcx);  // store a in 2nd c
  1.1122 +  // stack: ..., a, c, a, b, c
  1.1123 +  __ store_ptr(4, rax);  // store b in a
  1.1124 +  // stack: ..., b, c, a, b, c
  1.1126 +}
  1.1127 +
  1.1128 +
  1.1129 +void TemplateTable::dup2_x2() {
  1.1130 +  transition(vtos, vtos);
  1.1131 +  // stack: ..., a, b, c, d
  1.1132 +  __ load_ptr( 0, rcx);  // load d
  1.1133 +  __ load_ptr( 1, rax);  // load c
  1.1134 +  __ push_ptr(rax);      // push c
  1.1135 +  __ push_ptr(rcx);      // push d
  1.1136 +  // stack: ..., a, b, c, d, c, d
  1.1137 +  __ load_ptr( 4, rax);  // load b
  1.1138 +  __ store_ptr(2, rax);  // store b in d
  1.1139 +  __ store_ptr(4, rcx);  // store d in b
  1.1140 +  // stack: ..., a, d, c, b, c, d
  1.1141 +  __ load_ptr( 5, rcx);  // load a
  1.1142 +  __ load_ptr( 3, rax);  // load c
  1.1143 +  __ store_ptr(3, rcx);  // store a in c
  1.1144 +  __ store_ptr(5, rax);  // store c in a
  1.1145 +  // stack: ..., c, d, a, b, c, d
  1.1147 +}
  1.1148 +
  1.1149 +
  1.1150 +void TemplateTable::swap() {
  1.1151 +  transition(vtos, vtos);
  1.1152 +  // stack: ..., a, b
  1.1153 +  __ load_ptr( 1, rcx);  // load a
  1.1154 +  __ load_ptr( 0, rax);  // load b
  1.1155 +  __ store_ptr(0, rcx);  // store a in b
  1.1156 +  __ store_ptr(1, rax);  // store b in a
  1.1157 +  // stack: ..., b, a
  1.1158 +}
  1.1159 +
  1.1160 +
  1.1161 +void TemplateTable::iop2(Operation op) {
  1.1162 +  transition(itos, itos);
  1.1163 +  switch (op) {
  1.1164 +    case add  :                   __ pop_i(rdx); __ addl (rax, rdx); break;
  1.1165 +    case sub  : __ mov(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
  1.1166 +    case mul  :                   __ pop_i(rdx); __ imull(rax, rdx); break;
  1.1167 +    case _and :                   __ pop_i(rdx); __ andl (rax, rdx); break;
  1.1168 +    case _or  :                   __ pop_i(rdx); __ orl  (rax, rdx); break;
  1.1169 +    case _xor :                   __ pop_i(rdx); __ xorl (rax, rdx); break;
  1.1170 +    case shl  : __ mov(rcx, rax); __ pop_i(rax); __ shll (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
  1.1171 +    case shr  : __ mov(rcx, rax); __ pop_i(rax); __ sarl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
  1.1172 +    case ushr : __ mov(rcx, rax); __ pop_i(rax); __ shrl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
  1.1173 +    default   : ShouldNotReachHere();
  1.1174 +  }
  1.1175 +}
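// Minimal sketch of why no explicit "and rcx, 31" is emitted for the shift
// cases above: Java int shifts use only the low five bits of the count
// (JLS 15.19), which is exactly the masking the x86 shl/sar/shr instructions
// apply to the count in CL.

#include <cassert>

int main() {
  int count = 33;                        // Java: 1 << 33 == 2
  assert((1 << (count & 0x1f)) == 2);
  return 0;
}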
  1.1176 +
  1.1177 +
  1.1178 +void TemplateTable::lop2(Operation op) {
  1.1179 +  transition(ltos, ltos);
  1.1180 +  __ pop_l(rbx, rcx);
  1.1181 +  switch (op) {
  1.1182 +    case add  : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
  1.1183 +    case sub  : __ subl(rbx, rax); __ sbbl(rcx, rdx);
  1.1184 +                __ mov (rax, rbx); __ mov (rdx, rcx); break;
  1.1185 +    case _and : __ andl(rax, rbx); __ andl(rdx, rcx); break;
  1.1186 +    case _or  : __ orl (rax, rbx); __ orl (rdx, rcx); break;
  1.1187 +    case _xor : __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
  1.1188 +    default   : ShouldNotReachHere();
  1.1189 +  }
  1.1190 +}
  1.1191 +
  1.1192 +
  1.1193 +void TemplateTable::idiv() {
  1.1194 +  transition(itos, itos);
  1.1195 +  __ mov(rcx, rax);
  1.1196 +  __ pop_i(rax);
  1.1197 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1198 +  //       they are not equal, one could do a normal division (no correction
  1.1199 +  //       needed), which may speed up this implementation for the common case.
  1.1200 +  //       (see also JVM spec., p.243 & p.271)
  1.1201 +  __ corrected_idivl(rcx);
  1.1202 +}
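// Sketch of the corner case corrected_idivl() exists for: a hardware idiv of
// min_int by -1 raises a divide error, while the JVM requires the result
// min_int (and remainder 0), so the macro special-cases that operand pair.

#include <cassert>
#include <climits>

static int java_idiv(int x, int y) {             // y != 0 is checked elsewhere
  if (x == INT_MIN && y == -1) return INT_MIN;   // would trap in hardware
  return x / y;
}

int main() {
  assert(java_idiv(INT_MIN, -1) == INT_MIN);
  assert(java_idiv(-7, 2) == -3);                // truncation toward zero, as in Java
  return 0;
}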
  1.1203 +
  1.1204 +
  1.1205 +void TemplateTable::irem() {
  1.1206 +  transition(itos, itos);
  1.1207 +  __ mov(rcx, rax);
  1.1208 +  __ pop_i(rax);
  1.1209 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1210 +  //       they are not equal, one could do a normal division (no correction
  1.1211 +  //       needed), which may speed up this implementation for the common case.
  1.1212 +  //       (see also JVM spec., p.243 & p.271)
  1.1213 +  __ corrected_idivl(rcx);
  1.1214 +  __ mov(rax, rdx);
  1.1215 +}
  1.1216 +
  1.1217 +
  1.1218 +void TemplateTable::lmul() {
  1.1219 +  transition(ltos, ltos);
  1.1220 +  __ pop_l(rbx, rcx);
  1.1221 +  __ push(rcx); __ push(rbx);
  1.1222 +  __ push(rdx); __ push(rax);
  1.1223 +  __ lmul(2 * wordSize, 0);
  1.1224 +  __ addptr(rsp, 4 * wordSize);  // take off temporaries
  1.1225 +}
  1.1226 +
  1.1227 +
  1.1228 +void TemplateTable::ldiv() {
  1.1229 +  transition(ltos, ltos);
  1.1230 +  __ pop_l(rbx, rcx);
  1.1231 +  __ push(rcx); __ push(rbx);
  1.1232 +  __ push(rdx); __ push(rax);
  1.1233 +  // check if y = 0
  1.1234 +  __ orl(rax, rdx);
  1.1235 +  __ jump_cc(Assembler::zero,
  1.1236 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1237 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
  1.1238 +  __ addptr(rsp, 4 * wordSize);  // take off temporaries
  1.1239 +}
  1.1240 +
  1.1241 +
  1.1242 +void TemplateTable::lrem() {
  1.1243 +  transition(ltos, ltos);
  1.1244 +  __ pop_l(rbx, rcx);
  1.1245 +  __ push(rcx); __ push(rbx);
  1.1246 +  __ push(rdx); __ push(rax);
  1.1247 +  // check if y = 0
  1.1248 +  __ orl(rax, rdx);
  1.1249 +  __ jump_cc(Assembler::zero,
  1.1250 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1251 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
  1.1252 +  __ addptr(rsp, 4 * wordSize);
  1.1253 +}
  1.1254 +
  1.1255 +
  1.1256 +void TemplateTable::lshl() {
  1.1257 +  transition(itos, ltos);
  1.1258 +  __ movl(rcx, rax);                             // get shift count
  1.1259 +  __ pop_l(rax, rdx);                            // get shift value
  1.1260 +  __ lshl(rdx, rax);
  1.1261 +}
  1.1262 +
  1.1263 +
  1.1264 +void TemplateTable::lshr() {
  1.1265 +  transition(itos, ltos);
  1.1266 +  __ mov(rcx, rax);                              // get shift count
  1.1267 +  __ pop_l(rax, rdx);                            // get shift value
  1.1268 +  __ lshr(rdx, rax, true);
  1.1269 +}
  1.1270 +
  1.1271 +
  1.1272 +void TemplateTable::lushr() {
  1.1273 +  transition(itos, ltos);
  1.1274 +  __ mov(rcx, rax);                              // get shift count
  1.1275 +  __ pop_l(rax, rdx);                            // get shift value
  1.1276 +  __ lshr(rdx, rax);
  1.1277 +}
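// Sketch of the count semantics behind the lshl/lshr macros used above: Java
// long shifts use only the low six bits of the count (JLS 15.19), which is
// the contract the double-word shift sequences for the rdx:rax pair are
// expected to implement.

#include <cassert>

int main() {
  long long v = 1;
  int count = 65;                          // Java: 1L << 65 == 2L
  assert((v << (count & 0x3f)) == 2);
  return 0;
}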
  1.1278 +
  1.1279 +
  1.1280 +void TemplateTable::fop2(Operation op) {
  1.1281 +  transition(ftos, ftos);
  1.1282 +  switch (op) {
  1.1283 +    case add: __ fadd_s (at_rsp());                break;
  1.1284 +    case sub: __ fsubr_s(at_rsp());                break;
  1.1285 +    case mul: __ fmul_s (at_rsp());                break;
  1.1286 +    case div: __ fdivr_s(at_rsp());                break;
  1.1287 +    case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
  1.1288 +    default : ShouldNotReachHere();
  1.1289 +  }
  1.1290 +  __ f2ieee();
  1.1291 +  __ pop(rax);  // pop float thing off
  1.1292 +}
  1.1293 +
  1.1294 +
  1.1295 +void TemplateTable::dop2(Operation op) {
  1.1296 +  transition(dtos, dtos);
  1.1297 +
  1.1298 +  switch (op) {
  1.1299 +    case add: __ fadd_d (at_rsp());                break;
  1.1300 +    case sub: __ fsubr_d(at_rsp());                break;
  1.1301 +    case mul: {
  1.1302 +      Label L_strict;
  1.1303 +      Label L_join;
  1.1304 +      const Address access_flags      (rcx, Method::access_flags_offset());
  1.1305 +      __ get_method(rcx);
  1.1306 +      __ movl(rcx, access_flags);
  1.1307 +      __ testl(rcx, JVM_ACC_STRICT);
  1.1308 +      __ jccb(Assembler::notZero, L_strict);
  1.1309 +      __ fmul_d (at_rsp());
  1.1310 +      __ jmpb(L_join);
  1.1311 +      __ bind(L_strict);
  1.1312 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
  1.1313 +      __ fmulp();
  1.1314 +      __ fmul_d (at_rsp());
  1.1315 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
  1.1316 +      __ fmulp();
  1.1317 +      __ bind(L_join);
  1.1318 +      break;
  1.1319 +    }
  1.1320 +    case div: {
  1.1321 +      Label L_strict;
  1.1322 +      Label L_join;
  1.1323 +      const Address access_flags      (rcx, Method::access_flags_offset());
  1.1324 +      __ get_method(rcx);
  1.1325 +      __ movl(rcx, access_flags);
  1.1326 +      __ testl(rcx, JVM_ACC_STRICT);
  1.1327 +      __ jccb(Assembler::notZero, L_strict);
  1.1328 +      __ fdivr_d(at_rsp());
  1.1329 +      __ jmp(L_join);
  1.1330 +      __ bind(L_strict);
  1.1331 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
  1.1332 +      __ fmul_d (at_rsp());
  1.1333 +      __ fdivrp();
  1.1334 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
  1.1335 +      __ fmulp();
  1.1336 +      __ bind(L_join);
  1.1337 +      break;
  1.1338 +    }
  1.1339 +    case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
  1.1340 +    default : ShouldNotReachHere();
  1.1341 +  }
  1.1342 +  __ d2ieee();
  1.1343 +  // Pop double precision number from rsp.
  1.1344 +  __ pop(rax);
  1.1345 +  __ pop(rdx);
  1.1346 +}
  1.1347 +
  1.1348 +
  1.1349 +void TemplateTable::ineg() {
  1.1350 +  transition(itos, itos);
  1.1351 +  __ negl(rax);
  1.1352 +}
  1.1353 +
  1.1354 +
  1.1355 +void TemplateTable::lneg() {
  1.1356 +  transition(ltos, ltos);
  1.1357 +  __ lneg(rdx, rax);
  1.1358 +}
  1.1359 +
  1.1360 +
  1.1361 +void TemplateTable::fneg() {
  1.1362 +  transition(ftos, ftos);
  1.1363 +  __ fchs();
  1.1364 +}
  1.1365 +
  1.1366 +
  1.1367 +void TemplateTable::dneg() {
  1.1368 +  transition(dtos, dtos);
  1.1369 +  __ fchs();
  1.1370 +}
  1.1371 +
  1.1372 +
  1.1373 +void TemplateTable::iinc() {
  1.1374 +  transition(vtos, vtos);
  1.1375 +  __ load_signed_byte(rdx, at_bcp(2));           // get constant
  1.1376 +  locals_index(rbx);
  1.1377 +  __ addl(iaddress(rbx), rdx);
  1.1378 +}
  1.1379 +
  1.1380 +
  1.1381 +void TemplateTable::wide_iinc() {
  1.1382 +  transition(vtos, vtos);
  1.1383 +  __ movl(rdx, at_bcp(4));                       // get constant
  1.1384 +  locals_index_wide(rbx);
  1.1385 +  __ bswapl(rdx);                                 // swap bytes & sign-extend constant
  1.1386 +  __ sarl(rdx, 16);
  1.1387 +  __ addl(iaddress(rbx), rdx);
  1.1388 +  // Note: should probably use only one movl to get both
  1.1389 +  //       the index and the constant -> fix this
  1.1390 +}
  1.1391 +
  1.1392 +
  1.1393 +void TemplateTable::convert() {
  1.1394 +  // Checking
  1.1395 +#ifdef ASSERT
  1.1396 +  { TosState tos_in  = ilgl;
  1.1397 +    TosState tos_out = ilgl;
  1.1398 +    switch (bytecode()) {
  1.1399 +      case Bytecodes::_i2l: // fall through
  1.1400 +      case Bytecodes::_i2f: // fall through
  1.1401 +      case Bytecodes::_i2d: // fall through
  1.1402 +      case Bytecodes::_i2b: // fall through
  1.1403 +      case Bytecodes::_i2c: // fall through
  1.1404 +      case Bytecodes::_i2s: tos_in = itos; break;
  1.1405 +      case Bytecodes::_l2i: // fall through
  1.1406 +      case Bytecodes::_l2f: // fall through
  1.1407 +      case Bytecodes::_l2d: tos_in = ltos; break;
  1.1408 +      case Bytecodes::_f2i: // fall through
  1.1409 +      case Bytecodes::_f2l: // fall through
  1.1410 +      case Bytecodes::_f2d: tos_in = ftos; break;
  1.1411 +      case Bytecodes::_d2i: // fall through
  1.1412 +      case Bytecodes::_d2l: // fall through
  1.1413 +      case Bytecodes::_d2f: tos_in = dtos; break;
  1.1414 +      default             : ShouldNotReachHere();
  1.1415 +    }
  1.1416 +    switch (bytecode()) {
  1.1417 +      case Bytecodes::_l2i: // fall through
  1.1418 +      case Bytecodes::_f2i: // fall through
  1.1419 +      case Bytecodes::_d2i: // fall through
  1.1420 +      case Bytecodes::_i2b: // fall through
  1.1421 +      case Bytecodes::_i2c: // fall through
  1.1422 +      case Bytecodes::_i2s: tos_out = itos; break;
  1.1423 +      case Bytecodes::_i2l: // fall through
  1.1424 +      case Bytecodes::_f2l: // fall through
  1.1425 +      case Bytecodes::_d2l: tos_out = ltos; break;
  1.1426 +      case Bytecodes::_i2f: // fall through
  1.1427 +      case Bytecodes::_l2f: // fall through
  1.1428 +      case Bytecodes::_d2f: tos_out = ftos; break;
  1.1429 +      case Bytecodes::_i2d: // fall through
  1.1430 +      case Bytecodes::_l2d: // fall through
  1.1431 +      case Bytecodes::_f2d: tos_out = dtos; break;
  1.1432 +      default             : ShouldNotReachHere();
  1.1433 +    }
  1.1434 +    transition(tos_in, tos_out);
  1.1435 +  }
  1.1436 +#endif // ASSERT
  1.1437 +
  1.1438 +  // Conversion
  1.1439 +  // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation)
  1.1440 +  switch (bytecode()) {
  1.1441 +    case Bytecodes::_i2l:
  1.1442 +      __ extend_sign(rdx, rax);
  1.1443 +      break;
  1.1444 +    case Bytecodes::_i2f:
  1.1445 +      __ push(rax);          // store int on tos
  1.1446 +      __ fild_s(at_rsp());   // load int to ST0
  1.1447 +      __ f2ieee();           // truncate to float size
  1.1448 +      __ pop(rcx);           // adjust rsp
  1.1449 +      break;
  1.1450 +    case Bytecodes::_i2d:
  1.1451 +      __ push(rax);          // add one slot for d2ieee()
  1.1452 +      __ push(rax);          // store int on tos
  1.1453 +      __ fild_s(at_rsp());   // load int to ST0
  1.1454 +      __ d2ieee();           // truncate to double size
  1.1455 +      __ pop(rcx);           // adjust rsp
  1.1456 +      __ pop(rcx);
  1.1457 +      break;
  1.1458 +    case Bytecodes::_i2b:
  1.1459 +      __ shll(rax, 24);      // truncate upper 24 bits
  1.1460 +      __ sarl(rax, 24);      // and sign-extend byte
  1.1461 +      LP64_ONLY(__ movsbl(rax, rax));
  1.1462 +      break;
  1.1463 +    case Bytecodes::_i2c:
  1.1464 +      __ andl(rax, 0xFFFF);  // truncate upper 16 bits
  1.1465 +      LP64_ONLY(__ movzwl(rax, rax));
  1.1466 +      break;
  1.1467 +    case Bytecodes::_i2s:
  1.1468 +      __ shll(rax, 16);      // truncate upper 16 bits
  1.1469 +      __ sarl(rax, 16);      // and sign-extend short
  1.1470 +      LP64_ONLY(__ movswl(rax, rax));
  1.1471 +      break;
  1.1472 +    case Bytecodes::_l2i:
  1.1473 +      /* nothing to do */
  1.1474 +      break;
  1.1475 +    case Bytecodes::_l2f:
  1.1476 +      __ push(rdx);          // store long on tos
  1.1477 +      __ push(rax);
  1.1478 +      __ fild_d(at_rsp());   // load long to ST0
  1.1479 +      __ f2ieee();           // truncate to float size
  1.1480 +      __ pop(rcx);           // adjust rsp
  1.1481 +      __ pop(rcx);
  1.1482 +      break;
  1.1483 +    case Bytecodes::_l2d:
  1.1484 +      __ push(rdx);          // store long on tos
  1.1485 +      __ push(rax);
  1.1486 +      __ fild_d(at_rsp());   // load long to ST0
  1.1487 +      __ d2ieee();           // truncate to double size
  1.1488 +      __ pop(rcx);           // adjust rsp
  1.1489 +      __ pop(rcx);
  1.1490 +      break;
  1.1491 +    case Bytecodes::_f2i:
  1.1492 +      __ push(rcx);          // reserve space for argument
  1.1493 +      __ fstp_s(at_rsp());   // pass float argument on stack
  1.1494 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
  1.1495 +      break;
  1.1496 +    case Bytecodes::_f2l:
  1.1497 +      __ push(rcx);          // reserve space for argument
  1.1498 +      __ fstp_s(at_rsp());   // pass float argument on stack
  1.1499 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
  1.1500 +      break;
  1.1501 +    case Bytecodes::_f2d:
  1.1502 +      /* nothing to do */
  1.1503 +      break;
  1.1504 +    case Bytecodes::_d2i:
  1.1505 +      __ push(rcx);          // reserve space for argument
  1.1506 +      __ push(rcx);
  1.1507 +      __ fstp_d(at_rsp());   // pass double argument on stack
  1.1508 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
  1.1509 +      break;
  1.1510 +    case Bytecodes::_d2l:
  1.1511 +      __ push(rcx);          // reserve space for argument
  1.1512 +      __ push(rcx);
  1.1513 +      __ fstp_d(at_rsp());   // pass double argument on stack
  1.1514 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
  1.1515 +      break;
  1.1516 +    case Bytecodes::_d2f:
  1.1517 +      __ push(rcx);          // reserve space for f2ieee()
  1.1518 +      __ f2ieee();           // truncate to float size
  1.1519 +      __ pop(rcx);           // adjust rsp
  1.1520 +      break;
  1.1521 +    default             :
  1.1522 +      ShouldNotReachHere();
  1.1523 +  }
  1.1524 +}
  1.1525 +
  1.1526 +
  1.1527 +void TemplateTable::lcmp() {
  1.1528 +  transition(ltos, itos);
  1.1529 +  // y = rdx:rax
  1.1530 +  __ pop_l(rbx, rcx);             // get x = rcx:rbx
  1.1531 +  __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
  1.1532 +  __ mov(rax, rcx);
  1.1533 +}
  1.1534 +
  1.1535 +
  1.1536 +void TemplateTable::float_cmp(bool is_float, int unordered_result) {
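  // Result convention (per the fcmpl/fcmpg and dcmpl/dcmpg bytecodes): leave -1 in rax if
  // x < y, 0 if x == y, +1 if x > y; if either operand is NaN the result is unordered_result
  // (-1 for the *cmpl flavors, +1 for the *cmpg flavors), which fcmp2int implements below.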
  1.1537 +  if (is_float) {
  1.1538 +    __ fld_s(at_rsp());
  1.1539 +  } else {
  1.1540 +    __ fld_d(at_rsp());
  1.1541 +    __ pop(rdx);
  1.1542 +  }
  1.1543 +  __ pop(rcx);
  1.1544 +  __ fcmp2int(rax, unordered_result < 0);
  1.1545 +}
  1.1546 +
  1.1547 +
  1.1548 +void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1.1549 +  __ get_method(rcx);           // ECX holds method
  1.1550 +  __ profile_taken_branch(rax,rbx); // EAX holds updated MDP, EBX holds bumped taken count
  1.1551 +
  1.1552 +  const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
  1.1553 +                             InvocationCounter::counter_offset();
  1.1554 +  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
  1.1555 +                              InvocationCounter::counter_offset();
  1.1556 +
  1.1557 +  // Load up EDX with the branch displacement
  1.1558 +  if (is_wide) {
  1.1559 +    __ movl(rdx, at_bcp(1));
  1.1560 +  } else {
  1.1561 +    __ load_signed_short(rdx, at_bcp(1));
  1.1562 +  }
  1.1563 +  __ bswapl(rdx);
  1.1564 +  if (!is_wide) __ sarl(rdx, 16);
  1.1565 +  LP64_ONLY(__ movslq(rdx, rdx));
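  // Worked example of the non-wide decoding above (a sketch): a branch offset of -2 is encoded
  // big-endian as 0xFF 0xFE at bcp+1..bcp+2. load_signed_short reads it little-endian as 0xFEFF
  // and sign-extends to 0xFFFFFEFF; bswapl yields 0xFFFEFFFF, and sarl(16) leaves the
  // sign-extended displacement 0xFFFFFFFE = -2 in rdx.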
  1.1566 +
  1.1567 +
  1.1568 +  // Handle all the JSR stuff here, then exit.
  1.1569 +  // It's much shorter and cleaner than intermingling with the
  1.1570 +  // non-JSR normal-branch stuff occurring below.
  1.1571 +  if (is_jsr) {
  1.1572 +    // Pre-load the next target bytecode into EBX
  1.1573 +    __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1, 0));
  1.1574 +
  1.1575 +    // compute return address as bci in rax,
  1.1576 +    __ lea(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(ConstMethod::codes_offset())));
  1.1577 +    __ subptr(rax, Address(rcx, Method::const_offset()));
  1.1578 +    // Adjust the bcp in RSI by the displacement in EDX
  1.1579 +    __ addptr(rsi, rdx);
  1.1580 +    // Push return address
  1.1581 +    __ push_i(rax);
  1.1582 +    // jsr returns vtos
  1.1583 +    __ dispatch_only_noverify(vtos);
  1.1584 +    return;
  1.1585 +  }
  1.1586 +
  1.1587 +  // Normal (non-jsr) branch handling
  1.1588 +
  1.1589 +  // Adjust the bcp in RSI by the displacement in EDX
  1.1590 +  __ addptr(rsi, rdx);
  1.1591 +
  1.1592 +  assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  1.1593 +  Label backedge_counter_overflow;
  1.1594 +  Label profile_method;
  1.1595 +  Label dispatch;
  1.1596 +  if (UseLoopCounter) {
  1.1597 +    // increment backedge counter for backward branches
  1.1598 +    // rax,: MDO
  1.1599 +    // rbx,: MDO bumped taken-count
  1.1600 +    // rcx: method
  1.1601 +    // rdx: target offset
  1.1602 +    // rsi: target bcp
  1.1603 +    // rdi: locals pointer
  1.1604 +    __ testl(rdx, rdx);             // check if forward or backward branch
  1.1605 +    __ jcc(Assembler::positive, dispatch); // count only if backward branch
  1.1606 +
  1.1607 +    // check if MethodCounters exists
  1.1608 +    Label has_counters;
  1.1609 +    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
  1.1610 +    __ testptr(rax, rax);
  1.1611 +    __ jcc(Assembler::notZero, has_counters);
  1.1612 +    __ push(rdx);
  1.1613 +    __ push(rcx);
  1.1614 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
  1.1615 +               rcx);
  1.1616 +    __ pop(rcx);
  1.1617 +    __ pop(rdx);
  1.1618 +    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
  1.1619 +    __ testptr(rax, rax);
  1.1620 +    __ jcc(Assembler::zero, dispatch);
  1.1621 +    __ bind(has_counters);
  1.1622 +
  1.1623 +    if (TieredCompilation) {
  1.1624 +      Label no_mdo;
  1.1625 +      int increment = InvocationCounter::count_increment;
  1.1626 +      int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
  1.1627 +      if (ProfileInterpreter) {
  1.1628 +        // Are we profiling?
  1.1629 +        __ movptr(rbx, Address(rcx, in_bytes(Method::method_data_offset())));
  1.1630 +        __ testptr(rbx, rbx);
  1.1631 +        __ jccb(Assembler::zero, no_mdo);
  1.1632 +        // Increment the MDO backedge counter
  1.1633 +        const Address mdo_backedge_counter(rbx, in_bytes(MethodData::backedge_counter_offset()) +
  1.1634 +                                                in_bytes(InvocationCounter::counter_offset()));
  1.1635 +        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
  1.1636 +                                   rax, false, Assembler::zero, &backedge_counter_overflow);
  1.1637 +        __ jmp(dispatch);
  1.1638 +      }
  1.1639 +      __ bind(no_mdo);
  1.1640 +      // Increment backedge counter in MethodCounters*
  1.1641 +      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
  1.1642 +      __ increment_mask_and_jump(Address(rcx, be_offset), increment, mask,
  1.1643 +                                 rax, false, Assembler::zero, &backedge_counter_overflow);
  1.1644 +    } else {
  1.1645 +      // increment counter
  1.1646 +      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
  1.1647 +      __ movl(rax, Address(rcx, be_offset));        // load backedge counter
  1.1648 +      __ incrementl(rax, InvocationCounter::count_increment); // increment counter
  1.1649 +      __ movl(Address(rcx, be_offset), rax);        // store counter
  1.1650 +
  1.1651 +      __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
  1.1652 +
  1.1653 +      __ andl(rax, InvocationCounter::count_mask_value);     // and the status bits
  1.1654 +      __ addl(rax, Address(rcx, be_offset));        // add both counters
  1.1655 +
  1.1656 +      if (ProfileInterpreter) {
  1.1657 +        // Test to see if we should create a method data oop
  1.1658 +        __ cmp32(rax,
  1.1659 +                 ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
  1.1660 +        __ jcc(Assembler::less, dispatch);
  1.1661 +
  1.1662 +        // if no method data exists, go to profile method
  1.1663 +        __ test_method_data_pointer(rax, profile_method);
  1.1664 +
  1.1665 +        if (UseOnStackReplacement) {
  1.1666 +          // check for overflow against rbx, which is the MDO taken count
  1.1667 +          __ cmp32(rbx,
  1.1668 +                   ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1669 +          __ jcc(Assembler::below, dispatch);
  1.1670 +
  1.1671 +          // When ProfileInterpreter is on, the backedge_count comes from the
  1.1672 +          // MethodData*, whose value does not get reset on the call to
  1.1673 +          // frequency_counter_overflow().  To avoid excessive calls to the overflow
  1.1674 +          // routine while the method is being compiled, add a second test to make
  1.1675 +          // sure the overflow function is called only once every overflow_frequency.
  1.1676 +          const int overflow_frequency = 1024;
  1.1677 +          __ andptr(rbx, overflow_frequency-1);
  1.1678 +          __ jcc(Assembler::zero, backedge_counter_overflow);
  1.1679 +        }
  1.1680 +      } else {
  1.1681 +        if (UseOnStackReplacement) {
  1.1682 +          // check for overflow against rax, which is the sum of the counters
  1.1683 +          __ cmp32(rax,
  1.1684 +                   ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1685 +          __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
  1.1686 +
  1.1687 +        }
  1.1688 +      }
  1.1689 +    }
  1.1690 +    __ bind(dispatch);
  1.1691 +  }
  1.1692 +
  1.1693 +  // Pre-load the next target bytecode into EBX
  1.1694 +  __ load_unsigned_byte(rbx, Address(rsi, 0));
  1.1695 +
  1.1696 +  // continue with the bytecode @ target
  1.1697 +  // rax,: return bci for jsr's, unused otherwise
  1.1698 +  // rbx,: target bytecode
  1.1699 +  // rsi: target bcp
  1.1700 +  __ dispatch_only(vtos);
  1.1701 +
  1.1702 +  if (UseLoopCounter) {
  1.1703 +    if (ProfileInterpreter) {
  1.1704 +      // Out-of-line code to allocate method data oop.
  1.1705 +      __ bind(profile_method);
  1.1706 +      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  1.1707 +      __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode
  1.1708 +      __ set_method_data_pointer_for_bcp();
  1.1709 +      __ jmp(dispatch);
  1.1710 +    }
  1.1711 +
  1.1712 +    if (UseOnStackReplacement) {
  1.1713 +
  1.1714 +      // backedge counter overflow
  1.1715 +      __ bind(backedge_counter_overflow);
  1.1716 +      __ negptr(rdx);
  1.1717 +      __ addptr(rdx, rsi);        // branch bcp
  1.1718 +      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rdx);
  1.1719 +      __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode
  1.1720 +
  1.1721 +      // rax,: osr nmethod (osr ok) or NULL (osr not possible)
  1.1722 +      // rbx,: target bytecode
  1.1723 +      // rdx: scratch
  1.1724 +      // rdi: locals pointer
  1.1725 +      // rsi: bcp
  1.1726 +      __ testptr(rax, rax);                      // test result
  1.1727 +      __ jcc(Assembler::zero, dispatch);         // no osr if null
  1.1728 +      // nmethod may have been invalidated (VM may block upon call_VM return)
  1.1729 +      __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
  1.1730 +      __ cmpl(rcx, InvalidOSREntryBci);
  1.1731 +      __ jcc(Assembler::equal, dispatch);
  1.1732 +
  1.1733 +      // We have the address of an on stack replacement routine in rax,
  1.1734 +      // We need to prepare to execute the OSR method. First we must
  1.1735 +      // migrate the locals and monitors off of the stack.
  1.1736 +
  1.1737 +      __ mov(rbx, rax);                             // save the nmethod
  1.1738 +
  1.1739 +      const Register thread = rcx;
  1.1740 +      __ get_thread(thread);
  1.1741 +      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
  1.1742 +      // rax, is OSR buffer, move it to expected parameter location
  1.1743 +      __ mov(rcx, rax);
  1.1744 +
  1.1745 +      // pop the interpreter frame
  1.1746 +      __ movptr(rdx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
  1.1747 +      __ leave();                                // remove frame anchor
  1.1748 +      __ pop(rdi);                               // get return address
  1.1749 +      __ mov(rsp, rdx);                          // set sp to sender sp
  1.1750 +
  1.1751 +      // Align stack pointer for compiled code (note that caller is
  1.1752 +      // responsible for undoing this fixup by remembering the old SP
  1.1753 +      // in an rbp,-relative location)
  1.1754 +      __ andptr(rsp, -(StackAlignmentInBytes));
  1.1755 +
  1.1756 +      // push the (possibly adjusted) return address
  1.1757 +      __ push(rdi);
  1.1758 +
  1.1759 +      // and begin the OSR nmethod
  1.1760 +      __ jmp(Address(rbx, nmethod::osr_entry_point_offset()));
  1.1761 +    }
  1.1762 +  }
  1.1763 +}
  1.1764 +
  1.1765 +
  1.1766 +void TemplateTable::if_0cmp(Condition cc) {
  1.1767 +  transition(itos, vtos);
  1.1768 +  // assume branch is more often taken than not (loops use backward branches)
  1.1769 +  Label not_taken;
  1.1770 +  __ testl(rax, rax);
  1.1771 +  __ jcc(j_not(cc), not_taken);
  1.1772 +  branch(false, false);
  1.1773 +  __ bind(not_taken);
  1.1774 +  __ profile_not_taken_branch(rax);
  1.1775 +}
  1.1776 +
  1.1777 +
  1.1778 +void TemplateTable::if_icmp(Condition cc) {
  1.1779 +  transition(itos, vtos);
  1.1780 +  // assume branch is more often taken than not (loops use backward branches)
  1.1781 +  Label not_taken;
  1.1782 +  __ pop_i(rdx);
  1.1783 +  __ cmpl(rdx, rax);
  1.1784 +  __ jcc(j_not(cc), not_taken);
  1.1785 +  branch(false, false);
  1.1786 +  __ bind(not_taken);
  1.1787 +  __ profile_not_taken_branch(rax);
  1.1788 +}
  1.1789 +
  1.1790 +
  1.1791 +void TemplateTable::if_nullcmp(Condition cc) {
  1.1792 +  transition(atos, vtos);
  1.1793 +  // assume branch is more often taken than not (loops use backward branches)
  1.1794 +  Label not_taken;
  1.1795 +  __ testptr(rax, rax);
  1.1796 +  __ jcc(j_not(cc), not_taken);
  1.1797 +  branch(false, false);
  1.1798 +  __ bind(not_taken);
  1.1799 +  __ profile_not_taken_branch(rax);
  1.1800 +}
  1.1801 +
  1.1802 +
  1.1803 +void TemplateTable::if_acmp(Condition cc) {
  1.1804 +  transition(atos, vtos);
  1.1805 +  // assume branch is more often taken than not (loops use backward branches)
  1.1806 +  Label not_taken;
  1.1807 +  __ pop_ptr(rdx);
  1.1808 +  __ cmpptr(rdx, rax);
  1.1809 +  __ jcc(j_not(cc), not_taken);
  1.1810 +  branch(false, false);
  1.1811 +  __ bind(not_taken);
  1.1812 +  __ profile_not_taken_branch(rax);
  1.1813 +}
  1.1814 +
  1.1815 +
  1.1816 +void TemplateTable::ret() {
  1.1817 +  transition(vtos, vtos);
  1.1818 +  locals_index(rbx);
  1.1819 +  __ movptr(rbx, iaddress(rbx));                   // get return bci, compute return bcp
  1.1820 +  __ profile_ret(rbx, rcx);
  1.1821 +  __ get_method(rax);
  1.1822 +  __ movptr(rsi, Address(rax, Method::const_offset()));
  1.1823 +  __ lea(rsi, Address(rsi, rbx, Address::times_1,
  1.1824 +                      ConstMethod::codes_offset()));
  1.1825 +  __ dispatch_next(vtos);
  1.1826 +}
  1.1827 +
  1.1828 +
  1.1829 +void TemplateTable::wide_ret() {
  1.1830 +  transition(vtos, vtos);
  1.1831 +  locals_index_wide(rbx);
  1.1832 +  __ movptr(rbx, iaddress(rbx));                   // get return bci, compute return bcp
  1.1833 +  __ profile_ret(rbx, rcx);
  1.1834 +  __ get_method(rax);
  1.1835 +  __ movptr(rsi, Address(rax, Method::const_offset()));
  1.1836 +  __ lea(rsi, Address(rsi, rbx, Address::times_1, ConstMethod::codes_offset()));
  1.1837 +  __ dispatch_next(vtos);
  1.1838 +}
  1.1839 +
  1.1840 +
  1.1841 +void TemplateTable::tableswitch() {
  1.1842 +  Label default_case, continue_execution;
  1.1843 +  transition(itos, vtos);
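  // Operand layout (per the tableswitch bytecode): after the alignment padding the stream holds
  // [default offset][low][high][(high - low + 1) jump offsets], all 4-byte big-endian ints.
  // That is why low/high are read at 1*wordSize/2*wordSize below and the jump table starts at
  // 3*BytesPerInt from the aligned base.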
  1.1844 +  // compute the wordSize-aligned operand base in rbx (rsi itself is not modified)
  1.1845 +  __ lea(rbx, at_bcp(wordSize));
  1.1846 +  __ andptr(rbx, -wordSize);
  1.1847 +  // load lo & hi
  1.1848 +  __ movl(rcx, Address(rbx, 1 * wordSize));
  1.1849 +  __ movl(rdx, Address(rbx, 2 * wordSize));
  1.1850 +  __ bswapl(rcx);
  1.1851 +  __ bswapl(rdx);
  1.1852 +  // check against lo & hi
  1.1853 +  __ cmpl(rax, rcx);
  1.1854 +  __ jccb(Assembler::less, default_case);
  1.1855 +  __ cmpl(rax, rdx);
  1.1856 +  __ jccb(Assembler::greater, default_case);
  1.1857 +  // lookup dispatch offset
  1.1858 +  __ subl(rax, rcx);
  1.1859 +  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
  1.1860 +  __ profile_switch_case(rax, rbx, rcx);
  1.1861 +  // continue execution
  1.1862 +  __ bind(continue_execution);
  1.1863 +  __ bswapl(rdx);
  1.1864 +  __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
  1.1865 +  __ addptr(rsi, rdx);
  1.1866 +  __ dispatch_only(vtos);
  1.1867 +  // handle default
  1.1868 +  __ bind(default_case);
  1.1869 +  __ profile_switch_default(rax);
  1.1870 +  __ movl(rdx, Address(rbx, 0));
  1.1871 +  __ jmp(continue_execution);
  1.1872 +}
  1.1873 +
  1.1874 +
  1.1875 +void TemplateTable::lookupswitch() {
  1.1876 +  transition(itos, itos);
  1.1877 +  __ stop("lookupswitch bytecode should have been rewritten");
  1.1878 +}
  1.1879 +
  1.1880 +
  1.1881 +void TemplateTable::fast_linearswitch() {
  1.1882 +  transition(itos, vtos);
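  // Operand layout (per the lookupswitch bytecode): after the alignment padding the stream holds
  // [default offset][npairs] followed by npairs of (match, offset) pairs, all 4-byte big-endian
  // ints; each pair is 8 bytes, hence the Address::times_8 scaling in the search loop below.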
  1.1883 +  Label loop_entry, loop, found, continue_execution;
  1.1884 +  // bswapl rax, so we can avoid bswapping the table entries
  1.1885 +  __ bswapl(rax);
  1.1886 +  // compute the wordSize-aligned operand base in rbx (rsi itself is not modified)
  1.1887 +  __ lea(rbx, at_bcp(wordSize));                // btw: should be able to get rid of this instruction (change offsets below)
  1.1888 +  __ andptr(rbx, -wordSize);
  1.1889 +  // set counter
  1.1890 +  __ movl(rcx, Address(rbx, wordSize));
  1.1891 +  __ bswapl(rcx);
  1.1892 +  __ jmpb(loop_entry);
  1.1893 +  // table search
  1.1894 +  __ bind(loop);
  1.1895 +  __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * wordSize));
  1.1896 +  __ jccb(Assembler::equal, found);
  1.1897 +  __ bind(loop_entry);
  1.1898 +  __ decrementl(rcx);
  1.1899 +  __ jcc(Assembler::greaterEqual, loop);
  1.1900 +  // default case
  1.1901 +  __ profile_switch_default(rax);
  1.1902 +  __ movl(rdx, Address(rbx, 0));
  1.1903 +  __ jmpb(continue_execution);
  1.1904 +  // entry found -> get offset
  1.1905 +  __ bind(found);
  1.1906 +  __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * wordSize));
  1.1907 +  __ profile_switch_case(rcx, rax, rbx);
  1.1908 +  // continue execution
  1.1909 +  __ bind(continue_execution);
  1.1910 +  __ bswapl(rdx);
  1.1911 +  __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
  1.1912 +  __ addptr(rsi, rdx);
  1.1913 +  __ dispatch_only(vtos);
  1.1914 +}
  1.1915 +
  1.1916 +
  1.1917 +void TemplateTable::fast_binaryswitch() {
  1.1918 +  transition(itos, vtos);
  1.1919 +  // Implementation using the following core algorithm:
  1.1920 +  //
  1.1921 +  // int binary_search(int key, LookupswitchPair* array, int n) {
  1.1922 +  //   // Binary search according to "Methodik des Programmierens" by
  1.1923 +  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  1.1924 +  //   int i = 0;
  1.1925 +  //   int j = n;
  1.1926 +  //   while (i+1 < j) {
  1.1927 +  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  1.1928 +  //     // with      Q: for all i: 0 <= i < n: key < a[i]
  1.1929 +  //     // where a stands for the array and assuming that the (nonexistent)
  1.1930 +  //     // element a[n] is infinitely big.
  1.1931 +  //     int h = (i + j) >> 1;
  1.1932 +  //     // i < h < j
  1.1933 +  //     if (key < array[h].fast_match()) {
  1.1934 +  //       j = h;
  1.1935 +  //     } else {
  1.1936 +  //       i = h;
  1.1937 +  //     }
  1.1938 +  //   }
  1.1939 +  //   // R: a[i] <= key < a[i+1] or Q
  1.1940 +  //   // (i.e., if key is within array, i is the correct index)
  1.1941 +  //   return i;
  1.1942 +  // }
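  // As in fast_linearswitch, each table element is an 8-byte big-endian (match, offset) pair,
  // which is what the Address::times_8 addressing below indexes into.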
  1.1943 +
  1.1944 +  // register allocation
  1.1945 +  const Register key   = rax;                    // already set (tosca)
  1.1946 +  const Register array = rbx;
  1.1947 +  const Register i     = rcx;
  1.1948 +  const Register j     = rdx;
  1.1949 +  const Register h     = rdi;                    // needs to be restored
  1.1950 +  const Register temp  = rsi;
  1.1951 +  // setup array
  1.1952 +  __ save_bcp();
  1.1953 +
  1.1954 +  __ lea(array, at_bcp(3*wordSize));             // btw: should be able to get rid of this instruction (change offsets below)
  1.1955 +  __ andptr(array, -wordSize);
  1.1956 +  // initialize i & j
  1.1957 +  __ xorl(i, i);                                 // i = 0;
  1.1958 +  __ movl(j, Address(array, -wordSize));         // j = length(array);
  1.1959 +  // Convert j into native byte ordering
  1.1960 +  __ bswapl(j);
  1.1961 +  // and start
  1.1962 +  Label entry;
  1.1963 +  __ jmp(entry);
  1.1964 +
  1.1965 +  // binary search loop
  1.1966 +  { Label loop;
  1.1967 +    __ bind(loop);
  1.1968 +    // int h = (i + j) >> 1;
  1.1969 +    __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
  1.1970 +    __ sarl(h, 1);                               // h = (i + j) >> 1;
  1.1971 +    // if (key < array[h].fast_match()) {
  1.1972 +    //   j = h;
  1.1973 +    // } else {
  1.1974 +    //   i = h;
  1.1975 +    // }
  1.1976 +    // Convert array[h].match to native byte-ordering before compare
  1.1977 +    __ movl(temp, Address(array, h, Address::times_8, 0*wordSize));
  1.1978 +    __ bswapl(temp);
  1.1979 +    __ cmpl(key, temp);
  1.1980 +    // j = h if (key <  array[h].fast_match())
  1.1981 +    __ cmov32(Assembler::less        , j, h);
  1.1982 +    // i = h if (key >= array[h].fast_match())
  1.1983 +    __ cmov32(Assembler::greaterEqual, i, h);
  1.1984 +    // while (i+1 < j)
  1.1985 +    __ bind(entry);
  1.1986 +    __ leal(h, Address(i, 1));                   // i+1
  1.1987 +    __ cmpl(h, j);                               // i+1 < j
  1.1988 +    __ jcc(Assembler::less, loop);
  1.1989 +  }
  1.1990 +
  1.1991 +  // end of binary search, result index is i (must check again!)
  1.1992 +  Label default_case;
  1.1993 +  // Convert array[i].match to native byte-ordering before compare
  1.1994 +  __ movl(temp, Address(array, i, Address::times_8, 0*wordSize));
  1.1995 +  __ bswapl(temp);
  1.1996 +  __ cmpl(key, temp);
  1.1997 +  __ jcc(Assembler::notEqual, default_case);
  1.1998 +
  1.1999 +  // entry found -> j = offset
  1.2000 +  __ movl(j , Address(array, i, Address::times_8, 1*wordSize));
  1.2001 +  __ profile_switch_case(i, key, array);
  1.2002 +  __ bswapl(j);
  1.2003 +  LP64_ONLY(__ movslq(j, j));
  1.2004 +  __ restore_bcp();
  1.2005 +  __ restore_locals();                           // restore rdi
  1.2006 +  __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
  1.2007 +
  1.2008 +  __ addptr(rsi, j);
  1.2009 +  __ dispatch_only(vtos);
  1.2010 +
  1.2011 +  // default case -> j = default offset
  1.2012 +  __ bind(default_case);
  1.2013 +  __ profile_switch_default(i);
  1.2014 +  __ movl(j, Address(array, -2*wordSize));
  1.2015 +  __ bswapl(j);
  1.2016 +  LP64_ONLY(__ movslq(j, j));
  1.2017 +  __ restore_bcp();
  1.2018 +  __ restore_locals();                           // restore rdi
  1.2019 +  __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
  1.2020 +  __ addptr(rsi, j);
  1.2021 +  __ dispatch_only(vtos);
  1.2022 +}
  1.2023 +
  1.2024 +
  1.2025 +void TemplateTable::_return(TosState state) {
  1.2026 +  transition(state, state);
  1.2027 +  assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
  1.2028 +
  1.2029 +  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  1.2030 +    assert(state == vtos, "only valid state");
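    // This bytecode replaces the return of Object.<init> when rewriting is enabled; if the
    // receiver's class has a finalizer (JVM_ACC_HAS_FINALIZER), register the receiver with
    // the VM before returning.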
  1.2031 +    __ movptr(rax, aaddress(0));
  1.2032 +    __ load_klass(rdi, rax);
  1.2033 +    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
  1.2034 +    __ testl(rdi, JVM_ACC_HAS_FINALIZER);
  1.2035 +    Label skip_register_finalizer;
  1.2036 +    __ jcc(Assembler::zero, skip_register_finalizer);
  1.2037 +
  1.2038 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), rax);
  1.2039 +
  1.2040 +    __ bind(skip_register_finalizer);
  1.2041 +  }
  1.2042 +
  1.2043 +  __ remove_activation(state, rsi);
  1.2044 +  __ jmp(rsi);
  1.2045 +}
  1.2046 +
  1.2047 +
  1.2048 +// ----------------------------------------------------------------------------
  1.2049 +// Volatile variables demand their effects be made known to all CPUs in
  1.2050 +// order.  Store buffers on most chips allow reads & writes to reorder; the
  1.2051 +// JMM's ReadAfterWrite.java test fails in -Xint mode without some kind of
  1.2052 +// memory barrier (i.e., it's not sufficient that the interpreter does not
  1.2053 +// reorder volatile references, the hardware also must not reorder them).
  1.2054 +//
  1.2055 +// According to the new Java Memory Model (JMM):
  1.2056 +// (1) All volatiles are serialized with respect to each other.
  1.2057 +// ALSO reads & writes act as acquire & release, so:
  1.2058 +// (2) A read cannot let unrelated NON-volatile memory refs that happen after
  1.2059 +// the read float up to before the read.  It's OK for non-volatile memory refs
  1.2060 +// that happen before the volatile read to float down below it.
  1.2061 +// (3) Similarly, a volatile write cannot let unrelated NON-volatile memory refs
  1.2062 +// that happen BEFORE the write float down to after the write.  It's OK for
  1.2063 +// non-volatile memory refs that happen after the volatile write to float up
  1.2064 +// before it.
  1.2065 +//
  1.2066 +// We only put in barriers around volatile refs (they are expensive), not
  1.2067 +// _between_ memory refs (that would require us to track the flavor of the
  1.2068 +// previous memory refs).  Requirements (2) and (3) require some barriers
  1.2069 +// before volatile stores and after volatile loads.  These nearly cover
  1.2070 +// requirement (1) but miss the volatile-store-volatile-load case.  This final
  1.2071 +// case is placed after volatile-stores although it could just as well go
  1.2072 +// before volatile-loads.
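//
// A classic Java-level illustration of that volatile-store-volatile-load case (a sketch):
//   volatile int a = 0, b = 0;
//   Thread 1: a = 1; r1 = b;        Thread 2: b = 1; r2 = a;
// Without a StoreLoad barrier after the volatile stores, store buffering could let both threads
// observe r1 == 0 and r2 == 0, an outcome the JMM forbids for volatile accesses.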
  1.2073 +void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits order_constraint ) {
  1.2074 +  // Helper function to insert a memory barrier; callers perform the is-volatile test
  1.2075 +  if( !os::is_MP() ) return;    // Not needed on single CPU
  1.2076 +  __ membar(order_constraint);
  1.2077 +}
  1.2078 +
  1.2079 +void TemplateTable::resolve_cache_and_index(int byte_no,
  1.2080 +                                            Register Rcache,
  1.2081 +                                            Register index,
  1.2082 +                                            size_t index_size) {
  1.2083 +  const Register temp = rbx;
  1.2084 +  assert_different_registers(Rcache, index, temp);
  1.2085 +
  1.2086 +  Label resolved;
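  // The cache entry records which bytecode resolved it; get_cache_and_index_and_bytecode_at_bcp
  // loads that indicator byte (selected by byte_no) into temp, so a match with the current
  // bytecode means the entry is already resolved and the slow resolution call can be skipped.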
  1.2087 +  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  1.2088 +  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  1.2089 +  __ cmpl(temp, (int) bytecode());  // have we resolved this bytecode?
  1.2090 +  __ jcc(Assembler::equal, resolved);
  1.2091 +
  1.2092 +  // resolve first time through
  1.2093 +  address entry;
  1.2094 +  switch (bytecode()) {
  1.2095 +    case Bytecodes::_getstatic      : // fall through
  1.2096 +    case Bytecodes::_putstatic      : // fall through
  1.2097 +    case Bytecodes::_getfield       : // fall through
  1.2098 +    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);        break;
  1.2099 +    case Bytecodes::_invokevirtual  : // fall through
  1.2100 +    case Bytecodes::_invokespecial  : // fall through
  1.2101 +    case Bytecodes::_invokestatic   : // fall through
  1.2102 +    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);         break;
  1.2103 +    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);   break;
  1.2104 +    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
  1.2105 +    default:
  1.2106 +      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
  1.2107 +      break;
  1.2108 +  }
  1.2109 +  __ movl(temp, (int)bytecode());
  1.2110 +  __ call_VM(noreg, entry, temp);
  1.2111 +  // Update registers with resolved info
  1.2112 +  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
  1.2113 +  __ bind(resolved);
  1.2114 +}
  1.2115 +
  1.2116 +
  1.2117 +// The cache and index registers must be set before the call
  1.2118 +void TemplateTable::load_field_cp_cache_entry(Register obj,
  1.2119 +                                              Register cache,
  1.2120 +                                              Register index,
  1.2121 +                                              Register off,
  1.2122 +                                              Register flags,
  1.2123 +                                              bool is_static = false) {
  1.2124 +  assert_different_registers(cache, index, flags, off);
  1.2125 +
  1.2126 +  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
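  // Assumed entry layout for a resolved field in this version: f1 caches the field holder
  // Klass* (used below only for statics, to fetch its java mirror), f2 caches the field offset
  // in bytes, and flags packs the field's tos state plus attribute bits such as is_volatile.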
  1.2127 +  // Field offset
  1.2128 +  __ movptr(off, Address(cache, index, Address::times_ptr,
  1.2129 +                         in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())));
  1.2130 +  // Flags
  1.2131 +  __ movl(flags, Address(cache, index, Address::times_ptr,
  1.2132 +           in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())));
  1.2133 +
  1.2134 +  // For static fields, overwrite obj with the java mirror of the field holder klass
  1.2135 +  if (is_static) {
  1.2136 +    __ movptr(obj, Address(cache, index, Address::times_ptr,
  1.2137 +                           in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())));
  1.2138 +    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  1.2139 +    __ movptr(obj, Address(obj, mirror_offset));
  1.2140 +  }
  1.2141 +}
  1.2142 +
  1.2143 +void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  1.2144 +                                               Register method,
  1.2145 +                                               Register itable_index,
  1.2146 +                                               Register flags,
  1.2147 +                                               bool is_invokevirtual,
  1.2148 +                                               bool is_invokevfinal, /*unused*/
  1.2149 +                                               bool is_invokedynamic) {
  1.2150 +  // setup registers
  1.2151 +  const Register cache = rcx;
  1.2152 +  const Register index = rdx;
  1.2153 +  assert_different_registers(method, flags);
  1.2154 +  assert_different_registers(method, cache, index);
  1.2155 +  assert_different_registers(itable_index, flags);
  1.2156 +  assert_different_registers(itable_index, cache, index);
  1.2157 +  // determine constant pool cache field offsets
  1.2158 +  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
  1.2159 +  const int method_offset = in_bytes(
  1.2160 +    ConstantPoolCache::base_offset() +
  1.2161 +      ((byte_no == f2_byte)
  1.2162 +       ? ConstantPoolCacheEntry::f2_offset()
  1.2163 +       : ConstantPoolCacheEntry::f1_offset()));
  1.2164 +  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
  1.2165 +                                    ConstantPoolCacheEntry::flags_offset());
  1.2166 +  // access constant pool cache fields
  1.2167 +  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
  1.2168 +                                    ConstantPoolCacheEntry::f2_offset());
  1.2169 +
  1.2170 +  size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
  1.2171 +  resolve_cache_and_index(byte_no, cache, index, index_size);
  1.2172 +  __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
  1.2173 +
  1.2174 +  if (itable_index != noreg) {
  1.2175 +    __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
  1.2176 +  }
  1.2177 +  __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
  1.2178 +}
  1.2179 +
  1.2180 +
  1.2181 +// The cache and index registers are expected to be set before the call.
  1.2182 +// Correct values of the cache and index registers are preserved.
  1.2183 +void TemplateTable::jvmti_post_field_access(Register cache,
  1.2184 +                                            Register index,
  1.2185 +                                            bool is_static,
  1.2186 +                                            bool has_tos) {
  1.2187 +  if (JvmtiExport::can_post_field_access()) {
  1.2188 +    // Check to see if a field access watch has been set before we take
  1.2189 +    // the time to call into the VM.
  1.2190 +    Label L1;
  1.2191 +    assert_different_registers(cache, index, rax);
  1.2192 +    __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2193 +    __ testl(rax,rax);
  1.2194 +    __ jcc(Assembler::zero, L1);
  1.2195 +
  1.2196 +    // cache entry pointer
  1.2197 +    __ addptr(cache, in_bytes(ConstantPoolCache::base_offset()));
  1.2198 +    __ shll(index, LogBytesPerWord);
  1.2199 +    __ addptr(cache, index);
  1.2200 +    if (is_static) {
  1.2201 +      __ xorptr(rax, rax);      // NULL object reference
  1.2202 +    } else {
  1.2203 +      __ pop(atos);         // Get the object
  1.2204 +      __ verify_oop(rax);
  1.2205 +      __ push(atos);        // Restore stack state
  1.2206 +    }
  1.2207 +    // rax,:   object pointer or NULL
  1.2208 +    // cache: cache entry pointer
  1.2209 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
  1.2210 +               rax, cache);
  1.2211 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2212 +    __ bind(L1);
  1.2213 +  }
  1.2214 +}
  1.2215 +
  1.2216 +void TemplateTable::pop_and_check_object(Register r) {
  1.2217 +  __ pop_ptr(r);
  1.2218 +  __ null_check(r);  // for field access must check obj.
  1.2219 +  __ verify_oop(r);
  1.2220 +}
  1.2221 +
  1.2222 +void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  1.2223 +  transition(vtos, vtos);
  1.2224 +
  1.2225 +  const Register cache = rcx;
  1.2226 +  const Register index = rdx;
  1.2227 +  const Register obj   = rcx;
  1.2228 +  const Register off   = rbx;
  1.2229 +  const Register flags = rax;
  1.2230 +
  1.2231 +  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  1.2232 +  jvmti_post_field_access(cache, index, is_static, false);
  1.2233 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2234 +
  1.2235 +  if (!is_static) pop_and_check_object(obj);
  1.2236 +
  1.2237 +  const Address lo(obj, off, Address::times_1, 0*wordSize);
  1.2238 +  const Address hi(obj, off, Address::times_1, 1*wordSize);
  1.2239 +
  1.2240 +  Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
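  // The field's tos state, extracted from flags below, selects one of the typed load paths.
  // btos is checked first because its encoding is 0 (see the assert), so a plain zero test suffices.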
  1.2241 +
  1.2242 +  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2243 +  assert(btos == 0, "change code, btos != 0");
  1.2244 +  // btos
  1.2245 +  __ andptr(flags, ConstantPoolCacheEntry::tos_state_mask);
  1.2246 +  __ jcc(Assembler::notZero, notByte);
  1.2247 +
  1.2248 +  __ load_signed_byte(rax, lo );
  1.2249 +  __ push(btos);
  1.2250 +  // Rewrite bytecode to be faster
  1.2251 +  if (!is_static) {
  1.2252 +    patch_bytecode(Bytecodes::_fast_bgetfield, rcx, rbx);
  1.2253 +  }
  1.2254 +  __ jmp(Done);
  1.2255 +
  1.2256 +  __ bind(notByte);
  1.2257 +  // itos
  1.2258 +  __ cmpl(flags, itos );
  1.2259 +  __ jcc(Assembler::notEqual, notInt);
  1.2260 +
  1.2261 +  __ movl(rax, lo );
  1.2262 +  __ push(itos);
  1.2263 +  // Rewrite bytecode to be faster
  1.2264 +  if (!is_static) {
  1.2265 +    patch_bytecode(Bytecodes::_fast_igetfield, rcx, rbx);
  1.2266 +  }
  1.2267 +  __ jmp(Done);
  1.2268 +
  1.2269 +  __ bind(notInt);
  1.2270 +  // atos
  1.2271 +  __ cmpl(flags, atos );
  1.2272 +  __ jcc(Assembler::notEqual, notObj);
  1.2273 +
  1.2274 +  __ movl(rax, lo );
  1.2275 +  __ push(atos);
  1.2276 +  if (!is_static) {
  1.2277 +    patch_bytecode(Bytecodes::_fast_agetfield, rcx, rbx);
  1.2278 +  }
  1.2279 +  __ jmp(Done);
  1.2280 +
  1.2281 +  __ bind(notObj);
  1.2282 +  // ctos
  1.2283 +  __ cmpl(flags, ctos );
  1.2284 +  __ jcc(Assembler::notEqual, notChar);
  1.2285 +
  1.2286 +  __ load_unsigned_short(rax, lo );
  1.2287 +  __ push(ctos);
  1.2288 +  if (!is_static) {
  1.2289 +    patch_bytecode(Bytecodes::_fast_cgetfield, rcx, rbx);
  1.2290 +  }
  1.2291 +  __ jmp(Done);
  1.2292 +
  1.2293 +  __ bind(notChar);
  1.2294 +  // stos
  1.2295 +  __ cmpl(flags, stos );
  1.2296 +  __ jcc(Assembler::notEqual, notShort);
  1.2297 +
  1.2298 +  __ load_signed_short(rax, lo );
  1.2299 +  __ push(stos);
  1.2300 +  if (!is_static) {
  1.2301 +    patch_bytecode(Bytecodes::_fast_sgetfield, rcx, rbx);
  1.2302 +  }
  1.2303 +  __ jmp(Done);
  1.2304 +
  1.2305 +  __ bind(notShort);
  1.2306 +  // ltos
  1.2307 +  __ cmpl(flags, ltos );
  1.2308 +  __ jcc(Assembler::notEqual, notLong);
  1.2309 +
  1.2310 +  // Generate code as if volatile.  There just aren't enough registers to
  1.2311 +  // save that information and this code is faster than the test.
  1.2312 +  __ fild_d(lo);                // Must load atomically
  1.2313 +  __ subptr(rsp,2*wordSize);    // Make space for store
  1.2314 +  __ fistp_d(Address(rsp,0));
  1.2315 +  __ pop(rax);
  1.2316 +  __ pop(rdx);
  1.2317 +
  1.2318 +  __ push(ltos);
  1.2319 +  // Don't rewrite to _fast_lgetfield for potential volatile case.
  1.2320 +  __ jmp(Done);
  1.2321 +
  1.2322 +  __ bind(notLong);
  1.2323 +  // ftos
  1.2324 +  __ cmpl(flags, ftos );
  1.2325 +  __ jcc(Assembler::notEqual, notFloat);
  1.2326 +
  1.2327 +  __ fld_s(lo);
  1.2328 +  __ push(ftos);
  1.2329 +  if (!is_static) {
  1.2330 +    patch_bytecode(Bytecodes::_fast_fgetfield, rcx, rbx);
  1.2331 +  }
  1.2332 +  __ jmp(Done);
  1.2333 +
  1.2334 +  __ bind(notFloat);
  1.2335 +  // dtos
  1.2336 +  __ cmpl(flags, dtos );
  1.2337 +  __ jcc(Assembler::notEqual, notDouble);
  1.2338 +
  1.2339 +  __ fld_d(lo);
  1.2340 +  __ push(dtos);
  1.2341 +  if (!is_static) {
  1.2342 +    patch_bytecode(Bytecodes::_fast_dgetfield, rcx, rbx);
  1.2343 +  }
  1.2344 +  __ jmpb(Done);
  1.2345 +
  1.2346 +  __ bind(notDouble);
  1.2347 +
  1.2348 +  __ stop("Bad state");
  1.2349 +
  1.2350 +  __ bind(Done);
  1.2351 +  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  1.2352 +  // volatile_barrier( );
  1.2353 +}
  1.2354 +
  1.2355 +
  1.2356 +void TemplateTable::getfield(int byte_no) {
  1.2357 +  getfield_or_static(byte_no, false);
  1.2358 +}
  1.2359 +
  1.2360 +
  1.2361 +void TemplateTable::getstatic(int byte_no) {
  1.2362 +  getfield_or_static(byte_no, true);
  1.2363 +}
  1.2364 +
  1.2365 +// The cache and index registers are expected to be set before the call.
  1.2366 +// The function may destroy various registers, just not the cache and index registers.
  1.2367 +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  1.2368 +
  1.2369 +  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  1.2370 +
  1.2371 +  if (JvmtiExport::can_post_field_modification()) {
  1.2372 +    // Check to see if a field modification watch has been set before we take
  1.2373 +    // the time to call into the VM.
  1.2374 +    Label L1;
  1.2375 +    assert_different_registers(cache, index, rax);
  1.2376 +    __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2377 +    __ testl(rax, rax);
  1.2378 +    __ jcc(Assembler::zero, L1);
  1.2379 +
  1.2380 +    // The cache and index registers have already been set.
  1.2381 +    // That would allow this call to be eliminated, but then the cache and index
  1.2382 +    // registers would have to be used consistently after this point.
  1.2383 +    __ get_cache_and_index_at_bcp(rax, rdx, 1);
  1.2384 +
  1.2385 +    if (is_static) {
  1.2386 +      // Life is simple.  Null out the object pointer.
  1.2387 +      __ xorptr(rbx, rbx);
  1.2388 +    } else {
  1.2389 +      // Life is harder. The stack holds the value on top, followed by the object.
  1.2390 +      // We don't know the size of the value, though; it could be one or two words
  1.2391 +      // depending on its type. As a result, we must find the type to determine where
  1.2392 +      // the object is.
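      // Expression stack at this point (a sketch; the stack grows toward lower addresses):
      //   rsp -> [value: one or two words, depending on its tos state]
      //          [object reference]    <- this is what we want in rbx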
  1.2393 +      Label two_word, valsize_known;
  1.2394 +      __ movl(rcx, Address(rax, rdx, Address::times_ptr, in_bytes(cp_base_offset +
  1.2395 +                                   ConstantPoolCacheEntry::flags_offset())));
  1.2396 +      __ mov(rbx, rsp);
  1.2397 +      __ shrl(rcx, ConstantPoolCacheEntry::tos_state_shift);
  1.2398 +      // Make sure we don't need to mask rcx after the above shift
  1.2399 +      ConstantPoolCacheEntry::verify_tos_state_shift();
  1.2400 +      __ cmpl(rcx, ltos);
  1.2401 +      __ jccb(Assembler::equal, two_word);
  1.2402 +      __ cmpl(rcx, dtos);
  1.2403 +      __ jccb(Assembler::equal, two_word);
  1.2404 +      __ addptr(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
  1.2405 +      __ jmpb(valsize_known);
  1.2406 +
  1.2407 +      __ bind(two_word);
  1.2408 +      __ addptr(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
  1.2409 +
  1.2410 +      __ bind(valsize_known);
  1.2411 +      // setup object pointer
  1.2412 +      __ movptr(rbx, Address(rbx, 0));
  1.2413 +    }
  1.2414 +    // cache entry pointer
  1.2415 +    __ addptr(rax, in_bytes(cp_base_offset));
  1.2416 +    __ shll(rdx, LogBytesPerWord);
  1.2417 +    __ addptr(rax, rdx);
  1.2418 +    // object (tos)
  1.2419 +    __ mov(rcx, rsp);
  1.2420 +    // rbx,: object pointer set up above (NULL if static)
  1.2421 +    // rax,: cache entry pointer
  1.2422 +    // rcx: jvalue object on the stack
  1.2423 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
  1.2424 +               rbx, rax, rcx);
  1.2425 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2426 +    __ bind(L1);
  1.2427 +  }
  1.2428 +}
  1.2429 +
  1.2430 +
  1.2431 +void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  1.2432 +  transition(vtos, vtos);
  1.2433 +
  1.2434 +  const Register cache = rcx;
  1.2435 +  const Register index = rdx;
  1.2436 +  const Register obj   = rcx;
  1.2437 +  const Register off   = rbx;
  1.2438 +  const Register flags = rax;
  1.2439 +
  1.2440 +  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  1.2441 +  jvmti_post_field_mod(cache, index, is_static);
  1.2442 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2443 +
  1.2444 +  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  1.2445 +  // volatile_barrier( );
  1.2446 +
  1.2447 +  Label notVolatile, Done;
  1.2448 +  __ movl(rdx, flags);
  1.2449 +  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
  1.2450 +  __ andl(rdx, 0x1);
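  // rdx now holds the is_volatile bit; it is tested after the stores to decide whether a
  // StoreLoad|StoreStore barrier is needed, and tested up front in the ltos case because
  // pop(ltos) overwrites rdx.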
  1.2451 +
  1.2452 +  // field addresses
  1.2453 +  const Address lo(obj, off, Address::times_1, 0*wordSize);
  1.2454 +  const Address hi(obj, off, Address::times_1, 1*wordSize);
  1.2455 +
  1.2456 +  Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  1.2457 +
  1.2458 +  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2459 +  assert(btos == 0, "change code, btos != 0");
  1.2460 +  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
  1.2461 +  __ jcc(Assembler::notZero, notByte);
  1.2462 +
  1.2463 +  // btos
  1.2464 +  {
  1.2465 +    __ pop(btos);
  1.2466 +    if (!is_static) pop_and_check_object(obj);
  1.2467 +    __ movb(lo, rax);
  1.2468 +    if (!is_static) {
  1.2469 +      patch_bytecode(Bytecodes::_fast_bputfield, rcx, rbx, true, byte_no);
  1.2470 +    }
  1.2471 +    __ jmp(Done);
  1.2472 +  }
  1.2473 +
  1.2474 +  __ bind(notByte);
  1.2475 +  __ cmpl(flags, itos);
  1.2476 +  __ jcc(Assembler::notEqual, notInt);
  1.2477 +
  1.2478 +  // itos
  1.2479 +  {
  1.2480 +    __ pop(itos);
  1.2481 +    if (!is_static) pop_and_check_object(obj);
  1.2482 +    __ movl(lo, rax);
  1.2483 +    if (!is_static) {
  1.2484 +      patch_bytecode(Bytecodes::_fast_iputfield, rcx, rbx, true, byte_no);
  1.2485 +    }
  1.2486 +    __ jmp(Done);
  1.2487 +  }
  1.2488 +
  1.2489 +  __ bind(notInt);
  1.2490 +  __ cmpl(flags, atos);
  1.2491 +  __ jcc(Assembler::notEqual, notObj);
  1.2492 +
  1.2493 +  // atos
  1.2494 +  {
  1.2495 +    __ pop(atos);
  1.2496 +    if (!is_static) pop_and_check_object(obj);
  1.2497 +    do_oop_store(_masm, lo, rax, _bs->kind(), false);
  1.2498 +    if (!is_static) {
  1.2499 +      patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx, true, byte_no);
  1.2500 +    }
  1.2501 +    __ jmp(Done);
  1.2502 +  }
  1.2503 +
  1.2504 +  __ bind(notObj);
  1.2505 +  __ cmpl(flags, ctos);
  1.2506 +  __ jcc(Assembler::notEqual, notChar);
  1.2507 +
  1.2508 +  // ctos
  1.2509 +  {
  1.2510 +    __ pop(ctos);
  1.2511 +    if (!is_static) pop_and_check_object(obj);
  1.2512 +    __ movw(lo, rax);
  1.2513 +    if (!is_static) {
  1.2514 +      patch_bytecode(Bytecodes::_fast_cputfield, rcx, rbx, true, byte_no);
  1.2515 +    }
  1.2516 +    __ jmp(Done);
  1.2517 +  }
  1.2518 +
  1.2519 +  __ bind(notChar);
  1.2520 +  __ cmpl(flags, stos);
  1.2521 +  __ jcc(Assembler::notEqual, notShort);
  1.2522 +
  1.2523 +  // stos
  1.2524 +  {
  1.2525 +    __ pop(stos);
  1.2526 +    if (!is_static) pop_and_check_object(obj);
  1.2527 +    __ movw(lo, rax);
  1.2528 +    if (!is_static) {
  1.2529 +      patch_bytecode(Bytecodes::_fast_sputfield, rcx, rbx, true, byte_no);
  1.2530 +    }
  1.2531 +    __ jmp(Done);
  1.2532 +  }
  1.2533 +
  1.2534 +  __ bind(notShort);
  1.2535 +  __ cmpl(flags, ltos);
  1.2536 +  __ jcc(Assembler::notEqual, notLong);
  1.2537 +
  1.2538 +  // ltos
  1.2539 +  {
  1.2540 +    Label notVolatileLong;
  1.2541 +    __ testl(rdx, rdx);
  1.2542 +    __ jcc(Assembler::zero, notVolatileLong);
  1.2543 +
  1.2544 +    __ pop(ltos);  // overwrites rdx, do this after testing volatile.
  1.2545 +    if (!is_static) pop_and_check_object(obj);
  1.2546 +
  1.2547 +    // Replace with real volatile test
  1.2548 +    __ push(rdx);
  1.2549 +    __ push(rax);                 // Must update atomically with FIST
  1.2550 +    __ fild_d(Address(rsp,0));    // So load into FPU register
  1.2551 +    __ fistp_d(lo);               // and put into memory atomically
  1.2552 +    __ addptr(rsp, 2*wordSize);
  1.2553 +    // volatile_barrier();
  1.2554 +    volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
  1.2555 +                                                 Assembler::StoreStore));
  1.2556 +    // Don't rewrite volatile version
  1.2557 +    __ jmp(notVolatile);
  1.2558 +
  1.2559 +    __ bind(notVolatileLong);
  1.2560 +
  1.2561 +    __ pop(ltos);  // overwrites rdx
  1.2562 +    if (!is_static) pop_and_check_object(obj);
  1.2563 +    NOT_LP64(__ movptr(hi, rdx));
  1.2564 +    __ movptr(lo, rax);
  1.2565 +    if (!is_static) {
  1.2566 +      patch_bytecode(Bytecodes::_fast_lputfield, rcx, rbx, true, byte_no);
  1.2567 +    }
  1.2568 +    __ jmp(notVolatile);
  1.2569 +  }
  1.2570 +
  1.2571 +  __ bind(notLong);
  1.2572 +  __ cmpl(flags, ftos);
  1.2573 +  __ jcc(Assembler::notEqual, notFloat);
  1.2574 +
  1.2575 +  // ftos
  1.2576 +  {
  1.2577 +    __ pop(ftos);
  1.2578 +    if (!is_static) pop_and_check_object(obj);
  1.2579 +    __ fstp_s(lo);
  1.2580 +    if (!is_static) {
  1.2581 +      patch_bytecode(Bytecodes::_fast_fputfield, rcx, rbx, true, byte_no);
  1.2582 +    }
  1.2583 +    __ jmp(Done);
  1.2584 +  }
  1.2585 +
  1.2586 +  __ bind(notFloat);
  1.2587 +#ifdef ASSERT
  1.2588 +  __ cmpl(flags, dtos);
  1.2589 +  __ jcc(Assembler::notEqual, notDouble);
  1.2590 +#endif
  1.2591 +
  1.2592 +  // dtos
  1.2593 +  {
  1.2594 +    __ pop(dtos);
  1.2595 +    if (!is_static) pop_and_check_object(obj);
  1.2596 +    __ fstp_d(lo);
  1.2597 +    if (!is_static) {
  1.2598 +      patch_bytecode(Bytecodes::_fast_dputfield, rcx, rbx, true, byte_no);
  1.2599 +    }
  1.2600 +    __ jmp(Done);
  1.2601 +  }
  1.2602 +
  1.2603 +#ifdef ASSERT
  1.2604 +  __ bind(notDouble);
  1.2605 +  __ stop("Bad state");
  1.2606 +#endif
  1.2607 +
  1.2608 +  __ bind(Done);
  1.2609 +
  1.2610 +  // Check for volatile store
  1.2611 +  __ testl(rdx, rdx);
  1.2612 +  __ jcc(Assembler::zero, notVolatile);
  1.2613 +  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
  1.2614 +                                               Assembler::StoreStore));
  1.2615 +  __ bind(notVolatile);
  1.2616 +}
  1.2617 +
  1.2618 +
  1.2619 +void TemplateTable::putfield(int byte_no) {
  1.2620 +  putfield_or_static(byte_no, false);
  1.2621 +}
  1.2622 +
  1.2623 +
  1.2624 +void TemplateTable::putstatic(int byte_no) {
  1.2625 +  putfield_or_static(byte_no, true);
  1.2626 +}
  1.2627 +
  1.2628 +void TemplateTable::jvmti_post_fast_field_mod() {
  1.2629 +  if (JvmtiExport::can_post_field_modification()) {
  1.2630 +    // Check to see if a field modification watch has been set before we take
  1.2631 +    // the time to call into the VM.
  1.2632 +    Label L2;
  1.2633 +     __ mov32(rcx, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2634 +     __ testl(rcx,rcx);
  1.2635 +     __ jcc(Assembler::zero, L2);
  1.2636 +     __ pop_ptr(rbx);               // copy the object pointer from tos
  1.2637 +     __ verify_oop(rbx);
  1.2638 +     __ push_ptr(rbx);              // put the object pointer back on tos
  1.2639 +
  1.2640 +     // Save tos values before call_VM() clobbers them. Since we have
  1.2641 +     // to do it for every data type, we use the saved values as the
  1.2642 +     // jvalue object.
  1.2643 +     switch (bytecode()) {          // load values into the jvalue object
  1.2644 +     case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
  1.2645 +     case Bytecodes::_fast_bputfield: // fall through
  1.2646 +     case Bytecodes::_fast_sputfield: // fall through
  1.2647 +     case Bytecodes::_fast_cputfield: // fall through
  1.2648 +     case Bytecodes::_fast_iputfield: __ push_i(rax); break;
  1.2649 +     case Bytecodes::_fast_dputfield: __ push_d(); break;
  1.2650 +     case Bytecodes::_fast_fputfield: __ push_f(); break;
  1.2651 +     case Bytecodes::_fast_lputfield: __ push_l(rax); break;
  1.2652 +
  1.2653 +     default:
  1.2654 +       ShouldNotReachHere();
  1.2655 +     }
  1.2656 +     __ mov(rcx, rsp);              // points to jvalue on the stack
  1.2657 +     // access constant pool cache entry
  1.2658 +     __ get_cache_entry_pointer_at_bcp(rax, rdx, 1);
  1.2659 +     __ verify_oop(rbx);
  1.2660 +     // rbx,: object pointer copied above
  1.2661 +     // rax,: cache entry pointer
  1.2662 +     // rcx: jvalue object on the stack
  1.2663 +     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx);
  1.2664 +
  1.2665 +     switch (bytecode()) {             // restore tos values
  1.2666 +     case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
  1.2667 +     case Bytecodes::_fast_bputfield: // fall through
  1.2668 +     case Bytecodes::_fast_sputfield: // fall through
  1.2669 +     case Bytecodes::_fast_cputfield: // fall through
  1.2670 +     case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
  1.2671 +     case Bytecodes::_fast_dputfield: __ pop_d(); break;
  1.2672 +     case Bytecodes::_fast_fputfield: __ pop_f(); break;
  1.2673 +     case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
  1.2674 +     }
  1.2675 +     __ bind(L2);
  1.2676 +  }
  1.2677 +}
  1.2678 +
  1.2679 +void TemplateTable::fast_storefield(TosState state) {
  1.2680 +  transition(state, vtos);
  1.2681 +
  1.2682 +  ByteSize base = ConstantPoolCache::base_offset();
  1.2683 +
  1.2684 +  jvmti_post_fast_field_mod();
  1.2685 +
  1.2686 +  // access constant pool cache
  1.2687 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2688 +
  1.2689 +  // test for volatile with rdx, but rdx is the tos register for lputfield.
  1.2690 +  if (bytecode() == Bytecodes::_fast_lputfield) __ push(rdx);
  1.2691 +  __ movl(rdx, Address(rcx, rbx, Address::times_ptr, in_bytes(base +
  1.2692 +                       ConstantPoolCacheEntry::flags_offset())));
  1.2693 +
  1.2694 +  // replace index with field offset from cache entry
  1.2695 +  __ movptr(rbx, Address(rcx, rbx, Address::times_ptr, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
  1.2696 +
  1.2697 +  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  1.2698 +  // volatile_barrier( );
  1.2699 +
  1.2700 +  Label notVolatile, Done;
  1.2701 +  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
  1.2702 +  __ andl(rdx, 0x1);
  1.2703 +  // Check for volatile store
  1.2704 +  __ testl(rdx, rdx);
  1.2705 +  __ jcc(Assembler::zero, notVolatile);
  1.2706 +
  1.2707 +  if (bytecode() == Bytecodes::_fast_lputfield) __ pop(rdx);
  1.2708 +
  1.2709 +  // Get object from stack
  1.2710 +  pop_and_check_object(rcx);
  1.2711 +
  1.2712 +  // field addresses
  1.2713 +  const Address lo(rcx, rbx, Address::times_1, 0*wordSize);
  1.2714 +  const Address hi(rcx, rbx, Address::times_1, 1*wordSize);
  1.2715 +
  1.2716 +  // access field
  1.2717 +  switch (bytecode()) {
  1.2718 +    case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
  1.2719 +    case Bytecodes::_fast_sputfield: // fall through
  1.2720 +    case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
  1.2721 +    case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
  1.2722 +    case Bytecodes::_fast_lputfield:
  1.2723 +      NOT_LP64(__ movptr(hi, rdx));
  1.2724 +      __ movptr(lo, rax);
  1.2725 +      break;
  1.2726 +    case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
  1.2727 +    case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
  1.2728 +    case Bytecodes::_fast_aputfield: {
  1.2729 +      do_oop_store(_masm, lo, rax, _bs->kind(), false);
  1.2730 +      break;
  1.2731 +    }
  1.2732 +    default:
  1.2733 +      ShouldNotReachHere();
  1.2734 +  }
  1.2735 +
  1.2736 +  Label done;
  1.2737 +  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
  1.2738 +                                               Assembler::StoreStore));
  1.2739 +  // The barrier sequence is large enough that a short branch doesn't reach!
  1.2740 +  __ jmp(done);
  1.2741 +
  1.2742 +  // Same code as above, but don't need rdx to test for volatile.
  1.2743 +  __ bind(notVolatile);
  1.2744 +
  1.2745 +  if (bytecode() == Bytecodes::_fast_lputfield) __ pop(rdx);
  1.2746 +
  1.2747 +  // Get object from stack
  1.2748 +  pop_and_check_object(rcx);
  1.2749 +
  1.2750 +  // access field
  1.2751 +  switch (bytecode()) {
  1.2752 +    case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
  1.2753 +    case Bytecodes::_fast_sputfield: // fall through
  1.2754 +    case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
  1.2755 +    case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
  1.2756 +    case Bytecodes::_fast_lputfield:
  1.2757 +      NOT_LP64(__ movptr(hi, rdx));
  1.2758 +      __ movptr(lo, rax);
  1.2759 +      break;
  1.2760 +    case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
  1.2761 +    case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
  1.2762 +    case Bytecodes::_fast_aputfield: {
  1.2763 +      do_oop_store(_masm, lo, rax, _bs->kind(), false);
  1.2764 +      break;
  1.2765 +    }
  1.2766 +    default:
  1.2767 +      ShouldNotReachHere();
  1.2768 +  }
  1.2769 +  __ bind(done);
  1.2770 +}
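// The fast_<x>putfield bytecodes are rewritten forms of putfield that are only
// installed once the field reference has been resolved, so the code above can
// read the field offset straight from f2 of the constant pool cache entry.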
  1.2771 +
  1.2772 +
  1.2773 +void TemplateTable::fast_accessfield(TosState state) {
  1.2774 +  transition(atos, state);
  1.2775 +
  1.2776 +  // do the JVMTI work here to avoid disturbing the register state below
  1.2777 +  if (JvmtiExport::can_post_field_access()) {
  1.2778 +    // Check to see if a field access watch has been set before we take
  1.2779 +    // the time to call into the VM.
  1.2780 +    Label L1;
  1.2781 +    __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2782 +    __ testl(rcx,rcx);
  1.2783 +    __ jcc(Assembler::zero, L1);
  1.2784 +    // access constant pool cache entry
  1.2785 +    __ get_cache_entry_pointer_at_bcp(rcx, rdx, 1);
  1.2786 +    __ push_ptr(rax);  // save object pointer before call_VM() clobbers it
  1.2787 +    __ verify_oop(rax);
  1.2788 +    // rax,: object pointer copied above
  1.2789 +    // rcx: cache entry pointer
  1.2790 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), rax, rcx);
  1.2791 +    __ pop_ptr(rax);   // restore object pointer
  1.2792 +    __ bind(L1);
  1.2793 +  }
  1.2794 +
  1.2795 +  // access constant pool cache
  1.2796 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2797 +  // replace index with field offset from cache entry
  1.2798 +  __ movptr(rbx, Address(rcx,
  1.2799 +                         rbx,
  1.2800 +                         Address::times_ptr,
  1.2801 +                         in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())));
  1.2802 +
  1.2803 +
  1.2804 +  // rax,: object
  1.2805 +  __ verify_oop(rax);
  1.2806 +  __ null_check(rax);
  1.2807 +  // field addresses
  1.2808 +  const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
  1.2809 +  const Address hi = Address(rax, rbx, Address::times_1, 1*wordSize);
  1.2810 +
  1.2811 +  // access field
  1.2812 +  switch (bytecode()) {
  1.2813 +    case Bytecodes::_fast_bgetfield: __ movsbl(rax, lo );                 break;
  1.2814 +    case Bytecodes::_fast_sgetfield: __ load_signed_short(rax, lo );      break;
  1.2815 +    case Bytecodes::_fast_cgetfield: __ load_unsigned_short(rax, lo );    break;
  1.2816 +    case Bytecodes::_fast_igetfield: __ movl(rax, lo);                    break;
  1.2817 +    case Bytecodes::_fast_lgetfield: __ stop("should not be rewritten");  break;
  1.2818 +    case Bytecodes::_fast_fgetfield: __ fld_s(lo);                        break;
  1.2819 +    case Bytecodes::_fast_dgetfield: __ fld_d(lo);                        break;
  1.2820 +    case Bytecodes::_fast_agetfield: __ movptr(rax, lo); __ verify_oop(rax); break;
  1.2821 +    default:
  1.2822 +      ShouldNotReachHere();
  1.2823 +  }
  1.2824 +
   1.2825 +  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO)
  1.2826 +  // volatile_barrier( );
  1.2827 +}
  1.2828 +
  1.2829 +void TemplateTable::fast_xaccess(TosState state) {
  1.2830 +  transition(vtos, state);
  1.2831 +  // get receiver
  1.2832 +  __ movptr(rax, aaddress(0));
  1.2833 +  // access constant pool cache
  1.2834 +  __ get_cache_and_index_at_bcp(rcx, rdx, 2);
  1.2835 +  __ movptr(rbx, Address(rcx,
  1.2836 +                         rdx,
  1.2837 +                         Address::times_ptr,
  1.2838 +                         in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())));
  1.2839 +  // make sure exception is reported in correct bcp range (getfield is next instruction)
  1.2840 +  __ increment(rsi);
  1.2841 +  __ null_check(rax);
  1.2842 +  const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
  1.2843 +  if (state == itos) {
  1.2844 +    __ movl(rax, lo);
  1.2845 +  } else if (state == atos) {
  1.2846 +    __ movptr(rax, lo);
  1.2847 +    __ verify_oop(rax);
  1.2848 +  } else if (state == ftos) {
  1.2849 +    __ fld_s(lo);
  1.2850 +  } else {
  1.2851 +    ShouldNotReachHere();
  1.2852 +  }
  1.2853 +  __ decrement(rsi);
  1.2854 +}
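// fast_xaccess implements the _fast_{i,a,f}access_0 bytecodes, i.e. an aload_0 fused
// with the following fast getfield; rsi (the bcp) is temporarily incremented so that a
// NullPointerException is reported at the getfield's bcp, matching the original pair.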
  1.2855 +
  1.2856 +
  1.2857 +
  1.2858 +//----------------------------------------------------------------------------------------------------
  1.2859 +// Calls
  1.2860 +
  1.2861 +void TemplateTable::count_calls(Register method, Register temp) {
  1.2862 +  // implemented elsewhere
  1.2863 +  ShouldNotReachHere();
  1.2864 +}
  1.2865 +
  1.2866 +
  1.2867 +void TemplateTable::prepare_invoke(int byte_no,
  1.2868 +                                   Register method,  // linked method (or i-klass)
  1.2869 +                                   Register index,   // itable index, MethodType, etc.
  1.2870 +                                   Register recv,    // if caller wants to see it
  1.2871 +                                   Register flags    // if caller wants to test it
  1.2872 +                                   ) {
  1.2873 +  // determine flags
  1.2874 +  const Bytecodes::Code code = bytecode();
  1.2875 +  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  1.2876 +  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
  1.2877 +  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
  1.2878 +  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  1.2879 +  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  1.2880 +  const bool load_receiver       = (recv  != noreg);
  1.2881 +  const bool save_flags          = (flags != noreg);
  1.2882 +  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
  1.2883 +  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  1.2884 +  assert(flags == noreg || flags == rdx, "");
  1.2885 +  assert(recv  == noreg || recv  == rcx, "");
  1.2886 +
  1.2887 +  // setup registers & access constant pool cache
  1.2888 +  if (recv  == noreg)  recv  = rcx;
  1.2889 +  if (flags == noreg)  flags = rdx;
  1.2890 +  assert_different_registers(method, index, recv, flags);
  1.2891 +
  1.2892 +  // save 'interpreter return address'
  1.2893 +  __ save_bcp();
  1.2894 +
  1.2895 +  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
  1.2896 +
  1.2897 +  // maybe push appendix to arguments (just before return address)
  1.2898 +  if (is_invokedynamic || is_invokehandle) {
  1.2899 +    Label L_no_push;
  1.2900 +    __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
  1.2901 +    __ jccb(Assembler::zero, L_no_push);
  1.2902 +    // Push the appendix as a trailing parameter.
  1.2903 +    // This must be done before we get the receiver,
  1.2904 +    // since the parameter_size includes it.
  1.2905 +    __ push(rbx);
  1.2906 +    __ mov(rbx, index);
  1.2907 +    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
  1.2908 +    __ load_resolved_reference_at_index(index, rbx);
  1.2909 +    __ pop(rbx);
  1.2910 +    __ push(index);  // push appendix (MethodType, CallSite, etc.)
  1.2911 +    __ bind(L_no_push);
  1.2912 +  }
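  // The appendix is the extra trailing argument that method handle linkage expects
  // (a MethodType, CallSite, etc.); it is fetched from the resolved-references array
  // and pushed as if it were the last declared parameter of the call.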
  1.2913 +
  1.2914 +  // load receiver if needed (note: no return address pushed yet)
  1.2915 +  if (load_receiver) {
  1.2916 +    __ movl(recv, flags);
  1.2917 +    __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
  1.2918 +    const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
  1.2919 +    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
  1.2920 +    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
  1.2921 +    __ movptr(recv, recv_addr);
  1.2922 +    __ verify_oop(recv);
  1.2923 +  }
  1.2924 +
  1.2925 +  if (save_flags) {
  1.2926 +    __ mov(rsi, flags);
  1.2927 +  }
  1.2928 +
  1.2929 +  // compute return type
  1.2930 +  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2931 +  // Make sure we don't need to mask flags after the above shift
  1.2932 +  ConstantPoolCacheEntry::verify_tos_state_shift();
  1.2933 +  // load return address
  1.2934 +  {
  1.2935 +    const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
  1.2936 +    ExternalAddress table(table_addr);
  1.2937 +    __ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr)));
  1.2938 +  }
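  // The tos_state bits select an entry from the per-bytecode return entry table, so
  // that when the callee returns, the interpreter resumes at an entry point expecting
  // the result in the tos register (or FPU slot) appropriate for this call's type.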
  1.2939 +
  1.2940 +  // push return address
  1.2941 +  __ push(flags);
  1.2942 +
  1.2943 +  // Restore flags value from the constant pool cache, and restore rsi
  1.2944 +  // for later null checks.  rsi is the bytecode pointer
  1.2945 +  if (save_flags) {
  1.2946 +    __ mov(flags, rsi);
  1.2947 +    __ restore_bcp();
  1.2948 +  }
  1.2949 +}
  1.2950 +
  1.2951 +
  1.2952 +void TemplateTable::invokevirtual_helper(Register index,
  1.2953 +                                         Register recv,
  1.2954 +                                         Register flags) {
  1.2955 +  // Uses temporary registers rax, rdx
  1.2956 +  assert_different_registers(index, recv, rax, rdx);
  1.2957 +  assert(index == rbx, "");
  1.2958 +  assert(recv  == rcx, "");
  1.2959 +
  1.2960 +  // Test for an invoke of a final method
  1.2961 +  Label notFinal;
  1.2962 +  __ movl(rax, flags);
  1.2963 +  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  1.2964 +  __ jcc(Assembler::zero, notFinal);
  1.2965 +
  1.2966 +  const Register method = index;  // method must be rbx
  1.2967 +  assert(method == rbx,
  1.2968 +         "Method* must be rbx for interpreter calling convention");
  1.2969 +
  1.2970 +  // do the call - the index is actually the method to call
  1.2971 +  // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
  1.2972 +
  1.2973 +  // It's final, need a null check here!
  1.2974 +  __ null_check(recv);
  1.2975 +
  1.2976 +  // profile this call
  1.2977 +  __ profile_final_call(rax);
  1.2978 +  __ profile_arguments_type(rax, method, rsi, true);
  1.2979 +
  1.2980 +  __ jump_from_interpreted(method, rax);
  1.2981 +
  1.2982 +  __ bind(notFinal);
  1.2983 +
  1.2984 +  // get receiver klass
  1.2985 +  __ null_check(recv, oopDesc::klass_offset_in_bytes());
  1.2986 +  __ load_klass(rax, recv);
  1.2987 +
  1.2988 +  // profile this call
  1.2989 +  __ profile_virtual_call(rax, rdi, rdx);
  1.2990 +
  1.2991 +  // get target Method* & entry point
  1.2992 +  __ lookup_virtual_method(rax, index, method);
  1.2993 +  __ profile_arguments_type(rdx, method, rsi, true);
  1.2994 +  __ jump_from_interpreted(method, rdx);
  1.2995 +}
  1.2996 +
  1.2997 +
  1.2998 +void TemplateTable::invokevirtual(int byte_no) {
  1.2999 +  transition(vtos, vtos);
  1.3000 +  assert(byte_no == f2_byte, "use this argument");
  1.3001 +  prepare_invoke(byte_no,
  1.3002 +                 rbx,    // method or vtable index
  1.3003 +                 noreg,  // unused itable index
  1.3004 +                 rcx, rdx); // recv, flags
  1.3005 +
  1.3006 +  // rbx: index
  1.3007 +  // rcx: receiver
  1.3008 +  // rdx: flags
  1.3009 +
  1.3010 +  invokevirtual_helper(rbx, rcx, rdx);
  1.3011 +}
  1.3012 +
  1.3013 +
  1.3014 +void TemplateTable::invokespecial(int byte_no) {
  1.3015 +  transition(vtos, vtos);
  1.3016 +  assert(byte_no == f1_byte, "use this argument");
  1.3017 +  prepare_invoke(byte_no, rbx, noreg,  // get f1 Method*
  1.3018 +                 rcx);  // get receiver also for null check
  1.3019 +  __ verify_oop(rcx);
  1.3020 +  __ null_check(rcx);
  1.3021 +  // do the call
  1.3022 +  __ profile_call(rax);
  1.3023 +  __ profile_arguments_type(rax, rbx, rsi, false);
  1.3024 +  __ jump_from_interpreted(rbx, rax);
  1.3025 +}
  1.3026 +
  1.3027 +
  1.3028 +void TemplateTable::invokestatic(int byte_no) {
  1.3029 +  transition(vtos, vtos);
  1.3030 +  assert(byte_no == f1_byte, "use this argument");
  1.3031 +  prepare_invoke(byte_no, rbx);  // get f1 Method*
  1.3032 +  // do the call
  1.3033 +  __ profile_call(rax);
  1.3034 +  __ profile_arguments_type(rax, rbx, rsi, false);
  1.3035 +  __ jump_from_interpreted(rbx, rax);
  1.3036 +}
  1.3037 +
  1.3038 +
  1.3039 +void TemplateTable::fast_invokevfinal(int byte_no) {
  1.3040 +  transition(vtos, vtos);
  1.3041 +  assert(byte_no == f2_byte, "use this argument");
  1.3042 +  __ stop("fast_invokevfinal not used on x86");
  1.3043 +}
  1.3044 +
  1.3045 +
  1.3046 +void TemplateTable::invokeinterface(int byte_no) {
  1.3047 +  transition(vtos, vtos);
  1.3048 +  assert(byte_no == f1_byte, "use this argument");
  1.3049 +  prepare_invoke(byte_no, rax, rbx,  // get f1 Klass*, f2 itable index
  1.3050 +                 rcx, rdx); // recv, flags
  1.3051 +
  1.3052 +  // rax: interface klass (from f1)
  1.3053 +  // rbx: itable index (from f2)
  1.3054 +  // rcx: receiver
  1.3055 +  // rdx: flags
  1.3056 +
  1.3057 +  // Special case of invokeinterface called for virtual method of
  1.3058 +  // java.lang.Object.  See cpCacheOop.cpp for details.
  1.3059 +  // This code isn't produced by javac, but could be produced by
  1.3060 +  // another compliant java compiler.
  1.3061 +  Label notMethod;
  1.3062 +  __ movl(rdi, rdx);
  1.3063 +  __ andl(rdi, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
  1.3064 +  __ jcc(Assembler::zero, notMethod);
  1.3065 +
  1.3066 +  invokevirtual_helper(rbx, rcx, rdx);
  1.3067 +  __ bind(notMethod);
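  // (The is_forced_virtual bit is set when an invokeinterface actually resolves to a
  //  method declared in java.lang.Object, e.g. hashCode() or toString(); such calls
  //  are dispatched virtually by the helper above instead of through the itable.)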
  1.3068 +
  1.3069 +  // Get receiver klass into rdx - also a null check
  1.3070 +  __ restore_locals();  // restore rdi
  1.3071 +  __ null_check(rcx, oopDesc::klass_offset_in_bytes());
  1.3072 +  __ load_klass(rdx, rcx);
  1.3073 +
  1.3074 +  // profile this call
  1.3075 +  __ profile_virtual_call(rdx, rsi, rdi);
  1.3076 +
  1.3077 +  Label no_such_interface, no_such_method;
  1.3078 +
  1.3079 +  __ lookup_interface_method(// inputs: rec. class, interface, itable index
  1.3080 +                             rdx, rax, rbx,
  1.3081 +                             // outputs: method, scan temp. reg
  1.3082 +                             rbx, rsi,
  1.3083 +                             no_such_interface);
  1.3084 +
  1.3085 +  // rbx: Method* to call
  1.3086 +  // rcx: receiver
  1.3087 +  // Check for abstract method error
  1.3088 +  // Note: This should be done more efficiently via a throw_abstract_method_error
  1.3089 +  //       interpreter entry point and a conditional jump to it in case of a null
  1.3090 +  //       method.
  1.3091 +  __ testptr(rbx, rbx);
  1.3092 +  __ jcc(Assembler::zero, no_such_method);
  1.3093 +
  1.3094 +  __ profile_arguments_type(rdx, rbx, rsi, true);
  1.3095 +
  1.3096 +  // do the call
  1.3097 +  // rcx: receiver
  1.3098 +  // rbx,: Method*
  1.3099 +  __ jump_from_interpreted(rbx, rdx);
  1.3100 +  __ should_not_reach_here();
  1.3101 +
  1.3102 +  // exception handling code follows...
  1.3103 +  // note: must restore interpreter registers to canonical
  1.3104 +  //       state for exception handling to work correctly!
  1.3105 +
  1.3106 +  __ bind(no_such_method);
  1.3107 +  // throw exception
  1.3108 +  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
  1.3109 +  __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
  1.3110 +  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
  1.3111 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
  1.3112 +  // the call_VM checks for exception, so we should never return here.
  1.3113 +  __ should_not_reach_here();
  1.3114 +
  1.3115 +  __ bind(no_such_interface);
  1.3116 +  // throw exception
  1.3117 +  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
  1.3118 +  __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
  1.3119 +  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
  1.3120 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.3121 +                   InterpreterRuntime::throw_IncompatibleClassChangeError));
  1.3122 +  // the call_VM checks for exception, so we should never return here.
  1.3123 +  __ should_not_reach_here();
  1.3124 +}
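// Interface dispatch proper goes through the receiver's itable: lookup_interface_method
// scans the itable for the interface Klass* from f1, then loads the Method* at the itable
// index from f2.  A receiver that does not implement the interface ends up at
// no_such_interface (IncompatibleClassChangeError); a null method slot ends up at
// no_such_method (AbstractMethodError).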
  1.3125 +
  1.3126 +void TemplateTable::invokehandle(int byte_no) {
  1.3127 +  transition(vtos, vtos);
  1.3128 +  assert(byte_no == f1_byte, "use this argument");
  1.3129 +  const Register rbx_method = rbx;
  1.3130 +  const Register rax_mtype  = rax;
  1.3131 +  const Register rcx_recv   = rcx;
  1.3132 +  const Register rdx_flags  = rdx;
  1.3133 +
  1.3134 +  if (!EnableInvokeDynamic) {
  1.3135 +    // rewriter does not generate this bytecode
  1.3136 +    __ should_not_reach_here();
  1.3137 +    return;
  1.3138 +  }
  1.3139 +
  1.3140 +  prepare_invoke(byte_no, rbx_method, rax_mtype, rcx_recv);
  1.3141 +  __ verify_method_ptr(rbx_method);
  1.3142 +  __ verify_oop(rcx_recv);
  1.3143 +  __ null_check(rcx_recv);
  1.3144 +
  1.3145 +  // rax: MethodType object (from cpool->resolved_references[f1], if necessary)
  1.3146 +  // rbx: MH.invokeExact_MT method (from f2)
  1.3147 +
  1.3148 +  // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
  1.3149 +
  1.3150 +  // FIXME: profile the LambdaForm also
  1.3151 +  __ profile_final_call(rax);
  1.3152 +  __ profile_arguments_type(rdx, rbx_method, rsi, true);
  1.3153 +
  1.3154 +  __ jump_from_interpreted(rbx_method, rdx);
  1.3155 +}
  1.3156 +
  1.3157 +
  1.3158 +void TemplateTable::invokedynamic(int byte_no) {
  1.3159 +  transition(vtos, vtos);
  1.3160 +  assert(byte_no == f1_byte, "use this argument");
  1.3161 +
  1.3162 +  if (!EnableInvokeDynamic) {
  1.3163 +    // We should not encounter this bytecode if !EnableInvokeDynamic.
  1.3164 +    // The verifier will stop it.  However, if we get past the verifier,
  1.3165 +    // this will stop the thread in a reasonable way, without crashing the JVM.
  1.3166 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.3167 +                     InterpreterRuntime::throw_IncompatibleClassChangeError));
  1.3168 +    // the call_VM checks for exception, so we should never return here.
  1.3169 +    __ should_not_reach_here();
  1.3170 +    return;
  1.3171 +  }
  1.3172 +
  1.3173 +  const Register rbx_method   = rbx;
  1.3174 +  const Register rax_callsite = rax;
  1.3175 +
  1.3176 +  prepare_invoke(byte_no, rbx_method, rax_callsite);
  1.3177 +
  1.3178 +  // rax: CallSite object (from cpool->resolved_references[f1])
  1.3179 +  // rbx: MH.linkToCallSite method (from f2)
  1.3180 +
  1.3181 +  // Note:  rax_callsite is already pushed by prepare_invoke
  1.3182 +
  1.3183 +  // %%% should make a type profile for any invokedynamic that takes a ref argument
  1.3184 +  // profile this call
  1.3185 +  __ profile_call(rsi);
  1.3186 +  __ profile_arguments_type(rdx, rbx, rsi, false);
  1.3187 +
  1.3188 +  __ verify_oop(rax_callsite);
  1.3189 +
  1.3190 +  __ jump_from_interpreted(rbx_method, rdx);
  1.3191 +}
  1.3192 +
  1.3193 +//----------------------------------------------------------------------------------------------------
  1.3194 +// Allocation
  1.3195 +
  1.3196 +void TemplateTable::_new() {
  1.3197 +  transition(vtos, atos);
  1.3198 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3199 +  Label slow_case;
  1.3200 +  Label slow_case_no_pop;
  1.3201 +  Label done;
  1.3202 +  Label initialize_header;
  1.3203 +  Label initialize_object;  // including clearing the fields
  1.3204 +  Label allocate_shared;
  1.3205 +
  1.3206 +  __ get_cpool_and_tags(rcx, rax);
  1.3207 +
  1.3208 +  // Make sure the class we're about to instantiate has been resolved.
   1.3209 +  // This is done before loading the InstanceKlass to be consistent with the
   1.3210 +  // order in which the constant pool is updated (see ConstantPool::klass_at_put).
  1.3211 +  const int tags_offset = Array<u1>::base_offset_in_bytes();
  1.3212 +  __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
  1.3213 +  __ jcc(Assembler::notEqual, slow_case_no_pop);
  1.3214 +
  1.3215 +  // get InstanceKlass
  1.3216 +  __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(ConstantPool)));
   1.3217 +  __ push(rcx);  // save the klass for initializing the object header later
  1.3218 +
  1.3219 +  // make sure klass is initialized & doesn't have finalizer
  1.3220 +  // make sure klass is fully initialized
  1.3221 +  __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
  1.3222 +  __ jcc(Assembler::notEqual, slow_case);
  1.3223 +
  1.3224 +  // get instance_size in InstanceKlass (scaled to a count of bytes)
  1.3225 +  __ movl(rdx, Address(rcx, Klass::layout_helper_offset()));
  1.3226 +  // test to see if it has a finalizer or is malformed in some way
  1.3227 +  __ testl(rdx, Klass::_lh_instance_slow_path_bit);
  1.3228 +  __ jcc(Assembler::notZero, slow_case);
  1.3229 +
  1.3230 +  //
  1.3231 +  // Allocate the instance
  1.3232 +  // 1) Try to allocate in the TLAB
   1.3233 +  // 2) if that fails and inline contiguous allocation is allowed, allocate in the shared Eden
  1.3234 +  // 3) if the above fails (or is not applicable), go to a slow case
  1.3235 +  // (creates a new TLAB, etc.)
  1.3236 +
  1.3237 +  const bool allow_shared_alloc =
  1.3238 +    Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
  1.3239 +
  1.3240 +  const Register thread = rcx;
  1.3241 +  if (UseTLAB || allow_shared_alloc) {
  1.3242 +    __ get_thread(thread);
  1.3243 +  }
  1.3244 +
  1.3245 +  if (UseTLAB) {
  1.3246 +    __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
  1.3247 +    __ lea(rbx, Address(rax, rdx, Address::times_1));
  1.3248 +    __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
  1.3249 +    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
  1.3250 +    __ movptr(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
  1.3251 +    if (ZeroTLAB) {
  1.3252 +      // the fields have been already cleared
  1.3253 +      __ jmp(initialize_header);
  1.3254 +    } else {
  1.3255 +      // initialize both the header and fields
  1.3256 +      __ jmp(initialize_object);
  1.3257 +    }
  1.3258 +  }
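  // The TLAB path above is plain bump-the-pointer allocation; an illustrative
  // C-style sketch (not generated code):
  //
  //   obj     = thread->tlab_top;
  //   new_top = obj + instance_size;
  //   if (new_top > thread->tlab_end) goto allocate_shared_or_slow_case;
  //   thread->tlab_top = new_top;   // TLAB is thread-local, so no atomics needed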
  1.3259 +
  1.3260 +  // Allocation in the shared Eden, if allowed.
  1.3261 +  //
  1.3262 +  // rdx: instance size in bytes
  1.3263 +  if (allow_shared_alloc) {
  1.3264 +    __ bind(allocate_shared);
  1.3265 +
  1.3266 +    ExternalAddress heap_top((address)Universe::heap()->top_addr());
  1.3267 +
  1.3268 +    Label retry;
  1.3269 +    __ bind(retry);
  1.3270 +    __ movptr(rax, heap_top);
  1.3271 +    __ lea(rbx, Address(rax, rdx, Address::times_1));
  1.3272 +    __ cmpptr(rbx, ExternalAddress((address)Universe::heap()->end_addr()));
  1.3273 +    __ jcc(Assembler::above, slow_case);
  1.3274 +
  1.3275 +    // Compare rax, with the top addr, and if still equal, store the new
  1.3276 +    // top addr in rbx, at the address of the top addr pointer. Sets ZF if was
  1.3277 +    // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
  1.3278 +    //
  1.3279 +    // rax,: object begin
  1.3280 +    // rbx,: object end
  1.3281 +    // rdx: instance size in bytes
  1.3282 +    __ locked_cmpxchgptr(rbx, heap_top);
  1.3283 +
  1.3284 +    // if someone beat us on the allocation, try again, otherwise continue
  1.3285 +    __ jcc(Assembler::notEqual, retry);
  1.3286 +
  1.3287 +    __ incr_allocated_bytes(thread, rdx, 0);
  1.3288 +  }
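  // The shared-Eden path above is the usual CAS retry loop; an illustrative
  // C-style sketch (not generated code):
  //
  //   do {
  //     obj     = *heap_top;
  //     new_top = obj + instance_size;
  //     if (new_top > *heap_end) goto slow_case;
  //   } while (!CAS(heap_top, obj, new_top));   // lost the race: reload and retry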
  1.3289 +
  1.3290 +  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
  1.3291 +    // The object is initialized before the header.  If the object size is
  1.3292 +    // zero, go directly to the header initialization.
  1.3293 +    __ bind(initialize_object);
  1.3294 +    __ decrement(rdx, sizeof(oopDesc));
  1.3295 +    __ jcc(Assembler::zero, initialize_header);
  1.3296 +
  1.3297 +    // Initialize topmost object field, divide rdx by 8, check if odd and
  1.3298 +    // test if zero.
  1.3299 +    __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
  1.3300 +    __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
  1.3301 +
  1.3302 +    // rdx must have been multiple of 8
  1.3303 +#ifdef ASSERT
  1.3304 +    // make sure rdx was multiple of 8
  1.3305 +    Label L;
  1.3306 +    // Ignore partial flag stall after shrl() since it is debug VM
  1.3307 +    __ jccb(Assembler::carryClear, L);
  1.3308 +    __ stop("object size is not multiple of 2 - adjust this code");
  1.3309 +    __ bind(L);
  1.3310 +    // rdx must be > 0, no extra check needed here
  1.3311 +#endif
  1.3312 +
  1.3313 +    // initialize remaining object fields: rdx was a multiple of 8
  1.3314 +    { Label loop;
  1.3315 +    __ bind(loop);
  1.3316 +    __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
  1.3317 +    NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
  1.3318 +    __ decrement(rdx);
  1.3319 +    __ jcc(Assembler::notZero, loop);
  1.3320 +    }
  1.3321 +
  1.3322 +    // initialize object header only.
  1.3323 +    __ bind(initialize_header);
  1.3324 +    if (UseBiasedLocking) {
  1.3325 +      __ pop(rcx);   // get saved klass back in the register.
  1.3326 +      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
  1.3327 +      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
  1.3328 +    } else {
  1.3329 +      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
  1.3330 +                (int32_t)markOopDesc::prototype()); // header
  1.3331 +      __ pop(rcx);   // get saved klass back in the register.
  1.3332 +    }
  1.3333 +    __ store_klass(rax, rcx);  // klass
  1.3334 +
  1.3335 +    {
  1.3336 +      SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
  1.3337 +      // Trigger dtrace event for fastpath
  1.3338 +      __ push(atos);
  1.3339 +      __ call_VM_leaf(
  1.3340 +           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
  1.3341 +      __ pop(atos);
  1.3342 +    }
  1.3343 +
  1.3344 +    __ jmp(done);
  1.3345 +  }
  1.3346 +
  1.3347 +  // slow case
  1.3348 +  __ bind(slow_case);
  1.3349 +  __ pop(rcx);   // restore stack pointer to what it was when we came in.
  1.3350 +  __ bind(slow_case_no_pop);
  1.3351 +  __ get_constant_pool(rax);
  1.3352 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3353 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rax, rdx);
  1.3354 +
  1.3355 +  // continue
  1.3356 +  __ bind(done);
  1.3357 +}
  1.3358 +
  1.3359 +
  1.3360 +void TemplateTable::newarray() {
  1.3361 +  transition(itos, atos);
  1.3362 +  __ push_i(rax);                                 // make sure everything is on the stack
  1.3363 +  __ load_unsigned_byte(rdx, at_bcp(1));
  1.3364 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), rdx, rax);
  1.3365 +  __ pop_i(rdx);                                  // discard size
  1.3366 +}
  1.3367 +
  1.3368 +
  1.3369 +void TemplateTable::anewarray() {
  1.3370 +  transition(itos, atos);
  1.3371 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3372 +  __ get_constant_pool(rcx);
  1.3373 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), rcx, rdx, rax);
  1.3374 +}
  1.3375 +
  1.3376 +
  1.3377 +void TemplateTable::arraylength() {
  1.3378 +  transition(atos, itos);
  1.3379 +  __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
  1.3380 +  __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
  1.3381 +}
  1.3382 +
  1.3383 +
  1.3384 +void TemplateTable::checkcast() {
  1.3385 +  transition(atos, atos);
  1.3386 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3387 +  __ testptr(rax, rax);   // Object is in EAX
  1.3388 +  __ jcc(Assembler::zero, is_null);
  1.3389 +
  1.3390 +  // Get cpool & tags index
  1.3391 +  __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
  1.3392 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index
  1.3393 +  // See if bytecode has already been quicked
  1.3394 +  __ cmpb(Address(rdx, rbx, Address::times_1, Array<u1>::base_offset_in_bytes()), JVM_CONSTANT_Class);
  1.3395 +  __ jcc(Assembler::equal, quicked);
  1.3396 +
  1.3397 +  __ push(atos);
  1.3398 +  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
  1.3399 +  // vm_result_2 has metadata result
  1.3400 +  // borrow rdi from locals
  1.3401 +  __ get_thread(rdi);
  1.3402 +  __ get_vm_result_2(rax, rdi);
  1.3403 +  __ restore_locals();
  1.3404 +  __ pop_ptr(rdx);
  1.3405 +  __ jmpb(resolved);
  1.3406 +
  1.3407 +  // Get superklass in EAX and subklass in EBX
  1.3408 +  __ bind(quicked);
  1.3409 +  __ mov(rdx, rax);          // Save object in EDX; EAX needed for subtype check
  1.3410 +  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(ConstantPool)));
  1.3411 +
  1.3412 +  __ bind(resolved);
  1.3413 +  __ load_klass(rbx, rdx);
  1.3414 +
  1.3415 +  // Generate subtype check.  Blows ECX.  Resets EDI.  Object in EDX.
  1.3416 +  // Superklass in EAX.  Subklass in EBX.
  1.3417 +  __ gen_subtype_check( rbx, ok_is_subtype );
  1.3418 +
  1.3419 +  // Come here on failure
  1.3420 +  __ push(rdx);
  1.3421 +  // object is at TOS
  1.3422 +  __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
  1.3423 +
  1.3424 +  // Come here on success
  1.3425 +  __ bind(ok_is_subtype);
  1.3426 +  __ mov(rax,rdx);           // Restore object in EDX
  1.3427 +
  1.3428 +  // Collect counts on whether this check-cast sees NULLs a lot or not.
  1.3429 +  if (ProfileInterpreter) {
  1.3430 +    __ jmp(done);
  1.3431 +    __ bind(is_null);
  1.3432 +    __ profile_null_seen(rcx);
  1.3433 +  } else {
  1.3434 +    __ bind(is_null);   // same as 'done'
  1.3435 +  }
  1.3436 +  __ bind(done);
  1.3437 +}
  1.3438 +
  1.3439 +
  1.3440 +void TemplateTable::instanceof() {
  1.3441 +  transition(atos, itos);
  1.3442 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3443 +  __ testptr(rax, rax);
  1.3444 +  __ jcc(Assembler::zero, is_null);
  1.3445 +
  1.3446 +  // Get cpool & tags index
  1.3447 +  __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
  1.3448 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index
  1.3449 +  // See if bytecode has already been quicked
  1.3450 +  __ cmpb(Address(rdx, rbx, Address::times_1, Array<u1>::base_offset_in_bytes()), JVM_CONSTANT_Class);
  1.3451 +  __ jcc(Assembler::equal, quicked);
  1.3452 +
  1.3453 +  __ push(atos);
  1.3454 +  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
  1.3455 +  // vm_result_2 has metadata result
  1.3456 +  // borrow rdi from locals
  1.3457 +  __ get_thread(rdi);
  1.3458 +  __ get_vm_result_2(rax, rdi);
  1.3459 +  __ restore_locals();
  1.3460 +  __ pop_ptr(rdx);
  1.3461 +  __ load_klass(rdx, rdx);
  1.3462 +  __ jmp(resolved);
  1.3463 +
  1.3464 +  // Get superklass in EAX and subklass in EDX
  1.3465 +  __ bind(quicked);
  1.3466 +  __ load_klass(rdx, rax);
  1.3467 +  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(ConstantPool)));
  1.3468 +
  1.3469 +  __ bind(resolved);
  1.3470 +
  1.3471 +  // Generate subtype check.  Blows ECX.  Resets EDI.
  1.3472 +  // Superklass in EAX.  Subklass in EDX.
  1.3473 +  __ gen_subtype_check( rdx, ok_is_subtype );
  1.3474 +
  1.3475 +  // Come here on failure
  1.3476 +  __ xorl(rax,rax);
  1.3477 +  __ jmpb(done);
  1.3478 +  // Come here on success
  1.3479 +  __ bind(ok_is_subtype);
  1.3480 +  __ movl(rax, 1);
  1.3481 +
  1.3482 +  // Collect counts on whether this test sees NULLs a lot or not.
  1.3483 +  if (ProfileInterpreter) {
  1.3484 +    __ jmp(done);
  1.3485 +    __ bind(is_null);
  1.3486 +    __ profile_null_seen(rcx);
  1.3487 +  } else {
  1.3488 +    __ bind(is_null);   // same as 'done'
  1.3489 +  }
  1.3490 +  __ bind(done);
  1.3491 +  // rax, = 0: obj == NULL or  obj is not an instanceof the specified klass
  1.3492 +  // rax, = 1: obj != NULL and obj is     an instanceof the specified klass
  1.3493 +}
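// checkcast and instanceof share the same shape: if the constant-pool tag shows the class
// is already resolved (the bytecode has been "quickened"), the Klass* is loaded directly
// from the pool; otherwise InterpreterRuntime::quicken_io_cc resolves it in the VM and the
// result is picked up via get_vm_result_2 before the subtype check runs.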
  1.3494 +
  1.3495 +
  1.3496 +//----------------------------------------------------------------------------------------------------
  1.3497 +// Breakpoints
  1.3498 +void TemplateTable::_breakpoint() {
  1.3499 +
   1.3500 +  // Note: We get here even if we are single stepping.
   1.3501 +  // jbug insists on setting breakpoints at every bytecode
   1.3502 +  // even if we are in single step mode.
  1.3503 +
  1.3504 +  transition(vtos, vtos);
  1.3505 +
  1.3506 +  // get the unpatched byte code
  1.3507 +  __ get_method(rcx);
  1.3508 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), rcx, rsi);
  1.3509 +  __ mov(rbx, rax);
  1.3510 +
  1.3511 +  // post the breakpoint event
  1.3512 +  __ get_method(rcx);
  1.3513 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), rcx, rsi);
  1.3514 +
  1.3515 +  // complete the execution of original bytecode
  1.3516 +  __ dispatch_only_normal(vtos);
  1.3517 +}
  1.3518 +
  1.3519 +
  1.3520 +//----------------------------------------------------------------------------------------------------
  1.3521 +// Exceptions
  1.3522 +
  1.3523 +void TemplateTable::athrow() {
  1.3524 +  transition(atos, vtos);
  1.3525 +  __ null_check(rax);
  1.3526 +  __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
  1.3527 +}
  1.3528 +
  1.3529 +
  1.3530 +//----------------------------------------------------------------------------------------------------
  1.3531 +// Synchronization
  1.3532 +//
  1.3533 +// Note: monitorenter & exit are symmetric routines; which is reflected
  1.3534 +//       in the assembly code structure as well
  1.3535 +//
  1.3536 +// Stack layout:
  1.3537 +//
  1.3538 +// [expressions  ] <--- rsp               = expression stack top
  1.3539 +// ..
  1.3540 +// [expressions  ]
  1.3541 +// [monitor entry] <--- monitor block top = expression stack bot
  1.3542 +// ..
  1.3543 +// [monitor entry]
  1.3544 +// [frame data   ] <--- monitor block bot
  1.3545 +// ...
  1.3546 +// [saved rbp,    ] <--- rbp,
  1.3547 +
  1.3548 +
  1.3549 +void TemplateTable::monitorenter() {
  1.3550 +  transition(atos, vtos);
  1.3551 +
  1.3552 +  // check for NULL object
  1.3553 +  __ null_check(rax);
  1.3554 +
  1.3555 +  const Address monitor_block_top(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3556 +  const Address monitor_block_bot(rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
  1.3557 +  const int entry_size =         (     frame::interpreter_frame_monitor_size()           * wordSize);
  1.3558 +  Label allocated;
  1.3559 +
  1.3560 +  // initialize entry pointer
  1.3561 +  __ xorl(rdx, rdx);                             // points to free slot or NULL
  1.3562 +
  1.3563 +  // find a free slot in the monitor block (result in rdx)
  1.3564 +  { Label entry, loop, exit;
  1.3565 +    __ movptr(rcx, monitor_block_top);           // points to current entry, starting with top-most entry
  1.3566 +
  1.3567 +    __ lea(rbx, monitor_block_bot);              // points to word before bottom of monitor block
  1.3568 +    __ jmpb(entry);
  1.3569 +
  1.3570 +    __ bind(loop);
  1.3571 +    __ cmpptr(Address(rcx, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD);  // check if current entry is used
  1.3572 +    __ cmovptr(Assembler::equal, rdx, rcx);      // if not used then remember entry in rdx
  1.3573 +    __ cmpptr(rax, Address(rcx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
  1.3574 +    __ jccb(Assembler::equal, exit);             // if same object then stop searching
  1.3575 +    __ addptr(rcx, entry_size);                  // otherwise advance to next entry
  1.3576 +    __ bind(entry);
  1.3577 +    __ cmpptr(rcx, rbx);                         // check if bottom reached
  1.3578 +    __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
  1.3579 +    __ bind(exit);
  1.3580 +  }
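  // The search above is, in effect (illustrative sketch):
  //
  //   for (cur = monitor_top; cur != monitor_bot; cur += entry_size) {
  //     if (cur->obj == NULL)     free_slot = cur;  // remember an unused entry
  //     if (cur->obj == lock_obj) break;            // stop at an entry for the same object
  //   }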
  1.3581 +
  1.3582 +  __ testptr(rdx, rdx);                          // check if a slot has been found
  1.3583 +  __ jccb(Assembler::notZero, allocated);        // if found, continue with that one
  1.3584 +
  1.3585 +  // allocate one if there's no free slot
  1.3586 +  { Label entry, loop;
  1.3587 +    // 1. compute new pointers                   // rsp: old expression stack top
  1.3588 +    __ movptr(rdx, monitor_block_bot);           // rdx: old expression stack bottom
  1.3589 +    __ subptr(rsp, entry_size);                  // move expression stack top
  1.3590 +    __ subptr(rdx, entry_size);                  // move expression stack bottom
  1.3591 +    __ mov(rcx, rsp);                            // set start value for copy loop
  1.3592 +    __ movptr(monitor_block_bot, rdx);           // set new monitor block top
  1.3593 +    __ jmp(entry);
  1.3594 +    // 2. move expression stack contents
  1.3595 +    __ bind(loop);
  1.3596 +    __ movptr(rbx, Address(rcx, entry_size));    // load expression stack word from old location
  1.3597 +    __ movptr(Address(rcx, 0), rbx);             // and store it at new location
  1.3598 +    __ addptr(rcx, wordSize);                    // advance to next word
  1.3599 +    __ bind(entry);
  1.3600 +    __ cmpptr(rcx, rdx);                         // check if bottom reached
  1.3601 +    __ jcc(Assembler::notEqual, loop);           // if not at bottom then copy next word
  1.3602 +  }
  1.3603 +
  1.3604 +  // call run-time routine
  1.3605 +  // rdx: points to monitor entry
  1.3606 +  __ bind(allocated);
  1.3607 +
   1.3608 +  // Increment bcp to point to the next bytecode, so exception handling for async. exceptions works correctly.
   1.3609 +  // The object has already been popped from the stack, so the expression stack looks correct.
  1.3610 +  __ increment(rsi);
  1.3611 +
  1.3612 +  __ movptr(Address(rdx, BasicObjectLock::obj_offset_in_bytes()), rax);     // store object
  1.3613 +  __ lock_object(rdx);
  1.3614 +
  1.3615 +  // check to make sure this monitor doesn't cause stack overflow after locking
  1.3616 +  __ save_bcp();  // in case of exception
  1.3617 +  __ generate_stack_overflow_check(0);
  1.3618 +
  1.3619 +  // The bcp has already been incremented. Just need to dispatch to next instruction.
  1.3620 +  __ dispatch_next(vtos);
  1.3621 +}
  1.3622 +
  1.3623 +
  1.3624 +void TemplateTable::monitorexit() {
  1.3625 +  transition(atos, vtos);
  1.3626 +
  1.3627 +  // check for NULL object
  1.3628 +  __ null_check(rax);
  1.3629 +
  1.3630 +  const Address monitor_block_top(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3631 +  const Address monitor_block_bot(rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
  1.3632 +  const int entry_size =         (     frame::interpreter_frame_monitor_size()           * wordSize);
  1.3633 +  Label found;
  1.3634 +
  1.3635 +  // find matching slot
  1.3636 +  { Label entry, loop;
  1.3637 +    __ movptr(rdx, monitor_block_top);           // points to current entry, starting with top-most entry
  1.3638 +    __ lea(rbx, monitor_block_bot);             // points to word before bottom of monitor block
  1.3639 +    __ jmpb(entry);
  1.3640 +
  1.3641 +    __ bind(loop);
  1.3642 +    __ cmpptr(rax, Address(rdx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
  1.3643 +    __ jcc(Assembler::equal, found);             // if same object then stop searching
  1.3644 +    __ addptr(rdx, entry_size);                  // otherwise advance to next entry
  1.3645 +    __ bind(entry);
  1.3646 +    __ cmpptr(rdx, rbx);                         // check if bottom reached
  1.3647 +    __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
  1.3648 +  }
  1.3649 +
  1.3650 +  // error handling. Unlocking was not block-structured
  1.3651 +  Label end;
  1.3652 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
  1.3653 +  __ should_not_reach_here();
  1.3654 +
  1.3655 +  // call run-time routine
  1.3656 +  // rcx: points to monitor entry
  1.3657 +  __ bind(found);
  1.3658 +  __ push_ptr(rax);                                 // make sure object is on stack (contract with oopMaps)
  1.3659 +  __ unlock_object(rdx);
  1.3660 +  __ pop_ptr(rax);                                  // discard object
  1.3661 +  __ bind(end);
  1.3662 +}
  1.3663 +
  1.3664 +
  1.3665 +//----------------------------------------------------------------------------------------------------
  1.3666 +// Wide instructions
  1.3667 +
  1.3668 +void TemplateTable::wide() {
  1.3669 +  transition(vtos, vtos);
  1.3670 +  __ load_unsigned_byte(rbx, at_bcp(1));
  1.3671 +  ExternalAddress wtable((address)Interpreter::_wentry_point);
  1.3672 +  __ jump(ArrayAddress(wtable, Address(noreg, rbx, Address::times_ptr)));
  1.3673 +  // Note: the rsi increment step is part of the individual wide bytecode implementations
  1.3674 +}
  1.3675 +
  1.3676 +
  1.3677 +//----------------------------------------------------------------------------------------------------
  1.3678 +// Multi arrays
  1.3679 +
  1.3680 +void TemplateTable::multianewarray() {
  1.3681 +  transition(vtos, atos);
  1.3682 +  __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
  1.3683 +  // last dim is on top of stack; we want address of first one:
   1.3684 +  // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordSize
   1.3685 +  // where the latter wordSize points to the beginning of the array.
  1.3686 +  __ lea(  rax, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
  1.3687 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rax);     // pass in rax,
  1.3688 +  __ load_unsigned_byte(rbx, at_bcp(3));
  1.3689 +  __ lea(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
  1.3690 +}
  1.3691 +
  1.3692 +#endif /* !CC_INTERP */
