src/cpu/x86/vm/templateTable_x86_64.cpp

changeset 0:f90c822e73f8
child 6876:710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,3765 @@
     1.4 +/*
     1.5 + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "precompiled.hpp"
    1.29 +#include "asm/macroAssembler.hpp"
    1.30 +#include "interpreter/interpreter.hpp"
    1.31 +#include "interpreter/interpreterRuntime.hpp"
    1.32 +#include "interpreter/templateTable.hpp"
    1.33 +#include "memory/universe.inline.hpp"
    1.34 +#include "oops/methodData.hpp"
    1.35 +#include "oops/objArrayKlass.hpp"
    1.36 +#include "oops/oop.inline.hpp"
    1.37 +#include "prims/methodHandles.hpp"
    1.38 +#include "runtime/sharedRuntime.hpp"
    1.39 +#include "runtime/stubRoutines.hpp"
    1.40 +#include "runtime/synchronizer.hpp"
    1.41 +#include "utilities/macros.hpp"
    1.42 +
    1.43 +#ifndef CC_INTERP
    1.44 +
    1.45 +#define __ _masm->
    1.46 +
    1.47 +// Platform-dependent initialization
    1.48 +
    1.49 +void TemplateTable::pd_initialize() {
    1.50 +  // No amd64 specific initialization
    1.51 +}
    1.52 +
    1.53 +// Address computation: local variables
    1.54 +
    1.55 +static inline Address iaddress(int n) {
    1.56 +  return Address(r14, Interpreter::local_offset_in_bytes(n));
    1.57 +}
    1.58 +
    1.59 +static inline Address laddress(int n) {
    1.60 +  return iaddress(n + 1);
    1.61 +}
    1.62 +
    1.63 +static inline Address faddress(int n) {
    1.64 +  return iaddress(n);
    1.65 +}
    1.66 +
    1.67 +static inline Address daddress(int n) {
    1.68 +  return laddress(n);
    1.69 +}
    1.70 +
    1.71 +static inline Address aaddress(int n) {
    1.72 +  return iaddress(n);
    1.73 +}
    1.74 +
    1.75 +static inline Address iaddress(Register r) {
    1.76 +  return Address(r14, r, Address::times_8);
    1.77 +}
    1.78 +
    1.79 +static inline Address laddress(Register r) {
    1.80 +  return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
    1.81 +}
    1.82 +
    1.83 +static inline Address faddress(Register r) {
    1.84 +  return iaddress(r);
    1.85 +}
    1.86 +
    1.87 +static inline Address daddress(Register r) {
    1.88 +  return laddress(r);
    1.89 +}
    1.90 +
    1.91 +static inline Address aaddress(Register r) {
    1.92 +  return iaddress(r);
    1.93 +}
    1.94 +
    1.95 +static inline Address at_rsp() {
    1.96 +  return Address(rsp, 0);
    1.97 +}
    1.98 +
     1.99 +// Address of the top of the Java expression stack, which may be different
    1.100 +// from rsp().  It isn't for category 1 values.
   1.101 +static inline Address at_tos   () {
   1.102 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
   1.103 +}
   1.104 +
   1.105 +static inline Address at_tos_p1() {
   1.106 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
   1.107 +}
   1.108 +
   1.109 +static inline Address at_tos_p2() {
   1.110 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
   1.111 +}
   1.112 +
   1.113 +static inline Address at_tos_p3() {
   1.114 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(3));
   1.115 +}
   1.116 +
   1.117 +// Condition conversion
   1.118 +static Assembler::Condition j_not(TemplateTable::Condition cc) {
   1.119 +  switch (cc) {
   1.120 +  case TemplateTable::equal        : return Assembler::notEqual;
   1.121 +  case TemplateTable::not_equal    : return Assembler::equal;
   1.122 +  case TemplateTable::less         : return Assembler::greaterEqual;
   1.123 +  case TemplateTable::less_equal   : return Assembler::greater;
   1.124 +  case TemplateTable::greater      : return Assembler::lessEqual;
   1.125 +  case TemplateTable::greater_equal: return Assembler::less;
   1.126 +  }
   1.127 +  ShouldNotReachHere();
   1.128 +  return Assembler::zero;
   1.129 +}
   1.130 +
   1.131 +
    1.132 +// Miscellaneous helper routines
    1.133 +// Store an oop (or NULL) at the address described by obj.
    1.134 +// If val == noreg this means store a NULL.
   1.135 +
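          +// (For example, aastore below passes rax as val for a normal store and noreg
          +// to store a NULL; 'precise' requests that the card mark use the exact
          +// element address rather than the object base.)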
   1.136 +static void do_oop_store(InterpreterMacroAssembler* _masm,
   1.137 +                         Address obj,
   1.138 +                         Register val,
   1.139 +                         BarrierSet::Name barrier,
   1.140 +                         bool precise) {
   1.141 +  assert(val == noreg || val == rax, "parameter is just for looks");
   1.142 +  switch (barrier) {
   1.143 +#if INCLUDE_ALL_GCS
   1.144 +    case BarrierSet::G1SATBCT:
   1.145 +    case BarrierSet::G1SATBCTLogging:
   1.146 +      {
   1.147 +        // flatten object address if needed
   1.148 +        if (obj.index() == noreg && obj.disp() == 0) {
   1.149 +          if (obj.base() != rdx) {
   1.150 +            __ movq(rdx, obj.base());
   1.151 +          }
   1.152 +        } else {
   1.153 +          __ leaq(rdx, obj);
   1.154 +        }
   1.155 +        __ g1_write_barrier_pre(rdx /* obj */,
   1.156 +                                rbx /* pre_val */,
   1.157 +                                r15_thread /* thread */,
   1.158 +                                r8  /* tmp */,
   1.159 +                                val != noreg /* tosca_live */,
   1.160 +                                false /* expand_call */);
   1.161 +        if (val == noreg) {
   1.162 +          __ store_heap_oop_null(Address(rdx, 0));
   1.163 +        } else {
   1.164 +          // G1 barrier needs uncompressed oop for region cross check.
   1.165 +          Register new_val = val;
   1.166 +          if (UseCompressedOops) {
   1.167 +            new_val = rbx;
   1.168 +            __ movptr(new_val, val);
   1.169 +          }
   1.170 +          __ store_heap_oop(Address(rdx, 0), val);
   1.171 +          __ g1_write_barrier_post(rdx /* store_adr */,
   1.172 +                                   new_val /* new_val */,
   1.173 +                                   r15_thread /* thread */,
   1.174 +                                   r8 /* tmp */,
   1.175 +                                   rbx /* tmp2 */);
   1.176 +        }
   1.177 +      }
   1.178 +      break;
   1.179 +#endif // INCLUDE_ALL_GCS
   1.180 +    case BarrierSet::CardTableModRef:
   1.181 +    case BarrierSet::CardTableExtension:
   1.182 +      {
   1.183 +        if (val == noreg) {
   1.184 +          __ store_heap_oop_null(obj);
   1.185 +        } else {
   1.186 +          __ store_heap_oop(obj, val);
   1.187 +          // flatten object address if needed
   1.188 +          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
   1.189 +            __ store_check(obj.base());
   1.190 +          } else {
   1.191 +            __ leaq(rdx, obj);
   1.192 +            __ store_check(rdx);
   1.193 +          }
   1.194 +        }
   1.195 +      }
   1.196 +      break;
   1.197 +    case BarrierSet::ModRef:
   1.198 +    case BarrierSet::Other:
   1.199 +      if (val == noreg) {
   1.200 +        __ store_heap_oop_null(obj);
   1.201 +      } else {
   1.202 +        __ store_heap_oop(obj, val);
   1.203 +      }
   1.204 +      break;
   1.205 +    default      :
   1.206 +      ShouldNotReachHere();
   1.207 +
   1.208 +  }
   1.209 +}
   1.210 +
   1.211 +Address TemplateTable::at_bcp(int offset) {
   1.212 +  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   1.213 +  return Address(r13, offset);
   1.214 +}
   1.215 +
   1.216 +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
   1.217 +                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
   1.218 +                                   int byte_no) {
   1.219 +  if (!RewriteBytecodes)  return;
   1.220 +  Label L_patch_done;
   1.221 +
   1.222 +  switch (bc) {
   1.223 +  case Bytecodes::_fast_aputfield:
   1.224 +  case Bytecodes::_fast_bputfield:
   1.225 +  case Bytecodes::_fast_cputfield:
   1.226 +  case Bytecodes::_fast_dputfield:
   1.227 +  case Bytecodes::_fast_fputfield:
   1.228 +  case Bytecodes::_fast_iputfield:
   1.229 +  case Bytecodes::_fast_lputfield:
   1.230 +  case Bytecodes::_fast_sputfield:
   1.231 +    {
   1.232 +      // We skip bytecode quickening for putfield instructions when
   1.233 +      // the put_code written to the constant pool cache is zero.
   1.234 +      // This is required so that every execution of this instruction
   1.235 +      // calls out to InterpreterRuntime::resolve_get_put to do
   1.236 +      // additional, required work.
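          +      // (A zero put_code in the ConstantPoolCacheEntry means resolve_get_put
          +      //  has not run yet for this field, so the bytecode stays unquickened.)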
   1.237 +      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
   1.238 +      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
   1.239 +      __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
   1.240 +      __ movl(bc_reg, bc);
   1.241 +      __ cmpl(temp_reg, (int) 0);
   1.242 +      __ jcc(Assembler::zero, L_patch_done);  // don't patch
   1.243 +    }
   1.244 +    break;
   1.245 +  default:
   1.246 +    assert(byte_no == -1, "sanity");
   1.247 +    // the pair bytecodes have already done the load.
   1.248 +    if (load_bc_into_bc_reg) {
   1.249 +      __ movl(bc_reg, bc);
   1.250 +    }
   1.251 +  }
   1.252 +
   1.253 +  if (JvmtiExport::can_post_breakpoint()) {
   1.254 +    Label L_fast_patch;
   1.255 +    // if a breakpoint is present we can't rewrite the stream directly
   1.256 +    __ movzbl(temp_reg, at_bcp(0));
   1.257 +    __ cmpl(temp_reg, Bytecodes::_breakpoint);
   1.258 +    __ jcc(Assembler::notEqual, L_fast_patch);
   1.259 +    __ get_method(temp_reg);
   1.260 +    // Let breakpoint table handling rewrite to quicker bytecode
   1.261 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, r13, bc_reg);
   1.262 +#ifndef ASSERT
   1.263 +    __ jmpb(L_patch_done);
   1.264 +#else
   1.265 +    __ jmp(L_patch_done);
   1.266 +#endif
   1.267 +    __ bind(L_fast_patch);
   1.268 +  }
   1.269 +
   1.270 +#ifdef ASSERT
   1.271 +  Label L_okay;
   1.272 +  __ load_unsigned_byte(temp_reg, at_bcp(0));
   1.273 +  __ cmpl(temp_reg, (int) Bytecodes::java_code(bc));
   1.274 +  __ jcc(Assembler::equal, L_okay);
   1.275 +  __ cmpl(temp_reg, bc_reg);
   1.276 +  __ jcc(Assembler::equal, L_okay);
   1.277 +  __ stop("patching the wrong bytecode");
   1.278 +  __ bind(L_okay);
   1.279 +#endif
   1.280 +
   1.281 +  // patch bytecode
   1.282 +  __ movb(at_bcp(0), bc_reg);
   1.283 +  __ bind(L_patch_done);
   1.284 +}
   1.285 +
   1.286 +
   1.287 +// Individual instructions
   1.288 +
   1.289 +void TemplateTable::nop() {
   1.290 +  transition(vtos, vtos);
   1.291 +  // nothing to do
   1.292 +}
   1.293 +
   1.294 +void TemplateTable::shouldnotreachhere() {
   1.295 +  transition(vtos, vtos);
   1.296 +  __ stop("shouldnotreachhere bytecode");
   1.297 +}
   1.298 +
   1.299 +void TemplateTable::aconst_null() {
   1.300 +  transition(vtos, atos);
   1.301 +  __ xorl(rax, rax);
   1.302 +}
   1.303 +
   1.304 +void TemplateTable::iconst(int value) {
   1.305 +  transition(vtos, itos);
   1.306 +  if (value == 0) {
   1.307 +    __ xorl(rax, rax);
   1.308 +  } else {
   1.309 +    __ movl(rax, value);
   1.310 +  }
   1.311 +}
   1.312 +
   1.313 +void TemplateTable::lconst(int value) {
   1.314 +  transition(vtos, ltos);
   1.315 +  if (value == 0) {
   1.316 +    __ xorl(rax, rax);
   1.317 +  } else {
   1.318 +    __ movl(rax, value);
   1.319 +  }
   1.320 +}
   1.321 +
   1.322 +void TemplateTable::fconst(int value) {
   1.323 +  transition(vtos, ftos);
   1.324 +  static float one = 1.0f, two = 2.0f;
   1.325 +  switch (value) {
   1.326 +  case 0:
   1.327 +    __ xorps(xmm0, xmm0);
   1.328 +    break;
   1.329 +  case 1:
   1.330 +    __ movflt(xmm0, ExternalAddress((address) &one));
   1.331 +    break;
   1.332 +  case 2:
   1.333 +    __ movflt(xmm0, ExternalAddress((address) &two));
   1.334 +    break;
   1.335 +  default:
   1.336 +    ShouldNotReachHere();
   1.337 +    break;
   1.338 +  }
   1.339 +}
   1.340 +
   1.341 +void TemplateTable::dconst(int value) {
   1.342 +  transition(vtos, dtos);
   1.343 +  static double one = 1.0;
   1.344 +  switch (value) {
   1.345 +  case 0:
   1.346 +    __ xorpd(xmm0, xmm0);
   1.347 +    break;
   1.348 +  case 1:
   1.349 +    __ movdbl(xmm0, ExternalAddress((address) &one));
   1.350 +    break;
   1.351 +  default:
   1.352 +    ShouldNotReachHere();
   1.353 +    break;
   1.354 +  }
   1.355 +}
   1.356 +
   1.357 +void TemplateTable::bipush() {
   1.358 +  transition(vtos, itos);
   1.359 +  __ load_signed_byte(rax, at_bcp(1));
   1.360 +}
   1.361 +
   1.362 +void TemplateTable::sipush() {
   1.363 +  transition(vtos, itos);
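          +  // The two operand bytes are stored big-endian in the bytecode stream: the
          +  // little-endian 16-bit load is byte-swapped into the upper half of eax and
          +  // then shifted back down arithmetically, which also sign-extends the value.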
   1.364 +  __ load_unsigned_short(rax, at_bcp(1));
   1.365 +  __ bswapl(rax);
   1.366 +  __ sarl(rax, 16);
   1.367 +}
   1.368 +
   1.369 +void TemplateTable::ldc(bool wide) {
   1.370 +  transition(vtos, vtos);
   1.371 +  Label call_ldc, notFloat, notClass, Done;
   1.372 +
   1.373 +  if (wide) {
   1.374 +    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.375 +  } else {
   1.376 +    __ load_unsigned_byte(rbx, at_bcp(1));
   1.377 +  }
   1.378 +
   1.379 +  __ get_cpool_and_tags(rcx, rax);
   1.380 +  const int base_offset = ConstantPool::header_size() * wordSize;
   1.381 +  const int tags_offset = Array<u1>::base_offset_in_bytes();
   1.382 +
   1.383 +  // get type
   1.384 +  __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
   1.385 +
   1.386 +  // unresolved class - get the resolved class
   1.387 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
   1.388 +  __ jccb(Assembler::equal, call_ldc);
   1.389 +
   1.390 +  // unresolved class in error state - call into runtime to throw the error
   1.391 +  // from the first resolution attempt
   1.392 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
   1.393 +  __ jccb(Assembler::equal, call_ldc);
   1.394 +
   1.395 +  // resolved class - need to call vm to get java mirror of the class
   1.396 +  __ cmpl(rdx, JVM_CONSTANT_Class);
   1.397 +  __ jcc(Assembler::notEqual, notClass);
   1.398 +
   1.399 +  __ bind(call_ldc);
   1.400 +  __ movl(c_rarg1, wide);
   1.401 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
   1.402 +  __ push_ptr(rax);
   1.403 +  __ verify_oop(rax);
   1.404 +  __ jmp(Done);
   1.405 +
   1.406 +  __ bind(notClass);
   1.407 +  __ cmpl(rdx, JVM_CONSTANT_Float);
   1.408 +  __ jccb(Assembler::notEqual, notFloat);
   1.409 +  // ftos
   1.410 +  __ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
   1.411 +  __ push_f();
   1.412 +  __ jmp(Done);
   1.413 +
   1.414 +  __ bind(notFloat);
   1.415 +#ifdef ASSERT
   1.416 +  {
   1.417 +    Label L;
   1.418 +    __ cmpl(rdx, JVM_CONSTANT_Integer);
   1.419 +    __ jcc(Assembler::equal, L);
   1.420 +    // String and Object are rewritten to fast_aldc
   1.421 +    __ stop("unexpected tag type in ldc");
   1.422 +    __ bind(L);
   1.423 +  }
   1.424 +#endif
   1.425 +  // itos JVM_CONSTANT_Integer only
   1.426 +  __ movl(rax, Address(rcx, rbx, Address::times_8, base_offset));
   1.427 +  __ push_i(rax);
   1.428 +  __ bind(Done);
   1.429 +}
   1.430 +
   1.431 +// Fast path for caching oop constants.
   1.432 +void TemplateTable::fast_aldc(bool wide) {
   1.433 +  transition(vtos, atos);
   1.434 +
   1.435 +  Register result = rax;
   1.436 +  Register tmp = rdx;
   1.437 +  int index_size = wide ? sizeof(u2) : sizeof(u1);
   1.438 +
   1.439 +  Label resolved;
   1.440 +
   1.441 +  // We are resolved if the resolved reference cache entry contains a
   1.442 +  // non-null object (String, MethodType, etc.)
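          +  // (A NULL entry just means InterpreterRuntime::resolve_ldc has not been
          +  //  called yet for this constant pool index.)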
   1.443 +  assert_different_registers(result, tmp);
   1.444 +  __ get_cache_index_at_bcp(tmp, 1, index_size);
   1.445 +  __ load_resolved_reference_at_index(result, tmp);
   1.446 +  __ testl(result, result);
   1.447 +  __ jcc(Assembler::notZero, resolved);
   1.448 +
   1.449 +  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
   1.450 +
   1.451 +  // first time invocation - must resolve first
   1.452 +  __ movl(tmp, (int)bytecode());
   1.453 +  __ call_VM(result, entry, tmp);
   1.454 +
   1.455 +  __ bind(resolved);
   1.456 +
   1.457 +  if (VerifyOops) {
   1.458 +    __ verify_oop(result);
   1.459 +  }
   1.460 +}
   1.461 +
   1.462 +void TemplateTable::ldc2_w() {
   1.463 +  transition(vtos, vtos);
   1.464 +  Label Long, Done;
   1.465 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.466 +
   1.467 +  __ get_cpool_and_tags(rcx, rax);
   1.468 +  const int base_offset = ConstantPool::header_size() * wordSize;
   1.469 +  const int tags_offset = Array<u1>::base_offset_in_bytes();
   1.470 +
   1.471 +  // get type
   1.472 +  __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
   1.473 +          JVM_CONSTANT_Double);
   1.474 +  __ jccb(Assembler::notEqual, Long);
   1.475 +  // dtos
   1.476 +  __ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
   1.477 +  __ push_d();
   1.478 +  __ jmpb(Done);
   1.479 +
   1.480 +  __ bind(Long);
   1.481 +  // ltos
   1.482 +  __ movq(rax, Address(rcx, rbx, Address::times_8, base_offset));
   1.483 +  __ push_l();
   1.484 +
   1.485 +  __ bind(Done);
   1.486 +}
   1.487 +
   1.488 +void TemplateTable::locals_index(Register reg, int offset) {
   1.489 +  __ load_unsigned_byte(reg, at_bcp(offset));
   1.490 +  __ negptr(reg);
   1.491 +}
   1.492 +
   1.493 +void TemplateTable::iload() {
   1.494 +  transition(vtos, itos);
   1.495 +  if (RewriteFrequentPairs) {
   1.496 +    Label rewrite, done;
   1.497 +    const Register bc = c_rarg3;
   1.498 +    assert(rbx != bc, "register damaged");
   1.499 +
   1.500 +    // get next byte
   1.501 +    __ load_unsigned_byte(rbx,
   1.502 +                          at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    1.503 +    // If the next bytecode is still _iload, wait: we only want to rewrite
    1.504 +    // the last two iloads in a run.  If it has already been quickened to
    1.505 +    // _fast_iload, the current and next bytecodes form an iload pair, so
    1.506 +    // rewrite the current bytecode to _fast_iload2.
   1.507 +    __ cmpl(rbx, Bytecodes::_iload);
   1.508 +    __ jcc(Assembler::equal, done);
   1.509 +
   1.510 +    __ cmpl(rbx, Bytecodes::_fast_iload);
   1.511 +    __ movl(bc, Bytecodes::_fast_iload2);
   1.512 +    __ jccb(Assembler::equal, rewrite);
   1.513 +
   1.514 +    // if _caload, rewrite to fast_icaload
   1.515 +    __ cmpl(rbx, Bytecodes::_caload);
   1.516 +    __ movl(bc, Bytecodes::_fast_icaload);
   1.517 +    __ jccb(Assembler::equal, rewrite);
   1.518 +
   1.519 +    // rewrite so iload doesn't check again.
   1.520 +    __ movl(bc, Bytecodes::_fast_iload);
   1.521 +
   1.522 +    // rewrite
   1.523 +    // bc: fast bytecode
   1.524 +    __ bind(rewrite);
   1.525 +    patch_bytecode(Bytecodes::_iload, bc, rbx, false);
   1.526 +    __ bind(done);
   1.527 +  }
   1.528 +
   1.529 +  // Get the local value into tos
   1.530 +  locals_index(rbx);
   1.531 +  __ movl(rax, iaddress(rbx));
   1.532 +}
   1.533 +
   1.534 +void TemplateTable::fast_iload2() {
   1.535 +  transition(vtos, itos);
   1.536 +  locals_index(rbx);
   1.537 +  __ movl(rax, iaddress(rbx));
   1.538 +  __ push(itos);
   1.539 +  locals_index(rbx, 3);
   1.540 +  __ movl(rax, iaddress(rbx));
   1.541 +}
   1.542 +
   1.543 +void TemplateTable::fast_iload() {
   1.544 +  transition(vtos, itos);
   1.545 +  locals_index(rbx);
   1.546 +  __ movl(rax, iaddress(rbx));
   1.547 +}
   1.548 +
   1.549 +void TemplateTable::lload() {
   1.550 +  transition(vtos, ltos);
   1.551 +  locals_index(rbx);
   1.552 +  __ movq(rax, laddress(rbx));
   1.553 +}
   1.554 +
   1.555 +void TemplateTable::fload() {
   1.556 +  transition(vtos, ftos);
   1.557 +  locals_index(rbx);
   1.558 +  __ movflt(xmm0, faddress(rbx));
   1.559 +}
   1.560 +
   1.561 +void TemplateTable::dload() {
   1.562 +  transition(vtos, dtos);
   1.563 +  locals_index(rbx);
   1.564 +  __ movdbl(xmm0, daddress(rbx));
   1.565 +}
   1.566 +
   1.567 +void TemplateTable::aload() {
   1.568 +  transition(vtos, atos);
   1.569 +  locals_index(rbx);
   1.570 +  __ movptr(rax, aaddress(rbx));
   1.571 +}
   1.572 +
   1.573 +void TemplateTable::locals_index_wide(Register reg) {
   1.574 +  __ load_unsigned_short(reg, at_bcp(2));
   1.575 +  __ bswapl(reg);
   1.576 +  __ shrl(reg, 16);
   1.577 +  __ negptr(reg);
   1.578 +}
   1.579 +
   1.580 +void TemplateTable::wide_iload() {
   1.581 +  transition(vtos, itos);
   1.582 +  locals_index_wide(rbx);
   1.583 +  __ movl(rax, iaddress(rbx));
   1.584 +}
   1.585 +
   1.586 +void TemplateTable::wide_lload() {
   1.587 +  transition(vtos, ltos);
   1.588 +  locals_index_wide(rbx);
   1.589 +  __ movq(rax, laddress(rbx));
   1.590 +}
   1.591 +
   1.592 +void TemplateTable::wide_fload() {
   1.593 +  transition(vtos, ftos);
   1.594 +  locals_index_wide(rbx);
   1.595 +  __ movflt(xmm0, faddress(rbx));
   1.596 +}
   1.597 +
   1.598 +void TemplateTable::wide_dload() {
   1.599 +  transition(vtos, dtos);
   1.600 +  locals_index_wide(rbx);
   1.601 +  __ movdbl(xmm0, daddress(rbx));
   1.602 +}
   1.603 +
   1.604 +void TemplateTable::wide_aload() {
   1.605 +  transition(vtos, atos);
   1.606 +  locals_index_wide(rbx);
   1.607 +  __ movptr(rax, aaddress(rbx));
   1.608 +}
   1.609 +
   1.610 +void TemplateTable::index_check(Register array, Register index) {
   1.611 +  // destroys rbx
   1.612 +  // check array
   1.613 +  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
   1.614 +  // sign extend index for use by indexed load
   1.615 +  __ movl2ptr(index, index);
   1.616 +  // check index
   1.617 +  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
   1.618 +  if (index != rbx) {
   1.619 +    // ??? convention: move aberrant index into ebx for exception message
   1.620 +    assert(rbx != array, "different registers");
   1.621 +    __ movl(rbx, index);
   1.622 +  }
   1.623 +  __ jump_cc(Assembler::aboveEqual,
   1.624 +             ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
   1.625 +}
   1.626 +
   1.627 +void TemplateTable::iaload() {
   1.628 +  transition(itos, itos);
   1.629 +  __ pop_ptr(rdx);
   1.630 +  // eax: index
   1.631 +  // rdx: array
   1.632 +  index_check(rdx, rax); // kills rbx
   1.633 +  __ movl(rax, Address(rdx, rax,
   1.634 +                       Address::times_4,
   1.635 +                       arrayOopDesc::base_offset_in_bytes(T_INT)));
   1.636 +}
   1.637 +
   1.638 +void TemplateTable::laload() {
   1.639 +  transition(itos, ltos);
   1.640 +  __ pop_ptr(rdx);
   1.641 +  // eax: index
   1.642 +  // rdx: array
   1.643 +  index_check(rdx, rax); // kills rbx
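          +  // index_check also copied the index into rbx, which is used as the index
          +  // register below.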
   1.644 +  __ movq(rax, Address(rdx, rbx,
   1.645 +                       Address::times_8,
   1.646 +                       arrayOopDesc::base_offset_in_bytes(T_LONG)));
   1.647 +}
   1.648 +
   1.649 +void TemplateTable::faload() {
   1.650 +  transition(itos, ftos);
   1.651 +  __ pop_ptr(rdx);
   1.652 +  // eax: index
   1.653 +  // rdx: array
   1.654 +  index_check(rdx, rax); // kills rbx
   1.655 +  __ movflt(xmm0, Address(rdx, rax,
   1.656 +                         Address::times_4,
   1.657 +                         arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
   1.658 +}
   1.659 +
   1.660 +void TemplateTable::daload() {
   1.661 +  transition(itos, dtos);
   1.662 +  __ pop_ptr(rdx);
   1.663 +  // eax: index
   1.664 +  // rdx: array
   1.665 +  index_check(rdx, rax); // kills rbx
   1.666 +  __ movdbl(xmm0, Address(rdx, rax,
   1.667 +                          Address::times_8,
   1.668 +                          arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
   1.669 +}
   1.670 +
   1.671 +void TemplateTable::aaload() {
   1.672 +  transition(itos, atos);
   1.673 +  __ pop_ptr(rdx);
   1.674 +  // eax: index
   1.675 +  // rdx: array
   1.676 +  index_check(rdx, rax); // kills rbx
   1.677 +  __ load_heap_oop(rax, Address(rdx, rax,
   1.678 +                                UseCompressedOops ? Address::times_4 : Address::times_8,
   1.679 +                                arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.680 +}
   1.681 +
   1.682 +void TemplateTable::baload() {
   1.683 +  transition(itos, itos);
   1.684 +  __ pop_ptr(rdx);
   1.685 +  // eax: index
   1.686 +  // rdx: array
   1.687 +  index_check(rdx, rax); // kills rbx
   1.688 +  __ load_signed_byte(rax,
   1.689 +                      Address(rdx, rax,
   1.690 +                              Address::times_1,
   1.691 +                              arrayOopDesc::base_offset_in_bytes(T_BYTE)));
   1.692 +}
   1.693 +
   1.694 +void TemplateTable::caload() {
   1.695 +  transition(itos, itos);
   1.696 +  __ pop_ptr(rdx);
   1.697 +  // eax: index
   1.698 +  // rdx: array
   1.699 +  index_check(rdx, rax); // kills rbx
   1.700 +  __ load_unsigned_short(rax,
   1.701 +                         Address(rdx, rax,
   1.702 +                                 Address::times_2,
   1.703 +                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.704 +}
   1.705 +
   1.706 +// iload followed by caload frequent pair
   1.707 +void TemplateTable::fast_icaload() {
   1.708 +  transition(vtos, itos);
   1.709 +  // load index out of locals
   1.710 +  locals_index(rbx);
   1.711 +  __ movl(rax, iaddress(rbx));
   1.712 +
   1.713 +  // eax: index
   1.714 +  // rdx: array
   1.715 +  __ pop_ptr(rdx);
   1.716 +  index_check(rdx, rax); // kills rbx
   1.717 +  __ load_unsigned_short(rax,
   1.718 +                         Address(rdx, rax,
   1.719 +                                 Address::times_2,
   1.720 +                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.721 +}
   1.722 +
   1.723 +void TemplateTable::saload() {
   1.724 +  transition(itos, itos);
   1.725 +  __ pop_ptr(rdx);
   1.726 +  // eax: index
   1.727 +  // rdx: array
   1.728 +  index_check(rdx, rax); // kills rbx
   1.729 +  __ load_signed_short(rax,
   1.730 +                       Address(rdx, rax,
   1.731 +                               Address::times_2,
   1.732 +                               arrayOopDesc::base_offset_in_bytes(T_SHORT)));
   1.733 +}
   1.734 +
   1.735 +void TemplateTable::iload(int n) {
   1.736 +  transition(vtos, itos);
   1.737 +  __ movl(rax, iaddress(n));
   1.738 +}
   1.739 +
   1.740 +void TemplateTable::lload(int n) {
   1.741 +  transition(vtos, ltos);
   1.742 +  __ movq(rax, laddress(n));
   1.743 +}
   1.744 +
   1.745 +void TemplateTable::fload(int n) {
   1.746 +  transition(vtos, ftos);
   1.747 +  __ movflt(xmm0, faddress(n));
   1.748 +}
   1.749 +
   1.750 +void TemplateTable::dload(int n) {
   1.751 +  transition(vtos, dtos);
   1.752 +  __ movdbl(xmm0, daddress(n));
   1.753 +}
   1.754 +
   1.755 +void TemplateTable::aload(int n) {
   1.756 +  transition(vtos, atos);
   1.757 +  __ movptr(rax, aaddress(n));
   1.758 +}
   1.759 +
   1.760 +void TemplateTable::aload_0() {
   1.761 +  transition(vtos, atos);
   1.762 +  // According to bytecode histograms, the pairs:
   1.763 +  //
   1.764 +  // _aload_0, _fast_igetfield
   1.765 +  // _aload_0, _fast_agetfield
   1.766 +  // _aload_0, _fast_fgetfield
   1.767 +  //
   1.768 +  // occur frequently. If RewriteFrequentPairs is set, the (slow)
   1.769 +  // _aload_0 bytecode checks if the next bytecode is either
   1.770 +  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
   1.771 +  // rewrites the current bytecode into a pair bytecode; otherwise it
   1.772 +  // rewrites the current bytecode into _fast_aload_0 that doesn't do
   1.773 +  // the pair check anymore.
   1.774 +  //
   1.775 +  // Note: If the next bytecode is _getfield, the rewrite must be
   1.776 +  //       delayed, otherwise we may miss an opportunity for a pair.
   1.777 +  //
   1.778 +  // Also rewrite frequent pairs
   1.779 +  //   aload_0, aload_1
   1.780 +  //   aload_0, iload_1
   1.781 +  // These bytecodes with a small amount of code are most profitable
   1.782 +  // to rewrite
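          +  //
          +  // For example, once a following getfield has been quickened to
          +  // _fast_igetfield, the preceding aload_0 is rewritten to _fast_iaccess_0,
          +  // which performs both operations in a single template.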
   1.783 +  if (RewriteFrequentPairs) {
   1.784 +    Label rewrite, done;
   1.785 +    const Register bc = c_rarg3;
   1.786 +    assert(rbx != bc, "register damaged");
   1.787 +    // get next byte
   1.788 +    __ load_unsigned_byte(rbx,
   1.789 +                          at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   1.790 +
   1.791 +    // do actual aload_0
   1.792 +    aload(0);
   1.793 +
   1.794 +    // if _getfield then wait with rewrite
   1.795 +    __ cmpl(rbx, Bytecodes::_getfield);
   1.796 +    __ jcc(Assembler::equal, done);
   1.797 +
    1.798 +    // if _igetfield then rewrite to _fast_iaccess_0
   1.799 +    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
   1.800 +           Bytecodes::_aload_0,
   1.801 +           "fix bytecode definition");
   1.802 +    __ cmpl(rbx, Bytecodes::_fast_igetfield);
   1.803 +    __ movl(bc, Bytecodes::_fast_iaccess_0);
   1.804 +    __ jccb(Assembler::equal, rewrite);
   1.805 +
    1.806 +    // if _agetfield then rewrite to _fast_aaccess_0
   1.807 +    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
   1.808 +           Bytecodes::_aload_0,
   1.809 +           "fix bytecode definition");
   1.810 +    __ cmpl(rbx, Bytecodes::_fast_agetfield);
   1.811 +    __ movl(bc, Bytecodes::_fast_aaccess_0);
   1.812 +    __ jccb(Assembler::equal, rewrite);
   1.813 +
    1.814 +    // if _fgetfield then rewrite to _fast_faccess_0
   1.815 +    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
   1.816 +           Bytecodes::_aload_0,
   1.817 +           "fix bytecode definition");
   1.818 +    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
   1.819 +    __ movl(bc, Bytecodes::_fast_faccess_0);
   1.820 +    __ jccb(Assembler::equal, rewrite);
   1.821 +
   1.822 +    // else rewrite to _fast_aload0
   1.823 +    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
   1.824 +           Bytecodes::_aload_0,
   1.825 +           "fix bytecode definition");
   1.826 +    __ movl(bc, Bytecodes::_fast_aload_0);
   1.827 +
   1.828 +    // rewrite
   1.829 +    // bc: fast bytecode
   1.830 +    __ bind(rewrite);
   1.831 +    patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);
   1.832 +
   1.833 +    __ bind(done);
   1.834 +  } else {
   1.835 +    aload(0);
   1.836 +  }
   1.837 +}
   1.838 +
   1.839 +void TemplateTable::istore() {
   1.840 +  transition(itos, vtos);
   1.841 +  locals_index(rbx);
   1.842 +  __ movl(iaddress(rbx), rax);
   1.843 +}
   1.844 +
   1.845 +void TemplateTable::lstore() {
   1.846 +  transition(ltos, vtos);
   1.847 +  locals_index(rbx);
   1.848 +  __ movq(laddress(rbx), rax);
   1.849 +}
   1.850 +
   1.851 +void TemplateTable::fstore() {
   1.852 +  transition(ftos, vtos);
   1.853 +  locals_index(rbx);
   1.854 +  __ movflt(faddress(rbx), xmm0);
   1.855 +}
   1.856 +
   1.857 +void TemplateTable::dstore() {
   1.858 +  transition(dtos, vtos);
   1.859 +  locals_index(rbx);
   1.860 +  __ movdbl(daddress(rbx), xmm0);
   1.861 +}
   1.862 +
   1.863 +void TemplateTable::astore() {
   1.864 +  transition(vtos, vtos);
   1.865 +  __ pop_ptr(rax);
   1.866 +  locals_index(rbx);
   1.867 +  __ movptr(aaddress(rbx), rax);
   1.868 +}
   1.869 +
   1.870 +void TemplateTable::wide_istore() {
   1.871 +  transition(vtos, vtos);
   1.872 +  __ pop_i();
   1.873 +  locals_index_wide(rbx);
   1.874 +  __ movl(iaddress(rbx), rax);
   1.875 +}
   1.876 +
   1.877 +void TemplateTable::wide_lstore() {
   1.878 +  transition(vtos, vtos);
   1.879 +  __ pop_l();
   1.880 +  locals_index_wide(rbx);
   1.881 +  __ movq(laddress(rbx), rax);
   1.882 +}
   1.883 +
   1.884 +void TemplateTable::wide_fstore() {
   1.885 +  transition(vtos, vtos);
   1.886 +  __ pop_f();
   1.887 +  locals_index_wide(rbx);
   1.888 +  __ movflt(faddress(rbx), xmm0);
   1.889 +}
   1.890 +
   1.891 +void TemplateTable::wide_dstore() {
   1.892 +  transition(vtos, vtos);
   1.893 +  __ pop_d();
   1.894 +  locals_index_wide(rbx);
   1.895 +  __ movdbl(daddress(rbx), xmm0);
   1.896 +}
   1.897 +
   1.898 +void TemplateTable::wide_astore() {
   1.899 +  transition(vtos, vtos);
   1.900 +  __ pop_ptr(rax);
   1.901 +  locals_index_wide(rbx);
   1.902 +  __ movptr(aaddress(rbx), rax);
   1.903 +}
   1.904 +
   1.905 +void TemplateTable::iastore() {
   1.906 +  transition(itos, vtos);
   1.907 +  __ pop_i(rbx);
   1.908 +  __ pop_ptr(rdx);
   1.909 +  // eax: value
   1.910 +  // ebx: index
   1.911 +  // rdx: array
   1.912 +  index_check(rdx, rbx); // prefer index in ebx
   1.913 +  __ movl(Address(rdx, rbx,
   1.914 +                  Address::times_4,
   1.915 +                  arrayOopDesc::base_offset_in_bytes(T_INT)),
   1.916 +          rax);
   1.917 +}
   1.918 +
   1.919 +void TemplateTable::lastore() {
   1.920 +  transition(ltos, vtos);
   1.921 +  __ pop_i(rbx);
   1.922 +  __ pop_ptr(rdx);
   1.923 +  // rax: value
   1.924 +  // ebx: index
   1.925 +  // rdx: array
   1.926 +  index_check(rdx, rbx); // prefer index in ebx
   1.927 +  __ movq(Address(rdx, rbx,
   1.928 +                  Address::times_8,
   1.929 +                  arrayOopDesc::base_offset_in_bytes(T_LONG)),
   1.930 +          rax);
   1.931 +}
   1.932 +
   1.933 +void TemplateTable::fastore() {
   1.934 +  transition(ftos, vtos);
   1.935 +  __ pop_i(rbx);
   1.936 +  __ pop_ptr(rdx);
   1.937 +  // xmm0: value
   1.938 +  // ebx:  index
   1.939 +  // rdx:  array
   1.940 +  index_check(rdx, rbx); // prefer index in ebx
   1.941 +  __ movflt(Address(rdx, rbx,
   1.942 +                   Address::times_4,
   1.943 +                   arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
   1.944 +           xmm0);
   1.945 +}
   1.946 +
   1.947 +void TemplateTable::dastore() {
   1.948 +  transition(dtos, vtos);
   1.949 +  __ pop_i(rbx);
   1.950 +  __ pop_ptr(rdx);
   1.951 +  // xmm0: value
   1.952 +  // ebx:  index
   1.953 +  // rdx:  array
   1.954 +  index_check(rdx, rbx); // prefer index in ebx
   1.955 +  __ movdbl(Address(rdx, rbx,
   1.956 +                   Address::times_8,
   1.957 +                   arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
   1.958 +           xmm0);
   1.959 +}
   1.960 +
   1.961 +void TemplateTable::aastore() {
   1.962 +  Label is_null, ok_is_subtype, done;
   1.963 +  transition(vtos, vtos);
   1.964 +  // stack: ..., array, index, value
   1.965 +  __ movptr(rax, at_tos());    // value
   1.966 +  __ movl(rcx, at_tos_p1()); // index
   1.967 +  __ movptr(rdx, at_tos_p2()); // array
   1.968 +
   1.969 +  Address element_address(rdx, rcx,
   1.970 +                          UseCompressedOops? Address::times_4 : Address::times_8,
   1.971 +                          arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   1.972 +
   1.973 +  index_check(rdx, rcx);     // kills rbx
   1.974 +  // do array store check - check for NULL value first
   1.975 +  __ testptr(rax, rax);
   1.976 +  __ jcc(Assembler::zero, is_null);
   1.977 +
   1.978 +  // Move subklass into rbx
   1.979 +  __ load_klass(rbx, rax);
   1.980 +  // Move superklass into rax
   1.981 +  __ load_klass(rax, rdx);
   1.982 +  __ movptr(rax, Address(rax,
   1.983 +                         ObjArrayKlass::element_klass_offset()));
   1.984 +  // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
   1.985 +  __ lea(rdx, element_address);
   1.986 +
   1.987 +  // Generate subtype check.  Blows rcx, rdi
   1.988 +  // Superklass in rax.  Subklass in rbx.
   1.989 +  __ gen_subtype_check(rbx, ok_is_subtype);
   1.990 +
   1.991 +  // Come here on failure
   1.992 +  // object is at TOS
   1.993 +  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
   1.994 +
   1.995 +  // Come here on success
   1.996 +  __ bind(ok_is_subtype);
   1.997 +
   1.998 +  // Get the value we will store
   1.999 +  __ movptr(rax, at_tos());
  1.1000 +  // Now store using the appropriate barrier
  1.1001 +  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
  1.1002 +  __ jmp(done);
  1.1003 +
  1.1004 +  // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
  1.1005 +  __ bind(is_null);
  1.1006 +  __ profile_null_seen(rbx);
  1.1007 +
  1.1008 +  // Store a NULL
  1.1009 +  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
  1.1010 +
  1.1011 +  // Pop stack arguments
  1.1012 +  __ bind(done);
  1.1013 +  __ addptr(rsp, 3 * Interpreter::stackElementSize);
  1.1014 +}
  1.1015 +
  1.1016 +void TemplateTable::bastore() {
  1.1017 +  transition(itos, vtos);
  1.1018 +  __ pop_i(rbx);
  1.1019 +  __ pop_ptr(rdx);
  1.1020 +  // eax: value
  1.1021 +  // ebx: index
  1.1022 +  // rdx: array
  1.1023 +  index_check(rdx, rbx); // prefer index in ebx
  1.1024 +  __ movb(Address(rdx, rbx,
  1.1025 +                  Address::times_1,
  1.1026 +                  arrayOopDesc::base_offset_in_bytes(T_BYTE)),
  1.1027 +          rax);
  1.1028 +}
  1.1029 +
  1.1030 +void TemplateTable::castore() {
  1.1031 +  transition(itos, vtos);
  1.1032 +  __ pop_i(rbx);
  1.1033 +  __ pop_ptr(rdx);
  1.1034 +  // eax: value
  1.1035 +  // ebx: index
  1.1036 +  // rdx: array
  1.1037 +  index_check(rdx, rbx);  // prefer index in ebx
  1.1038 +  __ movw(Address(rdx, rbx,
  1.1039 +                  Address::times_2,
  1.1040 +                  arrayOopDesc::base_offset_in_bytes(T_CHAR)),
  1.1041 +          rax);
  1.1042 +}
  1.1043 +
  1.1044 +void TemplateTable::sastore() {
  1.1045 +  castore();
  1.1046 +}
  1.1047 +
  1.1048 +void TemplateTable::istore(int n) {
  1.1049 +  transition(itos, vtos);
  1.1050 +  __ movl(iaddress(n), rax);
  1.1051 +}
  1.1052 +
  1.1053 +void TemplateTable::lstore(int n) {
  1.1054 +  transition(ltos, vtos);
  1.1055 +  __ movq(laddress(n), rax);
  1.1056 +}
  1.1057 +
  1.1058 +void TemplateTable::fstore(int n) {
  1.1059 +  transition(ftos, vtos);
  1.1060 +  __ movflt(faddress(n), xmm0);
  1.1061 +}
  1.1062 +
  1.1063 +void TemplateTable::dstore(int n) {
  1.1064 +  transition(dtos, vtos);
  1.1065 +  __ movdbl(daddress(n), xmm0);
  1.1066 +}
  1.1067 +
  1.1068 +void TemplateTable::astore(int n) {
  1.1069 +  transition(vtos, vtos);
  1.1070 +  __ pop_ptr(rax);
  1.1071 +  __ movptr(aaddress(n), rax);
  1.1072 +}
  1.1073 +
  1.1074 +void TemplateTable::pop() {
  1.1075 +  transition(vtos, vtos);
  1.1076 +  __ addptr(rsp, Interpreter::stackElementSize);
  1.1077 +}
  1.1078 +
  1.1079 +void TemplateTable::pop2() {
  1.1080 +  transition(vtos, vtos);
  1.1081 +  __ addptr(rsp, 2 * Interpreter::stackElementSize);
  1.1082 +}
  1.1083 +
  1.1084 +void TemplateTable::dup() {
  1.1085 +  transition(vtos, vtos);
  1.1086 +  __ load_ptr(0, rax);
  1.1087 +  __ push_ptr(rax);
  1.1088 +  // stack: ..., a, a
  1.1089 +}
  1.1090 +
  1.1091 +void TemplateTable::dup_x1() {
  1.1092 +  transition(vtos, vtos);
  1.1093 +  // stack: ..., a, b
  1.1094 +  __ load_ptr( 0, rax);  // load b
  1.1095 +  __ load_ptr( 1, rcx);  // load a
  1.1096 +  __ store_ptr(1, rax);  // store b
  1.1097 +  __ store_ptr(0, rcx);  // store a
  1.1098 +  __ push_ptr(rax);      // push b
  1.1099 +  // stack: ..., b, a, b
  1.1100 +}
  1.1101 +
  1.1102 +void TemplateTable::dup_x2() {
  1.1103 +  transition(vtos, vtos);
  1.1104 +  // stack: ..., a, b, c
  1.1105 +  __ load_ptr( 0, rax);  // load c
  1.1106 +  __ load_ptr( 2, rcx);  // load a
  1.1107 +  __ store_ptr(2, rax);  // store c in a
  1.1108 +  __ push_ptr(rax);      // push c
  1.1109 +  // stack: ..., c, b, c, c
  1.1110 +  __ load_ptr( 2, rax);  // load b
  1.1111 +  __ store_ptr(2, rcx);  // store a in b
  1.1112 +  // stack: ..., c, a, c, c
  1.1113 +  __ store_ptr(1, rax);  // store b in c
  1.1114 +  // stack: ..., c, a, b, c
  1.1115 +}
  1.1116 +
  1.1117 +void TemplateTable::dup2() {
  1.1118 +  transition(vtos, vtos);
  1.1119 +  // stack: ..., a, b
  1.1120 +  __ load_ptr(1, rax);  // load a
  1.1121 +  __ push_ptr(rax);     // push a
  1.1122 +  __ load_ptr(1, rax);  // load b
  1.1123 +  __ push_ptr(rax);     // push b
  1.1124 +  // stack: ..., a, b, a, b
  1.1125 +}
  1.1126 +
  1.1127 +void TemplateTable::dup2_x1() {
  1.1128 +  transition(vtos, vtos);
  1.1129 +  // stack: ..., a, b, c
  1.1130 +  __ load_ptr( 0, rcx);  // load c
  1.1131 +  __ load_ptr( 1, rax);  // load b
  1.1132 +  __ push_ptr(rax);      // push b
  1.1133 +  __ push_ptr(rcx);      // push c
  1.1134 +  // stack: ..., a, b, c, b, c
  1.1135 +  __ store_ptr(3, rcx);  // store c in b
  1.1136 +  // stack: ..., a, c, c, b, c
  1.1137 +  __ load_ptr( 4, rcx);  // load a
  1.1138 +  __ store_ptr(2, rcx);  // store a in 2nd c
  1.1139 +  // stack: ..., a, c, a, b, c
  1.1140 +  __ store_ptr(4, rax);  // store b in a
  1.1141 +  // stack: ..., b, c, a, b, c
  1.1142 +}
  1.1143 +
  1.1144 +void TemplateTable::dup2_x2() {
  1.1145 +  transition(vtos, vtos);
  1.1146 +  // stack: ..., a, b, c, d
  1.1147 +  __ load_ptr( 0, rcx);  // load d
  1.1148 +  __ load_ptr( 1, rax);  // load c
  1.1149 +  __ push_ptr(rax);      // push c
  1.1150 +  __ push_ptr(rcx);      // push d
  1.1151 +  // stack: ..., a, b, c, d, c, d
  1.1152 +  __ load_ptr( 4, rax);  // load b
  1.1153 +  __ store_ptr(2, rax);  // store b in d
  1.1154 +  __ store_ptr(4, rcx);  // store d in b
  1.1155 +  // stack: ..., a, d, c, b, c, d
  1.1156 +  __ load_ptr( 5, rcx);  // load a
  1.1157 +  __ load_ptr( 3, rax);  // load c
  1.1158 +  __ store_ptr(3, rcx);  // store a in c
  1.1159 +  __ store_ptr(5, rax);  // store c in a
  1.1160 +  // stack: ..., c, d, a, b, c, d
  1.1161 +}
  1.1162 +
  1.1163 +void TemplateTable::swap() {
  1.1164 +  transition(vtos, vtos);
  1.1165 +  // stack: ..., a, b
  1.1166 +  __ load_ptr( 1, rcx);  // load a
  1.1167 +  __ load_ptr( 0, rax);  // load b
  1.1168 +  __ store_ptr(0, rcx);  // store a in b
  1.1169 +  __ store_ptr(1, rax);  // store b in a
  1.1170 +  // stack: ..., b, a
  1.1171 +}
  1.1172 +
  1.1173 +void TemplateTable::iop2(Operation op) {
  1.1174 +  transition(itos, itos);
  1.1175 +  switch (op) {
  1.1176 +  case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
  1.1177 +  case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
  1.1178 +  case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
  1.1179 +  case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
  1.1180 +  case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
  1.1181 +  case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
  1.1182 +  case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
  1.1183 +  case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
  1.1184 +  case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
  1.1185 +  default   : ShouldNotReachHere();
  1.1186 +  }
  1.1187 +}
  1.1188 +
  1.1189 +void TemplateTable::lop2(Operation op) {
  1.1190 +  transition(ltos, ltos);
  1.1191 +  switch (op) {
  1.1192 +  case add  :                    __ pop_l(rdx); __ addptr(rax, rdx); break;
  1.1193 +  case sub  : __ mov(rdx, rax);  __ pop_l(rax); __ subptr(rax, rdx); break;
  1.1194 +  case _and :                    __ pop_l(rdx); __ andptr(rax, rdx); break;
  1.1195 +  case _or  :                    __ pop_l(rdx); __ orptr (rax, rdx); break;
  1.1196 +  case _xor :                    __ pop_l(rdx); __ xorptr(rax, rdx); break;
  1.1197 +  default   : ShouldNotReachHere();
  1.1198 +  }
  1.1199 +}
  1.1200 +
  1.1201 +void TemplateTable::idiv() {
  1.1202 +  transition(itos, itos);
  1.1203 +  __ movl(rcx, rax);
  1.1204 +  __ pop_i(rax);
  1.1205 +  // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
  1.1206 +  //       they are not equal, one could do a normal division (no correction
  1.1207 +  //       needed), which may speed up this implementation for the common case.
  1.1208 +  //       (see also JVM spec., p.243 & p.271)
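          +  // corrected_idivl exists because idivl traps on min_int / -1 (the quotient
          +  // does not fit); the JVM spec requires that case to yield min_int.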
  1.1209 +  __ corrected_idivl(rcx);
  1.1210 +}
  1.1211 +
  1.1212 +void TemplateTable::irem() {
  1.1213 +  transition(itos, itos);
  1.1214 +  __ movl(rcx, rax);
  1.1215 +  __ pop_i(rax);
  1.1216 +  // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
  1.1217 +  //       they are not equal, one could do a normal division (no correction
  1.1218 +  //       needed), which may speed up this implementation for the common case.
  1.1219 +  //       (see also JVM spec., p.243 & p.271)
  1.1220 +  __ corrected_idivl(rcx);
  1.1221 +  __ movl(rax, rdx);
  1.1222 +}
  1.1223 +
  1.1224 +void TemplateTable::lmul() {
  1.1225 +  transition(ltos, ltos);
  1.1226 +  __ pop_l(rdx);
  1.1227 +  __ imulq(rax, rdx);
  1.1228 +}
  1.1229 +
  1.1230 +void TemplateTable::ldiv() {
  1.1231 +  transition(ltos, ltos);
  1.1232 +  __ mov(rcx, rax);
  1.1233 +  __ pop_l(rax);
  1.1234 +  // generate explicit div0 check
  1.1235 +  __ testq(rcx, rcx);
  1.1236 +  __ jump_cc(Assembler::zero,
  1.1237 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1238 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1239 +  //       they are not equal, one could do a normal division (no correction
  1.1240 +  //       needed), which may speed up this implementation for the common case.
  1.1241 +  //       (see also JVM spec., p.243 & p.271)
  1.1242 +  __ corrected_idivq(rcx); // kills rbx
  1.1243 +}
  1.1244 +
  1.1245 +void TemplateTable::lrem() {
  1.1246 +  transition(ltos, ltos);
  1.1247 +  __ mov(rcx, rax);
  1.1248 +  __ pop_l(rax);
  1.1249 +  __ testq(rcx, rcx);
  1.1250 +  __ jump_cc(Assembler::zero,
  1.1251 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1252 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1253 +  //       they are not equal, one could do a normal division (no correction
  1.1254 +  //       needed), which may speed up this implementation for the common case.
  1.1255 +  //       (see also JVM spec., p.243 & p.271)
  1.1256 +  __ corrected_idivq(rcx); // kills rbx
  1.1257 +  __ mov(rax, rdx);
  1.1258 +}
  1.1259 +
  1.1260 +void TemplateTable::lshl() {
  1.1261 +  transition(itos, ltos);
  1.1262 +  __ movl(rcx, rax);                             // get shift count
  1.1263 +  __ pop_l(rax);                                 // get shift value
  1.1264 +  __ shlq(rax);
  1.1265 +}
  1.1266 +
  1.1267 +void TemplateTable::lshr() {
  1.1268 +  transition(itos, ltos);
  1.1269 +  __ movl(rcx, rax);                             // get shift count
  1.1270 +  __ pop_l(rax);                                 // get shift value
  1.1271 +  __ sarq(rax);
  1.1272 +}
  1.1273 +
  1.1274 +void TemplateTable::lushr() {
  1.1275 +  transition(itos, ltos);
  1.1276 +  __ movl(rcx, rax);                             // get shift count
  1.1277 +  __ pop_l(rax);                                 // get shift value
  1.1278 +  __ shrq(rax);
  1.1279 +}
  1.1280 +
  1.1281 +void TemplateTable::fop2(Operation op) {
  1.1282 +  transition(ftos, ftos);
  1.1283 +  switch (op) {
  1.1284 +  case add:
  1.1285 +    __ addss(xmm0, at_rsp());
  1.1286 +    __ addptr(rsp, Interpreter::stackElementSize);
  1.1287 +    break;
  1.1288 +  case sub:
  1.1289 +    __ movflt(xmm1, xmm0);
  1.1290 +    __ pop_f(xmm0);
  1.1291 +    __ subss(xmm0, xmm1);
  1.1292 +    break;
  1.1293 +  case mul:
  1.1294 +    __ mulss(xmm0, at_rsp());
  1.1295 +    __ addptr(rsp, Interpreter::stackElementSize);
  1.1296 +    break;
  1.1297 +  case div:
  1.1298 +    __ movflt(xmm1, xmm0);
  1.1299 +    __ pop_f(xmm0);
  1.1300 +    __ divss(xmm0, xmm1);
  1.1301 +    break;
  1.1302 +  case rem:
  1.1303 +    __ movflt(xmm1, xmm0);
  1.1304 +    __ pop_f(xmm0);
  1.1305 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
  1.1306 +    break;
  1.1307 +  default:
  1.1308 +    ShouldNotReachHere();
  1.1309 +    break;
  1.1310 +  }
  1.1311 +}
  1.1312 +
  1.1313 +void TemplateTable::dop2(Operation op) {
  1.1314 +  transition(dtos, dtos);
  1.1315 +  switch (op) {
  1.1316 +  case add:
  1.1317 +    __ addsd(xmm0, at_rsp());
  1.1318 +    __ addptr(rsp, 2 * Interpreter::stackElementSize);
  1.1319 +    break;
  1.1320 +  case sub:
  1.1321 +    __ movdbl(xmm1, xmm0);
  1.1322 +    __ pop_d(xmm0);
  1.1323 +    __ subsd(xmm0, xmm1);
  1.1324 +    break;
  1.1325 +  case mul:
  1.1326 +    __ mulsd(xmm0, at_rsp());
  1.1327 +    __ addptr(rsp, 2 * Interpreter::stackElementSize);
  1.1328 +    break;
  1.1329 +  case div:
  1.1330 +    __ movdbl(xmm1, xmm0);
  1.1331 +    __ pop_d(xmm0);
  1.1332 +    __ divsd(xmm0, xmm1);
  1.1333 +    break;
  1.1334 +  case rem:
  1.1335 +    __ movdbl(xmm1, xmm0);
  1.1336 +    __ pop_d(xmm0);
  1.1337 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
  1.1338 +    break;
  1.1339 +  default:
  1.1340 +    ShouldNotReachHere();
  1.1341 +    break;
  1.1342 +  }
  1.1343 +}
  1.1344 +
  1.1345 +void TemplateTable::ineg() {
  1.1346 +  transition(itos, itos);
  1.1347 +  __ negl(rax);
  1.1348 +}
  1.1349 +
  1.1350 +void TemplateTable::lneg() {
  1.1351 +  transition(ltos, ltos);
  1.1352 +  __ negq(rax);
  1.1353 +}
  1.1354 +
   1.1355 +// Note: 'double' and 'long long' have 32-bit alignment on x86.
  1.1356 +static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
   1.1357 +  // Use the expression (adr)&(~0xF) to provide a 128-bit-aligned address
   1.1358 +  // for the 128-bit operands of SSE instructions.
  1.1359 +  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
   1.1360 +  // Store the value to a 128-bit operand.
  1.1361 +  operand[0] = lo;
  1.1362 +  operand[1] = hi;
  1.1363 +  return operand;
  1.1364 +}
  1.1365 +
   1.1366 +// Buffers for the 128-bit masks used by SSE instructions.
  1.1367 +static jlong float_signflip_pool[2*2];
  1.1368 +static jlong double_signflip_pool[2*2];
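          +// Each pool is 32 bytes, so rounding &pool[1] down to a 16-byte boundary (as
          +// double_quadword does) always yields a writable, 16-byte-aligned 128-bit slot
          +// that stays inside the pool.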
  1.1369 +
  1.1370 +void TemplateTable::fneg() {
  1.1371 +  transition(ftos, ftos);
  1.1372 +  static jlong *float_signflip  = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
  1.1373 +  __ xorps(xmm0, ExternalAddress((address) float_signflip));
  1.1374 +}
  1.1375 +
  1.1376 +void TemplateTable::dneg() {
  1.1377 +  transition(dtos, dtos);
  1.1378 +  static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
  1.1379 +  __ xorpd(xmm0, ExternalAddress((address) double_signflip));
  1.1380 +}
  1.1381 +
  1.1382 +void TemplateTable::iinc() {
  1.1383 +  transition(vtos, vtos);
  1.1384 +  __ load_signed_byte(rdx, at_bcp(2)); // get constant
  1.1385 +  locals_index(rbx);
  1.1386 +  __ addl(iaddress(rbx), rdx);
  1.1387 +}
  1.1388 +
  1.1389 +void TemplateTable::wide_iinc() {
  1.1390 +  transition(vtos, vtos);
  1.1391 +  __ movl(rdx, at_bcp(4)); // get constant
  1.1392 +  locals_index_wide(rbx);
  1.1393 +  __ bswapl(rdx); // swap bytes & sign-extend constant
  1.1394 +  __ sarl(rdx, 16);
  1.1395 +  __ addl(iaddress(rbx), rdx);
  1.1396 +  // Note: should probably use only one movl to get both
  1.1397 +  //       the index and the constant -> fix this
  1.1398 +}
  1.1399 +
  1.1400 +void TemplateTable::convert() {
  1.1401 +  // Checking
  1.1402 +#ifdef ASSERT
  1.1403 +  {
  1.1404 +    TosState tos_in  = ilgl;
  1.1405 +    TosState tos_out = ilgl;
  1.1406 +    switch (bytecode()) {
  1.1407 +    case Bytecodes::_i2l: // fall through
  1.1408 +    case Bytecodes::_i2f: // fall through
  1.1409 +    case Bytecodes::_i2d: // fall through
  1.1410 +    case Bytecodes::_i2b: // fall through
  1.1411 +    case Bytecodes::_i2c: // fall through
  1.1412 +    case Bytecodes::_i2s: tos_in = itos; break;
  1.1413 +    case Bytecodes::_l2i: // fall through
  1.1414 +    case Bytecodes::_l2f: // fall through
  1.1415 +    case Bytecodes::_l2d: tos_in = ltos; break;
  1.1416 +    case Bytecodes::_f2i: // fall through
  1.1417 +    case Bytecodes::_f2l: // fall through
  1.1418 +    case Bytecodes::_f2d: tos_in = ftos; break;
  1.1419 +    case Bytecodes::_d2i: // fall through
  1.1420 +    case Bytecodes::_d2l: // fall through
  1.1421 +    case Bytecodes::_d2f: tos_in = dtos; break;
  1.1422 +    default             : ShouldNotReachHere();
  1.1423 +    }
  1.1424 +    switch (bytecode()) {
  1.1425 +    case Bytecodes::_l2i: // fall through
  1.1426 +    case Bytecodes::_f2i: // fall through
  1.1427 +    case Bytecodes::_d2i: // fall through
  1.1428 +    case Bytecodes::_i2b: // fall through
  1.1429 +    case Bytecodes::_i2c: // fall through
  1.1430 +    case Bytecodes::_i2s: tos_out = itos; break;
  1.1431 +    case Bytecodes::_i2l: // fall through
  1.1432 +    case Bytecodes::_f2l: // fall through
  1.1433 +    case Bytecodes::_d2l: tos_out = ltos; break;
  1.1434 +    case Bytecodes::_i2f: // fall through
  1.1435 +    case Bytecodes::_l2f: // fall through
  1.1436 +    case Bytecodes::_d2f: tos_out = ftos; break;
  1.1437 +    case Bytecodes::_i2d: // fall through
  1.1438 +    case Bytecodes::_l2d: // fall through
  1.1439 +    case Bytecodes::_f2d: tos_out = dtos; break;
  1.1440 +    default             : ShouldNotReachHere();
  1.1441 +    }
  1.1442 +    transition(tos_in, tos_out);
  1.1443 +  }
  1.1444 +#endif // ASSERT
  1.1445 +
  1.1446 +  static const int64_t is_nan = 0x8000000000000000L;
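          +  // This is the "integer indefinite" value that cvttss2siq/cvttsd2siq produce
          +  // for NaN and out-of-range inputs; the conversions below only fall back to
          +  // the SharedRuntime routines when they see this sentinel (or its 32-bit
          +  // counterpart 0x80000000).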
  1.1447 +
  1.1448 +  // Conversion
  1.1449 +  switch (bytecode()) {
  1.1450 +  case Bytecodes::_i2l:
  1.1451 +    __ movslq(rax, rax);
  1.1452 +    break;
  1.1453 +  case Bytecodes::_i2f:
  1.1454 +    __ cvtsi2ssl(xmm0, rax);
  1.1455 +    break;
  1.1456 +  case Bytecodes::_i2d:
  1.1457 +    __ cvtsi2sdl(xmm0, rax);
  1.1458 +    break;
  1.1459 +  case Bytecodes::_i2b:
  1.1460 +    __ movsbl(rax, rax);
  1.1461 +    break;
  1.1462 +  case Bytecodes::_i2c:
  1.1463 +    __ movzwl(rax, rax);
  1.1464 +    break;
  1.1465 +  case Bytecodes::_i2s:
  1.1466 +    __ movswl(rax, rax);
  1.1467 +    break;
  1.1468 +  case Bytecodes::_l2i:
  1.1469 +    __ movl(rax, rax);
  1.1470 +    break;
  1.1471 +  case Bytecodes::_l2f:
  1.1472 +    __ cvtsi2ssq(xmm0, rax);
  1.1473 +    break;
  1.1474 +  case Bytecodes::_l2d:
  1.1475 +    __ cvtsi2sdq(xmm0, rax);
  1.1476 +    break;
  1.1477 +  case Bytecodes::_f2i:
  1.1478 +  {
  1.1479 +    Label L;
  1.1480 +    __ cvttss2sil(rax, xmm0);
  1.1481 +    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
  1.1482 +    __ jcc(Assembler::notEqual, L);
  1.1483 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
  1.1484 +    __ bind(L);
  1.1485 +  }
  1.1486 +    break;
  1.1487 +  case Bytecodes::_f2l:
  1.1488 +  {
  1.1489 +    Label L;
  1.1490 +    __ cvttss2siq(rax, xmm0);
  1.1491 +    // NaN or overflow/underflow?
  1.1492 +    __ cmp64(rax, ExternalAddress((address) &is_nan));
  1.1493 +    __ jcc(Assembler::notEqual, L);
  1.1494 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
  1.1495 +    __ bind(L);
  1.1496 +  }
  1.1497 +    break;
  1.1498 +  case Bytecodes::_f2d:
  1.1499 +    __ cvtss2sd(xmm0, xmm0);
  1.1500 +    break;
  1.1501 +  case Bytecodes::_d2i:
  1.1502 +  {
  1.1503 +    Label L;
  1.1504 +    __ cvttsd2sil(rax, xmm0);
  1.1505 +    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
  1.1506 +    __ jcc(Assembler::notEqual, L);
  1.1507 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
  1.1508 +    __ bind(L);
  1.1509 +  }
  1.1510 +    break;
  1.1511 +  case Bytecodes::_d2l:
  1.1512 +  {
  1.1513 +    Label L;
  1.1514 +    __ cvttsd2siq(rax, xmm0);
  1.1515 +    // NaN or overflow/underflow?
  1.1516 +    __ cmp64(rax, ExternalAddress((address) &is_nan));
  1.1517 +    __ jcc(Assembler::notEqual, L);
  1.1518 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
  1.1519 +    __ bind(L);
  1.1520 +  }
  1.1521 +    break;
  1.1522 +  case Bytecodes::_d2f:
  1.1523 +    __ cvtsd2ss(xmm0, xmm0);
  1.1524 +    break;
  1.1525 +  default:
  1.1526 +    ShouldNotReachHere();
  1.1527 +  }
  1.1528 +}
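         +
         +// Note on the f2i/f2l/d2i/d2l slow paths above: cvttss2si/cvttsd2si return the
         +// "integer indefinite" value (0x80000000 / 0x8000000000000000) for NaN and
         +// out-of-range inputs, so only those cases fall into the runtime call. The
         +// runtime stubs then apply the Java-level narrowing rules; roughly
         +// (illustrative sketch only, not the actual SharedRuntime code):
         +//
         +//   jint f2i_semantics(jfloat x) {
         +//     if (x != x)                 return 0;        // NaN converts to 0
         +//     if (x >= (jfloat) max_jint) return max_jint;  // saturate on overflow
         +//     if (x <= (jfloat) min_jint) return min_jint;  // saturate on underflow
         +//     return (jint) x;                              // in-range: truncate
         +//   }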
  1.1529 +
  1.1530 +void TemplateTable::lcmp() {
  1.1531 +  transition(ltos, itos);
  1.1532 +  Label done;
  1.1533 +  __ pop_l(rdx);
  1.1534 +  __ cmpq(rdx, rax);
  1.1535 +  __ movl(rax, -1);
  1.1536 +  __ jccb(Assembler::less, done);
  1.1537 +  __ setb(Assembler::notEqual, rax);
  1.1538 +  __ movzbl(rax, rax);
  1.1539 +  __ bind(done);
  1.1540 +}
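         +
         +// lcmp pushes -1, 0 or 1; the setb(notEqual)/movzbl pair above encodes the
         +// "0 or 1" half once the "less" case has been filtered out. Equivalent
         +// semantics (illustrative sketch only):
         +//
         +//   jint lcmp_semantics(jlong v1, jlong v2) {
         +//     return (v1 < v2) ? -1 : (v1 == v2 ? 0 : 1);
         +//   }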
  1.1541 +
  1.1542 +void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  1.1543 +  Label done;
  1.1544 +  if (is_float) {
  1.1545 +    // XXX get rid of pop here, use ... reg, mem32
  1.1546 +    __ pop_f(xmm1);
  1.1547 +    __ ucomiss(xmm1, xmm0);
  1.1548 +  } else {
  1.1549 +    // XXX get rid of pop here, use ... reg, mem64
  1.1550 +    __ pop_d(xmm1);
  1.1551 +    __ ucomisd(xmm1, xmm0);
  1.1552 +  }
  1.1553 +  if (unordered_result < 0) {
  1.1554 +    __ movl(rax, -1);
  1.1555 +    __ jccb(Assembler::parity, done);
  1.1556 +    __ jccb(Assembler::below, done);
  1.1557 +    __ setb(Assembler::notEqual, rdx);
  1.1558 +    __ movzbl(rax, rdx);
  1.1559 +  } else {
  1.1560 +    __ movl(rax, 1);
  1.1561 +    __ jccb(Assembler::parity, done);
  1.1562 +    __ jccb(Assembler::above, done);
  1.1563 +    __ movl(rax, 0);
  1.1564 +    __ jccb(Assembler::equal, done);
  1.1565 +    __ decrementl(rax);
  1.1566 +  }
  1.1567 +  __ bind(done);
  1.1568 +}
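         +
         +// float_cmp implements fcmpl/dcmpl (unordered_result == -1) and fcmpg/dcmpg
         +// (unordered_result == 1); the parity branch handles the unordered (NaN) case.
         +// Equivalent semantics (illustrative sketch only):
         +//
         +//   jint fcmp_semantics(jfloat v1, jfloat v2, int unordered_result) {
         +//     if (v1 != v1 || v2 != v2) return unordered_result;  // NaN operand
         +//     return (v1 < v2) ? -1 : (v1 == v2 ? 0 : 1);
         +//   }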
  1.1569 +
  1.1570 +void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1.1571 +  __ get_method(rcx); // rcx holds method
  1.1572 +  __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
  1.1573 +                                     // holds bumped taken count
  1.1574 +
  1.1575 +  const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
  1.1576 +                             InvocationCounter::counter_offset();
  1.1577 +  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
  1.1578 +                              InvocationCounter::counter_offset();
  1.1579 +
  1.1580 +  // Load up edx with the branch displacement
  1.1581 +  if (is_wide) {
  1.1582 +    __ movl(rdx, at_bcp(1));
  1.1583 +  } else {
  1.1584 +    __ load_signed_short(rdx, at_bcp(1));
  1.1585 +  }
  1.1586 +  __ bswapl(rdx);
  1.1587 +
  1.1588 +  if (!is_wide) {
  1.1589 +    __ sarl(rdx, 16);
  1.1590 +  }
  1.1591 +  __ movl2ptr(rdx, rdx);
  1.1592 +
  1.1593 +  // Handle all the JSR stuff here, then exit.
  1.1594 +  // It's much shorter and cleaner than intermingling with the non-JSR
  1.1595 +  // normal-branch stuff occurring below.
  1.1596 +  if (is_jsr) {
  1.1597 +    // Pre-load the next target bytecode into rbx
  1.1598 +    __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1, 0));
  1.1599 +
  1.1600 +    // compute return address as bci in rax
  1.1601 +    __ lea(rax, at_bcp((is_wide ? 5 : 3) -
  1.1602 +                        in_bytes(ConstMethod::codes_offset())));
  1.1603 +    __ subptr(rax, Address(rcx, Method::const_offset()));
  1.1604 +    // Adjust the bcp in r13 by the displacement in rdx
  1.1605 +    __ addptr(r13, rdx);
  1.1606 +    // jsr's result is a returnAddress (bci), not an oop, so push it as an int
  1.1607 +    __ push_i(rax);
  1.1608 +    __ dispatch_only(vtos);
  1.1609 +    return;
  1.1610 +  }
  1.1611 +
  1.1612 +  // Normal (non-jsr) branch handling
  1.1613 +
  1.1614 +  // Adjust the bcp in r13 by the displacement in rdx
  1.1615 +  __ addptr(r13, rdx);
  1.1616 +
  1.1617 +  assert(UseLoopCounter || !UseOnStackReplacement,
  1.1618 +         "on-stack-replacement requires loop counters");
  1.1619 +  Label backedge_counter_overflow;
  1.1620 +  Label profile_method;
  1.1621 +  Label dispatch;
  1.1622 +  if (UseLoopCounter) {
  1.1623 +    // increment backedge counter for backward branches
  1.1624 +    // rax: MDO
  1.1625 +    // ebx: MDO bumped taken-count
  1.1626 +    // rcx: method
  1.1627 +    // rdx: target offset
  1.1628 +    // r13: target bcp
  1.1629 +    // r14: locals pointer
  1.1630 +    __ testl(rdx, rdx);             // check if forward or backward branch
  1.1631 +    __ jcc(Assembler::positive, dispatch); // count only if backward branch
  1.1632 +
  1.1633 +    // check if MethodCounters exists
  1.1634 +    Label has_counters;
  1.1635 +    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
  1.1636 +    __ testptr(rax, rax);
  1.1637 +    __ jcc(Assembler::notZero, has_counters);
  1.1638 +    __ push(rdx);
  1.1639 +    __ push(rcx);
  1.1640 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
  1.1641 +               rcx);
  1.1642 +    __ pop(rcx);
  1.1643 +    __ pop(rdx);
  1.1644 +    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
  1.1645 +    __ jcc(Assembler::zero, dispatch);
  1.1646 +    __ bind(has_counters);
  1.1647 +
  1.1648 +    if (TieredCompilation) {
  1.1649 +      Label no_mdo;
  1.1650 +      int increment = InvocationCounter::count_increment;
  1.1651 +      int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
  1.1652 +      if (ProfileInterpreter) {
  1.1653 +        // Are we profiling?
  1.1654 +        __ movptr(rbx, Address(rcx, in_bytes(Method::method_data_offset())));
  1.1655 +        __ testptr(rbx, rbx);
  1.1656 +        __ jccb(Assembler::zero, no_mdo);
  1.1657 +        // Increment the MDO backedge counter
  1.1658 +        const Address mdo_backedge_counter(rbx, in_bytes(MethodData::backedge_counter_offset()) +
  1.1659 +                                           in_bytes(InvocationCounter::counter_offset()));
  1.1660 +        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
  1.1661 +                                   rax, false, Assembler::zero, &backedge_counter_overflow);
  1.1662 +        __ jmp(dispatch);
  1.1663 +      }
  1.1664 +      __ bind(no_mdo);
  1.1665 +      // Increment backedge counter in MethodCounters*
  1.1666 +      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
  1.1667 +      __ increment_mask_and_jump(Address(rcx, be_offset), increment, mask,
  1.1668 +                                 rax, false, Assembler::zero, &backedge_counter_overflow);
  1.1669 +    } else {
  1.1670 +      // increment counter
  1.1671 +      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
  1.1672 +      __ movl(rax, Address(rcx, be_offset));        // load backedge counter
  1.1673 +      __ incrementl(rax, InvocationCounter::count_increment); // increment counter
  1.1674 +      __ movl(Address(rcx, be_offset), rax);        // store counter
  1.1675 +
  1.1676 +      __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
  1.1677 +
  1.1678 +      __ andl(rax, InvocationCounter::count_mask_value); // mask out the status bits
  1.1679 +      __ addl(rax, Address(rcx, be_offset));        // add both counters
  1.1680 +
  1.1681 +      if (ProfileInterpreter) {
  1.1682 +        // Test to see if we should create a method data oop
  1.1683 +        __ cmp32(rax,
  1.1684 +                 ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
  1.1685 +        __ jcc(Assembler::less, dispatch);
  1.1686 +
  1.1687 +        // if no method data exists, go to profile method
  1.1688 +        __ test_method_data_pointer(rax, profile_method);
  1.1689 +
  1.1690 +        if (UseOnStackReplacement) {
  1.1691 +          // check for overflow against ebx which is the MDO taken count
  1.1692 +          __ cmp32(rbx,
  1.1693 +                   ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1694 +          __ jcc(Assembler::below, dispatch);
  1.1695 +
  1.1696 +          // When ProfileInterpreter is on, the backedge_count comes
  1.1697 +          // from the MethodData*, whose value does not get reset on
  1.1698 +          // the call to frequency_counter_overflow().  To avoid
  1.1699 +          // excessive calls to the overflow routine while the method is
  1.1700 +          // being compiled, add a second test to make sure the overflow
  1.1701 +          // function is called only once every overflow_frequency.
  1.1702 +          const int overflow_frequency = 1024;
  1.1703 +          __ andl(rbx, overflow_frequency - 1);
  1.1704 +          __ jcc(Assembler::zero, backedge_counter_overflow);
  1.1705 +
  1.1706 +        }
  1.1707 +      } else {
  1.1708 +        if (UseOnStackReplacement) {
  1.1709 +          // check for overflow against eax, which is the sum of the
  1.1710 +          // counters
  1.1711 +          __ cmp32(rax,
  1.1712 +                   ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1713 +          __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
  1.1714 +
  1.1715 +        }
  1.1716 +      }
  1.1717 +    }
  1.1718 +    __ bind(dispatch);
  1.1719 +  }
  1.1720 +
  1.1721 +  // Pre-load the next target bytecode into rbx
  1.1722 +  __ load_unsigned_byte(rbx, Address(r13, 0));
  1.1723 +
  1.1724 +  // continue with the bytecode @ target
  1.1725 +  // eax: return bci for jsr's, unused otherwise
  1.1726 +  // ebx: target bytecode
  1.1727 +  // r13: target bcp
  1.1728 +  __ dispatch_only(vtos);
  1.1729 +
  1.1730 +  if (UseLoopCounter) {
  1.1731 +    if (ProfileInterpreter) {
  1.1732 +      // Out-of-line code to allocate method data oop.
  1.1733 +      __ bind(profile_method);
  1.1734 +      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  1.1735 +      __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
  1.1736 +      __ set_method_data_pointer_for_bcp();
  1.1737 +      __ jmp(dispatch);
  1.1738 +    }
  1.1739 +
  1.1740 +    if (UseOnStackReplacement) {
  1.1741 +      // invocation counter overflow
  1.1742 +      __ bind(backedge_counter_overflow);
  1.1743 +      __ negptr(rdx);
  1.1744 +      __ addptr(rdx, r13); // branch bcp
  1.1745 +      // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
  1.1746 +      __ call_VM(noreg,
  1.1747 +                 CAST_FROM_FN_PTR(address,
  1.1748 +                                  InterpreterRuntime::frequency_counter_overflow),
  1.1749 +                 rdx);
  1.1750 +      __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
  1.1751 +
  1.1752 +      // rax: osr nmethod (osr ok) or NULL (osr not possible)
  1.1753 +      // ebx: target bytecode
  1.1754 +      // rdx: scratch
  1.1755 +      // r14: locals pointer
  1.1756 +      // r13: bcp
  1.1757 +      __ testptr(rax, rax);                        // test result
  1.1758 +      __ jcc(Assembler::zero, dispatch);         // no osr if null
  1.1759 +      // nmethod may have been invalidated (VM may block upon call_VM return)
  1.1760 +      __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
  1.1761 +      __ cmpl(rcx, InvalidOSREntryBci);
  1.1762 +      __ jcc(Assembler::equal, dispatch);
  1.1763 +
  1.1764 +      // We have the address of an on-stack-replacement routine in rax.
  1.1765 +      // We need to prepare to execute the OSR method. First we must
  1.1766 +      // migrate the locals and monitors off of the stack.
  1.1767 +
  1.1768 +      __ mov(r13, rax);                             // save the nmethod
  1.1769 +
  1.1770 +      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
  1.1771 +
  1.1772 +      // eax is OSR buffer, move it to expected parameter location
  1.1773 +      __ mov(j_rarg0, rax);
  1.1774 +
  1.1775 +      // We use the j_rarg definitions here so that registers don't conflict: the
  1.1776 +      // parameter registers differ across platforms, we are in the midst of a calling
  1.1777 +      // sequence to the OSR nmethod, and we don't want a collision. These are NOT parameters.
  1.1778 +
  1.1779 +      const Register retaddr = j_rarg2;
  1.1780 +      const Register sender_sp = j_rarg1;
  1.1781 +
  1.1782 +      // pop the interpreter frame
  1.1783 +      __ movptr(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
  1.1784 +      __ leave();                                // remove frame anchor
  1.1785 +      __ pop(retaddr);                           // get return address
  1.1786 +      __ mov(rsp, sender_sp);                   // set sp to sender sp
  1.1787 +      // Ensure compiled code always sees stack at proper alignment
  1.1788 +      __ andptr(rsp, -(StackAlignmentInBytes));
  1.1789 +
  1.1790 +      // unlike 32-bit x86, we need no specialized return from compiled code
  1.1791 +      // to the interpreter or the call stub.
  1.1792 +
  1.1793 +      // push the return address
  1.1794 +      __ push(retaddr);
  1.1795 +
  1.1796 +      // and begin the OSR nmethod
  1.1797 +      __ jmp(Address(r13, nmethod::osr_entry_point_offset()));
  1.1798 +    }
  1.1799 +  }
  1.1800 +}
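         +
         +// Rough shape of the backedge counting above (illustrative sketch only; the
         +// real work is done by increment_mask_and_jump, and the tiered and non-tiered
         +// paths differ in detail):
         +//
         +//   if (branch displacement is negative) {           // backward branch
         +//     counter += InvocationCounter::count_increment;
         +//     if ((counter & mask) == 0)                      // notify only every so often
         +//       InterpreterRuntime::frequency_counter_overflow(branch_bcp);
         +//     if (an OSR nmethod is returned) migrate the frame and jump to its OSR entry;
         +//   }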
  1.1801 +
  1.1802 +
  1.1803 +void TemplateTable::if_0cmp(Condition cc) {
  1.1804 +  transition(itos, vtos);
  1.1805 +  // assume branch is more often taken than not (loops use backward branches)
  1.1806 +  Label not_taken;
  1.1807 +  __ testl(rax, rax);
  1.1808 +  __ jcc(j_not(cc), not_taken);
  1.1809 +  branch(false, false);
  1.1810 +  __ bind(not_taken);
  1.1811 +  __ profile_not_taken_branch(rax);
  1.1812 +}
  1.1813 +
  1.1814 +void TemplateTable::if_icmp(Condition cc) {
  1.1815 +  transition(itos, vtos);
  1.1816 +  // assume branch is more often taken than not (loops use backward branches)
  1.1817 +  Label not_taken;
  1.1818 +  __ pop_i(rdx);
  1.1819 +  __ cmpl(rdx, rax);
  1.1820 +  __ jcc(j_not(cc), not_taken);
  1.1821 +  branch(false, false);
  1.1822 +  __ bind(not_taken);
  1.1823 +  __ profile_not_taken_branch(rax);
  1.1824 +}
  1.1825 +
  1.1826 +void TemplateTable::if_nullcmp(Condition cc) {
  1.1827 +  transition(atos, vtos);
  1.1828 +  // assume branch is more often taken than not (loops use backward branches)
  1.1829 +  Label not_taken;
  1.1830 +  __ testptr(rax, rax);
  1.1831 +  __ jcc(j_not(cc), not_taken);
  1.1832 +  branch(false, false);
  1.1833 +  __ bind(not_taken);
  1.1834 +  __ profile_not_taken_branch(rax);
  1.1835 +}
  1.1836 +
  1.1837 +void TemplateTable::if_acmp(Condition cc) {
  1.1838 +  transition(atos, vtos);
  1.1839 +  // assume branch is more often taken than not (loops use backward branches)
  1.1840 +  Label not_taken;
  1.1841 +  __ pop_ptr(rdx);
  1.1842 +  __ cmpptr(rdx, rax);
  1.1843 +  __ jcc(j_not(cc), not_taken);
  1.1844 +  branch(false, false);
  1.1845 +  __ bind(not_taken);
  1.1846 +  __ profile_not_taken_branch(rax);
  1.1847 +}
  1.1848 +
  1.1849 +void TemplateTable::ret() {
  1.1850 +  transition(vtos, vtos);
  1.1851 +  locals_index(rbx);
  1.1852 +  __ movslq(rbx, iaddress(rbx)); // get return bci, compute return bcp
  1.1853 +  __ profile_ret(rbx, rcx);
  1.1854 +  __ get_method(rax);
  1.1855 +  __ movptr(r13, Address(rax, Method::const_offset()));
  1.1856 +  __ lea(r13, Address(r13, rbx, Address::times_1,
  1.1857 +                      ConstMethod::codes_offset()));
  1.1858 +  __ dispatch_next(vtos);
  1.1859 +}
  1.1860 +
  1.1861 +void TemplateTable::wide_ret() {
  1.1862 +  transition(vtos, vtos);
  1.1863 +  locals_index_wide(rbx);
  1.1864 +  __ movptr(rbx, aaddress(rbx)); // get return bci, compute return bcp
  1.1865 +  __ profile_ret(rbx, rcx);
  1.1866 +  __ get_method(rax);
  1.1867 +  __ movptr(r13, Address(rax, Method::const_offset()));
  1.1868 +  __ lea(r13, Address(r13, rbx, Address::times_1, ConstMethod::codes_offset()));
  1.1869 +  __ dispatch_next(vtos);
  1.1870 +}
  1.1871 +
  1.1872 +void TemplateTable::tableswitch() {
  1.1873 +  Label default_case, continue_execution;
  1.1874 +  transition(itos, vtos);
  1.1875 +  // align r13
  1.1876 +  __ lea(rbx, at_bcp(BytesPerInt));
  1.1877 +  __ andptr(rbx, -BytesPerInt);
  1.1878 +  // load lo & hi
  1.1879 +  __ movl(rcx, Address(rbx, BytesPerInt));
  1.1880 +  __ movl(rdx, Address(rbx, 2 * BytesPerInt));
  1.1881 +  __ bswapl(rcx);
  1.1882 +  __ bswapl(rdx);
  1.1883 +  // check against lo & hi
  1.1884 +  __ cmpl(rax, rcx);
  1.1885 +  __ jcc(Assembler::less, default_case);
  1.1886 +  __ cmpl(rax, rdx);
  1.1887 +  __ jcc(Assembler::greater, default_case);
  1.1888 +  // lookup dispatch offset
  1.1889 +  __ subl(rax, rcx);
  1.1890 +  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
  1.1891 +  __ profile_switch_case(rax, rbx, rcx);
  1.1892 +  // continue execution
  1.1893 +  __ bind(continue_execution);
  1.1894 +  __ bswapl(rdx);
  1.1895 +  __ movl2ptr(rdx, rdx);
  1.1896 +  __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
  1.1897 +  __ addptr(r13, rdx);
  1.1898 +  __ dispatch_only(vtos);
  1.1899 +  // handle default
  1.1900 +  __ bind(default_case);
  1.1901 +  __ profile_switch_default(rax);
  1.1902 +  __ movl(rdx, Address(rbx, 0));
  1.1903 +  __ jmp(continue_execution);
  1.1904 +}
  1.1905 +
  1.1906 +void TemplateTable::lookupswitch() {
  1.1907 +  transition(itos, itos);
  1.1908 +  __ stop("lookupswitch bytecode should have been rewritten");
  1.1909 +}
  1.1910 +
  1.1911 +void TemplateTable::fast_linearswitch() {
  1.1912 +  transition(itos, vtos);
  1.1913 +  Label loop_entry, loop, found, continue_execution;
  1.1914 +  // bswap rax so we can avoid bswapping the table entries
  1.1915 +  __ bswapl(rax);
  1.1916 +  // align r13
  1.1917 +  __ lea(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of
  1.1918 +                                    // this instruction (change offsets
  1.1919 +                                    // below)
  1.1920 +  __ andptr(rbx, -BytesPerInt);
  1.1921 +  // set counter
  1.1922 +  __ movl(rcx, Address(rbx, BytesPerInt));
  1.1923 +  __ bswapl(rcx);
  1.1924 +  __ jmpb(loop_entry);
  1.1925 +  // table search
  1.1926 +  __ bind(loop);
  1.1927 +  __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt));
  1.1928 +  __ jcc(Assembler::equal, found);
  1.1929 +  __ bind(loop_entry);
  1.1930 +  __ decrementl(rcx);
  1.1931 +  __ jcc(Assembler::greaterEqual, loop);
  1.1932 +  // default case
  1.1933 +  __ profile_switch_default(rax);
  1.1934 +  __ movl(rdx, Address(rbx, 0));
  1.1935 +  __ jmp(continue_execution);
  1.1936 +  // entry found -> get offset
  1.1937 +  __ bind(found);
  1.1938 +  __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt));
  1.1939 +  __ profile_switch_case(rcx, rax, rbx);
  1.1940 +  // continue execution
  1.1941 +  __ bind(continue_execution);
  1.1942 +  __ bswapl(rdx);
  1.1943 +  __ movl2ptr(rdx, rdx);
  1.1944 +  __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
  1.1945 +  __ addptr(r13, rdx);
  1.1946 +  __ dispatch_only(vtos);
  1.1947 +}
  1.1948 +
  1.1949 +void TemplateTable::fast_binaryswitch() {
  1.1950 +  transition(itos, vtos);
  1.1951 +  // Implementation using the following core algorithm:
  1.1952 +  //
  1.1953 +  // int binary_search(int key, LookupswitchPair* array, int n) {
  1.1954 +  //   // Binary search according to "Methodik des Programmierens" by
  1.1955 +  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  1.1956 +  //   int i = 0;
  1.1957 +  //   int j = n;
  1.1958 +  //   while (i+1 < j) {
  1.1959 +  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  1.1960 +  //     // with      Q: for all i: 0 <= i < n: key < a[i]
  1.1961 +  //     // where a stands for the array and assuming that the (nonexistent)
  1.1962 +  //     // element a[n] is infinitely big.
  1.1963 +  //     int h = (i + j) >> 1;
  1.1964 +  //     // i < h < j
  1.1965 +  //     if (key < array[h].fast_match()) {
  1.1966 +  //       j = h;
  1.1967 +  //     } else {
  1.1968 +  //       i = h;
  1.1969 +  //     }
  1.1970 +  //   }
  1.1971 +  //   // R: a[i] <= key < a[i+1] or Q
  1.1972 +  //   // (i.e., if key is within array, i is the correct index)
  1.1973 +  //   return i;
  1.1974 +  // }
  1.1975 +
  1.1976 +  // Register allocation
  1.1977 +  const Register key   = rax; // already set (tosca)
  1.1978 +  const Register array = rbx;
  1.1979 +  const Register i     = rcx;
  1.1980 +  const Register j     = rdx;
  1.1981 +  const Register h     = rdi;
  1.1982 +  const Register temp  = rsi;
  1.1983 +
  1.1984 +  // Find array start
  1.1985 +  __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
  1.1986 +                                          // get rid of this
  1.1987 +                                          // instruction (change
  1.1988 +                                          // offsets below)
  1.1989 +  __ andptr(array, -BytesPerInt);
  1.1990 +
  1.1991 +  // Initialize i & j
  1.1992 +  __ xorl(i, i);                            // i = 0;
  1.1993 +  __ movl(j, Address(array, -BytesPerInt)); // j = length(array);
  1.1994 +
  1.1995 +  // Convert j into native byteordering
  1.1996 +  __ bswapl(j);
  1.1997 +
  1.1998 +  // And start
  1.1999 +  Label entry;
  1.2000 +  __ jmp(entry);
  1.2001 +
  1.2002 +  // binary search loop
  1.2003 +  {
  1.2004 +    Label loop;
  1.2005 +    __ bind(loop);
  1.2006 +    // int h = (i + j) >> 1;
  1.2007 +    __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
  1.2008 +    __ sarl(h, 1);                               // h = (i + j) >> 1;
  1.2009 +    // if (key < array[h].fast_match()) {
  1.2010 +    //   j = h;
  1.2011 +    // } else {
  1.2012 +    //   i = h;
  1.2013 +    // }
  1.2014 +    // Convert array[h].match to native byte-ordering before compare
  1.2015 +    __ movl(temp, Address(array, h, Address::times_8));
  1.2016 +    __ bswapl(temp);
  1.2017 +    __ cmpl(key, temp);
  1.2018 +    // j = h if (key <  array[h].fast_match())
  1.2019 +    __ cmovl(Assembler::less, j, h);
  1.2020 +    // i = h if (key >= array[h].fast_match())
  1.2021 +    __ cmovl(Assembler::greaterEqual, i, h);
  1.2022 +    // while (i+1 < j)
  1.2023 +    __ bind(entry);
  1.2024 +    __ leal(h, Address(i, 1)); // i+1
  1.2025 +    __ cmpl(h, j);             // i+1 < j
  1.2026 +    __ jcc(Assembler::less, loop);
  1.2027 +  }
  1.2028 +
  1.2029 +  // end of binary search, result index is i (must check again!)
  1.2030 +  Label default_case;
  1.2031 +  // Convert array[i].match to native byte-ordering before compare
  1.2032 +  __ movl(temp, Address(array, i, Address::times_8));
  1.2033 +  __ bswapl(temp);
  1.2034 +  __ cmpl(key, temp);
  1.2035 +  __ jcc(Assembler::notEqual, default_case);
  1.2036 +
  1.2037 +  // entry found -> j = offset
  1.2038 +  __ movl(j , Address(array, i, Address::times_8, BytesPerInt));
  1.2039 +  __ profile_switch_case(i, key, array);
  1.2040 +  __ bswapl(j);
  1.2041 +  __ movl2ptr(j, j);
  1.2042 +  __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
  1.2043 +  __ addptr(r13, j);
  1.2044 +  __ dispatch_only(vtos);
  1.2045 +
  1.2046 +  // default case -> j = default offset
  1.2047 +  __ bind(default_case);
  1.2048 +  __ profile_switch_default(i);
  1.2049 +  __ movl(j, Address(array, -2 * BytesPerInt));
  1.2050 +  __ bswapl(j);
  1.2051 +  __ movl2ptr(j, j);
  1.2052 +  __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
  1.2053 +  __ addptr(r13, j);
  1.2054 +  __ dispatch_only(vtos);
  1.2055 +}
  1.2056 +
  1.2057 +
  1.2058 +void TemplateTable::_return(TosState state) {
  1.2059 +  transition(state, state);
  1.2060 +  assert(_desc->calls_vm(),
  1.2061 +         "inconsistent calls_vm information"); // call in remove_activation
  1.2062 +
  1.2063 +  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  1.2064 +    assert(state == vtos, "only valid state");
  1.2065 +    __ movptr(c_rarg1, aaddress(0));
  1.2066 +    __ load_klass(rdi, c_rarg1);
  1.2067 +    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
  1.2068 +    __ testl(rdi, JVM_ACC_HAS_FINALIZER);
  1.2069 +    Label skip_register_finalizer;
  1.2070 +    __ jcc(Assembler::zero, skip_register_finalizer);
  1.2071 +
  1.2072 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
  1.2073 +
  1.2074 +    __ bind(skip_register_finalizer);
  1.2075 +  }
  1.2076 +
  1.2077 +  __ remove_activation(state, r13);
  1.2078 +  __ jmp(r13);
  1.2079 +}
  1.2080 +
  1.2081 +// ----------------------------------------------------------------------------
  1.2082 +// Volatile variables demand their effects be made known to all CPUs
  1.2083 +// in order.  Store buffers on most chips allow reads & writes to
  1.2084 +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
  1.2085 +// without some kind of memory barrier (i.e., it's not sufficient that
  1.2086 +// the interpreter does not reorder volatile references, the hardware
  1.2087 +// also must not reorder them).
  1.2088 +//
  1.2089 +// According to the new Java Memory Model (JMM):
  1.2090 +// (1) All volatiles are serialized with respect to each other.  Also, reads &
  1.2091 +//     writes act as acquire & release, so:
  1.2092 +// (2) A read cannot let unrelated NON-volatile memory refs that
  1.2093 +//     happen after the read float up to before the read.  It's OK for
  1.2094 +//     non-volatile memory refs that happen before the volatile read to
  1.2095 +//     float down below it.
  1.2096 +// (3) Similarly, a volatile write cannot let unrelated NON-volatile
  1.2097 +//     memory refs that happen BEFORE the write float down to after the
  1.2098 +//     write.  It's OK for non-volatile memory refs that happen after the
  1.2099 +//     volatile write to float up before it.
  1.2100 +//
  1.2101 +// We only put in barriers around volatile refs (they are expensive),
  1.2102 +// not _between_ memory refs (that would require us to track the
  1.2103 +// flavor of the previous memory refs).  Requirements (2) and (3)
  1.2104 +// require some barriers before volatile stores and after volatile
  1.2105 +// loads.  These nearly cover requirement (1) but miss the
  1.2106 +// volatile-store-volatile-load case.  This final case is placed after
  1.2107 +// volatile-stores although it could just as well go before
  1.2108 +// volatile-loads.
  1.2109 +void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
  1.2110 +                                     order_constraint) {
  1.2111 +  // Helper function to insert a memory barrier (the is-volatile test is done by the callers)
  1.2112 +  if (os::is_MP()) { // Not needed on single CPU
  1.2113 +    __ membar(order_constraint);
  1.2114 +  }
  1.2115 +}
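         +
         +// On x86 the only hardware reordering that needs an explicit fence is
         +// StoreLoad, which is why the volatile-store paths below (putfield_or_static,
         +// fast_storefield) emit a barrier after the store while the volatile-load
         +// barriers remain commented out as unnecessary. Sketch of the store pattern
         +// (illustrative only):
         +//
         +//   store value to volatile field;
         +//   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
         +//                                                Assembler::StoreStore));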
  1.2116 +
  1.2117 +void TemplateTable::resolve_cache_and_index(int byte_no,
  1.2118 +                                            Register Rcache,
  1.2119 +                                            Register index,
  1.2120 +                                            size_t index_size) {
  1.2121 +  const Register temp = rbx;
  1.2122 +  assert_different_registers(Rcache, index, temp);
  1.2123 +
  1.2124 +  Label resolved;
  1.2125 +  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  1.2126 +  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  1.2127 +  __ cmpl(temp, (int) bytecode());  // have we resolved this bytecode?
  1.2128 +  __ jcc(Assembler::equal, resolved);
  1.2129 +
  1.2130 +  // resolve first time through
  1.2131 +  address entry;
  1.2132 +  switch (bytecode()) {
  1.2133 +  case Bytecodes::_getstatic:
  1.2134 +  case Bytecodes::_putstatic:
  1.2135 +  case Bytecodes::_getfield:
  1.2136 +  case Bytecodes::_putfield:
  1.2137 +    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
  1.2138 +    break;
  1.2139 +  case Bytecodes::_invokevirtual:
  1.2140 +  case Bytecodes::_invokespecial:
  1.2141 +  case Bytecodes::_invokestatic:
  1.2142 +  case Bytecodes::_invokeinterface:
  1.2143 +    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
  1.2144 +    break;
  1.2145 +  case Bytecodes::_invokehandle:
  1.2146 +    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
  1.2147 +    break;
  1.2148 +  case Bytecodes::_invokedynamic:
  1.2149 +    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
  1.2150 +    break;
  1.2151 +  default:
  1.2152 +    fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
  1.2153 +    break;
  1.2154 +  }
  1.2155 +  __ movl(temp, (int) bytecode());
  1.2156 +  __ call_VM(noreg, entry, temp);
  1.2157 +
  1.2158 +  // Update registers with resolved info
  1.2159 +  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
  1.2160 +  __ bind(resolved);
  1.2161 +}
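         +
         +// resolve_cache_and_index, in rough pseudocode (illustrative sketch only):
         +//
         +//   if (indicator byte in the ConstantPoolCacheEntry != bytecode()) {
         +//     call the matching InterpreterRuntime::resolve_* entry;  // first execution only
         +//     reload Rcache/index from the bcp, since the call may clobber them;
         +//   }
         +//   // Rcache/index now address the resolved ConstantPoolCacheEntry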
  1.2162 +
  1.2163 +// The cache and index registers must be set before the call
  1.2164 +void TemplateTable::load_field_cp_cache_entry(Register obj,
  1.2165 +                                              Register cache,
  1.2166 +                                              Register index,
  1.2167 +                                              Register off,
  1.2168 +                                              Register flags,
  1.2169 +                                              bool is_static = false) {
  1.2170 +  assert_different_registers(cache, index, flags, off);
  1.2171 +
  1.2172 +  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  1.2173 +  // Field offset
  1.2174 +  __ movptr(off, Address(cache, index, Address::times_ptr,
  1.2175 +                         in_bytes(cp_base_offset +
  1.2176 +                                  ConstantPoolCacheEntry::f2_offset())));
  1.2177 +  // Flags
  1.2178 +  __ movl(flags, Address(cache, index, Address::times_ptr,
  1.2179 +                         in_bytes(cp_base_offset +
  1.2180 +                                  ConstantPoolCacheEntry::flags_offset())));
  1.2181 +
  1.2182 +  // klass overwrite register
  1.2183 +  if (is_static) {
  1.2184 +    __ movptr(obj, Address(cache, index, Address::times_ptr,
  1.2185 +                           in_bytes(cp_base_offset +
  1.2186 +                                    ConstantPoolCacheEntry::f1_offset())));
  1.2187 +    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  1.2188 +    __ movptr(obj, Address(obj, mirror_offset));
  1.2189 +  }
  1.2190 +}
  1.2191 +
  1.2192 +void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  1.2193 +                                               Register method,
  1.2194 +                                               Register itable_index,
  1.2195 +                                               Register flags,
  1.2196 +                                               bool is_invokevirtual,
  1.2197 +                                               bool is_invokevfinal, /*unused*/
  1.2198 +                                               bool is_invokedynamic) {
  1.2199 +  // setup registers
  1.2200 +  const Register cache = rcx;
  1.2201 +  const Register index = rdx;
  1.2202 +  assert_different_registers(method, flags);
  1.2203 +  assert_different_registers(method, cache, index);
  1.2204 +  assert_different_registers(itable_index, flags);
  1.2205 +  assert_different_registers(itable_index, cache, index);
  1.2206 +  // determine constant pool cache field offsets
  1.2207 +  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
  1.2208 +  const int method_offset = in_bytes(
  1.2209 +    ConstantPoolCache::base_offset() +
  1.2210 +      ((byte_no == f2_byte)
  1.2211 +       ? ConstantPoolCacheEntry::f2_offset()
  1.2212 +       : ConstantPoolCacheEntry::f1_offset()));
  1.2213 +  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
  1.2214 +                                    ConstantPoolCacheEntry::flags_offset());
  1.2215 +  // access constant pool cache fields
  1.2216 +  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
  1.2217 +                                    ConstantPoolCacheEntry::f2_offset());
  1.2218 +
  1.2219 +  size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
  1.2220 +  resolve_cache_and_index(byte_no, cache, index, index_size);
  1.2221 +  __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
  1.2222 +
  1.2223 +  if (itable_index != noreg) {
  1.2224 +    // pick up itable or appendix index from f2 also:
  1.2225 +    __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
  1.2226 +  }
  1.2227 +  __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
  1.2228 +}
  1.2229 +
  1.2230 +// Correct values of the cache and index registers are preserved.
  1.2231 +void TemplateTable::jvmti_post_field_access(Register cache, Register index,
  1.2232 +                                            bool is_static, bool has_tos) {
  1.2233 +  // Do the JVMTI work here to avoid disturbing the register state below.
  1.2234 +  // We use the c_rarg registers here because they are the registers used for
  1.2235 +  // the call into the VM.
  1.2236 +  if (JvmtiExport::can_post_field_access()) {
  1.2237 +    // Check to see if a field access watch has been set before we
  1.2238 +    // take the time to call into the VM.
  1.2239 +    Label L1;
  1.2240 +    assert_different_registers(cache, index, rax);
  1.2241 +    __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2242 +    __ testl(rax, rax);
  1.2243 +    __ jcc(Assembler::zero, L1);
  1.2244 +
  1.2245 +    __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
  1.2246 +
  1.2247 +    // cache entry pointer
  1.2248 +    __ addptr(c_rarg2, in_bytes(ConstantPoolCache::base_offset()));
  1.2249 +    __ shll(c_rarg3, LogBytesPerWord);
  1.2250 +    __ addptr(c_rarg2, c_rarg3);
  1.2251 +    if (is_static) {
  1.2252 +      __ xorl(c_rarg1, c_rarg1); // NULL object reference
  1.2253 +    } else {
  1.2254 +      __ movptr(c_rarg1, at_tos()); // get object pointer without popping it
  1.2255 +      __ verify_oop(c_rarg1);
  1.2256 +    }
  1.2257 +    // c_rarg1: object pointer or NULL
  1.2258 +    // c_rarg2: cache entry pointer
  1.2259 +    // c_rarg3: jvalue object on the stack
  1.2260 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.2261 +                                       InterpreterRuntime::post_field_access),
  1.2262 +               c_rarg1, c_rarg2, c_rarg3);
  1.2263 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2264 +    __ bind(L1);
  1.2265 +  }
  1.2266 +}
  1.2267 +
  1.2268 +void TemplateTable::pop_and_check_object(Register r) {
  1.2269 +  __ pop_ptr(r);
  1.2270 +  __ null_check(r);  // for field access must check obj.
  1.2271 +  __ verify_oop(r);
  1.2272 +}
  1.2273 +
  1.2274 +void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  1.2275 +  transition(vtos, vtos);
  1.2276 +
  1.2277 +  const Register cache = rcx;
  1.2278 +  const Register index = rdx;
  1.2279 +  const Register obj   = c_rarg3;
  1.2280 +  const Register off   = rbx;
  1.2281 +  const Register flags = rax;
  1.2282 +  const Register bc = c_rarg3; // uses same reg as obj, so don't mix them
  1.2283 +
  1.2284 +  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  1.2285 +  jvmti_post_field_access(cache, index, is_static, false);
  1.2286 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2287 +
  1.2288 +  if (!is_static) {
  1.2289 +    // obj is on the stack
  1.2290 +    pop_and_check_object(obj);
  1.2291 +  }
  1.2292 +
  1.2293 +  const Address field(obj, off, Address::times_1);
  1.2294 +
  1.2295 +  Label Done, notByte, notInt, notShort, notChar,
  1.2296 +              notLong, notFloat, notObj, notDouble;
  1.2297 +
  1.2298 +  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2299 +  // Make sure we don't need to mask flags (rax) after the above shift
  1.2300 +  assert(btos == 0, "change code, btos != 0");
  1.2301 +
  1.2302 +  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
  1.2303 +  __ jcc(Assembler::notZero, notByte);
  1.2304 +  // btos
  1.2305 +  __ load_signed_byte(rax, field);
  1.2306 +  __ push(btos);
  1.2307 +  // Rewrite bytecode to be faster
  1.2308 +  if (!is_static) {
  1.2309 +    patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
  1.2310 +  }
  1.2311 +  __ jmp(Done);
  1.2312 +
  1.2313 +  __ bind(notByte);
  1.2314 +  __ cmpl(flags, atos);
  1.2315 +  __ jcc(Assembler::notEqual, notObj);
  1.2316 +  // atos
  1.2317 +  __ load_heap_oop(rax, field);
  1.2318 +  __ push(atos);
  1.2319 +  if (!is_static) {
  1.2320 +    patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
  1.2321 +  }
  1.2322 +  __ jmp(Done);
  1.2323 +
  1.2324 +  __ bind(notObj);
  1.2325 +  __ cmpl(flags, itos);
  1.2326 +  __ jcc(Assembler::notEqual, notInt);
  1.2327 +  // itos
  1.2328 +  __ movl(rax, field);
  1.2329 +  __ push(itos);
  1.2330 +  // Rewrite bytecode to be faster
  1.2331 +  if (!is_static) {
  1.2332 +    patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
  1.2333 +  }
  1.2334 +  __ jmp(Done);
  1.2335 +
  1.2336 +  __ bind(notInt);
  1.2337 +  __ cmpl(flags, ctos);
  1.2338 +  __ jcc(Assembler::notEqual, notChar);
  1.2339 +  // ctos
  1.2340 +  __ load_unsigned_short(rax, field);
  1.2341 +  __ push(ctos);
  1.2342 +  // Rewrite bytecode to be faster
  1.2343 +  if (!is_static) {
  1.2344 +    patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
  1.2345 +  }
  1.2346 +  __ jmp(Done);
  1.2347 +
  1.2348 +  __ bind(notChar);
  1.2349 +  __ cmpl(flags, stos);
  1.2350 +  __ jcc(Assembler::notEqual, notShort);
  1.2351 +  // stos
  1.2352 +  __ load_signed_short(rax, field);
  1.2353 +  __ push(stos);
  1.2354 +  // Rewrite bytecode to be faster
  1.2355 +  if (!is_static) {
  1.2356 +    patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
  1.2357 +  }
  1.2358 +  __ jmp(Done);
  1.2359 +
  1.2360 +  __ bind(notShort);
  1.2361 +  __ cmpl(flags, ltos);
  1.2362 +  __ jcc(Assembler::notEqual, notLong);
  1.2363 +  // ltos
  1.2364 +  __ movq(rax, field);
  1.2365 +  __ push(ltos);
  1.2366 +  // Rewrite bytecode to be faster
  1.2367 +  if (!is_static) {
  1.2368 +    patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx);
  1.2369 +  }
  1.2370 +  __ jmp(Done);
  1.2371 +
  1.2372 +  __ bind(notLong);
  1.2373 +  __ cmpl(flags, ftos);
  1.2374 +  __ jcc(Assembler::notEqual, notFloat);
  1.2375 +  // ftos
  1.2376 +  __ movflt(xmm0, field);
  1.2377 +  __ push(ftos);
  1.2378 +  // Rewrite bytecode to be faster
  1.2379 +  if (!is_static) {
  1.2380 +    patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
  1.2381 +  }
  1.2382 +  __ jmp(Done);
  1.2383 +
  1.2384 +  __ bind(notFloat);
  1.2385 +#ifdef ASSERT
  1.2386 +  __ cmpl(flags, dtos);
  1.2387 +  __ jcc(Assembler::notEqual, notDouble);
  1.2388 +#endif
  1.2389 +  // dtos
  1.2390 +  __ movdbl(xmm0, field);
  1.2391 +  __ push(dtos);
  1.2392 +  // Rewrite bytecode to be faster
  1.2393 +  if (!is_static) {
  1.2394 +    patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
  1.2395 +  }
  1.2396 +#ifdef ASSERT
  1.2397 +  __ jmp(Done);
  1.2398 +
  1.2399 +  __ bind(notDouble);
  1.2400 +  __ stop("Bad state");
  1.2401 +#endif
  1.2402 +
  1.2403 +  __ bind(Done);
  1.2404 +  // [jk] not needed currently
  1.2405 +  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadLoad |
  1.2406 +  //                                              Assembler::LoadStore));
  1.2407 +}
  1.2408 +
  1.2409 +
  1.2410 +void TemplateTable::getfield(int byte_no) {
  1.2411 +  getfield_or_static(byte_no, false);
  1.2412 +}
  1.2413 +
  1.2414 +void TemplateTable::getstatic(int byte_no) {
  1.2415 +  getfield_or_static(byte_no, true);
  1.2416 +}
  1.2417 +
  1.2418 +// The cache and index registers are expected to be set before the call.
  1.2419 +// The function may destroy various other registers, but not the cache and index registers.
  1.2420 +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  1.2421 +  transition(vtos, vtos);
  1.2422 +
  1.2423 +  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  1.2424 +
  1.2425 +  if (JvmtiExport::can_post_field_modification()) {
  1.2426 +    // Check to see if a field modification watch has been set before
  1.2427 +    // we take the time to call into the VM.
  1.2428 +    Label L1;
  1.2429 +    assert_different_registers(cache, index, rax);
  1.2430 +    __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2431 +    __ testl(rax, rax);
  1.2432 +    __ jcc(Assembler::zero, L1);
  1.2433 +
  1.2434 +    __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1);
  1.2435 +
  1.2436 +    if (is_static) {
  1.2437 +      // Life is simple.  Null out the object pointer.
  1.2438 +      __ xorl(c_rarg1, c_rarg1);
  1.2439 +    } else {
  1.2440 +      // Life is harder. The stack holds the value on top, followed by
  1.2441 +      // the object.  We don't know the size of the value, though; it
  1.2442 +      // could be one or two words depending on its type. As a result,
  1.2443 +      // we must find the type to determine where the object is.
  1.2444 +      __ movl(c_rarg3, Address(c_rarg2, rscratch1,
  1.2445 +                           Address::times_8,
  1.2446 +                           in_bytes(cp_base_offset +
  1.2447 +                                     ConstantPoolCacheEntry::flags_offset())));
  1.2448 +      __ shrl(c_rarg3, ConstantPoolCacheEntry::tos_state_shift);
  1.2449 +      // Make sure we don't need to mask rcx after the above shift
  1.2450 +      ConstantPoolCacheEntry::verify_tos_state_shift();
  1.2451 +      __ movptr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
  1.2452 +      __ cmpl(c_rarg3, ltos);
  1.2453 +      __ cmovptr(Assembler::equal,
  1.2454 +                 c_rarg1, at_tos_p2()); // ltos (two word jvalue)
  1.2455 +      __ cmpl(c_rarg3, dtos);
  1.2456 +      __ cmovptr(Assembler::equal,
  1.2457 +                 c_rarg1, at_tos_p2()); // dtos (two word jvalue)
  1.2458 +    }
  1.2459 +    // cache entry pointer
  1.2460 +    __ addptr(c_rarg2, in_bytes(cp_base_offset));
  1.2461 +    __ shll(rscratch1, LogBytesPerWord);
  1.2462 +    __ addptr(c_rarg2, rscratch1);
  1.2463 +    // object (tos)
  1.2464 +    __ mov(c_rarg3, rsp);
  1.2465 +    // c_rarg1: object pointer set up above (NULL if static)
  1.2466 +    // c_rarg2: cache entry pointer
  1.2467 +    // c_rarg3: jvalue object on the stack
  1.2468 +    __ call_VM(noreg,
  1.2469 +               CAST_FROM_FN_PTR(address,
  1.2470 +                                InterpreterRuntime::post_field_modification),
  1.2471 +               c_rarg1, c_rarg2, c_rarg3);
  1.2472 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2473 +    __ bind(L1);
  1.2474 +  }
  1.2475 +}
  1.2476 +
  1.2477 +void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  1.2478 +  transition(vtos, vtos);
  1.2479 +
  1.2480 +  const Register cache = rcx;
  1.2481 +  const Register index = rdx;
  1.2482 +  const Register obj   = rcx;
  1.2483 +  const Register off   = rbx;
  1.2484 +  const Register flags = rax;
  1.2485 +  const Register bc    = c_rarg3;
  1.2486 +
  1.2487 +  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
  1.2488 +  jvmti_post_field_mod(cache, index, is_static);
  1.2489 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2490 +
  1.2491 +  // [jk] not needed currently
  1.2492 +  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
  1.2493 +  //                                              Assembler::StoreStore));
  1.2494 +
  1.2495 +  Label notVolatile, Done;
  1.2496 +  __ movl(rdx, flags);
  1.2497 +  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
  1.2498 +  __ andl(rdx, 0x1);
  1.2499 +
  1.2500 +  // field address
  1.2501 +  const Address field(obj, off, Address::times_1);
  1.2502 +
  1.2503 +  Label notByte, notInt, notShort, notChar,
  1.2504 +        notLong, notFloat, notObj, notDouble;
  1.2505 +
  1.2506 +  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2507 +
  1.2508 +  assert(btos == 0, "change code, btos != 0");
  1.2509 +  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
  1.2510 +  __ jcc(Assembler::notZero, notByte);
  1.2511 +
  1.2512 +  // btos
  1.2513 +  {
  1.2514 +    __ pop(btos);
  1.2515 +    if (!is_static) pop_and_check_object(obj);
  1.2516 +    __ movb(field, rax);
  1.2517 +    if (!is_static) {
  1.2518 +      patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no);
  1.2519 +    }
  1.2520 +    __ jmp(Done);
  1.2521 +  }
  1.2522 +
  1.2523 +  __ bind(notByte);
  1.2524 +  __ cmpl(flags, atos);
  1.2525 +  __ jcc(Assembler::notEqual, notObj);
  1.2526 +
  1.2527 +  // atos
  1.2528 +  {
  1.2529 +    __ pop(atos);
  1.2530 +    if (!is_static) pop_and_check_object(obj);
  1.2531 +    // Store into the field
  1.2532 +    do_oop_store(_masm, field, rax, _bs->kind(), false);
  1.2533 +    if (!is_static) {
  1.2534 +      patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no);
  1.2535 +    }
  1.2536 +    __ jmp(Done);
  1.2537 +  }
  1.2538 +
  1.2539 +  __ bind(notObj);
  1.2540 +  __ cmpl(flags, itos);
  1.2541 +  __ jcc(Assembler::notEqual, notInt);
  1.2542 +
  1.2543 +  // itos
  1.2544 +  {
  1.2545 +    __ pop(itos);
  1.2546 +    if (!is_static) pop_and_check_object(obj);
  1.2547 +    __ movl(field, rax);
  1.2548 +    if (!is_static) {
  1.2549 +      patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no);
  1.2550 +    }
  1.2551 +    __ jmp(Done);
  1.2552 +  }
  1.2553 +
  1.2554 +  __ bind(notInt);
  1.2555 +  __ cmpl(flags, ctos);
  1.2556 +  __ jcc(Assembler::notEqual, notChar);
  1.2557 +
  1.2558 +  // ctos
  1.2559 +  {
  1.2560 +    __ pop(ctos);
  1.2561 +    if (!is_static) pop_and_check_object(obj);
  1.2562 +    __ movw(field, rax);
  1.2563 +    if (!is_static) {
  1.2564 +      patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no);
  1.2565 +    }
  1.2566 +    __ jmp(Done);
  1.2567 +  }
  1.2568 +
  1.2569 +  __ bind(notChar);
  1.2570 +  __ cmpl(flags, stos);
  1.2571 +  __ jcc(Assembler::notEqual, notShort);
  1.2572 +
  1.2573 +  // stos
  1.2574 +  {
  1.2575 +    __ pop(stos);
  1.2576 +    if (!is_static) pop_and_check_object(obj);
  1.2577 +    __ movw(field, rax);
  1.2578 +    if (!is_static) {
  1.2579 +      patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no);
  1.2580 +    }
  1.2581 +    __ jmp(Done);
  1.2582 +  }
  1.2583 +
  1.2584 +  __ bind(notShort);
  1.2585 +  __ cmpl(flags, ltos);
  1.2586 +  __ jcc(Assembler::notEqual, notLong);
  1.2587 +
  1.2588 +  // ltos
  1.2589 +  {
  1.2590 +    __ pop(ltos);
  1.2591 +    if (!is_static) pop_and_check_object(obj);
  1.2592 +    __ movq(field, rax);
  1.2593 +    if (!is_static) {
  1.2594 +      patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no);
  1.2595 +    }
  1.2596 +    __ jmp(Done);
  1.2597 +  }
  1.2598 +
  1.2599 +  __ bind(notLong);
  1.2600 +  __ cmpl(flags, ftos);
  1.2601 +  __ jcc(Assembler::notEqual, notFloat);
  1.2602 +
  1.2603 +  // ftos
  1.2604 +  {
  1.2605 +    __ pop(ftos);
  1.2606 +    if (!is_static) pop_and_check_object(obj);
  1.2607 +    __ movflt(field, xmm0);
  1.2608 +    if (!is_static) {
  1.2609 +      patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
  1.2610 +    }
  1.2611 +    __ jmp(Done);
  1.2612 +  }
  1.2613 +
  1.2614 +  __ bind(notFloat);
  1.2615 +#ifdef ASSERT
  1.2616 +  __ cmpl(flags, dtos);
  1.2617 +  __ jcc(Assembler::notEqual, notDouble);
  1.2618 +#endif
  1.2619 +
  1.2620 +  // dtos
  1.2621 +  {
  1.2622 +    __ pop(dtos);
  1.2623 +    if (!is_static) pop_and_check_object(obj);
  1.2624 +    __ movdbl(field, xmm0);
  1.2625 +    if (!is_static) {
  1.2626 +      patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
  1.2627 +    }
  1.2628 +  }
  1.2629 +
  1.2630 +#ifdef ASSERT
  1.2631 +  __ jmp(Done);
  1.2632 +
  1.2633 +  __ bind(notDouble);
  1.2634 +  __ stop("Bad state");
  1.2635 +#endif
  1.2636 +
  1.2637 +  __ bind(Done);
  1.2638 +
  1.2639 +  // Check for volatile store
  1.2640 +  __ testl(rdx, rdx);
  1.2641 +  __ jcc(Assembler::zero, notVolatile);
  1.2642 +  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
  1.2643 +                                               Assembler::StoreStore));
  1.2644 +  __ bind(notVolatile);
  1.2645 +}
  1.2646 +
  1.2647 +void TemplateTable::putfield(int byte_no) {
  1.2648 +  putfield_or_static(byte_no, false);
  1.2649 +}
  1.2650 +
  1.2651 +void TemplateTable::putstatic(int byte_no) {
  1.2652 +  putfield_or_static(byte_no, true);
  1.2653 +}
  1.2654 +
  1.2655 +void TemplateTable::jvmti_post_fast_field_mod() {
  1.2656 +  if (JvmtiExport::can_post_field_modification()) {
  1.2657 +    // Check to see if a field modification watch has been set before
  1.2658 +    // we take the time to call into the VM.
  1.2659 +    Label L2;
  1.2660 +    __ mov32(c_rarg3, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2661 +    __ testl(c_rarg3, c_rarg3);
  1.2662 +    __ jcc(Assembler::zero, L2);
  1.2663 +    __ pop_ptr(rbx);                  // copy the object pointer from tos
  1.2664 +    __ verify_oop(rbx);
  1.2665 +    __ push_ptr(rbx);                 // put the object pointer back on tos
  1.2666 +    // Save tos values before call_VM() clobbers them. Since we have
  1.2667 +    // to do it for every data type, we use the saved values as the
  1.2668 +    // jvalue object.
  1.2669 +    switch (bytecode()) {          // load values into the jvalue object
  1.2670 +    case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
  1.2671 +    case Bytecodes::_fast_bputfield: // fall through
  1.2672 +    case Bytecodes::_fast_sputfield: // fall through
  1.2673 +    case Bytecodes::_fast_cputfield: // fall through
  1.2674 +    case Bytecodes::_fast_iputfield: __ push_i(rax); break;
  1.2675 +    case Bytecodes::_fast_dputfield: __ push_d(); break;
  1.2676 +    case Bytecodes::_fast_fputfield: __ push_f(); break;
  1.2677 +    case Bytecodes::_fast_lputfield: __ push_l(rax); break;
  1.2678 +
  1.2679 +    default:
  1.2680 +      ShouldNotReachHere();
  1.2681 +    }
  1.2682 +    __ mov(c_rarg3, rsp);             // points to jvalue on the stack
  1.2683 +    // access constant pool cache entry
  1.2684 +    __ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1);
  1.2685 +    __ verify_oop(rbx);
  1.2686 +    // rbx: object pointer copied above
  1.2687 +    // c_rarg2: cache entry pointer
  1.2688 +    // c_rarg3: jvalue object on the stack
  1.2689 +    __ call_VM(noreg,
  1.2690 +               CAST_FROM_FN_PTR(address,
  1.2691 +                                InterpreterRuntime::post_field_modification),
  1.2692 +               rbx, c_rarg2, c_rarg3);
  1.2693 +
  1.2694 +    switch (bytecode()) {             // restore tos values
  1.2695 +    case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
  1.2696 +    case Bytecodes::_fast_bputfield: // fall through
  1.2697 +    case Bytecodes::_fast_sputfield: // fall through
  1.2698 +    case Bytecodes::_fast_cputfield: // fall through
  1.2699 +    case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
  1.2700 +    case Bytecodes::_fast_dputfield: __ pop_d(); break;
  1.2701 +    case Bytecodes::_fast_fputfield: __ pop_f(); break;
  1.2702 +    case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
  1.2703 +    }
  1.2704 +    __ bind(L2);
  1.2705 +  }
  1.2706 +}
  1.2707 +
  1.2708 +void TemplateTable::fast_storefield(TosState state) {
  1.2709 +  transition(state, vtos);
  1.2710 +
  1.2711 +  ByteSize base = ConstantPoolCache::base_offset();
  1.2712 +
  1.2713 +  jvmti_post_fast_field_mod();
  1.2714 +
  1.2715 +  // access constant pool cache
  1.2716 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2717 +
  1.2718 +  // test for volatile with rdx
  1.2719 +  __ movl(rdx, Address(rcx, rbx, Address::times_8,
  1.2720 +                       in_bytes(base +
  1.2721 +                                ConstantPoolCacheEntry::flags_offset())));
  1.2722 +
  1.2723 +  // replace index with field offset from cache entry
  1.2724 +  __ movptr(rbx, Address(rcx, rbx, Address::times_8,
  1.2725 +                         in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
  1.2726 +
  1.2727 +  // [jk] not needed currently
  1.2728 +  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
  1.2729 +  //                                              Assembler::StoreStore));
  1.2730 +
  1.2731 +  Label notVolatile;
  1.2732 +  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
  1.2733 +  __ andl(rdx, 0x1);
  1.2734 +
  1.2735 +  // Get object from stack
  1.2736 +  pop_and_check_object(rcx);
  1.2737 +
  1.2738 +  // field address
  1.2739 +  const Address field(rcx, rbx, Address::times_1);
  1.2740 +
  1.2741 +  // access field
  1.2742 +  switch (bytecode()) {
  1.2743 +  case Bytecodes::_fast_aputfield:
  1.2744 +    do_oop_store(_masm, field, rax, _bs->kind(), false);
  1.2745 +    break;
  1.2746 +  case Bytecodes::_fast_lputfield:
  1.2747 +    __ movq(field, rax);
  1.2748 +    break;
  1.2749 +  case Bytecodes::_fast_iputfield:
  1.2750 +    __ movl(field, rax);
  1.2751 +    break;
  1.2752 +  case Bytecodes::_fast_bputfield:
  1.2753 +    __ movb(field, rax);
  1.2754 +    break;
  1.2755 +  case Bytecodes::_fast_sputfield:
  1.2756 +    // fall through
  1.2757 +  case Bytecodes::_fast_cputfield:
  1.2758 +    __ movw(field, rax);
  1.2759 +    break;
  1.2760 +  case Bytecodes::_fast_fputfield:
  1.2761 +    __ movflt(field, xmm0);
  1.2762 +    break;
  1.2763 +  case Bytecodes::_fast_dputfield:
  1.2764 +    __ movdbl(field, xmm0);
  1.2765 +    break;
  1.2766 +  default:
  1.2767 +    ShouldNotReachHere();
  1.2768 +  }
  1.2769 +
  1.2770 +  // Check for volatile store
  1.2771 +  __ testl(rdx, rdx);
  1.2772 +  __ jcc(Assembler::zero, notVolatile);
  1.2773 +  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
  1.2774 +                                               Assembler::StoreStore));
  1.2775 +  __ bind(notVolatile);
  1.2776 +}
  1.2777 +
  1.2778 +
  1.2779 +void TemplateTable::fast_accessfield(TosState state) {
  1.2780 +  transition(atos, state);
  1.2781 +
  1.2782 +  // Do the JVMTI work here to avoid disturbing the register state below
  1.2783 +  if (JvmtiExport::can_post_field_access()) {
  1.2784 +    // Check to see if a field access watch has been set before we
  1.2785 +    // take the time to call into the VM.
  1.2786 +    Label L1;
  1.2787 +    __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2788 +    __ testl(rcx, rcx);
  1.2789 +    __ jcc(Assembler::zero, L1);
  1.2790 +    // access constant pool cache entry
  1.2791 +    __ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1);
  1.2792 +    __ verify_oop(rax);
  1.2793 +    __ push_ptr(rax);  // save object pointer before call_VM() clobbers it
  1.2794 +    __ mov(c_rarg1, rax);
  1.2795 +    // c_rarg1: object pointer copied above
  1.2796 +    // c_rarg2: cache entry pointer
  1.2797 +    __ call_VM(noreg,
  1.2798 +               CAST_FROM_FN_PTR(address,
  1.2799 +                                InterpreterRuntime::post_field_access),
  1.2800 +               c_rarg1, c_rarg2);
  1.2801 +    __ pop_ptr(rax); // restore object pointer
  1.2802 +    __ bind(L1);
  1.2803 +  }
  1.2804 +
  1.2805 +  // access constant pool cache
  1.2806 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2807 +  // replace index with field offset from cache entry
  1.2808 +  // [jk] not needed currently
  1.2809 +  // if (os::is_MP()) {
  1.2810 +  //   __ movl(rdx, Address(rcx, rbx, Address::times_8,
  1.2811 +  //                        in_bytes(ConstantPoolCache::base_offset() +
  1.2812 +  //                                 ConstantPoolCacheEntry::flags_offset())));
  1.2813 +  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
  1.2814 +  //   __ andl(rdx, 0x1);
  1.2815 +  // }
  1.2816 +  __ movptr(rbx, Address(rcx, rbx, Address::times_8,
  1.2817 +                         in_bytes(ConstantPoolCache::base_offset() +
  1.2818 +                                  ConstantPoolCacheEntry::f2_offset())));
  1.2819 +
  1.2820 +  // rax: object
  1.2821 +  __ verify_oop(rax);
  1.2822 +  __ null_check(rax);
  1.2823 +  Address field(rax, rbx, Address::times_1);
  1.2824 +
  1.2825 +  // access field
  1.2826 +  switch (bytecode()) {
  1.2827 +  case Bytecodes::_fast_agetfield:
  1.2828 +    __ load_heap_oop(rax, field);
  1.2829 +    __ verify_oop(rax);
  1.2830 +    break;
  1.2831 +  case Bytecodes::_fast_lgetfield:
  1.2832 +    __ movq(rax, field);
  1.2833 +    break;
  1.2834 +  case Bytecodes::_fast_igetfield:
  1.2835 +    __ movl(rax, field);
  1.2836 +    break;
  1.2837 +  case Bytecodes::_fast_bgetfield:
  1.2838 +    __ movsbl(rax, field);
  1.2839 +    break;
  1.2840 +  case Bytecodes::_fast_sgetfield:
  1.2841 +    __ load_signed_short(rax, field);
  1.2842 +    break;
  1.2843 +  case Bytecodes::_fast_cgetfield:
  1.2844 +    __ load_unsigned_short(rax, field);
  1.2845 +    break;
  1.2846 +  case Bytecodes::_fast_fgetfield:
  1.2847 +    __ movflt(xmm0, field);
  1.2848 +    break;
  1.2849 +  case Bytecodes::_fast_dgetfield:
  1.2850 +    __ movdbl(xmm0, field);
  1.2851 +    break;
  1.2852 +  default:
  1.2853 +    ShouldNotReachHere();
  1.2854 +  }
  1.2855 +  // [jk] not needed currently
  1.2856 +  // if (os::is_MP()) {
  1.2857 +  //   Label notVolatile;
  1.2858 +  //   __ testl(rdx, rdx);
  1.2859 +  //   __ jcc(Assembler::zero, notVolatile);
  1.2860 +  //   __ membar(Assembler::LoadLoad);
  1.2861 +  //   __ bind(notVolatile);
  1.2862 +  //};
  1.2863 +}
  1.2864 +
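// fast_xaccess implements the _fast_{i,a,f}access_0 bytecodes: a rewritten
// "aload_0; fast getfield" pair that loads a field of the receiver (local 0)
// in a single step.  Illustrative example: a getter body "return this.value;"
// typically ends up as _fast_iaccess_0 once the field reference is resolved.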
  1.2865 +void TemplateTable::fast_xaccess(TosState state) {
  1.2866 +  transition(vtos, state);
  1.2867 +
  1.2868 +  // get receiver
  1.2869 +  __ movptr(rax, aaddress(0));
  1.2870 +  // access constant pool cache
  1.2871 +  __ get_cache_and_index_at_bcp(rcx, rdx, 2);
  1.2872 +  __ movptr(rbx,
  1.2873 +            Address(rcx, rdx, Address::times_8,
  1.2874 +                    in_bytes(ConstantPoolCache::base_offset() +
  1.2875 +                             ConstantPoolCacheEntry::f2_offset())));
  1.2876 +  // make sure exception is reported in correct bcp range (getfield is
  1.2877 +  // next instruction)
  1.2878 +  __ increment(r13);
  1.2879 +  __ null_check(rax);
  1.2880 +  switch (state) {
  1.2881 +  case itos:
  1.2882 +    __ movl(rax, Address(rax, rbx, Address::times_1));
  1.2883 +    break;
  1.2884 +  case atos:
  1.2885 +    __ load_heap_oop(rax, Address(rax, rbx, Address::times_1));
  1.2886 +    __ verify_oop(rax);
  1.2887 +    break;
  1.2888 +  case ftos:
  1.2889 +    __ movflt(xmm0, Address(rax, rbx, Address::times_1));
  1.2890 +    break;
  1.2891 +  default:
  1.2892 +    ShouldNotReachHere();
  1.2893 +  }
  1.2894 +
  1.2895 +  // [jk] not needed currently
  1.2896 +  // if (os::is_MP()) {
  1.2897 +  //   Label notVolatile;
  1.2898 +  //   __ movl(rdx, Address(rcx, rdx, Address::times_8,
  1.2899 +  //                        in_bytes(ConstantPoolCache::base_offset() +
  1.2900 +  //                                 ConstantPoolCacheEntry::flags_offset())));
  1.2901 +  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
  1.2902 +  //   __ testl(rdx, 0x1);
  1.2903 +  //   __ jcc(Assembler::zero, notVolatile);
  1.2904 +  //   __ membar(Assembler::LoadLoad);
  1.2905 +  //   __ bind(notVolatile);
  1.2906 +  // }
  1.2907 +
  1.2908 +  __ decrement(r13);
  1.2909 +}
  1.2910 +
  1.2911 +
  1.2912 +
  1.2913 +//-----------------------------------------------------------------------------
  1.2914 +// Calls
  1.2915 +
  1.2916 +void TemplateTable::count_calls(Register method, Register temp) {
  1.2917 +  // implemented elsewhere
  1.2918 +  ShouldNotReachHere();
  1.2919 +}
  1.2920 +
  1.2921 +void TemplateTable::prepare_invoke(int byte_no,
  1.2922 +                                   Register method,  // linked method (or i-klass)
  1.2923 +                                   Register index,   // itable index, MethodType, etc.
  1.2924 +                                   Register recv,    // if caller wants to see it
  1.2925 +                                   Register flags    // if caller wants to test it
  1.2926 +                                   ) {
  1.2927 +  // determine flags
  1.2928 +  const Bytecodes::Code code = bytecode();
  1.2929 +  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  1.2930 +  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
  1.2931 +  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
  1.2932 +  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  1.2933 +  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  1.2934 +  const bool load_receiver       = (recv  != noreg);
  1.2935 +  const bool save_flags          = (flags != noreg);
  1.2936 +  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
  1.2937 +  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  1.2938 +  assert(flags == noreg || flags == rdx, "");
  1.2939 +  assert(recv  == noreg || recv  == rcx, "");
  1.2940 +
  1.2941 +  // setup registers & access constant pool cache
  1.2942 +  if (recv  == noreg)  recv  = rcx;
  1.2943 +  if (flags == noreg)  flags = rdx;
  1.2944 +  assert_different_registers(method, index, recv, flags);
  1.2945 +
  1.2946 +  // save 'interpreter return address'
  1.2947 +  __ save_bcp();
  1.2948 +
  1.2949 +  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
  1.2950 +
  1.2951 +  // maybe push appendix to arguments (just before return address)
  1.2952 +  if (is_invokedynamic || is_invokehandle) {
  1.2953 +    Label L_no_push;
  1.2954 +    __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
  1.2955 +    __ jcc(Assembler::zero, L_no_push);
  1.2956 +    // Push the appendix as a trailing parameter.
  1.2957 +    // This must be done before we get the receiver,
  1.2958 +    // since the parameter_size includes it.
  1.2959 +    __ push(rbx);
  1.2960 +    __ mov(rbx, index);
  1.2961 +    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
  1.2962 +    __ load_resolved_reference_at_index(index, rbx);
  1.2963 +    __ pop(rbx);
  1.2964 +    __ push(index);  // push appendix (MethodType, CallSite, etc.)
  1.2965 +    __ bind(L_no_push);
  1.2966 +  }
  1.2967 +
  1.2968 +  // load receiver if needed (after appendix is pushed so parameter size is correct)
  1.2969 +  // Note: no return address pushed yet
  1.2970 +  if (load_receiver) {
  1.2971 +    __ movl(recv, flags);
  1.2972 +    __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
  1.2973 +    const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
  1.2974 +    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
  1.2975 +    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
  1.2976 +    __ movptr(recv, recv_addr);
  1.2977 +    __ verify_oop(recv);
  1.2978 +  }
  1.2979 +
  1.2980 +  if (save_flags) {
  1.2981 +    __ movl(r13, flags);
  1.2982 +  }
  1.2983 +
  1.2984 +  // compute return type
  1.2985 +  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  1.2986 +  // Make sure we don't need to mask flags after the above shift
  1.2987 +  ConstantPoolCacheEntry::verify_tos_state_shift();
  1.2988 +  // load return address
  1.2989 +  {
  1.2990 +    const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
  1.2991 +    ExternalAddress table(table_addr);
  1.2992 +    __ lea(rscratch1, table);
  1.2993 +    __ movptr(flags, Address(rscratch1, flags, Address::times_ptr));
  1.2994 +  }
  1.2995 +
  1.2996 +  // push return address
  1.2997 +  __ push(flags);
  1.2998 +
  1.2999 +  // Restore the flags value from where it was saved (r13), and restore
  1.3000 +  // r13 itself (the bytecode pointer) for later null checks.
  1.3001 +  if (save_flags) {
  1.3002 +    __ movl(flags, r13);
  1.3003 +    __ restore_bcp();
  1.3004 +  }
  1.3005 +}
  1.3006 +
  1.3007 +
  1.3008 +void TemplateTable::invokevirtual_helper(Register index,
  1.3009 +                                         Register recv,
  1.3010 +                                         Register flags) {
  1.3011 +  // Uses temporary registers rax, rdx
  1.3012 +  assert_different_registers(index, recv, rax, rdx);
  1.3013 +  assert(index == rbx, "");
  1.3014 +  assert(recv  == rcx, "");
  1.3015 +
  1.3016 +  // Test for an invoke of a final method
  1.3017 +  Label notFinal;
  1.3018 +  __ movl(rax, flags);
  1.3019 +  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  1.3020 +  __ jcc(Assembler::zero, notFinal);
  1.3021 +
  1.3022 +  const Register method = index;  // method must be rbx
  1.3023 +  assert(method == rbx,
  1.3024 +         "Method* must be rbx for interpreter calling convention");
  1.3025 +
  1.3026 +  // do the call - the index is actually the method to call
  1.3027 +  // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
  1.3028 +
  1.3029 +  // It's final, need a null check here!
  1.3030 +  __ null_check(recv);
  1.3031 +
  1.3032 +  // profile this call
  1.3033 +  __ profile_final_call(rax);
  1.3034 +  __ profile_arguments_type(rax, method, r13, true);
  1.3035 +
  1.3036 +  __ jump_from_interpreted(method, rax);
  1.3037 +
  1.3038 +  __ bind(notFinal);
  1.3039 +
  1.3040 +  // get receiver klass
  1.3041 +  __ null_check(recv, oopDesc::klass_offset_in_bytes());
  1.3042 +  __ load_klass(rax, recv);
  1.3043 +
  1.3044 +  // profile this call
  1.3045 +  __ profile_virtual_call(rax, r14, rdx);
  1.3046 +
  1.3047 +  // get target Method* & entry point
  1.3048 +  __ lookup_virtual_method(rax, index, method);
  1.3049 +  __ profile_arguments_type(rdx, method, r13, true);
  1.3050 +  __ jump_from_interpreted(method, rdx);
  1.3051 +}
  1.3052 +
  1.3053 +
  1.3054 +void TemplateTable::invokevirtual(int byte_no) {
  1.3055 +  transition(vtos, vtos);
  1.3056 +  assert(byte_no == f2_byte, "use this argument");
  1.3057 +  prepare_invoke(byte_no,
  1.3058 +                 rbx,    // method or vtable index
  1.3059 +                 noreg,  // unused itable index
  1.3060 +                 rcx, rdx); // recv, flags
  1.3061 +
  1.3062 +  // rbx: index
  1.3063 +  // rcx: receiver
  1.3064 +  // rdx: flags
  1.3065 +
  1.3066 +  invokevirtual_helper(rbx, rcx, rdx);
  1.3067 +}
  1.3068 +
  1.3069 +
  1.3070 +void TemplateTable::invokespecial(int byte_no) {
  1.3071 +  transition(vtos, vtos);
  1.3072 +  assert(byte_no == f1_byte, "use this argument");
  1.3073 +  prepare_invoke(byte_no, rbx, noreg,  // get f1 Method*
  1.3074 +                 rcx);  // get receiver also for null check
  1.3075 +  __ verify_oop(rcx);
  1.3076 +  __ null_check(rcx);
  1.3077 +  // do the call
  1.3078 +  __ profile_call(rax);
  1.3079 +  __ profile_arguments_type(rax, rbx, r13, false);
  1.3080 +  __ jump_from_interpreted(rbx, rax);
  1.3081 +}
  1.3082 +
  1.3083 +
  1.3084 +void TemplateTable::invokestatic(int byte_no) {
  1.3085 +  transition(vtos, vtos);
  1.3086 +  assert(byte_no == f1_byte, "use this argument");
  1.3087 +  prepare_invoke(byte_no, rbx);  // get f1 Method*
  1.3088 +  // do the call
  1.3089 +  __ profile_call(rax);
  1.3090 +  __ profile_arguments_type(rax, rbx, r13, false);
  1.3091 +  __ jump_from_interpreted(rbx, rax);
  1.3092 +}
  1.3093 +
  1.3094 +void TemplateTable::fast_invokevfinal(int byte_no) {
  1.3095 +  transition(vtos, vtos);
  1.3096 +  assert(byte_no == f2_byte, "use this argument");
  1.3097 +  __ stop("fast_invokevfinal not used on amd64");
  1.3098 +}
  1.3099 +
  1.3100 +void TemplateTable::invokeinterface(int byte_no) {
  1.3101 +  transition(vtos, vtos);
  1.3102 +  assert(byte_no == f1_byte, "use this argument");
  1.3103 +  prepare_invoke(byte_no, rax, rbx,  // get f1 Klass*, f2 itable index
  1.3104 +                 rcx, rdx); // recv, flags
  1.3105 +
  1.3106 +  // rax: interface klass (from f1)
  1.3107 +  // rbx: itable index (from f2)
  1.3108 +  // rcx: receiver
  1.3109 +  // rdx: flags
  1.3110 +
  1.3111 +  // Special case of invokeinterface called for virtual method of
  1.3112 +  // java.lang.Object.  See cpCacheOop.cpp for details.
  1.3113 +  // This code isn't produced by javac, but could be produced by
  1.3114 +  // another compliant Java compiler.
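  // Illustrative example: with "List l = ...;" the call "l.hashCode()" invokes
  // a method declared in java.lang.Object through an interface reference; a
  // compiler may emit it as invokeinterface, and it must then be dispatched
  // like invokevirtual (the forced-virtual path below).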
  1.3115 +  Label notMethod;
  1.3116 +  __ movl(r14, rdx);
  1.3117 +  __ andl(r14, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
  1.3118 +  __ jcc(Assembler::zero, notMethod);
  1.3119 +
  1.3120 +  invokevirtual_helper(rbx, rcx, rdx);
  1.3121 +  __ bind(notMethod);
  1.3122 +
  1.3123 +  // Get receiver klass into rdx - also a null check
  1.3124 +  __ restore_locals();  // restore r14
  1.3125 +  __ null_check(rcx, oopDesc::klass_offset_in_bytes());
  1.3126 +  __ load_klass(rdx, rcx);
  1.3127 +
  1.3128 +  // profile this call
  1.3129 +  __ profile_virtual_call(rdx, r13, r14);
  1.3130 +
  1.3131 +  Label no_such_interface, no_such_method;
  1.3132 +
  1.3133 +  __ lookup_interface_method(// inputs: rec. class, interface, itable index
  1.3134 +                             rdx, rax, rbx,
  1.3135 +                             // outputs: method, scan temp. reg
  1.3136 +                             rbx, r13,
  1.3137 +                             no_such_interface);
  1.3138 +
  1.3139 +  // rbx: Method* to call
  1.3140 +  // rcx: receiver
  1.3141 +  // Check for abstract method error
  1.3142 +  // Note: This should be done more efficiently via a throw_abstract_method_error
  1.3143 +  //       interpreter entry point and a conditional jump to it in case of a null
  1.3144 +  //       method.
  1.3145 +  __ testptr(rbx, rbx);
  1.3146 +  __ jcc(Assembler::zero, no_such_method);
  1.3147 +
  1.3148 +  __ profile_arguments_type(rdx, rbx, r13, true);
  1.3149 +
  1.3150 +  // do the call
  1.3151 +  // rcx: receiver
  1.3152 +  // rbx: Method*
  1.3153 +  __ jump_from_interpreted(rbx, rdx);
  1.3154 +  __ should_not_reach_here();
  1.3155 +
  1.3156 +  // exception handling code follows...
  1.3157 +  // note: must restore interpreter registers to canonical
  1.3158 +  //       state for exception handling to work correctly!
  1.3159 +
  1.3160 +  __ bind(no_such_method);
  1.3161 +  // throw exception
  1.3162 +  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
  1.3163 +  __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
  1.3164 +  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
  1.3165 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
  1.3166 +  // the call_VM checks for exception, so we should never return here.
  1.3167 +  __ should_not_reach_here();
  1.3168 +
  1.3169 +  __ bind(no_such_interface);
  1.3170 +  // throw exception
  1.3171 +  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
  1.3172 +  __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
  1.3173 +  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
  1.3174 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.3175 +                   InterpreterRuntime::throw_IncompatibleClassChangeError));
  1.3176 +  // the call_VM checks for exception, so we should never return here.
  1.3177 +  __ should_not_reach_here();
  1.3178 +}
  1.3179 +
  1.3180 +
  1.3181 +void TemplateTable::invokehandle(int byte_no) {
  1.3182 +  transition(vtos, vtos);
  1.3183 +  assert(byte_no == f1_byte, "use this argument");
  1.3184 +  const Register rbx_method = rbx;
  1.3185 +  const Register rax_mtype  = rax;
  1.3186 +  const Register rcx_recv   = rcx;
  1.3187 +  const Register rdx_flags  = rdx;
  1.3188 +
  1.3189 +  if (!EnableInvokeDynamic) {
  1.3190 +    // rewriter does not generate this bytecode
  1.3191 +    __ should_not_reach_here();
  1.3192 +    return;
  1.3193 +  }
  1.3194 +
  1.3195 +  prepare_invoke(byte_no, rbx_method, rax_mtype, rcx_recv);
  1.3196 +  __ verify_method_ptr(rbx_method);
  1.3197 +  __ verify_oop(rcx_recv);
  1.3198 +  __ null_check(rcx_recv);
  1.3199 +
  1.3200 +  // rax: MethodType object (from cpool->resolved_references[f1], if necessary)
  1.3201 +  // rbx: MH.invokeExact_MT method (from f2)
  1.3202 +
  1.3203 +  // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
  1.3204 +
  1.3205 +  // FIXME: profile the LambdaForm also
  1.3206 +  __ profile_final_call(rax);
  1.3207 +  __ profile_arguments_type(rdx, rbx_method, r13, true);
  1.3208 +
  1.3209 +  __ jump_from_interpreted(rbx_method, rdx);
  1.3210 +}
  1.3211 +
  1.3212 +
  1.3213 +void TemplateTable::invokedynamic(int byte_no) {
  1.3214 +  transition(vtos, vtos);
  1.3215 +  assert(byte_no == f1_byte, "use this argument");
  1.3216 +
  1.3217 +  if (!EnableInvokeDynamic) {
  1.3218 +    // We should not encounter this bytecode if !EnableInvokeDynamic.
  1.3219 +    // The verifier will stop it.  However, if we get past the verifier,
  1.3220 +    // this will stop the thread in a reasonable way, without crashing the JVM.
  1.3221 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.3222 +                     InterpreterRuntime::throw_IncompatibleClassChangeError));
  1.3223 +    // the call_VM checks for exception, so we should never return here.
  1.3224 +    __ should_not_reach_here();
  1.3225 +    return;
  1.3226 +  }
  1.3227 +
  1.3228 +  const Register rbx_method   = rbx;
  1.3229 +  const Register rax_callsite = rax;
  1.3230 +
  1.3231 +  prepare_invoke(byte_no, rbx_method, rax_callsite);
  1.3232 +
  1.3233 +  // rax: CallSite object (from cpool->resolved_references[f1])
  1.3234 +  // rbx: MH.linkToCallSite method (from f2)
  1.3235 +
  1.3236 +  // Note:  rax_callsite is already pushed by prepare_invoke
  1.3237 +
  1.3238 +  // %%% should make a type profile for any invokedynamic that takes a ref argument
  1.3239 +  // profile this call
  1.3240 +  __ profile_call(r13);
  1.3241 +  __ profile_arguments_type(rdx, rbx_method, r13, false);
  1.3242 +
  1.3243 +  __ verify_oop(rax_callsite);
  1.3244 +
  1.3245 +  __ jump_from_interpreted(rbx_method, rdx);
  1.3246 +}
  1.3247 +
  1.3248 +
  1.3249 +//-----------------------------------------------------------------------------
  1.3250 +// Allocation
  1.3251 +
  1.3252 +void TemplateTable::_new() {
  1.3253 +  transition(vtos, atos);
  1.3254 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3255 +  Label slow_case;
  1.3256 +  Label done;
  1.3257 +  Label initialize_header;
  1.3258 +  Label initialize_object; // including clearing the fields
  1.3259 +  Label allocate_shared;
  1.3260 +
  1.3261 +  __ get_cpool_and_tags(rsi, rax);
  1.3262 +  // Make sure the class we're about to instantiate has been resolved.
  1.3263 +  // This is done before loading the InstanceKlass to be consistent with the
  1.3264 +  // order in which the ConstantPool is updated (see ConstantPool::klass_at_put).
  1.3265 +  const int tags_offset = Array<u1>::base_offset_in_bytes();
  1.3266 +  __ cmpb(Address(rax, rdx, Address::times_1, tags_offset),
  1.3267 +          JVM_CONSTANT_Class);
  1.3268 +  __ jcc(Assembler::notEqual, slow_case);
  1.3269 +
  1.3270 +  // get InstanceKlass
  1.3271 +  __ movptr(rsi, Address(rsi, rdx,
  1.3272 +            Address::times_8, sizeof(ConstantPool)));
  1.3273 +
  1.3274 +  // make sure klass is initialized & doesn't have finalizer
  1.3275 +  // make sure klass is fully initialized
  1.3276 +  __ cmpb(Address(rsi,
  1.3277 +                  InstanceKlass::init_state_offset()),
  1.3278 +          InstanceKlass::fully_initialized);
  1.3279 +  __ jcc(Assembler::notEqual, slow_case);
  1.3280 +
  1.3281 +  // get instance_size in InstanceKlass (scaled to a count of bytes)
  1.3282 +  __ movl(rdx,
  1.3283 +          Address(rsi,
  1.3284 +                  Klass::layout_helper_offset()));
  1.3285 +  // test to see if it has a finalizer or is malformed in some way
  1.3286 +  __ testl(rdx, Klass::_lh_instance_slow_path_bit);
  1.3287 +  __ jcc(Assembler::notZero, slow_case);
  1.3288 +
  1.3289 +  // Allocate the instance
  1.3290 +  // 1) Try to allocate in the TLAB
  1.3291 +  // 2) if fail and the object is large allocate in the shared Eden
  1.3292 +  // 3) if the above fails (or is not applicable), go to a slow case
  1.3293 +  // (creates a new TLAB, etc.)
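  // A rough sketch of the TLAB fast path generated below (illustrative only;
  // field names approximate):
  //
  //   address obj = thread->tlab_top();
  //   address end = obj + instance_size_in_bytes;
  //   if (end > thread->tlab_end()) goto shared_or_slow_case;
  //   thread->tlab_top() = end;   // bump pointer; TLAB is thread-local, no atomics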
  1.3294 +
  1.3295 +  const bool allow_shared_alloc =
  1.3296 +    Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
  1.3297 +
  1.3298 +  if (UseTLAB) {
  1.3299 +    __ movptr(rax, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
  1.3300 +    __ lea(rbx, Address(rax, rdx, Address::times_1));
  1.3301 +    __ cmpptr(rbx, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
  1.3302 +    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
  1.3303 +    __ movptr(Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
  1.3304 +    if (ZeroTLAB) {
  1.3305 +      // the fields have been already cleared
  1.3306 +      __ jmp(initialize_header);
  1.3307 +    } else {
  1.3308 +      // initialize both the header and fields
  1.3309 +      __ jmp(initialize_object);
  1.3310 +    }
  1.3311 +  }
  1.3312 +
  1.3313 +  // Allocation in the shared Eden, if allowed.
  1.3314 +  //
  1.3315 +  // rdx: instance size in bytes
  1.3316 +  if (allow_shared_alloc) {
  1.3317 +    __ bind(allocate_shared);
  1.3318 +
  1.3319 +    ExternalAddress top((address)Universe::heap()->top_addr());
  1.3320 +    ExternalAddress end((address)Universe::heap()->end_addr());
  1.3321 +
  1.3322 +    const Register RtopAddr = rscratch1;
  1.3323 +    const Register RendAddr = rscratch2;
  1.3324 +
  1.3325 +    __ lea(RtopAddr, top);
  1.3326 +    __ lea(RendAddr, end);
  1.3327 +    __ movptr(rax, Address(RtopAddr, 0));
  1.3328 +
  1.3329 +    // For retries rax gets set by cmpxchgq
  1.3330 +    Label retry;
  1.3331 +    __ bind(retry);
  1.3332 +    __ lea(rbx, Address(rax, rdx, Address::times_1));
  1.3333 +    __ cmpptr(rbx, Address(RendAddr, 0));
  1.3334 +    __ jcc(Assembler::above, slow_case);
  1.3335 +
  1.3336 +    // Compare rax with the current heap top; if they are still equal, store
  1.3337 +    // the new top (rbx) through the top pointer.  Sets ZF if the values were
  1.3338 +    // equal and clears it otherwise.  Use the lock prefix for atomicity on MPs.
  1.3339 +    //
  1.3340 +    // rax: object begin
  1.3341 +    // rbx: object end
  1.3342 +    // rdx: instance size in bytes
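    // Roughly equivalent pseudo-code for the retry loop (illustrative, names
    // approximate):
    //   do {
    //     end = top + size;
    //     if (end > heap_end) goto slow_case;
    //   } while (!CAS(&heap_top, top /* in rax; refreshed on failure */, end));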
  1.3343 +    if (os::is_MP()) {
  1.3344 +      __ lock();
  1.3345 +    }
  1.3346 +    __ cmpxchgptr(rbx, Address(RtopAddr, 0));
  1.3347 +
  1.3348 +    // if someone beat us on the allocation, try again, otherwise continue
  1.3349 +    __ jcc(Assembler::notEqual, retry);
  1.3350 +
  1.3351 +    __ incr_allocated_bytes(r15_thread, rdx, 0);
  1.3352 +  }
  1.3353 +
  1.3354 +  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
  1.3355 +    // The object is initialized before the header.  If the object size is
  1.3356 +    // zero, go directly to the header initialization.
  1.3357 +    __ bind(initialize_object);
  1.3358 +    __ decrementl(rdx, sizeof(oopDesc));
  1.3359 +    __ jcc(Assembler::zero, initialize_header);
  1.3360 +
  1.3361 +    // Initialize object fields
  1.3362 +    __ xorl(rcx, rcx); // use zero reg to clear memory (shorter code)
  1.3363 +    __ shrl(rdx, LogBytesPerLong);  // divide by oopSize to simplify the loop
  1.3364 +    {
  1.3365 +      Label loop;
  1.3366 +      __ bind(loop);
  1.3367 +      __ movq(Address(rax, rdx, Address::times_8,
  1.3368 +                      sizeof(oopDesc) - oopSize),
  1.3369 +              rcx);
  1.3370 +      __ decrementl(rdx);
  1.3371 +      __ jcc(Assembler::notZero, loop);
  1.3372 +    }
  1.3373 +
  1.3374 +    // initialize object header only.
  1.3375 +    __ bind(initialize_header);
  1.3376 +    if (UseBiasedLocking) {
  1.3377 +      __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset()));
  1.3378 +      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1);
  1.3379 +    } else {
  1.3380 +      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
  1.3381 +               (intptr_t) markOopDesc::prototype()); // header (address 0x1)
  1.3382 +    }
  1.3383 +    __ xorl(rcx, rcx); // use zero reg to clear memory (shorter code)
  1.3384 +    __ store_klass_gap(rax, rcx);  // zero klass gap for compressed oops
  1.3385 +    __ store_klass(rax, rsi);      // store klass last
  1.3386 +
  1.3387 +    {
  1.3388 +      SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
  1.3389 +      // Trigger dtrace event for fastpath
  1.3390 +      __ push(atos); // save the return value
  1.3391 +      __ call_VM_leaf(
  1.3392 +           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
  1.3393 +      __ pop(atos); // restore the return value
  1.3394 +
  1.3395 +    }
  1.3396 +    __ jmp(done);
  1.3397 +  }
  1.3398 +
  1.3399 +
  1.3400 +  // slow case
  1.3401 +  __ bind(slow_case);
  1.3402 +  __ get_constant_pool(c_rarg1);
  1.3403 +  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
  1.3404 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
  1.3405 +  __ verify_oop(rax);
  1.3406 +
  1.3407 +  // continue
  1.3408 +  __ bind(done);
  1.3409 +}
  1.3410 +
  1.3411 +void TemplateTable::newarray() {
  1.3412 +  transition(itos, atos);
  1.3413 +  __ load_unsigned_byte(c_rarg1, at_bcp(1));
  1.3414 +  __ movl(c_rarg2, rax);
  1.3415 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
  1.3416 +          c_rarg1, c_rarg2);
  1.3417 +}
  1.3418 +
  1.3419 +void TemplateTable::anewarray() {
  1.3420 +  transition(itos, atos);
  1.3421 +  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
  1.3422 +  __ get_constant_pool(c_rarg1);
  1.3423 +  __ movl(c_rarg3, rax);
  1.3424 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
  1.3425 +          c_rarg1, c_rarg2, c_rarg3);
  1.3426 +}
  1.3427 +
  1.3428 +void TemplateTable::arraylength() {
  1.3429 +  transition(atos, itos);
  1.3430 +  __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
  1.3431 +  __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
  1.3432 +}
  1.3433 +
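// checkcast and instanceof share the same resolution pattern: if the constant
// pool tag shows the class entry is already resolved ("quicked"), the Klass*
// is loaded straight from the constant pool; otherwise the VM is called to
// resolve it first.  Illustrative example: "(Number) x" throws
// ClassCastException when x is non-null and not a Number; a null x passes
// the cast unchanged.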
  1.3434 +void TemplateTable::checkcast() {
  1.3435 +  transition(atos, atos);
  1.3436 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3437 +  __ testptr(rax, rax); // object is in rax
  1.3438 +  __ jcc(Assembler::zero, is_null);
  1.3439 +
  1.3440 +  // Get cpool & tags index
  1.3441 +  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
  1.3442 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
  1.3443 +  // See if bytecode has already been quicked
  1.3444 +  __ cmpb(Address(rdx, rbx,
  1.3445 +                  Address::times_1,
  1.3446 +                  Array<u1>::base_offset_in_bytes()),
  1.3447 +          JVM_CONSTANT_Class);
  1.3448 +  __ jcc(Assembler::equal, quicked);
  1.3449 +  __ push(atos); // save receiver for result, and for GC
  1.3450 +  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  1.3451 +  // vm_result_2 has metadata result
  1.3452 +  __ get_vm_result_2(rax, r15_thread);
  1.3453 +  __ pop_ptr(rdx); // restore receiver
  1.3454 +  __ jmpb(resolved);
  1.3455 +
  1.3456 +  // Get superklass in rax and subklass in rbx
  1.3457 +  __ bind(quicked);
  1.3458 +  __ mov(rdx, rax); // Save object in rdx; rax needed for subtype check
  1.3459 +  __ movptr(rax, Address(rcx, rbx,
  1.3460 +                       Address::times_8, sizeof(ConstantPool)));
  1.3461 +
  1.3462 +  __ bind(resolved);
  1.3463 +  __ load_klass(rbx, rdx);
  1.3464 +
  1.3465 +  // Generate subtype check.  Blows rcx, rdi.  Object in rdx.
  1.3466 +  // Superklass in rax.  Subklass in rbx.
  1.3467 +  __ gen_subtype_check(rbx, ok_is_subtype);
  1.3468 +
  1.3469 +  // Come here on failure
  1.3470 +  __ push_ptr(rdx);
  1.3471 +  // object is at TOS
  1.3472 +  __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
  1.3473 +
  1.3474 +  // Come here on success
  1.3475 +  __ bind(ok_is_subtype);
  1.3476 +  __ mov(rax, rdx); // Restore object from rdx
  1.3477 +
  1.3478 +  // Collect counts on whether this check-cast sees NULLs a lot or not.
  1.3479 +  if (ProfileInterpreter) {
  1.3480 +    __ jmp(done);
  1.3481 +    __ bind(is_null);
  1.3482 +    __ profile_null_seen(rcx);
  1.3483 +  } else {
  1.3484 +    __ bind(is_null);   // same as 'done'
  1.3485 +  }
  1.3486 +  __ bind(done);
  1.3487 +}
  1.3488 +
  1.3489 +void TemplateTable::instanceof() {
  1.3490 +  transition(atos, itos);
  1.3491 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3492 +  __ testptr(rax, rax);
  1.3493 +  __ jcc(Assembler::zero, is_null);
  1.3494 +
  1.3495 +  // Get cpool & tags index
  1.3496 +  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
  1.3497 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
  1.3498 +  // See if bytecode has already been quicked
  1.3499 +  __ cmpb(Address(rdx, rbx,
  1.3500 +                  Address::times_1,
  1.3501 +                  Array<u1>::base_offset_in_bytes()),
  1.3502 +          JVM_CONSTANT_Class);
  1.3503 +  __ jcc(Assembler::equal, quicked);
  1.3504 +
  1.3505 +  __ push(atos); // save receiver for result, and for GC
  1.3506 +  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  1.3507 +  // vm_result_2 has metadata result
  1.3508 +  __ get_vm_result_2(rax, r15_thread);
  1.3509 +  __ pop_ptr(rdx); // restore receiver
  1.3510 +  __ verify_oop(rdx);
  1.3511 +  __ load_klass(rdx, rdx);
  1.3512 +  __ jmpb(resolved);
  1.3513 +
  1.3514 +  // Get superklass in rax and subklass in rdx
  1.3515 +  __ bind(quicked);
  1.3516 +  __ load_klass(rdx, rax);
  1.3517 +  __ movptr(rax, Address(rcx, rbx,
  1.3518 +                         Address::times_8, sizeof(ConstantPool)));
  1.3519 +
  1.3520 +  __ bind(resolved);
  1.3521 +
  1.3522 +  // Generate subtype check.  Blows rcx, rdi
  1.3523 +  // Superklass in rax.  Subklass in rdx.
  1.3524 +  __ gen_subtype_check(rdx, ok_is_subtype);
  1.3525 +
  1.3526 +  // Come here on failure
  1.3527 +  __ xorl(rax, rax);
  1.3528 +  __ jmpb(done);
  1.3529 +  // Come here on success
  1.3530 +  __ bind(ok_is_subtype);
  1.3531 +  __ movl(rax, 1);
  1.3532 +
  1.3533 +  // Collect counts on whether this test sees NULLs a lot or not.
  1.3534 +  if (ProfileInterpreter) {
  1.3535 +    __ jmp(done);
  1.3536 +    __ bind(is_null);
  1.3537 +    __ profile_null_seen(rcx);
  1.3538 +  } else {
  1.3539 +    __ bind(is_null);   // same as 'done'
  1.3540 +  }
  1.3541 +  __ bind(done);
  1.3542 +  // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
  1.3543 +  // rax = 1: obj != NULL and obj is     an instanceof the specified klass
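  // Illustrative example: for "x instanceof Number" the template leaves 1 in
  // rax when x is a non-null Number (e.g. an Integer), and 0 otherwise.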
  1.3544 +}
  1.3545 +
  1.3546 +//-----------------------------------------------------------------------------
  1.3547 +// Breakpoints
  1.3548 +void TemplateTable::_breakpoint() {
  1.3549 +  // Note: We get here even if we are single stepping.
  1.3550 +  // jbug insists on setting breakpoints at every bytecode
  1.3551 +  // even if we are in single step mode.
  1.3552 +
  1.3553 +  transition(vtos, vtos);
  1.3554 +
  1.3555 +  // get the unpatched byte code
  1.3556 +  __ get_method(c_rarg1);
  1.3557 +  __ call_VM(noreg,
  1.3558 +             CAST_FROM_FN_PTR(address,
  1.3559 +                              InterpreterRuntime::get_original_bytecode_at),
  1.3560 +             c_rarg1, r13);
  1.3561 +  __ mov(rbx, rax);
  1.3562 +
  1.3563 +  // post the breakpoint event
  1.3564 +  __ get_method(c_rarg1);
  1.3565 +  __ call_VM(noreg,
  1.3566 +             CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
  1.3567 +             c_rarg1, r13);
  1.3568 +
  1.3569 +  // complete the execution of original bytecode
  1.3570 +  __ dispatch_only_normal(vtos);
  1.3571 +}
  1.3572 +
  1.3573 +//-----------------------------------------------------------------------------
  1.3574 +// Exceptions
  1.3575 +
  1.3576 +void TemplateTable::athrow() {
  1.3577 +  transition(atos, vtos);
  1.3578 +  __ null_check(rax);
  1.3579 +  __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
  1.3580 +}
  1.3581 +
  1.3582 +//-----------------------------------------------------------------------------
  1.3583 +// Synchronization
  1.3584 +//
  1.3585 +// Note: monitorenter & exit are symmetric routines; which is reflected
  1.3586 +//       in the assembly code structure as well
  1.3587 +//
  1.3588 +// Stack layout:
  1.3589 +//
  1.3590 +// [expressions  ] <--- rsp               = expression stack top
  1.3591 +// ..
  1.3592 +// [expressions  ]
  1.3593 +// [monitor entry] <--- monitor block top = expression stack bot
  1.3594 +// ..
  1.3595 +// [monitor entry]
  1.3596 +// [frame data   ] <--- monitor block bot
  1.3597 +// ...
  1.3598 +// [saved rbp    ] <--- rbp
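// Illustrative example: a Java block "synchronized (obj) { ... }" compiles to
// monitorenter/monitorexit around the body, so each pair below finds (or
// allocates) a BasicObjectLock slot for obj in the frame's monitor block.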
  1.3599 +void TemplateTable::monitorenter() {
  1.3600 +  transition(atos, vtos);
  1.3601 +
  1.3602 +  // check for NULL object
  1.3603 +  __ null_check(rax);
  1.3604 +
  1.3605 +  const Address monitor_block_top(
  1.3606 +        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3607 +  const Address monitor_block_bot(
  1.3608 +        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
  1.3609 +  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
  1.3610 +
  1.3611 +  Label allocated;
  1.3612 +
  1.3613 +  // initialize entry pointer
  1.3614 +  __ xorl(c_rarg1, c_rarg1); // points to free slot or NULL
  1.3615 +
  1.3616 +  // find a free slot in the monitor block (result in c_rarg1)
  1.3617 +  {
  1.3618 +    Label entry, loop, exit;
  1.3619 +    __ movptr(c_rarg3, monitor_block_top); // points to current entry,
  1.3620 +                                     // starting with top-most entry
  1.3621 +    __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
  1.3622 +                                     // of monitor block
  1.3623 +    __ jmpb(entry);
  1.3624 +
  1.3625 +    __ bind(loop);
  1.3626 +    // check if current entry is used
  1.3627 +    __ cmpptr(Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL_WORD);
  1.3628 +    // if not used then remember entry in c_rarg1
  1.3629 +    __ cmov(Assembler::equal, c_rarg1, c_rarg3);
  1.3630 +    // check if current entry is for same object
  1.3631 +    __ cmpptr(rax, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
  1.3632 +    // if same object then stop searching
  1.3633 +    __ jccb(Assembler::equal, exit);
  1.3634 +    // otherwise advance to next entry
  1.3635 +    __ addptr(c_rarg3, entry_size);
  1.3636 +    __ bind(entry);
  1.3637 +    // check if bottom reached
  1.3638 +    __ cmpptr(c_rarg3, c_rarg2);
  1.3639 +    // if not at bottom then check this entry
  1.3640 +    __ jcc(Assembler::notEqual, loop);
  1.3641 +    __ bind(exit);
  1.3642 +  }
  1.3643 +
  1.3644 +  __ testptr(c_rarg1, c_rarg1); // check if a slot has been found
  1.3645 +  __ jcc(Assembler::notZero, allocated); // if found, continue with that one
  1.3646 +
  1.3647 +  // allocate one if there's no free slot
  1.3648 +  {
  1.3649 +    Label entry, loop;
  1.3650 +    // 1. compute new pointers             // rsp: old expression stack top
  1.3651 +    __ movptr(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom
  1.3652 +    __ subptr(rsp, entry_size);            // move expression stack top
  1.3653 +    __ subptr(c_rarg1, entry_size);        // move expression stack bottom
  1.3654 +    __ mov(c_rarg3, rsp);                  // set start value for copy loop
  1.3655 +    __ movptr(monitor_block_bot, c_rarg1); // set new monitor block bottom
  1.3656 +    __ jmp(entry);
  1.3657 +    // 2. move expression stack contents
  1.3658 +    __ bind(loop);
  1.3659 +    __ movptr(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
  1.3660 +                                                      // word from old location
  1.3661 +    __ movptr(Address(c_rarg3, 0), c_rarg2);          // and store it at new location
  1.3662 +    __ addptr(c_rarg3, wordSize);                     // advance to next word
  1.3663 +    __ bind(entry);
  1.3664 +    __ cmpptr(c_rarg3, c_rarg1);            // check if bottom reached
  1.3665 +    __ jcc(Assembler::notEqual, loop);      // if not at bottom then
  1.3666 +                                            // copy next word
  1.3667 +  }
  1.3668 +
  1.3669 +  // call run-time routine
  1.3670 +  // c_rarg1: points to monitor entry
  1.3671 +  __ bind(allocated);
  1.3672 +
  1.3673 +  // Increment bcp to point to the next bytecode, so exception
  1.3674 +  // handling for async. exceptions works correctly.
  1.3675 +  // The object has already been popped from the stack, so the
  1.3676 +  // expression stack looks correct.
  1.3677 +  __ increment(r13);
  1.3678 +
  1.3679 +  // store object
  1.3680 +  __ movptr(Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()), rax);
  1.3681 +  __ lock_object(c_rarg1);
  1.3682 +
  1.3683 +  // check to make sure this monitor doesn't cause stack overflow after locking
  1.3684 +  __ save_bcp();  // in case of exception
  1.3685 +  __ generate_stack_overflow_check(0);
  1.3686 +
  1.3687 +  // The bcp has already been incremented. Just need to dispatch to
  1.3688 +  // next instruction.
  1.3689 +  __ dispatch_next(vtos);
  1.3690 +}
  1.3691 +
  1.3692 +
  1.3693 +void TemplateTable::monitorexit() {
  1.3694 +  transition(atos, vtos);
  1.3695 +
  1.3696 +  // check for NULL object
  1.3697 +  __ null_check(rax);
  1.3698 +
  1.3699 +  const Address monitor_block_top(
  1.3700 +        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3701 +  const Address monitor_block_bot(
  1.3702 +        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
  1.3703 +  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
  1.3704 +
  1.3705 +  Label found;
  1.3706 +
  1.3707 +  // find matching slot
  1.3708 +  {
  1.3709 +    Label entry, loop;
  1.3710 +    __ movptr(c_rarg1, monitor_block_top); // points to current entry,
  1.3711 +                                     // starting with top-most entry
  1.3712 +    __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
  1.3713 +                                     // of monitor block
  1.3714 +    __ jmpb(entry);
  1.3715 +
  1.3716 +    __ bind(loop);
  1.3717 +    // check if current entry is for same object
  1.3718 +    __ cmpptr(rax, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
  1.3719 +    // if same object then stop searching
  1.3720 +    __ jcc(Assembler::equal, found);
  1.3721 +    // otherwise advance to next entry
  1.3722 +    __ addptr(c_rarg1, entry_size);
  1.3723 +    __ bind(entry);
  1.3724 +    // check if bottom reached
  1.3725 +    __ cmpptr(c_rarg1, c_rarg2);
  1.3726 +    // if not at bottom then check this entry
  1.3727 +    __ jcc(Assembler::notEqual, loop);
  1.3728 +  }
  1.3729 +
  1.3730 +  // Error handling: unlocking was not block-structured.
  1.3731 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.3732 +                   InterpreterRuntime::throw_illegal_monitor_state_exception));
  1.3733 +  __ should_not_reach_here();
  1.3734 +
  1.3735 +  // call run-time routine
  1.3736 +  // c_rarg1: points to monitor entry
  1.3737 +  __ bind(found);
  1.3738 +  __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)
  1.3739 +  __ unlock_object(c_rarg1);
  1.3740 +  __ pop_ptr(rax); // discard object
  1.3741 +}
  1.3742 +
  1.3743 +
  1.3744 +// Wide instructions
  1.3745 +void TemplateTable::wide() {
  1.3746 +  transition(vtos, vtos);
  1.3747 +  __ load_unsigned_byte(rbx, at_bcp(1));
  1.3748 +  __ lea(rscratch1, ExternalAddress((address)Interpreter::_wentry_point));
  1.3749 +  __ jmp(Address(rscratch1, rbx, Address::times_8));
  1.3750 +  // Note: the r13 increment step is part of the individual wide
  1.3751 +  // bytecode implementations
  1.3752 +}
  1.3753 +
  1.3754 +
  1.3755 +// Multi arrays
  1.3756 +void TemplateTable::multianewarray() {
  1.3757 +  transition(vtos, atos);
  1.3758 +  __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
  1.3759 +  // last dim is on top of stack; we want address of first one:
  1.3760 +  // first_addr = last_addr + (ndims - 1) * wordSize
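  // e.g. with ndims == 3 and wordSize == 8: first_addr = last_addr + 16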
  1.3761 +  __ lea(c_rarg1, Address(rsp, rax, Address::times_8, -wordSize));
  1.3762 +  call_VM(rax,
  1.3763 +          CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
  1.3764 +          c_rarg1);
  1.3765 +  __ load_unsigned_byte(rbx, at_bcp(3));
  1.3766 +  __ lea(rsp, Address(rsp, rbx, Address::times_8));
  1.3767 +}
  1.3768 +#endif // !CC_INTERP
