src/cpu/x86/vm/templateTable_x86_64.cpp

changeset 435: a61af66fc99e
child 548: ba764ed4b6f2
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Sat Dec 01 00:00:00 2007 +0000
     1.3 @@ -0,0 +1,3546 @@
     1.4 +/*
     1.5 + * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "incls/_precompiled.incl"
    1.29 +#include "incls/_templateTable_x86_64.cpp.incl"
    1.30 +
    1.31 +#define __ _masm->
    1.32 +
    1.33 +// Platform-dependent initialization
    1.34 +
    1.35 +void TemplateTable::pd_initialize() {
     1.36 +  // No amd64-specific initialization
    1.37 +}
    1.38 +
    1.39 +// Address computation: local variables
    1.40 +
    1.41 +static inline Address iaddress(int n) {
    1.42 +  return Address(r14, Interpreter::local_offset_in_bytes(n));
    1.43 +}
    1.44 +
    1.45 +static inline Address laddress(int n) {
    1.46 +  return iaddress(n + 1);
    1.47 +}
    1.48 +
    1.49 +static inline Address faddress(int n) {
    1.50 +  return iaddress(n);
    1.51 +}
    1.52 +
    1.53 +static inline Address daddress(int n) {
    1.54 +  return laddress(n);
    1.55 +}
    1.56 +
    1.57 +static inline Address aaddress(int n) {
    1.58 +  return iaddress(n);
    1.59 +}
    1.60 +
    1.61 +static inline Address iaddress(Register r) {
    1.62 +  return Address(r14, r, Address::times_8, Interpreter::value_offset_in_bytes());
    1.63 +}
    1.64 +
    1.65 +static inline Address laddress(Register r) {
    1.66 +  return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
    1.67 +}
    1.68 +
    1.69 +static inline Address faddress(Register r) {
    1.70 +  return iaddress(r);
    1.71 +}
    1.72 +
    1.73 +static inline Address daddress(Register r) {
    1.74 +  return laddress(r);
    1.75 +}
    1.76 +
    1.77 +static inline Address aaddress(Register r) {
    1.78 +  return iaddress(r);
    1.79 +}
    1.80 +
    1.81 +static inline Address at_rsp() {
    1.82 +  return Address(rsp, 0);
    1.83 +}
    1.84 +
     1.85 +// At the top of the Java expression stack, which may be different from
     1.86 +// rsp().  It isn't different for category 1 values.
    1.87 +static inline Address at_tos   () {
    1.88 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
    1.89 +}
    1.90 +
    1.91 +static inline Address at_tos_p1() {
    1.92 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
    1.93 +}
    1.94 +
    1.95 +static inline Address at_tos_p2() {
    1.96 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
    1.97 +}
    1.98 +
    1.99 +static inline Address at_tos_p3() {
   1.100 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(3));
   1.101 +}
   1.102 +
   1.103 +// Condition conversion
   1.104 +static Assembler::Condition j_not(TemplateTable::Condition cc) {
   1.105 +  switch (cc) {
   1.106 +  case TemplateTable::equal        : return Assembler::notEqual;
   1.107 +  case TemplateTable::not_equal    : return Assembler::equal;
   1.108 +  case TemplateTable::less         : return Assembler::greaterEqual;
   1.109 +  case TemplateTable::less_equal   : return Assembler::greater;
   1.110 +  case TemplateTable::greater      : return Assembler::lessEqual;
   1.111 +  case TemplateTable::greater_equal: return Assembler::less;
   1.112 +  }
   1.113 +  ShouldNotReachHere();
   1.114 +  return Assembler::zero;
   1.115 +}
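          +
          +// Illustrative: the branch templates emit the taken-branch code inline
          +// and jump past it when the comparison fails, so they need the negated
          +// condition (e.g. if_icmplt tests greaterEqual to skip the branch).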
   1.116 +
   1.117 +
    1.118 +// Miscellaneous helper routines
   1.119 +
   1.120 +Address TemplateTable::at_bcp(int offset) {
   1.121 +  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   1.122 +  return Address(r13, offset);
   1.123 +}
   1.124 +
   1.125 +void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
   1.126 +                                   Register scratch,
   1.127 +                                   bool load_bc_into_scratch/*=true*/) {
   1.128 +  if (!RewriteBytecodes) {
   1.129 +    return;
   1.130 +  }
   1.131 +  // the pair bytecodes have already done the load.
   1.132 +  if (load_bc_into_scratch) {
   1.133 +    __ movl(bc, bytecode);
   1.134 +  }
   1.135 +  Label patch_done;
   1.136 +  if (JvmtiExport::can_post_breakpoint()) {
   1.137 +    Label fast_patch;
   1.138 +    // if a breakpoint is present we can't rewrite the stream directly
   1.139 +    __ movzbl(scratch, at_bcp(0));
   1.140 +    __ cmpl(scratch, Bytecodes::_breakpoint);
   1.141 +    __ jcc(Assembler::notEqual, fast_patch);
   1.142 +    __ get_method(scratch);
   1.143 +    // Let breakpoint table handling rewrite to quicker bytecode
   1.144 +    __ call_VM(noreg,
   1.145 +               CAST_FROM_FN_PTR(address,
   1.146 +                                InterpreterRuntime::set_original_bytecode_at),
   1.147 +               scratch, r13, bc);
   1.148 +#ifndef ASSERT
   1.149 +    __ jmpb(patch_done);
   1.150 +    __ bind(fast_patch);
   1.151 +  }
   1.152 +#else
   1.153 +    __ jmp(patch_done);
   1.154 +    __ bind(fast_patch);
   1.155 +  }
   1.156 +  Label okay;
   1.157 +  __ load_unsigned_byte(scratch, at_bcp(0));
   1.158 +  __ cmpl(scratch, (int) Bytecodes::java_code(bytecode));
   1.159 +  __ jcc(Assembler::equal, okay);
   1.160 +  __ cmpl(scratch, bc);
   1.161 +  __ jcc(Assembler::equal, okay);
   1.162 +  __ stop("patching the wrong bytecode");
   1.163 +  __ bind(okay);
   1.164 +#endif
   1.165 +  // patch bytecode
   1.166 +  __ movb(at_bcp(0), bc);
   1.167 +  __ bind(patch_done);
   1.168 +}
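          +
          +// Example (illustrative): the first time the interpreter executes a
          +// getfield that resolves to an int field, patch_bytecode rewrites the
          +// instruction in the bytecode stream to _fast_igetfield, so subsequent
          +// executions skip the resolution checks ("quickening").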
   1.169 +
   1.170 +
   1.171 +// Individual instructions
   1.172 +
   1.173 +void TemplateTable::nop() {
   1.174 +  transition(vtos, vtos);
   1.175 +  // nothing to do
   1.176 +}
   1.177 +
   1.178 +void TemplateTable::shouldnotreachhere() {
   1.179 +  transition(vtos, vtos);
   1.180 +  __ stop("shouldnotreachhere bytecode");
   1.181 +}
   1.182 +
   1.183 +void TemplateTable::aconst_null() {
   1.184 +  transition(vtos, atos);
   1.185 +  __ xorl(rax, rax);
   1.186 +}
   1.187 +
   1.188 +void TemplateTable::iconst(int value) {
   1.189 +  transition(vtos, itos);
   1.190 +  if (value == 0) {
   1.191 +    __ xorl(rax, rax);
   1.192 +  } else {
   1.193 +    __ movl(rax, value);
   1.194 +  }
   1.195 +}
   1.196 +
   1.197 +void TemplateTable::lconst(int value) {
   1.198 +  transition(vtos, ltos);
   1.199 +  if (value == 0) {
   1.200 +    __ xorl(rax, rax);
   1.201 +  } else {
   1.202 +    __ movl(rax, value);
   1.203 +  }
   1.204 +}
   1.205 +
   1.206 +void TemplateTable::fconst(int value) {
   1.207 +  transition(vtos, ftos);
   1.208 +  static float one = 1.0f, two = 2.0f;
   1.209 +  switch (value) {
   1.210 +  case 0:
   1.211 +    __ xorps(xmm0, xmm0);
   1.212 +    break;
   1.213 +  case 1:
   1.214 +    __ movflt(xmm0, ExternalAddress((address) &one));
   1.215 +    break;
   1.216 +  case 2:
   1.217 +    __ movflt(xmm0, ExternalAddress((address) &two));
   1.218 +    break;
   1.219 +  default:
   1.220 +    ShouldNotReachHere();
   1.221 +    break;
   1.222 +  }
   1.223 +}
   1.224 +
   1.225 +void TemplateTable::dconst(int value) {
   1.226 +  transition(vtos, dtos);
   1.227 +  static double one = 1.0;
   1.228 +  switch (value) {
   1.229 +  case 0:
   1.230 +    __ xorpd(xmm0, xmm0);
   1.231 +    break;
   1.232 +  case 1:
   1.233 +    __ movdbl(xmm0, ExternalAddress((address) &one));
   1.234 +    break;
   1.235 +  default:
   1.236 +    ShouldNotReachHere();
   1.237 +    break;
   1.238 +  }
   1.239 +}
   1.240 +
   1.241 +void TemplateTable::bipush() {
   1.242 +  transition(vtos, itos);
   1.243 +  __ load_signed_byte(rax, at_bcp(1));
   1.244 +}
   1.245 +
   1.246 +void TemplateTable::sipush() {
   1.247 +  transition(vtos, itos);
   1.248 +  __ load_unsigned_word(rax, at_bcp(1));
   1.249 +  __ bswapl(rax);
   1.250 +  __ sarl(rax, 16);
   1.251 +}
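          +
          +// Worked example (illustrative): for "sipush 0x1234" the operand bytes
          +// at bcp+1 are 0x12 0x34 (big-endian).  The little-endian 16-bit load
          +// yields 0x3412, bswapl turns it into 0x12340000, and the arithmetic
          +// shift right by 16 produces the sign-extended value 0x1234.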
   1.252 +
   1.253 +void TemplateTable::ldc(bool wide) {
   1.254 +  transition(vtos, vtos);
   1.255 +  Label call_ldc, notFloat, notClass, Done;
   1.256 +
   1.257 +  if (wide) {
   1.258 +    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.259 +  } else {
   1.260 +    __ load_unsigned_byte(rbx, at_bcp(1));
   1.261 +  }
   1.262 +
   1.263 +  __ get_cpool_and_tags(rcx, rax);
   1.264 +  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
   1.265 +  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
   1.266 +
   1.267 +  // get type
   1.268 +  __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
   1.269 +
   1.270 +  // unresolved string - get the resolved string
   1.271 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedString);
   1.272 +  __ jccb(Assembler::equal, call_ldc);
   1.273 +
   1.274 +  // unresolved class - get the resolved class
   1.275 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
   1.276 +  __ jccb(Assembler::equal, call_ldc);
   1.277 +
   1.278 +  // unresolved class in error state - call into runtime to throw the error
   1.279 +  // from the first resolution attempt
   1.280 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
   1.281 +  __ jccb(Assembler::equal, call_ldc);
   1.282 +
   1.283 +  // resolved class - need to call vm to get java mirror of the class
   1.284 +  __ cmpl(rdx, JVM_CONSTANT_Class);
   1.285 +  __ jcc(Assembler::notEqual, notClass);
   1.286 +
   1.287 +  __ bind(call_ldc);
   1.288 +  __ movl(c_rarg1, wide);
   1.289 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
   1.290 +  __ push_ptr(rax);
   1.291 +  __ verify_oop(rax);
   1.292 +  __ jmp(Done);
   1.293 +
   1.294 +  __ bind(notClass);
   1.295 +  __ cmpl(rdx, JVM_CONSTANT_Float);
   1.296 +  __ jccb(Assembler::notEqual, notFloat);
   1.297 +  // ftos
   1.298 +  __ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
   1.299 +  __ push_f();
   1.300 +  __ jmp(Done);
   1.301 +
   1.302 +  __ bind(notFloat);
   1.303 +#ifdef ASSERT
   1.304 +  {
   1.305 +    Label L;
   1.306 +    __ cmpl(rdx, JVM_CONSTANT_Integer);
   1.307 +    __ jcc(Assembler::equal, L);
   1.308 +    __ cmpl(rdx, JVM_CONSTANT_String);
   1.309 +    __ jcc(Assembler::equal, L);
   1.310 +    __ stop("unexpected tag type in ldc");
   1.311 +    __ bind(L);
   1.312 +  }
   1.313 +#endif
   1.314 +  // atos and itos
   1.315 +  Label isOop;
   1.316 +  __ cmpl(rdx, JVM_CONSTANT_Integer);
   1.317 +  __ jcc(Assembler::notEqual, isOop);
   1.318 +  __ movl(rax, Address(rcx, rbx, Address::times_8, base_offset));
   1.319 +  __ push_i(rax);
   1.320 +  __ jmp(Done);
   1.321 +
   1.322 +  __ bind(isOop);
   1.323 +  __ movq(rax, Address(rcx, rbx, Address::times_8, base_offset));
   1.324 +  __ push_ptr(rax);
   1.325 +
   1.326 +  if (VerifyOops) {
   1.327 +    __ verify_oop(rax);
   1.328 +  }
   1.329 +
   1.330 +  __ bind(Done);
   1.331 +}
   1.332 +
   1.333 +void TemplateTable::ldc2_w() {
   1.334 +  transition(vtos, vtos);
   1.335 +  Label Long, Done;
   1.336 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.337 +
   1.338 +  __ get_cpool_and_tags(rcx, rax);
   1.339 +  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
   1.340 +  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
   1.341 +
   1.342 +  // get type
   1.343 +  __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
   1.344 +          JVM_CONSTANT_Double);
   1.345 +  __ jccb(Assembler::notEqual, Long);
   1.346 +  // dtos
   1.347 +  __ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
   1.348 +  __ push_d();
   1.349 +  __ jmpb(Done);
   1.350 +
   1.351 +  __ bind(Long);
   1.352 +  // ltos
   1.353 +  __ movq(rax, Address(rcx, rbx, Address::times_8, base_offset));
   1.354 +  __ push_l();
   1.355 +
   1.356 +  __ bind(Done);
   1.357 +}
   1.358 +
   1.359 +void TemplateTable::locals_index(Register reg, int offset) {
   1.360 +  __ load_unsigned_byte(reg, at_bcp(offset));
   1.361 +  __ negq(reg);
   1.362 +  if (TaggedStackInterpreter) __ shlq(reg, 1);  // index = index*2
   1.363 +}
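          +
          +// Illustrative: locals grow toward lower addresses from r14, so the
          +// byte-wide local index from the bytecode stream is negated before it
          +// is scaled by 8 in iaddress(Register)/laddress(Register) above.  With
          +// TaggedStackInterpreter each local occupies a (tag, value) slot pair,
          +// hence the extra doubling of the index.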
   1.364 +
   1.365 +void TemplateTable::iload() {
   1.366 +  transition(vtos, itos);
   1.367 +  if (RewriteFrequentPairs) {
   1.368 +    Label rewrite, done;
   1.369 +    const Register bc = c_rarg3;
   1.370 +    assert(rbx != bc, "register damaged");
   1.371 +
   1.372 +    // get next byte
   1.373 +    __ load_unsigned_byte(rbx,
   1.374 +                          at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    1.375 +    // If the next bytecode is _iload, don't rewrite yet: we only want
    1.376 +    // to rewrite the last two iloads in a run.  If the next bytecode is
    1.377 +    // already _fast_iload (rewritten on a previous execution), this
    1.378 +    // iload is the first of a pair and is rewritten to _fast_iload2.
   1.379 +    __ cmpl(rbx, Bytecodes::_iload);
   1.380 +    __ jcc(Assembler::equal, done);
   1.381 +
   1.382 +    __ cmpl(rbx, Bytecodes::_fast_iload);
   1.383 +    __ movl(bc, Bytecodes::_fast_iload2);
   1.384 +    __ jccb(Assembler::equal, rewrite);
   1.385 +
   1.386 +    // if _caload, rewrite to fast_icaload
   1.387 +    __ cmpl(rbx, Bytecodes::_caload);
   1.388 +    __ movl(bc, Bytecodes::_fast_icaload);
   1.389 +    __ jccb(Assembler::equal, rewrite);
   1.390 +
   1.391 +    // rewrite so iload doesn't check again.
   1.392 +    __ movl(bc, Bytecodes::_fast_iload);
   1.393 +
   1.394 +    // rewrite
   1.395 +    // bc: fast bytecode
   1.396 +    __ bind(rewrite);
   1.397 +    patch_bytecode(Bytecodes::_iload, bc, rbx, false);
   1.398 +    __ bind(done);
   1.399 +  }
   1.400 +
   1.401 +  // Get the local value into tos
   1.402 +  locals_index(rbx);
   1.403 +  __ movl(rax, iaddress(rbx));
   1.404 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.405 +}
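          +
          +// Illustrative rewriting sequence: in a run "iload; iload" the second
          +// iload (followed by some other bytecode) is rewritten to _fast_iload;
          +// on a later execution the first iload sees _fast_iload as its
          +// successor and is rewritten to _fast_iload2, which performs both
          +// loads at once.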
   1.406 +
   1.407 +void TemplateTable::fast_iload2() {
   1.408 +  transition(vtos, itos);
   1.409 +  locals_index(rbx);
   1.410 +  __ movl(rax, iaddress(rbx));
   1.411 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.412 +  __ push(itos);
   1.413 +  locals_index(rbx, 3);
   1.414 +  __ movl(rax, iaddress(rbx));
   1.415 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.416 +}
   1.417 +
   1.418 +void TemplateTable::fast_iload() {
   1.419 +  transition(vtos, itos);
   1.420 +  locals_index(rbx);
   1.421 +  __ movl(rax, iaddress(rbx));
   1.422 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.423 +}
   1.424 +
   1.425 +void TemplateTable::lload() {
   1.426 +  transition(vtos, ltos);
   1.427 +  locals_index(rbx);
   1.428 +  __ movq(rax, laddress(rbx));
   1.429 +  debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.430 +}
   1.431 +
   1.432 +void TemplateTable::fload() {
   1.433 +  transition(vtos, ftos);
   1.434 +  locals_index(rbx);
   1.435 +  __ movflt(xmm0, faddress(rbx));
   1.436 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.437 +}
   1.438 +
   1.439 +void TemplateTable::dload() {
   1.440 +  transition(vtos, dtos);
   1.441 +  locals_index(rbx);
   1.442 +  __ movdbl(xmm0, daddress(rbx));
   1.443 +  debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.444 +}
   1.445 +
   1.446 +void TemplateTable::aload() {
   1.447 +  transition(vtos, atos);
   1.448 +  locals_index(rbx);
   1.449 +  __ movq(rax, aaddress(rbx));
   1.450 +  debug_only(__ verify_local_tag(frame::TagReference, rbx));
   1.451 +}
   1.452 +
   1.453 +void TemplateTable::locals_index_wide(Register reg) {
   1.454 +  __ movl(reg, at_bcp(2));
   1.455 +  __ bswapl(reg);
   1.456 +  __ shrl(reg, 16);
   1.457 +  __ negq(reg);
   1.458 +  if (TaggedStackInterpreter) __ shlq(reg, 1);  // index = index*2
   1.459 +}
   1.460 +
   1.461 +void TemplateTable::wide_iload() {
   1.462 +  transition(vtos, itos);
   1.463 +  locals_index_wide(rbx);
   1.464 +  __ movl(rax, iaddress(rbx));
   1.465 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.466 +}
   1.467 +
   1.468 +void TemplateTable::wide_lload() {
   1.469 +  transition(vtos, ltos);
   1.470 +  locals_index_wide(rbx);
   1.471 +  __ movq(rax, laddress(rbx));
   1.472 +  debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.473 +}
   1.474 +
   1.475 +void TemplateTable::wide_fload() {
   1.476 +  transition(vtos, ftos);
   1.477 +  locals_index_wide(rbx);
   1.478 +  __ movflt(xmm0, faddress(rbx));
   1.479 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.480 +}
   1.481 +
   1.482 +void TemplateTable::wide_dload() {
   1.483 +  transition(vtos, dtos);
   1.484 +  locals_index_wide(rbx);
   1.485 +  __ movdbl(xmm0, daddress(rbx));
   1.486 +  debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.487 +}
   1.488 +
   1.489 +void TemplateTable::wide_aload() {
   1.490 +  transition(vtos, atos);
   1.491 +  locals_index_wide(rbx);
   1.492 +  __ movq(rax, aaddress(rbx));
   1.493 +  debug_only(__ verify_local_tag(frame::TagReference, rbx));
   1.494 +}
   1.495 +
   1.496 +void TemplateTable::index_check(Register array, Register index) {
   1.497 +  // destroys rbx
   1.498 +  // check array
   1.499 +  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
   1.500 +  // sign extend index for use by indexed load
   1.501 +  __ movslq(index, index);
   1.502 +  // check index
   1.503 +  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
   1.504 +  if (index != rbx) {
   1.505 +    // ??? convention: move aberrant index into ebx for exception message
   1.506 +    assert(rbx != array, "different registers");
   1.507 +    __ movl(rbx, index);
   1.508 +  }
   1.509 +  __ jump_cc(Assembler::aboveEqual,
   1.510 +             ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
   1.511 +}
   1.512 +
   1.513 +void TemplateTable::iaload() {
   1.514 +  transition(itos, itos);
   1.515 +  __ pop_ptr(rdx);
   1.516 +  // eax: index
   1.517 +  // rdx: array
   1.518 +  index_check(rdx, rax); // kills rbx
   1.519 +  __ movl(rax, Address(rdx, rax,
   1.520 +                       Address::times_4,
   1.521 +                       arrayOopDesc::base_offset_in_bytes(T_INT)));
   1.522 +}
   1.523 +
   1.524 +void TemplateTable::laload() {
   1.525 +  transition(itos, ltos);
   1.526 +  __ pop_ptr(rdx);
   1.527 +  // eax: index
   1.528 +  // rdx: array
   1.529 +  index_check(rdx, rax); // kills rbx
   1.530 +  __ movq(rax, Address(rdx, rbx,
   1.531 +                       Address::times_8,
   1.532 +                       arrayOopDesc::base_offset_in_bytes(T_LONG)));
   1.533 +}
   1.534 +
   1.535 +void TemplateTable::faload() {
   1.536 +  transition(itos, ftos);
   1.537 +  __ pop_ptr(rdx);
   1.538 +  // eax: index
   1.539 +  // rdx: array
   1.540 +  index_check(rdx, rax); // kills rbx
   1.541 +  __ movflt(xmm0, Address(rdx, rax,
   1.542 +                         Address::times_4,
   1.543 +                         arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
   1.544 +}
   1.545 +
   1.546 +void TemplateTable::daload() {
   1.547 +  transition(itos, dtos);
   1.548 +  __ pop_ptr(rdx);
   1.549 +  // eax: index
   1.550 +  // rdx: array
   1.551 +  index_check(rdx, rax); // kills rbx
   1.552 +  __ movdbl(xmm0, Address(rdx, rax,
   1.553 +                          Address::times_8,
   1.554 +                          arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
   1.555 +}
   1.556 +
   1.557 +void TemplateTable::aaload() {
   1.558 +  transition(itos, atos);
   1.559 +  __ pop_ptr(rdx);
   1.560 +  // eax: index
   1.561 +  // rdx: array
   1.562 +  index_check(rdx, rax); // kills rbx
   1.563 +  __ movq(rax, Address(rdx, rax,
   1.564 +                       Address::times_8,
   1.565 +                       arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.566 +}
   1.567 +
   1.568 +void TemplateTable::baload() {
   1.569 +  transition(itos, itos);
   1.570 +  __ pop_ptr(rdx);
   1.571 +  // eax: index
   1.572 +  // rdx: array
   1.573 +  index_check(rdx, rax); // kills rbx
   1.574 +  __ load_signed_byte(rax,
   1.575 +                      Address(rdx, rax,
   1.576 +                              Address::times_1,
   1.577 +                              arrayOopDesc::base_offset_in_bytes(T_BYTE)));
   1.578 +}
   1.579 +
   1.580 +void TemplateTable::caload() {
   1.581 +  transition(itos, itos);
   1.582 +  __ pop_ptr(rdx);
   1.583 +  // eax: index
   1.584 +  // rdx: array
   1.585 +  index_check(rdx, rax); // kills rbx
   1.586 +  __ load_unsigned_word(rax,
   1.587 +                        Address(rdx, rax,
   1.588 +                                Address::times_2,
   1.589 +                                arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.590 +}
   1.591 +
   1.592 +// iload followed by caload frequent pair
   1.593 +void TemplateTable::fast_icaload() {
   1.594 +  transition(vtos, itos);
   1.595 +  // load index out of locals
   1.596 +  locals_index(rbx);
   1.597 +  __ movl(rax, iaddress(rbx));
   1.598 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.599 +
   1.600 +  // eax: index
   1.601 +  // rdx: array
   1.602 +  __ pop_ptr(rdx);
   1.603 +  index_check(rdx, rax); // kills rbx
   1.604 +  __ load_unsigned_word(rax,
   1.605 +                        Address(rdx, rax,
   1.606 +                                Address::times_2,
   1.607 +                                arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.608 +}
   1.609 +
   1.610 +void TemplateTable::saload() {
   1.611 +  transition(itos, itos);
   1.612 +  __ pop_ptr(rdx);
   1.613 +  // eax: index
   1.614 +  // rdx: array
   1.615 +  index_check(rdx, rax); // kills rbx
   1.616 +  __ load_signed_word(rax,
   1.617 +                      Address(rdx, rax,
   1.618 +                              Address::times_2,
   1.619 +                              arrayOopDesc::base_offset_in_bytes(T_SHORT)));
   1.620 +}
   1.621 +
   1.622 +void TemplateTable::iload(int n) {
   1.623 +  transition(vtos, itos);
   1.624 +  __ movl(rax, iaddress(n));
   1.625 +  debug_only(__ verify_local_tag(frame::TagValue, n));
   1.626 +}
   1.627 +
   1.628 +void TemplateTable::lload(int n) {
   1.629 +  transition(vtos, ltos);
   1.630 +  __ movq(rax, laddress(n));
   1.631 +  debug_only(__ verify_local_tag(frame::TagCategory2, n));
   1.632 +}
   1.633 +
   1.634 +void TemplateTable::fload(int n) {
   1.635 +  transition(vtos, ftos);
   1.636 +  __ movflt(xmm0, faddress(n));
   1.637 +  debug_only(__ verify_local_tag(frame::TagValue, n));
   1.638 +}
   1.639 +
   1.640 +void TemplateTable::dload(int n) {
   1.641 +  transition(vtos, dtos);
   1.642 +  __ movdbl(xmm0, daddress(n));
   1.643 +  debug_only(__ verify_local_tag(frame::TagCategory2, n));
   1.644 +}
   1.645 +
   1.646 +void TemplateTable::aload(int n) {
   1.647 +  transition(vtos, atos);
   1.648 +  __ movq(rax, aaddress(n));
   1.649 +  debug_only(__ verify_local_tag(frame::TagReference, n));
   1.650 +}
   1.651 +
   1.652 +void TemplateTable::aload_0() {
   1.653 +  transition(vtos, atos);
   1.654 +  // According to bytecode histograms, the pairs:
   1.655 +  //
   1.656 +  // _aload_0, _fast_igetfield
   1.657 +  // _aload_0, _fast_agetfield
   1.658 +  // _aload_0, _fast_fgetfield
   1.659 +  //
   1.660 +  // occur frequently. If RewriteFrequentPairs is set, the (slow)
   1.661 +  // _aload_0 bytecode checks if the next bytecode is either
   1.662 +  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
   1.663 +  // rewrites the current bytecode into a pair bytecode; otherwise it
   1.664 +  // rewrites the current bytecode into _fast_aload_0 that doesn't do
   1.665 +  // the pair check anymore.
   1.666 +  //
   1.667 +  // Note: If the next bytecode is _getfield, the rewrite must be
   1.668 +  //       delayed, otherwise we may miss an opportunity for a pair.
   1.669 +  //
   1.670 +  // Also rewrite frequent pairs
   1.671 +  //   aload_0, aload_1
   1.672 +  //   aload_0, iload_1
    1.673 +  // These bytecodes have small code footprints, which makes them the
    1.674 +  // most profitable to rewrite.
   1.675 +  if (RewriteFrequentPairs) {
   1.676 +    Label rewrite, done;
   1.677 +    const Register bc = c_rarg3;
   1.678 +    assert(rbx != bc, "register damaged");
   1.679 +    // get next byte
   1.680 +    __ load_unsigned_byte(rbx,
   1.681 +                          at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   1.682 +
   1.683 +    // do actual aload_0
   1.684 +    aload(0);
   1.685 +
    1.686 +    // if the next bytecode is _getfield, delay the rewrite
   1.687 +    __ cmpl(rbx, Bytecodes::_getfield);
   1.688 +    __ jcc(Assembler::equal, done);
   1.689 +
    1.690 +    // if _igetfield then rewrite to _fast_iaccess_0
   1.691 +    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
   1.692 +           Bytecodes::_aload_0,
   1.693 +           "fix bytecode definition");
   1.694 +    __ cmpl(rbx, Bytecodes::_fast_igetfield);
   1.695 +    __ movl(bc, Bytecodes::_fast_iaccess_0);
   1.696 +    __ jccb(Assembler::equal, rewrite);
   1.697 +
    1.698 +    // if _agetfield then rewrite to _fast_aaccess_0
   1.699 +    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
   1.700 +           Bytecodes::_aload_0,
   1.701 +           "fix bytecode definition");
   1.702 +    __ cmpl(rbx, Bytecodes::_fast_agetfield);
   1.703 +    __ movl(bc, Bytecodes::_fast_aaccess_0);
   1.704 +    __ jccb(Assembler::equal, rewrite);
   1.705 +
    1.706 +    // if _fgetfield then rewrite to _fast_faccess_0
   1.707 +    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
   1.708 +           Bytecodes::_aload_0,
   1.709 +           "fix bytecode definition");
   1.710 +    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
   1.711 +    __ movl(bc, Bytecodes::_fast_faccess_0);
   1.712 +    __ jccb(Assembler::equal, rewrite);
   1.713 +
    1.714 +    // else rewrite to _fast_aload_0
   1.715 +    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
   1.716 +           Bytecodes::_aload_0,
   1.717 +           "fix bytecode definition");
   1.718 +    __ movl(bc, Bytecodes::_fast_aload_0);
   1.719 +
   1.720 +    // rewrite
   1.721 +    // bc: fast bytecode
   1.722 +    __ bind(rewrite);
   1.723 +    patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);
   1.724 +
   1.725 +    __ bind(done);
   1.726 +  } else {
   1.727 +    aload(0);
   1.728 +  }
   1.729 +}
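          +
          +// Illustrative: for the common sequence "aload_0; getfield #idx" the
          +// getfield is first quickened (e.g. to _fast_agetfield); on the next
          +// execution aload_0 sees the quickened successor and is rewritten to
          +// _fast_aaccess_0, which folds the receiver load and the field fetch
          +// into one template.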
   1.730 +
   1.731 +void TemplateTable::istore() {
   1.732 +  transition(itos, vtos);
   1.733 +  locals_index(rbx);
   1.734 +  __ movl(iaddress(rbx), rax);
   1.735 +  __ tag_local(frame::TagValue, rbx);
   1.736 +}
   1.737 +
   1.738 +void TemplateTable::lstore() {
   1.739 +  transition(ltos, vtos);
   1.740 +  locals_index(rbx);
   1.741 +  __ movq(laddress(rbx), rax);
   1.742 +  __ tag_local(frame::TagCategory2, rbx);
   1.743 +}
   1.744 +
   1.745 +void TemplateTable::fstore() {
   1.746 +  transition(ftos, vtos);
   1.747 +  locals_index(rbx);
   1.748 +  __ movflt(faddress(rbx), xmm0);
   1.749 +  __ tag_local(frame::TagValue, rbx);
   1.750 +}
   1.751 +
   1.752 +void TemplateTable::dstore() {
   1.753 +  transition(dtos, vtos);
   1.754 +  locals_index(rbx);
   1.755 +  __ movdbl(daddress(rbx), xmm0);
   1.756 +  __ tag_local(frame::TagCategory2, rbx);
   1.757 +}
   1.758 +
   1.759 +void TemplateTable::astore() {
   1.760 +  transition(vtos, vtos);
   1.761 +  __ pop_ptr(rax, rdx);    // will need to pop tag too
   1.762 +  locals_index(rbx);
   1.763 +  __ movq(aaddress(rbx), rax);
   1.764 +  __ tag_local(rdx, rbx);  // store tag from stack, might be returnAddr
   1.765 +}
   1.766 +
   1.767 +void TemplateTable::wide_istore() {
   1.768 +  transition(vtos, vtos);
   1.769 +  __ pop_i();
   1.770 +  locals_index_wide(rbx);
   1.771 +  __ movl(iaddress(rbx), rax);
   1.772 +  __ tag_local(frame::TagValue, rbx);
   1.773 +}
   1.774 +
   1.775 +void TemplateTable::wide_lstore() {
   1.776 +  transition(vtos, vtos);
   1.777 +  __ pop_l();
   1.778 +  locals_index_wide(rbx);
   1.779 +  __ movq(laddress(rbx), rax);
   1.780 +  __ tag_local(frame::TagCategory2, rbx);
   1.781 +}
   1.782 +
   1.783 +void TemplateTable::wide_fstore() {
   1.784 +  transition(vtos, vtos);
   1.785 +  __ pop_f();
   1.786 +  locals_index_wide(rbx);
   1.787 +  __ movflt(faddress(rbx), xmm0);
   1.788 +  __ tag_local(frame::TagValue, rbx);
   1.789 +}
   1.790 +
   1.791 +void TemplateTable::wide_dstore() {
   1.792 +  transition(vtos, vtos);
   1.793 +  __ pop_d();
   1.794 +  locals_index_wide(rbx);
   1.795 +  __ movdbl(daddress(rbx), xmm0);
   1.796 +  __ tag_local(frame::TagCategory2, rbx);
   1.797 +}
   1.798 +
   1.799 +void TemplateTable::wide_astore() {
   1.800 +  transition(vtos, vtos);
   1.801 +  __ pop_ptr(rax, rdx);    // will need to pop tag too
   1.802 +  locals_index_wide(rbx);
   1.803 +  __ movq(aaddress(rbx), rax);
   1.804 +  __ tag_local(rdx, rbx);  // store tag from stack, might be returnAddr
   1.805 +}
   1.806 +
   1.807 +void TemplateTable::iastore() {
   1.808 +  transition(itos, vtos);
   1.809 +  __ pop_i(rbx);
   1.810 +  __ pop_ptr(rdx);
   1.811 +  // eax: value
   1.812 +  // ebx: index
   1.813 +  // rdx: array
   1.814 +  index_check(rdx, rbx); // prefer index in ebx
   1.815 +  __ movl(Address(rdx, rbx,
   1.816 +                  Address::times_4,
   1.817 +                  arrayOopDesc::base_offset_in_bytes(T_INT)),
   1.818 +          rax);
   1.819 +}
   1.820 +
   1.821 +void TemplateTable::lastore() {
   1.822 +  transition(ltos, vtos);
   1.823 +  __ pop_i(rbx);
   1.824 +  __ pop_ptr(rdx);
   1.825 +  // rax: value
   1.826 +  // ebx: index
   1.827 +  // rdx: array
   1.828 +  index_check(rdx, rbx); // prefer index in ebx
   1.829 +  __ movq(Address(rdx, rbx,
   1.830 +                  Address::times_8,
   1.831 +                  arrayOopDesc::base_offset_in_bytes(T_LONG)),
   1.832 +          rax);
   1.833 +}
   1.834 +
   1.835 +void TemplateTable::fastore() {
   1.836 +  transition(ftos, vtos);
   1.837 +  __ pop_i(rbx);
   1.838 +  __ pop_ptr(rdx);
   1.839 +  // xmm0: value
   1.840 +  // ebx:  index
   1.841 +  // rdx:  array
   1.842 +  index_check(rdx, rbx); // prefer index in ebx
   1.843 +  __ movflt(Address(rdx, rbx,
   1.844 +                   Address::times_4,
   1.845 +                   arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
   1.846 +           xmm0);
   1.847 +}
   1.848 +
   1.849 +void TemplateTable::dastore() {
   1.850 +  transition(dtos, vtos);
   1.851 +  __ pop_i(rbx);
   1.852 +  __ pop_ptr(rdx);
   1.853 +  // xmm0: value
   1.854 +  // ebx:  index
   1.855 +  // rdx:  array
   1.856 +  index_check(rdx, rbx); // prefer index in ebx
   1.857 +  __ movdbl(Address(rdx, rbx,
   1.858 +                   Address::times_8,
   1.859 +                   arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
   1.860 +           xmm0);
   1.861 +}
   1.862 +
   1.863 +void TemplateTable::aastore() {
   1.864 +  Label is_null, ok_is_subtype, done;
   1.865 +  transition(vtos, vtos);
   1.866 +  // stack: ..., array, index, value
   1.867 +  __ movq(rax, at_tos());    // value
   1.868 +  __ movl(rcx, at_tos_p1()); // index
   1.869 +  __ movq(rdx, at_tos_p2()); // array
   1.870 +  index_check(rdx, rcx);     // kills rbx
   1.871 +  // do array store check - check for NULL value first
   1.872 +  __ testq(rax, rax);
   1.873 +  __ jcc(Assembler::zero, is_null);
   1.874 +
   1.875 +  // Move subklass into rbx
   1.876 +  __ movq(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
   1.877 +  // Move superklass into rax
   1.878 +  __ movq(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
   1.879 +  __ movq(rax, Address(rax,
   1.880 +                       sizeof(oopDesc) +
   1.881 +                       objArrayKlass::element_klass_offset_in_bytes()));
    1.882 +  // Compress array + index*8 + the T_OBJECT base offset into a single register.  Frees rcx.
   1.883 +  __ leaq(rdx, Address(rdx, rcx,
   1.884 +                       Address::times_8,
   1.885 +                       arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.886 +
   1.887 +  // Generate subtype check.  Blows rcx, rdi
   1.888 +  // Superklass in rax.  Subklass in rbx.
   1.889 +  __ gen_subtype_check(rbx, ok_is_subtype);
   1.890 +
   1.891 +  // Come here on failure
   1.892 +  // object is at TOS
   1.893 +  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
   1.894 +
   1.895 +  // Come here on success
   1.896 +  __ bind(ok_is_subtype);
   1.897 +  __ movq(rax, at_tos()); // Value
   1.898 +  __ movq(Address(rdx, 0), rax);
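          +  // store_check below is the GC write barrier: it dirties the card
          +  // table entry covering the element address in rdx so a later
          +  // collection notices the potentially cross-generation reference.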
   1.899 +  __ store_check(rdx);
   1.900 +  __ jmp(done);
   1.901 +
   1.902 +  // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
   1.903 +  __ bind(is_null);
   1.904 +  __ profile_null_seen(rbx);
   1.905 +  __ movq(Address(rdx, rcx,
   1.906 +                  Address::times_8,
   1.907 +                  arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
   1.908 +          rax);
   1.909 +
   1.910 +  // Pop stack arguments
   1.911 +  __ bind(done);
   1.912 +  __ addq(rsp, 3 * Interpreter::stackElementSize());
   1.913 +}
   1.914 +
   1.915 +void TemplateTable::bastore() {
   1.916 +  transition(itos, vtos);
   1.917 +  __ pop_i(rbx);
   1.918 +  __ pop_ptr(rdx);
   1.919 +  // eax: value
   1.920 +  // ebx: index
   1.921 +  // rdx: array
   1.922 +  index_check(rdx, rbx); // prefer index in ebx
   1.923 +  __ movb(Address(rdx, rbx,
   1.924 +                  Address::times_1,
   1.925 +                  arrayOopDesc::base_offset_in_bytes(T_BYTE)),
   1.926 +          rax);
   1.927 +}
   1.928 +
   1.929 +void TemplateTable::castore() {
   1.930 +  transition(itos, vtos);
   1.931 +  __ pop_i(rbx);
   1.932 +  __ pop_ptr(rdx);
   1.933 +  // eax: value
   1.934 +  // ebx: index
   1.935 +  // rdx: array
   1.936 +  index_check(rdx, rbx);  // prefer index in ebx
   1.937 +  __ movw(Address(rdx, rbx,
   1.938 +                  Address::times_2,
   1.939 +                  arrayOopDesc::base_offset_in_bytes(T_CHAR)),
   1.940 +          rax);
   1.941 +}
   1.942 +
   1.943 +void TemplateTable::sastore() {
   1.944 +  castore();
   1.945 +}
   1.946 +
   1.947 +void TemplateTable::istore(int n) {
   1.948 +  transition(itos, vtos);
   1.949 +  __ movl(iaddress(n), rax);
   1.950 +  __ tag_local(frame::TagValue, n);
   1.951 +}
   1.952 +
   1.953 +void TemplateTable::lstore(int n) {
   1.954 +  transition(ltos, vtos);
   1.955 +  __ movq(laddress(n), rax);
   1.956 +  __ tag_local(frame::TagCategory2, n);
   1.957 +}
   1.958 +
   1.959 +void TemplateTable::fstore(int n) {
   1.960 +  transition(ftos, vtos);
   1.961 +  __ movflt(faddress(n), xmm0);
   1.962 +  __ tag_local(frame::TagValue, n);
   1.963 +}
   1.964 +
   1.965 +void TemplateTable::dstore(int n) {
   1.966 +  transition(dtos, vtos);
   1.967 +  __ movdbl(daddress(n), xmm0);
   1.968 +  __ tag_local(frame::TagCategory2, n);
   1.969 +}
   1.970 +
   1.971 +void TemplateTable::astore(int n) {
   1.972 +  transition(vtos, vtos);
   1.973 +  __ pop_ptr(rax, rdx);
   1.974 +  __ movq(aaddress(n), rax);
   1.975 +  __ tag_local(rdx, n);
   1.976 +}
   1.977 +
   1.978 +void TemplateTable::pop() {
   1.979 +  transition(vtos, vtos);
   1.980 +  __ addq(rsp, Interpreter::stackElementSize());
   1.981 +}
   1.982 +
   1.983 +void TemplateTable::pop2() {
   1.984 +  transition(vtos, vtos);
   1.985 +  __ addq(rsp, 2 * Interpreter::stackElementSize());
   1.986 +}
   1.987 +
   1.988 +void TemplateTable::dup() {
   1.989 +  transition(vtos, vtos);
   1.990 +  __ load_ptr_and_tag(0, rax, rdx);
   1.991 +  __ push_ptr(rax, rdx);
   1.992 +  // stack: ..., a, a
   1.993 +}
   1.994 +
   1.995 +void TemplateTable::dup_x1() {
   1.996 +  transition(vtos, vtos);
   1.997 +  // stack: ..., a, b
   1.998 +  __ load_ptr_and_tag(0, rax, rdx);  // load b
   1.999 +  __ load_ptr_and_tag(1, rcx, rbx);  // load a
  1.1000 +  __ store_ptr_and_tag(1, rax, rdx); // store b
  1.1001 +  __ store_ptr_and_tag(0, rcx, rbx); // store a
  1.1002 +  __ push_ptr(rax, rdx);             // push b
  1.1003 +  // stack: ..., b, a, b
  1.1004 +}
  1.1005 +
  1.1006 +void TemplateTable::dup_x2() {
  1.1007 +  transition(vtos, vtos);
  1.1008 +  // stack: ..., a, b, c
  1.1009 +  __ load_ptr_and_tag(0, rax, rdx);  // load c
  1.1010 +  __ load_ptr_and_tag(2, rcx, rbx);  // load a
  1.1011 +  __ store_ptr_and_tag(2, rax, rdx); // store c in a
  1.1012 +  __ push_ptr(rax, rdx);             // push c
  1.1013 +  // stack: ..., c, b, c, c
  1.1014 +  __ load_ptr_and_tag(2, rax, rdx);  // load b
  1.1015 +  __ store_ptr_and_tag(2, rcx, rbx); // store a in b
  1.1016 +  // stack: ..., c, a, c, c
  1.1017 +  __ store_ptr_and_tag(1, rax, rdx); // store b in c
  1.1018 +  // stack: ..., c, a, b, c
  1.1019 +}
  1.1020 +
  1.1021 +void TemplateTable::dup2() {
  1.1022 +  transition(vtos, vtos);
  1.1023 +  // stack: ..., a, b
  1.1024 +  __ load_ptr_and_tag(1, rax, rdx);  // load a
  1.1025 +  __ push_ptr(rax, rdx);             // push a
  1.1026 +  __ load_ptr_and_tag(1, rax, rdx);  // load b
  1.1027 +  __ push_ptr(rax, rdx);             // push b
  1.1028 +  // stack: ..., a, b, a, b
  1.1029 +}
  1.1030 +
  1.1031 +void TemplateTable::dup2_x1() {
  1.1032 +  transition(vtos, vtos);
  1.1033 +  // stack: ..., a, b, c
  1.1034 +  __ load_ptr_and_tag(0, rcx, rbx);  // load c
  1.1035 +  __ load_ptr_and_tag(1, rax, rdx);  // load b
  1.1036 +  __ push_ptr(rax, rdx);             // push b
  1.1037 +  __ push_ptr(rcx, rbx);             // push c
  1.1038 +  // stack: ..., a, b, c, b, c
  1.1039 +  __ store_ptr_and_tag(3, rcx, rbx); // store c in b
  1.1040 +  // stack: ..., a, c, c, b, c
  1.1041 +  __ load_ptr_and_tag(4, rcx, rbx);  // load a
  1.1042 +  __ store_ptr_and_tag(2, rcx, rbx); // store a in 2nd c
  1.1043 +  // stack: ..., a, c, a, b, c
  1.1044 +  __ store_ptr_and_tag(4, rax, rdx); // store b in a
  1.1045 +  // stack: ..., b, c, a, b, c
  1.1046 +}
  1.1047 +
  1.1048 +void TemplateTable::dup2_x2() {
  1.1049 +  transition(vtos, vtos);
  1.1050 +  // stack: ..., a, b, c, d
  1.1051 +  __ load_ptr_and_tag(0, rcx, rbx);  // load d
  1.1052 +  __ load_ptr_and_tag(1, rax, rdx);  // load c
  1.1053 +  __ push_ptr(rax, rdx);             // push c
  1.1054 +  __ push_ptr(rcx, rbx);             // push d
  1.1055 +  // stack: ..., a, b, c, d, c, d
  1.1056 +  __ load_ptr_and_tag(4, rax, rdx);  // load b
  1.1057 +  __ store_ptr_and_tag(2, rax, rdx); // store b in d
  1.1058 +  __ store_ptr_and_tag(4, rcx, rbx); // store d in b
  1.1059 +  // stack: ..., a, d, c, b, c, d
  1.1060 +  __ load_ptr_and_tag(5, rcx, rbx);  // load a
  1.1061 +  __ load_ptr_and_tag(3, rax, rdx);  // load c
  1.1062 +  __ store_ptr_and_tag(3, rcx, rbx); // store a in c
  1.1063 +  __ store_ptr_and_tag(5, rax, rdx); // store c in a
  1.1064 +  // stack: ..., c, d, a, b, c, d
  1.1065 +}
  1.1066 +
  1.1067 +void TemplateTable::swap() {
  1.1068 +  transition(vtos, vtos);
  1.1069 +  // stack: ..., a, b
  1.1070 +  __ load_ptr_and_tag(1, rcx, rbx);  // load a
  1.1071 +  __ load_ptr_and_tag(0, rax, rdx);  // load b
  1.1072 +  __ store_ptr_and_tag(0, rcx, rbx); // store a in b
  1.1073 +  __ store_ptr_and_tag(1, rax, rdx); // store b in a
  1.1074 +  // stack: ..., b, a
  1.1075 +}
  1.1076 +
  1.1077 +void TemplateTable::iop2(Operation op) {
  1.1078 +  transition(itos, itos);
  1.1079 +  switch (op) {
  1.1080 +  case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
  1.1081 +  case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
  1.1082 +  case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
  1.1083 +  case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
  1.1084 +  case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
  1.1085 +  case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
  1.1086 +  case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
  1.1087 +  case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
  1.1088 +  case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
  1.1089 +  default   : ShouldNotReachHere();
  1.1090 +  }
  1.1091 +}
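          +
          +// Illustrative: for isub the operand stack holds ..., value1, value2
          +// with value2 in rax (tos); the code above therefore saves value2,
          +// reloads value1 from the stack into rax, and subtracts.  The shift
          +// counts likewise travel through rcx because the shll/sarl/shrl forms
          +// used here take their count from CL.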
  1.1092 +
  1.1093 +void TemplateTable::lop2(Operation op) {
  1.1094 +  transition(ltos, ltos);
  1.1095 +  switch (op) {
  1.1096 +  case add  :                    __ pop_l(rdx); __ addq (rax, rdx); break;
  1.1097 +  case sub  : __ movq(rdx, rax); __ pop_l(rax); __ subq (rax, rdx); break;
  1.1098 +  case _and :                    __ pop_l(rdx); __ andq (rax, rdx); break;
  1.1099 +  case _or  :                    __ pop_l(rdx); __ orq  (rax, rdx); break;
  1.1100 +  case _xor :                    __ pop_l(rdx); __ xorq (rax, rdx); break;
  1.1101 +  default : ShouldNotReachHere();
  1.1102 +  }
  1.1103 +}
  1.1104 +
  1.1105 +void TemplateTable::idiv() {
  1.1106 +  transition(itos, itos);
  1.1107 +  __ movl(rcx, rax);
  1.1108 +  __ pop_i(rax);
  1.1109 +  // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
  1.1110 +  //       they are not equal, one could do a normal division (no correction
  1.1111 +  //       needed), which may speed up this implementation for the common case.
  1.1112 +  //       (see also JVM spec., p.243 & p.271)
  1.1113 +  __ corrected_idivl(rcx);
  1.1114 +}
  1.1115 +
  1.1116 +void TemplateTable::irem() {
  1.1117 +  transition(itos, itos);
  1.1118 +  __ movl(rcx, rax);
  1.1119 +  __ pop_i(rax);
  1.1120 +  // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
  1.1121 +  //       they are not equal, one could do a normal division (no correction
  1.1122 +  //       needed), which may speed up this implementation for the common case.
  1.1123 +  //       (see also JVM spec., p.243 & p.271)
  1.1124 +  __ corrected_idivl(rcx);
  1.1125 +  __ movl(rax, rdx);
  1.1126 +}
  1.1127 +
  1.1128 +void TemplateTable::lmul() {
  1.1129 +  transition(ltos, ltos);
  1.1130 +  __ pop_l(rdx);
  1.1131 +  __ imulq(rax, rdx);
  1.1132 +}
  1.1133 +
  1.1134 +void TemplateTable::ldiv() {
  1.1135 +  transition(ltos, ltos);
  1.1136 +  __ movq(rcx, rax);
  1.1137 +  __ pop_l(rax);
  1.1138 +  // generate explicit div0 check
  1.1139 +  __ testq(rcx, rcx);
  1.1140 +  __ jump_cc(Assembler::zero,
  1.1141 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1142 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1143 +  //       they are not equal, one could do a normal division (no correction
  1.1144 +  //       needed), which may speed up this implementation for the common case.
  1.1145 +  //       (see also JVM spec., p.243 & p.271)
  1.1146 +  __ corrected_idivq(rcx); // kills rbx
  1.1147 +}
  1.1148 +
  1.1149 +void TemplateTable::lrem() {
  1.1150 +  transition(ltos, ltos);
  1.1151 +  __ movq(rcx, rax);
  1.1152 +  __ pop_l(rax);
  1.1153 +  __ testq(rcx, rcx);
  1.1154 +  __ jump_cc(Assembler::zero,
  1.1155 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1156 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1157 +  //       they are not equal, one could do a normal division (no correction
  1.1158 +  //       needed), which may speed up this implementation for the common case.
  1.1159 +  //       (see also JVM spec., p.243 & p.271)
  1.1160 +  __ corrected_idivq(rcx); // kills rbx
  1.1161 +  __ movq(rax, rdx);
  1.1162 +}
  1.1163 +
  1.1164 +void TemplateTable::lshl() {
  1.1165 +  transition(itos, ltos);
  1.1166 +  __ movl(rcx, rax);                             // get shift count
  1.1167 +  __ pop_l(rax);                                 // get shift value
  1.1168 +  __ shlq(rax);
  1.1169 +}
  1.1170 +
  1.1171 +void TemplateTable::lshr() {
  1.1172 +  transition(itos, ltos);
  1.1173 +  __ movl(rcx, rax);                             // get shift count
  1.1174 +  __ pop_l(rax);                                 // get shift value
  1.1175 +  __ sarq(rax);
  1.1176 +}
  1.1177 +
  1.1178 +void TemplateTable::lushr() {
  1.1179 +  transition(itos, ltos);
  1.1180 +  __ movl(rcx, rax);                             // get shift count
  1.1181 +  __ pop_l(rax);                                 // get shift value
  1.1182 +  __ shrq(rax);
  1.1183 +}
  1.1184 +
  1.1185 +void TemplateTable::fop2(Operation op) {
  1.1186 +  transition(ftos, ftos);
  1.1187 +  switch (op) {
  1.1188 +  case add:
  1.1189 +    __ addss(xmm0, at_rsp());
  1.1190 +    __ addq(rsp, Interpreter::stackElementSize());
  1.1191 +    break;
  1.1192 +  case sub:
  1.1193 +    __ movflt(xmm1, xmm0);
  1.1194 +    __ pop_f(xmm0);
  1.1195 +    __ subss(xmm0, xmm1);
  1.1196 +    break;
  1.1197 +  case mul:
  1.1198 +    __ mulss(xmm0, at_rsp());
  1.1199 +    __ addq(rsp, Interpreter::stackElementSize());
  1.1200 +    break;
  1.1201 +  case div:
  1.1202 +    __ movflt(xmm1, xmm0);
  1.1203 +    __ pop_f(xmm0);
  1.1204 +    __ divss(xmm0, xmm1);
  1.1205 +    break;
  1.1206 +  case rem:
  1.1207 +    __ movflt(xmm1, xmm0);
  1.1208 +    __ pop_f(xmm0);
  1.1209 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
  1.1210 +    break;
  1.1211 +  default:
  1.1212 +    ShouldNotReachHere();
  1.1213 +    break;
  1.1214 +  }
  1.1215 +}
  1.1216 +
  1.1217 +void TemplateTable::dop2(Operation op) {
  1.1218 +  transition(dtos, dtos);
  1.1219 +  switch (op) {
  1.1220 +  case add:
  1.1221 +    __ addsd(xmm0, at_rsp());
  1.1222 +    __ addq(rsp, 2 * Interpreter::stackElementSize());
  1.1223 +    break;
  1.1224 +  case sub:
  1.1225 +    __ movdbl(xmm1, xmm0);
  1.1226 +    __ pop_d(xmm0);
  1.1227 +    __ subsd(xmm0, xmm1);
  1.1228 +    break;
  1.1229 +  case mul:
  1.1230 +    __ mulsd(xmm0, at_rsp());
  1.1231 +    __ addq(rsp, 2 * Interpreter::stackElementSize());
  1.1232 +    break;
  1.1233 +  case div:
  1.1234 +    __ movdbl(xmm1, xmm0);
  1.1235 +    __ pop_d(xmm0);
  1.1236 +    __ divsd(xmm0, xmm1);
  1.1237 +    break;
  1.1238 +  case rem:
  1.1239 +    __ movdbl(xmm1, xmm0);
  1.1240 +    __ pop_d(xmm0);
  1.1241 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
  1.1242 +    break;
  1.1243 +  default:
  1.1244 +    ShouldNotReachHere();
  1.1245 +    break;
  1.1246 +  }
  1.1247 +}
  1.1248 +
  1.1249 +void TemplateTable::ineg() {
  1.1250 +  transition(itos, itos);
  1.1251 +  __ negl(rax);
  1.1252 +}
  1.1253 +
  1.1254 +void TemplateTable::lneg() {
  1.1255 +  transition(ltos, ltos);
  1.1256 +  __ negq(rax);
  1.1257 +}
  1.1258 +
   1.1259 +// Note: 'double' and 'long long' have 32-bit alignment on x86.
   1.1260 +static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
   1.1261 +  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
   1.1262 +  // for the 128-bit operands of SSE instructions.
   1.1263 +  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
   1.1264 +  // Store the value into the 128-bit operand.
  1.1265 +  operand[0] = lo;
  1.1266 +  operand[1] = hi;
  1.1267 +  return operand;
  1.1268 +}
  1.1269 +
   1.1270 +// Buffers for 128-bit masks used by SSE instructions.
  1.1271 +static jlong float_signflip_pool[2*2];
  1.1272 +static jlong double_signflip_pool[2*2];
  1.1273 +
  1.1274 +void TemplateTable::fneg() {
  1.1275 +  transition(ftos, ftos);
  1.1276 +  static jlong *float_signflip  = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
  1.1277 +  __ xorps(xmm0, ExternalAddress((address) float_signflip));
  1.1278 +}
  1.1279 +
  1.1280 +void TemplateTable::dneg() {
  1.1281 +  transition(dtos, dtos);
  1.1282 +  static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
  1.1283 +  __ xorpd(xmm0, ExternalAddress((address) double_signflip));
  1.1284 +}
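          +
          +// Illustrative: fneg/dneg flip only the IEEE-754 sign bit, so the xor
          +// masks are the sign bit replicated across the 128-bit operand.
          +// double_quadword picks the 16-byte-aligned region inside the
          +// over-sized *_signflip_pool buffers so xorps/xorpd get the alignment
          +// they require.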
  1.1285 +
  1.1286 +void TemplateTable::iinc() {
  1.1287 +  transition(vtos, vtos);
  1.1288 +  __ load_signed_byte(rdx, at_bcp(2)); // get constant
  1.1289 +  locals_index(rbx);
  1.1290 +  __ addl(iaddress(rbx), rdx);
  1.1291 +}
  1.1292 +
  1.1293 +void TemplateTable::wide_iinc() {
  1.1294 +  transition(vtos, vtos);
  1.1295 +  __ movl(rdx, at_bcp(4)); // get constant
  1.1296 +  locals_index_wide(rbx);
  1.1297 +  __ bswapl(rdx); // swap bytes & sign-extend constant
  1.1298 +  __ sarl(rdx, 16);
  1.1299 +  __ addl(iaddress(rbx), rdx);
  1.1300 +  // Note: should probably use only one movl to get both
  1.1301 +  //       the index and the constant -> fix this
  1.1302 +}
  1.1303 +
  1.1304 +void TemplateTable::convert() {
  1.1305 +  // Checking
  1.1306 +#ifdef ASSERT
  1.1307 +  {
  1.1308 +    TosState tos_in  = ilgl;
  1.1309 +    TosState tos_out = ilgl;
  1.1310 +    switch (bytecode()) {
  1.1311 +    case Bytecodes::_i2l: // fall through
  1.1312 +    case Bytecodes::_i2f: // fall through
  1.1313 +    case Bytecodes::_i2d: // fall through
  1.1314 +    case Bytecodes::_i2b: // fall through
  1.1315 +    case Bytecodes::_i2c: // fall through
  1.1316 +    case Bytecodes::_i2s: tos_in = itos; break;
  1.1317 +    case Bytecodes::_l2i: // fall through
  1.1318 +    case Bytecodes::_l2f: // fall through
  1.1319 +    case Bytecodes::_l2d: tos_in = ltos; break;
  1.1320 +    case Bytecodes::_f2i: // fall through
  1.1321 +    case Bytecodes::_f2l: // fall through
  1.1322 +    case Bytecodes::_f2d: tos_in = ftos; break;
  1.1323 +    case Bytecodes::_d2i: // fall through
  1.1324 +    case Bytecodes::_d2l: // fall through
  1.1325 +    case Bytecodes::_d2f: tos_in = dtos; break;
  1.1326 +    default             : ShouldNotReachHere();
  1.1327 +    }
  1.1328 +    switch (bytecode()) {
  1.1329 +    case Bytecodes::_l2i: // fall through
  1.1330 +    case Bytecodes::_f2i: // fall through
  1.1331 +    case Bytecodes::_d2i: // fall through
  1.1332 +    case Bytecodes::_i2b: // fall through
  1.1333 +    case Bytecodes::_i2c: // fall through
  1.1334 +    case Bytecodes::_i2s: tos_out = itos; break;
  1.1335 +    case Bytecodes::_i2l: // fall through
  1.1336 +    case Bytecodes::_f2l: // fall through
  1.1337 +    case Bytecodes::_d2l: tos_out = ltos; break;
  1.1338 +    case Bytecodes::_i2f: // fall through
  1.1339 +    case Bytecodes::_l2f: // fall through
  1.1340 +    case Bytecodes::_d2f: tos_out = ftos; break;
  1.1341 +    case Bytecodes::_i2d: // fall through
  1.1342 +    case Bytecodes::_l2d: // fall through
  1.1343 +    case Bytecodes::_f2d: tos_out = dtos; break;
  1.1344 +    default             : ShouldNotReachHere();
  1.1345 +    }
  1.1346 +    transition(tos_in, tos_out);
  1.1347 +  }
  1.1348 +#endif // ASSERT
  1.1349 +
  1.1350 +  static const int64_t is_nan = 0x8000000000000000L;
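          +  // 0x8000000000000000 is the "integer indefinite" value that
          +  // cvttss2siq/cvttsd2siq produce for NaN or out-of-range inputs; the
          +  // conversions below compare against it and fall back to the runtime
          +  // (SharedRuntime::f2l/d2l) to get the Java-specified result.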
  1.1351 +
  1.1352 +  // Conversion
  1.1353 +  switch (bytecode()) {
  1.1354 +  case Bytecodes::_i2l:
  1.1355 +    __ movslq(rax, rax);
  1.1356 +    break;
  1.1357 +  case Bytecodes::_i2f:
  1.1358 +    __ cvtsi2ssl(xmm0, rax);
  1.1359 +    break;
  1.1360 +  case Bytecodes::_i2d:
  1.1361 +    __ cvtsi2sdl(xmm0, rax);
  1.1362 +    break;
  1.1363 +  case Bytecodes::_i2b:
  1.1364 +    __ movsbl(rax, rax);
  1.1365 +    break;
  1.1366 +  case Bytecodes::_i2c:
  1.1367 +    __ movzwl(rax, rax);
  1.1368 +    break;
  1.1369 +  case Bytecodes::_i2s:
  1.1370 +    __ movswl(rax, rax);
  1.1371 +    break;
  1.1372 +  case Bytecodes::_l2i:
  1.1373 +    __ movl(rax, rax);
  1.1374 +    break;
  1.1375 +  case Bytecodes::_l2f:
  1.1376 +    __ cvtsi2ssq(xmm0, rax);
  1.1377 +    break;
  1.1378 +  case Bytecodes::_l2d:
  1.1379 +    __ cvtsi2sdq(xmm0, rax);
  1.1380 +    break;
  1.1381 +  case Bytecodes::_f2i:
  1.1382 +  {
  1.1383 +    Label L;
  1.1384 +    __ cvttss2sil(rax, xmm0);
  1.1385 +    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
  1.1386 +    __ jcc(Assembler::notEqual, L);
  1.1387 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
  1.1388 +    __ bind(L);
  1.1389 +  }
  1.1390 +    break;
  1.1391 +  case Bytecodes::_f2l:
  1.1392 +  {
  1.1393 +    Label L;
  1.1394 +    __ cvttss2siq(rax, xmm0);
  1.1395 +    // NaN or overflow/underflow?
  1.1396 +    __ cmp64(rax, ExternalAddress((address) &is_nan));
  1.1397 +    __ jcc(Assembler::notEqual, L);
  1.1398 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
  1.1399 +    __ bind(L);
  1.1400 +  }
  1.1401 +    break;
  1.1402 +  case Bytecodes::_f2d:
  1.1403 +    __ cvtss2sd(xmm0, xmm0);
  1.1404 +    break;
  1.1405 +  case Bytecodes::_d2i:
  1.1406 +  {
  1.1407 +    Label L;
  1.1408 +    __ cvttsd2sil(rax, xmm0);
  1.1409 +    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
  1.1410 +    __ jcc(Assembler::notEqual, L);
  1.1411 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
  1.1412 +    __ bind(L);
  1.1413 +  }
  1.1414 +    break;
  1.1415 +  case Bytecodes::_d2l:
  1.1416 +  {
  1.1417 +    Label L;
  1.1418 +    __ cvttsd2siq(rax, xmm0);
  1.1419 +    // NaN or overflow/underflow?
  1.1420 +    __ cmp64(rax, ExternalAddress((address) &is_nan));
  1.1421 +    __ jcc(Assembler::notEqual, L);
  1.1422 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
  1.1423 +    __ bind(L);
  1.1424 +  }
  1.1425 +    break;
  1.1426 +  case Bytecodes::_d2f:
  1.1427 +    __ cvtsd2ss(xmm0, xmm0);
  1.1428 +    break;
  1.1429 +  default:
  1.1430 +    ShouldNotReachHere();
  1.1431 +  }
  1.1432 +}
  1.1433 +
  1.1434 +void TemplateTable::lcmp() {
  1.1435 +  transition(ltos, itos);
  1.1436 +  Label done;
  1.1437 +  __ pop_l(rdx);
  1.1438 +  __ cmpq(rdx, rax);
  1.1439 +  __ movl(rax, -1);
  1.1440 +  __ jccb(Assembler::less, done);
  1.1441 +  __ setb(Assembler::notEqual, rax);
  1.1442 +  __ movzbl(rax, rax);
  1.1443 +  __ bind(done);
  1.1444 +}
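          +
          +// Illustrative: cmpq sets the flags once; rax is then preset to -1 and
          +// kept if value1 < value2.  Otherwise setb(notEqual) deposits 0 or 1
          +// in the low byte and movzbl zero-extends it, yielding the required
          +// -1/0/1 without a second compare.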
  1.1445 +
  1.1446 +void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  1.1447 +  Label done;
  1.1448 +  if (is_float) {
  1.1449 +    // XXX get rid of pop here, use ... reg, mem32
  1.1450 +    __ pop_f(xmm1);
  1.1451 +    __ ucomiss(xmm1, xmm0);
  1.1452 +  } else {
  1.1453 +    // XXX get rid of pop here, use ... reg, mem64
  1.1454 +    __ pop_d(xmm1);
  1.1455 +    __ ucomisd(xmm1, xmm0);
  1.1456 +  }
  1.1457 +  if (unordered_result < 0) {
  1.1458 +    __ movl(rax, -1);
  1.1459 +    __ jccb(Assembler::parity, done);
  1.1460 +    __ jccb(Assembler::below, done);
  1.1461 +    __ setb(Assembler::notEqual, rdx);
  1.1462 +    __ movzbl(rax, rdx);
  1.1463 +  } else {
  1.1464 +    __ movl(rax, 1);
  1.1465 +    __ jccb(Assembler::parity, done);
  1.1466 +    __ jccb(Assembler::above, done);
  1.1467 +    __ movl(rax, 0);
  1.1468 +    __ jccb(Assembler::equal, done);
  1.1469 +    __ decrementl(rax);
  1.1470 +  }
  1.1471 +  __ bind(done);
  1.1472 +}
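          +
          +// Note: unordered_result encodes the bytecode flavor per the JVM spec:
          +// fcmpl/dcmpl pass -1 (NaN compares as "less") and fcmpg/dcmpg pass +1
          +// (NaN compares as "greater"); the parity flag set by ucomiss/ucomisd
          +// signals the unordered case.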
  1.1473 +
  1.1474 +void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1.1475 +  __ get_method(rcx); // rcx holds method
  1.1476 +  __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
  1.1477 +                                     // holds bumped taken count
  1.1478 +
  1.1479 +  const ByteSize be_offset = methodOopDesc::backedge_counter_offset() +
  1.1480 +                             InvocationCounter::counter_offset();
  1.1481 +  const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() +
  1.1482 +                              InvocationCounter::counter_offset();
  1.1483 +  const int method_offset = frame::interpreter_frame_method_offset * wordSize;
  1.1484 +
  1.1485 +  // Load up edx with the branch displacement
  1.1486 +  __ movl(rdx, at_bcp(1));
  1.1487 +  __ bswapl(rdx);
  1.1488 +
  1.1489 +  if (!is_wide) {
  1.1490 +    __ sarl(rdx, 16);
  1.1491 +  }
  1.1492 +  __ movslq(rdx, rdx);
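          +  // For a normal branch the two displacement bytes live at bcp[1..2];
          +  // after the 4-byte load and bswap they occupy the high half of rdx,
          +  // so the arithmetic shift by 16 above both repositions and
          +  // sign-extends them. Wide branches carry a full 32-bit offset,
          +  // hence no shift.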
  1.1493 +
  1.1494 +  // Handle all the JSR stuff here, then exit.
  1.1495 +  // It's much shorter and cleaner than intermingling with the non-JSR
   1.1496 +  // normal-branch stuff occurring below.
  1.1497 +  if (is_jsr) {
  1.1498 +    // Pre-load the next target bytecode into rbx
  1.1499 +    __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1, 0));
  1.1500 +
  1.1501 +    // compute return address as bci in rax
  1.1502 +    __ leaq(rax, at_bcp((is_wide ? 5 : 3) -
  1.1503 +                        in_bytes(constMethodOopDesc::codes_offset())));
  1.1504 +    __ subq(rax, Address(rcx, methodOopDesc::const_offset()));
  1.1505 +    // Adjust the bcp in r13 by the displacement in rdx
  1.1506 +    __ addq(r13, rdx);
   1.1507 +    // the jsr result is a return bci, not an oop, so push it as an int
  1.1508 +    __ push_i(rax);
  1.1509 +    __ dispatch_only(vtos);
  1.1510 +    return;
  1.1511 +  }
  1.1512 +
  1.1513 +  // Normal (non-jsr) branch handling
  1.1514 +
  1.1515 +  // Adjust the bcp in r13 by the displacement in rdx
  1.1516 +  __ addq(r13, rdx);
  1.1517 +
  1.1518 +  assert(UseLoopCounter || !UseOnStackReplacement,
  1.1519 +         "on-stack-replacement requires loop counters");
  1.1520 +  Label backedge_counter_overflow;
  1.1521 +  Label profile_method;
  1.1522 +  Label dispatch;
  1.1523 +  if (UseLoopCounter) {
  1.1524 +    // increment backedge counter for backward branches
  1.1525 +    // rax: MDO
  1.1526 +    // ebx: MDO bumped taken-count
  1.1527 +    // rcx: method
  1.1528 +    // rdx: target offset
  1.1529 +    // r13: target bcp
  1.1530 +    // r14: locals pointer
  1.1531 +    __ testl(rdx, rdx);             // check if forward or backward branch
  1.1532 +    __ jcc(Assembler::positive, dispatch); // count only if backward branch
  1.1533 +
  1.1534 +    // increment counter
  1.1535 +    __ movl(rax, Address(rcx, be_offset));        // load backedge counter
  1.1536 +    __ incrementl(rax, InvocationCounter::count_increment); // increment
  1.1537 +                                                            // counter
  1.1538 +    __ movl(Address(rcx, be_offset), rax);        // store counter
  1.1539 +
  1.1540 +    __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
   1.1541 +    __ andl(rax, InvocationCounter::count_mask_value); // mask out the status bits
  1.1542 +    __ addl(rax, Address(rcx, be_offset));        // add both counters
  1.1543 +
  1.1544 +    if (ProfileInterpreter) {
  1.1545 +      // Test to see if we should create a method data oop
  1.1546 +      __ cmp32(rax,
  1.1547 +               ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
  1.1548 +      __ jcc(Assembler::less, dispatch);
  1.1549 +
  1.1550 +      // if no method data exists, go to profile method
  1.1551 +      __ test_method_data_pointer(rax, profile_method);
  1.1552 +
  1.1553 +      if (UseOnStackReplacement) {
  1.1554 +        // check for overflow against ebx which is the MDO taken count
  1.1555 +        __ cmp32(rbx,
  1.1556 +                 ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1557 +        __ jcc(Assembler::below, dispatch);
  1.1558 +
  1.1559 +        // When ProfileInterpreter is on, the backedge_count comes
   1.1560 +        // from the methodDataOop, whose value does not get reset on
  1.1561 +        // the call to frequency_counter_overflow().  To avoid
  1.1562 +        // excessive calls to the overflow routine while the method is
  1.1563 +        // being compiled, add a second test to make sure the overflow
  1.1564 +        // function is called only once every overflow_frequency.
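          +        // In C terms (names illustrative):
          +        //   if ((taken_count & (overflow_frequency - 1)) == 0)
          +        //     take_overflow_path();
          +        // which relies on overflow_frequency remaining a power of two.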
  1.1565 +        const int overflow_frequency = 1024;
  1.1566 +        __ andl(rbx, overflow_frequency - 1);
  1.1567 +        __ jcc(Assembler::zero, backedge_counter_overflow);
  1.1568 +
  1.1569 +      }
  1.1570 +    } else {
  1.1571 +      if (UseOnStackReplacement) {
  1.1572 +        // check for overflow against eax, which is the sum of the
  1.1573 +        // counters
  1.1574 +        __ cmp32(rax,
  1.1575 +                 ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1576 +        __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
  1.1577 +
  1.1578 +      }
  1.1579 +    }
  1.1580 +    __ bind(dispatch);
  1.1581 +  }
  1.1582 +
  1.1583 +  // Pre-load the next target bytecode into rbx
  1.1584 +  __ load_unsigned_byte(rbx, Address(r13, 0));
  1.1585 +
  1.1586 +  // continue with the bytecode @ target
  1.1587 +  // eax: return bci for jsr's, unused otherwise
  1.1588 +  // ebx: target bytecode
  1.1589 +  // r13: target bcp
  1.1590 +  __ dispatch_only(vtos);
  1.1591 +
  1.1592 +  if (UseLoopCounter) {
  1.1593 +    if (ProfileInterpreter) {
  1.1594 +      // Out-of-line code to allocate method data oop.
  1.1595 +      __ bind(profile_method);
  1.1596 +      __ call_VM(noreg,
  1.1597 +                 CAST_FROM_FN_PTR(address,
  1.1598 +                                  InterpreterRuntime::profile_method), r13);
  1.1599 +      __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
  1.1600 +      __ movq(rcx, Address(rbp, method_offset));
  1.1601 +      __ movq(rcx, Address(rcx,
  1.1602 +                           in_bytes(methodOopDesc::method_data_offset())));
  1.1603 +      __ movq(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize),
  1.1604 +              rcx);
  1.1605 +      __ test_method_data_pointer(rcx, dispatch);
  1.1606 +      // offset non-null mdp by MDO::data_offset() + IR::profile_method()
  1.1607 +      __ addq(rcx, in_bytes(methodDataOopDesc::data_offset()));
  1.1608 +      __ addq(rcx, rax);
  1.1609 +      __ movq(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize),
  1.1610 +              rcx);
  1.1611 +      __ jmp(dispatch);
  1.1612 +    }
  1.1613 +
  1.1614 +    if (UseOnStackReplacement) {
   1.1615 +      // backedge counter overflow
  1.1616 +      __ bind(backedge_counter_overflow);
  1.1617 +      __ negq(rdx);
  1.1618 +      __ addq(rdx, r13); // branch bcp
  1.1619 +      // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
  1.1620 +      __ call_VM(noreg,
  1.1621 +                 CAST_FROM_FN_PTR(address,
  1.1622 +                                  InterpreterRuntime::frequency_counter_overflow),
  1.1623 +                 rdx);
  1.1624 +      __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
  1.1625 +
  1.1626 +      // rax: osr nmethod (osr ok) or NULL (osr not possible)
  1.1627 +      // ebx: target bytecode
  1.1628 +      // rdx: scratch
  1.1629 +      // r14: locals pointer
  1.1630 +      // r13: bcp
  1.1631 +      __ testq(rax, rax);                        // test result
  1.1632 +      __ jcc(Assembler::zero, dispatch);         // no osr if null
  1.1633 +      // nmethod may have been invalidated (VM may block upon call_VM return)
  1.1634 +      __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
  1.1635 +      __ cmpl(rcx, InvalidOSREntryBci);
  1.1636 +      __ jcc(Assembler::equal, dispatch);
  1.1637 +
  1.1638 +      // We have the address of an on stack replacement routine in eax
  1.1639 +      // We need to prepare to execute the OSR method. First we must
  1.1640 +      // migrate the locals and monitors off of the stack.
  1.1641 +
  1.1642 +      __ movq(r13, rax);                             // save the nmethod
  1.1643 +
  1.1644 +      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
  1.1645 +
  1.1646 +      // eax is OSR buffer, move it to expected parameter location
  1.1647 +      __ movq(j_rarg0, rax);
  1.1648 +
   1.1649 +      // We use the j_rarg definitions here because parameter registers
   1.1650 +      // differ across platforms; we are in the midst of setting up the
   1.1651 +      // call to the OSR nmethod and must not clobber them. These are NOT parameters.
  1.1652 +
  1.1653 +      const Register retaddr = j_rarg2;
  1.1654 +      const Register sender_sp = j_rarg1;
  1.1655 +
  1.1656 +      // pop the interpreter frame
  1.1657 +      __ movq(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
  1.1658 +      __ leave();                                // remove frame anchor
  1.1659 +      __ popq(retaddr);                            // get return address
  1.1660 +      __ movq(rsp, sender_sp);                   // set sp to sender sp
  1.1661 +      // Ensure compiled code always sees stack at proper alignment
  1.1662 +      __ andq(rsp, -(StackAlignmentInBytes));
  1.1663 +
  1.1664 +      // unlike x86 we need no specialized return from compiled code
  1.1665 +      // to the interpreter or the call stub.
  1.1666 +
  1.1667 +      // push the return address
  1.1668 +      __ pushq(retaddr);
  1.1669 +
  1.1670 +      // and begin the OSR nmethod
  1.1671 +      __ jmp(Address(r13, nmethod::osr_entry_point_offset()));
  1.1672 +    }
  1.1673 +  }
  1.1674 +}
  1.1675 +
  1.1676 +
  1.1677 +void TemplateTable::if_0cmp(Condition cc) {
  1.1678 +  transition(itos, vtos);
  1.1679 +  // assume branch is more often taken than not (loops use backward branches)
  1.1680 +  Label not_taken;
  1.1681 +  __ testl(rax, rax);
  1.1682 +  __ jcc(j_not(cc), not_taken);
  1.1683 +  branch(false, false);
  1.1684 +  __ bind(not_taken);
  1.1685 +  __ profile_not_taken_branch(rax);
  1.1686 +}
  1.1687 +
  1.1688 +void TemplateTable::if_icmp(Condition cc) {
  1.1689 +  transition(itos, vtos);
  1.1690 +  // assume branch is more often taken than not (loops use backward branches)
  1.1691 +  Label not_taken;
  1.1692 +  __ pop_i(rdx);
  1.1693 +  __ cmpl(rdx, rax);
  1.1694 +  __ jcc(j_not(cc), not_taken);
  1.1695 +  branch(false, false);
  1.1696 +  __ bind(not_taken);
  1.1697 +  __ profile_not_taken_branch(rax);
  1.1698 +}
  1.1699 +
  1.1700 +void TemplateTable::if_nullcmp(Condition cc) {
  1.1701 +  transition(atos, vtos);
  1.1702 +  // assume branch is more often taken than not (loops use backward branches)
  1.1703 +  Label not_taken;
  1.1704 +  __ testq(rax, rax);
  1.1705 +  __ jcc(j_not(cc), not_taken);
  1.1706 +  branch(false, false);
  1.1707 +  __ bind(not_taken);
  1.1708 +  __ profile_not_taken_branch(rax);
  1.1709 +}
  1.1710 +
  1.1711 +void TemplateTable::if_acmp(Condition cc) {
  1.1712 +  transition(atos, vtos);
  1.1713 +  // assume branch is more often taken than not (loops use backward branches)
  1.1714 +  Label not_taken;
  1.1715 +  __ pop_ptr(rdx);
  1.1716 +  __ cmpq(rdx, rax);
  1.1717 +  __ jcc(j_not(cc), not_taken);
  1.1718 +  branch(false, false);
  1.1719 +  __ bind(not_taken);
  1.1720 +  __ profile_not_taken_branch(rax);
  1.1721 +}
  1.1722 +
  1.1723 +void TemplateTable::ret() {
  1.1724 +  transition(vtos, vtos);
  1.1725 +  locals_index(rbx);
  1.1726 +  __ movq(rbx, aaddress(rbx)); // get return bci, compute return bcp
  1.1727 +  __ profile_ret(rbx, rcx);
  1.1728 +  __ get_method(rax);
  1.1729 +  __ movq(r13, Address(rax, methodOopDesc::const_offset()));
  1.1730 +  __ leaq(r13, Address(r13, rbx, Address::times_1,
  1.1731 +                       constMethodOopDesc::codes_offset()));
  1.1732 +  __ dispatch_next(vtos);
  1.1733 +}
  1.1734 +
  1.1735 +void TemplateTable::wide_ret() {
  1.1736 +  transition(vtos, vtos);
  1.1737 +  locals_index_wide(rbx);
  1.1738 +  __ movq(rbx, aaddress(rbx)); // get return bci, compute return bcp
  1.1739 +  __ profile_ret(rbx, rcx);
  1.1740 +  __ get_method(rax);
  1.1741 +  __ movq(r13, Address(rax, methodOopDesc::const_offset()));
  1.1742 +  __ leaq(r13, Address(r13, rbx, Address::times_1, constMethodOopDesc::codes_offset()));
  1.1743 +  __ dispatch_next(vtos);
  1.1744 +}
  1.1745 +
  1.1746 +void TemplateTable::tableswitch() {
  1.1747 +  Label default_case, continue_execution;
  1.1748 +  transition(itos, vtos);
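          +  // Operand layout of tableswitch after the 4-byte alignment below
          +  // (per the JVM spec, all values big-endian, hence the bswapl's):
          +  //   [rbx + 0]  default offset
          +  //   [rbx + 4]  low
          +  //   [rbx + 8]  high
          +  //   [rbx + 12] jump offsets[high - low + 1]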
  1.1749 +  // align r13
  1.1750 +  __ leaq(rbx, at_bcp(BytesPerInt));
  1.1751 +  __ andq(rbx, -BytesPerInt);
  1.1752 +  // load lo & hi
  1.1753 +  __ movl(rcx, Address(rbx, BytesPerInt));
  1.1754 +  __ movl(rdx, Address(rbx, 2 * BytesPerInt));
  1.1755 +  __ bswapl(rcx);
  1.1756 +  __ bswapl(rdx);
  1.1757 +  // check against lo & hi
  1.1758 +  __ cmpl(rax, rcx);
  1.1759 +  __ jcc(Assembler::less, default_case);
  1.1760 +  __ cmpl(rax, rdx);
  1.1761 +  __ jcc(Assembler::greater, default_case);
  1.1762 +  // lookup dispatch offset
  1.1763 +  __ subl(rax, rcx);
  1.1764 +  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
  1.1765 +  __ profile_switch_case(rax, rbx, rcx);
  1.1766 +  // continue execution
  1.1767 +  __ bind(continue_execution);
  1.1768 +  __ bswapl(rdx);
  1.1769 +  __ movslq(rdx, rdx);
  1.1770 +  __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
  1.1771 +  __ addq(r13, rdx);
  1.1772 +  __ dispatch_only(vtos);
  1.1773 +  // handle default
  1.1774 +  __ bind(default_case);
  1.1775 +  __ profile_switch_default(rax);
  1.1776 +  __ movl(rdx, Address(rbx, 0));
  1.1777 +  __ jmp(continue_execution);
  1.1778 +}
  1.1779 +
  1.1780 +void TemplateTable::lookupswitch() {
  1.1781 +  transition(itos, itos);
  1.1782 +  __ stop("lookupswitch bytecode should have been rewritten");
  1.1783 +}
  1.1784 +
  1.1785 +void TemplateTable::fast_linearswitch() {
  1.1786 +  transition(itos, vtos);
  1.1787 +  Label loop_entry, loop, found, continue_execution;
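          +  // Operand layout of lookupswitch after the alignment below (all
          +  // values big-endian):
          +  //   [rbx + 0] default offset
          +  //   [rbx + 4] npairs
          +  //   [rbx + 8] npairs of (match, offset) 4-byte pairs -- hence the
          +  //             times_8 scaling in the loop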
  1.1788 +  // bswap rax so we can avoid bswapping the table entries
  1.1789 +  __ bswapl(rax);
  1.1790 +  // align r13
  1.1791 +  __ leaq(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of
  1.1792 +                                     // this instruction (change offsets
  1.1793 +                                     // below)
  1.1794 +  __ andq(rbx, -BytesPerInt);
  1.1795 +  // set counter
  1.1796 +  __ movl(rcx, Address(rbx, BytesPerInt));
  1.1797 +  __ bswapl(rcx);
  1.1798 +  __ jmpb(loop_entry);
  1.1799 +  // table search
  1.1800 +  __ bind(loop);
  1.1801 +  __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt));
  1.1802 +  __ jcc(Assembler::equal, found);
  1.1803 +  __ bind(loop_entry);
  1.1804 +  __ decrementl(rcx);
  1.1805 +  __ jcc(Assembler::greaterEqual, loop);
  1.1806 +  // default case
  1.1807 +  __ profile_switch_default(rax);
  1.1808 +  __ movl(rdx, Address(rbx, 0));
  1.1809 +  __ jmp(continue_execution);
  1.1810 +  // entry found -> get offset
  1.1811 +  __ bind(found);
  1.1812 +  __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt));
  1.1813 +  __ profile_switch_case(rcx, rax, rbx);
  1.1814 +  // continue execution
  1.1815 +  __ bind(continue_execution);
  1.1816 +  __ bswapl(rdx);
  1.1817 +  __ movslq(rdx, rdx);
  1.1818 +  __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
  1.1819 +  __ addq(r13, rdx);
  1.1820 +  __ dispatch_only(vtos);
  1.1821 +}
  1.1822 +
  1.1823 +void TemplateTable::fast_binaryswitch() {
  1.1824 +  transition(itos, vtos);
  1.1825 +  // Implementation using the following core algorithm:
  1.1826 +  //
  1.1827 +  // int binary_search(int key, LookupswitchPair* array, int n) {
  1.1828 +  //   // Binary search according to "Methodik des Programmierens" by
  1.1829 +  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  1.1830 +  //   int i = 0;
  1.1831 +  //   int j = n;
  1.1832 +  //   while (i+1 < j) {
  1.1833 +  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  1.1834 +  //     // with      Q: for all i: 0 <= i < n: key < a[i]
   1.1835 +  //     // where a stands for the array and assuming that the (nonexistent)
  1.1836 +  //     // element a[n] is infinitely big.
  1.1837 +  //     int h = (i + j) >> 1;
  1.1838 +  //     // i < h < j
  1.1839 +  //     if (key < array[h].fast_match()) {
  1.1840 +  //       j = h;
  1.1841 +  //     } else {
  1.1842 +  //       i = h;
  1.1843 +  //     }
  1.1844 +  //   }
  1.1845 +  //   // R: a[i] <= key < a[i+1] or Q
  1.1846 +  //   // (i.e., if key is within array, i is the correct index)
  1.1847 +  //   return i;
  1.1848 +  // }
  1.1849 +
  1.1850 +  // Register allocation
  1.1851 +  const Register key   = rax; // already set (tosca)
  1.1852 +  const Register array = rbx;
  1.1853 +  const Register i     = rcx;
  1.1854 +  const Register j     = rdx;
  1.1855 +  const Register h     = rdi;
  1.1856 +  const Register temp  = rsi;
  1.1857 +
  1.1858 +  // Find array start
  1.1859 +  __ leaq(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
  1.1860 +                                           // get rid of this
  1.1861 +                                           // instruction (change
  1.1862 +                                           // offsets below)
  1.1863 +  __ andq(array, -BytesPerInt);
  1.1864 +
  1.1865 +  // Initialize i & j
  1.1866 +  __ xorl(i, i);                            // i = 0;
  1.1867 +  __ movl(j, Address(array, -BytesPerInt)); // j = length(array);
  1.1868 +
   1.1869 +  // Convert j into native byte ordering
  1.1870 +  __ bswapl(j);
  1.1871 +
  1.1872 +  // And start
  1.1873 +  Label entry;
  1.1874 +  __ jmp(entry);
  1.1875 +
  1.1876 +  // binary search loop
  1.1877 +  {
  1.1878 +    Label loop;
  1.1879 +    __ bind(loop);
  1.1880 +    // int h = (i + j) >> 1;
  1.1881 +    __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
  1.1882 +    __ sarl(h, 1);                               // h = (i + j) >> 1;
  1.1883 +    // if (key < array[h].fast_match()) {
  1.1884 +    //   j = h;
  1.1885 +    // } else {
  1.1886 +    //   i = h;
  1.1887 +    // }
  1.1888 +    // Convert array[h].match to native byte-ordering before compare
  1.1889 +    __ movl(temp, Address(array, h, Address::times_8));
  1.1890 +    __ bswapl(temp);
  1.1891 +    __ cmpl(key, temp);
  1.1892 +    // j = h if (key <  array[h].fast_match())
  1.1893 +    __ cmovl(Assembler::less, j, h);
  1.1894 +    // i = h if (key >= array[h].fast_match())
  1.1895 +    __ cmovl(Assembler::greaterEqual, i, h);
  1.1896 +    // while (i+1 < j)
  1.1897 +    __ bind(entry);
  1.1898 +    __ leal(h, Address(i, 1)); // i+1
  1.1899 +    __ cmpl(h, j);             // i+1 < j
  1.1900 +    __ jcc(Assembler::less, loop);
  1.1901 +  }
  1.1902 +
  1.1903 +  // end of binary search, result index is i (must check again!)
  1.1904 +  Label default_case;
  1.1905 +  // Convert array[i].match to native byte-ordering before compare
  1.1906 +  __ movl(temp, Address(array, i, Address::times_8));
  1.1907 +  __ bswapl(temp);
  1.1908 +  __ cmpl(key, temp);
  1.1909 +  __ jcc(Assembler::notEqual, default_case);
  1.1910 +
  1.1911 +  // entry found -> j = offset
  1.1912 +  __ movl(j , Address(array, i, Address::times_8, BytesPerInt));
  1.1913 +  __ profile_switch_case(i, key, array);
  1.1914 +  __ bswapl(j);
  1.1915 +  __ movslq(j, j);
  1.1916 +  __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
  1.1917 +  __ addq(r13, j);
  1.1918 +  __ dispatch_only(vtos);
  1.1919 +
  1.1920 +  // default case -> j = default offset
  1.1921 +  __ bind(default_case);
  1.1922 +  __ profile_switch_default(i);
  1.1923 +  __ movl(j, Address(array, -2 * BytesPerInt));
  1.1924 +  __ bswapl(j);
  1.1925 +  __ movslq(j, j);
  1.1926 +  __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
  1.1927 +  __ addq(r13, j);
  1.1928 +  __ dispatch_only(vtos);
  1.1929 +}
  1.1930 +
  1.1931 +
  1.1932 +void TemplateTable::_return(TosState state) {
  1.1933 +  transition(state, state);
  1.1934 +  assert(_desc->calls_vm(),
  1.1935 +         "inconsistent calls_vm information"); // call in remove_activation
  1.1936 +
  1.1937 +  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  1.1938 +    assert(state == vtos, "only valid state");
  1.1939 +    __ movq(c_rarg1, aaddress(0));
  1.1940 +    __ movq(rdi, Address(c_rarg1, oopDesc::klass_offset_in_bytes()));
  1.1941 +    __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
  1.1942 +    __ testl(rdi, JVM_ACC_HAS_FINALIZER);
  1.1943 +    Label skip_register_finalizer;
  1.1944 +    __ jcc(Assembler::zero, skip_register_finalizer);
  1.1945 +
  1.1946 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
  1.1947 +
  1.1948 +    __ bind(skip_register_finalizer);
  1.1949 +  }
  1.1950 +
  1.1951 +  __ remove_activation(state, r13);
  1.1952 +  __ jmp(r13);
  1.1953 +}
  1.1954 +
  1.1955 +// ----------------------------------------------------------------------------
   1.1956 +// Volatile variables demand their effects be made known to all CPUs
  1.1957 +// in order.  Store buffers on most chips allow reads & writes to
  1.1958 +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
  1.1959 +// without some kind of memory barrier (i.e., it's not sufficient that
  1.1960 +// the interpreter does not reorder volatile references, the hardware
  1.1961 +// also must not reorder them).
  1.1962 +//
  1.1963 +// According to the new Java Memory Model (JMM):
   1.1964 +// (1) All volatiles are serialized with respect to each other.  ALSO reads &
   1.1965 +//     writes act as acquire & release, so:
  1.1966 +// (2) A read cannot let unrelated NON-volatile memory refs that
  1.1967 +//     happen after the read float up to before the read.  It's OK for
  1.1968 +//     non-volatile memory refs that happen before the volatile read to
  1.1969 +//     float down below it.
   1.1970 +// (3) Similarly, a volatile write cannot let unrelated NON-volatile
  1.1971 +//     memory refs that happen BEFORE the write float down to after the
  1.1972 +//     write.  It's OK for non-volatile memory refs that happen after the
  1.1973 +//     volatile write to float up before it.
  1.1974 +//
  1.1975 +// We only put in barriers around volatile refs (they are expensive),
  1.1976 +// not _between_ memory refs (that would require us to track the
  1.1977 +// flavor of the previous memory refs).  Requirements (2) and (3)
  1.1978 +// require some barriers before volatile stores and after volatile
  1.1979 +// loads.  These nearly cover requirement (1) but miss the
  1.1980 +// volatile-store-volatile-load case.  This final case is placed after
  1.1981 +// volatile-stores although it could just as well go before
  1.1982 +// volatile-loads.
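          +//
          +// Concretely, a sketch of what this translates to for a volatile
          +// field (not literal generated code; on amd64 plain loads already
          +// have acquire semantics, so no barrier is emitted after volatile
          +// loads and only the StoreLoad case needs a real fence):
          +//
          +//   volatile store:  *field = value;
          +//                    volatile_barrier(StoreLoad | StoreStore);
          +//   volatile load:   value = *field;   // no barrier emitted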
  1.1983 +void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
  1.1984 +                                     order_constraint) {
   1.1985 +  // Helper function to insert an is-volatile test and memory barrier
  1.1986 +  if (os::is_MP()) { // Not needed on single CPU
  1.1987 +    __ membar(order_constraint);
  1.1988 +  }
  1.1989 +}
  1.1990 +
  1.1991 +void TemplateTable::resolve_cache_and_index(int byte_no,
  1.1992 +                                            Register Rcache,
  1.1993 +                                            Register index) {
  1.1994 +  assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
  1.1995 +
  1.1996 +  const Register temp = rbx;
  1.1997 +  assert_different_registers(Rcache, index, temp);
  1.1998 +
  1.1999 +  const int shift_count = (1 + byte_no) * BitsPerByte;
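          +  // The indices field packs the original constant pool index in its
          +  // low 16 bits and the resolved bytecodes for byte_no 1 and 2 in
          +  // the next two bytes, so byte_no 1 shifts by 16 and byte_no 2 by 24.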
  1.2000 +  Label resolved;
  1.2001 +  __ get_cache_and_index_at_bcp(Rcache, index, 1);
  1.2002 +  __ movl(temp, Address(Rcache,
  1.2003 +                        index, Address::times_8,
  1.2004 +                        constantPoolCacheOopDesc::base_offset() +
  1.2005 +                        ConstantPoolCacheEntry::indices_offset()));
  1.2006 +  __ shrl(temp, shift_count);
  1.2007 +  // have we resolved this bytecode?
  1.2008 +  __ andl(temp, 0xFF);
  1.2009 +  __ cmpl(temp, (int) bytecode());
  1.2010 +  __ jcc(Assembler::equal, resolved);
  1.2011 +
  1.2012 +  // resolve first time through
  1.2013 +  address entry;
  1.2014 +  switch (bytecode()) {
  1.2015 +  case Bytecodes::_getstatic:
  1.2016 +  case Bytecodes::_putstatic:
  1.2017 +  case Bytecodes::_getfield:
  1.2018 +  case Bytecodes::_putfield:
  1.2019 +    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
  1.2020 +    break;
  1.2021 +  case Bytecodes::_invokevirtual:
  1.2022 +  case Bytecodes::_invokespecial:
  1.2023 +  case Bytecodes::_invokestatic:
  1.2024 +  case Bytecodes::_invokeinterface:
  1.2025 +    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
  1.2026 +    break;
  1.2027 +  default:
  1.2028 +    ShouldNotReachHere();
  1.2029 +    break;
  1.2030 +  }
  1.2031 +  __ movl(temp, (int) bytecode());
  1.2032 +  __ call_VM(noreg, entry, temp);
  1.2033 +
  1.2034 +  // Update registers with resolved info
  1.2035 +  __ get_cache_and_index_at_bcp(Rcache, index, 1);
  1.2036 +  __ bind(resolved);
  1.2037 +}
  1.2038 +
   1.2039 +// The Rcache and index registers must be set before this call
  1.2040 +void TemplateTable::load_field_cp_cache_entry(Register obj,
  1.2041 +                                              Register cache,
  1.2042 +                                              Register index,
  1.2043 +                                              Register off,
  1.2044 +                                              Register flags,
  1.2045 +                                              bool is_static = false) {
  1.2046 +  assert_different_registers(cache, index, flags, off);
  1.2047 +
  1.2048 +  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
  1.2049 +  // Field offset
  1.2050 +  __ movq(off, Address(cache, index, Address::times_8,
  1.2051 +                       in_bytes(cp_base_offset +
  1.2052 +                                ConstantPoolCacheEntry::f2_offset())));
  1.2053 +  // Flags
  1.2054 +  __ movl(flags, Address(cache, index, Address::times_8,
  1.2055 +                         in_bytes(cp_base_offset +
  1.2056 +                                  ConstantPoolCacheEntry::flags_offset())));
  1.2057 +
  1.2058 +  // klass overwrite register
  1.2059 +  if (is_static) {
  1.2060 +    __ movq(obj, Address(cache, index, Address::times_8,
  1.2061 +                         in_bytes(cp_base_offset +
  1.2062 +                                  ConstantPoolCacheEntry::f1_offset())));
  1.2063 +  }
  1.2064 +}
  1.2065 +
  1.2066 +void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  1.2067 +                                               Register method,
  1.2068 +                                               Register itable_index,
  1.2069 +                                               Register flags,
  1.2070 +                                               bool is_invokevirtual,
  1.2071 +                                               bool is_invokevfinal /*unused*/) {
  1.2072 +  // setup registers
  1.2073 +  const Register cache = rcx;
  1.2074 +  const Register index = rdx;
  1.2075 +  assert_different_registers(method, flags);
  1.2076 +  assert_different_registers(method, cache, index);
  1.2077 +  assert_different_registers(itable_index, flags);
  1.2078 +  assert_different_registers(itable_index, cache, index);
  1.2079 +  // determine constant pool cache field offsets
  1.2080 +  const int method_offset = in_bytes(
  1.2081 +    constantPoolCacheOopDesc::base_offset() +
  1.2082 +      (is_invokevirtual
  1.2083 +       ? ConstantPoolCacheEntry::f2_offset()
  1.2084 +       : ConstantPoolCacheEntry::f1_offset()));
  1.2085 +  const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2086 +                                    ConstantPoolCacheEntry::flags_offset());
  1.2087 +  // access constant pool cache fields
  1.2088 +  const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2089 +                                    ConstantPoolCacheEntry::f2_offset());
  1.2090 +
  1.2091 +  resolve_cache_and_index(byte_no, cache, index);
  1.2092 +
  1.2093 +  assert(wordSize == 8, "adjust code below");
  1.2094 +  __ movq(method, Address(cache, index, Address::times_8, method_offset));
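          +  // For invokevirtual, the value just loaded from f2 is either a
          +  // vtable index or, for a final method, the methodOop itself; the
          +  // vfinal flag decides which (see invokevirtual_helper).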
  1.2095 +  if (itable_index != noreg) {
  1.2096 +    __ movq(itable_index,
  1.2097 +            Address(cache, index, Address::times_8, index_offset));
  1.2098 +  }
  1.2099 +  __ movl(flags , Address(cache, index, Address::times_8, flags_offset));
  1.2100 +}
  1.2101 +
  1.2102 +
   1.2103 +// The cache and index registers are expected to be set before this call.
  1.2104 +// Correct values of the cache and index registers are preserved.
  1.2105 +void TemplateTable::jvmti_post_field_access(Register cache, Register index,
  1.2106 +                                            bool is_static, bool has_tos) {
   1.2107 +  // Do the JVMTI work here to avoid disturbing the register state below.
   1.2108 +  // We use the c_rarg registers so that the values end up directly in the
   1.2109 +  // registers used for the call into the VM.
  1.2110 +  if (JvmtiExport::can_post_field_access()) {
  1.2111 +    // Check to see if a field access watch has been set before we
  1.2112 +    // take the time to call into the VM.
  1.2113 +    Label L1;
  1.2114 +    assert_different_registers(cache, index, rax);
  1.2115 +    __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2116 +    __ testl(rax, rax);
  1.2117 +    __ jcc(Assembler::zero, L1);
  1.2118 +
  1.2119 +    __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
  1.2120 +
  1.2121 +    // cache entry pointer
  1.2122 +    __ addq(c_rarg2, in_bytes(constantPoolCacheOopDesc::base_offset()));
  1.2123 +    __ shll(c_rarg3, LogBytesPerWord);
  1.2124 +    __ addq(c_rarg2, c_rarg3);
  1.2125 +    if (is_static) {
  1.2126 +      __ xorl(c_rarg1, c_rarg1); // NULL object reference
  1.2127 +    } else {
  1.2128 +      __ movq(c_rarg1, at_tos()); // get object pointer without popping it
  1.2129 +      __ verify_oop(c_rarg1);
  1.2130 +    }
  1.2131 +    // c_rarg1: object pointer or NULL
  1.2132 +    // c_rarg2: cache entry pointer
  1.2133 +    // c_rarg3: jvalue object on the stack
  1.2134 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.2135 +                                       InterpreterRuntime::post_field_access),
  1.2136 +               c_rarg1, c_rarg2, c_rarg3);
  1.2137 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2138 +    __ bind(L1);
  1.2139 +  }
  1.2140 +}
  1.2141 +
  1.2142 +void TemplateTable::pop_and_check_object(Register r) {
  1.2143 +  __ pop_ptr(r);
  1.2144 +  __ null_check(r);  // for field access must check obj.
  1.2145 +  __ verify_oop(r);
  1.2146 +}
  1.2147 +
  1.2148 +void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  1.2149 +  transition(vtos, vtos);
  1.2150 +
  1.2151 +  const Register cache = rcx;
  1.2152 +  const Register index = rdx;
  1.2153 +  const Register obj   = c_rarg3;
  1.2154 +  const Register off   = rbx;
  1.2155 +  const Register flags = rax;
  1.2156 +  const Register bc = c_rarg3; // uses same reg as obj, so don't mix them
  1.2157 +
  1.2158 +  resolve_cache_and_index(byte_no, cache, index);
  1.2159 +  jvmti_post_field_access(cache, index, is_static, false);
  1.2160 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2161 +
  1.2162 +  if (!is_static) {
  1.2163 +    // obj is on the stack
  1.2164 +    pop_and_check_object(obj);
  1.2165 +  }
  1.2166 +
  1.2167 +  const Address field(obj, off, Address::times_1);
  1.2168 +
  1.2169 +  Label Done, notByte, notInt, notShort, notChar,
  1.2170 +              notLong, notFloat, notObj, notDouble;
  1.2171 +
  1.2172 +  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
  1.2173 +  assert(btos == 0, "change code, btos != 0");
  1.2174 +
  1.2175 +  __ andl(flags, 0x0F);
  1.2176 +  __ jcc(Assembler::notZero, notByte);
  1.2177 +  // btos
  1.2178 +  __ load_signed_byte(rax, field);
  1.2179 +  __ push(btos);
  1.2180 +  // Rewrite bytecode to be faster
  1.2181 +  if (!is_static) {
  1.2182 +    patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
  1.2183 +  }
  1.2184 +  __ jmp(Done);
  1.2185 +
  1.2186 +  __ bind(notByte);
  1.2187 +  __ cmpl(flags, atos);
  1.2188 +  __ jcc(Assembler::notEqual, notObj);
  1.2189 +  // atos
  1.2190 +  __ movq(rax, field);
  1.2191 +  __ push(atos);
  1.2192 +  if (!is_static) {
  1.2193 +    patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
  1.2194 +  }
  1.2195 +  __ jmp(Done);
  1.2196 +
  1.2197 +  __ bind(notObj);
  1.2198 +  __ cmpl(flags, itos);
  1.2199 +  __ jcc(Assembler::notEqual, notInt);
  1.2200 +  // itos
  1.2201 +  __ movl(rax, field);
  1.2202 +  __ push(itos);
  1.2203 +  // Rewrite bytecode to be faster
  1.2204 +  if (!is_static) {
  1.2205 +    patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
  1.2206 +  }
  1.2207 +  __ jmp(Done);
  1.2208 +
  1.2209 +  __ bind(notInt);
  1.2210 +  __ cmpl(flags, ctos);
  1.2211 +  __ jcc(Assembler::notEqual, notChar);
  1.2212 +  // ctos
  1.2213 +  __ load_unsigned_word(rax, field);
  1.2214 +  __ push(ctos);
  1.2215 +  // Rewrite bytecode to be faster
  1.2216 +  if (!is_static) {
  1.2217 +    patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
  1.2218 +  }
  1.2219 +  __ jmp(Done);
  1.2220 +
  1.2221 +  __ bind(notChar);
  1.2222 +  __ cmpl(flags, stos);
  1.2223 +  __ jcc(Assembler::notEqual, notShort);
  1.2224 +  // stos
  1.2225 +  __ load_signed_word(rax, field);
  1.2226 +  __ push(stos);
  1.2227 +  // Rewrite bytecode to be faster
  1.2228 +  if (!is_static) {
  1.2229 +    patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
  1.2230 +  }
  1.2231 +  __ jmp(Done);
  1.2232 +
  1.2233 +  __ bind(notShort);
  1.2234 +  __ cmpl(flags, ltos);
  1.2235 +  __ jcc(Assembler::notEqual, notLong);
  1.2236 +  // ltos
  1.2237 +  __ movq(rax, field);
  1.2238 +  __ push(ltos);
  1.2239 +  // Rewrite bytecode to be faster
  1.2240 +  if (!is_static) {
  1.2241 +    patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx);
  1.2242 +  }
  1.2243 +  __ jmp(Done);
  1.2244 +
  1.2245 +  __ bind(notLong);
  1.2246 +  __ cmpl(flags, ftos);
  1.2247 +  __ jcc(Assembler::notEqual, notFloat);
  1.2248 +  // ftos
  1.2249 +  __ movflt(xmm0, field);
  1.2250 +  __ push(ftos);
  1.2251 +  // Rewrite bytecode to be faster
  1.2252 +  if (!is_static) {
  1.2253 +    patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
  1.2254 +  }
  1.2255 +  __ jmp(Done);
  1.2256 +
  1.2257 +  __ bind(notFloat);
  1.2258 +#ifdef ASSERT
  1.2259 +  __ cmpl(flags, dtos);
  1.2260 +  __ jcc(Assembler::notEqual, notDouble);
  1.2261 +#endif
  1.2262 +  // dtos
  1.2263 +  __ movdbl(xmm0, field);
  1.2264 +  __ push(dtos);
  1.2265 +  // Rewrite bytecode to be faster
  1.2266 +  if (!is_static) {
  1.2267 +    patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
  1.2268 +  }
  1.2269 +#ifdef ASSERT
  1.2270 +  __ jmp(Done);
  1.2271 +
  1.2272 +  __ bind(notDouble);
  1.2273 +  __ stop("Bad state");
  1.2274 +#endif
  1.2275 +
  1.2276 +  __ bind(Done);
  1.2277 +  // [jk] not needed currently
  1.2278 +  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadLoad |
  1.2279 +  //                                              Assembler::LoadStore));
  1.2280 +}
  1.2281 +
  1.2282 +
  1.2283 +void TemplateTable::getfield(int byte_no) {
  1.2284 +  getfield_or_static(byte_no, false);
  1.2285 +}
  1.2286 +
  1.2287 +void TemplateTable::getstatic(int byte_no) {
  1.2288 +  getfield_or_static(byte_no, true);
  1.2289 +}
  1.2290 +
   1.2291 +// The cache and index registers are expected to be set before this call.
  1.2292 +// The function may destroy various registers, just not the cache and index registers.
  1.2293 +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  1.2294 +  transition(vtos, vtos);
  1.2295 +
  1.2296 +  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
  1.2297 +
  1.2298 +  if (JvmtiExport::can_post_field_modification()) {
  1.2299 +    // Check to see if a field modification watch has been set before
  1.2300 +    // we take the time to call into the VM.
  1.2301 +    Label L1;
  1.2302 +    assert_different_registers(cache, index, rax);
  1.2303 +    __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2304 +    __ testl(rax, rax);
  1.2305 +    __ jcc(Assembler::zero, L1);
  1.2306 +
  1.2307 +    __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1);
  1.2308 +
  1.2309 +    if (is_static) {
  1.2310 +      // Life is simple.  Null out the object pointer.
  1.2311 +      __ xorl(c_rarg1, c_rarg1);
  1.2312 +    } else {
  1.2313 +      // Life is harder. The stack holds the value on top, followed by
  1.2314 +      // the object.  We don't know the size of the value, though; it
  1.2315 +      // could be one or two words depending on its type. As a result,
  1.2316 +      // we must find the type to determine where the object is.
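          +      // Ignoring expression stack tags, the layout here is:
          +      //   rsp -> [ value ]       (one or two words)
          +      //          [ object ref ]  i.e. at_tos_p1() or at_tos_p2()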
  1.2317 +      __ movl(c_rarg3, Address(c_rarg2, rscratch1,
  1.2318 +                           Address::times_8,
  1.2319 +                           in_bytes(cp_base_offset +
  1.2320 +                                     ConstantPoolCacheEntry::flags_offset())));
  1.2321 +      __ shrl(c_rarg3, ConstantPoolCacheEntry::tosBits);
  1.2322 +      // Make sure we don't need to mask rcx for tosBits after the
  1.2323 +      // above shift
  1.2324 +      ConstantPoolCacheEntry::verify_tosBits();
  1.2325 +      __ movq(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
  1.2326 +      __ cmpl(c_rarg3, ltos);
  1.2327 +      __ cmovq(Assembler::equal,
  1.2328 +               c_rarg1, at_tos_p2()); // ltos (two word jvalue)
  1.2329 +      __ cmpl(c_rarg3, dtos);
  1.2330 +      __ cmovq(Assembler::equal,
  1.2331 +               c_rarg1, at_tos_p2()); // dtos (two word jvalue)
  1.2332 +    }
  1.2333 +    // cache entry pointer
  1.2334 +    __ addq(c_rarg2, in_bytes(cp_base_offset));
  1.2335 +    __ shll(rscratch1, LogBytesPerWord);
  1.2336 +    __ addq(c_rarg2, rscratch1);
  1.2337 +    // object (tos)
  1.2338 +    __ movq(c_rarg3, rsp);
  1.2339 +    // c_rarg1: object pointer set up above (NULL if static)
  1.2340 +    // c_rarg2: cache entry pointer
  1.2341 +    // c_rarg3: jvalue object on the stack
  1.2342 +    __ call_VM(noreg,
  1.2343 +               CAST_FROM_FN_PTR(address,
  1.2344 +                                InterpreterRuntime::post_field_modification),
  1.2345 +               c_rarg1, c_rarg2, c_rarg3);
  1.2346 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2347 +    __ bind(L1);
  1.2348 +  }
  1.2349 +}
  1.2350 +
  1.2351 +void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  1.2352 +  transition(vtos, vtos);
  1.2353 +
  1.2354 +  const Register cache = rcx;
  1.2355 +  const Register index = rdx;
  1.2356 +  const Register obj   = rcx;
  1.2357 +  const Register off   = rbx;
  1.2358 +  const Register flags = rax;
  1.2359 +  const Register bc    = c_rarg3;
  1.2360 +
  1.2361 +  resolve_cache_and_index(byte_no, cache, index);
  1.2362 +  jvmti_post_field_mod(cache, index, is_static);
  1.2363 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2364 +
  1.2365 +  // [jk] not needed currently
  1.2366 +  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
  1.2367 +  //                                              Assembler::StoreStore));
  1.2368 +
  1.2369 +  Label notVolatile, Done;
  1.2370 +  __ movl(rdx, flags);
  1.2371 +  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
  1.2372 +  __ andl(rdx, 0x1);
  1.2373 +
  1.2374 +  // field address
  1.2375 +  const Address field(obj, off, Address::times_1);
  1.2376 +
  1.2377 +  Label notByte, notInt, notShort, notChar,
  1.2378 +        notLong, notFloat, notObj, notDouble;
  1.2379 +
  1.2380 +  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
  1.2381 +
  1.2382 +  assert(btos == 0, "change code, btos != 0");
  1.2383 +  __ andl(flags, 0x0f);
  1.2384 +  __ jcc(Assembler::notZero, notByte);
  1.2385 +  // btos
  1.2386 +  __ pop(btos);
  1.2387 +  if (!is_static) pop_and_check_object(obj);
  1.2388 +  __ movb(field, rax);
  1.2389 +  if (!is_static) {
  1.2390 +    patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx);
  1.2391 +  }
  1.2392 +  __ jmp(Done);
  1.2393 +
  1.2394 +  __ bind(notByte);
  1.2395 +  __ cmpl(flags, atos);
  1.2396 +  __ jcc(Assembler::notEqual, notObj);
  1.2397 +  // atos
  1.2398 +  __ pop(atos);
  1.2399 +  if (!is_static) pop_and_check_object(obj);
  1.2400 +  __ movq(field, rax);
  1.2401 +  __ store_check(obj, field); // Need to mark card
  1.2402 +  if (!is_static) {
  1.2403 +    patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
  1.2404 +  }
  1.2405 +  __ jmp(Done);
  1.2406 +
  1.2407 +  __ bind(notObj);
  1.2408 +  __ cmpl(flags, itos);
  1.2409 +  __ jcc(Assembler::notEqual, notInt);
  1.2410 +  // itos
  1.2411 +  __ pop(itos);
  1.2412 +  if (!is_static) pop_and_check_object(obj);
  1.2413 +  __ movl(field, rax);
  1.2414 +  if (!is_static) {
  1.2415 +    patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx);
  1.2416 +  }
  1.2417 +  __ jmp(Done);
  1.2418 +
  1.2419 +  __ bind(notInt);
  1.2420 +  __ cmpl(flags, ctos);
  1.2421 +  __ jcc(Assembler::notEqual, notChar);
  1.2422 +  // ctos
  1.2423 +  __ pop(ctos);
  1.2424 +  if (!is_static) pop_and_check_object(obj);
  1.2425 +  __ movw(field, rax);
  1.2426 +  if (!is_static) {
  1.2427 +    patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx);
  1.2428 +  }
  1.2429 +  __ jmp(Done);
  1.2430 +
  1.2431 +  __ bind(notChar);
  1.2432 +  __ cmpl(flags, stos);
  1.2433 +  __ jcc(Assembler::notEqual, notShort);
  1.2434 +  // stos
  1.2435 +  __ pop(stos);
  1.2436 +  if (!is_static) pop_and_check_object(obj);
  1.2437 +  __ movw(field, rax);
  1.2438 +  if (!is_static) {
  1.2439 +    patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx);
  1.2440 +  }
  1.2441 +  __ jmp(Done);
  1.2442 +
  1.2443 +  __ bind(notShort);
  1.2444 +  __ cmpl(flags, ltos);
  1.2445 +  __ jcc(Assembler::notEqual, notLong);
  1.2446 +  // ltos
  1.2447 +  __ pop(ltos);
  1.2448 +  if (!is_static) pop_and_check_object(obj);
  1.2449 +  __ movq(field, rax);
  1.2450 +  if (!is_static) {
  1.2451 +    patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx);
  1.2452 +  }
  1.2453 +  __ jmp(Done);
  1.2454 +
  1.2455 +  __ bind(notLong);
  1.2456 +  __ cmpl(flags, ftos);
  1.2457 +  __ jcc(Assembler::notEqual, notFloat);
  1.2458 +  // ftos
  1.2459 +  __ pop(ftos);
  1.2460 +  if (!is_static) pop_and_check_object(obj);
  1.2461 +  __ movflt(field, xmm0);
  1.2462 +  if (!is_static) {
  1.2463 +    patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx);
  1.2464 +  }
  1.2465 +  __ jmp(Done);
  1.2466 +
  1.2467 +  __ bind(notFloat);
  1.2468 +#ifdef ASSERT
  1.2469 +  __ cmpl(flags, dtos);
  1.2470 +  __ jcc(Assembler::notEqual, notDouble);
  1.2471 +#endif
  1.2472 +  // dtos
  1.2473 +  __ pop(dtos);
  1.2474 +  if (!is_static) pop_and_check_object(obj);
  1.2475 +  __ movdbl(field, xmm0);
  1.2476 +  if (!is_static) {
  1.2477 +    patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx);
  1.2478 +  }
  1.2479 +
  1.2480 +#ifdef ASSERT
  1.2481 +  __ jmp(Done);
  1.2482 +
  1.2483 +  __ bind(notDouble);
  1.2484 +  __ stop("Bad state");
  1.2485 +#endif
  1.2486 +
  1.2487 +  __ bind(Done);
  1.2488 +  // Check for volatile store
  1.2489 +  __ testl(rdx, rdx);
  1.2490 +  __ jcc(Assembler::zero, notVolatile);
  1.2491 +  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
  1.2492 +                                               Assembler::StoreStore));
  1.2493 +
  1.2494 +  __ bind(notVolatile);
  1.2495 +}
  1.2496 +
  1.2497 +void TemplateTable::putfield(int byte_no) {
  1.2498 +  putfield_or_static(byte_no, false);
  1.2499 +}
  1.2500 +
  1.2501 +void TemplateTable::putstatic(int byte_no) {
  1.2502 +  putfield_or_static(byte_no, true);
  1.2503 +}
  1.2504 +
  1.2505 +void TemplateTable::jvmti_post_fast_field_mod() {
  1.2506 +  if (JvmtiExport::can_post_field_modification()) {
  1.2507 +    // Check to see if a field modification watch has been set before
  1.2508 +    // we take the time to call into the VM.
  1.2509 +    Label L2;
  1.2510 +    __ mov32(c_rarg3, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2511 +    __ testl(c_rarg3, c_rarg3);
  1.2512 +    __ jcc(Assembler::zero, L2);
  1.2513 +    __ pop_ptr(rbx);                  // copy the object pointer from tos
  1.2514 +    __ verify_oop(rbx);
  1.2515 +    __ push_ptr(rbx);                 // put the object pointer back on tos
  1.2516 +    __ subq(rsp, sizeof(jvalue));  // add space for a jvalue object
  1.2517 +    __ movq(c_rarg3, rsp);
  1.2518 +    const Address field(c_rarg3, 0);
  1.2519 +
  1.2520 +    switch (bytecode()) {          // load values into the jvalue object
  1.2521 +    case Bytecodes::_fast_aputfield: // fall through
  1.2522 +    case Bytecodes::_fast_lputfield: __ movq(field, rax); break;
  1.2523 +    case Bytecodes::_fast_iputfield: __ movl(field, rax); break;
  1.2524 +    case Bytecodes::_fast_bputfield: __ movb(field, rax); break;
  1.2525 +    case Bytecodes::_fast_sputfield: // fall through
  1.2526 +    case Bytecodes::_fast_cputfield: __ movw(field, rax); break;
  1.2527 +    case Bytecodes::_fast_fputfield: __ movflt(field, xmm0); break;
  1.2528 +    case Bytecodes::_fast_dputfield: __ movdbl(field, xmm0); break;
  1.2529 +    default:
  1.2530 +      ShouldNotReachHere();
  1.2531 +    }
  1.2532 +
  1.2533 +    // Save rax because call_VM() will clobber it, then use it for
  1.2534 +    // JVMTI purposes
  1.2535 +    __ pushq(rax);
  1.2536 +    // access constant pool cache entry
  1.2537 +    __ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1);
  1.2538 +    __ verify_oop(rbx);
  1.2539 +    // rbx: object pointer copied above
  1.2540 +    // c_rarg2: cache entry pointer
  1.2541 +    // c_rarg3: jvalue object on the stack
  1.2542 +    __ call_VM(noreg,
  1.2543 +               CAST_FROM_FN_PTR(address,
  1.2544 +                                InterpreterRuntime::post_field_modification),
  1.2545 +               rbx, c_rarg2, c_rarg3);
   1.2546 +    __ popq(rax);     // restore rax, saved above
  1.2547 +    __ addq(rsp, sizeof(jvalue));  // release jvalue object space
  1.2548 +    __ bind(L2);
  1.2549 +  }
  1.2550 +}
  1.2551 +
  1.2552 +void TemplateTable::fast_storefield(TosState state) {
  1.2553 +  transition(state, vtos);
  1.2554 +
  1.2555 +  ByteSize base = constantPoolCacheOopDesc::base_offset();
  1.2556 +
  1.2557 +  jvmti_post_fast_field_mod();
  1.2558 +
  1.2559 +  // access constant pool cache
  1.2560 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2561 +
  1.2562 +  // test for volatile with rdx
  1.2563 +  __ movl(rdx, Address(rcx, rbx, Address::times_8,
  1.2564 +                       in_bytes(base +
  1.2565 +                                ConstantPoolCacheEntry::flags_offset())));
  1.2566 +
  1.2567 +  // replace index with field offset from cache entry
  1.2568 +  __ movq(rbx, Address(rcx, rbx, Address::times_8,
  1.2569 +                       in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
  1.2570 +
  1.2571 +  // [jk] not needed currently
  1.2572 +  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
  1.2573 +  //                                              Assembler::StoreStore));
  1.2574 +
  1.2575 +  Label notVolatile;
  1.2576 +  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
  1.2577 +  __ andl(rdx, 0x1);
  1.2578 +
  1.2579 +  // Get object from stack
  1.2580 +  pop_and_check_object(rcx);
  1.2581 +
  1.2582 +  // field address
  1.2583 +  const Address field(rcx, rbx, Address::times_1);
  1.2584 +
  1.2585 +  // access field
  1.2586 +  switch (bytecode()) {
  1.2587 +  case Bytecodes::_fast_aputfield:
  1.2588 +    __ movq(field, rax);
  1.2589 +    __ store_check(rcx, field);
  1.2590 +    break;
  1.2591 +  case Bytecodes::_fast_lputfield:
  1.2592 +    __ movq(field, rax);
  1.2593 +    break;
  1.2594 +  case Bytecodes::_fast_iputfield:
  1.2595 +    __ movl(field, rax);
  1.2596 +    break;
  1.2597 +  case Bytecodes::_fast_bputfield:
  1.2598 +    __ movb(field, rax);
  1.2599 +    break;
  1.2600 +  case Bytecodes::_fast_sputfield:
  1.2601 +    // fall through
  1.2602 +  case Bytecodes::_fast_cputfield:
  1.2603 +    __ movw(field, rax);
  1.2604 +    break;
  1.2605 +  case Bytecodes::_fast_fputfield:
  1.2606 +    __ movflt(field, xmm0);
  1.2607 +    break;
  1.2608 +  case Bytecodes::_fast_dputfield:
  1.2609 +    __ movdbl(field, xmm0);
  1.2610 +    break;
  1.2611 +  default:
  1.2612 +    ShouldNotReachHere();
  1.2613 +  }
  1.2614 +
  1.2615 +  // Check for volatile store
  1.2616 +  __ testl(rdx, rdx);
  1.2617 +  __ jcc(Assembler::zero, notVolatile);
  1.2618 +  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
  1.2619 +                                               Assembler::StoreStore));
  1.2620 +  __ bind(notVolatile);
  1.2621 +}
  1.2622 +
  1.2623 +
  1.2624 +void TemplateTable::fast_accessfield(TosState state) {
  1.2625 +  transition(atos, state);
  1.2626 +
  1.2627 +  // Do the JVMTI work here to avoid disturbing the register state below
  1.2628 +  if (JvmtiExport::can_post_field_access()) {
  1.2629 +    // Check to see if a field access watch has been set before we
  1.2630 +    // take the time to call into the VM.
  1.2631 +    Label L1;
  1.2632 +    __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2633 +    __ testl(rcx, rcx);
  1.2634 +    __ jcc(Assembler::zero, L1);
  1.2635 +    // access constant pool cache entry
  1.2636 +    __ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1);
  1.2637 +    __ movq(r12, rax);  // save object pointer before call_VM() clobbers it
  1.2638 +    __ verify_oop(rax);
  1.2639 +    __ movq(c_rarg1, rax);
  1.2640 +    // c_rarg1: object pointer copied above
  1.2641 +    // c_rarg2: cache entry pointer
  1.2642 +    __ call_VM(noreg,
  1.2643 +               CAST_FROM_FN_PTR(address,
  1.2644 +                                InterpreterRuntime::post_field_access),
  1.2645 +               c_rarg1, c_rarg2);
  1.2646 +    __ movq(rax, r12); // restore object pointer
  1.2647 +    __ bind(L1);
  1.2648 +  }
  1.2649 +
  1.2650 +  // access constant pool cache
  1.2651 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2652 +  // replace index with field offset from cache entry
  1.2653 +  // [jk] not needed currently
  1.2654 +  // if (os::is_MP()) {
  1.2655 +  //   __ movl(rdx, Address(rcx, rbx, Address::times_8,
  1.2656 +  //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2657 +  //                                 ConstantPoolCacheEntry::flags_offset())));
  1.2658 +  //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
  1.2659 +  //   __ andl(rdx, 0x1);
  1.2660 +  // }
  1.2661 +  __ movq(rbx, Address(rcx, rbx, Address::times_8,
  1.2662 +                       in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2663 +                                ConstantPoolCacheEntry::f2_offset())));
  1.2664 +
  1.2665 +  // rax: object
  1.2666 +  __ verify_oop(rax);
  1.2667 +  __ null_check(rax);
  1.2668 +  Address field(rax, rbx, Address::times_1);
  1.2669 +
  1.2670 +  // access field
  1.2671 +  switch (bytecode()) {
  1.2672 +  case Bytecodes::_fast_agetfield:
  1.2673 +    __ movq(rax, field);
  1.2674 +    __ verify_oop(rax);
  1.2675 +    break;
  1.2676 +  case Bytecodes::_fast_lgetfield:
  1.2677 +    __ movq(rax, field);
  1.2678 +    break;
  1.2679 +  case Bytecodes::_fast_igetfield:
  1.2680 +    __ movl(rax, field);
  1.2681 +    break;
  1.2682 +  case Bytecodes::_fast_bgetfield:
  1.2683 +    __ movsbl(rax, field);
  1.2684 +    break;
  1.2685 +  case Bytecodes::_fast_sgetfield:
  1.2686 +    __ load_signed_word(rax, field);
  1.2687 +    break;
  1.2688 +  case Bytecodes::_fast_cgetfield:
  1.2689 +    __ load_unsigned_word(rax, field);
  1.2690 +    break;
  1.2691 +  case Bytecodes::_fast_fgetfield:
  1.2692 +    __ movflt(xmm0, field);
  1.2693 +    break;
  1.2694 +  case Bytecodes::_fast_dgetfield:
  1.2695 +    __ movdbl(xmm0, field);
  1.2696 +    break;
  1.2697 +  default:
  1.2698 +    ShouldNotReachHere();
  1.2699 +  }
  1.2700 +  // [jk] not needed currently
  1.2701 +  // if (os::is_MP()) {
  1.2702 +  //   Label notVolatile;
  1.2703 +  //   __ testl(rdx, rdx);
  1.2704 +  //   __ jcc(Assembler::zero, notVolatile);
  1.2705 +  //   __ membar(Assembler::LoadLoad);
  1.2706 +  //   __ bind(notVolatile);
   1.2707 +  // }
  1.2708 +}
  1.2709 +
  1.2710 +void TemplateTable::fast_xaccess(TosState state) {
  1.2711 +  transition(vtos, state);
  1.2712 +
  1.2713 +  // get receiver
  1.2714 +  __ movq(rax, aaddress(0));
  1.2715 +  debug_only(__ verify_local_tag(frame::TagReference, 0));
  1.2716 +  // access constant pool cache
  1.2717 +  __ get_cache_and_index_at_bcp(rcx, rdx, 2);
  1.2718 +  __ movq(rbx,
  1.2719 +          Address(rcx, rdx, Address::times_8,
  1.2720 +                  in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2721 +                           ConstantPoolCacheEntry::f2_offset())));
  1.2722 +  // make sure exception is reported in correct bcp range (getfield is
  1.2723 +  // next instruction)
  1.2724 +  __ incrementq(r13);
  1.2725 +  __ null_check(rax);
  1.2726 +  switch (state) {
  1.2727 +  case itos:
  1.2728 +    __ movl(rax, Address(rax, rbx, Address::times_1));
  1.2729 +    break;
  1.2730 +  case atos:
  1.2731 +    __ movq(rax, Address(rax, rbx, Address::times_1));
  1.2732 +    __ verify_oop(rax);
  1.2733 +    break;
  1.2734 +  case ftos:
  1.2735 +    __ movflt(xmm0, Address(rax, rbx, Address::times_1));
  1.2736 +    break;
  1.2737 +  default:
  1.2738 +    ShouldNotReachHere();
  1.2739 +  }
  1.2740 +
  1.2741 +  // [jk] not needed currently
  1.2742 +  // if (os::is_MP()) {
  1.2743 +  //   Label notVolatile;
  1.2744 +  //   __ movl(rdx, Address(rcx, rdx, Address::times_8,
  1.2745 +  //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2746 +  //                                 ConstantPoolCacheEntry::flags_offset())));
  1.2747 +  //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
  1.2748 +  //   __ testl(rdx, 0x1);
  1.2749 +  //   __ jcc(Assembler::zero, notVolatile);
  1.2750 +  //   __ membar(Assembler::LoadLoad);
  1.2751 +  //   __ bind(notVolatile);
  1.2752 +  // }
  1.2753 +
  1.2754 +  __ decrementq(r13);
  1.2755 +}
  1.2756 +
  1.2757 +
  1.2758 +
  1.2759 +//-----------------------------------------------------------------------------
  1.2760 +// Calls
  1.2761 +
  1.2762 +void TemplateTable::count_calls(Register method, Register temp) {
  1.2763 +  // implemented elsewhere
  1.2764 +  ShouldNotReachHere();
  1.2765 +}
  1.2766 +
  1.2767 +void TemplateTable::prepare_invoke(Register method,
  1.2768 +                                   Register index,
  1.2769 +                                   int byte_no,
  1.2770 +                                   Bytecodes::Code code) {
  1.2771 +  // determine flags
  1.2772 +  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  1.2773 +  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  1.2774 +  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  1.2775 +  const bool load_receiver       = code != Bytecodes::_invokestatic;
  1.2776 +  const bool receiver_null_check = is_invokespecial;
  1.2777 +  const bool save_flags = is_invokeinterface || is_invokevirtual;
  1.2778 +  // setup registers & access constant pool cache
  1.2779 +  const Register recv   = rcx;
  1.2780 +  const Register flags  = rdx;
  1.2781 +  assert_different_registers(method, index, recv, flags);
  1.2782 +
  1.2783 +  // save 'interpreter return address'
  1.2784 +  __ save_bcp();
  1.2785 +
  1.2786 +  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual);
  1.2787 +
  1.2788 +  // load receiver if needed (note: no return address pushed yet)
  1.2789 +  if (load_receiver) {
  1.2790 +    __ movl(recv, flags);
  1.2791 +    __ andl(recv, 0xFF);
  1.2792 +    if (TaggedStackInterpreter) __ shll(recv, 1);  // index*2
  1.2793 +    __ movq(recv, Address(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1)));
  1.2794 +    __ verify_oop(recv);
  1.2795 +  }
  1.2796 +
  1.2797 +  // do null check if needed
  1.2798 +  if (receiver_null_check) {
  1.2799 +    __ null_check(recv);
  1.2800 +  }
  1.2801 +
  1.2802 +  if (save_flags) {
  1.2803 +    __ movl(r13, flags);
  1.2804 +  }
  1.2805 +
  1.2806 +  // compute return type
  1.2807 +  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
  1.2808 +  // Make sure we don't need to mask flags for tosBits after the above shift
  1.2809 +  ConstantPoolCacheEntry::verify_tosBits();
  1.2810 +  // load return address
  1.2811 +  {
  1.2812 +    ExternalAddress return_5((address)Interpreter::return_5_addrs_by_index_table());
  1.2813 +    ExternalAddress return_3((address)Interpreter::return_3_addrs_by_index_table());
  1.2814 +    __ lea(rscratch1, (is_invokeinterface ? return_5 : return_3));
  1.2815 +    __ movq(flags, Address(rscratch1, flags, Address::times_8));
  1.2816 +  }
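         +
         +  // Sketch of the lookup above: return_3 and return_5 hold interpreter
         +  // return entries for 3- and 5-byte invoke bytecodes, indexed by the
         +  // result tos state; roughly: return_pc = table[result_tos_state].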
  1.2817 +
  1.2818 +  // push return address
  1.2819 +  __ pushq(flags);
  1.2820 +
  1.2821 +  // Restore the flags field (stashed in r13 above) and restore r13,
  1.2822 +  // the bytecode pointer, for later null checks
  1.2823 +  if (save_flags) {
  1.2824 +    __ movl(flags, r13);
  1.2825 +    __ restore_bcp();
  1.2826 +  }
  1.2827 +}
  1.2828 +
  1.2829 +
  1.2830 +void TemplateTable::invokevirtual_helper(Register index,
  1.2831 +                                         Register recv,
  1.2832 +                                         Register flags) {
  1.2833 +  // Uses temporary registers rax, rdx
  1.2834 +  assert_different_registers(index, recv, rax, rdx);
  1.2835 +  // Test for an invoke of a final method
  1.2836 +  Label notFinal;
  1.2837 +  __ movl(rax, flags);
  1.2838 +  __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
  1.2839 +  __ jcc(Assembler::zero, notFinal);
  1.2840 +
  1.2841 +  const Register method = index;  // method must be rbx
  1.2842 +  assert(method == rbx,
  1.2843 +         "methodOop must be rbx for interpreter calling convention");
  1.2844 +
  1.2845 +  // do the call - the index is actually the method to call
  1.2846 +  __ verify_oop(method);
  1.2847 +
  1.2848 +  // It's final, need a null check here!
  1.2849 +  __ null_check(recv);
  1.2850 +
  1.2851 +  // profile this call
  1.2852 +  __ profile_final_call(rax);
  1.2853 +
  1.2854 +  __ jump_from_interpreted(method, rax);
  1.2855 +
  1.2856 +  __ bind(notFinal);
  1.2857 +
  1.2858 +  // get receiver klass
  1.2859 +  __ null_check(recv, oopDesc::klass_offset_in_bytes());
  1.2860 +  __ movq(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
  1.2861 +
  1.2862 +  __ verify_oop(rax);
  1.2863 +
  1.2864 +  // profile this call
  1.2865 +  __ profile_virtual_call(rax, r14, rdx);
  1.2866 +
  1.2867 +  // get target methodOop & entry point
  1.2868 +  const int base = instanceKlass::vtable_start_offset() * wordSize;
  1.2869 +  assert(vtableEntry::size() * wordSize == 8,
  1.2870 +         "adjust the scaling in the code below");
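         +  // In outline, the dispatch below does (illustrative sketch only):
         +  //   method = klass->vtable[index].method;  // vtable begins at 'base'
         +  //   goto method->interpreter_entry;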
  1.2871 +  __ movq(method, Address(rax, index,
  1.2872 +                          Address::times_8,
  1.2873 +                          base + vtableEntry::method_offset_in_bytes()));
  1.2874 +  __ movq(rdx, Address(method, methodOopDesc::interpreter_entry_offset()));
  1.2875 +  __ jump_from_interpreted(method, rdx);
  1.2876 +}
  1.2877 +
  1.2878 +
  1.2879 +void TemplateTable::invokevirtual(int byte_no) {
  1.2880 +  transition(vtos, vtos);
  1.2881 +  prepare_invoke(rbx, noreg, byte_no, bytecode());
  1.2882 +
  1.2883 +  // rbx: index
  1.2884 +  // rcx: receiver
  1.2885 +  // rdx: flags
  1.2886 +
  1.2887 +  invokevirtual_helper(rbx, rcx, rdx);
  1.2888 +}
  1.2889 +
  1.2890 +
  1.2891 +void TemplateTable::invokespecial(int byte_no) {
  1.2892 +  transition(vtos, vtos);
  1.2893 +  prepare_invoke(rbx, noreg, byte_no, bytecode());
  1.2894 +  // do the call
  1.2895 +  __ verify_oop(rbx);
  1.2896 +  __ profile_call(rax);
  1.2897 +  __ jump_from_interpreted(rbx, rax);
  1.2898 +}
  1.2899 +
  1.2900 +
  1.2901 +void TemplateTable::invokestatic(int byte_no) {
  1.2902 +  transition(vtos, vtos);
  1.2903 +  prepare_invoke(rbx, noreg, byte_no, bytecode());
  1.2904 +  // do the call
  1.2905 +  __ verify_oop(rbx);
  1.2906 +  __ profile_call(rax);
  1.2907 +  __ jump_from_interpreted(rbx, rax);
  1.2908 +}
  1.2909 +
  1.2910 +void TemplateTable::fast_invokevfinal(int byte_no) {
  1.2911 +  transition(vtos, vtos);
  1.2912 +  __ stop("fast_invokevfinal not used on amd64");
  1.2913 +}
  1.2914 +
  1.2915 +void TemplateTable::invokeinterface(int byte_no) {
  1.2916 +  transition(vtos, vtos);
  1.2917 +  prepare_invoke(rax, rbx, byte_no, bytecode());
  1.2918 +
  1.2919 +  // rax: Interface
  1.2920 +  // rbx: index
  1.2921 +  // rcx: receiver
  1.2922 +  // rdx: flags
  1.2923 +
  1.2924 +  // Special case of invokeinterface called for virtual method of
  1.2925 +  // java.lang.Object.  See cpCacheOop.cpp for details.
  1.2926 +  // This code isn't produced by javac, but could be produced by
  1.2927 +  // another compliant java compiler.
  1.2928 +  Label notMethod;
  1.2929 +  __ movl(r14, rdx);
  1.2930 +  __ andl(r14, (1 << ConstantPoolCacheEntry::methodInterface));
  1.2931 +  __ jcc(Assembler::zero, notMethod);
  1.2932 +
  1.2933 +  invokevirtual_helper(rbx, rcx, rdx);
  1.2934 +  __ bind(notMethod);
  1.2935 +
  1.2936 +  // Get receiver klass into rdx - also a null check
  1.2937 +  __ restore_locals(); // restore r14
  1.2938 +  __ movq(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
  1.2939 +  __ verify_oop(rdx);
  1.2940 +
  1.2941 +  // profile this call
  1.2942 +  __ profile_virtual_call(rdx, r13, r14);
  1.2943 +
  1.2944 +  __ movq(r14, rdx); // Save klassOop in r14
  1.2945 +
  1.2946 +  // Compute start of first itableOffsetEntry (which is at the end of
  1.2947 +  // the vtable)
  1.2948 +  const int base = instanceKlass::vtable_start_offset() * wordSize;
  1.2949 +  // Get length of vtable
  1.2950 +  assert(vtableEntry::size() * wordSize == 8,
  1.2951 +         "adjust the scaling in the code below");
  1.2952 +  __ movl(r13, Address(rdx,
  1.2953 +                       instanceKlass::vtable_length_offset() * wordSize));
  1.2954 +  __ leaq(rdx, Address(rdx, r13, Address::times_8, base));
  1.2955 +
  1.2956 +  if (HeapWordsPerLong > 1) {
  1.2957 +    // Round up to align_object_offset boundary
  1.2958 +    __ round_to_q(rdx, BytesPerLong);
  1.2959 +  }
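         +
         +  // The search below walks the itableOffsetEntry array; in outline
         +  // (illustrative sketch only):
         +  //   for (e = first_itable_entry; ; e++) {
         +  //     if (e->interface == NULL) throw IncompatibleClassChangeError();
         +  //     if (e->interface == resolved_interface) break;
         +  //   }
         +  //   method = *(klassOop + e->offset + index * 8);  // itableMethodEntry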
  1.2960 +
  1.2961 +  Label entry, search, interface_ok;
  1.2962 +
  1.2963 +  __ jmpb(entry);
  1.2964 +  __ bind(search);
  1.2965 +  __ addq(rdx, itableOffsetEntry::size() * wordSize);
  1.2966 +
  1.2967 +  __ bind(entry);
  1.2968 +
  1.2969 +  // Check that the entry is non-null.  A null entry means that the
  1.2970 +  // receiver class doesn't implement the interface, and wasn't the
  1.2971 +  // same as the receiver class checked when the interface was
  1.2972 +  // resolved.
  1.2973 +  __ pushq(rdx);
  1.2974 +  __ movq(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
  1.2975 +  __ testq(rdx, rdx);
  1.2976 +  __ jcc(Assembler::notZero, interface_ok);
  1.2977 +  // throw exception
  1.2978 +  __ popq(rdx); // pop saved register first.
  1.2979 +  __ popq(rbx); // pop return address (pushed by prepare_invoke)
  1.2980 +  __ restore_bcp(); // r13 must be correct for exception handler (was
  1.2981 +                    // destroyed)
  1.2982 +  __ restore_locals(); // make sure locals pointer is correct as well
  1.2983 +                       // (was destroyed)
  1.2984 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.2985 +                   InterpreterRuntime::throw_IncompatibleClassChangeError));
  1.2986 +  // the call_VM checks for exception, so we should never return here.
  1.2987 +  __ should_not_reach_here();
  1.2988 +  __ bind(interface_ok);
  1.2989 +
  1.2990 +  __ popq(rdx);
  1.2991 +
  1.2992 +  __ cmpq(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
  1.2993 +  __ jcc(Assembler::notEqual, search);
  1.2994 +
  1.2995 +  __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
  1.2996 +
  1.2997 +  __ addq(rdx, r14); // Add offset to klassOop
  1.2998 +  assert(itableMethodEntry::size() * wordSize == 8,
  1.2999 +         "adjust the scaling in the code below");
  1.3000 +  __ movq(rbx, Address(rdx, rbx, Address::times_8));
  1.3001 +  // rbx: methodOop to call
  1.3002 +  // rcx: receiver
  1.3003 +  // Check for abstract method error
  1.3004 +  // Note: This should be done more efficiently via a
  1.3005 +  // throw_abstract_method_error interpreter entry point and a
  1.3006 +  // conditional jump to it in case of a null method.
  1.3007 +  {
  1.3008 +    Label L;
  1.3009 +    __ testq(rbx, rbx);
  1.3010 +    __ jcc(Assembler::notZero, L);
  1.3011 +    // throw exception
  1.3012 +    // note: must restore interpreter registers to canonical
  1.3013 +    //       state for exception handling to work correctly!
  1.3014 +    __ popq(rbx);  // pop return address (pushed by prepare_invoke)
  1.3015 +    __ restore_bcp(); // r13 must be correct for exception handler
  1.3016 +                      // (was destroyed)
  1.3017 +    __ restore_locals(); // make sure locals pointer is correct as
  1.3018 +                         // well (was destroyed)
  1.3019 +    __ call_VM(noreg,
  1.3020 +               CAST_FROM_FN_PTR(address,
  1.3021 +                             InterpreterRuntime::throw_AbstractMethodError));
  1.3022 +    // the call_VM checks for exception, so we should never return here.
  1.3023 +    __ should_not_reach_here();
  1.3024 +    __ bind(L);
  1.3025 +  }
  1.3026 +
  1.3027 +  __ movq(rcx, Address(rbx, methodOopDesc::interpreter_entry_offset()));
  1.3028 +
  1.3029 +  // do the call
  1.3030 +  // rcx: receiver
  1.3031 +  // rbx: methodOop
  1.3032 +  __ jump_from_interpreted(rbx, rdx);
  1.3033 +}
  1.3034 +
  1.3035 +//-----------------------------------------------------------------------------
  1.3036 +// Allocation
  1.3037 +
  1.3038 +void TemplateTable::_new() {
  1.3039 +  transition(vtos, atos);
  1.3040 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3041 +  Label slow_case;
  1.3042 +  Label done;
  1.3043 +  Label initialize_header;
  1.3044 +  Label initialize_object; // including clearing the fields
  1.3045 +  Label allocate_shared;
  1.3046 +  ExternalAddress top((address)Universe::heap()->top_addr());
  1.3047 +  ExternalAddress end((address)Universe::heap()->end_addr());
  1.3048 +
  1.3049 +  __ get_cpool_and_tags(rsi, rax);
  1.3050 +  // get instanceKlass
  1.3051 +  __ movq(rsi, Address(rsi, rdx,
  1.3052 +                       Address::times_8, sizeof(constantPoolOopDesc)));
  1.3053 +
  1.3054 +  // make sure the class we're about to instantiate has been
  1.3055 +  // resolved; if the tag is not JVM_CONSTANT_Class, the slow case
  1.3056 +  // resolves (and allocates) via InterpreterRuntime::_new
  1.3057 +  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
  1.3058 +  __ cmpb(Address(rax, rdx, Address::times_1, tags_offset),
  1.3059 +          JVM_CONSTANT_Class);
  1.3060 +  __ jcc(Assembler::notEqual, slow_case);
  1.3061 +
  1.3062 +  // make sure klass is initialized & doesn't have a finalizer:
  1.3063 +  // first, check that klass is fully initialized
  1.3064 +  __ cmpl(Address(rsi,
  1.3065 +                  instanceKlass::init_state_offset_in_bytes() +
  1.3066 +                  sizeof(oopDesc)),
  1.3067 +          instanceKlass::fully_initialized);
  1.3068 +  __ jcc(Assembler::notEqual, slow_case);
  1.3069 +
  1.3070 +  // get instance_size in instanceKlass (scaled to a count of bytes)
  1.3071 +  __ movl(rdx,
  1.3072 +          Address(rsi,
  1.3073 +                  Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
  1.3074 +  // test to see if it has a finalizer or is malformed in some way
  1.3075 +  __ testl(rdx, Klass::_lh_instance_slow_path_bit);
  1.3076 +  __ jcc(Assembler::notZero, slow_case);
  1.3077 +
  1.3078 +  // Allocate the instance
  1.3079 +  // 1) Try to allocate in the TLAB
  1.3080 +  // 2) if that fails and the object is large, allocate in the shared Eden
  1.3081 +  // 3) if the above fails (or is not applicable), go to a slow case
  1.3082 +  // (creates a new TLAB, etc.)
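         +  //
         +  // In C-like pseudocode, the TLAB fast path below is a plain
         +  // bump-pointer allocation (illustrative sketch; names simplified):
         +  //   obj     = thread->tlab_top;
         +  //   new_top = obj + instance_size;
         +  //   if (new_top > thread->tlab_end) goto shared_eden_or_slow_case;
         +  //   thread->tlab_top = new_top;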
  1.3083 +
  1.3084 +  const bool allow_shared_alloc =
  1.3085 +    Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
  1.3086 +
  1.3087 +  if (UseTLAB) {
  1.3088 +    __ movq(rax, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
  1.3089 +    __ leaq(rbx, Address(rax, rdx, Address::times_1));
  1.3090 +    __ cmpq(rbx, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
  1.3091 +    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
  1.3092 +    __ movq(Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
  1.3093 +    if (ZeroTLAB) {
  1.3094 +      // the fields have been already cleared
  1.3095 +      __ jmp(initialize_header);
  1.3096 +    } else {
  1.3097 +      // initialize both the header and fields
  1.3098 +      __ jmp(initialize_object);
  1.3099 +    }
  1.3100 +  }
  1.3101 +
  1.3102 +  // Allocation in the shared Eden, if allowed.
  1.3103 +  //
  1.3104 +  // rdx: instance size in bytes
  1.3105 +  if (allow_shared_alloc) {
  1.3106 +    __ bind(allocate_shared);
  1.3107 +
  1.3108 +    const Register RtopAddr = rscratch1;
  1.3109 +    const Register RendAddr = rscratch2;
  1.3110 +
  1.3111 +    __ lea(RtopAddr, top);
  1.3112 +    __ lea(RendAddr, end);
  1.3113 +    __ movq(rax, Address(RtopAddr, 0));
  1.3114 +
  1.3115 +    // On retries rax is reloaded by cmpxchgq (failure leaves the current top in rax)
  1.3116 +    Label retry;
  1.3117 +    __ bind(retry);
  1.3118 +    __ leaq(rbx, Address(rax, rdx, Address::times_1));
  1.3119 +    __ cmpq(rbx, Address(RendAddr, 0));
  1.3120 +    __ jcc(Assembler::above, slow_case);
  1.3121 +
  1.3122 +    // Compare rax with the current top addr, and if still equal, store the
  1.3123 +    // new top addr (in rbx) at the top addr location. Sets ZF if it was
  1.3124 +    // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
  1.3125 +    //
  1.3126 +    // rax: object begin
  1.3127 +    // rbx: object end
  1.3128 +    // rdx: instance size in bytes
  1.3129 +    if (os::is_MP()) {
  1.3130 +      __ lock();
  1.3131 +    }
  1.3132 +    __ cmpxchgq(rbx, Address(RtopAddr, 0));
  1.3133 +
  1.3134 +    // if someone beat us on the allocation, try again, otherwise continue
  1.3135 +    __ jcc(Assembler::notEqual, retry);
  1.3136 +  }
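         +
         +  // The retry loop above is, in effect (illustrative sketch):
         +  //   do {
         +  //     obj = *top;  new_top = obj + instance_size;
         +  //     if (new_top > *end) goto slow_case;
         +  //   } while (CAS(top, obj, new_top) != obj);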
  1.3137 +
  1.3138 +  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
  1.3139 +    // The object is initialized before the header.  If the object size is
  1.3140 +    // zero, go directly to the header initialization.
  1.3141 +    __ bind(initialize_object);
  1.3142 +    __ decrementl(rdx, sizeof(oopDesc));
  1.3143 +    __ jcc(Assembler::zero, initialize_header);
  1.3144 +
  1.3145 +    // Initialize object fields
  1.3146 +    __ xorl(rcx, rcx); // use zero reg to clear memory (shorter code)
  1.3147 +    __ shrl(rdx, LogBytesPerLong);  // divide by oopSize to simplify the loop
  1.3148 +    {
  1.3149 +      Label loop;
  1.3150 +      __ bind(loop);
  1.3151 +      __ movq(Address(rax, rdx, Address::times_8,
  1.3152 +                      sizeof(oopDesc) - oopSize),
  1.3153 +              rcx);
  1.3154 +      __ decrementl(rdx);
  1.3155 +      __ jcc(Assembler::notZero, loop);
  1.3156 +    }
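         +
         +    // The loop above zeroes the instance body one 8-byte word at a
         +    // time, from the last word down to the first word past the header;
         +    // in effect: for (i = nwords; i != 0; i--) body[i - 1] = 0;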
  1.3157 +
  1.3158 +    // initialize object header only.
  1.3159 +    __ bind(initialize_header);
  1.3160 +    if (UseBiasedLocking) {
  1.3161 +      __ movq(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  1.3162 +      __ movq(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1);
  1.3163 +    } else {
  1.3164 +      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
  1.3165 +               (intptr_t) markOopDesc::prototype()); // header (address 0x1)
  1.3166 +    }
  1.3167 +    __ movq(Address(rax, oopDesc::klass_offset_in_bytes()), rsi);  // klass
  1.3168 +    __ jmp(done);
  1.3169 +  }
  1.3170 +
  1.3171 +  {
  1.3172 +    SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
  1.3173 +    // Trigger dtrace event for fastpath
  1.3174 +    __ push(atos); // save the return value
  1.3175 +    __ call_VM_leaf(
  1.3176 +         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
  1.3177 +    __ pop(atos); // restore the return value
  1.3178 +  }
  1.3179 +
  1.3180 +  // slow case
  1.3181 +  __ bind(slow_case);
  1.3182 +  __ get_constant_pool(c_rarg1);
  1.3183 +  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
  1.3184 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
  1.3185 +  __ verify_oop(rax);
  1.3186 +
  1.3187 +  // continue
  1.3188 +  __ bind(done);
  1.3189 +}
  1.3190 +
  1.3191 +void TemplateTable::newarray() {
  1.3192 +  transition(itos, atos);
  1.3193 +  __ load_unsigned_byte(c_rarg1, at_bcp(1));
  1.3194 +  __ movl(c_rarg2, rax);
  1.3195 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
  1.3196 +          c_rarg1, c_rarg2);
  1.3197 +}
  1.3198 +
  1.3199 +void TemplateTable::anewarray() {
  1.3200 +  transition(itos, atos);
  1.3201 +  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
  1.3202 +  __ get_constant_pool(c_rarg1);
  1.3203 +  __ movl(c_rarg3, rax);
  1.3204 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
  1.3205 +          c_rarg1, c_rarg2, c_rarg3);
  1.3206 +}
  1.3207 +
  1.3208 +void TemplateTable::arraylength() {
  1.3209 +  transition(atos, itos);
  1.3210 +  __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
  1.3211 +  __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
  1.3212 +}
  1.3213 +
  1.3214 +void TemplateTable::checkcast() {
  1.3215 +  transition(atos, atos);
  1.3216 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3217 +  __ testq(rax, rax); // object is in rax
  1.3218 +  __ jcc(Assembler::zero, is_null);
  1.3219 +
  1.3220 +  // Get cpool & tags index
  1.3221 +  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
  1.3222 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
  1.3223 +  // See if the bytecode has already been quickened
  1.3224 +  __ cmpb(Address(rdx, rbx,
  1.3225 +                  Address::times_1,
  1.3226 +                  typeArrayOopDesc::header_size(T_BYTE) * wordSize),
  1.3227 +          JVM_CONSTANT_Class);
  1.3228 +  __ jcc(Assembler::equal, quicked);
  1.3229 +
  1.3230 +  __ movq(r12, rcx); // save rcx XXX
  1.3231 +  __ push(atos); // save receiver for result, and for GC
  1.3232 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  1.3233 +  __ pop_ptr(rdx); // restore receiver
  1.3234 +  __ movq(rcx, r12); // restore rcx XXX
  1.3235 +  __ jmpb(resolved);
  1.3236 +
  1.3237 +  // Get superklass in rax and subklass in rbx
  1.3238 +  __ bind(quicked);
  1.3239 +  __ movq(rdx, rax); // Save object in rdx; rax needed for subtype check
  1.3240 +  __ movq(rax, Address(rcx, rbx,
  1.3241 +                       Address::times_8, sizeof(constantPoolOopDesc)));
  1.3242 +
  1.3243 +  __ bind(resolved);
  1.3244 +  __ movq(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
  1.3245 +
  1.3246 +  // Generate subtype check.  Blows rcx, rdi.  Object in rdx.
  1.3247 +  // Superklass in rax.  Subklass in rbx.
  1.3248 +  __ gen_subtype_check(rbx, ok_is_subtype);
  1.3249 +
  1.3250 +  // Come here on failure
  1.3251 +  __ push_ptr(rdx);
  1.3252 +  // object is at TOS
  1.3253 +  __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
  1.3254 +
  1.3255 +  // Come here on success
  1.3256 +  __ bind(ok_is_subtype);
  1.3257 +  __ movq(rax, rdx); // Restore object from rdx
  1.3258 +
  1.3259 +  // Collect counts on whether this check-cast sees NULLs a lot or not.
  1.3260 +  if (ProfileInterpreter) {
  1.3261 +    __ jmp(done);
  1.3262 +    __ bind(is_null);
  1.3263 +    __ profile_null_seen(rcx);
  1.3264 +  } else {
  1.3265 +    __ bind(is_null);   // same as 'done'
  1.3266 +  }
  1.3267 +  __ bind(done);
  1.3268 +}
  1.3269 +
  1.3270 +void TemplateTable::instanceof() {
  1.3271 +  transition(atos, itos);
  1.3272 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3273 +  __ testq(rax, rax);
  1.3274 +  __ jcc(Assembler::zero, is_null);
  1.3275 +
  1.3276 +  // Get cpool & tags index
  1.3277 +  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
  1.3278 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
  1.3279 +  // See if the bytecode has already been quickened
  1.3280 +  __ cmpb(Address(rdx, rbx,
  1.3281 +                  Address::times_1,
  1.3282 +                  typeArrayOopDesc::header_size(T_BYTE) * wordSize),
  1.3283 +          JVM_CONSTANT_Class);
  1.3284 +  __ jcc(Assembler::equal, quicked);
  1.3285 +
  1.3286 +  __ movq(r12, rcx); // save rcx
  1.3287 +  __ push(atos); // save receiver for result, and for GC
  1.3288 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  1.3289 +  __ pop_ptr(rdx); // restore receiver
  1.3290 +  __ movq(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
  1.3291 +  __ movq(rcx, r12); // restore rcx
  1.3292 +  __ jmpb(resolved);
  1.3293 +
  1.3294 +  // Get superklass in rax and subklass in rdx
  1.3295 +  __ bind(quicked);
  1.3296 +  __ movq(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
  1.3297 +  __ movq(rax, Address(rcx, rbx,
  1.3298 +                       Address::times_8, sizeof(constantPoolOopDesc)));
  1.3299 +
  1.3300 +  __ bind(resolved);
  1.3301 +
  1.3302 +  // Generate subtype check.  Blows rcx, rdi
  1.3303 +  // Superklass in rax.  Subklass in rdx.
  1.3304 +  __ gen_subtype_check(rdx, ok_is_subtype);
  1.3305 +
  1.3306 +  // Come here on failure
  1.3307 +  __ xorl(rax, rax);
  1.3308 +  __ jmpb(done);
  1.3309 +  // Come here on success
  1.3310 +  __ bind(ok_is_subtype);
  1.3311 +  __ movl(rax, 1);
  1.3312 +
  1.3313 +  // Collect counts on whether this test sees NULLs a lot or not.
  1.3314 +  if (ProfileInterpreter) {
  1.3315 +    __ jmp(done);
  1.3316 +    __ bind(is_null);
  1.3317 +    __ profile_null_seen(rcx);
  1.3318 +  } else {
  1.3319 +    __ bind(is_null);   // same as 'done'
  1.3320 +  }
  1.3321 +  __ bind(done);
  1.3322 +  // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
  1.3323 +  // rax = 1: obj != NULL and obj is     an instanceof the specified klass
  1.3324 +}
  1.3325 +
  1.3326 +//-----------------------------------------------------------------------------
  1.3327 +// Breakpoints
  1.3328 +void TemplateTable::_breakpoint() {
  1.3329 +  // Note: We get here even if we are single stepping.
  1.3330 +  // jbug insists on setting breakpoints at every bytecode
  1.3331 +  // even if we are in single step mode.
  1.3332 +
  1.3333 +  transition(vtos, vtos);
  1.3334 +
  1.3335 +  // get the unpatched byte code
  1.3336 +  __ get_method(c_rarg1);
  1.3337 +  __ call_VM(noreg,
  1.3338 +             CAST_FROM_FN_PTR(address,
  1.3339 +                              InterpreterRuntime::get_original_bytecode_at),
  1.3340 +             c_rarg1, r13);
  1.3341 +  __ movq(rbx, rax);
  1.3342 +
  1.3343 +  // post the breakpoint event
  1.3344 +  __ get_method(c_rarg1);
  1.3345 +  __ call_VM(noreg,
  1.3346 +             CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
  1.3347 +             c_rarg1, r13);
  1.3348 +
  1.3349 +  // complete the execution of original bytecode
  1.3350 +  __ dispatch_only_normal(vtos);
  1.3351 +}
  1.3352 +
  1.3353 +//-----------------------------------------------------------------------------
  1.3354 +// Exceptions
  1.3355 +
  1.3356 +void TemplateTable::athrow() {
  1.3357 +  transition(atos, vtos);
  1.3358 +  __ null_check(rax);
  1.3359 +  __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
  1.3360 +}
  1.3361 +
  1.3362 +//-----------------------------------------------------------------------------
  1.3363 +// Synchronization
  1.3364 +//
  1.3365 +// Note: monitorenter & exit are symmetric routines, which is reflected
  1.3366 +//       in the assembly code structure as well
  1.3367 +//
  1.3368 +// Stack layout:
  1.3369 +//
  1.3370 +// [expressions  ] <--- rsp               = expression stack top
  1.3371 +// ..
  1.3372 +// [expressions  ]
  1.3373 +// [monitor entry] <--- monitor block top = expression stack bot
  1.3374 +// ..
  1.3375 +// [monitor entry]
  1.3376 +// [frame data   ] <--- monitor block bot
  1.3377 +// ...
  1.3378 +// [saved rbp    ] <--- rbp
  1.3379 +void TemplateTable::monitorenter() {
  1.3380 +  transition(atos, vtos);
  1.3381 +
  1.3382 +  // check for NULL object
  1.3383 +  __ null_check(rax);
  1.3384 +
  1.3385 +  const Address monitor_block_top(
  1.3386 +        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3387 +  const Address monitor_block_bot(
  1.3388 +        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
  1.3389 +  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
  1.3390 +
  1.3391 +  Label allocated;
  1.3392 +
  1.3393 +  // initialize entry pointer
  1.3394 +  __ xorl(c_rarg1, c_rarg1); // points to free slot or NULL
  1.3395 +
  1.3396 +  // find a free slot in the monitor block (result in c_rarg1)
  1.3397 +  {
  1.3398 +    Label entry, loop, exit;
  1.3399 +    __ movq(c_rarg3, monitor_block_top); // points to current entry,
  1.3400 +                                     // starting with top-most entry
  1.3401 +    __ leaq(c_rarg2, monitor_block_bot); // points to word before bottom
  1.3402 +                                     // of monitor block
  1.3403 +    __ jmpb(entry);
  1.3404 +
  1.3405 +    __ bind(loop);
  1.3406 +    // check if current entry is used
  1.3407 +    __ cmpq(Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()), (int) NULL);
  1.3408 +    // if not used then remember entry in c_rarg1
  1.3409 +    __ cmovq(Assembler::equal, c_rarg1, c_rarg3);
  1.3410 +    // check if current entry is for same object
  1.3411 +    __ cmpq(rax, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
  1.3412 +    // if same object then stop searching
  1.3413 +    __ jccb(Assembler::equal, exit);
  1.3414 +    // otherwise advance to next entry
  1.3415 +    __ addq(c_rarg3, entry_size);
  1.3416 +    __ bind(entry);
  1.3417 +    // check if bottom reached
  1.3418 +    __ cmpq(c_rarg3, c_rarg2);
  1.3419 +    // if not at bottom then check this entry
  1.3420 +    __ jcc(Assembler::notEqual, loop);
  1.3421 +    __ bind(exit);
  1.3422 +  }
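         +
         +  // In outline, the scan above does (illustrative sketch only):
         +  //   free = NULL;
         +  //   for (e = block_top; e != block_bot; e++) {
         +  //     if (e->obj == NULL)   free = e;  // remember a free slot
         +  //     if (e->obj == lockee) break;     // stop at an entry for this object
         +  //   }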
  1.3423 +
  1.3424 +  __ testq(c_rarg1, c_rarg1); // check if a slot has been found
  1.3425 +  __ jcc(Assembler::notZero, allocated); // if found, continue with that one
  1.3426 +
  1.3427 +  // allocate one if there's no free slot
  1.3428 +  {
  1.3429 +    Label entry, loop;
  1.3430 +    // 1. compute new pointers       // rsp: old expression stack top
  1.3431 +    __ movq(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom
  1.3432 +    __ subq(rsp, entry_size);        // move expression stack top
  1.3433 +    __ subq(c_rarg1, entry_size);        // move expression stack bottom
  1.3434 +    __ movq(c_rarg3, rsp);               // set start value for copy loop
  1.3435 +    __ movq(monitor_block_bot, c_rarg1); // set new monitor block bottom
  1.3436 +    __ jmp(entry);
  1.3437 +    // 2. move expression stack contents
  1.3438 +    __ bind(loop);
  1.3439 +    __ movq(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
  1.3440 +                                            // word from old location
  1.3441 +    __ movq(Address(c_rarg3, 0), c_rarg2);  // and store it at new location
  1.3442 +    __ addq(c_rarg3, wordSize);             // advance to next word
  1.3443 +    __ bind(entry);
  1.3444 +    __ cmpq(c_rarg3, c_rarg1);              // check if bottom reached
  1.3445 +    __ jcc(Assembler::notEqual, loop);      // if not at bottom then
  1.3446 +                                            // copy next word
  1.3447 +  }
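         +
         +  // Net effect of the block above: rsp and the monitor block bottom
         +  // both move down by entry_size, the expression stack contents are
         +  // copied down word by word, and c_rarg1 is left pointing at the
         +  // slot opened up between the two.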
  1.3448 +
  1.3449 +  // call run-time routine
  1.3450 +  // c_rarg1: points to monitor entry
  1.3451 +  __ bind(allocated);
  1.3452 +
  1.3453 +  // Increment bcp to point to the next bytecode, so exception
  1.3454 +  // handling for async. exceptions works correctly.
  1.3455 +  // The object has already been popped from the stack, so the
  1.3456 +  // expression stack looks correct.
  1.3457 +  __ incrementq(r13);
  1.3458 +
  1.3459 +  // store object
  1.3460 +  __ movq(Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()), rax);
  1.3461 +  __ lock_object(c_rarg1);
  1.3462 +
  1.3463 +  // check to make sure this monitor doesn't cause stack overflow after locking
  1.3464 +  __ save_bcp();  // in case of exception
  1.3465 +  __ generate_stack_overflow_check(0);
  1.3466 +
  1.3467 +  // The bcp has already been incremented. Just need to dispatch to
  1.3468 +  // next instruction.
  1.3469 +  __ dispatch_next(vtos);
  1.3470 +}
  1.3471 +
  1.3472 +
  1.3473 +void TemplateTable::monitorexit() {
  1.3474 +  transition(atos, vtos);
  1.3475 +
  1.3476 +  // check for NULL object
  1.3477 +  __ null_check(rax);
  1.3478 +
  1.3479 +  const Address monitor_block_top(
  1.3480 +        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3481 +  const Address monitor_block_bot(
  1.3482 +        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
  1.3483 +  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
  1.3484 +
  1.3485 +  Label found;
  1.3486 +
  1.3487 +  // find matching slot
  1.3488 +  {
  1.3489 +    Label entry, loop;
  1.3490 +    __ movq(c_rarg1, monitor_block_top); // points to current entry,
  1.3491 +                                     // starting with top-most entry
  1.3492 +    __ leaq(c_rarg2, monitor_block_bot); // points to word before bottom
  1.3493 +                                     // of monitor block
  1.3494 +    __ jmpb(entry);
  1.3495 +
  1.3496 +    __ bind(loop);
  1.3497 +    // check if current entry is for same object
  1.3498 +    __ cmpq(rax, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
  1.3499 +    // if same object then stop searching
  1.3500 +    __ jcc(Assembler::equal, found);
  1.3501 +    // otherwise advance to next entry
  1.3502 +    __ addq(c_rarg1, entry_size);
  1.3503 +    __ bind(entry);
  1.3504 +    // check if bottom reached
  1.3505 +    __ cmpq(c_rarg1, c_rarg2);
  1.3506 +    // if not at bottom then check this entry
  1.3507 +    __ jcc(Assembler::notEqual, loop);
  1.3508 +  }
  1.3509 +
  1.3510 +  // Error handling: unlocking was not block-structured
  1.3511 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.3512 +                   InterpreterRuntime::throw_illegal_monitor_state_exception));
  1.3513 +  __ should_not_reach_here();
  1.3514 +
  1.3515 +  // call run-time routine
  1.3516 +  // c_rarg1: points to monitor entry
  1.3517 +  __ bind(found);
  1.3518 +  __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)
  1.3519 +  __ unlock_object(c_rarg1);
  1.3520 +  __ pop_ptr(rax); // discard object
  1.3521 +}
  1.3522 +
  1.3523 +
  1.3524 +// Wide instructions
  1.3525 +void TemplateTable::wide() {
  1.3526 +  transition(vtos, vtos);
  1.3527 +  __ load_unsigned_byte(rbx, at_bcp(1));
  1.3528 +  __ lea(rscratch1, ExternalAddress((address)Interpreter::_wentry_point));
  1.3529 +  __ jmp(Address(rscratch1, rbx, Address::times_8));
  1.3530 +  // Note: the r13 increment step is part of the individual wide
  1.3531 +  // bytecode implementations
  1.3532 +}
  1.3533 +
  1.3534 +
  1.3535 +// Multi arrays
  1.3536 +void TemplateTable::multianewarray() {
  1.3537 +  transition(vtos, atos);
  1.3538 +  __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
  1.3539 +  // last dim is on top of stack; we want address of first one:
  1.3540 +  // first_addr = last_addr + (ndims - 1) * wordSize
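         +  // e.g. with ndims == 3 (and an untagged stack) the first dimension
         +  // is two words above the stack top, so c_rarg1 = rsp + 2*wordSize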
  1.3541 +  if (TaggedStackInterpreter) __ shll(rax, 1);  // index*2
  1.3542 +  __ leaq(c_rarg1, Address(rsp, rax, Address::times_8, -wordSize));
  1.3543 +  call_VM(rax,
  1.3544 +          CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
  1.3545 +          c_rarg1);
  1.3546 +  __ load_unsigned_byte(rbx, at_bcp(3));
  1.3547 +  if (TaggedStackInterpreter) __ shll(rbx, 1);  // index*2
  1.3548 +  __ leaq(rsp, Address(rsp, rbx, Address::times_8));
  1.3549 +}
