src/cpu/x86/vm/templateTable_x86_32.cpp

changeset 435:a61af66fc99e
child 542:93b6525e3b82
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Sat Dec 01 00:00:00 2007 +0000
     1.3 @@ -0,0 +1,3499 @@
     1.4 +/*
     1.5 + * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "incls/_precompiled.incl"
    1.29 +#include "incls/_templateTable_x86_32.cpp.incl"
    1.30 +
    1.31 +#ifndef CC_INTERP
    1.32 +#define __ _masm->
    1.33 +
    1.34 +//----------------------------------------------------------------------------------------------------
    1.35 +// Platform-dependent initialization
    1.36 +
    1.37 +void TemplateTable::pd_initialize() {
    1.38 +  // No i486 specific initialization
    1.39 +}
    1.40 +
    1.41 +//----------------------------------------------------------------------------------------------------
    1.42 +// Address computation
    1.43 +
    1.44 +// local variables
    1.45 +static inline Address iaddress(int n)            {
    1.46 +  return Address(rdi, Interpreter::local_offset_in_bytes(n));
    1.47 +}
    1.48 +
    1.49 +static inline Address laddress(int n)            { return iaddress(n + 1); }
    1.50 +static inline Address haddress(int n)            { return iaddress(n + 0); }
    1.51 +static inline Address faddress(int n)            { return iaddress(n); }
    1.52 +static inline Address daddress(int n)            { return laddress(n); }
    1.53 +static inline Address aaddress(int n)            { return iaddress(n); }
    1.54 +
    1.55 +static inline Address iaddress(Register r)       {
    1.56 +  return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::value_offset_in_bytes());
    1.57 +}
    1.58 +static inline Address laddress(Register r)       {
    1.59 +  return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(1));
    1.60 +}
    1.61 +static inline Address haddress(Register r)       {
    1.62 +  return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0));
    1.63 +}
    1.64 +
    1.65 +static inline Address faddress(Register r)       { return iaddress(r); };
    1.66 +static inline Address daddress(Register r)       {
    1.67 +  assert(!TaggedStackInterpreter, "This doesn't work");
    1.68 +  return laddress(r);
    1.69 +};
    1.70 +static inline Address aaddress(Register r)       { return iaddress(r); };
    1.71 +
    1.72 +// expression stack
    1.73 +// (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store
    1.74 +// data beyond the rsp which is potentially unsafe in an MT environment;
    1.75 +// an interrupt may overwrite that data.)
    1.76 +static inline Address at_rsp   () {
    1.77 +  return Address(rsp, 0);
    1.78 +}
    1.79 +
     1.80 +// At top of Java expression stack which may be different from rsp().  It
    1.81 +// isn't for category 1 objects.
    1.82 +static inline Address at_tos   () {
    1.83 +  Address tos = Address(rsp,  Interpreter::expr_offset_in_bytes(0));
    1.84 +  return tos;
    1.85 +}
    1.86 +
    1.87 +static inline Address at_tos_p1() {
    1.88 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
    1.89 +}
    1.90 +
    1.91 +static inline Address at_tos_p2() {
    1.92 +  return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
    1.93 +}
    1.94 +
    1.95 +// Condition conversion
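          +// j_not returns the negated assembler condition, so a branch template can
          +// fall through into the taken-branch code and jump past it when the test fails.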
    1.96 +static Assembler::Condition j_not(TemplateTable::Condition cc) {
    1.97 +  switch (cc) {
    1.98 +    case TemplateTable::equal        : return Assembler::notEqual;
    1.99 +    case TemplateTable::not_equal    : return Assembler::equal;
   1.100 +    case TemplateTable::less         : return Assembler::greaterEqual;
   1.101 +    case TemplateTable::less_equal   : return Assembler::greater;
   1.102 +    case TemplateTable::greater      : return Assembler::lessEqual;
   1.103 +    case TemplateTable::greater_equal: return Assembler::less;
   1.104 +  }
   1.105 +  ShouldNotReachHere();
   1.106 +  return Assembler::zero;
   1.107 +}
   1.108 +
   1.109 +
   1.110 +//----------------------------------------------------------------------------------------------------
    1.111 +// Miscellaneous helper routines
   1.112 +
   1.113 +Address TemplateTable::at_bcp(int offset) {
   1.114 +  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   1.115 +  return Address(rsi, offset);
   1.116 +}
   1.117 +
   1.118 +
   1.119 +void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
   1.120 +                                   Register scratch,
   1.121 +                                   bool load_bc_into_scratch/*=true*/) {
   1.122 +
   1.123 +  if (!RewriteBytecodes) return;
   1.124 +  // the pair bytecodes have already done the load.
   1.125 +  if (load_bc_into_scratch) __ movl(bc, bytecode);
   1.126 +  Label patch_done;
   1.127 +  if (JvmtiExport::can_post_breakpoint()) {
   1.128 +    Label fast_patch;
   1.129 +    // if a breakpoint is present we can't rewrite the stream directly
   1.130 +    __ movzxb(scratch, at_bcp(0));
   1.131 +    __ cmpl(scratch, Bytecodes::_breakpoint);
   1.132 +    __ jcc(Assembler::notEqual, fast_patch);
   1.133 +    __ get_method(scratch);
   1.134 +    // Let breakpoint table handling rewrite to quicker bytecode
   1.135 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, rsi, bc);
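          +    // The runtime call has recorded the fast bytecode in the breakpoint table,
          +    // so skip the direct patch below.  With ASSERT the verification code that
          +    // follows presumably pushes patch_done out of jmpb's one-byte range, hence
          +    // the full jmp in that configuration.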
   1.136 +#ifndef ASSERT
   1.137 +    __ jmpb(patch_done);
   1.138 +    __ bind(fast_patch);
   1.139 +  }
   1.140 +#else
   1.141 +    __ jmp(patch_done);
   1.142 +    __ bind(fast_patch);
   1.143 +  }
   1.144 +  Label okay;
   1.145 +  __ load_unsigned_byte(scratch, at_bcp(0));
   1.146 +  __ cmpl(scratch, (int)Bytecodes::java_code(bytecode));
   1.147 +  __ jccb(Assembler::equal, okay);
   1.148 +  __ cmpl(scratch, bc);
   1.149 +  __ jcc(Assembler::equal, okay);
   1.150 +  __ stop("patching the wrong bytecode");
   1.151 +  __ bind(okay);
   1.152 +#endif
   1.153 +  // patch bytecode
   1.154 +  __ movb(at_bcp(0), bc);
   1.155 +  __ bind(patch_done);
   1.156 +}
   1.157 +
   1.158 +//----------------------------------------------------------------------------------------------------
   1.159 +// Individual instructions
   1.160 +
   1.161 +void TemplateTable::nop() {
   1.162 +  transition(vtos, vtos);
   1.163 +  // nothing to do
   1.164 +}
   1.165 +
   1.166 +void TemplateTable::shouldnotreachhere() {
   1.167 +  transition(vtos, vtos);
   1.168 +  __ stop("shouldnotreachhere bytecode");
   1.169 +}
   1.170 +
   1.171 +
   1.172 +
   1.173 +void TemplateTable::aconst_null() {
   1.174 +  transition(vtos, atos);
   1.175 +  __ xorl(rax, rax);
   1.176 +}
   1.177 +
   1.178 +
   1.179 +void TemplateTable::iconst(int value) {
   1.180 +  transition(vtos, itos);
   1.181 +  if (value == 0) {
   1.182 +    __ xorl(rax, rax);
   1.183 +  } else {
   1.184 +    __ movl(rax, value);
   1.185 +  }
   1.186 +}
   1.187 +
   1.188 +
   1.189 +void TemplateTable::lconst(int value) {
   1.190 +  transition(vtos, ltos);
   1.191 +  if (value == 0) {
   1.192 +    __ xorl(rax, rax);
   1.193 +  } else {
   1.194 +    __ movl(rax, value);
   1.195 +  }
   1.196 +  assert(value >= 0, "check this code");
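          +  // only lconst_0 and lconst_1 exist, so the high word is always zero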
   1.197 +  __ xorl(rdx, rdx);
   1.198 +}
   1.199 +
   1.200 +
   1.201 +void TemplateTable::fconst(int value) {
   1.202 +  transition(vtos, ftos);
   1.203 +         if (value == 0) { __ fldz();
   1.204 +  } else if (value == 1) { __ fld1();
   1.205 +  } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
   1.206 +  } else                 { ShouldNotReachHere();
   1.207 +  }
   1.208 +}
   1.209 +
   1.210 +
   1.211 +void TemplateTable::dconst(int value) {
   1.212 +  transition(vtos, dtos);
   1.213 +         if (value == 0) { __ fldz();
   1.214 +  } else if (value == 1) { __ fld1();
   1.215 +  } else                 { ShouldNotReachHere();
   1.216 +  }
   1.217 +}
   1.218 +
   1.219 +
   1.220 +void TemplateTable::bipush() {
   1.221 +  transition(vtos, itos);
   1.222 +  __ load_signed_byte(rax, at_bcp(1));
   1.223 +}
   1.224 +
   1.225 +
   1.226 +void TemplateTable::sipush() {
   1.227 +  transition(vtos, itos);
   1.228 +  __ load_unsigned_word(rax, at_bcp(1));
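          +  // the operand is big-endian in the bytecode stream: bswap moves the two
          +  // bytes into the upper half of rax, and the arithmetic shift brings the
          +  // 16-bit value back down sign-extended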
   1.229 +  __ bswap(rax);
   1.230 +  __ sarl(rax, 16);
   1.231 +}
   1.232 +
   1.233 +void TemplateTable::ldc(bool wide) {
   1.234 +  transition(vtos, vtos);
   1.235 +  Label call_ldc, notFloat, notClass, Done;
   1.236 +
   1.237 +  if (wide) {
   1.238 +    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.239 +  } else {
   1.240 +    __ load_unsigned_byte(rbx, at_bcp(1));
   1.241 +  }
   1.242 +  __ get_cpool_and_tags(rcx, rax);
   1.243 +  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
   1.244 +  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
   1.245 +
   1.246 +  // get type
   1.247 +  __ xorl(rdx, rdx);
   1.248 +  __ movb(rdx, Address(rax, rbx, Address::times_1, tags_offset));
   1.249 +
   1.250 +  // unresolved string - get the resolved string
   1.251 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedString);
   1.252 +  __ jccb(Assembler::equal, call_ldc);
   1.253 +
   1.254 +  // unresolved class - get the resolved class
   1.255 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
   1.256 +  __ jccb(Assembler::equal, call_ldc);
   1.257 +
   1.258 +  // unresolved class in error (resolution failed) - call into runtime
    1.259 +  // so that the same error from the first resolution attempt is thrown.
   1.260 +  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
   1.261 +  __ jccb(Assembler::equal, call_ldc);
   1.262 +
   1.263 +  // resolved class - need to call vm to get java mirror of the class
   1.264 +  __ cmpl(rdx, JVM_CONSTANT_Class);
   1.265 +  __ jcc(Assembler::notEqual, notClass);
   1.266 +
   1.267 +  __ bind(call_ldc);
   1.268 +  __ movl(rcx, wide);
   1.269 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rcx);
   1.270 +  __ push(atos);
   1.271 +  __ jmp(Done);
   1.272 +
   1.273 +  __ bind(notClass);
   1.274 +  __ cmpl(rdx, JVM_CONSTANT_Float);
   1.275 +  __ jccb(Assembler::notEqual, notFloat);
   1.276 +  // ftos
   1.277 +  __ fld_s(    Address(rcx, rbx, Address::times_4, base_offset));
   1.278 +  __ push(ftos);
   1.279 +  __ jmp(Done);
   1.280 +
   1.281 +  __ bind(notFloat);
   1.282 +#ifdef ASSERT
   1.283 +  { Label L;
   1.284 +    __ cmpl(rdx, JVM_CONSTANT_Integer);
   1.285 +    __ jcc(Assembler::equal, L);
   1.286 +    __ cmpl(rdx, JVM_CONSTANT_String);
   1.287 +    __ jcc(Assembler::equal, L);
   1.288 +    __ stop("unexpected tag type in ldc");
   1.289 +    __ bind(L);
   1.290 +  }
   1.291 +#endif
   1.292 +  Label isOop;
   1.293 +  // atos and itos
   1.294 +  __ movl(rax, Address(rcx, rbx, Address::times_4, base_offset));
    1.295 +  // String is the only oop type we will see here
   1.296 +  __ cmpl(rdx, JVM_CONSTANT_String);
   1.297 +  __ jccb(Assembler::equal, isOop);
   1.298 +  __ push(itos);
   1.299 +  __ jmp(Done);
   1.300 +  __ bind(isOop);
   1.301 +  __ push(atos);
   1.302 +
   1.303 +  if (VerifyOops) {
   1.304 +    __ verify_oop(rax);
   1.305 +  }
   1.306 +  __ bind(Done);
   1.307 +}
   1.308 +
   1.309 +void TemplateTable::ldc2_w() {
   1.310 +  transition(vtos, vtos);
   1.311 +  Label Long, Done;
   1.312 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
   1.313 +
   1.314 +  __ get_cpool_and_tags(rcx, rax);
   1.315 +  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
   1.316 +  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
   1.317 +
   1.318 +  // get type
   1.319 +  __ cmpb(Address(rax, rbx, Address::times_1, tags_offset), JVM_CONSTANT_Double);
   1.320 +  __ jccb(Assembler::notEqual, Long);
   1.321 +  // dtos
   1.322 +  __ fld_d(    Address(rcx, rbx, Address::times_4, base_offset));
   1.323 +  __ push(dtos);
   1.324 +  __ jmpb(Done);
   1.325 +
   1.326 +  __ bind(Long);
   1.327 +  // ltos
   1.328 +  __ movl(rax, Address(rcx, rbx, Address::times_4, base_offset + 0 * wordSize));
   1.329 +  __ movl(rdx, Address(rcx, rbx, Address::times_4, base_offset + 1 * wordSize));
   1.330 +
   1.331 +  __ push(ltos);
   1.332 +
   1.333 +  __ bind(Done);
   1.334 +}
   1.335 +
   1.336 +
   1.337 +void TemplateTable::locals_index(Register reg, int offset) {
   1.338 +  __ load_unsigned_byte(reg, at_bcp(offset));
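          +  // locals live at decreasing addresses below rdi, so the index is negated
          +  // before being used as a scaled offset (see iaddress(Register) above)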
   1.339 +  __ negl(reg);
   1.340 +}
   1.341 +
   1.342 +
   1.343 +void TemplateTable::iload() {
   1.344 +  transition(vtos, itos);
   1.345 +  if (RewriteFrequentPairs) {
   1.346 +    Label rewrite, done;
   1.347 +
   1.348 +    // get next byte
   1.349 +    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    1.350 +    // if _iload, wait to rewrite to _fast_iload2.  We only want to rewrite the
    1.351 +    // last two iloads in a pair.  Comparing against fast_iload means that
    1.352 +    // the next bytecode is neither an iload nor a caload, and therefore
   1.353 +    // an iload pair.
   1.354 +    __ cmpl(rbx, Bytecodes::_iload);
   1.355 +    __ jcc(Assembler::equal, done);
   1.356 +
   1.357 +    __ cmpl(rbx, Bytecodes::_fast_iload);
   1.358 +    __ movl(rcx, Bytecodes::_fast_iload2);
   1.359 +    __ jccb(Assembler::equal, rewrite);
   1.360 +
   1.361 +    // if _caload, rewrite to fast_icaload
   1.362 +    __ cmpl(rbx, Bytecodes::_caload);
   1.363 +    __ movl(rcx, Bytecodes::_fast_icaload);
   1.364 +    __ jccb(Assembler::equal, rewrite);
   1.365 +
   1.366 +    // rewrite so iload doesn't check again.
   1.367 +    __ movl(rcx, Bytecodes::_fast_iload);
   1.368 +
   1.369 +    // rewrite
   1.370 +    // rcx: fast bytecode
   1.371 +    __ bind(rewrite);
   1.372 +    patch_bytecode(Bytecodes::_iload, rcx, rbx, false);
   1.373 +    __ bind(done);
   1.374 +  }
   1.375 +
   1.376 +  // Get the local value into tos
   1.377 +  locals_index(rbx);
   1.378 +  __ movl(rax, iaddress(rbx));
   1.379 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.380 +}
   1.381 +
   1.382 +
   1.383 +void TemplateTable::fast_iload2() {
   1.384 +  transition(vtos, itos);
   1.385 +  locals_index(rbx);
   1.386 +  __ movl(rax, iaddress(rbx));
   1.387 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.388 +  __ push(itos);
   1.389 +  locals_index(rbx, 3);
   1.390 +  __ movl(rax, iaddress(rbx));
   1.391 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.392 +}
   1.393 +
   1.394 +void TemplateTable::fast_iload() {
   1.395 +  transition(vtos, itos);
   1.396 +  locals_index(rbx);
   1.397 +  __ movl(rax, iaddress(rbx));
   1.398 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.399 +}
   1.400 +
   1.401 +
   1.402 +void TemplateTable::lload() {
   1.403 +  transition(vtos, ltos);
   1.404 +  locals_index(rbx);
   1.405 +  __ movl(rax, laddress(rbx));
   1.406 +  __ movl(rdx, haddress(rbx));
   1.407 +  debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.408 +}
   1.409 +
   1.410 +
   1.411 +void TemplateTable::fload() {
   1.412 +  transition(vtos, ftos);
   1.413 +  locals_index(rbx);
   1.414 +  __ fld_s(faddress(rbx));
   1.415 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.416 +}
   1.417 +
   1.418 +
   1.419 +void TemplateTable::dload() {
   1.420 +  transition(vtos, dtos);
   1.421 +  locals_index(rbx);
   1.422 +  if (TaggedStackInterpreter) {
   1.423 +    // Get double out of locals array, onto temp stack and load with
   1.424 +    // float instruction into ST0
   1.425 +    __ movl(rax, laddress(rbx));
   1.426 +    __ movl(rdx, haddress(rbx));
   1.427 +    __ pushl(rdx);  // push hi first
   1.428 +    __ pushl(rax);
   1.429 +    __ fld_d(Address(rsp, 0));
   1.430 +    __ addl(rsp, 2*wordSize);
   1.431 +    debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.432 +  } else {
   1.433 +    __ fld_d(daddress(rbx));
   1.434 +  }
   1.435 +}
   1.436 +
   1.437 +
   1.438 +void TemplateTable::aload() {
   1.439 +  transition(vtos, atos);
   1.440 +  locals_index(rbx);
   1.441 +  __ movl(rax, iaddress(rbx));
   1.442 +  debug_only(__ verify_local_tag(frame::TagReference, rbx));
   1.443 +}
   1.444 +
   1.445 +
   1.446 +void TemplateTable::locals_index_wide(Register reg) {
   1.447 +  __ movl(reg, at_bcp(2));
   1.448 +  __ bswap(reg);
   1.449 +  __ shrl(reg, 16);
   1.450 +  __ negl(reg);
   1.451 +}
   1.452 +
   1.453 +
   1.454 +void TemplateTable::wide_iload() {
   1.455 +  transition(vtos, itos);
   1.456 +  locals_index_wide(rbx);
   1.457 +  __ movl(rax, iaddress(rbx));
   1.458 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.459 +}
   1.460 +
   1.461 +
   1.462 +void TemplateTable::wide_lload() {
   1.463 +  transition(vtos, ltos);
   1.464 +  locals_index_wide(rbx);
   1.465 +  __ movl(rax, laddress(rbx));
   1.466 +  __ movl(rdx, haddress(rbx));
   1.467 +  debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.468 +}
   1.469 +
   1.470 +
   1.471 +void TemplateTable::wide_fload() {
   1.472 +  transition(vtos, ftos);
   1.473 +  locals_index_wide(rbx);
   1.474 +  __ fld_s(faddress(rbx));
   1.475 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.476 +}
   1.477 +
   1.478 +
   1.479 +void TemplateTable::wide_dload() {
   1.480 +  transition(vtos, dtos);
   1.481 +  locals_index_wide(rbx);
   1.482 +  if (TaggedStackInterpreter) {
   1.483 +    // Get double out of locals array, onto temp stack and load with
   1.484 +    // float instruction into ST0
   1.485 +    __ movl(rax, laddress(rbx));
   1.486 +    __ movl(rdx, haddress(rbx));
   1.487 +    __ pushl(rdx);  // push hi first
   1.488 +    __ pushl(rax);
   1.489 +    __ fld_d(Address(rsp, 0));
   1.490 +    __ addl(rsp, 2*wordSize);
   1.491 +    debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   1.492 +  } else {
   1.493 +    __ fld_d(daddress(rbx));
   1.494 +  }
   1.495 +}
   1.496 +
   1.497 +
   1.498 +void TemplateTable::wide_aload() {
   1.499 +  transition(vtos, atos);
   1.500 +  locals_index_wide(rbx);
   1.501 +  __ movl(rax, iaddress(rbx));
   1.502 +  debug_only(__ verify_local_tag(frame::TagReference, rbx));
   1.503 +}
   1.504 +
   1.505 +void TemplateTable::index_check(Register array, Register index) {
   1.506 +  // Pop ptr into array
   1.507 +  __ pop_ptr(array);
   1.508 +  index_check_without_pop(array, index);
   1.509 +}
   1.510 +
   1.511 +void TemplateTable::index_check_without_pop(Register array, Register index) {
    1.512 +  // destroys rbx
   1.513 +  // check array
   1.514 +  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
   1.515 +  // check index
   1.516 +  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
   1.517 +  if (index != rbx) {
    1.518 +    // ??? convention: move aberrant index into rbx for exception message
   1.519 +    assert(rbx != array, "different registers");
   1.520 +    __ movl(rbx, index);
   1.521 +  }
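          +  // unsigned comparison: a negative index shows up as a very large value
          +  // and takes the out-of-bounds path as well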
   1.522 +  __ jump_cc(Assembler::aboveEqual,
   1.523 +             ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
   1.524 +}
   1.525 +
   1.526 +
   1.527 +void TemplateTable::iaload() {
   1.528 +  transition(itos, itos);
   1.529 +  // rdx: array
    1.530 +  index_check(rdx, rax);  // kills rbx
    1.531 +  // rax: index
   1.532 +  __ movl(rax, Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_INT)));
   1.533 +}
   1.534 +
   1.535 +
   1.536 +void TemplateTable::laload() {
   1.537 +  transition(itos, ltos);
    1.538 +  // rax: index
    1.539 +  // rdx: array
    1.540 +  index_check(rdx, rax);
    1.541 +  __ movl(rbx, rax);
    1.542 +  // rbx: index
   1.543 +  __ movl(rax, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize));
   1.544 +  __ movl(rdx, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize));
   1.545 +}
   1.546 +
   1.547 +
   1.548 +void TemplateTable::faload() {
   1.549 +  transition(itos, ftos);
   1.550 +  // rdx: array
    1.551 +  index_check(rdx, rax);  // kills rbx
    1.552 +  // rax: index
   1.553 +  __ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
   1.554 +}
   1.555 +
   1.556 +
   1.557 +void TemplateTable::daload() {
   1.558 +  transition(itos, dtos);
   1.559 +  // rdx: array
    1.560 +  index_check(rdx, rax);  // kills rbx
    1.561 +  // rax: index
   1.562 +  __ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
   1.563 +}
   1.564 +
   1.565 +
   1.566 +void TemplateTable::aaload() {
   1.567 +  transition(itos, atos);
   1.568 +  // rdx: array
    1.569 +  index_check(rdx, rax);  // kills rbx
    1.570 +  // rax: index
   1.571 +  __ movl(rax, Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.572 +}
   1.573 +
   1.574 +
   1.575 +void TemplateTable::baload() {
   1.576 +  transition(itos, itos);
   1.577 +  // rdx: array
    1.578 +  index_check(rdx, rax);  // kills rbx
    1.579 +  // rax: index
   1.580 +  // can do better code for P5 - fix this at some point
   1.581 +  __ load_signed_byte(rbx, Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
   1.582 +  __ movl(rax, rbx);
   1.583 +}
   1.584 +
   1.585 +
   1.586 +void TemplateTable::caload() {
   1.587 +  transition(itos, itos);
   1.588 +  // rdx: array
    1.589 +  index_check(rdx, rax);  // kills rbx
    1.590 +  // rax: index
   1.591 +  // can do better code for P5 - may want to improve this at some point
   1.592 +  __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.593 +  __ movl(rax, rbx);
   1.594 +}
   1.595 +
   1.596 +// iload followed by caload frequent pair
   1.597 +void TemplateTable::fast_icaload() {
   1.598 +  transition(vtos, itos);
   1.599 +  // load index out of locals
   1.600 +  locals_index(rbx);
   1.601 +  __ movl(rax, iaddress(rbx));
   1.602 +  debug_only(__ verify_local_tag(frame::TagValue, rbx));
   1.603 +
   1.604 +  // rdx: array
   1.605 +  index_check(rdx, rax);
    1.606 +  // rax: index
   1.607 +  __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   1.608 +  __ movl(rax, rbx);
   1.609 +}
   1.610 +
   1.611 +void TemplateTable::saload() {
   1.612 +  transition(itos, itos);
   1.613 +  // rdx: array
    1.614 +  index_check(rdx, rax);  // kills rbx
    1.615 +  // rax: index
   1.616 +  // can do better code for P5 - may want to improve this at some point
   1.617 +  __ load_signed_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
   1.618 +  __ movl(rax, rbx);
   1.619 +}
   1.620 +
   1.621 +
   1.622 +void TemplateTable::iload(int n) {
   1.623 +  transition(vtos, itos);
   1.624 +  __ movl(rax, iaddress(n));
   1.625 +  debug_only(__ verify_local_tag(frame::TagValue, n));
   1.626 +}
   1.627 +
   1.628 +
   1.629 +void TemplateTable::lload(int n) {
   1.630 +  transition(vtos, ltos);
   1.631 +  __ movl(rax, laddress(n));
   1.632 +  __ movl(rdx, haddress(n));
   1.633 +  debug_only(__ verify_local_tag(frame::TagCategory2, n));
   1.634 +}
   1.635 +
   1.636 +
   1.637 +void TemplateTable::fload(int n) {
   1.638 +  transition(vtos, ftos);
   1.639 +  __ fld_s(faddress(n));
   1.640 +  debug_only(__ verify_local_tag(frame::TagValue, n));
   1.641 +}
   1.642 +
   1.643 +
   1.644 +void TemplateTable::dload(int n) {
   1.645 +  transition(vtos, dtos);
   1.646 +  if (TaggedStackInterpreter) {
   1.647 +    // Get double out of locals array, onto temp stack and load with
   1.648 +    // float instruction into ST0
   1.649 +    __ movl(rax, laddress(n));
   1.650 +    __ movl(rdx, haddress(n));
   1.651 +    __ pushl(rdx);  // push hi first
   1.652 +    __ pushl(rax);
   1.653 +    __ fld_d(Address(rsp, 0));
   1.654 +    __ addl(rsp, 2*wordSize);  // reset rsp
   1.655 +    debug_only(__ verify_local_tag(frame::TagCategory2, n));
   1.656 +  } else {
   1.657 +    __ fld_d(daddress(n));
   1.658 +  }
   1.659 +}
   1.660 +
   1.661 +
   1.662 +void TemplateTable::aload(int n) {
   1.663 +  transition(vtos, atos);
   1.664 +  __ movl(rax, aaddress(n));
   1.665 +  debug_only(__ verify_local_tag(frame::TagReference, n));
   1.666 +}
   1.667 +
   1.668 +
   1.669 +void TemplateTable::aload_0() {
   1.670 +  transition(vtos, atos);
   1.671 +  // According to bytecode histograms, the pairs:
   1.672 +  //
   1.673 +  // _aload_0, _fast_igetfield
   1.674 +  // _aload_0, _fast_agetfield
   1.675 +  // _aload_0, _fast_fgetfield
   1.676 +  //
   1.677 +  // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
   1.678 +  // bytecode checks if the next bytecode is either _fast_igetfield,
   1.679 +  // _fast_agetfield or _fast_fgetfield and then rewrites the
   1.680 +  // current bytecode into a pair bytecode; otherwise it rewrites the current
   1.681 +  // bytecode into _fast_aload_0 that doesn't do the pair check anymore.
   1.682 +  //
   1.683 +  // Note: If the next bytecode is _getfield, the rewrite must be delayed,
   1.684 +  //       otherwise we may miss an opportunity for a pair.
   1.685 +  //
   1.686 +  // Also rewrite frequent pairs
   1.687 +  //   aload_0, aload_1
   1.688 +  //   aload_0, iload_1
    1.689 +  // These bytecodes, having only a small amount of code, are the most profitable to rewrite
   1.690 +  if (RewriteFrequentPairs) {
   1.691 +    Label rewrite, done;
   1.692 +    // get next byte
   1.693 +    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
   1.694 +
   1.695 +    // do actual aload_0
   1.696 +    aload(0);
   1.697 +
   1.698 +    // if _getfield then wait with rewrite
   1.699 +    __ cmpl(rbx, Bytecodes::_getfield);
   1.700 +    __ jcc(Assembler::equal, done);
   1.701 +
    1.702 +    // if _igetfield then rewrite to _fast_iaccess_0
   1.703 +    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.704 +    __ cmpl(rbx, Bytecodes::_fast_igetfield);
   1.705 +    __ movl(rcx, Bytecodes::_fast_iaccess_0);
   1.706 +    __ jccb(Assembler::equal, rewrite);
   1.707 +
    1.708 +    // if _agetfield then rewrite to _fast_aaccess_0
   1.709 +    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.710 +    __ cmpl(rbx, Bytecodes::_fast_agetfield);
   1.711 +    __ movl(rcx, Bytecodes::_fast_aaccess_0);
   1.712 +    __ jccb(Assembler::equal, rewrite);
   1.713 +
    1.714 +    // if _fgetfield then rewrite to _fast_faccess_0
   1.715 +    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.716 +    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
   1.717 +    __ movl(rcx, Bytecodes::_fast_faccess_0);
   1.718 +    __ jccb(Assembler::equal, rewrite);
   1.719 +
    1.720 +    // else rewrite to _fast_aload_0
   1.721 +    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
   1.722 +    __ movl(rcx, Bytecodes::_fast_aload_0);
   1.723 +
   1.724 +    // rewrite
   1.725 +    // rcx: fast bytecode
   1.726 +    __ bind(rewrite);
   1.727 +    patch_bytecode(Bytecodes::_aload_0, rcx, rbx, false);
   1.728 +
   1.729 +    __ bind(done);
   1.730 +  } else {
   1.731 +    aload(0);
   1.732 +  }
   1.733 +}
   1.734 +
   1.735 +void TemplateTable::istore() {
   1.736 +  transition(itos, vtos);
   1.737 +  locals_index(rbx);
   1.738 +  __ movl(iaddress(rbx), rax);
   1.739 +  __ tag_local(frame::TagValue, rbx);
   1.740 +}
   1.741 +
   1.742 +
   1.743 +void TemplateTable::lstore() {
   1.744 +  transition(ltos, vtos);
   1.745 +  locals_index(rbx);
   1.746 +  __ movl(laddress(rbx), rax);
   1.747 +  __ movl(haddress(rbx), rdx);
   1.748 +  __ tag_local(frame::TagCategory2, rbx);
   1.749 +}
   1.750 +
   1.751 +
   1.752 +void TemplateTable::fstore() {
   1.753 +  transition(ftos, vtos);
   1.754 +  locals_index(rbx);
   1.755 +  __ fstp_s(faddress(rbx));
   1.756 +  __ tag_local(frame::TagValue, rbx);
   1.757 +}
   1.758 +
   1.759 +
   1.760 +void TemplateTable::dstore() {
   1.761 +  transition(dtos, vtos);
   1.762 +  locals_index(rbx);
   1.763 +  if (TaggedStackInterpreter) {
   1.764 +    // Store double on stack and reload into locals nonadjacently
   1.765 +    __ subl(rsp, 2 * wordSize);
   1.766 +    __ fstp_d(Address(rsp, 0));
   1.767 +    __ popl(rax);
   1.768 +    __ popl(rdx);
   1.769 +    __ movl(laddress(rbx), rax);
   1.770 +    __ movl(haddress(rbx), rdx);
   1.771 +    __ tag_local(frame::TagCategory2, rbx);
   1.772 +  } else {
   1.773 +    __ fstp_d(daddress(rbx));
   1.774 +  }
   1.775 +}
   1.776 +
   1.777 +
   1.778 +void TemplateTable::astore() {
   1.779 +  transition(vtos, vtos);
   1.780 +  __ pop_ptr(rax, rdx);   // will need to pop tag too
   1.781 +  locals_index(rbx);
   1.782 +  __ movl(aaddress(rbx), rax);
   1.783 +  __ tag_local(rdx, rbx);    // need to store same tag in local may be returnAddr
   1.784 +}
   1.785 +
   1.786 +
   1.787 +void TemplateTable::wide_istore() {
   1.788 +  transition(vtos, vtos);
   1.789 +  __ pop_i(rax);
   1.790 +  locals_index_wide(rbx);
   1.791 +  __ movl(iaddress(rbx), rax);
   1.792 +  __ tag_local(frame::TagValue, rbx);
   1.793 +}
   1.794 +
   1.795 +
   1.796 +void TemplateTable::wide_lstore() {
   1.797 +  transition(vtos, vtos);
   1.798 +  __ pop_l(rax, rdx);
   1.799 +  locals_index_wide(rbx);
   1.800 +  __ movl(laddress(rbx), rax);
   1.801 +  __ movl(haddress(rbx), rdx);
   1.802 +  __ tag_local(frame::TagCategory2, rbx);
   1.803 +}
   1.804 +
   1.805 +
   1.806 +void TemplateTable::wide_fstore() {
   1.807 +  wide_istore();
   1.808 +}
   1.809 +
   1.810 +
   1.811 +void TemplateTable::wide_dstore() {
   1.812 +  wide_lstore();
   1.813 +}
   1.814 +
   1.815 +
   1.816 +void TemplateTable::wide_astore() {
   1.817 +  transition(vtos, vtos);
   1.818 +  __ pop_ptr(rax, rdx);
   1.819 +  locals_index_wide(rbx);
   1.820 +  __ movl(aaddress(rbx), rax);
   1.821 +  __ tag_local(rdx, rbx);
   1.822 +}
   1.823 +
   1.824 +
   1.825 +void TemplateTable::iastore() {
   1.826 +  transition(itos, vtos);
   1.827 +  __ pop_i(rbx);
    1.828 +  // rax: value
    1.829 +  // rdx: array
    1.830 +  index_check(rdx, rbx);  // prefer index in rbx
    1.831 +  // rbx: index
   1.832 +  __ movl(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_INT)), rax);
   1.833 +}
   1.834 +
   1.835 +
   1.836 +void TemplateTable::lastore() {
   1.837 +  transition(ltos, vtos);
   1.838 +  __ pop_i(rbx);
    1.839 +  // rax: low(value)
    1.840 +  // rcx: array
    1.841 +  // rdx: high(value)
    1.842 +  index_check(rcx, rbx);  // prefer index in rbx
    1.843 +  // rbx: index
   1.844 +  __ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize), rax);
   1.845 +  __ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize), rdx);
   1.846 +}
   1.847 +
   1.848 +
   1.849 +void TemplateTable::fastore() {
   1.850 +  transition(ftos, vtos);
   1.851 +  __ pop_i(rbx);
   1.852 +  // rdx: array
   1.853 +  // st0: value
    1.854 +  index_check(rdx, rbx);  // prefer index in rbx
    1.855 +  // rbx: index
   1.856 +  __ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
   1.857 +}
   1.858 +
   1.859 +
   1.860 +void TemplateTable::dastore() {
   1.861 +  transition(dtos, vtos);
   1.862 +  __ pop_i(rbx);
   1.863 +  // rdx: array
   1.864 +  // st0: value
    1.865 +  index_check(rdx, rbx);  // prefer index in rbx
    1.866 +  // rbx: index
   1.867 +  __ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
   1.868 +}
   1.869 +
   1.870 +
   1.871 +void TemplateTable::aastore() {
   1.872 +  Label is_null, ok_is_subtype, done;
   1.873 +  transition(vtos, vtos);
   1.874 +  // stack: ..., array, index, value
   1.875 +  __ movl(rax, at_tos());     // Value
   1.876 +  __ movl(rcx, at_tos_p1());  // Index
   1.877 +  __ movl(rdx, at_tos_p2());  // Array
    1.878 +  index_check_without_pop(rdx, rcx);      // kills rbx
   1.879 +  // do array store check - check for NULL value first
   1.880 +  __ testl(rax, rax);
   1.881 +  __ jcc(Assembler::zero, is_null);
   1.882 +
   1.883 +  // Move subklass into EBX
   1.884 +  __ movl(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
   1.885 +  // Move superklass into EAX
   1.886 +  __ movl(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
   1.887 +  __ movl(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
   1.888 +  // Compress array+index*4+12 into a single register.  Frees ECX.
   1.889 +  __ leal(rdx, Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.890 +
   1.891 +  // Generate subtype check.  Blows ECX.  Resets EDI to locals.
   1.892 +  // Superklass in EAX.  Subklass in EBX.
   1.893 +  __ gen_subtype_check( rbx, ok_is_subtype );
   1.894 +
   1.895 +  // Come here on failure
   1.896 +  // object is at TOS
   1.897 +  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
   1.898 +
   1.899 +  // Come here on success
   1.900 +  __ bind(ok_is_subtype);
   1.901 +  __ movl(rax, at_rsp());     // Value
   1.902 +  __ movl(Address(rdx, 0), rax);
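          +  // card-table write barrier for the reference just stored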
   1.903 +  __ store_check(rdx);
   1.904 +  __ jmpb(done);
   1.905 +
   1.906 +  // Have a NULL in EAX, EDX=array, ECX=index.  Store NULL at ary[idx]
   1.907 +  __ bind(is_null);
   1.908 +  __ profile_null_seen(rbx);
   1.909 +  __ movl(Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax);
   1.910 +
   1.911 +  // Pop stack arguments
   1.912 +  __ bind(done);
   1.913 +  __ addl(rsp, 3 * Interpreter::stackElementSize());
   1.914 +}
   1.915 +
   1.916 +
   1.917 +void TemplateTable::bastore() {
   1.918 +  transition(itos, vtos);
   1.919 +  __ pop_i(rbx);
    1.920 +  // rax: value
    1.921 +  // rdx: array
    1.922 +  index_check(rdx, rbx);  // prefer index in rbx
    1.923 +  // rbx: index
   1.924 +  __ movb(Address(rdx, rbx, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)), rax);
   1.925 +}
   1.926 +
   1.927 +
   1.928 +void TemplateTable::castore() {
   1.929 +  transition(itos, vtos);
   1.930 +  __ pop_i(rbx);
    1.931 +  // rax: value
    1.932 +  // rdx: array
    1.933 +  index_check(rdx, rbx);  // prefer index in rbx
    1.934 +  // rbx: index
   1.935 +  __ movw(Address(rdx, rbx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), rax);
   1.936 +}
   1.937 +
   1.938 +
   1.939 +void TemplateTable::sastore() {
   1.940 +  castore();
   1.941 +}
   1.942 +
   1.943 +
   1.944 +void TemplateTable::istore(int n) {
   1.945 +  transition(itos, vtos);
   1.946 +  __ movl(iaddress(n), rax);
   1.947 +  __ tag_local(frame::TagValue, n);
   1.948 +}
   1.949 +
   1.950 +
   1.951 +void TemplateTable::lstore(int n) {
   1.952 +  transition(ltos, vtos);
   1.953 +  __ movl(laddress(n), rax);
   1.954 +  __ movl(haddress(n), rdx);
   1.955 +  __ tag_local(frame::TagCategory2, n);
   1.956 +}
   1.957 +
   1.958 +
   1.959 +void TemplateTable::fstore(int n) {
   1.960 +  transition(ftos, vtos);
   1.961 +  __ fstp_s(faddress(n));
   1.962 +  __ tag_local(frame::TagValue, n);
   1.963 +}
   1.964 +
   1.965 +
   1.966 +void TemplateTable::dstore(int n) {
   1.967 +  transition(dtos, vtos);
   1.968 +  if (TaggedStackInterpreter) {
   1.969 +    __ subl(rsp, 2 * wordSize);
   1.970 +    __ fstp_d(Address(rsp, 0));
   1.971 +    __ popl(rax);
   1.972 +    __ popl(rdx);
   1.973 +    __ movl(laddress(n), rax);
   1.974 +    __ movl(haddress(n), rdx);
   1.975 +    __ tag_local(frame::TagCategory2, n);
   1.976 +  } else {
   1.977 +    __ fstp_d(daddress(n));
   1.978 +  }
   1.979 +}
   1.980 +
   1.981 +
   1.982 +void TemplateTable::astore(int n) {
   1.983 +  transition(vtos, vtos);
   1.984 +  __ pop_ptr(rax, rdx);
   1.985 +  __ movl(aaddress(n), rax);
   1.986 +  __ tag_local(rdx, n);
   1.987 +}
   1.988 +
   1.989 +
   1.990 +void TemplateTable::pop() {
   1.991 +  transition(vtos, vtos);
   1.992 +  __ addl(rsp, Interpreter::stackElementSize());
   1.993 +}
   1.994 +
   1.995 +
   1.996 +void TemplateTable::pop2() {
   1.997 +  transition(vtos, vtos);
   1.998 +  __ addl(rsp, 2*Interpreter::stackElementSize());
   1.999 +}
  1.1000 +
  1.1001 +
  1.1002 +void TemplateTable::dup() {
  1.1003 +  transition(vtos, vtos);
  1.1004 +  // stack: ..., a
  1.1005 +  __ load_ptr_and_tag(0, rax, rdx);
  1.1006 +  __ push_ptr(rax, rdx);
  1.1007 +  // stack: ..., a, a
  1.1008 +}
  1.1009 +
  1.1010 +
  1.1011 +void TemplateTable::dup_x1() {
  1.1012 +  transition(vtos, vtos);
  1.1013 +  // stack: ..., a, b
  1.1014 +  __ load_ptr_and_tag(0, rax, rdx);  // load b
  1.1015 +  __ load_ptr_and_tag(1, rcx, rbx);  // load a
  1.1016 +  __ store_ptr_and_tag(1, rax, rdx); // store b
  1.1017 +  __ store_ptr_and_tag(0, rcx, rbx); // store a
  1.1018 +  __ push_ptr(rax, rdx);             // push b
  1.1019 +  // stack: ..., b, a, b
  1.1020 +}
  1.1021 +
  1.1022 +
  1.1023 +void TemplateTable::dup_x2() {
  1.1024 +  transition(vtos, vtos);
  1.1025 +  // stack: ..., a, b, c
  1.1026 +  __ load_ptr_and_tag(0, rax, rdx);  // load c
  1.1027 +  __ load_ptr_and_tag(2, rcx, rbx);  // load a
  1.1028 +  __ store_ptr_and_tag(2, rax, rdx); // store c in a
  1.1029 +  __ push_ptr(rax, rdx);             // push c
  1.1030 +  // stack: ..., c, b, c, c
  1.1031 +  __ load_ptr_and_tag(2, rax, rdx);  // load b
  1.1032 +  __ store_ptr_and_tag(2, rcx, rbx); // store a in b
  1.1033 +  // stack: ..., c, a, c, c
  1.1034 +  __ store_ptr_and_tag(1, rax, rdx); // store b in c
  1.1035 +  // stack: ..., c, a, b, c
  1.1036 +}
  1.1037 +
  1.1038 +
  1.1039 +void TemplateTable::dup2() {
  1.1040 +  transition(vtos, vtos);
  1.1041 +  // stack: ..., a, b
  1.1042 +  __ load_ptr_and_tag(1, rax, rdx);  // load a
  1.1043 +  __ push_ptr(rax, rdx);             // push a
  1.1044 +  __ load_ptr_and_tag(1, rax, rdx);  // load b
  1.1045 +  __ push_ptr(rax, rdx);             // push b
  1.1046 +  // stack: ..., a, b, a, b
  1.1047 +}
  1.1048 +
  1.1049 +
  1.1050 +void TemplateTable::dup2_x1() {
  1.1051 +  transition(vtos, vtos);
  1.1052 +  // stack: ..., a, b, c
  1.1053 +  __ load_ptr_and_tag(0, rcx, rbx);  // load c
  1.1054 +  __ load_ptr_and_tag(1, rax, rdx);  // load b
  1.1055 +  __ push_ptr(rax, rdx);             // push b
  1.1056 +  __ push_ptr(rcx, rbx);             // push c
  1.1057 +  // stack: ..., a, b, c, b, c
  1.1058 +  __ store_ptr_and_tag(3, rcx, rbx); // store c in b
  1.1059 +  // stack: ..., a, c, c, b, c
  1.1060 +  __ load_ptr_and_tag(4, rcx, rbx);  // load a
  1.1061 +  __ store_ptr_and_tag(2, rcx, rbx); // store a in 2nd c
  1.1062 +  // stack: ..., a, c, a, b, c
  1.1063 +  __ store_ptr_and_tag(4, rax, rdx); // store b in a
  1.1064 +  // stack: ..., b, c, a, b, c
  1.1065 +  // stack: ..., b, c, a, b, c
  1.1066 +}
  1.1067 +
  1.1068 +
  1.1069 +void TemplateTable::dup2_x2() {
  1.1070 +  transition(vtos, vtos);
  1.1071 +  // stack: ..., a, b, c, d
  1.1072 +  __ load_ptr_and_tag(0, rcx, rbx);  // load d
  1.1073 +  __ load_ptr_and_tag(1, rax, rdx);  // load c
  1.1074 +  __ push_ptr(rax, rdx);             // push c
  1.1075 +  __ push_ptr(rcx, rbx);             // push d
  1.1076 +  // stack: ..., a, b, c, d, c, d
  1.1077 +  __ load_ptr_and_tag(4, rax, rdx);  // load b
  1.1078 +  __ store_ptr_and_tag(2, rax, rdx); // store b in d
  1.1079 +  __ store_ptr_and_tag(4, rcx, rbx); // store d in b
  1.1080 +  // stack: ..., a, d, c, b, c, d
  1.1081 +  __ load_ptr_and_tag(5, rcx, rbx);  // load a
  1.1082 +  __ load_ptr_and_tag(3, rax, rdx);  // load c
  1.1083 +  __ store_ptr_and_tag(3, rcx, rbx); // store a in c
  1.1084 +  __ store_ptr_and_tag(5, rax, rdx); // store c in a
  1.1085 +  // stack: ..., c, d, a, b, c, d
  1.1086 +  // stack: ..., c, d, a, b, c, d
  1.1087 +}
  1.1088 +
  1.1089 +
  1.1090 +void TemplateTable::swap() {
  1.1091 +  transition(vtos, vtos);
  1.1092 +  // stack: ..., a, b
  1.1093 +  __ load_ptr_and_tag(1, rcx, rbx);  // load a
  1.1094 +  __ load_ptr_and_tag(0, rax, rdx);  // load b
  1.1095 +  __ store_ptr_and_tag(0, rcx, rbx); // store a in b
  1.1096 +  __ store_ptr_and_tag(1, rax, rdx); // store b in a
  1.1097 +  // stack: ..., b, a
  1.1098 +}
  1.1099 +
  1.1100 +
  1.1101 +void TemplateTable::iop2(Operation op) {
  1.1102 +  transition(itos, itos);
  1.1103 +  switch (op) {
  1.1104 +    case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
  1.1105 +    case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
  1.1106 +    case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
  1.1107 +    case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
  1.1108 +    case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
  1.1109 +    case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
  1.1110 +    case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
  1.1111 +    case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
  1.1112 +    case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
  1.1113 +    default   : ShouldNotReachHere();
  1.1114 +  }
  1.1115 +}
  1.1116 +
  1.1117 +
  1.1118 +void TemplateTable::lop2(Operation op) {
  1.1119 +  transition(ltos, ltos);
  1.1120 +  __ pop_l(rbx, rcx);
  1.1121 +  switch (op) {
  1.1122 +    case add : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
  1.1123 +    case sub : __ subl(rbx, rax); __ sbbl(rcx, rdx);
  1.1124 +               __ movl(rax, rbx); __ movl(rdx, rcx); break;
  1.1125 +    case _and: __ andl(rax, rbx); __ andl(rdx, rcx); break;
  1.1126 +    case _or : __ orl (rax, rbx); __ orl (rdx, rcx); break;
  1.1127 +    case _xor: __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
  1.1128 +    default : ShouldNotReachHere();
  1.1129 +  }
  1.1130 +}
  1.1131 +
  1.1132 +
  1.1133 +void TemplateTable::idiv() {
  1.1134 +  transition(itos, itos);
  1.1135 +  __ movl(rcx, rax);
  1.1136 +  __ pop_i(rax);
   1.1137 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1138 +  //       they are not equal, one could do a normal division (no correction
  1.1139 +  //       needed), which may speed up this implementation for the common case.
  1.1140 +  //       (see also JVM spec., p.243 & p.271)
  1.1141 +  __ corrected_idivl(rcx);
  1.1142 +}
  1.1143 +
  1.1144 +
  1.1145 +void TemplateTable::irem() {
  1.1146 +  transition(itos, itos);
  1.1147 +  __ movl(rcx, rax);
  1.1148 +  __ pop_i(rax);
   1.1149 +  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  1.1150 +  //       they are not equal, one could do a normal division (no correction
  1.1151 +  //       needed), which may speed up this implementation for the common case.
  1.1152 +  //       (see also JVM spec., p.243 & p.271)
  1.1153 +  __ corrected_idivl(rcx);
  1.1154 +  __ movl(rax, rdx);
  1.1155 +}
  1.1156 +
  1.1157 +
  1.1158 +void TemplateTable::lmul() {
  1.1159 +  transition(ltos, ltos);
  1.1160 +  __ pop_l(rbx, rcx);
  1.1161 +  __ pushl(rcx); __ pushl(rbx);
  1.1162 +  __ pushl(rdx); __ pushl(rax);
  1.1163 +  __ lmul(2 * wordSize, 0);
  1.1164 +  __ addl(rsp, 4 * wordSize);  // take off temporaries
  1.1165 +}
  1.1166 +
  1.1167 +
  1.1168 +void TemplateTable::ldiv() {
  1.1169 +  transition(ltos, ltos);
  1.1170 +  __ pop_l(rbx, rcx);
  1.1171 +  __ pushl(rcx); __ pushl(rbx);
  1.1172 +  __ pushl(rdx); __ pushl(rax);
  1.1173 +  // check if y = 0
  1.1174 +  __ orl(rax, rdx);
  1.1175 +  __ jump_cc(Assembler::zero,
  1.1176 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1177 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
  1.1178 +  __ addl(rsp, 4 * wordSize);  // take off temporaries
  1.1179 +}
  1.1180 +
  1.1181 +
  1.1182 +void TemplateTable::lrem() {
  1.1183 +  transition(ltos, ltos);
  1.1184 +  __ pop_l(rbx, rcx);
  1.1185 +  __ pushl(rcx); __ pushl(rbx);
  1.1186 +  __ pushl(rdx); __ pushl(rax);
  1.1187 +  // check if y = 0
  1.1188 +  __ orl(rax, rdx);
  1.1189 +  __ jump_cc(Assembler::zero,
  1.1190 +             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  1.1191 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
  1.1192 +  __ addl(rsp, 4 * wordSize);
  1.1193 +}
  1.1194 +
  1.1195 +
  1.1196 +void TemplateTable::lshl() {
  1.1197 +  transition(itos, ltos);
  1.1198 +  __ movl(rcx, rax);                             // get shift count
  1.1199 +  __ pop_l(rax, rdx);                            // get shift value
  1.1200 +  __ lshl(rdx, rax);
  1.1201 +}
  1.1202 +
  1.1203 +
  1.1204 +void TemplateTable::lshr() {
  1.1205 +  transition(itos, ltos);
  1.1206 +  __ movl(rcx, rax);                             // get shift count
  1.1207 +  __ pop_l(rax, rdx);                            // get shift value
  1.1208 +  __ lshr(rdx, rax, true);
  1.1209 +}
  1.1210 +
  1.1211 +
  1.1212 +void TemplateTable::lushr() {
  1.1213 +  transition(itos, ltos);
  1.1214 +  __ movl(rcx, rax);                             // get shift count
  1.1215 +  __ pop_l(rax, rdx);                            // get shift value
  1.1216 +  __ lshr(rdx, rax);
  1.1217 +}
  1.1218 +
  1.1219 +
  1.1220 +void TemplateTable::fop2(Operation op) {
  1.1221 +  transition(ftos, ftos);
  1.1222 +  __ pop_ftos_to_rsp();  // pop ftos into rsp
  1.1223 +  switch (op) {
  1.1224 +    case add: __ fadd_s (at_rsp());                break;
  1.1225 +    case sub: __ fsubr_s(at_rsp());                break;
  1.1226 +    case mul: __ fmul_s (at_rsp());                break;
  1.1227 +    case div: __ fdivr_s(at_rsp());                break;
  1.1228 +    case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
  1.1229 +    default : ShouldNotReachHere();
  1.1230 +  }
  1.1231 +  __ f2ieee();
  1.1232 +  __ popl(rax);  // pop float thing off
  1.1233 +}
  1.1234 +
  1.1235 +
  1.1236 +void TemplateTable::dop2(Operation op) {
  1.1237 +  transition(dtos, dtos);
  1.1238 +  __ pop_dtos_to_rsp();  // pop dtos into rsp
  1.1239 +
  1.1240 +  switch (op) {
  1.1241 +    case add: __ fadd_d (at_rsp());                break;
  1.1242 +    case sub: __ fsubr_d(at_rsp());                break;
  1.1243 +    case mul: {
  1.1244 +      Label L_strict;
  1.1245 +      Label L_join;
  1.1246 +      const Address access_flags      (rcx, methodOopDesc::access_flags_offset());
  1.1247 +      __ get_method(rcx);
  1.1248 +      __ movl(rcx, access_flags);
  1.1249 +      __ testl(rcx, JVM_ACC_STRICT);
  1.1250 +      __ jccb(Assembler::notZero, L_strict);
  1.1251 +      __ fmul_d (at_rsp());
  1.1252 +      __ jmpb(L_join);
  1.1253 +      __ bind(L_strict);
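          +      // strict-fp path: pre-scale by bias1 and re-scale by bias2 so the
          +      // extended-precision x87 intermediate rounds as a true IEEE double would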
  1.1254 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
  1.1255 +      __ fmulp();
  1.1256 +      __ fmul_d (at_rsp());
  1.1257 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
  1.1258 +      __ fmulp();
  1.1259 +      __ bind(L_join);
  1.1260 +      break;
  1.1261 +    }
  1.1262 +    case div: {
  1.1263 +      Label L_strict;
  1.1264 +      Label L_join;
  1.1265 +      const Address access_flags      (rcx, methodOopDesc::access_flags_offset());
  1.1266 +      __ get_method(rcx);
  1.1267 +      __ movl(rcx, access_flags);
  1.1268 +      __ testl(rcx, JVM_ACC_STRICT);
  1.1269 +      __ jccb(Assembler::notZero, L_strict);
  1.1270 +      __ fdivr_d(at_rsp());
  1.1271 +      __ jmp(L_join);
  1.1272 +      __ bind(L_strict);
  1.1273 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
  1.1274 +      __ fmul_d (at_rsp());
  1.1275 +      __ fdivrp();
  1.1276 +      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
  1.1277 +      __ fmulp();
  1.1278 +      __ bind(L_join);
  1.1279 +      break;
  1.1280 +    }
  1.1281 +    case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
  1.1282 +    default : ShouldNotReachHere();
  1.1283 +  }
  1.1284 +  __ d2ieee();
  1.1285 +  // Pop double precision number from rsp.
  1.1286 +  __ popl(rax);
  1.1287 +  __ popl(rdx);
  1.1288 +}
  1.1289 +
  1.1290 +
  1.1291 +void TemplateTable::ineg() {
  1.1292 +  transition(itos, itos);
  1.1293 +  __ negl(rax);
  1.1294 +}
  1.1295 +
  1.1296 +
  1.1297 +void TemplateTable::lneg() {
  1.1298 +  transition(ltos, ltos);
  1.1299 +  __ lneg(rdx, rax);
  1.1300 +}
  1.1301 +
  1.1302 +
  1.1303 +void TemplateTable::fneg() {
  1.1304 +  transition(ftos, ftos);
  1.1305 +  __ fchs();
  1.1306 +}
  1.1307 +
  1.1308 +
  1.1309 +void TemplateTable::dneg() {
  1.1310 +  transition(dtos, dtos);
  1.1311 +  __ fchs();
  1.1312 +}
  1.1313 +
  1.1314 +
  1.1315 +void TemplateTable::iinc() {
  1.1316 +  transition(vtos, vtos);
  1.1317 +  __ load_signed_byte(rdx, at_bcp(2));           // get constant
  1.1318 +  locals_index(rbx);
  1.1319 +  __ addl(iaddress(rbx), rdx);
  1.1320 +}
  1.1321 +
  1.1322 +
  1.1323 +void TemplateTable::wide_iinc() {
  1.1324 +  transition(vtos, vtos);
  1.1325 +  __ movl(rdx, at_bcp(4));                       // get constant
  1.1326 +  locals_index_wide(rbx);
  1.1327 +  __ bswap(rdx);                                 // swap bytes & sign-extend constant
  1.1328 +  __ sarl(rdx, 16);
  1.1329 +  __ addl(iaddress(rbx), rdx);
  1.1330 +  // Note: should probably use only one movl to get both
  1.1331 +  //       the index and the constant -> fix this
  1.1332 +}
  1.1333 +
  1.1334 +
  1.1335 +void TemplateTable::convert() {
  1.1336 +  // Checking
  1.1337 +#ifdef ASSERT
  1.1338 +  { TosState tos_in  = ilgl;
  1.1339 +    TosState tos_out = ilgl;
  1.1340 +    switch (bytecode()) {
  1.1341 +      case Bytecodes::_i2l: // fall through
  1.1342 +      case Bytecodes::_i2f: // fall through
  1.1343 +      case Bytecodes::_i2d: // fall through
  1.1344 +      case Bytecodes::_i2b: // fall through
  1.1345 +      case Bytecodes::_i2c: // fall through
  1.1346 +      case Bytecodes::_i2s: tos_in = itos; break;
  1.1347 +      case Bytecodes::_l2i: // fall through
  1.1348 +      case Bytecodes::_l2f: // fall through
  1.1349 +      case Bytecodes::_l2d: tos_in = ltos; break;
  1.1350 +      case Bytecodes::_f2i: // fall through
  1.1351 +      case Bytecodes::_f2l: // fall through
  1.1352 +      case Bytecodes::_f2d: tos_in = ftos; break;
  1.1353 +      case Bytecodes::_d2i: // fall through
  1.1354 +      case Bytecodes::_d2l: // fall through
  1.1355 +      case Bytecodes::_d2f: tos_in = dtos; break;
  1.1356 +      default             : ShouldNotReachHere();
  1.1357 +    }
  1.1358 +    switch (bytecode()) {
  1.1359 +      case Bytecodes::_l2i: // fall through
  1.1360 +      case Bytecodes::_f2i: // fall through
  1.1361 +      case Bytecodes::_d2i: // fall through
  1.1362 +      case Bytecodes::_i2b: // fall through
  1.1363 +      case Bytecodes::_i2c: // fall through
  1.1364 +      case Bytecodes::_i2s: tos_out = itos; break;
  1.1365 +      case Bytecodes::_i2l: // fall through
  1.1366 +      case Bytecodes::_f2l: // fall through
  1.1367 +      case Bytecodes::_d2l: tos_out = ltos; break;
  1.1368 +      case Bytecodes::_i2f: // fall through
  1.1369 +      case Bytecodes::_l2f: // fall through
  1.1370 +      case Bytecodes::_d2f: tos_out = ftos; break;
  1.1371 +      case Bytecodes::_i2d: // fall through
  1.1372 +      case Bytecodes::_l2d: // fall through
  1.1373 +      case Bytecodes::_f2d: tos_out = dtos; break;
  1.1374 +      default             : ShouldNotReachHere();
  1.1375 +    }
  1.1376 +    transition(tos_in, tos_out);
  1.1377 +  }
  1.1378 +#endif // ASSERT
  1.1379 +
  1.1380 +  // Conversion
  1.1381 +  // (Note: use pushl(rcx)/popl(rcx) for 1/2-word stack-ptr manipulation)
  1.1382 +  switch (bytecode()) {
  1.1383 +    case Bytecodes::_i2l:
  1.1384 +      __ extend_sign(rdx, rax);
  1.1385 +      break;
  1.1386 +    case Bytecodes::_i2f:
  1.1387 +      __ pushl(rax);         // store int on tos
  1.1388 +      __ fild_s(at_rsp());   // load int to ST0
  1.1389 +      __ f2ieee();           // truncate to float size
  1.1390 +      __ popl(rcx);          // adjust rsp
  1.1391 +      break;
  1.1392 +    case Bytecodes::_i2d:
  1.1393 +      __ pushl(rax);         // add one slot for d2ieee()
  1.1394 +      __ pushl(rax);         // store int on tos
  1.1395 +      __ fild_s(at_rsp());   // load int to ST0
  1.1396 +      __ d2ieee();           // truncate to double size
  1.1397 +      __ popl(rcx);          // adjust rsp
  1.1398 +      __ popl(rcx);
  1.1399 +      break;
  1.1400 +    case Bytecodes::_i2b:
  1.1401 +      __ shll(rax, 24);      // truncate upper 24 bits
  1.1402 +      __ sarl(rax, 24);      // and sign-extend byte
  1.1403 +      break;
  1.1404 +    case Bytecodes::_i2c:
  1.1405 +      __ andl(rax, 0xFFFF);  // truncate upper 16 bits
  1.1406 +      break;
  1.1407 +    case Bytecodes::_i2s:
  1.1408 +      __ shll(rax, 16);      // truncate upper 16 bits
  1.1409 +      __ sarl(rax, 16);      // and sign-extend short
  1.1410 +      break;
  1.1411 +    case Bytecodes::_l2i:
  1.1412 +      /* nothing to do */
  1.1413 +      break;
  1.1414 +    case Bytecodes::_l2f:
  1.1415 +      __ pushl(rdx);         // store long on tos
  1.1416 +      __ pushl(rax);
  1.1417 +      __ fild_d(at_rsp());   // load long to ST0
  1.1418 +      __ f2ieee();           // truncate to float size
  1.1419 +      __ popl(rcx);          // adjust rsp
  1.1420 +      __ popl(rcx);
  1.1421 +      break;
  1.1422 +    case Bytecodes::_l2d:
  1.1423 +      __ pushl(rdx);         // store long on tos
  1.1424 +      __ pushl(rax);
  1.1425 +      __ fild_d(at_rsp());   // load long to ST0
  1.1426 +      __ d2ieee();           // truncate to double size
  1.1427 +      __ popl(rcx);          // adjust rsp
  1.1428 +      __ popl(rcx);
  1.1429 +      break;
  1.1430 +    case Bytecodes::_f2i:
  1.1431 +      __ pushl(rcx);         // reserve space for argument
  1.1432 +      __ fstp_s(at_rsp());   // pass float argument on stack
  1.1433 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
  1.1434 +      break;
  1.1435 +    case Bytecodes::_f2l:
  1.1436 +      __ pushl(rcx);         // reserve space for argument
  1.1437 +      __ fstp_s(at_rsp());   // pass float argument on stack
  1.1438 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
  1.1439 +      break;
  1.1440 +    case Bytecodes::_f2d:
  1.1441 +      /* nothing to do */
  1.1442 +      break;
  1.1443 +    case Bytecodes::_d2i:
  1.1444 +      __ pushl(rcx);         // reserve space for argument
  1.1445 +      __ pushl(rcx);
  1.1446 +      __ fstp_d(at_rsp());   // pass double argument on stack
  1.1447 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
  1.1448 +      break;
  1.1449 +    case Bytecodes::_d2l:
  1.1450 +      __ pushl(rcx);         // reserve space for argument
  1.1451 +      __ pushl(rcx);
  1.1452 +      __ fstp_d(at_rsp());   // pass double argument on stack
  1.1453 +      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
  1.1454 +      break;
  1.1455 +    case Bytecodes::_d2f:
  1.1456 +      __ pushl(rcx);         // reserve space for f2ieee()
  1.1457 +      __ f2ieee();           // truncate to float size
  1.1458 +      __ popl(rcx);          // adjust rsp
  1.1459 +      break;
  1.1460 +    default             :
  1.1461 +      ShouldNotReachHere();
  1.1462 +  }
  1.1463 +}
  1.1464 +
  1.1465 +
  1.1466 +void TemplateTable::lcmp() {
  1.1467 +  transition(ltos, itos);
  1.1468 +  // y = rdx:rax
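         +  // result follows the lcmp bytecode convention:
         +  //   rax := 1 if x > y, 0 if x == y, -1 if x < y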
  1.1469 +  __ pop_l(rbx, rcx);             // get x = rcx:rbx
  1.1470 +  __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
  1.1471 +  __ movl(rax, rcx);
  1.1472 +}
  1.1473 +
  1.1474 +
  1.1475 +void TemplateTable::float_cmp(bool is_float, int unordered_result) {
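         +  // unordered_result encodes the bytecode flavor: < 0 for fcmpl/dcmpl (an unordered
         +  // compare, i.e. NaN, yields -1), > 0 for fcmpg/dcmpg (NaN yields +1); the flag
         +  // passed to fcmp2int below selects between the two.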
  1.1476 +  if (is_float) {
  1.1477 +    __ pop_ftos_to_rsp();
  1.1478 +    __ fld_s(at_rsp());
  1.1479 +  } else {
  1.1480 +    __ pop_dtos_to_rsp();
  1.1481 +    __ fld_d(at_rsp());
  1.1482 +    __ popl(rdx);
  1.1483 +  }
  1.1484 +  __ popl(rcx);
  1.1485 +  __ fcmp2int(rax, unordered_result < 0);
  1.1486 +}
  1.1487 +
  1.1488 +
  1.1489 +void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1.1490 +  __ get_method(rcx);           // ECX holds method
  1.1491 +  __ profile_taken_branch(rax,rbx); // EAX holds updated MDP, EBX holds bumped taken count
  1.1492 +
  1.1493 +  const ByteSize be_offset = methodOopDesc::backedge_counter_offset() + InvocationCounter::counter_offset();
  1.1494 +  const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() + InvocationCounter::counter_offset();
  1.1495 +  const int method_offset = frame::interpreter_frame_method_offset * wordSize;
  1.1496 +
  1.1497 +  // Load up EDX with the branch displacement
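         +  // (a 4-byte load at bcp + 1; bswap puts the big-endian stream bytes into native
         +  //  order; for narrow branches the signed 2-byte offset lands in the upper half,
         +  //  so the sarl(16) below sign-extends it, while wide branches use all 4 bytes)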
  1.1498 +  __ movl(rdx, at_bcp(1));
  1.1499 +  __ bswap(rdx);
  1.1500 +  if (!is_wide) __ sarl(rdx, 16);
  1.1501 +
  1.1502 +  // Handle all the JSR stuff here, then exit.
  1.1503 +  // It's much shorter and cleaner than intermingling with the
  1.1504 +  // non-JSR normal-branch stuff occurring below.
  1.1505 +  if (is_jsr) {
  1.1506 +    // Pre-load the next target bytecode into EBX
  1.1507 +    __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1, 0));
  1.1508 +
  1.1509 +    // compute return address as bci in rax,
  1.1510 +    __ leal(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
  1.1511 +    __ subl(rax, Address(rcx, methodOopDesc::const_offset()));
  1.1512 +    // Adjust the bcp in ESI by the displacement in EDX
  1.1513 +    __ addl(rsi, rdx);
  1.1514 +    // Push return address
  1.1515 +    __ push_i(rax);
  1.1516 +    // jsr returns vtos
  1.1517 +    __ dispatch_only_noverify(vtos);
  1.1518 +    return;
  1.1519 +  }
  1.1520 +
  1.1521 +  // Normal (non-jsr) branch handling
  1.1522 +
  1.1523 +  // Adjust the bcp in ESI by the displacement in EDX
  1.1524 +  __ addl(rsi, rdx);
  1.1525 +
  1.1526 +  assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  1.1527 +  Label backedge_counter_overflow;
  1.1528 +  Label profile_method;
  1.1529 +  Label dispatch;
  1.1530 +  if (UseLoopCounter) {
  1.1531 +    // increment backedge counter for backward branches
  1.1532 +    // rax,: MDO
  1.1533 +    // rbx,: MDO bumped taken-count
  1.1534 +    // rcx: method
  1.1535 +    // rdx: target offset
  1.1536 +    // rsi: target bcp
  1.1537 +    // rdi: locals pointer
  1.1538 +    __ testl(rdx, rdx);             // check if forward or backward branch
  1.1539 +    __ jcc(Assembler::positive, dispatch); // count only if backward branch
  1.1540 +
  1.1541 +    // increment counter
  1.1542 +    __ movl(rax, Address(rcx, be_offset));        // load backedge counter
  1.1543 +    __ increment(rax, InvocationCounter::count_increment); // increment counter
  1.1544 +    __ movl(Address(rcx, be_offset), rax);        // store counter
  1.1545 +
  1.1546 +    __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
  1.1547 +    __ andl(rax, InvocationCounter::count_mask_value);     // and the status bits
  1.1548 +    __ addl(rax, Address(rcx, be_offset));        // add both counters
  1.1549 +
  1.1550 +    if (ProfileInterpreter) {
  1.1551 +      // Test to see if we should create a method data oop
  1.1552 +      __ cmp32(rax,
  1.1553 +               ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
  1.1554 +      __ jcc(Assembler::less, dispatch);
  1.1555 +
  1.1556 +      // if no method data exists, go to profile method
  1.1557 +      __ test_method_data_pointer(rax, profile_method);
  1.1558 +
  1.1559 +      if (UseOnStackReplacement) {
  1.1560 +        // check for overflow against rbx, which is the MDO taken count
  1.1561 +        __ cmp32(rbx,
  1.1562 +                 ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1563 +        __ jcc(Assembler::below, dispatch);
  1.1564 +
  1.1565 +        // When ProfileInterpreter is on, the backedge_count comes from the
  1.1566 +        // methodDataOop, whose value does not get reset on the call to
  1.1567 +        // frequency_counter_overflow().  To avoid excessive calls to the overflow
  1.1568 +        // routine while the method is being compiled, add a second test to make
  1.1569 +        // sure the overflow function is called only once every overflow_frequency.
  1.1570 +        const int overflow_frequency = 1024;
  1.1571 +        __ andl(rbx, overflow_frequency-1);
  1.1572 +        __ jcc(Assembler::zero, backedge_counter_overflow);
  1.1573 +
  1.1574 +      }
  1.1575 +    } else {
  1.1576 +      if (UseOnStackReplacement) {
  1.1577 +        // check for overflow against rax, which is the sum of the counters
  1.1578 +        __ cmp32(rax,
  1.1579 +                 ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
  1.1580 +        __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
  1.1581 +
  1.1582 +      }
  1.1583 +    }
  1.1584 +    __ bind(dispatch);
  1.1585 +  }
  1.1586 +
  1.1587 +  // Pre-load the next target bytecode into EBX
  1.1588 +  __ load_unsigned_byte(rbx, Address(rsi, 0));
  1.1589 +
  1.1590 +  // continue with the bytecode @ target
  1.1591 +  // rax,: return bci for jsr's, unused otherwise
  1.1592 +  // rbx,: target bytecode
  1.1593 +  // rsi: target bcp
  1.1594 +  __ dispatch_only(vtos);
  1.1595 +
  1.1596 +  if (UseLoopCounter) {
  1.1597 +    if (ProfileInterpreter) {
  1.1598 +      // Out-of-line code to allocate method data oop.
  1.1599 +      __ bind(profile_method);
  1.1600 +      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method), rsi);
  1.1601 +      __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode
  1.1602 +      __ movl(rcx, Address(rbp, method_offset));
  1.1603 +      __ movl(rcx, Address(rcx, in_bytes(methodOopDesc::method_data_offset())));
  1.1604 +      __ movl(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
  1.1605 +      __ test_method_data_pointer(rcx, dispatch);
  1.1606 +      // offset non-null mdp by MDO::data_offset() + IR::profile_method()
  1.1607 +      __ addl(rcx, in_bytes(methodDataOopDesc::data_offset()));
  1.1608 +      __ addl(rcx, rax);
  1.1609 +      __ movl(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
  1.1610 +      __ jmp(dispatch);
  1.1611 +    }
  1.1612 +
  1.1613 +    if (UseOnStackReplacement) {
  1.1614 +
  1.1615 +      // invocation counter overflow
  1.1616 +      __ bind(backedge_counter_overflow);
  1.1617 +      __ negl(rdx);
  1.1618 +      __ addl(rdx, rsi);        // branch bcp
  1.1619 +      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rdx);
  1.1620 +      __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode
  1.1621 +
  1.1622 +      // rax,: osr nmethod (osr ok) or NULL (osr not possible)
  1.1623 +      // rbx,: target bytecode
  1.1624 +      // rdx: scratch
  1.1625 +      // rdi: locals pointer
  1.1626 +      // rsi: bcp
  1.1627 +      __ testl(rax, rax);                        // test result
  1.1628 +      __ jcc(Assembler::zero, dispatch);         // no osr if null
  1.1629 +      // nmethod may have been invalidated (VM may block upon call_VM return)
  1.1630 +      __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
  1.1631 +      __ cmpl(rcx, InvalidOSREntryBci);
  1.1632 +      __ jcc(Assembler::equal, dispatch);
  1.1633 +
  1.1634 +      // We have the address of an on-stack-replacement routine in rax.
  1.1635 +      // We need to prepare to execute the OSR method. First we must
  1.1636 +      // migrate the locals and monitors off the stack.
  1.1637 +
  1.1638 +      __ movl(rsi, rax);                             // save the nmethod
  1.1639 +
  1.1640 +      const Register thread = rcx;
  1.1641 +      __ get_thread(thread);
  1.1642 +      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
  1.1643 +      // rax, is OSR buffer, move it to expected parameter location
  1.1644 +      __ movl(rcx, rax);
  1.1645 +
  1.1646 +      // pop the interpreter frame
  1.1647 +      __ movl(rdx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
  1.1648 +      __ leave();                                // remove frame anchor
  1.1649 +      __ popl(rdi);                              // get return address
  1.1650 +      __ movl(rsp, rdx);                         // set sp to sender sp
  1.1651 +
  1.1652 +
  1.1653 +      Label skip;
  1.1654 +      Label chkint;
  1.1655 +
  1.1656 +      // The interpreter frame we have removed may be returning to
  1.1657 +      // either the call stub or the interpreter. Since we will
  1.1658 +      // now be returning from a compiled (OSR) nmethod we must
  1.1659 +      // adjust the return address to one where it can handle compiled
  1.1660 +      // results and clean the FPU stack. This is very similar to
  1.1661 +      // what an i2c adapter must do.
  1.1662 +
  1.1663 +      // Are we returning to the call stub?
  1.1664 +
  1.1665 +      __ cmp32(rdi, ExternalAddress(StubRoutines::_call_stub_return_address));
  1.1666 +      __ jcc(Assembler::notEqual, chkint);
  1.1667 +
  1.1668 +      // Yes: adjust to the specialized call stub return.
  1.1669 +      assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL, "must be set");
  1.1670 +      __ lea(rdi, ExternalAddress(StubRoutines::i486::get_call_stub_compiled_return()));
  1.1671 +      __ jmp(skip);
  1.1672 +
  1.1673 +      __ bind(chkint);
  1.1674 +
  1.1675 +      // Are we returning to the interpreter? Look for sentinel
  1.1676 +
  1.1677 +      __ cmpl(Address(rdi, -8), Interpreter::return_sentinel);
  1.1678 +      __ jcc(Assembler::notEqual, skip);
  1.1679 +
  1.1680 +      // Adjust to compiled return back to interpreter
  1.1681 +
  1.1682 +      __ movl(rdi, Address(rdi, -4));
  1.1683 +      __ bind(skip);
  1.1684 +
  1.1685 +      // Align stack pointer for compiled code (note that caller is
  1.1686 +      // responsible for undoing this fixup by remembering the old SP
  1.1687 +      // in an rbp,-relative location)
  1.1688 +      __ andl(rsp, -(StackAlignmentInBytes));
  1.1689 +
  1.1690 +      // push the (possibly adjusted) return address
  1.1691 +      __ pushl(rdi);
  1.1692 +
  1.1693 +      // and begin the OSR nmethod
  1.1694 +      __ jmp(Address(rsi, nmethod::osr_entry_point_offset()));
  1.1695 +    }
  1.1696 +  }
  1.1697 +}
  1.1698 +
  1.1699 +
  1.1700 +void TemplateTable::if_0cmp(Condition cc) {
  1.1701 +  transition(itos, vtos);
  1.1702 +  // assume branch is more often taken than not (loops use backward branches)
  1.1703 +  Label not_taken;
  1.1704 +  __ testl(rax, rax);
  1.1705 +  __ jcc(j_not(cc), not_taken);
  1.1706 +  branch(false, false);
  1.1707 +  __ bind(not_taken);
  1.1708 +  __ profile_not_taken_branch(rax);
  1.1709 +}
  1.1710 +
  1.1711 +
  1.1712 +void TemplateTable::if_icmp(Condition cc) {
  1.1713 +  transition(itos, vtos);
  1.1714 +  // assume branch is more often taken than not (loops use backward branches)
  1.1715 +  Label not_taken;
  1.1716 +  __ pop_i(rdx);
  1.1717 +  __ cmpl(rdx, rax);
  1.1718 +  __ jcc(j_not(cc), not_taken);
  1.1719 +  branch(false, false);
  1.1720 +  __ bind(not_taken);
  1.1721 +  __ profile_not_taken_branch(rax);
  1.1722 +}
  1.1723 +
  1.1724 +
  1.1725 +void TemplateTable::if_nullcmp(Condition cc) {
  1.1726 +  transition(atos, vtos);
  1.1727 +  // assume branch is more often taken than not (loops use backward branches)
  1.1728 +  Label not_taken;
  1.1729 +  __ testl(rax, rax);
  1.1730 +  __ jcc(j_not(cc), not_taken);
  1.1731 +  branch(false, false);
  1.1732 +  __ bind(not_taken);
  1.1733 +  __ profile_not_taken_branch(rax);
  1.1734 +}
  1.1735 +
  1.1736 +
  1.1737 +void TemplateTable::if_acmp(Condition cc) {
  1.1738 +  transition(atos, vtos);
  1.1739 +  // assume branch is more often taken than not (loops use backward branches)
  1.1740 +  Label not_taken;
  1.1741 +  __ pop_ptr(rdx);
  1.1742 +  __ cmpl(rdx, rax);
  1.1743 +  __ jcc(j_not(cc), not_taken);
  1.1744 +  branch(false, false);
  1.1745 +  __ bind(not_taken);
  1.1746 +  __ profile_not_taken_branch(rax);
  1.1747 +}
  1.1748 +
  1.1749 +
  1.1750 +void TemplateTable::ret() {
  1.1751 +  transition(vtos, vtos);
  1.1752 +  locals_index(rbx);
  1.1753 +  __ movl(rbx, iaddress(rbx));                   // get return bci, compute return bcp
  1.1754 +  __ profile_ret(rbx, rcx);
  1.1755 +  __ get_method(rax);
  1.1756 +  __ movl(rsi, Address(rax, methodOopDesc::const_offset()));
  1.1757 +  __ leal(rsi, Address(rsi, rbx, Address::times_1,
  1.1758 +                       constMethodOopDesc::codes_offset()));
  1.1759 +  __ dispatch_next(vtos);
  1.1760 +}
  1.1761 +
  1.1762 +
  1.1763 +void TemplateTable::wide_ret() {
  1.1764 +  transition(vtos, vtos);
  1.1765 +  locals_index_wide(rbx);
  1.1766 +  __ movl(rbx, iaddress(rbx));                   // get return bci, compute return bcp
  1.1767 +  __ profile_ret(rbx, rcx);
  1.1768 +  __ get_method(rax);
  1.1769 +  __ movl(rsi, Address(rax, methodOopDesc::const_offset()));
  1.1770 +  __ leal(rsi, Address(rsi, rbx, Address::times_1, constMethodOopDesc::codes_offset()));
  1.1771 +  __ dispatch_next(vtos);
  1.1772 +}
  1.1773 +
  1.1774 +
  1.1775 +void TemplateTable::tableswitch() {
  1.1776 +  Label default_case, continue_execution;
  1.1777 +  transition(itos, vtos);
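         +  // tableswitch layout after the opcode: 0-3 pad bytes to a 4-byte boundary, then
         +  // big-endian default, lo and hi words, followed by (hi - lo + 1) 4-byte jump offsets.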
  1.1778 +  // align rsi
  1.1779 +  __ leal(rbx, at_bcp(wordSize));
  1.1780 +  __ andl(rbx, -wordSize);
  1.1781 +  // load lo & hi
  1.1782 +  __ movl(rcx, Address(rbx, 1 * wordSize));
  1.1783 +  __ movl(rdx, Address(rbx, 2 * wordSize));
  1.1784 +  __ bswap(rcx);
  1.1785 +  __ bswap(rdx);
  1.1786 +  // check against lo & hi
  1.1787 +  __ cmpl(rax, rcx);
  1.1788 +  __ jccb(Assembler::less, default_case);
  1.1789 +  __ cmpl(rax, rdx);
  1.1790 +  __ jccb(Assembler::greater, default_case);
  1.1791 +  // lookup dispatch offset
  1.1792 +  __ subl(rax, rcx);
  1.1793 +  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * wordSize));
  1.1794 +  __ profile_switch_case(rax, rbx, rcx);
  1.1795 +  // continue execution
  1.1796 +  __ bind(continue_execution);
  1.1797 +  __ bswap(rdx);
  1.1798 +  __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
  1.1799 +  __ addl(rsi, rdx);
  1.1800 +  __ dispatch_only(vtos);
  1.1801 +  // handle default
  1.1802 +  __ bind(default_case);
  1.1803 +  __ profile_switch_default(rax);
  1.1804 +  __ movl(rdx, Address(rbx, 0));
  1.1805 +  __ jmp(continue_execution);
  1.1806 +}
  1.1807 +
  1.1808 +
  1.1809 +void TemplateTable::lookupswitch() {
  1.1810 +  transition(itos, itos);
  1.1811 +  __ stop("lookupswitch bytecode should have been rewritten");
  1.1812 +}
  1.1813 +
  1.1814 +
  1.1815 +void TemplateTable::fast_linearswitch() {
  1.1816 +  transition(itos, vtos);
  1.1817 +  Label loop_entry, loop, found, continue_execution;
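         +  // lookupswitch layout after the opcode: 0-3 pad bytes to a 4-byte boundary, then
         +  // big-endian default and npairs words, followed by npairs (match, offset) pairs;
         +  // the loop below scans the pairs linearly from the last one down to the first.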
  1.1818 +  // bswap rax, so we can avoid bswapping the table entries
  1.1819 +  __ bswap(rax);
  1.1820 +  // align rsi
  1.1821 +  __ leal(rbx, at_bcp(wordSize));                // btw: should be able to get rid of this instruction (change offsets below)
  1.1822 +  __ andl(rbx, -wordSize);
  1.1823 +  // set counter
  1.1824 +  __ movl(rcx, Address(rbx, wordSize));
  1.1825 +  __ bswap(rcx);
  1.1826 +  __ jmpb(loop_entry);
  1.1827 +  // table search
  1.1828 +  __ bind(loop);
  1.1829 +  __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * wordSize));
  1.1830 +  __ jccb(Assembler::equal, found);
  1.1831 +  __ bind(loop_entry);
  1.1832 +  __ decrement(rcx);
  1.1833 +  __ jcc(Assembler::greaterEqual, loop);
  1.1834 +  // default case
  1.1835 +  __ profile_switch_default(rax);
  1.1836 +  __ movl(rdx, Address(rbx, 0));
  1.1837 +  __ jmpb(continue_execution);
  1.1838 +  // entry found -> get offset
  1.1839 +  __ bind(found);
  1.1840 +  __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * wordSize));
  1.1841 +  __ profile_switch_case(rcx, rax, rbx);
  1.1842 +  // continue execution
  1.1843 +  __ bind(continue_execution);
  1.1844 +  __ bswap(rdx);
  1.1845 +  __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
  1.1846 +  __ addl(rsi, rdx);
  1.1847 +  __ dispatch_only(vtos);
  1.1848 +}
  1.1849 +
  1.1850 +
  1.1851 +void TemplateTable::fast_binaryswitch() {
  1.1852 +  transition(itos, vtos);
  1.1853 +  // Implementation using the following core algorithm:
  1.1854 +  //
  1.1855 +  // int binary_search(int key, LookupswitchPair* array, int n) {
  1.1856 +  //   // Binary search according to "Methodik des Programmierens" by
  1.1857 +  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  1.1858 +  //   int i = 0;
  1.1859 +  //   int j = n;
  1.1860 +  //   while (i+1 < j) {
  1.1861 +  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  1.1862 +  //     // with      Q: for all i: 0 <= i < n: key < a[i]
  1.1863 +  //     // where a stands for the array and assuming that the (nonexistent)
  1.1864 +  //     // element a[n] is infinitely big.
  1.1865 +  //     int h = (i + j) >> 1;
  1.1866 +  //     // i < h < j
  1.1867 +  //     if (key < array[h].fast_match()) {
  1.1868 +  //       j = h;
  1.1869 +  //     } else {
  1.1870 +  //       i = h;
  1.1871 +  //     }
  1.1872 +  //   }
  1.1873 +  //   // R: a[i] <= key < a[i+1] or Q
  1.1874 +  //   // (i.e., if key is within array, i is the correct index)
  1.1875 +  //   return i;
  1.1876 +  // }
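         +  // Each LookupswitchPair is two 4-byte big-endian words (match, offset), hence the
         +  // Address::times_8 scaling and the bswap of each value before it is used below.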
  1.1877 +
  1.1878 +  // register allocation
  1.1879 +  const Register key   = rax;                    // already set (tosca)
  1.1880 +  const Register array = rbx;
  1.1881 +  const Register i     = rcx;
  1.1882 +  const Register j     = rdx;
  1.1883 +  const Register h     = rdi;                    // needs to be restored
  1.1884 +  const Register temp  = rsi;
  1.1885 +  // setup array
  1.1886 +  __ save_bcp();
  1.1887 +
  1.1888 +  __ leal(array, at_bcp(3*wordSize));            // btw: should be able to get rid of this instruction (change offsets below)
  1.1889 +  __ andl(array, -wordSize);
  1.1890 +  // initialize i & j
  1.1891 +  __ xorl(i, i);                                 // i = 0;
  1.1892 +  __ movl(j, Address(array, -wordSize));         // j = length(array);
  1.1893 +  // Convert j into native byte ordering
  1.1894 +  __ bswap(j);
  1.1895 +  // and start
  1.1896 +  Label entry;
  1.1897 +  __ jmp(entry);
  1.1898 +
  1.1899 +  // binary search loop
  1.1900 +  { Label loop;
  1.1901 +    __ bind(loop);
  1.1902 +    // int h = (i + j) >> 1;
  1.1903 +    __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
  1.1904 +    __ sarl(h, 1);                               // h = (i + j) >> 1;
  1.1905 +    // if (key < array[h].fast_match()) {
  1.1906 +    //   j = h;
  1.1907 +    // } else {
  1.1908 +    //   i = h;
  1.1909 +    // }
  1.1910 +    // Convert array[h].match to native byte-ordering before compare
  1.1911 +    __ movl(temp, Address(array, h, Address::times_8, 0*wordSize));
  1.1912 +    __ bswap(temp);
  1.1913 +    __ cmpl(key, temp);
  1.1914 +    if (VM_Version::supports_cmov()) {
  1.1915 +      __ cmovl(Assembler::less        , j, h);   // j = h if (key <  array[h].fast_match())
  1.1916 +      __ cmovl(Assembler::greaterEqual, i, h);   // i = h if (key >= array[h].fast_match())
  1.1917 +    } else {
  1.1918 +      Label set_i, end_of_if;
  1.1919 +      __ jccb(Assembler::greaterEqual, set_i);    // {
  1.1920 +      __ movl(j, h);                             //   j = h;
  1.1921 +      __ jmp(end_of_if);                         // }
  1.1922 +      __ bind(set_i);                            // else {
  1.1923 +      __ movl(i, h);                             //   i = h;
  1.1924 +      __ bind(end_of_if);                        // }
  1.1925 +    }
  1.1926 +    // while (i+1 < j)
  1.1927 +    __ bind(entry);
  1.1928 +    __ leal(h, Address(i, 1));                   // i+1
  1.1929 +    __ cmpl(h, j);                               // i+1 < j
  1.1930 +    __ jcc(Assembler::less, loop);
  1.1931 +  }
  1.1932 +
  1.1933 +  // end of binary search, result index is i (must check again!)
  1.1934 +  Label default_case;
  1.1935 +  // Convert array[i].match to native byte-ordering before compare
  1.1936 +  __ movl(temp, Address(array, i, Address::times_8, 0*wordSize));
  1.1937 +  __ bswap(temp);
  1.1938 +  __ cmpl(key, temp);
  1.1939 +  __ jcc(Assembler::notEqual, default_case);
  1.1940 +
  1.1941 +  // entry found -> j = offset
  1.1942 +  __ movl(j , Address(array, i, Address::times_8, 1*wordSize));
  1.1943 +  __ profile_switch_case(i, key, array);
  1.1944 +  __ bswap(j);
  1.1945 +  __ restore_bcp();
  1.1946 +  __ restore_locals();                           // restore rdi
  1.1947 +  __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
  1.1948 +
  1.1949 +  __ addl(rsi, j);
  1.1950 +  __ dispatch_only(vtos);
  1.1951 +
  1.1952 +  // default case -> j = default offset
  1.1953 +  __ bind(default_case);
  1.1954 +  __ profile_switch_default(i);
  1.1955 +  __ movl(j, Address(array, -2*wordSize));
  1.1956 +  __ bswap(j);
  1.1957 +  __ restore_bcp();
  1.1958 +  __ restore_locals();                           // restore rdi
  1.1959 +  __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
  1.1960 +  __ addl(rsi, j);
  1.1961 +  __ dispatch_only(vtos);
  1.1962 +}
  1.1963 +
  1.1964 +
  1.1965 +void TemplateTable::_return(TosState state) {
  1.1966 +  transition(state, state);
  1.1967 +  assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
  1.1968 +
  1.1969 +  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
  1.1970 +    assert(state == vtos, "only valid state");
  1.1971 +    __ movl(rax, aaddress(0));
  1.1972 +    __ movl(rdi, Address(rax, oopDesc::klass_offset_in_bytes()));
  1.1973 +    __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
  1.1974 +    __ testl(rdi, JVM_ACC_HAS_FINALIZER);
  1.1975 +    Label skip_register_finalizer;
  1.1976 +    __ jcc(Assembler::zero, skip_register_finalizer);
  1.1977 +
  1.1978 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), rax);
  1.1979 +
  1.1980 +    __ bind(skip_register_finalizer);
  1.1981 +  }
  1.1982 +
  1.1983 +  __ remove_activation(state, rsi);
  1.1984 +  __ jmp(rsi);
  1.1985 +}
  1.1986 +
  1.1987 +
  1.1988 +// ----------------------------------------------------------------------------
  1.1989 +// Volatile variables demand their effects be made known to all CPU's in
  1.1990 +// order.  Store buffers on most chips allow reads & writes to reorder; the
  1.1991 +// JMM's ReadAfterWrite.java test fails in -Xint mode without some kind of
  1.1992 +// memory barrier (i.e., it's not sufficient that the interpreter does not
  1.1993 +// reorder volatile references, the hardware also must not reorder them).
  1.1994 +//
  1.1995 +// According to the new Java Memory Model (JMM):
  1.1996 +// (1) All volatiles are serialized with respect to each other.
  1.1997 +// ALSO reads & writes act as acquire & release, so:
  1.1998 +// (2) A read cannot let unrelated NON-volatile memory refs that happen after
  1.1999 +// the read float up to before the read.  It's OK for non-volatile memory refs
  1.2000 +// that happen before the volatile read to float down below it.
  1.2001 +// (3) Similarly, a volatile write cannot let unrelated NON-volatile memory refs
  1.2002 +// that happen BEFORE the write float down to after the write.  It's OK for
  1.2003 +// non-volatile memory refs that happen after the volatile write to float up
  1.2004 +// before it.
  1.2005 +//
  1.2006 +// We only put in barriers around volatile refs (they are expensive), not
  1.2007 +// _between_ memory refs (that would require us to track the flavor of the
  1.2008 +// previous memory refs).  Requirements (2) and (3) require some barriers
  1.2009 +// before volatile stores and after volatile loads.  These nearly cover
  1.2010 +// requirement (1) but miss the volatile-store-volatile-load case.  This final
  1.2011 +// case is placed after volatile-stores although it could just as well go
  1.2012 +// before volatile-loads.
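         +// On x86 the load-side barriers are left commented out below (see the "Doug Lea
         +// believes this is not needed" notes); in this file only the barrier following a
         +// volatile store is actually emitted.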
  1.2013 +void TemplateTable::volatile_barrier( ) {
  1.2014 +  // Helper function to insert an is-volatile test and memory barrier
  1.2015 +  if( !os::is_MP() ) return;    // Not needed on single CPU
  1.2016 +  __ membar();
  1.2017 +}
  1.2018 +
  1.2019 +void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) {
  1.2020 +  assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
  1.2021 +
  1.2022 +  Register temp = rbx;
  1.2023 +
  1.2024 +  assert_different_registers(Rcache, index, temp);
  1.2025 +
  1.2026 +  const int shift_count = (1 + byte_no)*BitsPerByte;
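         +  // The indices field packs the original constant pool index in its low bits and
         +  // the bytecode resolved for byte_no 1 and 2 in the two bytes above it; shift_count
         +  // plus the 0xFF mask below extract the byte recorded for this byte_no.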
  1.2027 +  Label resolved;
  1.2028 +  __ get_cache_and_index_at_bcp(Rcache, index, 1);
  1.2029 +  __ movl(temp, Address(Rcache, index, Address::times_4, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
  1.2030 +  __ shrl(temp, shift_count);
  1.2031 +  // have we resolved this bytecode?
  1.2032 +  __ andl(temp, 0xFF);
  1.2033 +  __ cmpl(temp, (int)bytecode());
  1.2034 +  __ jcc(Assembler::equal, resolved);
  1.2035 +
  1.2036 +  // resolve first time through
  1.2037 +  address entry;
  1.2038 +  switch (bytecode()) {
  1.2039 +    case Bytecodes::_getstatic      : // fall through
  1.2040 +    case Bytecodes::_putstatic      : // fall through
  1.2041 +    case Bytecodes::_getfield       : // fall through
  1.2042 +    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break;
  1.2043 +    case Bytecodes::_invokevirtual  : // fall through
  1.2044 +    case Bytecodes::_invokespecial  : // fall through
  1.2045 +    case Bytecodes::_invokestatic   : // fall through
  1.2046 +    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
  1.2047 +    default                         : ShouldNotReachHere();                                 break;
  1.2048 +  }
  1.2049 +  __ movl(temp, (int)bytecode());
  1.2050 +  __ call_VM(noreg, entry, temp);
  1.2051 +  // Update registers with resolved info
  1.2052 +  __ get_cache_and_index_at_bcp(Rcache, index, 1);
  1.2053 +  __ bind(resolved);
  1.2054 +}
  1.2055 +
  1.2056 +
  1.2057 +// The cache and index registers must be set before the call
  1.2058 +void TemplateTable::load_field_cp_cache_entry(Register obj,
  1.2059 +                                              Register cache,
  1.2060 +                                              Register index,
  1.2061 +                                              Register off,
  1.2062 +                                              Register flags,
  1.2063 +                                              bool is_static = false) {
  1.2064 +  assert_different_registers(cache, index, flags, off);
  1.2065 +
  1.2066 +  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
  1.2067 +  // Field offset
  1.2068 +  __ movl(off, Address(cache, index, Address::times_4,
  1.2069 +           in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())));
  1.2070 +  // Flags
  1.2071 +  __ movl(flags, Address(cache, index, Address::times_4,
  1.2072 +           in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())));
  1.2073 +
  1.2074 +  // klass     overwrite register
  1.2075 +  if (is_static) {
  1.2076 +    __ movl(obj, Address(cache, index, Address::times_4,
  1.2077 +             in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())));
  1.2078 +  }
  1.2079 +}
  1.2080 +
  1.2081 +void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
  1.2082 +                                               Register method,
  1.2083 +                                               Register itable_index,
  1.2084 +                                               Register flags,
  1.2085 +                                               bool is_invokevirtual,
  1.2086 +                                               bool is_invokevfinal /*unused*/) {
  1.2087 +  // setup registers
  1.2088 +  const Register cache = rcx;
  1.2089 +  const Register index = rdx;
  1.2090 +  assert_different_registers(method, flags);
  1.2091 +  assert_different_registers(method, cache, index);
  1.2092 +  assert_different_registers(itable_index, flags);
  1.2093 +  assert_different_registers(itable_index, cache, index);
  1.2094 +  // determine constant pool cache field offsets
  1.2095 +  const int method_offset = in_bytes(
  1.2096 +    constantPoolCacheOopDesc::base_offset() +
  1.2097 +      (is_invokevirtual
  1.2098 +       ? ConstantPoolCacheEntry::f2_offset()
  1.2099 +       : ConstantPoolCacheEntry::f1_offset()
  1.2100 +      )
  1.2101 +    );
  1.2102 +  const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2103 +                                    ConstantPoolCacheEntry::flags_offset());
  1.2104 +  // access constant pool cache fields
  1.2105 +  const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
  1.2106 +                                    ConstantPoolCacheEntry::f2_offset());
  1.2107 +
  1.2108 +  resolve_cache_and_index(byte_no, cache, index);
  1.2109 +
  1.2110 +  assert(wordSize == 4, "adjust code below");
  1.2111 +  __ movl(method, Address(cache, index, Address::times_4, method_offset));
  1.2112 +  if (itable_index != noreg) {
  1.2113 +    __ movl(itable_index, Address(cache, index, Address::times_4, index_offset));
  1.2114 +  }
  1.2115 +  __ movl(flags , Address(cache, index, Address::times_4, flags_offset ));
  1.2116 +}
  1.2117 +
  1.2118 +
  1.2119 +// The cache and index registers are expected to be set before the call.
  1.2120 +// Correct values of the cache and index registers are preserved.
  1.2121 +void TemplateTable::jvmti_post_field_access(Register cache,
  1.2122 +                                            Register index,
  1.2123 +                                            bool is_static,
  1.2124 +                                            bool has_tos) {
  1.2125 +  if (JvmtiExport::can_post_field_access()) {
  1.2126 +    // Check to see if a field access watch has been set before we take
  1.2127 +    // the time to call into the VM.
  1.2128 +    Label L1;
  1.2129 +    assert_different_registers(cache, index, rax);
  1.2130 +    __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2131 +    __ testl(rax,rax);
  1.2132 +    __ jcc(Assembler::zero, L1);
  1.2133 +
  1.2134 +    // cache entry pointer
  1.2135 +    __ addl(cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
  1.2136 +    __ shll(index, LogBytesPerWord);
  1.2137 +    __ addl(cache, index);
  1.2138 +    if (is_static) {
  1.2139 +      __ movl(rax, 0);      // NULL object reference
  1.2140 +    } else {
  1.2141 +      __ pop(atos);         // Get the object
  1.2142 +      __ verify_oop(rax);
  1.2143 +      __ push(atos);        // Restore stack state
  1.2144 +    }
  1.2145 +    // rax,:   object pointer or NULL
  1.2146 +    // cache: cache entry pointer
  1.2147 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
  1.2148 +               rax, cache);
  1.2149 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2150 +    __ bind(L1);
  1.2151 +  }
  1.2152 +}
  1.2153 +
  1.2154 +void TemplateTable::pop_and_check_object(Register r) {
  1.2155 +  __ pop_ptr(r);
  1.2156 +  __ null_check(r);  // for field access must check obj.
  1.2157 +  __ verify_oop(r);
  1.2158 +}
  1.2159 +
  1.2160 +void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  1.2161 +  transition(vtos, vtos);
  1.2162 +
  1.2163 +  const Register cache = rcx;
  1.2164 +  const Register index = rdx;
  1.2165 +  const Register obj   = rcx;
  1.2166 +  const Register off   = rbx;
  1.2167 +  const Register flags = rax;
  1.2168 +
  1.2169 +  resolve_cache_and_index(byte_no, cache, index);
  1.2170 +  jvmti_post_field_access(cache, index, is_static, false);
  1.2171 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2172 +
  1.2173 +  if (!is_static) pop_and_check_object(obj);
  1.2174 +
  1.2175 +  const Address lo(obj, off, Address::times_1, 0*wordSize);
  1.2176 +  const Address hi(obj, off, Address::times_1, 1*wordSize);
  1.2177 +
  1.2178 +  Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  1.2179 +
  1.2180 +  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
  1.2181 +  assert(btos == 0, "change code, btos != 0");
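         +  // flags now holds the field's TosState: btos (== 0) is recognized first via the
         +  // 0x0f mask, and the remaining states are compared against explicitly below.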
  1.2182 +  // btos
  1.2183 +  __ andl(flags, 0x0f);
  1.2184 +  __ jcc(Assembler::notZero, notByte);
  1.2185 +
  1.2186 +  __ load_signed_byte(rax, lo );
  1.2187 +  __ push(btos);
  1.2188 +  // Rewrite bytecode to be faster
  1.2189 +  if (!is_static) {
  1.2190 +    patch_bytecode(Bytecodes::_fast_bgetfield, rcx, rbx);
  1.2191 +  }
  1.2192 +  __ jmp(Done);
  1.2193 +
  1.2194 +  __ bind(notByte);
  1.2195 +  // itos
  1.2196 +  __ cmpl(flags, itos );
  1.2197 +  __ jcc(Assembler::notEqual, notInt);
  1.2198 +
  1.2199 +  __ movl(rax, lo );
  1.2200 +  __ push(itos);
  1.2201 +  // Rewrite bytecode to be faster
  1.2202 +  if (!is_static) {
  1.2203 +    patch_bytecode(Bytecodes::_fast_igetfield, rcx, rbx);
  1.2204 +  }
  1.2205 +  __ jmp(Done);
  1.2206 +
  1.2207 +  __ bind(notInt);
  1.2208 +  // atos
  1.2209 +  __ cmpl(flags, atos );
  1.2210 +  __ jcc(Assembler::notEqual, notObj);
  1.2211 +
  1.2212 +  __ movl(rax, lo );
  1.2213 +  __ push(atos);
  1.2214 +  if (!is_static) {
  1.2215 +    patch_bytecode(Bytecodes::_fast_agetfield, rcx, rbx);
  1.2216 +  }
  1.2217 +  __ jmp(Done);
  1.2218 +
  1.2219 +  __ bind(notObj);
  1.2220 +  // ctos
  1.2221 +  __ cmpl(flags, ctos );
  1.2222 +  __ jcc(Assembler::notEqual, notChar);
  1.2223 +
  1.2224 +  __ load_unsigned_word(rax, lo );
  1.2225 +  __ push(ctos);
  1.2226 +  if (!is_static) {
  1.2227 +    patch_bytecode(Bytecodes::_fast_cgetfield, rcx, rbx);
  1.2228 +  }
  1.2229 +  __ jmp(Done);
  1.2230 +
  1.2231 +  __ bind(notChar);
  1.2232 +  // stos
  1.2233 +  __ cmpl(flags, stos );
  1.2234 +  __ jcc(Assembler::notEqual, notShort);
  1.2235 +
  1.2236 +  __ load_signed_word(rax, lo );
  1.2237 +  __ push(stos);
  1.2238 +  if (!is_static) {
  1.2239 +    patch_bytecode(Bytecodes::_fast_sgetfield, rcx, rbx);
  1.2240 +  }
  1.2241 +  __ jmp(Done);
  1.2242 +
  1.2243 +  __ bind(notShort);
  1.2244 +  // ltos
  1.2245 +  __ cmpl(flags, ltos );
  1.2246 +  __ jcc(Assembler::notEqual, notLong);
  1.2247 +
  1.2248 +  // Generate code as if volatile.  There just aren't enough registers to
  1.2249 +  // save that information and this code is faster than the test.
  1.2250 +  __ fild_d(lo);                // Must load atomically
  1.2251 +  __ subl(rsp,2*wordSize);      // Make space for store
  1.2252 +  __ fistp_d(Address(rsp,0));
  1.2253 +  __ popl(rax);
  1.2254 +  __ popl(rdx);
  1.2255 +
  1.2256 +  __ push(ltos);
  1.2257 +  // Don't rewrite to _fast_lgetfield for potential volatile case.
  1.2258 +  __ jmp(Done);
  1.2259 +
  1.2260 +  __ bind(notLong);
  1.2261 +  // ftos
  1.2262 +  __ cmpl(flags, ftos );
  1.2263 +  __ jcc(Assembler::notEqual, notFloat);
  1.2264 +
  1.2265 +  __ fld_s(lo);
  1.2266 +  __ push(ftos);
  1.2267 +  if (!is_static) {
  1.2268 +    patch_bytecode(Bytecodes::_fast_fgetfield, rcx, rbx);
  1.2269 +  }
  1.2270 +  __ jmp(Done);
  1.2271 +
  1.2272 +  __ bind(notFloat);
  1.2273 +  // dtos
  1.2274 +  __ cmpl(flags, dtos );
  1.2275 +  __ jcc(Assembler::notEqual, notDouble);
  1.2276 +
  1.2277 +  __ fld_d(lo);
  1.2278 +  __ push(dtos);
  1.2279 +  if (!is_static) {
  1.2280 +    patch_bytecode(Bytecodes::_fast_dgetfield, rcx, rbx);
  1.2281 +  }
  1.2282 +  __ jmpb(Done);
  1.2283 +
  1.2284 +  __ bind(notDouble);
  1.2285 +
  1.2286 +  __ stop("Bad state");
  1.2287 +
  1.2288 +  __ bind(Done);
  1.2289 +  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  1.2290 +  // volatile_barrier( );
  1.2291 +}
  1.2292 +
  1.2293 +
  1.2294 +void TemplateTable::getfield(int byte_no) {
  1.2295 +  getfield_or_static(byte_no, false);
  1.2296 +}
  1.2297 +
  1.2298 +
  1.2299 +void TemplateTable::getstatic(int byte_no) {
  1.2300 +  getfield_or_static(byte_no, true);
  1.2301 +}
  1.2302 +
  1.2303 +// The cache and index registers are expected to be set before the call.
  1.2304 +// The function may destroy various registers, just not the cache and index registers.
  1.2305 +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
  1.2306 +
  1.2307 +  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
  1.2308 +
  1.2309 +  if (JvmtiExport::can_post_field_modification()) {
  1.2310 +    // Check to see if a field modification watch has been set before we take
  1.2311 +    // the time to call into the VM.
  1.2312 +    Label L1;
  1.2313 +    assert_different_registers(cache, index, rax);
  1.2314 +    __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2315 +    __ testl(rax, rax);
  1.2316 +    __ jcc(Assembler::zero, L1);
  1.2317 +
  1.2318 +    // The cache and index registers have already been set.
  1.2319 +    // This call could therefore be eliminated, but then the cache and index
  1.2320 +    // registers would have to be used consistently after this point.
  1.2321 +    __ get_cache_and_index_at_bcp(rax, rdx, 1);
  1.2322 +
  1.2323 +    if (is_static) {
  1.2324 +      // Life is simple.  Null out the object pointer.
  1.2325 +      __ xorl(rbx, rbx);
  1.2326 +    } else {
  1.2327 +      // Life is harder. The stack holds the value on top, followed by the object.
  1.2328 +      // We don't know the size of the value, though; it could be one or two words
  1.2329 +      // depending on its type. As a result, we must find the type to determine where
  1.2330 +      // the object is.
  1.2331 +      Label two_word, valsize_known;
  1.2332 +      __ movl(rcx, Address(rax, rdx, Address::times_4, in_bytes(cp_base_offset +
  1.2333 +                                   ConstantPoolCacheEntry::flags_offset())));
  1.2334 +      __ movl(rbx, rsp);
  1.2335 +      __ shrl(rcx, ConstantPoolCacheEntry::tosBits);
  1.2336 +      // Make sure we don't need to mask rcx for tosBits after the above shift
  1.2337 +      ConstantPoolCacheEntry::verify_tosBits();
  1.2338 +      __ cmpl(rcx, ltos);
  1.2339 +      __ jccb(Assembler::equal, two_word);
  1.2340 +      __ cmpl(rcx, dtos);
  1.2341 +      __ jccb(Assembler::equal, two_word);
  1.2342 +      __ addl(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
  1.2343 +      __ jmpb(valsize_known);
  1.2344 +
  1.2345 +      __ bind(two_word);
  1.2346 +      __ addl(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
  1.2347 +
  1.2348 +      __ bind(valsize_known);
  1.2349 +      // setup object pointer
  1.2350 +      __ movl(rbx, Address(rbx, 0));
  1.2351 +    }
  1.2352 +    // cache entry pointer
  1.2353 +    __ addl(rax, in_bytes(cp_base_offset));
  1.2354 +    __ shll(rdx, LogBytesPerWord);
  1.2355 +    __ addl(rax, rdx);
  1.2356 +    // object (tos)
  1.2357 +    __ movl(rcx, rsp);
  1.2358 +    // rbx,: object pointer set up above (NULL if static)
  1.2359 +    // rax,: cache entry pointer
  1.2360 +    // rcx: jvalue object on the stack
  1.2361 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
  1.2362 +               rbx, rax, rcx);
  1.2363 +    __ get_cache_and_index_at_bcp(cache, index, 1);
  1.2364 +    __ bind(L1);
  1.2365 +  }
  1.2366 +}
  1.2367 +
  1.2368 +
  1.2369 +void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  1.2370 +  transition(vtos, vtos);
  1.2371 +
  1.2372 +  const Register cache = rcx;
  1.2373 +  const Register index = rdx;
  1.2374 +  const Register obj   = rcx;
  1.2375 +  const Register off   = rbx;
  1.2376 +  const Register flags = rax;
  1.2377 +
  1.2378 +  resolve_cache_and_index(byte_no, cache, index);
  1.2379 +  jvmti_post_field_mod(cache, index, is_static);
  1.2380 +  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  1.2381 +
  1.2382 +  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  1.2383 +  // volatile_barrier( );
  1.2384 +
  1.2385 +  Label notVolatile, Done;
  1.2386 +  __ movl(rdx, flags);
  1.2387 +  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
  1.2388 +  __ andl(rdx, 0x1);
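         +  // rdx is now 1 for a volatile field and 0 otherwise; it is tested before the
         +  // store in the long case and again after the store at the end of this method.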
  1.2389 +
  1.2390 +  // field addresses
  1.2391 +  const Address lo(obj, off, Address::times_1, 0*wordSize);
  1.2392 +  const Address hi(obj, off, Address::times_1, 1*wordSize);
  1.2393 +
  1.2394 +  Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
  1.2395 +
  1.2396 +  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
  1.2397 +  assert(btos == 0, "change code, btos != 0");
  1.2398 +  // btos
  1.2399 +  __ andl(flags, 0x0f);
  1.2400 +  __ jcc(Assembler::notZero, notByte);
  1.2401 +
  1.2402 +  __ pop(btos);
  1.2403 +  if (!is_static) pop_and_check_object(obj);
  1.2404 +  __ movb(lo, rax );
  1.2405 +  if (!is_static) {
  1.2406 +    patch_bytecode(Bytecodes::_fast_bputfield, rcx, rbx);
  1.2407 +  }
  1.2408 +  __ jmp(Done);
  1.2409 +
  1.2410 +  __ bind(notByte);
  1.2411 +  // itos
  1.2412 +  __ cmpl(flags, itos );
  1.2413 +  __ jcc(Assembler::notEqual, notInt);
  1.2414 +
  1.2415 +  __ pop(itos);
  1.2416 +  if (!is_static) pop_and_check_object(obj);
  1.2417 +
  1.2418 +  __ movl(lo, rax );
  1.2419 +  if (!is_static) {
  1.2420 +    patch_bytecode(Bytecodes::_fast_iputfield, rcx, rbx);
  1.2421 +  }
  1.2422 +  __ jmp(Done);
  1.2423 +
  1.2424 +  __ bind(notInt);
  1.2425 +  // atos
  1.2426 +  __ cmpl(flags, atos );
  1.2427 +  __ jcc(Assembler::notEqual, notObj);
  1.2428 +
  1.2429 +  __ pop(atos);
  1.2430 +  if (!is_static) pop_and_check_object(obj);
  1.2431 +
  1.2432 +  __ movl(lo, rax );
  1.2433 +  __ store_check(obj, lo);  // Need to mark card
  1.2434 +  if (!is_static) {
  1.2435 +    patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
  1.2436 +  }
  1.2437 +  __ jmp(Done);
  1.2438 +
  1.2439 +  __ bind(notObj);
  1.2440 +  // ctos
  1.2441 +  __ cmpl(flags, ctos );
  1.2442 +  __ jcc(Assembler::notEqual, notChar);
  1.2443 +
  1.2444 +  __ pop(ctos);
  1.2445 +  if (!is_static) pop_and_check_object(obj);
  1.2446 +  __ movw(lo, rax );
  1.2447 +  if (!is_static) {
  1.2448 +    patch_bytecode(Bytecodes::_fast_cputfield, rcx, rbx);
  1.2449 +  }
  1.2450 +  __ jmp(Done);
  1.2451 +
  1.2452 +  __ bind(notChar);
  1.2453 +  // stos
  1.2454 +  __ cmpl(flags, stos );
  1.2455 +  __ jcc(Assembler::notEqual, notShort);
  1.2456 +
  1.2457 +  __ pop(stos);
  1.2458 +  if (!is_static) pop_and_check_object(obj);
  1.2459 +  __ movw(lo, rax );
  1.2460 +  if (!is_static) {
  1.2461 +    patch_bytecode(Bytecodes::_fast_sputfield, rcx, rbx);
  1.2462 +  }
  1.2463 +  __ jmp(Done);
  1.2464 +
  1.2465 +  __ bind(notShort);
  1.2466 +  // ltos
  1.2467 +  __ cmpl(flags, ltos );
  1.2468 +  __ jcc(Assembler::notEqual, notLong);
  1.2469 +
  1.2470 +  Label notVolatileLong;
  1.2471 +  __ testl(rdx, rdx);
  1.2472 +  __ jcc(Assembler::zero, notVolatileLong);
  1.2473 +
  1.2474 +  __ pop(ltos);  // overwrites rdx, do this after testing volatile.
  1.2475 +  if (!is_static) pop_and_check_object(obj);
  1.2476 +
  1.2477 +  // Replace with real volatile test
  1.2478 +  __ pushl(rdx);
  1.2479 +  __ pushl(rax);                // Must update atomically with FIST
  1.2480 +  __ fild_d(Address(rsp,0));    // So load into FPU register
  1.2481 +  __ fistp_d(lo);               // and put into memory atomically
  1.2482 +  __ addl(rsp,2*wordSize);
  1.2483 +  volatile_barrier();
  1.2484 +  // Don't rewrite volatile version
  1.2485 +  __ jmp(notVolatile);
  1.2486 +
  1.2487 +  __ bind(notVolatileLong);
  1.2488 +
  1.2489 +  __ pop(ltos);  // overwrites rdx
  1.2490 +  if (!is_static) pop_and_check_object(obj);
  1.2491 +  __ movl(hi, rdx);
  1.2492 +  __ movl(lo, rax);
  1.2493 +  if (!is_static) {
  1.2494 +    patch_bytecode(Bytecodes::_fast_lputfield, rcx, rbx);
  1.2495 +  }
  1.2496 +  __ jmp(notVolatile);
  1.2497 +
  1.2498 +  __ bind(notLong);
  1.2499 +  // ftos
  1.2500 +  __ cmpl(flags, ftos );
  1.2501 +  __ jcc(Assembler::notEqual, notFloat);
  1.2502 +
  1.2503 +  __ pop(ftos);
  1.2504 +  if (!is_static) pop_and_check_object(obj);
  1.2505 +  __ fstp_s(lo);
  1.2506 +  if (!is_static) {
  1.2507 +    patch_bytecode(Bytecodes::_fast_fputfield, rcx, rbx);
  1.2508 +  }
  1.2509 +  __ jmp(Done);
  1.2510 +
  1.2511 +  __ bind(notFloat);
  1.2512 +  // dtos
  1.2513 +  __ cmpl(flags, dtos );
  1.2514 +  __ jcc(Assembler::notEqual, notDouble);
  1.2515 +
  1.2516 +  __ pop(dtos);
  1.2517 +  if (!is_static) pop_and_check_object(obj);
  1.2518 +  __ fstp_d(lo);
  1.2519 +  if (!is_static) {
  1.2520 +    patch_bytecode(Bytecodes::_fast_dputfield, rcx, rbx);
  1.2521 +  }
  1.2522 +  __ jmp(Done);
  1.2523 +
  1.2524 +  __ bind(notDouble);
  1.2525 +
  1.2526 +  __ stop("Bad state");
  1.2527 +
  1.2528 +  __ bind(Done);
  1.2529 +
  1.2530 +  // Check for volatile store
  1.2531 +  __ testl(rdx, rdx);
  1.2532 +  __ jcc(Assembler::zero, notVolatile);
  1.2533 +  volatile_barrier( );
  1.2534 +  __ bind(notVolatile);
  1.2535 +}
  1.2536 +
  1.2537 +
  1.2538 +void TemplateTable::putfield(int byte_no) {
  1.2539 +  putfield_or_static(byte_no, false);
  1.2540 +}
  1.2541 +
  1.2542 +
  1.2543 +void TemplateTable::putstatic(int byte_no) {
  1.2544 +  putfield_or_static(byte_no, true);
  1.2545 +}
  1.2546 +
  1.2547 +void TemplateTable::jvmti_post_fast_field_mod() {
  1.2548 +  if (JvmtiExport::can_post_field_modification()) {
  1.2549 +    // Check to see if a field modification watch has been set before we take
  1.2550 +    // the time to call into the VM.
  1.2551 +    Label L2;
  1.2552 +    __ mov32(rcx, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
  1.2553 +    __ testl(rcx,rcx);
  1.2554 +    __ jcc(Assembler::zero, L2);
  1.2555 +    __ pop_ptr(rbx);               // copy the object pointer from tos
  1.2556 +    __ verify_oop(rbx);
  1.2557 +    __ push_ptr(rbx);              // put the object pointer back on tos
  1.2558 +    __ subl(rsp, sizeof(jvalue));  // add space for a jvalue object
  1.2559 +    __ movl(rcx, rsp);
  1.2560 +    __ push_ptr(rbx);                 // save object pointer so we can steal rbx,
  1.2561 +    __ movl(rbx, 0);
  1.2562 +    const Address lo_value(rcx, rbx, Address::times_1, 0*wordSize);
  1.2563 +    const Address hi_value(rcx, rbx, Address::times_1, 1*wordSize);
  1.2564 +    switch (bytecode()) {          // load values into the jvalue object
  1.2565 +    case Bytecodes::_fast_bputfield: __ movb(lo_value, rax); break;
  1.2566 +    case Bytecodes::_fast_sputfield: __ movw(lo_value, rax); break;
  1.2567 +    case Bytecodes::_fast_cputfield: __ movw(lo_value, rax); break;
  1.2568 +    case Bytecodes::_fast_iputfield: __ movl(lo_value, rax);                         break;
  1.2569 +    case Bytecodes::_fast_lputfield: __ movl(hi_value, rdx); __ movl(lo_value, rax); break;
  1.2570 +    // need to call fld_s() after fstp_s() to restore the value for below
  1.2571 +    case Bytecodes::_fast_fputfield: __ fstp_s(lo_value); __ fld_s(lo_value);        break;
  1.2572 +    // need to call fld_d() after fstp_d() to restore the value for below
  1.2573 +    case Bytecodes::_fast_dputfield: __ fstp_d(lo_value); __ fld_d(lo_value);        break;
  1.2574 +    // since rcx is not an object we don't call store_check() here
  1.2575 +    case Bytecodes::_fast_aputfield: __ movl(lo_value, rax);                         break;
  1.2576 +    default:  ShouldNotReachHere();
  1.2577 +    }
  1.2578 +    __ pop_ptr(rbx);  // restore copy of object pointer
  1.2579 +
  1.2580 +    // Save rax, and sometimes rdx because call_VM() will clobber them,
  1.2581 +    // then use them for JVM/DI purposes
  1.2582 +    __ pushl(rax);
  1.2583 +    if (bytecode() == Bytecodes::_fast_lputfield) __ pushl(rdx);
  1.2584 +    // access constant pool cache entry
  1.2585 +    __ get_cache_entry_pointer_at_bcp(rax, rdx, 1);
  1.2586 +    __ verify_oop(rbx);
  1.2587 +    // rbx,: object pointer copied above
  1.2588 +    // rax,: cache entry pointer
  1.2589 +    // rcx: jvalue object on the stack
  1.2590 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx);
  1.2591 +    if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);  // restore high value
  1.2592 +    __ popl(rax);     // restore lower value
  1.2593 +    __ addl(rsp, sizeof(jvalue));  // release jvalue object space
  1.2594 +    __ bind(L2);
  1.2595 +  }
  1.2596 +}
  1.2597 +
  1.2598 +void TemplateTable::fast_storefield(TosState state) {
  1.2599 +  transition(state, vtos);
  1.2600 +
  1.2601 +  ByteSize base = constantPoolCacheOopDesc::base_offset();
  1.2602 +
  1.2603 +  jvmti_post_fast_field_mod();
  1.2604 +
  1.2605 +  // access constant pool cache
  1.2606 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2607 +
  1.2608 +  // test for volatile with rdx but rdx is tos register for lputfield.
  1.2609 +  if (bytecode() == Bytecodes::_fast_lputfield) __ pushl(rdx);
  1.2610 +  __ movl(rdx, Address(rcx, rbx, Address::times_4, in_bytes(base +
  1.2611 +                       ConstantPoolCacheEntry::flags_offset())));
  1.2612 +
  1.2613 +  // replace index with field offset from cache entry
  1.2614 +  __ movl(rbx, Address(rcx, rbx, Address::times_4, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
  1.2615 +
  1.2616 +  // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
  1.2617 +  // volatile_barrier( );
  1.2618 +
  1.2619 +  Label notVolatile, Done;
  1.2620 +  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
  1.2621 +  __ andl(rdx, 0x1);
  1.2622 +  // Check for volatile store
  1.2623 +  __ testl(rdx, rdx);
  1.2624 +  __ jcc(Assembler::zero, notVolatile);
  1.2625 +
  1.2626 +  if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);
  1.2627 +
  1.2628 +  // Get object from stack
  1.2629 +  pop_and_check_object(rcx);
  1.2630 +
  1.2631 +  // field addresses
  1.2632 +  const Address lo(rcx, rbx, Address::times_1, 0*wordSize);
  1.2633 +  const Address hi(rcx, rbx, Address::times_1, 1*wordSize);
  1.2634 +
  1.2635 +  // access field
  1.2636 +  switch (bytecode()) {
  1.2637 +    case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
  1.2638 +    case Bytecodes::_fast_sputfield: // fall through
  1.2639 +    case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
  1.2640 +    case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
  1.2641 +    case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax);        break;
  1.2642 +    case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
  1.2643 +    case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
  1.2644 +    case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break;
  1.2645 +    default:
  1.2646 +      ShouldNotReachHere();
  1.2647 +  }
  1.2648 +
  1.2649 +  Label done;
  1.2650 +  volatile_barrier( );
  1.2651 +  __ jmpb(done);
  1.2652 +
  1.2653 +  // Same code as above, but don't need rdx to test for volatile.
  1.2654 +  __ bind(notVolatile);
  1.2655 +
  1.2656 +  if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);
  1.2657 +
  1.2658 +  // Get object from stack
  1.2659 +  pop_and_check_object(rcx);
  1.2660 +
  1.2661 +  // access field
  1.2662 +  switch (bytecode()) {
  1.2663 +    case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
  1.2664 +    case Bytecodes::_fast_sputfield: // fall through
  1.2665 +    case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
  1.2666 +    case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
  1.2667 +    case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax);        break;
  1.2668 +    case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
  1.2669 +    case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
  1.2670 +    case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break;
  1.2671 +    default:
  1.2672 +      ShouldNotReachHere();
  1.2673 +  }
  1.2674 +  __ bind(done);
  1.2675 +}
  1.2676 +
  1.2677 +
  1.2678 +void TemplateTable::fast_accessfield(TosState state) {
  1.2679 +  transition(atos, state);
  1.2680 +
  1.2681 +  // do the JVMTI work here to avoid disturbing the register state below
  1.2682 +  if (JvmtiExport::can_post_field_access()) {
  1.2683 +    // Check to see if a field access watch has been set before we take
  1.2684 +    // the time to call into the VM.
  1.2685 +    Label L1;
  1.2686 +    __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
  1.2687 +    __ testl(rcx,rcx);
  1.2688 +    __ jcc(Assembler::zero, L1);
  1.2689 +    // access constant pool cache entry
  1.2690 +    __ get_cache_entry_pointer_at_bcp(rcx, rdx, 1);
  1.2691 +    __ push_ptr(rax);  // save object pointer before call_VM() clobbers it
  1.2692 +    __ verify_oop(rax);
  1.2693 +    // rax,: object pointer copied above
  1.2694 +    // rcx: cache entry pointer
  1.2695 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), rax, rcx);
  1.2696 +    __ pop_ptr(rax);   // restore object pointer
  1.2697 +    __ bind(L1);
  1.2698 +  }
  1.2699 +
  1.2700 +  // access constant pool cache
  1.2701 +  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
  1.2702 +  // replace index with field offset from cache entry
  1.2703 +  __ movl(rbx, Address(rcx, rbx, Address::times_4, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
  1.2704 +
  1.2705 +
  1.2706 +  // rax,: object
  1.2707 +  __ verify_oop(rax);
  1.2708 +  __ null_check(rax);
  1.2709 +  // field addresses
  1.2710 +  const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
  1.2711 +  const Address hi = Address(rax, rbx, Address::times_1, 1*wordSize);
  1.2712 +
  1.2713 +  // access field
  1.2714 +  switch (bytecode()) {
  1.2715 +    case Bytecodes::_fast_bgetfield: __ movsxb(rax, lo );                 break;
  1.2716 +    case Bytecodes::_fast_sgetfield: __ load_signed_word(rax, lo );       break;
  1.2717 +    case Bytecodes::_fast_cgetfield: __ load_unsigned_word(rax, lo );     break;
  1.2718 +    case Bytecodes::_fast_igetfield: __ movl(rax, lo);                    break;
  1.2719 +    case Bytecodes::_fast_lgetfield: __ stop("should not be rewritten");  break;
  1.2720 +    case Bytecodes::_fast_fgetfield: __ fld_s(lo);                        break;
  1.2721 +    case Bytecodes::_fast_dgetfield: __ fld_d(lo);                        break;
  1.2722 +    case Bytecodes::_fast_agetfield: __ movl(rax, lo); __ verify_oop(rax); break;
  1.2723 +    default:
  1.2724 +      ShouldNotReachHere();
  1.2725 +  }
  1.2726 +
  1.2727 +  // Doug Lea believes this is not needed with current Sparcs(TSO) and Intel(PSO)
  1.2728 +  // volatile_barrier( );
  1.2729 +}
  1.2730 +
  1.2731 +void TemplateTable::fast_xaccess(TosState state) {
  1.2732 +  transition(vtos, state);
  1.2733 +  // get receiver
  1.2734 +  __ movl(rax, aaddress(0));
  1.2735 +  debug_only(__ verify_local_tag(frame::TagReference, 0));
  1.2736 +  // access constant pool cache
  1.2737 +  __ get_cache_and_index_at_bcp(rcx, rdx, 2);
  1.2738 +  __ movl(rbx, Address(rcx, rdx, Address::times_4, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
  1.2739 +  // make sure exception is reported in correct bcp range (getfield is next instruction)
  1.2740 +  __ increment(rsi);
  1.2741 +  __ null_check(rax);
  1.2742 +  const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
  1.2743 +  if (state == itos) {
  1.2744 +    __ movl(rax, lo);
  1.2745 +  } else if (state == atos) {
  1.2746 +    __ movl(rax, lo);
  1.2747 +    __ verify_oop(rax);
  1.2748 +  } else if (state == ftos) {
  1.2749 +    __ fld_s(lo);
  1.2750 +  } else {
  1.2751 +    ShouldNotReachHere();
  1.2752 +  }
  1.2753 +  __ decrement(rsi);
  1.2754 +}
  1.2755 +
  1.2756 +
  1.2757 +
  1.2758 +//----------------------------------------------------------------------------------------------------
  1.2759 +// Calls
  1.2760 +
  1.2761 +void TemplateTable::count_calls(Register method, Register temp) {
  1.2762 +  // implemented elsewhere
  1.2763 +  ShouldNotReachHere();
  1.2764 +}
  1.2765 +
  1.2766 +
  1.2767 +void TemplateTable::prepare_invoke(Register method, Register index, int byte_no, Bytecodes::Code code) {
  1.2768 +  // determine flags
  1.2769 +  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  1.2770 +  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  1.2771 +  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  1.2772 +  const bool load_receiver       = code != Bytecodes::_invokestatic;
  1.2773 +  const bool receiver_null_check = is_invokespecial;
  1.2774 +  const bool save_flags = is_invokeinterface || is_invokevirtual;
  1.2775 +  // setup registers & access constant pool cache
  1.2776 +  const Register recv   = rcx;
  1.2777 +  const Register flags  = rdx;
  1.2778 +  assert_different_registers(method, index, recv, flags);
  1.2779 +
  1.2780 +  // save 'interpreter return address'
  1.2781 +  __ save_bcp();
  1.2782 +
  1.2783 +  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual);
  1.2784 +
  1.2785 +  // load receiver if needed (note: no return address pushed yet)
  1.2786 +  if (load_receiver) {
  1.2787 +    __ movl(recv, flags);
  1.2788 +    __ andl(recv, 0xFF);
  1.2789 +    // recv count is 0 based?
  1.2790 +    __ movl(recv, Address(rsp, recv, Interpreter::stackElementScale(), -Interpreter::expr_offset_in_bytes(1)));
  1.2791 +    __ verify_oop(recv);
  1.2792 +  }
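          +
          +  // Added note (commentary only, not generated code): the low byte of the
          +  // cp-cache flags word is the parameter size in stack slots, including the
          +  // receiver. Assuming an untagged expression stack, a virtual call with one
          +  // int argument has a count of 2, so the receiver is loaded from
          +  // rsp + 2*wordSize - Interpreter::expr_offset_in_bytes(1), i.e. one slot
          +  // deeper in the expression stack than the argument.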
  1.2793 +
  1.2794 +  // do null check if needed
  1.2795 +  if (receiver_null_check) {
  1.2796 +    __ null_check(recv);
  1.2797 +  }
  1.2798 +
  1.2799 +  if (save_flags) {
  1.2800 +    __ movl(rsi, flags);
  1.2801 +  }
  1.2802 +
  1.2803 +  // compute return type
  1.2804 +  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
  1.2805 +  // Make sure we don't need to mask flags for tosBits after the above shift
  1.2806 +  ConstantPoolCacheEntry::verify_tosBits();
  1.2807 +  // load return address
  1.2808 +  { const int table =
  1.2809 +      is_invokeinterface
  1.2810 +      ? (int)Interpreter::return_5_addrs_by_index_table()
  1.2811 +      : (int)Interpreter::return_3_addrs_by_index_table();
  1.2812 +    __ movl(flags, Address(noreg, flags, Address::times_4, table));
  1.2813 +  }
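          +
          +  // Added note: the shifted flags value is the result TosState, used above to
          +  // index a table of interpreter return entries; the "_3" and "_5" variants
          +  // correspond to the byte length of the invoke bytecode (3 for
          +  // invokevirtual/special/static, 5 for invokeinterface), so the entry pushed
          +  // below advances the bcp past the right number of operand bytes on return.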
  1.2814 +
  1.2815 +  // push return address
  1.2816 +  __ pushl(flags);
  1.2817 +
   1.2818 +  // Restore the flags value saved in rsi above, and restore rsi itself
   1.2819 +  // (the bytecode pointer) for later null checks.
  1.2820 +  if (save_flags) {
  1.2821 +    __ movl(flags, rsi);
  1.2822 +    __ restore_bcp();
  1.2823 +  }
  1.2824 +}
  1.2825 +
  1.2826 +
  1.2827 +void TemplateTable::invokevirtual_helper(Register index, Register recv,
  1.2828 +                        Register flags) {
  1.2829 +
  1.2830 +  // Uses temporary registers rax, rdx
  1.2831 +  assert_different_registers(index, recv, rax, rdx);
  1.2832 +
  1.2833 +  // Test for an invoke of a final method
  1.2834 +  Label notFinal;
  1.2835 +  __ movl(rax, flags);
  1.2836 +  __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
  1.2837 +  __ jcc(Assembler::zero, notFinal);
  1.2838 +
   1.2839 +  Register method = index;  // method must be rbx
   1.2840 +  assert(method == rbx, "methodOop must be rbx for interpreter calling convention");
  1.2841 +
  1.2842 +  // do the call - the index is actually the method to call
  1.2843 +  __ verify_oop(method);
  1.2844 +
  1.2845 +  // It's final, need a null check here!
  1.2846 +  __ null_check(recv);
  1.2847 +
  1.2848 +  // profile this call
  1.2849 +  __ profile_final_call(rax);
  1.2850 +
  1.2851 +  __ jump_from_interpreted(method, rax);
  1.2852 +
  1.2853 +  __ bind(notFinal);
  1.2854 +
  1.2855 +  // get receiver klass
  1.2856 +  __ null_check(recv, oopDesc::klass_offset_in_bytes());
   1.2857 +  // Keep recv in rcx; the callee expects it there
  1.2858 +  __ movl(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
  1.2859 +  __ verify_oop(rax);
  1.2860 +
  1.2861 +  // profile this call
  1.2862 +  __ profile_virtual_call(rax, rdi, rdx);
  1.2863 +
  1.2864 +  // get target methodOop & entry point
  1.2865 +  const int base = instanceKlass::vtable_start_offset() * wordSize;
  1.2866 +  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
  1.2867 +  __ movl(method, Address(rax, index, Address::times_4, base + vtableEntry::method_offset_in_bytes()));
  1.2868 +  __ jump_from_interpreted(method, rdx);
  1.2869 +}
  1.2870 +
  1.2871 +
  1.2872 +void TemplateTable::invokevirtual(int byte_no) {
  1.2873 +  transition(vtos, vtos);
  1.2874 +  prepare_invoke(rbx, noreg, byte_no, bytecode());
  1.2875 +
   1.2876 +  // rbx: index
  1.2877 +  // rcx: receiver
  1.2878 +  // rdx: flags
  1.2879 +
  1.2880 +  invokevirtual_helper(rbx, rcx, rdx);
  1.2881 +}
  1.2882 +
  1.2883 +
  1.2884 +void TemplateTable::invokespecial(int byte_no) {
  1.2885 +  transition(vtos, vtos);
  1.2886 +  prepare_invoke(rbx, noreg, byte_no, bytecode());
  1.2887 +  // do the call
  1.2888 +  __ verify_oop(rbx);
  1.2889 +  __ profile_call(rax);
  1.2890 +  __ jump_from_interpreted(rbx, rax);
  1.2891 +}
  1.2892 +
  1.2893 +
  1.2894 +void TemplateTable::invokestatic(int byte_no) {
  1.2895 +  transition(vtos, vtos);
  1.2896 +  prepare_invoke(rbx, noreg, byte_no, bytecode());
  1.2897 +  // do the call
  1.2898 +  __ verify_oop(rbx);
  1.2899 +  __ profile_call(rax);
  1.2900 +  __ jump_from_interpreted(rbx, rax);
  1.2901 +}
  1.2902 +
  1.2903 +
  1.2904 +void TemplateTable::fast_invokevfinal(int byte_no) {
  1.2905 +  transition(vtos, vtos);
  1.2906 +  __ stop("fast_invokevfinal not used on x86");
  1.2907 +}
  1.2908 +
  1.2909 +
  1.2910 +void TemplateTable::invokeinterface(int byte_no) {
  1.2911 +  transition(vtos, vtos);
  1.2912 +  prepare_invoke(rax, rbx, byte_no, bytecode());
  1.2913 +
   1.2914 +  // rax: Interface
   1.2915 +  // rbx: index
  1.2916 +  // rcx: receiver
  1.2917 +  // rdx: flags
  1.2918 +
  1.2919 +  // Special case of invokeinterface called for virtual method of
  1.2920 +  // java.lang.Object.  See cpCacheOop.cpp for details.
  1.2921 +  // This code isn't produced by javac, but could be produced by
  1.2922 +  // another compliant java compiler.
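          +  // For illustration only (hypothetical Java source): an interface that
          +  // redeclares a java.lang.Object method can make a compiler emit
          +  // invokeinterface for what is really a virtual Object method, e.g.
          +  //
          +  //   interface Named { String toString(); }    // redeclares Object.toString
          +  //   String s = ((Named) obj).toString();      // may compile to invokeinterface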
  1.2923 +  Label notMethod;
  1.2924 +  __ movl(rdi, rdx);
  1.2925 +  __ andl(rdi, (1 << ConstantPoolCacheEntry::methodInterface));
  1.2926 +  __ jcc(Assembler::zero, notMethod);
  1.2927 +
  1.2928 +  invokevirtual_helper(rbx, rcx, rdx);
  1.2929 +  __ bind(notMethod);
  1.2930 +
  1.2931 +  // Get receiver klass into rdx - also a null check
  1.2932 +  __ restore_locals();  // restore rdi
  1.2933 +  __ movl(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
  1.2934 +  __ verify_oop(rdx);
  1.2935 +
  1.2936 +  // profile this call
  1.2937 +  __ profile_virtual_call(rdx, rsi, rdi);
  1.2938 +
  1.2939 +  __ movl(rdi, rdx); // Save klassOop in rdi
  1.2940 +
  1.2941 +  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  1.2942 +  const int base = instanceKlass::vtable_start_offset() * wordSize;
  1.2943 +  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
  1.2944 +  __ movl(rsi, Address(rdx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable
  1.2945 +  __ leal(rdx, Address(rdx, rsi, Address::times_4, base));
  1.2946 +  if (HeapWordsPerLong > 1) {
  1.2947 +    // Round up to align_object_offset boundary
  1.2948 +    __ round_to(rdx, BytesPerLong);
  1.2949 +  }
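          +
          +  // Rough sketch of the itable search that follows (illustrative pseudo-code
          +  // only; accessor names are simplified, not the actual itableOffsetEntry API):
          +  //
          +  //   for (entry = first_itable_offset_entry; ; entry++) {
          +  //     if (entry->interface == NULL) throw IncompatibleClassChangeError;
          +  //     if (entry->interface == rax)  break;     // found the resolved interface
          +  //   }
          +  //   method = *(methodOop*)((address)klass + entry->offset
          +  //                          + index * itableMethodEntry_size);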
  1.2950 +
  1.2951 +  Label entry, search, interface_ok;
  1.2952 +
  1.2953 +  __ jmpb(entry);
  1.2954 +  __ bind(search);
  1.2955 +  __ addl(rdx, itableOffsetEntry::size() * wordSize);
  1.2956 +
  1.2957 +  __ bind(entry);
  1.2958 +
  1.2959 +  // Check that the entry is non-null.  A null entry means that the receiver
  1.2960 +  // class doesn't implement the interface, and wasn't the same as the
  1.2961 +  // receiver class checked when the interface was resolved.
  1.2962 +  __ pushl(rdx);
  1.2963 +  __ movl(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
  1.2964 +  __ testl(rdx, rdx);
  1.2965 +  __ jcc(Assembler::notZero, interface_ok);
  1.2966 +  // throw exception
  1.2967 +  __ popl(rdx);          // pop saved register first.
  1.2968 +  __ popl(rbx);          // pop return address (pushed by prepare_invoke)
  1.2969 +  __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
  1.2970 +  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
  1.2971 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
  1.2972 +                   InterpreterRuntime::throw_IncompatibleClassChangeError));
  1.2973 +  // the call_VM checks for exception, so we should never return here.
  1.2974 +  __ should_not_reach_here();
  1.2975 +  __ bind(interface_ok);
  1.2976 +
   1.2977 +  __ popl(rdx);
   1.2978 +
   1.2979 +  __ cmpl(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
   1.2980 +  __ jcc(Assembler::notEqual, search);
   1.2981 +
   1.2982 +  __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
   1.2983 +  __ addl(rdx, rdi); // Add offset to klassOop
   1.2984 +  assert(itableMethodEntry::size() * wordSize == 4, "adjust the scaling in the code below");
   1.2985 +  __ movl(rbx, Address(rdx, rbx, Address::times_4));
   1.2986 +  // rbx: methodOop to call
   1.2987 +  // rcx: receiver
   1.2988 +  // Check for abstract method error
   1.2989 +  // Note: This should be done more efficiently via a throw_abstract_method_error
   1.2990 +  //       interpreter entry point and a conditional jump to it in case of a null
   1.2991 +  //       method.
   1.2992 +  { Label L;
   1.2993 +    __ testl(rbx, rbx);
   1.2994 +    __ jcc(Assembler::notZero, L);
   1.2995 +    // throw exception
   1.2996 +    // note: must restore interpreter registers to canonical
   1.2997 +    //       state for exception handling to work correctly!
   1.2998 +    __ popl(rbx);          // pop return address (pushed by prepare_invoke)
   1.2999 +    __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
   1.3000 +    __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
   1.3001 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
   1.3002 +    // the call_VM checks for exception, so we should never return here.
   1.3003 +    __ should_not_reach_here();
   1.3004 +    __ bind(L);
   1.3005 +  }
  1.3006 +
  1.3007 +  // do the call
  1.3008 +  // rcx: receiver
   1.3009 +  // rbx: methodOop
  1.3010 +  __ jump_from_interpreted(rbx, rdx);
  1.3011 +}
  1.3012 +
  1.3013 +//----------------------------------------------------------------------------------------------------
  1.3014 +// Allocation
  1.3015 +
  1.3016 +void TemplateTable::_new() {
  1.3017 +  transition(vtos, atos);
  1.3018 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3019 +  Label slow_case;
  1.3020 +  Label done;
  1.3021 +  Label initialize_header;
  1.3022 +  Label initialize_object;  // including clearing the fields
  1.3023 +  Label allocate_shared;
  1.3024 +
  1.3025 +  ExternalAddress heap_top((address)Universe::heap()->top_addr());
  1.3026 +
  1.3027 +  __ get_cpool_and_tags(rcx, rax);
  1.3028 +  // get instanceKlass
  1.3029 +  __ movl(rcx, Address(rcx, rdx, Address::times_4, sizeof(constantPoolOopDesc)));
   1.3030 +  __ pushl(rcx);  // save the klass for initializing the object header
  1.3031 +
  1.3032 +  // make sure the class we're about to instantiate has been resolved.
   1.3033 +  // Note: slow_case pops the saved klass off the stack, which is why we loaded the class and pushed it above
  1.3034 +  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
  1.3035 +  __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
  1.3036 +  __ jcc(Assembler::notEqual, slow_case);
  1.3037 +
  1.3038 +  // make sure klass is initialized & doesn't have finalizer
  1.3039 +  // make sure klass is fully initialized
  1.3040 +  __ cmpl(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
  1.3041 +  __ jcc(Assembler::notEqual, slow_case);
  1.3042 +
  1.3043 +  // get instance_size in instanceKlass (scaled to a count of bytes)
  1.3044 +  __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
  1.3045 +  // test to see if it has a finalizer or is malformed in some way
  1.3046 +  __ testl(rdx, Klass::_lh_instance_slow_path_bit);
  1.3047 +  __ jcc(Assembler::notZero, slow_case);
  1.3048 +
  1.3049 +  //
  1.3050 +  // Allocate the instance
  1.3051 +  // 1) Try to allocate in the TLAB
  1.3052 +  // 2) if fail and the object is large allocate in the shared Eden
  1.3053 +  // 3) if the above fails (or is not applicable), go to a slow case
  1.3054 +  // (creates a new TLAB, etc.)
  1.3055 +
  1.3056 +  const bool allow_shared_alloc =
  1.3057 +    Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
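          +
          +  // The TLAB fast path below amounts to the following bump-pointer allocation
          +  // (illustrative pseudo-code only; field names are simplified, not the real
          +  // JavaThread accessors):
          +  //
          +  //   obj = thread->tlab_top;
          +  //   end = obj + instance_size_in_bytes;
          +  //   if (end > thread->tlab_end) goto allocate_shared_or_slow_case;
          +  //   thread->tlab_top = end;      // bump the pointer, no atomics needed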
  1.3058 +
  1.3059 +  if (UseTLAB) {
  1.3060 +    const Register thread = rcx;
  1.3061 +
  1.3062 +    __ get_thread(thread);
  1.3063 +    __ movl(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
  1.3064 +    __ leal(rbx, Address(rax, rdx, Address::times_1));
  1.3065 +    __ cmpl(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
  1.3066 +    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
  1.3067 +    __ movl(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
  1.3068 +    if (ZeroTLAB) {
  1.3069 +      // the fields have been already cleared
  1.3070 +      __ jmp(initialize_header);
  1.3071 +    } else {
  1.3072 +      // initialize both the header and fields
  1.3073 +      __ jmp(initialize_object);
  1.3074 +    }
  1.3075 +  }
  1.3076 +
  1.3077 +  // Allocation in the shared Eden, if allowed.
  1.3078 +  //
  1.3079 +  // rdx: instance size in bytes
  1.3080 +  if (allow_shared_alloc) {
  1.3081 +    __ bind(allocate_shared);
  1.3082 +
  1.3083 +    Label retry;
  1.3084 +    __ bind(retry);
  1.3085 +    __ mov32(rax, heap_top);
  1.3086 +    __ leal(rbx, Address(rax, rdx, Address::times_1));
  1.3087 +    __ cmp32(rbx, ExternalAddress((address)Universe::heap()->end_addr()));
  1.3088 +    __ jcc(Assembler::above, slow_case);
  1.3089 +
   1.3090 +    // Compare rax with the current top addr, and if still equal, store the new
   1.3091 +    // top addr (in rbx) at the address of the top addr pointer. Sets ZF if it was
   1.3092 +    // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
  1.3093 +    //
   1.3094 +    // rax: object begin
   1.3095 +    // rbx: object end
  1.3096 +    // rdx: instance size in bytes
  1.3097 +    if (os::is_MP()) __ lock();
  1.3098 +    __ cmpxchgptr(rbx, heap_top);
  1.3099 +
  1.3100 +    // if someone beat us on the allocation, try again, otherwise continue
  1.3101 +    __ jcc(Assembler::notEqual, retry);
  1.3102 +  }
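          +
          +  // The shared-Eden path above is conceptually a CAS retry loop
          +  // (illustrative pseudo-code only):
          +  //
          +  //   do {
          +  //     old_top = *heap_top;
          +  //     new_top = old_top + instance_size_in_bytes;
          +  //     if (new_top > heap_end) goto slow_case;
          +  //   } while (!compare_and_swap(heap_top, old_top, new_top));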
  1.3103 +
  1.3104 +  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
  1.3105 +    // The object is initialized before the header.  If the object size is
  1.3106 +    // zero, go directly to the header initialization.
  1.3107 +    __ bind(initialize_object);
  1.3108 +    __ decrement(rdx, sizeof(oopDesc));
  1.3109 +    __ jcc(Assembler::zero, initialize_header);
  1.3110 +
   1.3111 +    // Initialize topmost object field, divide rdx by 8, check if odd and
   1.3112 +    // test if zero.
   1.3113 +    __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
   1.3114 +    __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
   1.3115 +
   1.3116 +    // rdx must have been a multiple of 8
  1.3117 +#ifdef ASSERT
  1.3118 +    // make sure rdx was multiple of 8
  1.3119 +    Label L;
  1.3120 +    // Ignore partial flag stall after shrl() since it is debug VM
  1.3121 +    __ jccb(Assembler::carryClear, L);
  1.3122 +    __ stop("object size is not multiple of 2 - adjust this code");
  1.3123 +    __ bind(L);
  1.3124 +    // rdx must be > 0, no extra check needed here
  1.3125 +#endif
  1.3126 +
   1.3127 +    // initialize remaining object fields: rdx was a multiple of 8
   1.3128 +    { Label loop;
   1.3129 +      __ bind(loop);
   1.3130 +      __ movl(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
   1.3131 +      __ movl(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx);
   1.3132 +      __ decrement(rdx);
   1.3133 +      __ jcc(Assembler::notZero, loop);
   1.3134 +    }
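          +
          +    // Added note: at this point rdx counted 8-byte chunks of the instance body,
          +    // and each iteration above zeroed one chunk with two 32-bit stores, working
          +    // downward from the end of the object toward the header.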
  1.3135 +
  1.3136 +    // initialize object header only.
  1.3137 +    __ bind(initialize_header);
  1.3138 +    if (UseBiasedLocking) {
  1.3139 +      __ popl(rcx);   // get saved klass back in the register.
  1.3140 +      __ movl(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  1.3141 +      __ movl(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
  1.3142 +    } else {
  1.3143 +      __ movl(Address(rax, oopDesc::mark_offset_in_bytes ()),
  1.3144 +              (int)markOopDesc::prototype()); // header
  1.3145 +      __ popl(rcx);   // get saved klass back in the register.
  1.3146 +    }
  1.3147 +    __ movl(Address(rax, oopDesc::klass_offset_in_bytes()), rcx);  // klass
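          +    // Added note: on 32-bit x86 the object header is two words: the mark word
          +    // stored above (taken from the klass's prototype header when biased locking
          +    // is enabled, otherwise the static prototype mark) followed by this klass
          +    // pointer at oopDesc::klass_offset_in_bytes().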
  1.3148 +
  1.3149 +    {
  1.3150 +      SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
  1.3151 +      // Trigger dtrace event for fastpath
  1.3152 +      __ push(atos);
  1.3153 +      __ call_VM_leaf(
  1.3154 +           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
  1.3155 +      __ pop(atos);
  1.3156 +    }
  1.3157 +
  1.3158 +    __ jmp(done);
  1.3159 +  }
  1.3160 +
  1.3161 +  // slow case
  1.3162 +  __ bind(slow_case);
  1.3163 +  __ popl(rcx);   // restore stack pointer to what it was when we came in.
  1.3164 +  __ get_constant_pool(rax);
  1.3165 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3166 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rax, rdx);
  1.3167 +
  1.3168 +  // continue
  1.3169 +  __ bind(done);
  1.3170 +}
  1.3171 +
  1.3172 +
  1.3173 +void TemplateTable::newarray() {
  1.3174 +  transition(itos, atos);
  1.3175 +  __ push_i(rax);                                 // make sure everything is on the stack
  1.3176 +  __ load_unsigned_byte(rdx, at_bcp(1));
  1.3177 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), rdx, rax);
  1.3178 +  __ pop_i(rdx);                                  // discard size
  1.3179 +}
  1.3180 +
  1.3181 +
  1.3182 +void TemplateTable::anewarray() {
  1.3183 +  transition(itos, atos);
  1.3184 +  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  1.3185 +  __ get_constant_pool(rcx);
  1.3186 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), rcx, rdx, rax);
  1.3187 +}
  1.3188 +
  1.3189 +
  1.3190 +void TemplateTable::arraylength() {
  1.3191 +  transition(atos, itos);
  1.3192 +  __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
  1.3193 +  __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
  1.3194 +}
  1.3195 +
  1.3196 +
  1.3197 +void TemplateTable::checkcast() {
  1.3198 +  transition(atos, atos);
  1.3199 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3200 +  __ testl(rax, rax);   // Object is in EAX
  1.3201 +  __ jcc(Assembler::zero, is_null);
  1.3202 +
  1.3203 +  // Get cpool & tags index
  1.3204 +  __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
  1.3205 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index
  1.3206 +  // See if bytecode has already been quicked
  1.3207 +  __ cmpb(Address(rdx, rbx, Address::times_1, typeArrayOopDesc::header_size(T_BYTE) * wordSize), JVM_CONSTANT_Class);
  1.3208 +  __ jcc(Assembler::equal, quicked);
  1.3209 +
  1.3210 +  __ push(atos);
  1.3211 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
  1.3212 +  __ pop_ptr(rdx);
  1.3213 +  __ jmpb(resolved);
  1.3214 +
  1.3215 +  // Get superklass in EAX and subklass in EBX
  1.3216 +  __ bind(quicked);
  1.3217 +  __ movl(rdx, rax);          // Save object in EDX; EAX needed for subtype check
  1.3218 +  __ movl(rax, Address(rcx, rbx, Address::times_4, sizeof(constantPoolOopDesc)));
  1.3219 +
  1.3220 +  __ bind(resolved);
  1.3221 +  __ movl(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
  1.3222 +
  1.3223 +  // Generate subtype check.  Blows ECX.  Resets EDI.  Object in EDX.
  1.3224 +  // Superklass in EAX.  Subklass in EBX.
  1.3225 +  __ gen_subtype_check( rbx, ok_is_subtype );
  1.3226 +
  1.3227 +  // Come here on failure
  1.3228 +  __ pushl(rdx);
  1.3229 +  // object is at TOS
  1.3230 +  __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
  1.3231 +
  1.3232 +  // Come here on success
  1.3233 +  __ bind(ok_is_subtype);
   1.3234 +  __ movl(rax,rdx);           // Restore object from EDX
  1.3235 +
  1.3236 +  // Collect counts on whether this check-cast sees NULLs a lot or not.
  1.3237 +  if (ProfileInterpreter) {
  1.3238 +    __ jmp(done);
  1.3239 +    __ bind(is_null);
  1.3240 +    __ profile_null_seen(rcx);
  1.3241 +  } else {
  1.3242 +    __ bind(is_null);   // same as 'done'
  1.3243 +  }
  1.3244 +  __ bind(done);
  1.3245 +}
  1.3246 +
  1.3247 +
  1.3248 +void TemplateTable::instanceof() {
  1.3249 +  transition(atos, itos);
  1.3250 +  Label done, is_null, ok_is_subtype, quicked, resolved;
  1.3251 +  __ testl(rax, rax);
  1.3252 +  __ jcc(Assembler::zero, is_null);
  1.3253 +
  1.3254 +  // Get cpool & tags index
  1.3255 +  __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
  1.3256 +  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index
  1.3257 +  // See if bytecode has already been quicked
  1.3258 +  __ cmpb(Address(rdx, rbx, Address::times_1, typeArrayOopDesc::header_size(T_BYTE) * wordSize), JVM_CONSTANT_Class);
  1.3259 +  __ jcc(Assembler::equal, quicked);
  1.3260 +
  1.3261 +  __ push(atos);
  1.3262 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
  1.3263 +  __ pop_ptr(rdx);
  1.3264 +  __ movl(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
  1.3265 +  __ jmp(resolved);
  1.3266 +
  1.3267 +  // Get superklass in EAX and subklass in EDX
  1.3268 +  __ bind(quicked);
  1.3269 +  __ movl(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
  1.3270 +  __ movl(rax, Address(rcx, rbx, Address::times_4, sizeof(constantPoolOopDesc)));
  1.3271 +
  1.3272 +  __ bind(resolved);
  1.3273 +
  1.3274 +  // Generate subtype check.  Blows ECX.  Resets EDI.
  1.3275 +  // Superklass in EAX.  Subklass in EDX.
  1.3276 +  __ gen_subtype_check( rdx, ok_is_subtype );
  1.3277 +
  1.3278 +  // Come here on failure
  1.3279 +  __ xorl(rax,rax);
  1.3280 +  __ jmpb(done);
  1.3281 +  // Come here on success
  1.3282 +  __ bind(ok_is_subtype);
  1.3283 +  __ movl(rax, 1);
  1.3284 +
  1.3285 +  // Collect counts on whether this test sees NULLs a lot or not.
  1.3286 +  if (ProfileInterpreter) {
  1.3287 +    __ jmp(done);
  1.3288 +    __ bind(is_null);
  1.3289 +    __ profile_null_seen(rcx);
  1.3290 +  } else {
  1.3291 +    __ bind(is_null);   // same as 'done'
  1.3292 +  }
  1.3293 +  __ bind(done);
   1.3294 +  // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
   1.3295 +  // rax = 1: obj != NULL and obj is     an instanceof the specified klass
  1.3296 +}
  1.3297 +
  1.3298 +
  1.3299 +//----------------------------------------------------------------------------------------------------
  1.3300 +// Breakpoints
  1.3301 +void TemplateTable::_breakpoint() {
  1.3302 +
   1.3303 +  // Note: We get here even if we are single stepping.
   1.3304 +  // jbug insists on setting breakpoints at every bytecode
   1.3305 +  // even if we are in single step mode.
  1.3306 +
  1.3307 +  transition(vtos, vtos);
  1.3308 +
  1.3309 +  // get the unpatched byte code
  1.3310 +  __ get_method(rcx);
  1.3311 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), rcx, rsi);
  1.3312 +  __ movl(rbx, rax);
  1.3313 +
  1.3314 +  // post the breakpoint event
  1.3315 +  __ get_method(rcx);
  1.3316 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), rcx, rsi);
  1.3317 +
  1.3318 +  // complete the execution of original bytecode
  1.3319 +  __ dispatch_only_normal(vtos);
  1.3320 +}
  1.3321 +
  1.3322 +
  1.3323 +//----------------------------------------------------------------------------------------------------
  1.3324 +// Exceptions
  1.3325 +
  1.3326 +void TemplateTable::athrow() {
  1.3327 +  transition(atos, vtos);
  1.3328 +  __ null_check(rax);
  1.3329 +  __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
  1.3330 +}
  1.3331 +
  1.3332 +
  1.3333 +//----------------------------------------------------------------------------------------------------
  1.3334 +// Synchronization
  1.3335 +//
  1.3336 +// Note: monitorenter & exit are symmetric routines; which is reflected
  1.3337 +//       in the assembly code structure as well
  1.3338 +//
  1.3339 +// Stack layout:
  1.3340 +//
  1.3341 +// [expressions  ] <--- rsp               = expression stack top
  1.3342 +// ..
  1.3343 +// [expressions  ]
  1.3344 +// [monitor entry] <--- monitor block top = expression stack bot
  1.3345 +// ..
  1.3346 +// [monitor entry]
  1.3347 +// [frame data   ] <--- monitor block bot
  1.3348 +// ...
   1.3349 +// [saved rbp     ] <--- rbp
  1.3350 +
  1.3351 +
  1.3352 +void TemplateTable::monitorenter() {
  1.3353 +  transition(atos, vtos);
  1.3354 +
  1.3355 +  // check for NULL object
  1.3356 +  __ null_check(rax);
  1.3357 +
  1.3358 +  const Address monitor_block_top(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3359 +  const Address monitor_block_bot(rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
  1.3360 +  const int entry_size =         (     frame::interpreter_frame_monitor_size()           * wordSize);
  1.3361 +  Label allocated;
  1.3362 +
  1.3363 +  // initialize entry pointer
  1.3364 +  __ xorl(rdx, rdx);                             // points to free slot or NULL
  1.3365 +
  1.3366 +  // find a free slot in the monitor block (result in rdx)
  1.3367 +  { Label entry, loop, exit;
  1.3368 +    __ movl(rcx, monitor_block_top);             // points to current entry, starting with top-most entry
  1.3369 +    __ leal(rbx, monitor_block_bot);             // points to word before bottom of monitor block
  1.3370 +    __ jmpb(entry);
  1.3371 +
  1.3372 +    __ bind(loop);
  1.3373 +    __ cmpl(Address(rcx, BasicObjectLock::obj_offset_in_bytes()), NULL_WORD);  // check if current entry is used
  1.3374 +
  1.3375 +// TODO - need new func here - kbt
  1.3376 +    if (VM_Version::supports_cmov()) {
  1.3377 +      __ cmovl(Assembler::equal, rdx, rcx);      // if not used then remember entry in rdx
  1.3378 +    } else {
  1.3379 +      Label L;
  1.3380 +      __ jccb(Assembler::notEqual, L);
  1.3381 +      __ movl(rdx, rcx);                         // if not used then remember entry in rdx
  1.3382 +      __ bind(L);
  1.3383 +    }
  1.3384 +    __ cmpl(rax, Address(rcx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
  1.3385 +    __ jccb(Assembler::equal, exit);              // if same object then stop searching
  1.3386 +    __ addl(rcx, entry_size);                    // otherwise advance to next entry
  1.3387 +    __ bind(entry);
  1.3388 +    __ cmpl(rcx, rbx);                           // check if bottom reached
  1.3389 +    __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
  1.3390 +    __ bind(exit);
  1.3391 +  }
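          +
          +  // Rough equivalent of the search above (illustrative pseudo-code only,
          +  // with simplified names):
          +  //
          +  //   BasicObjectLock* free_slot = NULL;
          +  //   for (cur = monitor_block_top; cur != monitor_block_bot; cur = next(cur)) {
          +  //     if (cur->obj == NULL) free_slot = cur;   // remember an unused entry
          +  //     if (cur->obj == rax)  break;             // same object: stop searching
          +  //   }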
  1.3392 +
  1.3393 +  __ testl(rdx, rdx);                            // check if a slot has been found
  1.3394 +  __ jccb(Assembler::notZero, allocated);         // if found, continue with that one
  1.3395 +
  1.3396 +  // allocate one if there's no free slot
  1.3397 +  { Label entry, loop;
  1.3398 +    // 1. compute new pointers                   // rsp: old expression stack top
  1.3399 +    __ movl(rdx, monitor_block_bot);             // rdx: old expression stack bottom
  1.3400 +    __ subl(rsp, entry_size);                    // move expression stack top
  1.3401 +    __ subl(rdx, entry_size);                    // move expression stack bottom
  1.3402 +    __ movl(rcx, rsp);                           // set start value for copy loop
  1.3403 +    __ movl(monitor_block_bot, rdx);             // set new monitor block top
  1.3404 +    __ jmp(entry);
  1.3405 +    // 2. move expression stack contents
  1.3406 +    __ bind(loop);
  1.3407 +    __ movl(rbx, Address(rcx, entry_size));      // load expression stack word from old location
  1.3408 +    __ movl(Address(rcx, 0), rbx);               // and store it at new location
  1.3409 +    __ addl(rcx, wordSize);                      // advance to next word
  1.3410 +    __ bind(entry);
  1.3411 +    __ cmpl(rcx, rdx);                           // check if bottom reached
  1.3412 +    __ jcc(Assembler::notEqual, loop);           // if not at bottom then copy next word
  1.3413 +  }
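          +
          +  // Added note: the block above shifts the expression stack down by entry_size
          +  // (both rsp and the saved stack bottom move), opening room for one new
          +  // monitor entry at the bottom of the monitor block; rdx points at that entry.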
  1.3414 +
  1.3415 +  // call run-time routine
  1.3416 +  // rdx: points to monitor entry
  1.3417 +  __ bind(allocated);
  1.3418 +
  1.3419 +  // Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly.
   1.3420 +  // The object has already been popped from the stack, so the expression stack looks correct.
  1.3421 +  __ increment(rsi);
  1.3422 +
  1.3423 +  __ movl(Address(rdx, BasicObjectLock::obj_offset_in_bytes()), rax);     // store object
  1.3424 +  __ lock_object(rdx);
  1.3425 +
  1.3426 +  // check to make sure this monitor doesn't cause stack overflow after locking
  1.3427 +  __ save_bcp();  // in case of exception
  1.3428 +  __ generate_stack_overflow_check(0);
  1.3429 +
  1.3430 +  // The bcp has already been incremented. Just need to dispatch to next instruction.
  1.3431 +  __ dispatch_next(vtos);
  1.3432 +}
  1.3433 +
  1.3434 +
  1.3435 +void TemplateTable::monitorexit() {
  1.3436 +  transition(atos, vtos);
  1.3437 +
  1.3438 +  // check for NULL object
  1.3439 +  __ null_check(rax);
  1.3440 +
  1.3441 +  const Address monitor_block_top(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
  1.3442 +  const Address monitor_block_bot(rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
  1.3443 +  const int entry_size =         (     frame::interpreter_frame_monitor_size()           * wordSize);
  1.3444 +  Label found;
  1.3445 +
  1.3446 +  // find matching slot
  1.3447 +  { Label entry, loop;
  1.3448 +    __ movl(rdx, monitor_block_top);             // points to current entry, starting with top-most entry
  1.3449 +    __ leal(rbx, monitor_block_bot);             // points to word before bottom of monitor block
  1.3450 +    __ jmpb(entry);
  1.3451 +
  1.3452 +    __ bind(loop);
  1.3453 +    __ cmpl(rax, Address(rdx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
  1.3454 +    __ jcc(Assembler::equal, found);             // if same object then stop searching
  1.3455 +    __ addl(rdx, entry_size);                    // otherwise advance to next entry
  1.3456 +    __ bind(entry);
  1.3457 +    __ cmpl(rdx, rbx);                           // check if bottom reached
  1.3458 +    __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
  1.3459 +  }
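          +
          +  // Added note: the loop above scans the monitor block for an entry whose obj
          +  // field matches rax; falling out of the loop without a match means the
          +  // bytecode tried to unlock an object it holds no monitor entry for, which
          +  // is reported as an IllegalMonitorStateException just below.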
  1.3460 +
   1.3461 +  // Error handling: unlocking was not block-structured
  1.3462 +  Label end;
  1.3463 +  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
  1.3464 +  __ should_not_reach_here();
  1.3465 +
  1.3466 +  // call run-time routine
  1.3467 +  // rcx: points to monitor entry
  1.3468 +  __ bind(found);
  1.3469 +  __ push_ptr(rax);                                 // make sure object is on stack (contract with oopMaps)
  1.3470 +  __ unlock_object(rdx);
  1.3471 +  __ pop_ptr(rax);                                  // discard object
  1.3472 +  __ bind(end);
  1.3473 +}
  1.3474 +
  1.3475 +
  1.3476 +//----------------------------------------------------------------------------------------------------
  1.3477 +// Wide instructions
  1.3478 +
  1.3479 +void TemplateTable::wide() {
  1.3480 +  transition(vtos, vtos);
  1.3481 +  __ load_unsigned_byte(rbx, at_bcp(1));
  1.3482 +  __ jmp(Address(noreg, rbx, Address::times_4, int(Interpreter::_wentry_point)));
  1.3483 +  // Note: the rsi increment step is part of the individual wide bytecode implementations
  1.3484 +}
  1.3485 +
  1.3486 +
  1.3487 +//----------------------------------------------------------------------------------------------------
  1.3488 +// Multi arrays
  1.3489 +
  1.3490 +void TemplateTable::multianewarray() {
  1.3491 +  transition(vtos, atos);
  1.3492 +  __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
  1.3493 +  // last dim is on top of stack; we want address of first one:
   1.3494 +  // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordSize
   1.3495 +  // (the latter wordSize adjustment points to the beginning of the array)
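          +  // Worked example (assuming an untagged stack, i.e. stackElementSize == wordSize):
          +  // for a 3-dimensional array the dimension words occupy rsp+0 .. rsp+8, so the
          +  // leal below yields rsp + 3*wordSize - wordSize = rsp + 8, which is where the
          +  // first (deepest) dimension sits.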
  1.3496 +  __ leal(  rax, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
  1.3497 +  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rax);     // pass in rax,
  1.3498 +  __ load_unsigned_byte(rbx, at_bcp(3));
  1.3499 +  __ leal(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
  1.3500 +}
  1.3501 +
  1.3502 +#endif /* !CC_INTERP */
