src/cpu/x86/vm/interpreter_x86_64.cpp

changeset 435
a61af66fc99e
child 548
ba764ed4b6f2
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Sat Dec 01 00:00:00 2007 +0000
     1.3 @@ -0,0 +1,459 @@
     1.4 +/*
     1.5 + * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "incls/_precompiled.incl"
    1.29 +#include "incls/_interpreter_x86_64.cpp.incl"
    1.30 +
    1.31 +#define __ _masm->
    1.32 +
    1.33 +
    1.34 +#ifdef _WIN64
address AbstractInterpreterGenerator::generate_slow_signature_handler() {
  // Win64 slow-path signature handler: calls into the runtime to walk the
  // method signature, then loads the outgoing native arguments from the
  // scratch area the runtime filled in into the registers required by the
  // Win64 C calling convention.  On Win64 the first four arguments share
  // positions between the integer registers and xmm0..xmm3, so each argument
  // slot is loaded into either an integer register or an XMM register,
  // selected by the identifier bits written by the runtime.
  address entry = __ pc();

  // rbx: method
  // r14: pointer to locals
  // c_rarg3: first stack arg - wordSize
  __ movq(c_rarg3, rsp);
  // adjust rsp: reserve one scratch slot per shared register argument
  // position (4 on Win64) for the runtime to deposit argument values into
  __ subq(rsp, 4 * wordSize);
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::slow_signature_handler),
             rbx, r14, c_rarg3);

  // rax: result handler

  // Stack layout:
  // rsp: 3 integer or float args (if static first is unused)
  //      1 float/double identifiers
  //        return address
  //        stack args
  //        garbage
  //        expression stack bottom
  //        bcp (NULL)
  //        ...

  // Do FP first so we can use c_rarg3 as temp
  __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers

  // Arg position 0 (c_rarg0/xmm0) is not loaded here -- presumably it is
  // reserved for the JNIEnv/receiver set up elsewhere; TODO confirm against
  // the native entry that consumes this handler's output.
  for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) {
    XMMRegister floatreg = as_XMMRegister(i+1);
    Label isfloatordouble, isdouble, next;

    // Two identifier bits per argument position: bit (i*2) set means the
    // argument is floating point; bit (i*2)+1 distinguishes double from float.
    __ testl(c_rarg3, 1 << (i*2));      // Float or Double?
    __ jcc(Assembler::notZero, isfloatordouble);

    // Do Int register here
    switch ( i ) {
      case 0:
        // Slot 0 is the receiver; only load it for non-static methods
        // (cmov on the zero flag left by the JVM_ACC_STATIC test).
        __ movl(rscratch1, Address(rbx, methodOopDesc::access_flags_offset()));
        __ testl(rscratch1, JVM_ACC_STATIC);
        __ cmovq(Assembler::zero, c_rarg1, Address(rsp, 0));
        break;
      case 1:
        __ movq(c_rarg2, Address(rsp, wordSize));
        break;
      case 2:
        // NOTE: this clobbers c_rarg3, which holds the identifier bits --
        // safe only because i == 2 is the final loop iteration.
        __ movq(c_rarg3, Address(rsp, 2 * wordSize));
        break;
      default:
        break;
    }

    __ jmp (next);

    __ bind(isfloatordouble);
    __ testl(c_rarg3, 1 << ((i*2)+1));     // Double?
    __ jcc(Assembler::notZero, isdouble);

// Do Float Here
    __ movflt(floatreg, Address(rsp, i * wordSize));
    __ jmp(next);

// Do Double here
    __ bind(isdouble);
    __ movdbl(floatreg, Address(rsp, i * wordSize));

    __ bind(next);
  }


  // restore rsp
  __ addq(rsp, 4 * wordSize);

  __ ret(0);

  return entry;
}
   1.113 +#else
address AbstractInterpreterGenerator::generate_slow_signature_handler() {
  // System V (Linux/Solaris) slow-path signature handler: calls into the
  // runtime to walk the method signature, then loads the outgoing native
  // arguments from the scratch area the runtime filled in into the registers
  // required by the SysV AMD64 calling convention (integer args in
  // c_rarg0..c_rarg5, floating-point args in xmm0..xmm7).
  address entry = __ pc();

  // rbx: method
  // r14: pointer to locals
  // c_rarg3: first stack arg - wordSize
  __ movq(c_rarg3, rsp);
  // adjust rsp: 14 scratch slots = 5 integer-arg slots + 1 identifier word
  // + 8 float/double slots (see the layout comment below)
  __ subq(rsp, 14 * wordSize);
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::slow_signature_handler),
             rbx, r14, c_rarg3);

  // rax: result handler

  // Stack layout:
  // rsp: 5 integer args (if static first is unused)
  //      1 float/double identifiers
  //      8 double args
  //        return address
  //        stack args
  //        garbage
  //        expression stack bottom
  //        bcp (NULL)
  //        ...

  // Do FP first so we can use c_rarg3 as temp
  __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers

  // One identifier bit per XMM argument register: set means the slot holds
  // a double, clear means a float.
  for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
    const XMMRegister r = as_XMMRegister(i);

    Label d, done;

    __ testl(c_rarg3, 1 << i);
    __ jcc(Assembler::notZero, d);
    __ movflt(r, Address(rsp, (6 + i) * wordSize));
    __ jmp(done);
    __ bind(d);
    __ movdbl(r, Address(rsp, (6 + i) * wordSize));
    __ bind(done);
  }

  // Now handle integrals.  Only do c_rarg1 if not static.
  // (cmov on the zero flag left by the JVM_ACC_STATIC test; for static
  // methods the first integer slot is unused, as noted in the layout above)
  __ movl(c_rarg3, Address(rbx, methodOopDesc::access_flags_offset()));
  __ testl(c_rarg3, JVM_ACC_STATIC);
  __ cmovq(Assembler::zero, c_rarg1, Address(rsp, 0));

  __ movq(c_rarg2, Address(rsp, wordSize));
  __ movq(c_rarg3, Address(rsp, 2 * wordSize));
  __ movq(c_rarg4, Address(rsp, 3 * wordSize));
  __ movq(c_rarg5, Address(rsp, 4 * wordSize));

  // restore rsp
  __ addq(rsp, 14 * wordSize);

  __ ret(0);

  return entry;
}
   1.175 +#endif
   1.176 +
   1.177 +
   1.178 +//
   1.179 +// Various method entries
   1.180 +//
   1.181 +
address InterpreterGenerator::generate_math_entry(
  AbstractInterpreter::MethodKind kind) {
  // Generates an intrinsic fast-path entry for a math method.  On x86_64
  // only Math.sqrt is handled here (asserted below); all other kinds fall
  // back to the vanilla entry by returning NULL.
  // rbx: methodOop

  if (!InlineIntrinsics) return NULL; // Generate a vanilla entry

  assert(kind == Interpreter::java_lang_math_sqrt,
         "Other intrinsics are not special");

  address entry_point = __ pc();

  // These don't need a safepoint check because they aren't virtually
  // callable. We won't enter these intrinsics from compiled code.
  // If in the future we added an intrinsic which was virtually callable
  // we'd have to worry about how to safepoint so that this code is used.

  // mathematical functions inlined by compiler
  // (interpreter must provide identical implementation
  // in order to avoid monotonicity bugs when switching
  // from interpreter to compiler in the middle of some
  // computation)

  // Note: For JDK 1.2 StrictMath doesn't exist and Math.sin/cos/sqrt are
  //       native methods. Interpreter::method_kind(...) does a check for
  //       native methods first before checking for intrinsic methods and
  //       thus will never select this entry point. Make sure it is not
  //       called accidentally since the SharedRuntime entry points will
  //       not work for JDK 1.2.
  //
  // We no longer need to check for JDK 1.2 since it's EOL'ed.
  // The following check existed in pre 1.6 implementation,
  //    if (Universe::is_jdk12x_version()) {
  //      __ should_not_reach_here();
  //    }
  // Universe::is_jdk12x_version() always returns false since
  // the JDK version is not yet determined when this method is called.
  // This method is called during interpreter_init() whereas
  // JDK version is only determined when universe2_init() is called.

  // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are
  //       java methods.  Interpreter::method_kind(...) will select
  //       this entry point for the corresponding methods in JDK 1.3.
  // Compute sqrt of the double argument directly off the caller's
  // expression stack (just above the return address).
  __ sqrtsd(xmm0, Address(rsp, wordSize));

  // Return without building a frame: pop the return address, restore the
  // sender's sp from r13, and jump back to the caller with the result in xmm0.
  __ popq(rax);
  __ movq(rsp, r13);
  __ jmp(rax);

  return entry_point;
}
   1.232 +
   1.233 +
   1.234 +// Abstract method entry
   1.235 +// Attempt to execute abstract method. Throw exception
// Abstract method entry
// Attempt to execute abstract method. Throw exception
address InterpreterGenerator::generate_abstract_entry(void) {
  // Generates the entry point used when an abstract method is invoked:
  // it unwinds the would-be frame and throws AbstractMethodError.
  // rbx: methodOop
  // r13: sender SP

  address entry_point = __ pc();

  // abstract method entry
  // remove return address. Not really needed, since exception
  // handling throws away expression stack
  __ popq(rbx);

  // adjust stack to what a normal return would do
  __ movq(rsp, r13);

  // throw exception
  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                             InterpreterRuntime::throw_AbstractMethodError));
  // the call_VM checks for exception, so we should never return here.
  __ should_not_reach_here();

  return entry_point;
}
   1.258 +
   1.259 +
   1.260 +// Empty method, generate a very fast return.
   1.261 +
address InterpreterGenerator::generate_empty_entry(void) {
  // Fast-path entry for empty methods: returns immediately without building
  // a frame or incrementing the invocation counter.  Only generated when
  // UseFastEmptyMethods is enabled; otherwise NULL is returned and the
  // vanilla entry is used.
  // rbx: methodOop
  // r13: sender sp must set sp to this value on return

  if (!UseFastEmptyMethods) {
    return NULL;
  }

  address entry_point = __ pc();

  // If we need a safepoint check, generate full interpreter entry.
  // The fast path must not be taken while a safepoint is pending, since it
  // never polls; fall through to the normal (polling) entry instead.
  Label slow_path;
  __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
           SafepointSynchronize::_not_synchronized);
  __ jcc(Assembler::notEqual, slow_path);

  // do nothing for empty methods (do not even increment invocation counter)
  // Code: _return
  // _return
  // return w/o popping parameters
  __ popq(rax);
  __ movq(rsp, r13);
  __ jmp(rax);

  // Slow path: emit a full interpreter entry immediately after the fast
  // path so the jcc above falls into it.
  __ bind(slow_path);
  (void) generate_normal_entry(false);
  return entry_point;

}
   1.291 +
   1.292 +// Call an accessor method (assuming it is resolved, otherwise drop
   1.293 +// into vanilla (slow path) entry
// Call an accessor method (assuming it is resolved, otherwise drop
// into vanilla (slow path) entry
address InterpreterGenerator::generate_accessor_entry(void) {
  // Fast-path entry for simple getter methods of the exact shape
  // _aload_0, _getfield, _(i|a)return.  Reads the field directly from the
  // receiver without building an interpreter frame.  Falls back to the
  // normal entry when the getfield is unresolved, the receiver is NULL,
  // or a safepoint is pending.
  // rbx: methodOop

  // r13: senderSP must be preserved for slow path, set SP to it on fast path

  address entry_point = __ pc();
  Label xreturn_path;

  // do fastpath for resolved accessor methods
  if (UseFastAccessorMethods) {
    // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites
    //       thereof; parameter size = 1
    // Note: We can only use this code if the getfield has been resolved
    //       and if we don't have a null-pointer exception => check for
    //       these conditions first and use slow path if necessary.
    Label slow_path;
    // If we need a safepoint check, generate full interpreter entry.
    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
             SafepointSynchronize::_not_synchronized);

    __ jcc(Assembler::notEqual, slow_path);
    // rbx: method
    // Load local 0 (the receiver) from just above the return address.
    __ movq(rax, Address(rsp, wordSize));

    // check if local 0 != NULL and read field
    __ testq(rax, rax);
    __ jcc(Assembler::zero, slow_path);

    __ movq(rdi, Address(rbx, methodOopDesc::constants_offset()));
    // read first instruction word and extract bytecode @ 1 and index @ 2
    __ movq(rdx, Address(rbx, methodOopDesc::const_offset()));
    __ movl(rdx, Address(rdx, constMethodOopDesc::codes_offset()));
    // Shift codes right to get the index on the right.
    // The bytecode fetched looks like <index><0xb4><0x2a>
    __ shrl(rdx, 2 * BitsPerByte);
    // Scale the cp-cache index by the entry size (in words) so it can be
    // used with a times_8 addressing mode below.
    __ shll(rdx, exact_log2(in_words(ConstantPoolCacheEntry::size())));
    __ movq(rdi, Address(rdi, constantPoolOopDesc::cache_offset_in_bytes()));

    // rax: local 0
    // rbx: method
    // rdx: constant pool cache index
    // rdi: constant pool cache

    // check if getfield has been resolved and read constant pool cache entry
    // check the validity of the cache entry by testing whether _indices field
    // contains Bytecode::_getfield in b1 byte.
    assert(in_words(ConstantPoolCacheEntry::size()) == 4,
           "adjust shift below");
    __ movl(rcx,
            Address(rdi,
                    rdx,
                    Address::times_8,
                    constantPoolCacheOopDesc::base_offset() +
                    ConstantPoolCacheEntry::indices_offset()));
    __ shrl(rcx, 2 * BitsPerByte);
    __ andl(rcx, 0xFF);
    __ cmpl(rcx, Bytecodes::_getfield);
    __ jcc(Assembler::notEqual, slow_path);

    // Note: constant pool entry is not valid before bytecode is resolved
    // f2 holds the field offset for a resolved getfield.
    __ movq(rcx,
            Address(rdi,
                    rdx,
                    Address::times_8,
                    constantPoolCacheOopDesc::base_offset() +
                    ConstantPoolCacheEntry::f2_offset()));
    // edx: flags
    __ movl(rdx,
            Address(rdi,
                    rdx,
                    Address::times_8,
                    constantPoolCacheOopDesc::base_offset() +
                    ConstantPoolCacheEntry::flags_offset()));

    Label notObj, notInt, notByte, notShort;
    // receiver (rax) + field offset (rcx) => field address
    const Address field_address(rax, rcx, Address::times_1);

    // Need to differentiate between igetfield, agetfield, bgetfield etc.
    // because they are different sizes.
    // Use the type from the constant pool cache
    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
    // Make sure we don't need to mask edx for tosBits after the above shift
    ConstantPoolCacheEntry::verify_tosBits();

    __ cmpl(rdx, atos);
    __ jcc(Assembler::notEqual, notObj);
    // atos
    __ movq(rax, field_address);
    __ jmp(xreturn_path);

    __ bind(notObj);
    __ cmpl(rdx, itos);
    __ jcc(Assembler::notEqual, notInt);
    // itos
    __ movl(rax, field_address);
    __ jmp(xreturn_path);

    __ bind(notInt);
    __ cmpl(rdx, btos);
    __ jcc(Assembler::notEqual, notByte);
    // btos
    __ load_signed_byte(rax, field_address);
    __ jmp(xreturn_path);

    __ bind(notByte);
    __ cmpl(rdx, stos);
    __ jcc(Assembler::notEqual, notShort);
    // stos
    __ load_signed_word(rax, field_address);
    __ jmp(xreturn_path);

    __ bind(notShort);
#ifdef ASSERT
    // In debug builds, verify the only remaining possibility is ctos.
    Label okay;
    __ cmpl(rdx, ctos);
    __ jcc(Assembler::equal, okay);
    __ stop("what type is this?");
    __ bind(okay);
#endif
    // ctos
    __ load_unsigned_word(rax, field_address);

    __ bind(xreturn_path);

    // _ireturn/_areturn
    __ popq(rdi);
    __ movq(rsp, r13);
    __ jmp(rdi);
    // NOTE(review): this ret is unreachable -- the jmp above never falls
    // through.  It is emitted but never executed.
    __ ret(0);

    // generate a vanilla interpreter entry as the slow path
    __ bind(slow_path);
    (void) generate_normal_entry(false);
  } else {
    (void) generate_normal_entry(false);
  }

  return entry_point;
}
   1.433 +
   1.434 +// This method tells the deoptimizer how big an interpreted frame must be:
   1.435 +int AbstractInterpreter::size_activation(methodOop method,
   1.436 +                                         int tempcount,
   1.437 +                                         int popframe_extra_args,
   1.438 +                                         int moncount,
   1.439 +                                         int callee_param_count,
   1.440 +                                         int callee_locals,
   1.441 +                                         bool is_top_frame) {
   1.442 +  return layout_activation(method,
   1.443 +                           tempcount, popframe_extra_args, moncount,
   1.444 +                           callee_param_count, callee_locals,
   1.445 +                           (frame*) NULL, (frame*) NULL, is_top_frame);
   1.446 +}
   1.447 +
   1.448 +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
   1.449 +
   1.450 +  // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
   1.451 +  // the days we had adapter frames. When we deoptimize a situation where a
   1.452 +  // compiled caller calls a compiled caller will have registers it expects
   1.453 +  // to survive the call to the callee. If we deoptimize the callee the only
   1.454 +  // way we can restore these registers is to have the oldest interpreter
   1.455 +  // frame that we create restore these values. That is what this routine
   1.456 +  // will accomplish.
   1.457 +
   1.458 +  // At the moment we have modified c2 to not have any callee save registers
   1.459 +  // so this problem does not exist and this routine is just a place holder.
   1.460 +
   1.461 +  assert(f->is_interpreted_frame(), "must be interpreted");
   1.462 +}

mercurial