src/cpu/x86/vm/interpreter_x86_64.cpp

Tue, 14 Oct 2008 15:10:26 -0700

author
kvn
date
Tue, 14 Oct 2008 15:10:26 -0700
changeset 840
2649e5276dd7
parent 739
dc7f315e41f7
child 1145
e5b0439ef4ae
permissions
-rw-r--r--

6532536: Optimize arraycopy stubs for Intel cpus
Summary: Use SSE2 movdqu in arraycopy stubs on newest Intel's cpus
Reviewed-by: rasbold

duke@435 1 /*
xdono@631 2 * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved.
duke@435 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@435 4 *
duke@435 5 * This code is free software; you can redistribute it and/or modify it
duke@435 6 * under the terms of the GNU General Public License version 2 only, as
duke@435 7 * published by the Free Software Foundation.
duke@435 8 *
duke@435 9 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@435 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@435 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@435 12 * version 2 for more details (a copy is included in the LICENSE file that
duke@435 13 * accompanied this code).
duke@435 14 *
duke@435 15 * You should have received a copy of the GNU General Public License version
duke@435 16 * 2 along with this work; if not, write to the Free Software Foundation,
duke@435 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@435 18 *
duke@435 19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
duke@435 20 * CA 95054 USA or visit www.sun.com if you need additional information or
duke@435 21 * have any questions.
duke@435 22 *
duke@435 23 */
duke@435 24
duke@435 25 #include "incls/_precompiled.incl"
duke@435 26 #include "incls/_interpreter_x86_64.cpp.incl"
duke@435 27
duke@435 28 #define __ _masm->
duke@435 29
duke@435 30
duke@435 31 #ifdef _WIN64
// Windows x64 (_WIN64) variant of the slow signature handler stub.
// Emits the stub at the current code position and returns its entry address.
// The emitted code reserves 4 words of stack, calls
// InterpreterRuntime::slow_signature_handler(rbx, r14, c_rarg3) through call_VM,
// then loads the native argument registers from the stack slots described in
// the layout comment below (presumably filled in by that runtime call).
duke@435 32 address AbstractInterpreterGenerator::generate_slow_signature_handler() {
duke@435 33 address entry = __ pc();
duke@435 34
duke@435 35 // rbx: method
duke@435 36 // r14: pointer to locals
duke@435 37 // c_rarg3: first stack arg - wordSize
never@739 38 __ mov(c_rarg3, rsp);
duke@435 39 // adjust rsp: reserve 4 words (3 argument slots + 1 identifier word, see layout below)
never@739 40 __ subptr(rsp, 4 * wordSize);
duke@435 41 __ call_VM(noreg,
duke@435 42 CAST_FROM_FN_PTR(address,
duke@435 43 InterpreterRuntime::slow_signature_handler),
duke@435 44 rbx, r14, c_rarg3);
duke@435 45
duke@435 46 // rax: result handler
duke@435 47
duke@435 48 // Stack layout:
duke@435 49 // rsp: 3 integer or float args (if static first is unused)
duke@435 50 // 1 float/double identifiers
duke@435 51 // return address
duke@435 52 // stack args
duke@435 53 // garbage
duke@435 54 // expression stack bottom
duke@435 55 // bcp (NULL)
duke@435 56 // ...
duke@435 57
duke@435 58 // Do FP first so we can use c_rarg3 as temp
duke@435 59 __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers
duke@435 60
// One iteration per argument slot i. Bit 2*i of the identifier word marks
// slot i as floating point; bit 2*i+1 then selects double over float.
duke@435 61 for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) {
// Slot i maps to XMM register i+1 (the register index is offset by one).
duke@435 62 XMMRegister floatreg = as_XMMRegister(i+1);
duke@435 63 Label isfloatordouble, isdouble, next;
duke@435 64
duke@435 65 __ testl(c_rarg3, 1 << (i*2)); // Float or Double?
duke@435 66 __ jcc(Assembler::notZero, isfloatordouble);
duke@435 67
duke@435 68 // Do Int register here
duke@435 69 switch ( i ) {
duke@435 70 case 0:
// Slot 0 is loaded into c_rarg1 only when JVM_ACC_STATIC is clear in the
// method's access flags (conditional move on zero).
duke@435 71 __ movl(rscratch1, Address(rbx, methodOopDesc::access_flags_offset()));
duke@435 72 __ testl(rscratch1, JVM_ACC_STATIC);
never@739 73 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
duke@435 74 break;
duke@435 75 case 1:
never@739 76 __ movptr(c_rarg2, Address(rsp, wordSize));
duke@435 77 break;
duke@435 78 case 2:
// NOTE(review): this overwrites the identifier word held in c_rarg3. That is
// only harmless if i == 2 is the last loop iteration (i.e.
// Argument::n_int_register_parameters_c == 4) -- confirm.
never@739 79 __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
duke@435 80 break;
duke@435 81 default:
duke@435 82 break;
duke@435 83 }
duke@435 84
duke@435 85 __ jmp (next);
duke@435 86
duke@435 87 __ bind(isfloatordouble);
duke@435 88 __ testl(c_rarg3, 1 << ((i*2)+1)); // Double?
duke@435 89 __ jcc(Assembler::notZero, isdouble);
duke@435 90
duke@435 91 // Do Float Here
duke@435 92 __ movflt(floatreg, Address(rsp, i * wordSize));
duke@435 93 __ jmp(next);
duke@435 94
duke@435 95 // Do Double here
duke@435 96 __ bind(isdouble);
duke@435 97 __ movdbl(floatreg, Address(rsp, i * wordSize));
duke@435 98
duke@435 99 __ bind(next);
duke@435 100 }
duke@435 101
duke@435 102
duke@435 103 // restore rsp (undo the 4-word reservation made above)
never@739 104 __ addptr(rsp, 4 * wordSize);
duke@435 105
duke@435 106 __ ret(0);
duke@435 107
duke@435 108 return entry;
duke@435 109 }
duke@435 110 #else
// Non-Windows variant of the slow signature handler stub.
// Emits the stub at the current code position and returns its entry address.
// The emitted code reserves 14 words of stack, calls
// InterpreterRuntime::slow_signature_handler(rbx, r14, c_rarg3) through call_VM,
// then loads the XMM registers and c_rarg1..c_rarg5 from the stack slots
// described in the layout comment below (presumably filled in by that runtime
// call).
duke@435 111 address AbstractInterpreterGenerator::generate_slow_signature_handler() {
duke@435 112 address entry = __ pc();
duke@435 113
duke@435 114 // rbx: method
duke@435 115 // r14: pointer to locals
duke@435 116 // c_rarg3: first stack arg - wordSize
never@739 117 __ mov(c_rarg3, rsp);
duke@435 118 // adjust rsp: reserve 14 words (5 integer slots + 1 identifier word + 8 double slots, see layout below)
never@739 119 __ subptr(rsp, 14 * wordSize);
duke@435 120 __ call_VM(noreg,
duke@435 121 CAST_FROM_FN_PTR(address,
duke@435 122 InterpreterRuntime::slow_signature_handler),
duke@435 123 rbx, r14, c_rarg3);
duke@435 124
duke@435 125 // rax: result handler
duke@435 126
duke@435 127 // Stack layout:
duke@435 128 // rsp: 5 integer args (if static first is unused)
duke@435 129 // 1 float/double identifiers
duke@435 130 // 8 double args
duke@435 131 // return address
duke@435 132 // stack args
duke@435 133 // garbage
duke@435 134 // expression stack bottom
duke@435 135 // bcp (NULL)
duke@435 136 // ...
duke@435 137
duke@435 138 // Do FP first so we can use c_rarg3 as temp
duke@435 139 __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers
duke@435 140
// XMM register i is loaded from slot 6 + i. Bit i of the identifier word
// selects a double load (bit set) over a float load (bit clear).
duke@435 141 for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
duke@435 142 const XMMRegister r = as_XMMRegister(i);
duke@435 143
duke@435 144 Label d, done;
duke@435 145
duke@435 146 __ testl(c_rarg3, 1 << i);
duke@435 147 __ jcc(Assembler::notZero, d);
duke@435 148 __ movflt(r, Address(rsp, (6 + i) * wordSize));
duke@435 149 __ jmp(done);
duke@435 150 __ bind(d);
duke@435 151 __ movdbl(r, Address(rsp, (6 + i) * wordSize));
duke@435 152 __ bind(done);
duke@435 153 }
duke@435 154
duke@435 155 // Now handle integrals. Only do c_rarg1 if not static.
// c_rarg3 is reused here as a temporary for the access flags; its real value
// is loaded from slot 2 further down.
duke@435 156 __ movl(c_rarg3, Address(rbx, methodOopDesc::access_flags_offset()));
duke@435 157 __ testl(c_rarg3, JVM_ACC_STATIC);
never@739 158 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
duke@435 159
never@739 160 __ movptr(c_rarg2, Address(rsp, wordSize));
never@739 161 __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
never@739 162 __ movptr(c_rarg4, Address(rsp, 3 * wordSize));
never@739 163 __ movptr(c_rarg5, Address(rsp, 4 * wordSize));
duke@435 164
duke@435 165 // restore rsp (undo the 14-word reservation made above)
never@739 166 __ addptr(rsp, 14 * wordSize);
duke@435 167
duke@435 168 __ ret(0);
duke@435 169
duke@435 170 return entry;
duke@435 171 }
duke@435 172 #endif
duke@435 173
duke@435 174
duke@435 175 //
duke@435 176 // Various method entries
duke@435 177 //
duke@435 178
// Emits the interpreter entry for an intrinsic math method and returns its
// entry address, or NULL when InlineIntrinsics is off.
//   kind: which math method to generate; sqrt, sin, cos, tan, abs, log and
//         log10 are handled, anything else hits ShouldNotReachHere().
// The emitted code reads the double argument at rsp + wordSize, leaves the
// double result in xmm0, pops the return address, resets rsp to the sender sp
// (r13) and jumps back to the caller.
never@739 179 address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
never@739 180
never@739 181 // rbx: methodOop
never@739 182 // rcx: scratch
never@739 183 // r13: sender sp
duke@435 184
duke@435 185 if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
duke@435 186
duke@435 187 address entry_point = __ pc();
duke@435 188
duke@435 189 // These don't need a safepoint check because they aren't virtually
duke@435 190 // callable. We won't enter these intrinsics from compiled code.
duke@435 191 // If in the future we added an intrinsic which was virtually callable
duke@435 192 // we'd have to worry about how to safepoint so that this code is used.
duke@435 193
duke@435 194 // mathematical functions inlined by compiler
duke@435 195 // (interpreter must provide identical implementation
duke@435 196 // in order to avoid monotonicity bugs when switching
duke@435 197 // from interpreter to compiler in the middle of some
duke@435 198 // computation)
never@739 199 //
never@739 200 // stack: [ ret adr ] <-- rsp
never@739 201 // [ lo(arg) ]
never@739 202 // [ hi(arg) ]
never@739 203 //
duke@435 204
duke@435 205 // Note: For JDK 1.2 StrictMath doesn't exist and Math.sin/cos/sqrt are
duke@435 206 // native methods. Interpreter::method_kind(...) does a check for
duke@435 207 // native methods first before checking for intrinsic methods and
duke@435 208 // thus will never select this entry point. Make sure it is not
duke@435 209 // called accidentally since the SharedRuntime entry points will
duke@435 210 // not work for JDK 1.2.
duke@435 211 //
duke@435 212 // We no longer need to check for JDK 1.2 since it's EOL'ed.
duke@435 213 // The following check existed in pre 1.6 implementation,
duke@435 214 // if (Universe::is_jdk12x_version()) {
duke@435 215 // __ should_not_reach_here();
duke@435 216 // }
duke@435 217 // Universe::is_jdk12x_version() always returns false since
duke@435 218 // the JDK version is not yet determined when this method is called.
duke@435 219 // This method is called during interpreter_init() whereas
duke@435 220 // JDK version is only determined when universe2_init() is called.
duke@435 221
duke@435 222 // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are
duke@435 223 // java methods. Interpreter::method_kind(...) will select
duke@435 224 // this entry point for the corresponding methods in JDK 1.3.
never@739 225 // get argument
duke@435 226
// sqrt is computed straight from the stacked argument into xmm0 with sqrtsd;
// every other kind goes through the x87 stack.
never@739 227 if (kind == Interpreter::java_lang_math_sqrt) {
never@739 228 __ sqrtsd(xmm0, Address(rsp, wordSize));
never@739 229 } else {
never@739 230 __ fld_d(Address(rsp, wordSize));
never@739 231 switch (kind) {
never@739 232 case Interpreter::java_lang_math_sin :
never@739 233 __ trigfunc('s');
never@739 234 break;
never@739 235 case Interpreter::java_lang_math_cos :
never@739 236 __ trigfunc('c');
never@739 237 break;
never@739 238 case Interpreter::java_lang_math_tan :
never@739 239 __ trigfunc('t');
never@739 240 break;
never@739 241 case Interpreter::java_lang_math_abs:
never@739 242 __ fabs();
never@739 243 break;
never@739 244 case Interpreter::java_lang_math_log:
never@739 245 __ flog();
never@739 246 break;
never@739 247 case Interpreter::java_lang_math_log10:
never@739 248 __ flog10();
never@739 249 break;
never@739 250 default :
// Not routed through the assembler (no "__"): this fires while the stub is
// being generated, not when the generated code runs.
never@739 251 ShouldNotReachHere();
never@739 252 }
never@739 253
never@739 254 // return double result in xmm0 for interpreter and compilers.
// The x87 result is stored to a temporary 2-word stack area and reloaded
// into xmm0; the area is released again right after.
never@739 255 __ subptr(rsp, 2*wordSize);
never@739 256 // Round to 64bit precision
never@739 257 __ fstp_d(Address(rsp, 0));
never@739 258 __ movdbl(xmm0, Address(rsp, 0));
never@739 259 __ addptr(rsp, 2*wordSize);
never@739 260 }
never@739 261
never@739 262
// Return to the caller: pop the return address into rax, reset rsp to the
// sender sp held in r13, then jump to the return address.
never@739 263 __ pop(rax);
never@739 264 __ mov(rsp, r13);
duke@435 265 __ jmp(rax);
duke@435 266
duke@435 267 return entry_point;
duke@435 268 }
duke@435 269
duke@435 270
duke@435 271 // Abstract method entry
duke@435 272 // Attempt to execute abstract method. Throw exception
// Emits the entry used for abstract methods and returns its address. The
// emitted code discards the return address, resets rsp to the sender sp and
// calls InterpreterRuntime::throw_AbstractMethodError through call_VM.
duke@435 273 address InterpreterGenerator::generate_abstract_entry(void) {
duke@435 274 // rbx: methodOop
duke@435 275 // r13: sender SP
duke@435 276
duke@435 277 address entry_point = __ pc();
duke@435 278
duke@435 279 // abstract method entry
duke@435 280 // remove return address. Not really needed, since exception
duke@435 281 // handling throws away expression stack
// Note: the popped return address lands in rbx, overwriting the methodOop.
never@739 282 __ pop(rbx);
duke@435 283
duke@435 284 // adjust stack to what a normal return would do
never@739 285 __ mov(rsp, r13);
duke@435 286
duke@435 287 // throw exception
duke@435 288 __ call_VM(noreg, CAST_FROM_FN_PTR(address,
duke@435 289 InterpreterRuntime::throw_AbstractMethodError));
duke@435 290 // the call_VM checks for exception, so we should never return here.
duke@435 291 __ should_not_reach_here();
duke@435 292
duke@435 293 return entry_point;
duke@435 294 }
duke@435 295
duke@435 296
duke@435 297 // Empty method, generate a very fast return.
// Returns the entry address, or NULL when UseFastEmptyMethods is off.
// The emitted fast path returns to the caller immediately; if the safepoint
// state is anything other than _not_synchronized it branches to a normal
// interpreter entry that is generated inline right after the fast path.
duke@435 298
duke@435 299 address InterpreterGenerator::generate_empty_entry(void) {
duke@435 300 // rbx: methodOop
duke@435 301 // r13: sender sp must set sp to this value on return
duke@435 302
duke@435 303 if (!UseFastEmptyMethods) {
duke@435 304 return NULL;
duke@435 305 }
duke@435 306
duke@435 307 address entry_point = __ pc();
duke@435 308
duke@435 309 // If we need a safepoint check, generate full interpreter entry.
duke@435 310 Label slow_path;
duke@435 311 __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
duke@435 312 SafepointSynchronize::_not_synchronized);
duke@435 313 __ jcc(Assembler::notEqual, slow_path);
duke@435 314
duke@435 315 // do nothing for empty methods (do not even increment invocation counter)
duke@435 316 // Code: _return
duke@435 317 // _return
duke@435 318 // return w/o popping parameters
// Pop the return address into rax, reset rsp to the sender sp (r13), jump back.
never@739 319 __ pop(rax);
never@739 320 __ mov(rsp, r13);
duke@435 321 __ jmp(rax);
duke@435 322
duke@435 323 __ bind(slow_path);
// The normal entry's own address is deliberately discarded; callers get the
// fast-path entry_point recorded above.
duke@435 324 (void) generate_normal_entry(false);
duke@435 325 return entry_point;
duke@435 326
duke@435 327 }
duke@435 328
duke@435 329 // This method tells the deoptimizer how big an interpreted frame must be:
// Forwards every argument unchanged to layout_activation, passing NULL for
// both frame* arguments, and returns whatever layout_activation returns.
// NOTE(review): presumably the NULL frames make layout_activation compute the
// size without writing into a frame -- confirm against layout_activation.
duke@435 330 int AbstractInterpreter::size_activation(methodOop method,
duke@435 331 int tempcount,
duke@435 332 int popframe_extra_args,
duke@435 333 int moncount,
duke@435 334 int callee_param_count,
duke@435 335 int callee_locals,
duke@435 336 bool is_top_frame) {
duke@435 337 return layout_activation(method,
duke@435 338 tempcount, popframe_extra_args, moncount,
duke@435 339 callee_param_count, callee_locals,
duke@435 340 (frame*) NULL, (frame*) NULL, is_top_frame);
duke@435 341 }
duke@435 342
// Placeholder: the body only asserts that f is an interpreted frame.
// vframe_array is not used.
duke@435 343 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
duke@435 344
duke@435 345 // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
duke@435 346 // the days we had adapter frames. When we deoptimize in a situation where a
duke@435 347 // compiled caller calls a compiled callee, the caller will have registers it
duke@435 348 // expects to survive the call to the callee. If we deoptimize the callee the only
duke@435 349 // way we can restore these registers is to have the oldest interpreter
duke@435 350 // frame that we create restore these values. That is what this routine
duke@435 351 // will accomplish.
duke@435 352
duke@435 353 // At the moment we have modified c2 to not have any callee save registers
duke@435 354 // so this problem does not exist and this routine is just a place holder.
duke@435 355
duke@435 356 assert(f->is_interpreted_frame(), "must be interpreted");
duke@435 357 }

mercurial