src/cpu/x86/vm/c1_CodeStubs_x86.cpp

Tue, 14 Oct 2008 15:10:26 -0700

author
kvn
date
Tue, 14 Oct 2008 15:10:26 -0700
changeset 840
2649e5276dd7
parent 797
f8199438385b
child 815
eb28cf662f56
permissions
-rw-r--r--

6532536: Optimize arraycopy stubs for Intel cpus
Summary: Use SSE2 movdqu in arraycopy stubs on newest Intel's cpus
Reviewed-by: rasbold

     1 /*
     2  * Copyright 1999-2006 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_c1_CodeStubs_x86.cpp.incl"
    // Shorthand used by every emit_code in this file: "__ insn(...)" expands to
    // "ce->masm()->insn(...)", so a variable named ce must be in scope.
    29 #define __ ce->masm()->
    // Zero constants; ConversionStub::emit_code compares its xmm input against
    // them through an ExternalAddress.
    31 float ConversionStub::float_zero = 0.0;
    32 double ConversionStub::double_zero = 0.0;
    // Out-of-line code for the f2i/d2i bytecodes (see the assert below).
    // Compares the input with zero and fixes up the result register before
    // jumping back to _continuation:
    //   parity flag set (NaN) -> result is cleared to 0
    //   below zero            -> result is left as it is
    //   otherwise             -> result is decremented by one
    34 void ConversionStub::emit_code(LIR_Assembler* ce) {
    35   __ bind(_entry);
    36   assert(bytecode() == Bytecodes::_f2i || bytecode() == Bytecodes::_d2i, "other conversions do not require stub");
         // Set the condition flags by comparing the input against zero.
    39   if (input()->is_single_xmm()) {
    40     __ comiss(input()->as_xmm_float_reg(),
    41               ExternalAddress((address)&float_zero));
    42   } else if (input()->is_double_xmm()) {
    43     __ comisd(input()->as_xmm_double_reg(),
    44               ExternalAddress((address)&double_zero));
    45   } else {
           // Non-xmm input: this branch must not be reached on LP64.
           // The x87 test result is transferred to the flags via
           // fnstsw_ax/sahf; rax is pushed and popped around the sequence so
           // that its value survives.
    46     LP64_ONLY(ShouldNotReachHere());
    47     __ push(rax);
    48     __ ftst();
    49     __ fnstsw_ax();
    50     __ sahf();
    51     __ pop(rax);
    52   }
    54   Label NaN, do_return;
         // Short (8-bit displacement) branches: NaN first, then negative input.
    55   __ jccb(Assembler::parity, NaN);
    56   __ jccb(Assembler::below, do_return);
    58   // input is > 0 -> return maxInt
    59   // result register already contains 0x80000000, so subtracting 1 gives 0x7fffffff
    60   __ decrement(result()->as_register());
    61   __ jmpb(do_return);
    63   // input is NaN -> return 0
    64   __ bind(NaN);
    65   __ xorptr(result()->as_register(), result()->as_register());
    67   __ bind(do_return);
    68   __ jmp(_continuation);
    69 }
    // Only compiled when TIERED is defined.
    // Stores _bci as stack parameter 0, calls the Runtime1 counter_overflow
    // entry, records call info / verifies the oop map for the call, and then
    // jumps back to _continuation.
    71 #ifdef TIERED
    72 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
    73   __ bind(_entry);
    74   ce->store_parameter(_bci, 0);
    75   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
    76   ce->add_call_info_here(_info);
    77   ce->verify_oop_map(_info);
    79   __ jmp(_continuation);
    80 }
    81 #endif // TIERED
    // Creates a range-check stub for the given index operand.
    //   info  - copied into a new CodeEmitInfo; a NULL info is kept as NULL
    //   index - operand holding the offending index
    //   throw_index_out_of_bounds_exception - selects which Runtime1 throw
    //           stub emit_code will call
    85 RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
    86                                bool throw_index_out_of_bounds_exception)
    87   : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
    88   , _index(index)
    89 {
    90   _info = info == NULL ? NULL : new CodeEmitInfo(info);
    91 }
    // Passes the index (from a cpu register, or otherwise as a jint constant)
    // as stack parameter 0 and calls throw_index_exception or
    // throw_range_check_failed, depending on
    // _throw_index_out_of_bounds_exception. No jump back to the continuation
    // is emitted after the call.
    94 void RangeCheckStub::emit_code(LIR_Assembler* ce) {
    95   __ bind(_entry);
    96   // pass the array index on stack because all registers must be preserved
    97   if (_index->is_cpu_register()) {
    98     ce->store_parameter(_index->as_register(), 0);
    99   } else {
   100     ce->store_parameter(_index->as_jint(), 0);
   101   }
   102   Runtime1::StubID stub_id;
   103   if (_throw_index_out_of_bounds_exception) {
   104     stub_id = Runtime1::throw_index_exception_id;
   105   } else {
   106     stub_id = Runtime1::throw_range_check_failed_id;
   107   }
   108   __ call(RuntimeAddress(Runtime1::entry_for(stub_id)));
   109   ce->add_call_info_here(_info);
         // The call is not expected to return; trap here if it does
         // (presumably emitted in debug builds only - see debug_only).
   110   debug_only(__ should_not_reach_here());
   111 }
   // Emits a call to the Runtime1 throw_div0_exception stub.
   // When _offset is not -1, the pair (_offset, current code offset) is first
   // appended to the compilation's implicit exception table - presumably so
   // that a fault at _offset is dispatched to this stub.
   114 void DivByZeroStub::emit_code(LIR_Assembler* ce) {
   115   if (_offset != -1) {
   116     ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
   117   }
   118   __ bind(_entry);
   119   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_div0_exception_id)));
   120   ce->add_call_info_here(_info);
         // The call is not expected to return.
   121   debug_only(__ should_not_reach_here());
   122 }
   125 // Implementation of NewInstanceStub
   // Records the operands and klass for the allocation slow path and takes a
   // private copy of info. stub_id must be one of the three new_instance
   // variants checked by the assert; it is the stub emit_code will call.
   127 NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
   128   _result = result;
   129   _klass = klass;
   130   _klass_reg = klass_reg;
   131   _info = new CodeEmitInfo(info);
   132   assert(stub_id == Runtime1::new_instance_id                 ||
   133          stub_id == Runtime1::fast_new_instance_id            ||
   134          stub_id == Runtime1::fast_new_instance_init_check_id,
   135          "need new_instance id");
   136   _stub_id   = stub_id;
   137 }
   // Slow path for instance allocation: copies the klass register into rdx,
   // calls the Runtime1 stub chosen at construction time, and jumps back to
   // _continuation. The result operand is asserted to be rax.
   140 void NewInstanceStub::emit_code(LIR_Assembler* ce) {
   141   assert(__ rsp_offset() == 0, "frame size should be fixed");
   142   __ bind(_entry);
   143   __ movptr(rdx, _klass_reg->as_register());
   144   __ call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
   145   ce->add_call_info_here(_info);
   146   ce->verify_oop_map(_info);
   147   assert(_result->as_register() == rax, "result must in rax,");
   148   __ jmp(_continuation);
   149 }
   152 // Implementation of NewTypeArrayStub
   // Records the klass, length and result operands and takes a private copy
   // of info for the type-array allocation slow path.
   154 NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
   155   _klass_reg = klass_reg;
   156   _length = length;
   157   _result = result;
   158   _info = new CodeEmitInfo(info);
   159 }
   // Slow path for type-array allocation: calls the Runtime1 new_type_array
   // stub and jumps back to _continuation. No register moves are emitted;
   // the asserts require length in rbx, klass in rdx and the result in rax.
   162 void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
   163   assert(__ rsp_offset() == 0, "frame size should be fixed");
   164   __ bind(_entry);
   165   assert(_length->as_register() == rbx, "length must in rbx,");
   166   assert(_klass_reg->as_register() == rdx, "klass_reg must in rdx");
   167   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
   168   ce->add_call_info_here(_info);
   169   ce->verify_oop_map(_info);
   170   assert(_result->as_register() == rax, "result must in rax,");
   171   __ jmp(_continuation);
   172 }
   175 // Implementation of NewObjectArrayStub
   // Records the klass, length and result operands and takes a private copy
   // of info for the object-array allocation slow path.
   177 NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
   178   _klass_reg = klass_reg;
   179   _result = result;
   180   _length = length;
   181   _info = new CodeEmitInfo(info);
   182 }
   // Slow path for object-array allocation: calls the Runtime1
   // new_object_array stub and jumps back to _continuation. No register moves
   // are emitted; the asserts require length in rbx, klass in rdx and the
   // result in rax.
   185 void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
   186   assert(__ rsp_offset() == 0, "frame size should be fixed");
   187   __ bind(_entry);
   188   assert(_length->as_register() == rbx, "length must in rbx,");
   189   assert(_klass_reg->as_register() == rdx, "klass_reg must in rdx");
   190   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
   191   ce->add_call_info_here(_info);
   192   ce->verify_oop_map(_info);
   193   assert(_result->as_register() == rax, "result must in rax,");
   194   __ jmp(_continuation);
   195 }
   198 // Implementation of MonitorAccessStubs
   // Forwards the object and lock operands to MonitorAccessStub and takes a
   // private copy of info.
   200 MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
   201 : MonitorAccessStub(obj_reg, lock_reg)
   202 {
   203   _info = new CodeEmitInfo(info);
   204 }
   // Slow path for monitor enter: stores the object as stack parameter 1 and
   // the lock as stack parameter 0, calls the Runtime1 monitorenter stub
   // (the _nofpu variant when the compilation has no FPU code), and jumps
   // back to _continuation.
   207 void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
   208   assert(__ rsp_offset() == 0, "frame size should be fixed");
   209   __ bind(_entry);
   210   ce->store_parameter(_obj_reg->as_register(),  1);
   211   ce->store_parameter(_lock_reg->as_register(), 0);
   212   Runtime1::StubID enter_id;
   213   if (ce->compilation()->has_fpu_code()) {
   214     enter_id = Runtime1::monitorenter_id;
   215   } else {
   216     enter_id = Runtime1::monitorenter_nofpu_id;
   217   }
   218   __ call(RuntimeAddress(Runtime1::entry_for(enter_id)));
   219   ce->add_call_info_here(_info);
   220   ce->verify_oop_map(_info);
   221   __ jmp(_continuation);
   222 }
   // Slow path for monitor exit: optionally recomputes the lock address,
   // stores the lock as stack parameter 0, calls the Runtime1 monitorexit
   // stub (the _nofpu variant when the compilation has no FPU code), and
   // jumps back to _continuation. No call info is recorded for this call.
   225 void MonitorExitStub::emit_code(LIR_Assembler* ce) {
   226   __ bind(_entry);
   227   if (_compute_lock) {
   228     // lock_reg was destroyed by fast unlocking attempt => recompute it
   229     ce->monitor_address(_monitor_ix, _lock_reg);
   230   }
   231   ce->store_parameter(_lock_reg->as_register(), 0);
   232   // note: non-blocking leaf routine => no call info needed
   233   Runtime1::StubID exit_id;
   234   if (ce->compilation()->has_fpu_code()) {
   235     exit_id = Runtime1::monitorexit_id;
   236   } else {
   237     exit_id = Runtime1::monitorexit_nofpu_id;
   238   }
   239   __ call(RuntimeAddress(Runtime1::entry_for(exit_id)));
   240   __ jmp(_continuation);
   241 }
   244 // Implementation of patching:
   245 // - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes)
   246 // - Replace original code with a call to the stub
   247 // At Runtime:
   248 // - call to stub, jump to runtime
   249 // - in runtime: preserve all registers (especially objects, i.e., source and destination object)
   250 // - in runtime: after initializing class, restore original code, reexecute instruction
   // Negative offset from the pc just after the stub's runtime call back to
   // the end of the patch record; PatchingStub::emit_code asserts that the
   // emitted code matches this value.
   252 int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;
   // Aligns the current code position of masm so that the instruction about
   // to be emitted at the patch site starts on a boundary of
   // round_to(NativeGeneralJump::instruction_size, wordSize) bytes.
   254 void PatchingStub::align_patch_site(MacroAssembler* masm) {
   255   // We're patching a 5-7 byte instruction on intel and we need to
   256   // make sure that we don't see a piece of the instruction.  It
   257   // appears mostly impossible on Intel to simply invalidate other
   258   // processors caches and since they may do aggressive prefetch it's
   259   // very hard to make a guess about what code might be in the icache.
   260   // Force the instruction to be double word aligned so that it
   261   // doesn't span a cache line.
   262   masm->align(round_to(NativeGeneralJump::instruction_size, wordSize));
   263 }
   // Emits the out-of-line part of a patch site. In emission order:
   //   1. a copy of the instruction bytes found at _pc_start (for
   //      load_klass_id a fresh movoop is emitted instead and, under ASSERT,
   //      compared byte by byte with the site),
   //   2. for load_klass_id only: a check that the current thread is the
   //      klass's initializing thread, followed by a jump back into the
   //      main code,
   //   3. a 5-byte patch record made to look like a movl reg, imm32,
   //   4. the call into the Runtime1 patching stub and a jump to
   //      _patch_site_entry, padded with nops.
   // It also overwrites the code at _pc_start with an unconditional jump to
   // the stub entry.
   265 void PatchingStub::emit_code(LIR_Assembler* ce) {
         // The site must be large enough to hold the jump that replaces it, and
         // _bytes_to_copy must fit into the single byte written to the patch
         // record below.
   266   assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, "not enough room for call");
   268   Label call_patch;
   270   // static field accesses have special semantics while the class
   271   // initializer is being run so we emit a test which can be used to
   272   // check that this code is being executed by the initializing
   273   // thread.
   274   address being_initialized_entry = __ pc();
   275   if (CommentedAssembly) {
   276     __ block_comment(" patch template");
   277   }
   278   if (_id == load_klass_id) {
   279     // produce a copy of the load klass instruction for use by the being initialized case
   280     address start = __ pc();
   281     jobject o = NULL;
   282     __ movoop(_obj, o);
   283 #ifdef ASSERT
           // Check that the freshly emitted movoop matches the bytes at the site.
   284     for (int i = 0; i < _bytes_to_copy; i++) {
   285       address ptr = (address)(_pc_start + i);
   286       int a_byte = (*ptr) & 0xFF;
   287       assert(a_byte == *start++, "should be the same code");
   288     }
   289 #endif
   290   } else {
   291     // make a copy of the code which is going to be patched.
   292     for ( int i = 0; i < _bytes_to_copy; i++) {
   293       address ptr = (address)(_pc_start + i);
   294       int a_byte = (*ptr) & 0xFF;
   295       __ a_byte (a_byte);
   296       *ptr = 0x90; // make the site look like a nop
   297     }
   298   }
   300   address end_of_patch = __ pc();
         // Number of bytes between end_of_patch and the end of the patch record
         // (checked by the "incorrect patch info" assert below).
   301   int bytes_to_skip = 0;
   302   if (_id == load_klass_id) {
   303     int offset = __ offset();
   304     if (CommentedAssembly) {
   305       __ block_comment(" being_initialized check");
   306     }
   307     assert(_obj != noreg, "must be a valid register");
           // Pick a scratch register different from _obj and preserve it
           // around the thread comparison.
   308     Register tmp = rax;
   309     if (_obj == tmp) tmp = rbx;
   310     __ push(tmp);
   311     __ get_thread(tmp);
   312     __ cmpptr(tmp, Address(_obj, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc)));
   313     __ pop(tmp);
           // Not the initializing thread: go to the runtime call.
   314     __ jcc(Assembler::notEqual, call_patch);
   316     // access_field patches may execute the patched code before it's
   317     // copied back into place so we need to jump back into the main
   318     // code of the nmethod to continue execution.
   319     __ jmp(_patch_site_continuation);
   321     // make sure this extra code gets skipped
   322     bytes_to_skip += __ offset() - offset;
   323   }
   324   if (CommentedAssembly) {
   325     __ block_comment("patch data encoded as movl");
   326   }
   327   // Now emit the patch record telling the runtime how to find the
   328   // pieces of the patch.  We only need 3 bytes but for readability of
   329   // the disassembly we make the data look like a movl reg, imm32,
   330   // which requires 5 bytes
   331   int sizeof_patch_record = 5;
   332   bytes_to_skip += sizeof_patch_record;
   334   // emit the offsets needed to find the code to patch
   335   int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
         // Record layout: 0xB8, 0, then the three payload bytes.
         // NOTE(review): being_initialized_entry_offset and bytes_to_skip are
         // each written as a single byte; nothing here checks that they fit.
   337   __ a_byte(0xB8);
   338   __ a_byte(0);
   339   __ a_byte(being_initialized_entry_offset);
   340   __ a_byte(bytes_to_skip);
   341   __ a_byte(_bytes_to_copy);
   342   address patch_info_pc = __ pc();
   343   assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
         // Redirect the original site to the stub entry that follows the record.
   345   address entry = __ pc();
   346   NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
   347   address target = NULL;
   348   switch (_id) {
   349     case access_field_id:  target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
   350     case load_klass_id:    target = Runtime1::entry_for(Runtime1::load_klass_patching_id); break;
   351     default: ShouldNotReachHere();
   352   }
   353   __ bind(call_patch);
   355   if (CommentedAssembly) {
   356     __ block_comment("patch entry point");
   357   }
   358   __ call(RuntimeAddress(target));
         // The call must end exactly -_patch_info_offset bytes after the record.
   359   assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
   360   ce->add_call_info_here(_info);
   361   int jmp_off = __ offset();
   362   __ jmp(_patch_site_entry);
   363   // Add enough nops so deoptimization can overwrite the jmp above with a call
   364   // and not destroy the world.
   365   for (int j = __ offset() ; j < jmp_off + 5 ; j++ ) {
   366     __ nop();
   367   }
   368   if (_id == load_klass_id) {
           // Change the relocation recorded at _pc_start from oop_type to none.
   369     CodeSection* cs = __ code_section();
   370     RelocIterator iter(cs, (address)_pc_start, (address)(_pc_start + 1));
   371     relocInfo::change_reloc_info_for_address(&iter, (address) _pc_start, relocInfo::oop_type, relocInfo::none);
   372   }
   373 }
   // Appends (_offset, current code offset) to the compilation's implicit
   // exception table - presumably so that a fault at _offset is dispatched
   // here - and emits a call to the Runtime1 throw_null_pointer_exception
   // stub.
   376 void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
   377   ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
   378   __ bind(_entry);
   379   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id)));
   380   ce->add_call_info_here(_info);
         // The call is not expected to return.
   381   debug_only(__ should_not_reach_here());
   382 }
   // Emits a call to the Runtime1 stub identified by _stub. When _obj is a
   // cpu register it is first stored as stack parameter 0; otherwise no
   // parameter is stored.
   385 void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
   386   assert(__ rsp_offset() == 0, "frame size should be fixed");
   388   __ bind(_entry);
   389   // pass the object on stack because all registers must be preserved
   390   if (_obj->is_cpu_register()) {
   391     ce->store_parameter(_obj->as_register(), 0);
   392   }
   393   __ call(RuntimeAddress(Runtime1::entry_for(_stub)));
   394   ce->add_call_info_here(_info);
         // The call is not expected to return.
   395   debug_only(__ should_not_reach_here());
   396 }
   // Stores the info pointer as given; unlike the other stub constructors in
   // this file, no new CodeEmitInfo copy is made here.
   399 ArrayStoreExceptionStub::ArrayStoreExceptionStub(CodeEmitInfo* info):
   400   _info(info) {
   401 }
   // Emits a call to the Runtime1 throw_array_store_exception stub; no
   // parameters are stored and no jump back is emitted.
   404 void ArrayStoreExceptionStub::emit_code(LIR_Assembler* ce) {
   405   assert(__ rsp_offset() == 0, "frame size should be fixed");
   406   __ bind(_entry);
   407   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_array_store_exception_id)));
   408   ce->add_call_info_here(_info);
         // The call is not expected to return.
   409   debug_only(__ should_not_reach_here());
   410 }
   // Slow path for arraycopy: places the five arguments (src, src_pos, dst,
   // dst_pos, length) where the Java calling convention expects them, emits
   // a static call through the resolve-static-call stub, and jumps back to
   // _continuation.
   413 void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
   414   //---------------slow case: call to native-----------------
   415   __ bind(_entry);
   416   // Figure out where the args should go
   417   // This should really convert the IntrinsicID to the methodOop and signature
   418   // but I don't know how to do that.
   419   //
   420   VMRegPair args[5];
   421   BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT};
   422   SharedRuntime::java_calling_convention(signature, args, 5, true);
   424   // push parameters
   425   // (src, src_pos, dest, destPos, length)
   426   Register r[5];
   427   r[0] = src()->as_register();
   428   r[1] = src_pos()->as_register();
   429   r[2] = dst()->as_register();
   430   r[3] = dst_pos()->as_register();
   431   r[4] = length()->as_register();
   433   // next registers will get stored on the stack
   434   for (int i = 0; i < 5 ; i++ ) {
   435     VMReg r_1 = args[i].first();
   436     if (r_1->is_stack()) {
             // Stack-passed argument: store it at its slot relative to rsp.
   437       int st_off = r_1->reg2stack() * wordSize;
   438       __ movptr (Address(rsp, st_off), r[i]);
   439     } else {
             // Register-passed argument: no move is emitted; the operand must
             // already be in the register the convention assigns.
   440       assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg ");
   441     }
   442   }
   444   ce->align_call(lir_static_call);
   446   ce->emit_static_call_stub();
   447   AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(),
   448                          relocInfo::static_call_type);
   449   __ call(resolve);
   450   ce->add_call_info_here(info());
   452 #ifndef PRODUCT
         // Non-product builds count how often this slow case is taken.
   453   __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
   454 #endif
   456   __ jmp(_continuation);
   457 }
   459 /////////////////////////////////////////////////////////////////////////////
   460 #ifndef SERIALGC
   // Slow path of the G1 pre-barrier: loads the value at addr() into the
   // pre_val register, returns to _continuation if it is null, and otherwise
   // passes it as stack parameter 0 to the Runtime1 g1_pre_barrier_slow stub
   // before jumping back. No call info is recorded for the runtime call.
   462 void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
   464   // At this point we know that marking is in progress
   466   __ bind(_entry);
   467   assert(pre_val()->is_register(), "Precondition.");
   469   Register pre_val_reg = pre_val()->as_register();
   471   ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
   473   __ cmpptr(pre_val_reg, (int32_t) NULL_WORD);
   474   __ jcc(Assembler::equal, _continuation);
   475   ce->store_parameter(pre_val()->as_register(), 0);
   476   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
   477   __ jmp(_continuation);
   479 }
   // Starts out NULL; nothing in the visible part of this file assigns it.
   481 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
   // Returns the byte_map_base field of the heap's barrier set. The barrier
   // set is asserted to be of kind G1SATBCTLogging and is then cast to
   // G1SATBCardTableModRefBS.
   483 jbyte* G1PostBarrierStub::byte_map_base_slow() {
   484   BarrierSet* bs = Universe::heap()->barrier_set();
   485   assert(bs->is_a(BarrierSet::G1SATBCTLogging),
   486          "Must be if we're using this.");
   487   return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
   488 }
   // Slow path of the G1 post-barrier: returns to _continuation when the new
   // value is null; otherwise passes the address register as stack
   // parameter 0 to the Runtime1 g1_post_barrier_slow stub and jumps back.
   // No call info is recorded for the runtime call.
   490 void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
   491   __ bind(_entry);
   492   assert(addr()->is_register(), "Precondition.");
   493   assert(new_val()->is_register(), "Precondition.");
   494   Register new_val_reg = new_val()->as_register();
   495   __ cmpptr(new_val_reg, (int32_t) NULL_WORD);
   496   __ jcc(Assembler::equal, _continuation);
   497   ce->store_parameter(addr()->as_register(), 0);
   498   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
   499   __ jmp(_continuation);
   500 }
   502 #endif // SERIALGC
   503 /////////////////////////////////////////////////////////////////////////////
   505 #undef __

mercurial