--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Mar 13 11:35:17 2009 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Mar 13 18:39:22 2009 -0700
@@ -900,19 +900,7 @@
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
     address start = __ pc();
-    Label loop, miss;
-
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to increase overall code density, with no real loss of speed.
-    { Label L;
-      __ cmp(O1, O2);
-      __ brx(Assembler::notEqual, false, Assembler::pt, L);
-      __ delayed()->nop();
-      __ retl();
-      __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
-      __ bind(L);
-    }
+    Label miss;
 
 #if defined(COMPILER2) && !defined(_LP64)
     // Do not use a 'save' because it blows the 64-bit O registers.
@@ -936,56 +924,12 @@
     Register L2_super = L2;
     Register L3_index = L3;
 
-#ifdef _LP64
-    Register L4_ooptmp = L4;
-
-    if (UseCompressedOops) {
-      // this must be under UseCompressedOops check, as we rely upon fact
-      // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
-      // on stack, see several lines above
-      __ encode_heap_oop(Rsuper, L4_ooptmp);
-    }
-#endif
-
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
-
-    __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-    __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
-    __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
-    __ clr(L3_index); // zero index
-    // Load a little early; will load 1 off the end of the array.
-    // Ok for now; revisit if we have other uses of this routine.
-    if (UseCompressedOops) {
-      __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-    } else {
-      __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    assert(heapOopSize != 0, "heapOopSize should be initialized");
-    // The scan loop
-    __ BIND(loop);
-    __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
-    __ cmp(L3_index,L0_ary_len);
-    __ br(Assembler::equal,false,Assembler::pn,miss);
-    __ delayed()->inc(L3_index); // Bump index
-
-    if (UseCompressedOops) {
-#ifdef _LP64
-      __ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit
-      __ br( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-#else
-      ShouldNotReachHere();
-#endif
-    } else {
-      __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit
-      __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    // Got a hit; report success; set cache. Cache load doesn't
-    // happen here; for speed it is directly emitted by the compiler.
-    __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+    __ check_klass_subtype_slow_path(Rsub, Rsuper,
+                                     L0, L1, L2, L3,
+                                     NULL, &miss);
+
+    // Match falls through here.
+    __ addcc(G0,0,Rret); // set Z flags, Z result
 
 #if defined(COMPILER2) && !defined(_LP64)
     __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
@@ -999,7 +943,6 @@
     __ delayed()->restore();
 #endif
 
-    // Hit or miss falls through here
     __ BIND(miss);
     __ addcc(G0,1,Rret); // set NZ flags, NZ result
 
@@ -2330,51 +2273,31 @@
                            Register super_check_offset,
                            Register super_klass,
                            Register temp,
-                           Label& L_success,
-                           Register deccc_hack = noreg) {
+                           Label& L_success) {
     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
 
     BLOCK_COMMENT("type_check:");
 
-    Label L_miss;
+    Label L_miss, L_pop_to_miss;
 
     assert_clean_int(super_check_offset, temp);
 
-    // maybe decrement caller's trip count:
-#define DELAY_SLOT delayed();   \
-      { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
-
-    // if the pointers are equal, we are done (e.g., String[] elements)
-    __ cmp(sub_klass, super_klass);
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    // check the supertype display:
-    __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
-    __ cmp(super_klass, temp); // test the super type
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
-    __ cmp(super_klass, sc_offset);
-    __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
-    __ delayed()->nop();
-
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
+                                     &L_success, &L_miss, NULL,
+                                     super_check_offset);
+
+    BLOCK_COMMENT("type_check_slow_path:");
     __ save_frame(0);
-    __ mov(sub_klass->after_save(), O1);
-    // mov(super_klass->after_save(), O2); //fill delay slot
-    assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
-    __ call(StubRoutines::Sparc::_partial_subtype_check);
-    __ delayed()->mov(super_klass->after_save(), O2);
+    __ check_klass_subtype_slow_path(sub_klass->after_save(),
+                                     super_klass->after_save(),
+                                     L0, L1, L2, L4,
+                                     NULL, &L_pop_to_miss);
+    __ ba(false, L_success);
+    __ delayed()->restore();
+
+    __ bind(L_pop_to_miss);
     __ restore();
 
-    // Upon return, the condition codes are already set.
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-#undef DELAY_SLOT
-
     // Fall through on failure!
     __ BIND(L_miss);
   }
@@ -2411,7 +2334,7 @@
     gen_write_ref_array_pre_barrier(O1, O2);
 
 #ifdef ASSERT
-    // We sometimes save a frame (see partial_subtype_check below).
+    // We sometimes save a frame (see generate_type_check below).
     // If this will cause trouble, let's fail now instead of later.
     __ save_frame(0);
     __ restore();
@@ -2455,41 +2378,39 @@
     // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
     __ align(16);
 
-    __ bind(store_element);
-    // deccc(G1_remain); // decrement the count (hoisted)
+    __ BIND(store_element);
+    __ deccc(G1_remain); // decrement the count
     __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
     __ inc(O5_offset, heapOopSize); // step to next offset
     __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
     __ delayed()->set(0, O0); // return -1 on success
 
     // ======== loop entry is here ========
-    __ bind(load_element);
+    __ BIND(load_element);
     __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
     __ br_null(G3_oop, true, Assembler::pt, store_element);
-    __ delayed()->deccc(G1_remain); // decrement the count
+    __ delayed()->nop();
 
     __ load_klass(G3_oop, G4_klass); // query the object klass
 
     generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                         // branch to this on success:
-                        store_element,
-                        // decrement this on success:
-                        G1_remain);
+                        store_element);
     // ======== end loop ========
 
     // It was a real error; we must depend on the caller to finish the job.
     // Register G1 has number of *remaining* oops, O2 number of *total* oops.
     // Emit GC store barriers for the oops we have copied (O2 minus G1),
     // and report their number to the caller.
-    __ bind(fail);
+    __ BIND(fail);
     __ subcc(O2_count, G1_remain, O2_count);
     __ brx(Assembler::zero, false, Assembler::pt, done);
     __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
 
-    __ bind(do_card_marks);
+    __ BIND(do_card_marks);
     gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
 
-    __ bind(done);
+    __ BIND(done);
     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->nop(); // return value in O0