898 // raddr: O7, blown by call |
898 // raddr: O7, blown by call |
899 address generate_partial_subtype_check() { |
899 address generate_partial_subtype_check() { |
900 __ align(CodeEntryAlignment); |
900 __ align(CodeEntryAlignment); |
901 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); |
901 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); |
902 address start = __ pc(); |
902 address start = __ pc(); |
903 Label loop, miss; |
903 Label miss; |
904 |
|
905 // Compare super with sub directly, since super is not in its own SSA. |
|
906 // The compiler used to emit this test, but we fold it in here, |
|
907 // to increase overall code density, with no real loss of speed. |
|
908 { Label L; |
|
909 __ cmp(O1, O2); |
|
910 __ brx(Assembler::notEqual, false, Assembler::pt, L); |
|
911 __ delayed()->nop(); |
|
912 __ retl(); |
|
913 __ delayed()->addcc(G0,0,O0); // set Z flags, zero result |
|
914 __ bind(L); |
|
915 } |
|
916 |
904 |
917 #if defined(COMPILER2) && !defined(_LP64) |
905 #if defined(COMPILER2) && !defined(_LP64) |
918 // Do not use a 'save' because it blows the 64-bit O registers. |
906 // Do not use a 'save' because it blows the 64-bit O registers. |
919 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned) |
907 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned) |
920 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize); |
908 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize); |
934 Register L0_ary_len = L0; |
922 Register L0_ary_len = L0; |
935 Register L1_ary_ptr = L1; |
923 Register L1_ary_ptr = L1; |
936 Register L2_super = L2; |
924 Register L2_super = L2; |
937 Register L3_index = L3; |
925 Register L3_index = L3; |
938 |
926 |
939 #ifdef _LP64 |
927 __ check_klass_subtype_slow_path(Rsub, Rsuper, |
940 Register L4_ooptmp = L4; |
928 L0, L1, L2, L3, |
941 |
929 NULL, &miss); |
942 if (UseCompressedOops) { |
930 |
943 // This must be under the UseCompressedOops check, as we rely upon the fact |
931 // Match falls through here. |
944 // that L4 is not clobbered in C2 on 32-bit platforms, where we do an explicit save |
932 __ addcc(G0,0,Rret); // set Z flags, Z result |
945 // on the stack; see several lines above. |
|
946 __ encode_heap_oop(Rsuper, L4_ooptmp); |
|
947 } |
|
948 #endif |
|
949 |
|
950 inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1); |
|
951 |
|
952 __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 ); |
|
953 __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len); |
|
954 __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr); |
|
955 __ clr(L3_index); // zero index |
|
956 // Load a little early; will load 1 off the end of the array. |
|
957 // Ok for now; revisit if we have other uses of this routine. |
|
958 if (UseCompressedOops) { |
|
959 __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early |
|
960 } else { |
|
961 __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early |
|
962 } |
|
963 |
|
964 assert(heapOopSize != 0, "heapOopSize should be initialized"); |
|
965 // The scan loop |
|
966 __ BIND(loop); |
|
967 __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size |
|
968 __ cmp(L3_index,L0_ary_len); |
|
969 __ br(Assembler::equal,false,Assembler::pn,miss); |
|
970 __ delayed()->inc(L3_index); // Bump index |
|
971 |
|
972 if (UseCompressedOops) { |
|
973 #ifdef _LP64 |
|
974 __ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit |
|
975 __ br( Assembler::notEqual, false, Assembler::pt, loop ); |
|
976 __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early |
|
977 #else |
|
978 ShouldNotReachHere(); |
|
979 #endif |
|
980 } else { |
|
981 __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit |
|
982 __ brx( Assembler::notEqual, false, Assembler::pt, loop ); |
|
983 __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early |
|
984 } |
|
985 |
|
986 // Got a hit; report success; set cache. Cache load doesn't |
|
987 // happen here; for speed it is directly emitted by the compiler. |
|
988 __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); |
|
989 |
933 |
990 #if defined(COMPILER2) && !defined(_LP64) |
934 #if defined(COMPILER2) && !defined(_LP64) |
991 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); |
935 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); |
992 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); |
936 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); |
993 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); |
937 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); |
2328 // Smashes only the given temp registers. |
2271 // Smashes only the given temp registers. |
// NOTE: this span is a side-by-side diff listing, not compilable source.
// Rows numbered 2329-2380 belong to the older revision of generate_type_check;
// rows numbered 2272-2303 belong to the newer revision. The two are interleaved.
//
// generate_type_check emits a type check of sub_klass against super_klass.
// The emitted code branches to L_success when the check passes and falls
// through (at L_miss) when it fails.
//   sub_klass          - register with the klass being tested
//   super_check_offset - register with the offset used to load the supertype
//                        slot from sub_klass (older rows: ld_ptr(sub_klass,
//                        super_check_offset, temp))
//   super_klass        - register with the klass tested against
//   temp               - scratch register
//   L_success          - label branched to on a successful check
//   deccc_hack         - older revision only; when not noreg, it is deccc'd in
//                        the delay slot of every branch to L_success
2329 void generate_type_check(Register sub_klass, |
2272 void generate_type_check(Register sub_klass, |
2330 Register super_check_offset, |
2273 Register super_check_offset, |
2331 Register super_klass, |
2274 Register super_klass, |
2332 Register temp, |
2275 Register temp, |
2333 Label& L_success, |
2276 Label& L_success) { |
2334 Register deccc_hack = noreg) { |

2335 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); |
2277 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); |
2336 |
2278 |
2337 BLOCK_COMMENT("type_check:"); |
2279 BLOCK_COMMENT("type_check:"); |
2338 |
2280 |
2339 Label L_miss; |
2281 Label L_miss, L_pop_to_miss; |
2340 |
2282 |
2341 assert_clean_int(super_check_offset, temp); |
2283 assert_clean_int(super_check_offset, temp); |
2342 |
2284 |
// Older rows: DELAY_SLOT fills the delay slot of each success branch with a
// nop, or with deccc(deccc_hack) when a register was supplied, and the
// pointer-equality and supertype-slot tests are emitted inline.
// Newer rows: a single call to check_klass_subtype_fast_path takes their place
// (presumably emitting the equivalent tests; confirm against its definition).
2343 // maybe decrement caller's trip count: |
2285 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg, |
2344 #define DELAY_SLOT delayed(); \ |
2286 &L_success, &L_miss, NULL, |
2345 { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); } |
2287 super_check_offset); |
2346 |
2288 |
2347 // if the pointers are equal, we are done (e.g., String[] elements) |
2289 BLOCK_COMMENT("type_check_slow_path:"); |
2348 __ cmp(sub_klass, super_klass); |

2349 __ brx(Assembler::equal, true, Assembler::pt, L_success); |

2350 __ DELAY_SLOT; |

2351 |

2352 // check the supertype display: |

2353 __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type |

2354 __ cmp(super_klass, temp); // test the super type |

2355 __ brx(Assembler::equal, true, Assembler::pt, L_success); |

2356 __ DELAY_SLOT; |

2357 |

2358 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + |

2359 Klass::secondary_super_cache_offset_in_bytes()); |

2360 __ cmp(super_klass, sc_offset); |

2361 __ brx(Assembler::notEqual, true, Assembler::pt, L_miss); |

2362 __ delayed()->nop(); |

2363 |

// Both revisions bracket the slow path with save_frame(0) ... restore().
// Older rows: sub/super are moved to O1/O2 and the _partial_subtype_check stub
// is called; the branch after restore() tests the condition codes it left.
// Newer rows: check_klass_subtype_slow_path is emitted here with L0, L1, L2, L4
// as temps; a miss jumps to L_pop_to_miss.
2364 __ save_frame(0); |
2290 __ save_frame(0); |
2365 __ mov(sub_klass->after_save(), O1); |
2291 __ check_klass_subtype_slow_path(sub_klass->after_save(), |
2366 // mov(super_klass->after_save(), O2); //fill delay slot |
2292 super_klass->after_save(), |
2367 assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation"); |
2293 L0, L1, L2, L4, |
2368 __ call(StubRoutines::Sparc::_partial_subtype_check); |
2294 NULL, &L_pop_to_miss); |
2369 __ delayed()->mov(super_klass->after_save(), O2); |
// Newer rows: execution reaches this branch only when the slow path did not
// jump to L_pop_to_miss; restore() is issued in the delay slot of the branch
// to L_success.
2295 __ ba(false, L_success); |

2296 __ delayed()->restore(); |

2297 |

// Newer rows: the miss path issues its own restore() before reaching L_miss.
2298 __ bind(L_pop_to_miss); |
2370 __ restore(); |
2299 __ restore(); |
2371 |

2372 // Upon return, the condition codes are already set. |

2373 __ brx(Assembler::equal, true, Assembler::pt, L_success); |

2374 __ DELAY_SLOT; |

2375 |

2376 #undef DELAY_SLOT |

2377 |
2300 |
2378 // Fall through on failure! |
2301 // Fall through on failure! |
2379 __ BIND(L_miss); |
2302 __ BIND(L_miss); |
2380 } |
2303 } |
2381 |
2304 |
2453 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays |
2376 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays |
2454 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining* |
2377 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining* |
2455 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super |
2378 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super |
2456 __ align(16); |
2379 __ align(16); |
2457 |
2380 |
2458 __ bind(store_element); |
2381 __ BIND(store_element); |
2459 // deccc(G1_remain); // decrement the count (hoisted) |
2382 __ deccc(G1_remain); // decrement the count |
2460 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop |
2383 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop |
2461 __ inc(O5_offset, heapOopSize); // step to next offset |
2384 __ inc(O5_offset, heapOopSize); // step to next offset |
2462 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); |
2385 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); |
2463 __ delayed()->set(0, O0); // return 0 on success |
2386 __ delayed()->set(0, O0); // return 0 on success |
2464 |
2387 |
2465 // ======== loop entry is here ======== |
2388 // ======== loop entry is here ======== |
2466 __ bind(load_element); |
2389 __ BIND(load_element); |
2467 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop |
2390 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop |
2468 __ br_null(G3_oop, true, Assembler::pt, store_element); |
2391 __ br_null(G3_oop, true, Assembler::pt, store_element); |
2469 __ delayed()->deccc(G1_remain); // decrement the count |
2392 __ delayed()->nop(); |
2470 |
2393 |
2471 __ load_klass(G3_oop, G4_klass); // query the object klass |
2394 __ load_klass(G3_oop, G4_klass); // query the object klass |
2472 |
2395 |
2473 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, |
2396 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, |
2474 // branch to this on success: |
2397 // branch to this on success: |
2475 store_element, |
2398 store_element); |
2476 // decrement this on success: |
|
2477 G1_remain); |
|
2478 // ======== end loop ======== |
2399 // ======== end loop ======== |
2479 |
2400 |
2480 // It was a real error; we must depend on the caller to finish the job. |
2401 // It was a real error; we must depend on the caller to finish the job. |
2481 // Register G1 has number of *remaining* oops, O2 number of *total* oops. |
2402 // Register G1 has number of *remaining* oops, O2 number of *total* oops. |
2482 // Emit GC store barriers for the oops we have copied (O2 minus G1), |
2403 // Emit GC store barriers for the oops we have copied (O2 minus G1), |
2483 // and report their number to the caller. |
2404 // and report their number to the caller. |
2484 __ bind(fail); |
2405 __ BIND(fail); |
2485 __ subcc(O2_count, G1_remain, O2_count); |
2406 __ subcc(O2_count, G1_remain, O2_count); |
2486 __ brx(Assembler::zero, false, Assembler::pt, done); |
2407 __ brx(Assembler::zero, false, Assembler::pt, done); |
2487 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller |
2408 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller |
2488 |
2409 |
2489 __ bind(do_card_marks); |
2410 __ BIND(do_card_marks); |
2490 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] |
2411 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] |
2491 |
2412 |
2492 __ bind(done); |
2413 __ BIND(done); |
2493 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); |
2414 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); |
2494 __ retl(); |
2415 __ retl(); |
2495 __ delayed()->nop(); // return value in O0 |
2416 __ delayed()->nop(); // return value in O0 |
2496 |
2417 |
2497 return start; |
2418 return start; |