  assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
  Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0);

  // Perform cloning to keep Invariance state correct since the
  // late schedule will place invariant things in the loop.
  rng = invar.clone(rng, ctrl);
  if (offset && offset != zero) {
    assert(invar.is_invariant(offset), "offset must be loop invariant");
    offset = invar.clone(offset, ctrl);
  }

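  // rc_predicate builds the BoolNode for one half of the range check;
  // its final argument selects the lower (false) or upper (true) bound test.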
  // Test the lower bound
  Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false);
  IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
  _igvn.hash_delete(lower_bound_iff);
  lower_bound_iff->set_req(1, lower_bound_bol);
  if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx);

  // Test the upper bound
  Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true);
  IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
  _igvn.hash_delete(upper_bound_iff);
  upper_bound_iff->set_req(1, upper_bound_bol);
  if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", upper_bound_iff->_idx);

    hoisted |= _next->loop_predication( phase);
  }

  return hoisted;
}


// Process all the loops in the loop tree and replace any fill
// patterns with an intrinsic version.
bool PhaseIdealLoop::do_intrinsify_fill() {
  bool changed = false;
  for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
    IdealLoopTree* lpt = iter.current();
    changed |= intrinsify_fill(lpt);
  }
  return changed;
}


// Examine an inner loop looking for a single store of an invariant
// value in a unit stride loop.
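// For example, a loop of the form
//   for (int i = init; i < limit; i++) a[i] = value;
// where value does not change inside the loop.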
bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
                                     Node*& shift, Node*& con) {
  const char* msg = NULL;
  Node* msg_node = NULL;

  store_value = NULL;
  con = NULL;
  shift = NULL;

  // Process the loop looking for stores.  If there are multiple
  // stores or extra control flow, give up at this point.
  CountedLoopNode* head = lpt->_head->as_CountedLoop();
  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    if (n->outcnt() == 0) continue; // Ignore dead
    if (n->is_Store()) {
      if (store != NULL) {
        msg = "multiple stores";
        break;
      }
      int opc = n->Opcode();
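      // StoreP/StoreN are oop stores and StoreCM is a card mark;
      // the simple fill stubs do not cover these.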
      if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreCM) {
        msg = "oop fills not handled";
        break;
      }
      Node* value = n->in(MemNode::ValueIn);
      if (!lpt->is_invariant(value)) {
        msg = "variant store value";
      }
      store = n;
      store_value = value;
    } else if (n->is_If() && n != head->loopexit()) {
      msg = "extra control flow";
      msg_node = n;
    }
  }

  if (store == NULL) {
    // No store in loop
    return false;
  }

  if (msg == NULL && head->stride_con() != 1) {
    // could handle negative strides too
    if (head->stride_con() < 0) {
      msg = "negative stride";
    } else {
      msg = "non-unit stride";
    }
  }

  if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) {
    msg = "can't handle store address";
    msg_node = store->in(MemNode::Address);
  }

  // Make sure there is an appropriate fill routine
  BasicType t = store->as_Mem()->memory_type();
  const char* fill_name;
  if (msg == NULL &&
      StubRoutines::select_fill_function(t, false, fill_name) == NULL) {
    msg = "unsupported store";
    msg_node = store;
  }

  if (msg != NULL) {
#ifndef PRODUCT
    if (TraceOptimizeFill) {
      tty->print_cr("not fill intrinsic candidate: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    }
#endif
    return false;
  }

  // Make sure the address expression can be handled.  It should be
  // head->phi * elsize + con.  head->phi might have a ConvI2L.
  Node* elements[4];
  Node* conv = NULL;
  int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
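  // unpack_offsets flattens the AddP chain into its component addends,
  // returning how many it found or -1 if the expression didn't unpack cleanly.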
  for (int e = 0; e < count; e++) {
    Node* n = elements[e];
    if (n->is_Con() && con == NULL) {
      con = n;
    } else if (n->Opcode() == Op_LShiftX && shift == NULL) {
      Node* value = n->in(1);
#ifdef _LP64
      if (value->Opcode() == Op_ConvI2L) {
        conv = value;
        value = value->in(1);
      }
#endif
      if (value != head->phi()) {
        msg = "unhandled shift in address";
      } else {
        shift = n;
        assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int(), "scale should match");
      }
    } else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
      if (n->in(1) == head->phi()) {
        conv = n;
      } else {
        msg = "unhandled input to ConvI2L";
      }
    } else if (n == head->phi()) {
      // no shift, check below for allowed cases
    } else {
      msg = "unhandled node in address";
      msg_node = n;
    }
  }

  if (count == -1) {
    msg = "malformed address expression";
    msg_node = store;
  }

  // byte sized items won't have a shift
  if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) {
    msg = "can't find shift";
    msg_node = store;
  }

  if (msg != NULL) {
#ifndef PRODUCT
    if (TraceOptimizeFill) {
      tty->print_cr("not fill intrinsic: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    }
#endif
    return false;
  }

  // Now make sure all the other nodes in the loop can be handled
  VectorSet ok(Thread::current()->resource_area());

  // store related values are ok
  ok.set(store->_idx);
  ok.set(store->in(MemNode::Memory)->_idx);

  // Loop structure is ok
  ok.set(head->_idx);
  ok.set(head->loopexit()->_idx);
  ok.set(head->phi()->_idx);
  ok.set(head->incr()->_idx);
  ok.set(head->loopexit()->cmp_node()->_idx);
  ok.set(head->loopexit()->in(1)->_idx);

  // Address elements are ok
  if (con) ok.set(con->_idx);
  if (shift) ok.set(shift->_idx);
  if (conv) ok.set(conv->_idx);

  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    if (n->outcnt() == 0) continue; // Ignore dead
    if (ok.test(n->_idx)) continue;
    // Backedge projection is ok
    if (n->is_IfTrue() && n->in(0) == head->loopexit()) continue;
    if (!n->is_AddP()) {
      msg = "unhandled node";
      msg_node = n;
      break;
    }
  }

  // Make sure no unexpected values are used outside the loop
  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    // These values can be replaced with other nodes if they are used
    // outside the loop.
    if (n == store || n == head->loopexit() || n == head->incr()) continue;
    for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
      Node* use = iter.get();
      if (!lpt->_body.contains(use)) {
        msg = "node is used outside loop";
        // lpt->_body.dump();
        msg_node = n;
        break;
      }
    }
  }

#ifdef ASSERT
  if (TraceOptimizeFill) {
    if (msg != NULL) {
      tty->print_cr("no fill intrinsic: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    } else {
      tty->print_cr("fill intrinsic for:");
    }
    store->dump();
    if (Verbose) {
      lpt->_body.dump();
    }
  }
#endif

  return msg == NULL;
}



bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
  // Only for counted inner loops
  if (!lpt->is_counted() || !lpt->is_inner()) {
    return false;
  }

  // Must have constant stride
  CountedLoopNode* head = lpt->_head->as_CountedLoop();
  if (!head->stride_is_con() || !head->is_normal_loop()) {
    return false;
  }

  // Check that the body only contains a store of a loop invariant
  // value that is indexed by the loop phi.
  Node* store = NULL;
  Node* store_value = NULL;
  Node* shift = NULL;
  Node* offset = NULL;
  if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
    return false;
  }

  // Now replace the whole loop body by a call to a fill routine that
  // covers the same region as the loop.
  Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);

  // Build an expression for the beginning of the copy region
  Node* index = head->init_trip();
#ifdef _LP64
  index = new (C, 2) ConvI2LNode(index);
  _igvn.register_new_node_with_optimizer(index);
#endif
  if (shift != NULL) {
    // byte arrays don't require a shift but others do.
    index = new (C, 3) LShiftXNode(index, shift->in(2));
    _igvn.register_new_node_with_optimizer(index);
  }
  index = new (C, 4) AddPNode(base, base, index);
  _igvn.register_new_node_with_optimizer(index);
  Node* from = new (C, 4) AddPNode(base, index, offset);
  _igvn.register_new_node_with_optimizer(from);
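  // from == base + (init_trip << shift) + offset, i.e. the address of
  // the first element the original loop would have written.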
  // Compute the number of elements to copy
  Node* len = new (C, 3) SubINode(head->limit(), head->init_trip());
  _igvn.register_new_node_with_optimizer(len);
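  // len is a count of elements, not bytes; the stub selected below is
  // specialized on the element type and scales accordingly.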

  BasicType t = store->as_Mem()->memory_type();
  bool aligned = false;
  if (offset != NULL && head->init_trip()->is_Con()) {
    int element_size = type2aelembytes(t);
    aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0;
  }
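  // When the byte address of the first store is a HeapWord multiple,
  // a faster aligned fill stub can be selected below.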

  // Build a call to the fill routine
  const char* fill_name;
  address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
  assert(fill != NULL, "what?");

  // Convert float/double to int/long for fill routines
  if (t == T_FLOAT) {
    store_value = new (C, 2) MoveF2INode(store_value);
    _igvn.register_new_node_with_optimizer(store_value);
  } else if (t == T_DOUBLE) {
    store_value = new (C, 2) MoveD2LNode(store_value);
    _igvn.register_new_node_with_optimizer(store_value);
  }
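  // MoveF2I/MoveD2L reinterpret the raw bits (as in Float.floatToRawIntBits)
  // rather than performing a numeric conversion.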

  Node* mem_phi = store->in(MemNode::Memory);
  Node* result_ctrl;
  Node* result_mem;
  const TypeFunc* call_type = OptoRuntime::array_fill_Type();
  int size = call_type->domain()->cnt();
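  // The stub is invoked as a leaf call with no safepoint or JVM state;
  // the NoFP variant is usable because the value was moved into an
  // integer register above.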
  CallLeafNode *call = new (C, size) CallLeafNoFPNode(call_type, fill,
                                                      fill_name, TypeAryPtr::get_array_body_type(t));
  call->init_req(TypeFunc::Parms+0, from);
  call->init_req(TypeFunc::Parms+1, store_value);
  call->init_req(TypeFunc::Parms+2, len);
  call->init_req( TypeFunc::Control,   head->init_control());
  call->init_req( TypeFunc::I_O    ,   C->top() );       // does no i/o
  call->init_req( TypeFunc::Memory ,   mem_phi->in(LoopNode::EntryControl) );
  call->init_req( TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr) );
  call->init_req( TypeFunc::FramePtr,  C->start()->proj_out(TypeFunc::FramePtr) );
  _igvn.register_new_node_with_optimizer(call);
  result_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control);
  _igvn.register_new_node_with_optimizer(result_ctrl);
  result_mem = new (C, 1) ProjNode(call,TypeFunc::Memory);
  _igvn.register_new_node_with_optimizer(result_mem);

  // If this fill is tightly coupled to an allocation and overwrites
  // the whole body, allow it to take over the zeroing.
  AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
  if (alloc != NULL && alloc->is_AllocateArray()) {
    Node* length = alloc->as_AllocateArray()->Ideal_length();
    if (head->limit() == length &&
        head->init_trip() == _igvn.intcon(0)) {
      if (TraceOptimizeFill) {
        tty->print_cr("Eliminated zeroing in allocation");
      }
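      // Marking the allocation complete lets the initialization drop
      // its zeroing of the array body, since the fill overwrites it all.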
      alloc->maybe_set_complete(&_igvn);
    } else {
#ifdef ASSERT
      if (TraceOptimizeFill) {
        tty->print_cr("filling array but bounds don't match");
        alloc->dump();
        head->init_trip()->dump();
        head->limit()->dump();
        length->dump();
      }
#endif
    }
  }

  // Redirect the old control and memory edges that are outside the loop.
  Node* exit = head->loopexit()->proj_out(0);
  _igvn.replace_node(exit, result_ctrl);
  _igvn.replace_node(store, result_mem);
  // Any uses of the increment outside of the loop become the loop limit.
  _igvn.replace_node(head->incr(), head->limit());

  // Disconnect the head from the loop.
  for (uint i = 0; i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    _igvn.replace_node(n, C->top());
  }

  return true;
}