470 // of the CFG diamond is now speculatively executed. This code has to be |
471 // of the CFG diamond is now speculatively executed. This code has to be |
471 // "cheap enough". We are pretty much limited to CFG diamonds that merge |
472 // "cheap enough". We are pretty much limited to CFG diamonds that merge |
472 // 1 or 2 items with a total of 1 or 2 ops executed speculatively. |
473 // 1 or 2 items with a total of 1 or 2 ops executed speculatively. |
473 Node *PhaseIdealLoop::conditional_move( Node *region ) { |
474 Node *PhaseIdealLoop::conditional_move( Node *region ) { |
474 |
475 |
475 assert( region->is_Region(), "sanity check" ); |
476 assert(region->is_Region(), "sanity check"); |
476 if( region->req() != 3 ) return NULL; |
477 if (region->req() != 3) return NULL; |
477 |
478 |
478 // Check for CFG diamond |
479 // Check for CFG diamond |
479 Node *lp = region->in(1); |
480 Node *lp = region->in(1); |
480 Node *rp = region->in(2); |
481 Node *rp = region->in(2); |
481 if( !lp || !rp ) return NULL; |
482 if (!lp || !rp) return NULL; |
482 Node *lp_c = lp->in(0); |
483 Node *lp_c = lp->in(0); |
483 if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL; |
484 if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL; |
484 IfNode *iff = lp_c->as_If(); |
485 IfNode *iff = lp_c->as_If(); |
485 |
|
486 // Check for highly predictable branch. No point in CMOV'ing if |
|
487 // we are going to predict accurately all the time. |
|
488 // %%% This hides patterns produced by utility methods like Math.min. |
|
489 if( iff->_prob < PROB_UNLIKELY_MAG(3) || |
|
490 iff->_prob > PROB_LIKELY_MAG(3) ) |
|
491 return NULL; |
|
492 |
486 |
493 // Check for ops pinned in an arm of the diamond. |
487 // Check for ops pinned in an arm of the diamond. |
494 // Can't remove the control flow in this case |
488 // Can't remove the control flow in this case |
495 if( lp->outcnt() > 1 ) return NULL; |
489 if (lp->outcnt() > 1) return NULL; |
496 if( rp->outcnt() > 1 ) return NULL; |
490 if (rp->outcnt() > 1) return NULL; |
|
491 |
|
492 IdealLoopTree* r_loop = get_loop(region); |
|
493 assert(r_loop == get_loop(iff), "sanity"); |
|
494 // Always convert to CMOVE if all results are used only outside this loop. |
|
495 bool used_inside_loop = (r_loop == _ltree_root); |
497 |
496 |
498 // Check profitability |
497 // Check profitability |
499 int cost = 0; |
498 int cost = 0; |
500 int phis = 0; |
499 int phis = 0; |
501 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
500 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
502 Node *out = region->fast_out(i); |
501 Node *out = region->fast_out(i); |
503 if( !out->is_Phi() ) continue; // Ignore other control edges, etc |
502 if (!out->is_Phi()) continue; // Ignore other control edges, etc |
504 phis++; |
503 phis++; |
505 PhiNode* phi = out->as_Phi(); |
504 PhiNode* phi = out->as_Phi(); |
506 switch (phi->type()->basic_type()) { |
505 BasicType bt = phi->type()->basic_type(); |
507 case T_LONG: |
506 switch (bt) { |
508 cost++; // Probably encodes as 2 CMOV's |
507 case T_FLOAT: |
|
508 case T_DOUBLE: { |
|
509 cost += Matcher::float_cmove_cost(); // Could be very expensive |
|
510 break; |
|
511 } |
|
512 case T_LONG: { |
|
513 cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's |
|
514 } |
509 case T_INT: // These all CMOV fine |
515 case T_INT: // These all CMOV fine |
510 case T_FLOAT: |
516 case T_ADDRESS: { // (RawPtr) |
511 case T_DOUBLE: |
|
512 case T_ADDRESS: // (RawPtr) |
|
513 cost++; |
517 cost++; |
514 break; |
518 break; |
|
519 } |
515 case T_NARROWOOP: // Fall through |
520 case T_NARROWOOP: // Fall through |
516 case T_OBJECT: { // Base oops are OK, but not derived oops |
521 case T_OBJECT: { // Base oops are OK, but not derived oops |
517 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); |
522 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); |
518 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a |
523 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a |
519 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus |
524 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus |
522 // and good. But if the base is dead, we'll not make a CMOVE. Later |
527 // and good. But if the base is dead, we'll not make a CMOVE. Later |
523 // the allocator will have to produce a base by creating a CMOVE of the |
528 // the allocator will have to produce a base by creating a CMOVE of the |
524 // relevant bases. This puts the allocator in the business of |
529 // relevant bases. This puts the allocator in the business of |
525 // manufacturing expensive instructions, generally a bad plan. |
530 // manufacturing expensive instructions, generally a bad plan. |
526 // Just Say No to Conditionally-Moved Derived Pointers. |
531 // Just Say No to Conditionally-Moved Derived Pointers. |
527 if( tp && tp->offset() != 0 ) |
532 if (tp && tp->offset() != 0) |
528 return NULL; |
533 return NULL; |
529 cost++; |
534 cost++; |
530 break; |
535 break; |
531 } |
536 } |
532 default: |
537 default: |
533 return NULL; // In particular, can't do memory or I/O |
538 return NULL; // In particular, can't do memory or I/O |
534 } |
539 } |
535 // Add in cost any speculative ops |
540 // Add in cost any speculative ops |
536 for( uint j = 1; j < region->req(); j++ ) { |
541 for (uint j = 1; j < region->req(); j++) { |
537 Node *proj = region->in(j); |
542 Node *proj = region->in(j); |
538 Node *inp = phi->in(j); |
543 Node *inp = phi->in(j); |
539 if (get_ctrl(inp) == proj) { // Found local op |
544 if (get_ctrl(inp) == proj) { // Found local op |
540 cost++; |
545 cost++; |
541 // Check for a chain of dependent ops; these will all become |
546 // Check for a chain of dependent ops; these will all become |
542 // speculative in a CMOV. |
547 // speculative in a CMOV. |
543 for( uint k = 1; k < inp->req(); k++ ) |
548 for (uint k = 1; k < inp->req(); k++) |
544 if (get_ctrl(inp->in(k)) == proj) |
549 if (get_ctrl(inp->in(k)) == proj) |
545 return NULL; // Too much speculative goo |
550 cost += ConditionalMoveLimit; // Too much speculative goo |
546 } |
551 } |
547 } |
552 } |
548 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. |
553 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. |
549 // This will likely Split-If, a higher-payoff operation. |
554 // This will likely Split-If, a higher-payoff operation. |
550 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { |
555 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { |
551 Node* use = phi->fast_out(k); |
556 Node* use = phi->fast_out(k); |
552 if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() ) |
557 if (use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP()) |
553 return NULL; |
558 cost += ConditionalMoveLimit; |
554 } |
559 // Is there a use inside the loop? |
555 } |
560 // Note: check only basic types since CMoveP is pinned. |
556 if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo |
561 if (!used_inside_loop && is_java_primitive(bt)) { |
|
562 IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use); |
|
563 if (r_loop == u_loop || r_loop->is_member(u_loop)) { |
|
564 used_inside_loop = true; |
|
565 } |
|
566 } |
|
567 } |
|
568 } |
557 Node* bol = iff->in(1); |
569 Node* bol = iff->in(1); |
558 assert( bol->Opcode() == Op_Bool, "" ); |
570 assert(bol->Opcode() == Op_Bool, ""); |
559 int cmp_op = bol->in(1)->Opcode(); |
571 int cmp_op = bol->in(1)->Opcode(); |
560 // It is expensive to generate flags from a float compare. |
572 // It is expensive to generate flags from a float compare. |
561 // Avoid duplicated float compare. |
573 // Avoid duplicated float compare. |
562 if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; |
574 if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; |
|
575 |
|
576 float infrequent_prob = PROB_UNLIKELY_MAG(3); |
|
577 // Ignore cost and blocks frequency if CMOVE can be moved outside the loop. |
|
578 if (used_inside_loop) { |
|
579 if (cost >= ConditionalMoveLimit) return NULL; // Too much goo |
|
580 |
|
581 // BlockLayoutByFrequency optimization moves infrequent branch |
|
582 // from hot path. No point in CMOV'ing in such case (110 is used |
|
583 // instead of 100 to take into account not exactness of float value). |
|
584 if (BlockLayoutByFrequency) { |
|
585 infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f); |
|
586 } |
|
587 } |
|
588 // Check for highly predictable branch. No point in CMOV'ing if |
|
589 // we are going to predict accurately all the time. |
|
590 if (iff->_prob < infrequent_prob || |
|
591 iff->_prob > (1.0f - infrequent_prob)) |
|
592 return NULL; |
563 |
593 |
564 // -------------- |
594 // -------------- |
565 // Now replace all Phis with CMOV's |
595 // Now replace all Phis with CMOV's |
566 Node *cmov_ctrl = iff->in(0); |
596 Node *cmov_ctrl = iff->in(0); |
567 uint flip = (lp->Opcode() == Op_IfTrue); |
597 uint flip = (lp->Opcode() == Op_IfTrue); |
568 while( 1 ) { |
598 while (1) { |
569 PhiNode* phi = NULL; |
599 PhiNode* phi = NULL; |
570 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
600 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
571 Node *out = region->fast_out(i); |
601 Node *out = region->fast_out(i); |
572 if (out->is_Phi()) { |
602 if (out->is_Phi()) { |
573 phi = out->as_Phi(); |
603 phi = out->as_Phi(); |
574 break; |
604 break; |
575 } |
605 } |
576 } |
606 } |
577 if (phi == NULL) break; |
607 if (phi == NULL) break; |
578 #ifndef PRODUCT |
608 #ifndef PRODUCT |
579 if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV"); |
609 if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV"); |
580 #endif |
610 #endif |
581 // Move speculative ops |
611 // Move speculative ops |
582 for( uint j = 1; j < region->req(); j++ ) { |
612 for (uint j = 1; j < region->req(); j++) { |
583 Node *proj = region->in(j); |
613 Node *proj = region->in(j); |
584 Node *inp = phi->in(j); |
614 Node *inp = phi->in(j); |
585 if (get_ctrl(inp) == proj) { // Found local op |
615 if (get_ctrl(inp) == proj) { // Found local op |
586 #ifndef PRODUCT |
616 #ifndef PRODUCT |
587 if( PrintOpto && VerifyLoopOptimizations ) { |
617 if (PrintOpto && VerifyLoopOptimizations) { |
588 tty->print(" speculate: "); |
618 tty->print(" speculate: "); |
589 inp->dump(); |
619 inp->dump(); |
590 } |
620 } |
591 #endif |
621 #endif |
592 set_ctrl(inp, cmov_ctrl); |
622 set_ctrl(inp, cmov_ctrl); |
674 // Use same limit as split_if_with_blocks_post |
711 // Use same limit as split_if_with_blocks_post |
675 if( C->unique() > 35000 ) return n; // Method too big |
712 if( C->unique() > 35000 ) return n; // Method too big |
676 |
713 |
677 // Split 'n' through the merge point if it is profitable |
714 // Split 'n' through the merge point if it is profitable |
678 Node *phi = split_thru_phi( n, n_blk, policy ); |
715 Node *phi = split_thru_phi( n, n_blk, policy ); |
679 if( !phi ) return n; |
716 if (!phi) return n; |
680 |
717 |
681 // Found a Phi to split thru! |
718 // Found a Phi to split thru! |
682 // Replace 'n' with the new phi |
719 // Replace 'n' with the new phi |
683 _igvn.replace_node( n, phi ); |
720 _igvn.replace_node( n, phi ); |
684 // Moved a load around the loop, 'en-registering' something. |
721 // Moved a load around the loop, 'en-registering' something. |
685 if( n_blk->Opcode() == Op_Loop && n->is_Load() && |
722 if (n_blk->is_Loop() && n->is_Load() && |
686 !phi->in(LoopNode::LoopBackControl)->is_Load() ) |
723 !phi->in(LoopNode::LoopBackControl)->is_Load()) |
687 C->set_major_progress(); |
724 C->set_major_progress(); |
688 |
725 |
689 return phi; |
726 return phi; |
690 } |
727 } |
691 |
728 |