1451 assert(ctrl->is_CFG(), "must be control"); |
1451 assert(ctrl->is_CFG(), "must be control"); |
1452 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); |
1452 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); |
1453 return _phase->dom_lca_internal(ctrl, backedge) == ctrl; |
1453 return _phase->dom_lca_internal(ctrl, backedge) == ctrl; |
1454 } |
1454 } |
1455 |
1455 |
|
1456 //------------------------------adjust_limit----------------------------------- |
|
1457 // Helper function for add_constraint(). |
|
1458 Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl) { |
|
1459 // Compute "I :: (limit-offset)/scale" |
|
1460 Node *con = new (C, 3) SubINode(rc_limit, offset); |
|
1461 register_new_node(con, pre_ctrl); |
|
1462 Node *X = new (C, 3) DivINode(0, con, scale); |
|
1463 register_new_node(X, pre_ctrl); |
|
1464 |
|
1465 // Adjust loop limit |
|
1466 loop_limit = (stride_con > 0) |
|
1467 ? (Node*)(new (C, 3) MinINode(loop_limit, X)) |
|
1468 : (Node*)(new (C, 3) MaxINode(loop_limit, X)); |
|
1469 register_new_node(loop_limit, pre_ctrl); |
|
1470 return loop_limit; |
|
1471 } |
|
1472 |
1456 //------------------------------add_constraint--------------------------------- |
1473 //------------------------------add_constraint--------------------------------- |
1457 // Constrain the main loop iterations so the conditions: |
1474 // Constrain the main loop iterations so the conditions: |
1458 // low_limit <= scale_con * I + offset < upper_limit |
1475 // low_limit <= scale_con * I + offset < upper_limit |
1459 // always holds true. That is, either increase the number of iterations in |
1476 // always holds true. That is, either increase the number of iterations in |
1460 // the pre-loop or the post-loop until the condition holds true in the main |
1477 // the pre-loop or the post-loop until the condition holds true in the main |
1467 |
1484 |
1468 // Also for positive stride*scale the affine function is increasing, so the |
1485 // Also for positive stride*scale the affine function is increasing, so the |
1469 // pre-loop must check for underflow and the post-loop for overflow. |
1486 // pre-loop must check for underflow and the post-loop for overflow. |
1470 // Negative stride*scale reverses this; pre-loop checks for overflow and |
1487 // Negative stride*scale reverses this; pre-loop checks for overflow and |
1471 // post-loop for underflow. |
1488 // post-loop for underflow. |
1472 if (stride_con*scale_con > 0) { |
1489 |
|
1490 Node *scale = _igvn.intcon(scale_con); |
|
1491 set_ctrl(scale, C->root()); |
|
1492 |
|
1493 if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow |
1473 // The overflow limit: scale*I+offset < upper_limit |
1494 // The overflow limit: scale*I+offset < upper_limit |
1474 // For main-loop compute |
1495 // For main-loop compute |
1475 // ( if (scale > 0) /* and stride > 0 */ |
1496 // ( if (scale > 0) /* and stride > 0 */ |
1476 // I < (upper_limit-offset)/scale |
1497 // I < (upper_limit-offset)/scale |
1477 // else /* scale < 0 and stride < 0 */ |
1498 // else /* scale < 0 and stride < 0 */ |
1478 // I > (upper_limit-offset)/scale |
1499 // I > (upper_limit-offset)/scale |
1479 // ) |
1500 // ) |
1480 // |
1501 // |
1481 // (upper_limit-offset) may overflow when offset < 0. |
1502 // (upper_limit-offset) may overflow or underflow. |
1482 // But it is fine since main loop will either have |
1503 // But it is fine since main loop will either have |
1483 // less iterations or will be skipped in such case. |
1504 // less iterations or will be skipped in such case. |
1484 Node *con = new (C, 3) SubINode(upper_limit, offset); |
1505 *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl); |
1485 register_new_node(con, pre_ctrl); |
|
1486 Node *scale = _igvn.intcon(scale_con); |
|
1487 set_ctrl(scale, C->root()); |
|
1488 Node *X = new (C, 3) DivINode(0, con, scale); |
|
1489 register_new_node(X, pre_ctrl); |
|
1490 |
|
1491 // Adjust main-loop last iteration |
|
1492 Node *loop_limit = *main_limit; |
|
1493 loop_limit = (stride_con > 0) // scale > 0 |
|
1494 ? (Node*)(new (C, 3) MinINode(loop_limit, X)) |
|
1495 : (Node*)(new (C, 3) MaxINode(loop_limit, X)); |
|
1496 register_new_node(loop_limit, pre_ctrl); |
|
1497 *main_limit = loop_limit; |
|
1498 |
1506 |
1499 // The underflow limit: low_limit <= scale*I+offset. |
1507 // The underflow limit: low_limit <= scale*I+offset. |
1500 // For pre-loop compute |
1508 // For pre-loop compute |
1501 // NOT(scale*I+offset >= low_limit) |
1509 // NOT(scale*I+offset >= low_limit) |
1502 // scale*I+offset < low_limit |
1510 // scale*I+offset < low_limit |
1507 // ) |
1515 // ) |
1508 |
1516 |
1509 if (low_limit->get_int() == -max_jint) { |
1517 if (low_limit->get_int() == -max_jint) { |
1510 if (!RangeLimitCheck) return; |
1518 if (!RangeLimitCheck) return; |
1511 // We need this guard when scale*pre_limit+offset >= limit |
1519 // We need this guard when scale*pre_limit+offset >= limit |
1512 // due to underflow so we need execute pre-loop until |
1520 // due to underflow. So we need execute pre-loop until |
1513 // scale*I+offset >= min_int. But (low_limit-offset) will |
1521 // scale*I+offset >= min_int. But (min_int-offset) will |
1514 // underflow when offset > 0 and X will be > original_limit. |
1522 // underflow when offset > 0 and X will be > original_limit |
1515 // To avoid it we replace offset = offset > 0 ? 0 : offset |
1523 // when stride > 0. To avoid it we replace positive offset with 0. |
1516 // and add min(pre_limit, original_limit). |
1524 // |
|
1525 // Also (min_int+1 == -max_int) is used instead of min_int here |
|
1526 // to avoid problem with scale == -1 (min_int/(-1) == min_int). |
1517 Node* shift = _igvn.intcon(31); |
1527 Node* shift = _igvn.intcon(31); |
1518 set_ctrl(shift, C->root()); |
1528 set_ctrl(shift, C->root()); |
1519 Node *neg_off = new (C, 3) RShiftINode(offset, shift); |
1529 Node* sign = new (C, 3) RShiftINode(offset, shift); |
1520 register_new_node(neg_off, pre_ctrl); |
1530 register_new_node(sign, pre_ctrl); |
1521 offset = new (C, 3) AndINode(offset, neg_off); |
1531 offset = new (C, 3) AndINode(offset, sign); |
1522 register_new_node(offset, pre_ctrl); |
1532 register_new_node(offset, pre_ctrl); |
1523 } else { |
1533 } else { |
1524 assert(low_limit->get_int() == 0, "wrong low limit for range check"); |
1534 assert(low_limit->get_int() == 0, "wrong low limit for range check"); |
1525 // The only problem we have here when offset == min_int |
1535 // The only problem we have here when offset == min_int |
1526 // since (0-min_int) == min_int. It may be fine for scale > 0 |
1536 // since (0-min_int) == min_int. It may be fine for stride > 0 |
1527 // but for scale < 0 X will be < original_limit. |
1537 // but for stride < 0 X will be < original_limit. To avoid it |
1528 } |
1538 // max(pre_limit, original_limit) is used in do_range_check(). |
1529 con = new (C, 3) SubINode(low_limit, offset); |
1539 } |
1530 register_new_node(con, pre_ctrl); |
1540 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond); |
1531 scale = _igvn.intcon(scale_con); |
1541 *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl); |
1532 set_ctrl(scale, C->root()); |
|
1533 X = new (C, 3) DivINode(0, con, scale); |
|
1534 register_new_node(X, pre_ctrl); |
|
1535 |
|
1536 // Adjust pre-loop last iteration |
|
1537 loop_limit = *pre_limit; |
|
1538 loop_limit = (stride_con > 0) // scale > 0 |
|
1539 ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) |
|
1540 : (Node*)(new (C, 3) MinINode(loop_limit, X)); |
|
1541 register_new_node( loop_limit, pre_ctrl ); |
|
1542 *pre_limit = loop_limit; |
|
1543 |
1542 |
1544 } else { // stride_con*scale_con < 0 |
1543 } else { // stride_con*scale_con < 0 |
1545 // For negative stride*scale pre-loop checks for overflow and |
1544 // For negative stride*scale pre-loop checks for overflow and |
1546 // post-loop for underflow. |
1545 // post-loop for underflow. |
1547 // |
1546 // |
1548 // The underflow limit: low_limit <= scale*I+offset. |
|
1549 // For main-loop compute |
|
1550 // scale*I+offset+1 > low_limit |
|
1551 // ( if (scale < 0) /* and stride > 0 */ |
|
1552 // I < (low_limit-(offset+1))/scale |
|
1553 // else /* scale < 0 and stride < 0 */ |
|
1554 // I > (low_limit-(offset+1))/scale |
|
1555 // ) |
|
1556 |
|
1557 if (low_limit->get_int() == -max_jint) { |
|
1558 if (!RangeLimitCheck) return; |
|
1559 } else { |
|
1560 assert(low_limit->get_int() == 0, "wrong low limit for range check"); |
|
1561 } |
|
1562 |
|
1563 Node *one = _igvn.intcon(1); |
|
1564 set_ctrl(one, C->root()); |
|
1565 Node *plus_one = new (C, 3) AddINode(offset, one); |
|
1566 register_new_node( plus_one, pre_ctrl ); |
|
1567 Node *con = new (C, 3) SubINode(low_limit, plus_one); |
|
1568 register_new_node(con, pre_ctrl); |
|
1569 Node *scale = _igvn.intcon(scale_con); |
|
1570 set_ctrl(scale, C->root()); |
|
1571 Node *X = new (C, 3) DivINode(0, con, scale); |
|
1572 register_new_node(X, pre_ctrl); |
|
1573 |
|
1574 // Adjust main-loop last iteration |
|
1575 Node *loop_limit = *main_limit; |
|
1576 loop_limit = (stride_con > 0) // scale < 0 |
|
1577 ? (Node*)(new (C, 3) MinINode(loop_limit, X)) |
|
1578 : (Node*)(new (C, 3) MaxINode(loop_limit, X)); |
|
1579 register_new_node(loop_limit, pre_ctrl); |
|
1580 *main_limit = loop_limit; |
|
1581 |
|
1582 // The overflow limit: scale*I+offset < upper_limit |
1547 // The overflow limit: scale*I+offset < upper_limit |
1583 // For pre-loop compute |
1548 // For pre-loop compute |
1584 // NOT(scale*I+offset < upper_limit) |
1549 // NOT(scale*I+offset < upper_limit) |
1585 // scale*I+offset >= upper_limit |
1550 // scale*I+offset >= upper_limit |
1586 // scale*I+offset+1 > upper_limit |
1551 // scale*I+offset+1 > upper_limit |
1587 // ( if (scale < 0) /* and stride > 0 */ |
1552 // ( if (scale < 0) /* and stride > 0 */ |
1588 // I < (upper_limit-(offset+1))/scale |
1553 // I < (upper_limit-(offset+1))/scale |
1589 // else /* scale < 0 and stride < 0 */ |
1554 // else /* scale > 0 and stride < 0 */ |
1590 // I > (upper_limit-(offset+1))/scale |
1555 // I > (upper_limit-(offset+1))/scale |
1591 // ) |
1556 // ) |
1592 plus_one = new (C, 3) AddINode(offset, one); |
1557 // |
|
1558 // (upper_limit-offset-1) may underflow or overflow. |
|
1559 // To avoid it min(pre_limit, original_limit) is used |
|
1560 // in do_range_check() for stride > 0 and max() for < 0. |
|
1561 Node *one = _igvn.intcon(1); |
|
1562 set_ctrl(one, C->root()); |
|
1563 |
|
1564 Node *plus_one = new (C, 3) AddINode(offset, one); |
1593 register_new_node( plus_one, pre_ctrl ); |
1565 register_new_node( plus_one, pre_ctrl ); |
1594 con = new (C, 3) SubINode(upper_limit, plus_one); |
1566 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond); |
1595 register_new_node(con, pre_ctrl); |
1567 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl); |
1596 scale = _igvn.intcon(scale_con); |
1568 |
1597 set_ctrl(scale, C->root()); |
1569 if (low_limit->get_int() == -max_jint) { |
1598 X = new (C, 3) DivINode(0, con, scale); |
1570 if (!RangeLimitCheck) return; |
1599 register_new_node(X, pre_ctrl); |
1571 // We need this guard when scale*main_limit+offset >= limit |
1600 |
1572 // due to underflow. So we need execute main-loop while |
1601 // Adjust pre-loop last iteration |
1573 // scale*I+offset+1 > min_int. But (min_int-offset-1) will |
1602 loop_limit = *pre_limit; |
1574 // underflow when (offset+1) > 0 and X will be < main_limit |
1603 loop_limit = (stride_con > 0) // scale < 0 |
1575 // when scale < 0 (and stride > 0). To avoid it we replace |
1604 ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) |
1576 // positive (offset+1) with 0. |
1605 : (Node*)(new (C, 3) MinINode(loop_limit, X)); |
1577 // |
1606 register_new_node( loop_limit, pre_ctrl ); |
1578 // Also (min_int+1 == -max_int) is used instead of min_int here |
1607 *pre_limit = loop_limit; |
1579 // to avoid problem with scale == -1 (min_int/(-1) == min_int). |
1608 |
1580 Node* shift = _igvn.intcon(31); |
|
1581 set_ctrl(shift, C->root()); |
|
1582 Node* sign = new (C, 3) RShiftINode(plus_one, shift); |
|
1583 register_new_node(sign, pre_ctrl); |
|
1584 plus_one = new (C, 3) AndINode(plus_one, sign); |
|
1585 register_new_node(plus_one, pre_ctrl); |
|
1586 } else { |
|
1587 assert(low_limit->get_int() == 0, "wrong low limit for range check"); |
|
1588 // The only problem we have here when offset == max_int |
|
1589 // since (max_int+1) == min_int and (0-min_int) == min_int. |
|
1590 // But it is fine since main loop will either have |
|
1591 // less iterations or will be skipped in such case. |
|
1592 } |
|
1593 // The underflow limit: low_limit <= scale*I+offset. |
|
1594 // For main-loop compute |
|
1595 // scale*I+offset+1 > low_limit |
|
1596 // ( if (scale < 0) /* and stride > 0 */ |
|
1597 // I < (low_limit-(offset+1))/scale |
|
1598 // else /* scale > 0 and stride < 0 */ |
|
1599 // I > (low_limit-(offset+1))/scale |
|
1600 // ) |
|
1601 |
|
1602 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl); |
1609 } |
1603 } |
1610 } |
1604 } |
1611 |
1605 |
1612 |
1606 |
1613 //------------------------------is_scaled_iv--------------------------------- |
1607 //------------------------------is_scaled_iv--------------------------------- |