839 void PhaseCFG::partial_latency_of_defs(Node *n) { |
839 void PhaseCFG::partial_latency_of_defs(Node *n) { |
840 // Set the latency for this instruction |
840 // Set the latency for this instruction |
841 #ifndef PRODUCT |
841 #ifndef PRODUCT |
842 if (trace_opto_pipelining()) { |
842 if (trace_opto_pipelining()) { |
843 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", |
843 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", |
844 n->_idx, _node_latency.at_grow(n->_idx)); |
844 n->_idx, _node_latency->at_grow(n->_idx)); |
845 dump(); |
845 dump(); |
846 } |
846 } |
847 #endif |
847 #endif |
848 |
848 |
849 if (n->is_Proj()) |
849 if (n->is_Proj()) |
851 |
851 |
852 if (n->is_Root()) |
852 if (n->is_Root()) |
853 return; |
853 return; |
854 |
854 |
855 uint nlen = n->len(); |
855 uint nlen = n->len(); |
856 uint use_latency = _node_latency.at_grow(n->_idx); |
856 uint use_latency = _node_latency->at_grow(n->_idx); |
857 uint use_pre_order = _bbs[n->_idx]->_pre_order; |
857 uint use_pre_order = _bbs[n->_idx]->_pre_order; |
858 |
858 |
859 for ( uint j=0; j<nlen; j++ ) { |
859 for ( uint j=0; j<nlen; j++ ) { |
860 Node *def = n->in(j); |
860 Node *def = n->in(j); |
861 |
861 |
882 continue; |
882 continue; |
883 |
883 |
884 uint delta_latency = n->latency(j); |
884 uint delta_latency = n->latency(j); |
885 uint current_latency = delta_latency + use_latency; |
885 uint current_latency = delta_latency + use_latency; |
886 |
886 |
887 if (_node_latency.at_grow(def->_idx) < current_latency) { |
887 if (_node_latency->at_grow(def->_idx) < current_latency) { |
888 _node_latency.at_put_grow(def->_idx, current_latency); |
888 _node_latency->at_put_grow(def->_idx, current_latency); |
889 } |
889 } |
890 |
890 |
891 #ifndef PRODUCT |
891 #ifndef PRODUCT |
892 if (trace_opto_pipelining()) { |
892 if (trace_opto_pipelining()) { |
893 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", |
893 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", |
894 use_latency, j, delta_latency, current_latency, def->_idx, |
894 use_latency, j, delta_latency, current_latency, def->_idx, |
895 _node_latency.at_grow(def->_idx)); |
895 _node_latency->at_grow(def->_idx)); |
896 } |
896 } |
897 #endif |
897 #endif |
898 } |
898 } |
899 } |
899 } |
900 |
900 |
924 |
924 |
925 if (use_pre_order == def_pre_order && use->is_Phi()) |
925 if (use_pre_order == def_pre_order && use->is_Phi()) |
926 return 0; |
926 return 0; |
927 |
927 |
928 uint nlen = use->len(); |
928 uint nlen = use->len(); |
929 uint nl = _node_latency.at_grow(use->_idx); |
929 uint nl = _node_latency->at_grow(use->_idx); |
930 |
930 |
931 for ( uint j=0; j<nlen; j++ ) { |
931 for ( uint j=0; j<nlen; j++ ) { |
932 if (use->in(j) == n) { |
932 if (use->in(j) == n) { |
933 // Change this if we want local latencies |
933 // Change this if we want local latencies |
934 uint ul = use->latency(j); |
934 uint ul = use->latency(j); |
960 void PhaseCFG::latency_from_uses(Node *n) { |
960 void PhaseCFG::latency_from_uses(Node *n) { |
961 // Set the latency for this instruction |
961 // Set the latency for this instruction |
962 #ifndef PRODUCT |
962 #ifndef PRODUCT |
963 if (trace_opto_pipelining()) { |
963 if (trace_opto_pipelining()) { |
964 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", |
964 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", |
965 n->_idx, _node_latency.at_grow(n->_idx)); |
965 n->_idx, _node_latency->at_grow(n->_idx)); |
966 dump(); |
966 dump(); |
967 } |
967 } |
968 #endif |
968 #endif |
969 uint latency=0; |
969 uint latency=0; |
970 const Node *def = n->is_Proj() ? n->in(0): n; |
970 const Node *def = n->is_Proj() ? n->in(0): n; |
973 uint l = latency_from_use(n, def, n->fast_out(i)); |
973 uint l = latency_from_use(n, def, n->fast_out(i)); |
974 |
974 |
975 if (latency < l) latency = l; |
975 if (latency < l) latency = l; |
976 } |
976 } |
977 |
977 |
978 _node_latency.at_put_grow(n->_idx, latency); |
978 _node_latency->at_put_grow(n->_idx, latency); |
979 } |
979 } |
980 |
980 |
981 //------------------------------hoist_to_cheaper_block------------------------- |
981 //------------------------------hoist_to_cheaper_block------------------------- |
982 // Pick a block for node self, between early and LCA, that is a cheaper |
982 // Pick a block for node self, between early and LCA, that is a cheaper |
983 // alternative to LCA. |
983 // alternative to LCA. |
984 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { |
984 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { |
985 const double delta = 1+PROB_UNLIKELY_MAG(4); |
985 const double delta = 1+PROB_UNLIKELY_MAG(4); |
986 Block* least = LCA; |
986 Block* least = LCA; |
987 double least_freq = least->_freq; |
987 double least_freq = least->_freq; |
988 uint target = _node_latency.at_grow(self->_idx); |
988 uint target = _node_latency->at_grow(self->_idx); |
989 uint start_latency = _node_latency.at_grow(LCA->_nodes[0]->_idx); |
989 uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx); |
990 uint end_latency = _node_latency.at_grow(LCA->_nodes[LCA->end_idx()]->_idx); |
990 uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx); |
991 bool in_latency = (target <= start_latency); |
991 bool in_latency = (target <= start_latency); |
992 const Block* root_block = _bbs[_root->_idx]; |
992 const Block* root_block = _bbs[_root->_idx]; |
993 |
993 |
994 // Turn off latency scheduling if scheduling is just plain off |
994 // Turn off latency scheduling if scheduling is just plain off |
995 if (!C->do_scheduling()) |
995 if (!C->do_scheduling()) |
1003 in_latency = true; |
1003 in_latency = true; |
1004 |
1004 |
1005 #ifndef PRODUCT |
1005 #ifndef PRODUCT |
1006 if (trace_opto_pipelining()) { |
1006 if (trace_opto_pipelining()) { |
1007 tty->print("# Find cheaper block for latency %d: ", |
1007 tty->print("# Find cheaper block for latency %d: ", |
1008 _node_latency.at_grow(self->_idx)); |
1008 _node_latency->at_grow(self->_idx)); |
1009 self->dump(); |
1009 self->dump(); |
1010 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1010 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1011 LCA->_pre_order, |
1011 LCA->_pre_order, |
1012 LCA->_nodes[0]->_idx, |
1012 LCA->_nodes[0]->_idx, |
1013 start_latency, |
1013 start_latency, |
1030 |
1030 |
1031 // Don't hoist machine instructions to the root basic block |
1031 // Don't hoist machine instructions to the root basic block |
1032 if (mach && LCA == root_block) |
1032 if (mach && LCA == root_block) |
1033 break; |
1033 break; |
1034 |
1034 |
1035 uint start_lat = _node_latency.at_grow(LCA->_nodes[0]->_idx); |
1035 uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx); |
1036 uint end_idx = LCA->end_idx(); |
1036 uint end_idx = LCA->end_idx(); |
1037 uint end_lat = _node_latency.at_grow(LCA->_nodes[end_idx]->_idx); |
1037 uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx); |
1038 double LCA_freq = LCA->_freq; |
1038 double LCA_freq = LCA->_freq; |
1039 #ifndef PRODUCT |
1039 #ifndef PRODUCT |
1040 if (trace_opto_pipelining()) { |
1040 if (trace_opto_pipelining()) { |
1041 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1041 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1042 LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq); |
1042 LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq); |
1071 #ifndef PRODUCT |
1071 #ifndef PRODUCT |
1072 if (trace_opto_pipelining()) { |
1072 if (trace_opto_pipelining()) { |
1073 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); |
1073 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); |
1074 } |
1074 } |
1075 #endif |
1075 #endif |
1076 _node_latency.at_put_grow(self->_idx, end_latency); |
1076 _node_latency->at_put_grow(self->_idx, end_latency); |
1077 partial_latency_of_defs(self); |
1077 partial_latency_of_defs(self); |
1078 } |
1078 } |
1079 |
1079 |
1080 return least; |
1080 return least; |
1081 } |
1081 } |
1253 proj_list.push(_root); // Add real root as another root |
1253 proj_list.push(_root); // Add real root as another root |
1254 proj_list.pop(); |
1254 proj_list.pop(); |
1255 |
1255 |
1256 // Compute the latency information (via backwards walk) for all the |
1256 // Compute the latency information (via backwards walk) for all the |
1257 // instructions in the graph |
1257 // instructions in the graph |
1258 GrowableArray<uint> node_latency; |
1258 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
1259 _node_latency = node_latency; |
|
1260 |
1259 |
1261 if( C->do_scheduling() ) |
1260 if( C->do_scheduling() ) |
1262 ComputeLatenciesBackwards(visited, stack); |
1261 ComputeLatenciesBackwards(visited, stack); |
1263 |
1262 |
1264 // Now schedule all codes as LATE as possible. This is the LCA in the |
1263 // Now schedule all codes as LATE as possible. This is the LCA in the |