Tue, 08 Aug 2017 15:57:29 +0800
merge
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. |
aoqi@0 | 8 | * |
aoqi@0 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 13 | * accompanied this code). |
aoqi@0 | 14 | * |
aoqi@0 | 15 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 18 | * |
aoqi@0 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 20 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 21 | * questions. |
aoqi@0 | 22 | * |
aoqi@0 | 23 | */ |
aoqi@0 | 24 | |
aoqi@1 | 25 | /* |
aoqi@1 | 26 | * This file has been modified by Loongson Technology in 2015. These |
aoqi@1 | 27 | * modifications are Copyright (c) 2015 Loongson Technology, and are made |
aoqi@1 | 28 | * available on the same license terms set forth above. |
aoqi@1 | 29 | */ |
aoqi@1 | 30 | |
aoqi@0 | 31 | #ifndef SHARE_VM_OPTO_OUTPUT_HPP |
aoqi@0 | 32 | #define SHARE_VM_OPTO_OUTPUT_HPP |
aoqi@0 | 33 | |
aoqi@0 | 34 | #include "opto/block.hpp" |
aoqi@0 | 35 | #include "opto/node.hpp" |
aoqi@0 | 36 | #ifdef TARGET_ARCH_MODEL_x86_32 |
aoqi@0 | 37 | # include "adfiles/ad_x86_32.hpp" |
aoqi@0 | 38 | #endif |
aoqi@0 | 39 | #ifdef TARGET_ARCH_MODEL_x86_64 |
aoqi@0 | 40 | # include "adfiles/ad_x86_64.hpp" |
aoqi@0 | 41 | #endif |
aoqi@1 | 42 | #ifdef TARGET_ARCH_MODEL_mips_64 |
aoqi@1 | 43 | # include "adfiles/ad_mips_64.hpp" |
aoqi@1 | 44 | #endif |
aoqi@0 | 45 | #ifdef TARGET_ARCH_MODEL_sparc |
aoqi@0 | 46 | # include "adfiles/ad_sparc.hpp" |
aoqi@0 | 47 | #endif |
aoqi@0 | 48 | #ifdef TARGET_ARCH_MODEL_zero |
aoqi@0 | 49 | # include "adfiles/ad_zero.hpp" |
aoqi@0 | 50 | #endif |
aoqi@0 | 51 | #ifdef TARGET_ARCH_MODEL_arm |
aoqi@0 | 52 | # include "adfiles/ad_arm.hpp" |
aoqi@0 | 53 | #endif |
aoqi@0 | 54 | #ifdef TARGET_ARCH_MODEL_ppc_32 |
aoqi@0 | 55 | # include "adfiles/ad_ppc_32.hpp" |
aoqi@0 | 56 | #endif |
aoqi@0 | 57 | #ifdef TARGET_ARCH_MODEL_ppc_64 |
aoqi@0 | 58 | # include "adfiles/ad_ppc_64.hpp" |
aoqi@0 | 59 | #endif |
aoqi@0 | 60 | |
aoqi@0 | 61 | class Arena; |
aoqi@0 | 62 | class Bundle; |
aoqi@0 | 63 | class Block; |
aoqi@0 | 64 | class Block_Array; |
aoqi@0 | 65 | class Node; |
aoqi@0 | 66 | class Node_Array; |
aoqi@0 | 67 | class Node_List; |
aoqi@0 | 68 | class PhaseCFG; |
aoqi@0 | 69 | class PhaseChaitin; |
aoqi@0 | 70 | class Pipeline_Use_Element; |
aoqi@0 | 71 | class Pipeline_Use; |
aoqi@0 | 72 | |
aoqi@0 | 73 | #ifndef PRODUCT |
aoqi@0 | 74 | #define DEBUG_ARG(x) , x |
aoqi@0 | 75 | #else |
aoqi@0 | 76 | #define DEBUG_ARG(x) |
aoqi@0 | 77 | #endif |
aoqi@0 | 78 | |
aoqi@0 | 79 | // Define the initial sizes for allocation of the resizable code buffer (code, stub, const and locs capacities; presumably in bytes -- TODO confirm) |
aoqi@0 | 80 | enum { |
aoqi@0 | 81 | initial_code_capacity = 16 * 1024, |
aoqi@0 | 82 | initial_stub_capacity = 4 * 1024, |
aoqi@0 | 83 | initial_const_capacity = 4 * 1024, |
aoqi@0 | 84 | initial_locs_capacity = 3 * 1024 |
aoqi@0 | 85 | }; |
aoqi@0 | 86 | |
aoqi@0 | 87 | //------------------------------Scheduling---------------------------------- |
aoqi@0 | 88 | // This class holds the state needed to implement instruction scheduling and |
aoqi@0 | 89 | // bundling: the scheduled/available node lists, the latency and use-count tables, and the per-node Bundle array. |
aoqi@0 | 90 | class Scheduling { |
aoqi@0 | 91 | |
aoqi@0 | 92 | private: |
aoqi@0 | 93 | // Arena to use |
aoqi@0 | 94 | Arena *_arena; |
aoqi@0 | 95 | |
aoqi@0 | 96 | // Control-Flow Graph info |
aoqi@0 | 97 | PhaseCFG *_cfg; |
aoqi@0 | 98 | |
aoqi@0 | 99 | // Register Allocation info |
aoqi@0 | 100 | PhaseRegAlloc *_regalloc; |
aoqi@0 | 101 | |
aoqi@0 | 102 | // Number of nodes in the method; bundle info is valid only for nodes whose _idx is below this (see valid_bundle_info) |
aoqi@0 | 103 | uint _node_bundling_limit; |
aoqi@0 | 104 | |
aoqi@0 | 105 | // List of scheduled nodes. Generated in reverse order |
aoqi@0 | 106 | Node_List _scheduled; |
aoqi@0 | 107 | |
aoqi@0 | 108 | // List of nodes currently available for choosing for scheduling |
aoqi@0 | 109 | Node_List _available; |
aoqi@0 | 110 | |
aoqi@0 | 111 | // For each instruction beginning a bundle, the number of following |
aoqi@0 | 112 | // nodes to be bundled with it. Array of Bundle entries indexed by Node::_idx (see node_bundling). |
aoqi@0 | 113 | Bundle *_node_bundling_base; |
aoqi@0 | 114 | |
aoqi@0 | 115 | // Mapping from register to Node |
aoqi@0 | 116 | Node_List _reg_node; |
aoqi@0 | 117 | |
aoqi@0 | 118 | // Free list for pinch nodes. |
aoqi@0 | 119 | Node_List _pinch_free_list; |
aoqi@0 | 120 | |
aoqi@0 | 121 | // Latency from the beginning of the containing basic block (base 1) |
aoqi@0 | 122 | // for each node. |
aoqi@0 | 123 | unsigned short *_node_latency; |
aoqi@0 | 124 | |
aoqi@0 | 125 | // Number of uses of this node within the containing basic block. |
aoqi@0 | 126 | short *_uses; |
aoqi@0 | 127 | |
aoqi@0 | 128 | // Schedulable portion of current block. Skips Region/Phi/CreateEx up |
aoqi@0 | 129 | // front, branch+proj at end. Also skips Catch/CProj (same as |
aoqi@0 | 130 | // branch-at-end), plus just-prior exception-throwing call. |
aoqi@0 | 131 | uint _bb_start, _bb_end; |
aoqi@0 | 132 | |
aoqi@0 | 133 | // Latency from the end of the basic block as scheduled |
aoqi@0 | 134 | unsigned short *_current_latency; |
aoqi@0 | 135 | |
aoqi@0 | 136 | // Remember the next node |
aoqi@0 | 137 | Node *_next_node; |
aoqi@0 | 138 | |
aoqi@0 | 139 | // Use this for an unconditional branch delay slot |
aoqi@0 | 140 | Node *_unconditional_delay_slot; |
aoqi@0 | 141 | |
aoqi@0 | 142 | // Pointer to a Nop |
aoqi@0 | 143 | MachNopNode *_nop; |
aoqi@0 | 144 | |
aoqi@0 | 145 | // Length of the current bundle, in instructions |
aoqi@0 | 146 | uint _bundle_instr_count; |
aoqi@0 | 147 | |
aoqi@0 | 148 | // Current Cycle number, for computing latencies and bundling |
aoqi@0 | 149 | uint _bundle_cycle_number; |
aoqi@0 | 150 | |
aoqi@0 | 151 | // Bundle information |
aoqi@0 | 152 | Pipeline_Use_Element _bundle_use_elements[resource_count]; |
aoqi@0 | 153 | Pipeline_Use _bundle_use; |
aoqi@0 | 154 | |
aoqi@0 | 155 | // Dump the available list |
aoqi@0 | 156 | void dump_available() const; |
aoqi@0 | 157 | |
aoqi@0 | 158 | public: |
aoqi@0 | 159 | Scheduling(Arena *arena, Compile &compile); |
aoqi@0 | 160 | |
aoqi@0 | 161 | // Destructor; wrapped in NOT_PRODUCT, so presumably declared only in non-product builds -- TODO confirm |
aoqi@0 | 162 | NOT_PRODUCT( ~Scheduling(); ) |
aoqi@0 | 163 | |
aoqi@0 | 164 | // Step ahead "i" cycles |
aoqi@0 | 165 | void step(uint i); |
aoqi@0 | 166 | |
aoqi@0 | 167 | // Step ahead 1 cycle, and clear the bundle state (for example, |
aoqi@0 | 168 | // at a branch target) |
aoqi@0 | 169 | void step_and_clear(); |
aoqi@0 | 170 | |
aoqi@0 | 171 | Bundle* node_bundling(const Node *n) { |
aoqi@0 | 172 | assert(valid_bundle_info(n), "oob"); |
aoqi@0 | 173 | return (&_node_bundling_base[n->_idx]); |
aoqi@0 | 174 | } |
aoqi@0 | 175 | |
aoqi@0 | 176 | bool valid_bundle_info(const Node *n) const { |
aoqi@0 | 177 | return (_node_bundling_limit > n->_idx); |
aoqi@0 | 178 | } |
aoqi@0 | 179 | |
aoqi@0 | 180 | bool starts_bundle(const Node *n) const { |
aoqi@0 | 181 | return (_node_bundling_limit > n->_idx && _node_bundling_base[n->_idx].starts_bundle()); |
aoqi@0 | 182 | } |
aoqi@0 | 183 | |
aoqi@0 | 184 | // Do the scheduling |
aoqi@0 | 185 | void DoScheduling(); |
aoqi@0 | 186 | |
aoqi@0 | 187 | // Compute the local latencies walking forward over the list of |
aoqi@0 | 188 | // nodes for a basic block |
aoqi@0 | 189 | void ComputeLocalLatenciesForward(const Block *bb); |
aoqi@0 | 190 | |
aoqi@0 | 191 | // Compute the register antidependencies within a basic block |
aoqi@0 | 192 | void ComputeRegisterAntidependencies(Block *bb); |
aoqi@0 | 193 | void verify_do_def( Node *n, OptoReg::Name def, const char *msg ); |
aoqi@0 | 194 | void verify_good_schedule( Block *b, const char *msg ); |
aoqi@0 | 195 | void anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ); |
aoqi@0 | 196 | void anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ); |
aoqi@0 | 197 | |
aoqi@0 | 198 | // Add a node to the current bundle |
aoqi@0 | 199 | void AddNodeToBundle(Node *n, const Block *bb); |
aoqi@0 | 200 | |
aoqi@0 | 201 | // Add a node to the list of available nodes |
aoqi@0 | 202 | void AddNodeToAvailableList(Node *n); |
aoqi@0 | 203 | |
aoqi@0 | 204 | // Compute the local use count for the nodes in a block, and compute |
aoqi@0 | 205 | // the list of instructions with no uses in the block as available |
aoqi@0 | 206 | void ComputeUseCount(const Block *bb); |
aoqi@0 | 207 | |
aoqi@0 | 208 | // Choose an instruction from the available list to add to the bundle |
aoqi@0 | 209 | Node * ChooseNodeToBundle(); |
aoqi@0 | 210 | |
aoqi@0 | 211 | // See if this Node fits into the currently accumulating bundle |
aoqi@0 | 212 | bool NodeFitsInBundle(Node *n); |
aoqi@0 | 213 | |
aoqi@0 | 214 | // Decrement the use count for a node |
aoqi@0 | 215 | void DecrementUseCounts(Node *n, const Block *bb); |
aoqi@0 | 216 | |
aoqi@0 | 217 | // Garbage collect pinch nodes for reuse by other blocks. |
aoqi@0 | 218 | void garbage_collect_pinch_nodes(); |
aoqi@0 | 219 | // Clean up a pinch node for reuse (helper for above). |
aoqi@0 | 220 | void cleanup_pinch( Node *pinch ); |
aoqi@0 | 221 | |
aoqi@0 | 222 | // Information for statistics gathering; everything below is compiled only when PRODUCT is not defined |
aoqi@0 | 223 | #ifndef PRODUCT |
aoqi@0 | 224 | private: |
aoqi@0 | 225 | // Gather information on size of nops relative to total |
aoqi@0 | 226 | uint _branches, _unconditional_delays; |
aoqi@0 | 227 | |
aoqi@0 | 228 | static uint _total_nop_size, _total_method_size; |
aoqi@0 | 229 | static uint _total_branches, _total_unconditional_delays; |
aoqi@0 | 230 | static uint _total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1]; |
aoqi@0 | 231 | |
aoqi@0 | 232 | public: |
aoqi@0 | 233 | static void print_statistics(); |
aoqi@0 | 234 | |
aoqi@0 | 235 | static void increment_instructions_per_bundle(uint i) { |
aoqi@0 | 236 | _total_instructions_per_bundle[i]++; |
aoqi@0 | 237 | } |
aoqi@0 | 238 | |
aoqi@0 | 239 | static void increment_nop_size(uint s) { |
aoqi@0 | 240 | _total_nop_size += s; |
aoqi@0 | 241 | } |
aoqi@0 | 242 | |
aoqi@0 | 243 | static void increment_method_size(uint s) { |
aoqi@0 | 244 | _total_method_size += s; |
aoqi@0 | 245 | } |
aoqi@0 | 246 | #endif |
aoqi@0 | 247 | |
aoqi@0 | 248 | }; |
aoqi@0 | 249 | |
aoqi@0 | 250 | #endif // SHARE_VM_OPTO_OUTPUT_HPP |