Wed, 28 Aug 2013 11:22:43 +0200
8023597: Optimize G1 barriers code for unsafe load_store
Summary: Avoid loading old values in G1 pre-barriers for inlined unsafe load_store nodes.
Reviewed-by: kvn, tonyp
Contributed-by: Martin Doerr <martin.doerr@sap.com>
duke@435 | 1 | /* |
trims@2708 | 2 | * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
stefank@2314 | 25 | #ifndef SHARE_VM_OPTO_OUTPUT_HPP |
stefank@2314 | 26 | #define SHARE_VM_OPTO_OUTPUT_HPP |
stefank@2314 | 27 | |
stefank@2314 | 28 | #include "opto/block.hpp" |
stefank@2314 | 29 | #include "opto/node.hpp" |
stefank@2314 | 30 | #ifdef TARGET_ARCH_MODEL_x86_32 |
stefank@2314 | 31 | # include "adfiles/ad_x86_32.hpp" |
stefank@2314 | 32 | #endif |
stefank@2314 | 33 | #ifdef TARGET_ARCH_MODEL_x86_64 |
stefank@2314 | 34 | # include "adfiles/ad_x86_64.hpp" |
stefank@2314 | 35 | #endif |
stefank@2314 | 36 | #ifdef TARGET_ARCH_MODEL_sparc |
stefank@2314 | 37 | # include "adfiles/ad_sparc.hpp" |
stefank@2314 | 38 | #endif |
stefank@2314 | 39 | #ifdef TARGET_ARCH_MODEL_zero |
stefank@2314 | 40 | # include "adfiles/ad_zero.hpp" |
stefank@2314 | 41 | #endif |
bobv@2508 | 42 | #ifdef TARGET_ARCH_MODEL_arm |
bobv@2508 | 43 | # include "adfiles/ad_arm.hpp" |
bobv@2508 | 44 | #endif |
bobv@2508 | 45 | #ifdef TARGET_ARCH_MODEL_ppc |
bobv@2508 | 46 | # include "adfiles/ad_ppc.hpp" |
bobv@2508 | 47 | #endif |
stefank@2314 | 48 | |
// Forward declarations for the scheduling machinery below; the full
// definitions live in other opto/ and runtime headers.
class Arena;
class Bundle;
class Block;
class Block_Array;
class Node;
class Node_Array;
class Node_List;
class PhaseCFG;
class PhaseChaitin;
class Pipeline_Use_Element;
class Pipeline_Use;
duke@435 | 60 | |
// DEBUG_ARG(x) expands to ", x" in non-product builds so that an extra
// trailing argument (typically a debug name/string) can be threaded through
// call sites; in product builds it expands to nothing, removing the argument
// entirely.
#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
#define DEBUG_ARG(x)
#endif
duke@435 | 66 | |
// Define the initial sizes for allocation of the resizable code buffer.
// These are starting capacities only; the buffer grows on demand.
enum {
  initial_code_capacity  =  16 * 1024,   // generated instructions
  initial_stub_capacity  =   4 * 1024,   // stub section
  initial_const_capacity =   4 * 1024,   // constant section
  initial_locs_capacity  =   3 * 1024    // relocation info
};
duke@435 | 74 | |
duke@435 | 75 | //------------------------------Scheduling---------------------------------- |
duke@435 | 76 | // This class contains all the information necessary to implement instruction |
duke@435 | 77 | // scheduling and bundling. |
duke@435 | 78 | class Scheduling { |
duke@435 | 79 | |
duke@435 | 80 | private: |
duke@435 | 81 | // Arena to use |
duke@435 | 82 | Arena *_arena; |
duke@435 | 83 | |
duke@435 | 84 | // Control-Flow Graph info |
duke@435 | 85 | PhaseCFG *_cfg; |
duke@435 | 86 | |
duke@435 | 87 | // Register Allocation info |
duke@435 | 88 | PhaseRegAlloc *_regalloc; |
duke@435 | 89 | |
duke@435 | 90 | // Number of nodes in the method |
duke@435 | 91 | uint _node_bundling_limit; |
duke@435 | 92 | |
duke@435 | 93 | // List of scheduled nodes. Generated in reverse order |
duke@435 | 94 | Node_List _scheduled; |
duke@435 | 95 | |
duke@435 | 96 | // List of nodes currently available for choosing for scheduling |
duke@435 | 97 | Node_List _available; |
duke@435 | 98 | |
duke@435 | 99 | // For each instruction beginning a bundle, the number of following |
duke@435 | 100 | // nodes to be bundled with it. |
duke@435 | 101 | Bundle *_node_bundling_base; |
duke@435 | 102 | |
duke@435 | 103 | // Mapping from register to Node |
duke@435 | 104 | Node_List _reg_node; |
duke@435 | 105 | |
duke@435 | 106 | // Free list for pinch nodes. |
duke@435 | 107 | Node_List _pinch_free_list; |
duke@435 | 108 | |
duke@435 | 109 | // Latency from the beginning of the containing basic block (base 1) |
duke@435 | 110 | // for each node. |
duke@435 | 111 | unsigned short *_node_latency; |
duke@435 | 112 | |
duke@435 | 113 | // Number of uses of this node within the containing basic block. |
duke@435 | 114 | short *_uses; |
duke@435 | 115 | |
duke@435 | 116 | // Schedulable portion of current block. Skips Region/Phi/CreateEx up |
duke@435 | 117 | // front, branch+proj at end. Also skips Catch/CProj (same as |
duke@435 | 118 | // branch-at-end), plus just-prior exception-throwing call. |
duke@435 | 119 | uint _bb_start, _bb_end; |
duke@435 | 120 | |
duke@435 | 121 | // Latency from the end of the basic block as scheduled |
duke@435 | 122 | unsigned short *_current_latency; |
duke@435 | 123 | |
duke@435 | 124 | // Remember the next node |
duke@435 | 125 | Node *_next_node; |
duke@435 | 126 | |
duke@435 | 127 | // Use this for an unconditional branch delay slot |
duke@435 | 128 | Node *_unconditional_delay_slot; |
duke@435 | 129 | |
duke@435 | 130 | // Pointer to a Nop |
duke@435 | 131 | MachNopNode *_nop; |
duke@435 | 132 | |
duke@435 | 133 | // Length of the current bundle, in instructions |
duke@435 | 134 | uint _bundle_instr_count; |
duke@435 | 135 | |
duke@435 | 136 | // Current Cycle number, for computing latencies and bundling |
duke@435 | 137 | uint _bundle_cycle_number; |
duke@435 | 138 | |
duke@435 | 139 | // Bundle information |
duke@435 | 140 | Pipeline_Use_Element _bundle_use_elements[resource_count]; |
duke@435 | 141 | Pipeline_Use _bundle_use; |
duke@435 | 142 | |
duke@435 | 143 | // Dump the available list |
duke@435 | 144 | void dump_available() const; |
duke@435 | 145 | |
duke@435 | 146 | public: |
duke@435 | 147 | Scheduling(Arena *arena, Compile &compile); |
duke@435 | 148 | |
duke@435 | 149 | // Destructor |
duke@435 | 150 | NOT_PRODUCT( ~Scheduling(); ) |
duke@435 | 151 | |
duke@435 | 152 | // Step ahead "i" cycles |
duke@435 | 153 | void step(uint i); |
duke@435 | 154 | |
duke@435 | 155 | // Step ahead 1 cycle, and clear the bundle state (for example, |
duke@435 | 156 | // at a branch target) |
duke@435 | 157 | void step_and_clear(); |
duke@435 | 158 | |
duke@435 | 159 | Bundle* node_bundling(const Node *n) { |
duke@435 | 160 | assert(valid_bundle_info(n), "oob"); |
duke@435 | 161 | return (&_node_bundling_base[n->_idx]); |
duke@435 | 162 | } |
duke@435 | 163 | |
duke@435 | 164 | bool valid_bundle_info(const Node *n) const { |
duke@435 | 165 | return (_node_bundling_limit > n->_idx); |
duke@435 | 166 | } |
duke@435 | 167 | |
duke@435 | 168 | bool starts_bundle(const Node *n) const { |
duke@435 | 169 | return (_node_bundling_limit > n->_idx && _node_bundling_base[n->_idx].starts_bundle()); |
duke@435 | 170 | } |
duke@435 | 171 | |
duke@435 | 172 | // Do the scheduling |
duke@435 | 173 | void DoScheduling(); |
duke@435 | 174 | |
duke@435 | 175 | // Compute the local latencies walking forward over the list of |
duke@435 | 176 | // nodes for a basic block |
duke@435 | 177 | void ComputeLocalLatenciesForward(const Block *bb); |
duke@435 | 178 | |
duke@435 | 179 | // Compute the register antidependencies within a basic block |
duke@435 | 180 | void ComputeRegisterAntidependencies(Block *bb); |
duke@435 | 181 | void verify_do_def( Node *n, OptoReg::Name def, const char *msg ); |
duke@435 | 182 | void verify_good_schedule( Block *b, const char *msg ); |
duke@435 | 183 | void anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ); |
duke@435 | 184 | void anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ); |
duke@435 | 185 | |
duke@435 | 186 | // Add a node to the current bundle |
duke@435 | 187 | void AddNodeToBundle(Node *n, const Block *bb); |
duke@435 | 188 | |
duke@435 | 189 | // Add a node to the list of available nodes |
duke@435 | 190 | void AddNodeToAvailableList(Node *n); |
duke@435 | 191 | |
duke@435 | 192 | // Compute the local use count for the nodes in a block, and compute |
duke@435 | 193 | // the list of instructions with no uses in the block as available |
duke@435 | 194 | void ComputeUseCount(const Block *bb); |
duke@435 | 195 | |
duke@435 | 196 | // Choose an instruction from the available list to add to the bundle |
duke@435 | 197 | Node * ChooseNodeToBundle(); |
duke@435 | 198 | |
duke@435 | 199 | // See if this Node fits into the currently accumulating bundle |
duke@435 | 200 | bool NodeFitsInBundle(Node *n); |
duke@435 | 201 | |
duke@435 | 202 | // Decrement the use count for a node |
duke@435 | 203 | void DecrementUseCounts(Node *n, const Block *bb); |
duke@435 | 204 | |
duke@435 | 205 | // Garbage collect pinch nodes for reuse by other blocks. |
duke@435 | 206 | void garbage_collect_pinch_nodes(); |
duke@435 | 207 | // Clean up a pinch node for reuse (helper for above). |
duke@435 | 208 | void cleanup_pinch( Node *pinch ); |
duke@435 | 209 | |
duke@435 | 210 | // Information for statistics gathering |
duke@435 | 211 | #ifndef PRODUCT |
duke@435 | 212 | private: |
duke@435 | 213 | // Gather information on size of nops relative to total |
duke@435 | 214 | uint _branches, _unconditional_delays; |
duke@435 | 215 | |
duke@435 | 216 | static uint _total_nop_size, _total_method_size; |
duke@435 | 217 | static uint _total_branches, _total_unconditional_delays; |
duke@435 | 218 | static uint _total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1]; |
duke@435 | 219 | |
duke@435 | 220 | public: |
duke@435 | 221 | static void print_statistics(); |
duke@435 | 222 | |
duke@435 | 223 | static void increment_instructions_per_bundle(uint i) { |
duke@435 | 224 | _total_instructions_per_bundle[i]++; |
duke@435 | 225 | } |
duke@435 | 226 | |
duke@435 | 227 | static void increment_nop_size(uint s) { |
duke@435 | 228 | _total_nop_size += s; |
duke@435 | 229 | } |
duke@435 | 230 | |
duke@435 | 231 | static void increment_method_size(uint s) { |
duke@435 | 232 | _total_method_size += s; |
duke@435 | 233 | } |
duke@435 | 234 | #endif |
duke@435 | 235 | |
duke@435 | 236 | }; |
stefank@2314 | 237 | |
stefank@2314 | 238 | #endif // SHARE_VM_OPTO_OUTPUT_HPP |