Fri, 25 May 2012 07:53:11 -0700
7170463: C2 should recognize "obj.getClass() == A.class" code pattern
Summary: Optimize the code pattern obj.getClass() == A.class.
Reviewed-by: jrose, kvn
Contributed-by: Krystal Mok <sajia@taobao.com>
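For reference, the kind of Java source this pattern describes looks roughly like the following (an illustrative sketch only; the class name A and the helper method are invented for this note, not taken from the changeset):

    class A {}

    class Example {
        // Exact type check: true only when obj's dynamic type is A itself, not a subclass.
        // This is the comparison that C2 is now taught to recognize and fold when the
        // type of obj is precisely known.
        static boolean isExactlyA(Object obj) {
            return obj.getClass() == A.class;
        }
    }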
/*
 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_CALLGENERATOR_HPP
#define SHARE_VM_OPTO_CALLGENERATOR_HPP

#include "opto/callnode.hpp"
#include "opto/compile.hpp"
#include "opto/type.hpp"
#include "runtime/deoptimization.hpp"

//---------------------------CallGenerator-------------------------------------
// The subclasses of this class handle generation of ideal nodes for
// call sites and method entry points.

class CallGenerator : public ResourceObj {
 public:
  enum {
    xxxunusedxxx
  };

 private:
  ciMethod* _method;                    // The method being called.

 protected:
  CallGenerator(ciMethod* method);

 public:
  // Accessors
  ciMethod* method() const              { return _method; }

  // is_inline: At least some code implementing the method is copied here.
  virtual bool is_inline() const        { return false; }
  // is_intrinsic: There's a method-specific way of generating the inline code.
  virtual bool is_intrinsic() const     { return false; }
  // is_parse: Bytecodes implementing the specific method are copied here.
  virtual bool is_parse() const         { return false; }
  // is_virtual: The call uses the receiver type to select or check the method.
  virtual bool is_virtual() const       { return false; }
  // is_deferred: The decision whether to inline or not is deferred.
  virtual bool is_deferred() const      { return false; }
  // is_predicted: Uses an explicit check against a predicted type.
  virtual bool is_predicted() const     { return false; }
  // is_trap: Does not return to the caller. (E.g., uncommon trap.)
  virtual bool is_trap() const          { return false; }

  // is_late_inline: supports conversion of call into an inline
  virtual bool is_late_inline() const   { return false; }
  // Replace the call with an inline version of the code
  virtual void do_late_inline() { ShouldNotReachHere(); }

  virtual CallStaticJavaNode* call_node() const { ShouldNotReachHere(); return NULL; }

  // Note: It is possible for a CG to be both inline and virtual.
  // (The hashCode intrinsic does a vtable check and an inlined fast path.)

  // Utilities:
  const TypeFunc* tf() const;

  // The given jvms has state and arguments for a call to my method.
  // Edges after jvms->argoff() carry all (pre-popped) argument values.
  //
  // Update the map with state and return values (if any) and return it.
  // The return values (0, 1, or 2) must be pushed on the map's stack,
  // and the sp of the jvms incremented accordingly.
  //
  // The jvms is returned on success. Alternatively, a copy of the
  // given jvms, suitably updated, may be returned, in which case the
  // caller should discard the original jvms.
  //
  // The non-Parm edges of the returned map will contain updated global state,
  // and one or two edges before jvms->sp() will carry any return values.
  // Other map edges may contain locals or monitors, and should not
  // be changed in meaning.
  //
  // If the call traps, the returned map must have a control edge of top.
  // If the call can throw, the returned map must report has_exceptions().
  //
  // If the result is NULL, it means that this CallGenerator was unable
  // to handle the given call, and another CallGenerator should be consulted.
  virtual JVMState* generate(JVMState* jvms) = 0;

  // How to generate a call site that is inlined:
  static CallGenerator* for_inline(ciMethod* m, float expected_uses = -1);
  // How to generate code for an on-stack replacement handler.
  static CallGenerator* for_osr(ciMethod* m, int osr_bci);

  // How to generate vanilla out-of-line call sites:
  static CallGenerator* for_direct_call(ciMethod* m, bool separate_io_projs = false);  // static, special
  static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);               // virtual, interface
  static CallGenerator* for_dynamic_call(ciMethod* m);                                 // invokedynamic

  static CallGenerator* for_method_handle_call(Node* method_handle, JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
  static CallGenerator* for_invokedynamic_call(                     JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);

  static CallGenerator* for_method_handle_inline(Node* method_handle, JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
  static CallGenerator* for_invokedynamic_inline(ciCallSite* call_site, JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
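  // (The four factories above cover JSR 292 call sites, i.e. MethodHandle
  //  invocation and invokedynamic. Presumably the *_inline forms succeed only
  //  when the method handle or call site is a compile-time constant that can
  //  be parsed through, while the *_call forms handle the general case.)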

  // How to generate a direct call that can later be replaced with an inline version
  static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);

  // How to make a call but defer the decision whether to inline or not.
  static CallGenerator* for_warm_call(WarmCallInfo* ci,
                                      CallGenerator* if_cold,
                                      CallGenerator* if_hot);

  // How to make a call that optimistically assumes a receiver type:
  static CallGenerator* for_predicted_call(ciKlass* predicted_receiver,
                                           CallGenerator* if_missed,
                                           CallGenerator* if_hit,
                                           float hit_prob);
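  // (hit_prob is assumed to be the profiled probability that the receiver-type
  //  check above succeeds, i.e. that the if_hit generator is the one taken.)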

  // How to make a call that optimistically assumes a MethodHandle target:
  static CallGenerator* for_predicted_dynamic_call(ciMethodHandle* predicted_method_handle,
                                                   CallGenerator* if_missed,
                                                   CallGenerator* if_hit,
                                                   float hit_prob);

  // How to make a call that gives up and goes back to the interpreter:
  static CallGenerator* for_uncommon_trap(ciMethod* m,
                                          Deoptimization::DeoptReason reason,
                                          Deoptimization::DeoptAction action);

  // Registry for intrinsics:
  static CallGenerator* for_intrinsic(ciMethod* m);
  static void register_intrinsic(ciMethod* m, CallGenerator* cg);
};

class InlineCallGenerator : public CallGenerator {
  virtual bool is_inline() const        { return true; }

 protected:
  InlineCallGenerator(ciMethod* method) : CallGenerator(method) { }
};


//---------------------------WarmCallInfo--------------------------------------
// A struct to collect information about a given call site.
// Helps sort call sites into "hot", "medium", and "cold".
// Participates in the queueing of "medium" call sites for possible inlining.
class WarmCallInfo : public ResourceObj {
 private:

  CallNode*      _call;    // The CallNode which may be inlined.
  CallGenerator* _hot_cg;  // CG for expanding the call node

  // These are the metrics we use to evaluate call sites:

  float _count;   // How often do we expect to reach this site?
  float _profit;  // How much time do we expect to save by inlining?
  float _work;    // How long do we expect the average call to take?
  float _size;    // How big do we expect the inlined code to be?

  float _heat;          // Combined score inducing total order on call sites.
  WarmCallInfo* _next;  // Next cooler call info in pending queue.

  // Count is the number of times this call site is expected to be executed.
  // Large count is favorable for inlining, because the extra compilation
  // work will be amortized more completely.

  // Profit is a rough measure of the amount of time we expect to save
  // per execution of this site if we inline it. (1.0 == call overhead)
  // Large profit favors inlining. Negative profit disables inlining.

  // Work is a rough measure of the amount of time a typical out-of-line
  // call from this site is expected to take. (1.0 == call, no-op, return)
  // Small work is somewhat favorable for inlining, since methods with
  // short "hot" traces are more likely to inline smoothly.

  // Size is the number of graph nodes we expect this method to produce,
  // not counting the inlining of any further warm calls it may include.
  // Small size favors inlining, since small methods are more likely to
  // inline smoothly. The size is estimated by examining the native code
  // if available. The method bytecodes are also examined, assuming
  // empirically observed node counts for each kind of bytecode.

  // Heat is the combined "goodness" of a site's inlining. If we were
  // omniscient, it would be the difference of two sums of future execution
  // times of code emitted for this site (amortized across multiple sites if
  // sharing applies). The two sums are for versions of this call site with
  // and without inlining.

  // We approximate this mythical quantity by playing with averages,
  // rough estimates, and assumptions that history repeats itself.
  // The basic formula count * profit is heuristically adjusted
  // by looking at the expected compilation and execution times
  // of the inlined call.
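  // (Roughly: heat ~= count * profit, with compute_heat() applying the
  //  work- and size-based adjustments sketched above.)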

  // Note: Some of these metrics may not be present in the final product,
  // but exist in development builds to experiment with inline policy tuning.

  // This heuristic framework does not model well the very significant
  // effects of multiple-level inlining. It is possible to see no immediate
  // profit from inlining X->Y, but to get great profit from a subsequent
  // inlining X->Y->Z.

  // This framework also does not adequately account for the problem of N**2
  // code size in a clique of mutually inlinable methods.

  WarmCallInfo* next() const              { return _next; }
  void          set_next(WarmCallInfo* n) { _next = n; }

  static WarmCallInfo _always_hot;
  static WarmCallInfo _always_cold;

  // Constructor initialization of always_hot and always_cold
  WarmCallInfo(float c, float p, float w, float s) {
    _call = NULL;
    _hot_cg = NULL;
    _next = NULL;
    _count = c;
    _profit = p;
    _work = w;
    _size = s;
    _heat = 0;
  }

 public:
  // Because WarmCallInfo objects live over the entire lifetime of the
  // Compile object, they are allocated into the comp_arena, which
  // does not get resource marked or reset during the compile process.
  void *operator new( size_t x, Compile* C ) { return C->comp_arena()->Amalloc(x); }
  void operator delete( void * ) { }  // fast deallocation

  static WarmCallInfo* always_hot();
  static WarmCallInfo* always_cold();

  WarmCallInfo() {
    _call = NULL;
    _hot_cg = NULL;
    _next = NULL;
    _count = _profit = _work = _size = _heat = 0;
  }

  CallNode* call()   const { return _call; }
  float     count()  const { return _count; }
  float     size()   const { return _size; }
  float     work()   const { return _work; }
  float     profit() const { return _profit; }
  float     heat()   const { return _heat; }

  void set_count(float x)  { _count = x; }
  void set_size(float x)   { _size = x; }
  void set_work(float x)   { _work = x; }
  void set_profit(float x) { _profit = x; }
  void set_heat(float x)   { _heat = x; }

  // Load initial heuristics from profiles, etc.
  // The heuristics can be tweaked further by the caller.
  void init(JVMState* call_site, ciMethod* call_method, ciCallProfile& profile, float prof_factor);

  static float MAX_VALUE() { return +1.0e10; }
  static float MIN_VALUE() { return -1.0e10; }

  float compute_heat() const;

  void set_call(CallNode* call)      { _call = call; }
  void set_hot_cg(CallGenerator* cg) { _hot_cg = cg; }

  // Do not queue very hot or very cold calls.
  // Make very cold ones out of line immediately.
  // Inline very hot ones immediately.
  // These queries apply various tunable limits
  // to the above metrics in a systematic way.
  // Test for coldness before testing for hotness.
  bool is_cold() const;
  bool is_hot() const;

  // Force a warm call to be hot. This worklists the call node for inlining.
  void make_hot();

  // Force a warm call to be cold. This worklists the call node for out-of-lining.
  void make_cold();

  // A reproducible total ordering, in which heat is the major key.
  bool warmer_than(WarmCallInfo* that);

  // List management. These methods are called with the list head,
  // and return the new list head, inserting or removing the receiver.
  WarmCallInfo* insert_into(WarmCallInfo* head);
  WarmCallInfo* remove_from(WarmCallInfo* head);

#ifndef PRODUCT
  void print() const;
  void print_all() const;
  int count_all() const;
#endif
};

#endif // SHARE_VM_OPTO_CALLGENERATOR_HPP