/*
 * Copyright 2000-2005 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

//---------------------------CallGenerator-------------------------------------
// The subclasses of this class handle generation of ideal nodes for
// call sites and method entry points.

class CallGenerator : public ResourceObj {
 public:
  enum {
    xxxunusedxxx
  };

 private:
  ciMethod*             _method;              // The method being called.

 protected:
  CallGenerator(ciMethod* method);

 public:
  // Accessors
  ciMethod*         method() const            { return _method; }

  // is_inline: At least some code implementing the method is copied here.
  virtual bool      is_inline() const         { return false; }
  // is_intrinsic: There's a method-specific way of generating the inline code.
  virtual bool      is_intrinsic() const      { return false; }
  // is_parse: Bytecodes implementing the specific method are copied here.
  virtual bool      is_parse() const          { return false; }
  // is_virtual: The call uses the receiver type to select or check the method.
  virtual bool      is_virtual() const        { return false; }
  // is_deferred: The decision whether to inline or not is deferred.
  virtual bool      is_deferred() const       { return false; }
  // is_predicted: Uses an explicit check against a predicted type.
  virtual bool      is_predicted() const      { return false; }
  // is_trap: Does not return to the caller.  (E.g., uncommon trap.)
  virtual bool      is_trap() const           { return false; }

  // is_late_inline: supports conversion of call into an inline
  virtual bool      is_late_inline() const    { return false; }
  // Replace the call with an inline version of the code
  virtual void do_late_inline() { ShouldNotReachHere(); }

  virtual CallStaticJavaNode* call_node() const { ShouldNotReachHere(); return NULL; }

  // Note:  It is possible for a CG to be both inline and virtual.
  // (The hashCode intrinsic does a vtable check and an inlined fast path.)
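
  // Illustrative sketch of the late-inline hooks above (an assumption drawn
  // from these declarations, not a statement of the exact pipeline):
  //
  //   CallGenerator* cg = CallGenerator::for_late_inline(callee, inline_cg);
  //   JVMState* new_jvms = cg->generate(jvms);  // emits a plain direct call for now
  //   ...
  //   if (cg->is_late_inline())                 // later, during optimization,
  //     cg->do_late_inline();                   // replace cg->call_node() with
  //                                             // the code from inline_cg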

  // Utilities:
  const TypeFunc*   tf() const;

  // The given jvms has state and arguments for a call to my method.
  // Edges after jvms->argoff() carry all (pre-popped) argument values.
  //
  // Update the map with state and return values (if any) and return it.
  // The return values (0, 1, or 2) must be pushed on the map's stack,
  // and the sp of the jvms incremented accordingly.
  //
  // The jvms is returned on success.  Alternatively, a copy of the
  // given jvms, suitably updated, may be returned, in which case the
  // caller should discard the original jvms.
  //
  // The non-Parm edges of the returned map will contain updated global state,
  // and one or two edges before jvms->sp() will carry any return values.
  // Other map edges may contain locals or monitors, and should not
  // be changed in meaning.
  //
  // If the call traps, the returned map must have a control edge of top.
  // If the call can throw, the returned map must report has_exceptions().
  //
  // If the result is NULL, it means that this CallGenerator was unable
  // to handle the given call, and another CallGenerator should be consulted.
  virtual JVMState* generate(JVMState* jvms) = 0;

  // How to generate a call site that is inlined:
  static CallGenerator* for_inline(ciMethod* m, float expected_uses = -1);
  // How to generate code for an on-stack replacement handler.
  static CallGenerator* for_osr(ciMethod* m, int osr_bci);

  // How to generate vanilla out-of-line call sites:
  static CallGenerator* for_direct_call(ciMethod* m, bool separate_io_projs = false);  // static, special
  static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);               // virtual, interface

  // How to generate a generator that replaces a direct call with an inline version:
  static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);

  // How to make a call but defer the decision whether to inline or not.
  static CallGenerator* for_warm_call(WarmCallInfo* ci,
                                      CallGenerator* if_cold,
                                      CallGenerator* if_hot);

  // How to make a call that optimistically assumes a receiver type:
  static CallGenerator* for_predicted_call(ciKlass* predicted_receiver,
                                           CallGenerator* if_missed,
                                           CallGenerator* if_hit,
                                           float hit_prob);

  // How to make a call that gives up and goes back to the interpreter:
  static CallGenerator* for_uncommon_trap(ciMethod* m,
                                          Deoptimization::DeoptReason reason,
                                          Deoptimization::DeoptAction action);

  // Registry for intrinsics:
  static CallGenerator* for_intrinsic(ciMethod* m);
  static void register_intrinsic(ciMethod* m, CallGenerator* cg);
};

class InlineCallGenerator : public CallGenerator {
  virtual bool      is_inline() const         { return true; }

 protected:
  InlineCallGenerator(ciMethod* method) : CallGenerator(method) { }
};

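// Illustrative sketch (assumed call-site wiring, not taken from this header):
// one way a parser might combine these factories for a profiled virtual call.
// Here `callee`, `vtable_index`, `speculative_klass` and `jvms` stand in for
// values the surrounding code already has.
//
//   CallGenerator* if_hit    = CallGenerator::for_inline(callee);
//   CallGenerator* if_missed = CallGenerator::for_virtual_call(callee, vtable_index);
//   CallGenerator* cg        = CallGenerator::for_predicted_call(speculative_klass,
//                                                                if_missed, if_hit,
//                                                                /*hit_prob*/ 0.9f);
//   JVMState* new_jvms = cg->generate(jvms);
//   if (new_jvms == NULL) {
//     // This generator could not handle the call; consult another CallGenerator.
//   }
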
//---------------------------WarmCallInfo--------------------------------------
// A struct to collect information about a given call site.
// Helps sort call sites into "hot", "medium", and "cold".
// Participates in the queueing of "medium" call sites for possible inlining.
class WarmCallInfo : public ResourceObj {
 private:

  CallNode*     _call;   // The CallNode which may be inlined.
  CallGenerator* _hot_cg;// CG for expanding the call node

  // These are the metrics we use to evaluate call sites:

  float         _count;  // How often do we expect to reach this site?
  float         _profit; // How much time do we expect to save by inlining?
  float         _work;   // How long do we expect the average call to take?
  float         _size;   // How big do we expect the inlined code to be?

  float         _heat;   // Combined score inducing total order on call sites.
  WarmCallInfo* _next;   // Next cooler call info in pending queue.

  // Count is the number of times this call site is expected to be executed.
  // Large count is favorable for inlining, because the extra compilation
  // work will be amortized more completely.

  // Profit is a rough measure of the amount of time we expect to save
  // per execution of this site if we inline it.  (1.0 == call overhead)
  // Large profit favors inlining.  Negative profit disables inlining.

  // Work is a rough measure of the amount of time a typical out-of-line
  // call from this site is expected to take.  (1.0 == call, no-op, return)
  // Small work is somewhat favorable for inlining, since methods with
  // short "hot" traces are more likely to inline smoothly.

  // Size is the number of graph nodes we expect this method to produce,
  // not counting the inlining of any further warm calls it may include.
  // Small size favors inlining, since small methods are more likely to
  // inline smoothly.  The size is estimated by examining the native code
  // if available.  The method bytecodes are also examined, assuming
  // empirically observed node counts for each kind of bytecode.

  // Heat is the combined "goodness" of a site's inlining.  If we were
  // omniscient, it would be the difference of two sums of future execution
  // times of code emitted for this site (amortized across multiple sites if
  // sharing applies).  The two sums are for versions of this call site with
  // and without inlining.

  // We approximate this mythical quantity by playing with averages,
  // rough estimates, and assumptions that history repeats itself.
  // The basic formula count * profit is heuristically adjusted
  // by looking at the expected compilation and execution times of
  // the inlined call.

  // Note:  Some of these metrics may not be present in the final product,
  // but exist in development builds to experiment with inline policy tuning.

  // This heuristic framework does not model well the very significant
  // effects of multiple-level inlining.  It is possible to see no immediate
  // profit from inlining X->Y, but to get great profit from a subsequent
  // inlining X->Y->Z.

  // This framework also does not adequately account for the N**2 code size
  // that can arise in a clique of mutually inlinable methods.
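
  // Worked example (assumed numbers, purely illustrative): a site expected to
  // run 1000 times with a per-call profit of 2.0 starts from the basic score
  // count * profit = 2000.  compute_heat() then discounts that score as work
  // and size grow, so a callee that is twice as bulky at the same count and
  // profit ends up cooler in the queue ordering.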

  WarmCallInfo*  next() const          { return _next; }
  void       set_next(WarmCallInfo* n) { _next = n; }

  static WarmCallInfo* _always_hot;
  static WarmCallInfo* _always_cold;

 public:
  // Because WarmCallInfo objects live over the entire lifetime of the
  // Compile object, they are allocated into the comp_arena, which
  // does not get resource marked or reset during the compile process.
  void *operator new( size_t x, Compile* C ) { return C->comp_arena()->Amalloc(x); }
  void operator delete( void * ) { }  // fast deallocation

  static WarmCallInfo* always_hot();
  static WarmCallInfo* always_cold();

  WarmCallInfo() {
    _call = NULL;
    _hot_cg = NULL;
    _next = NULL;
    _count = _profit = _work = _size = _heat = 0;
  }

  CallNode*  call()   const { return _call; }
  float      count()  const { return _count; }
  float      size()   const { return _size; }
  float      work()   const { return _work; }
  float      profit() const { return _profit; }
  float      heat()   const { return _heat; }

  void set_count(float x)  { _count = x; }
  void set_size(float x)   { _size = x; }
  void set_work(float x)   { _work = x; }
  void set_profit(float x) { _profit = x; }
  void set_heat(float x)   { _heat = x; }

  // Load initial heuristics from profiles, etc.
  // The heuristics can be tweaked further by the caller.
  void init(JVMState* call_site, ciMethod* call_method, ciCallProfile& profile, float prof_factor);

  static float MAX_VALUE() { return +1.0e10; }
  static float MIN_VALUE() { return -1.0e10; }

  float compute_heat() const;

  void set_call(CallNode* call)      { _call = call; }
  void set_hot_cg(CallGenerator* cg) { _hot_cg = cg; }

  // Do not queue very hot or very cold calls.
  // Make very cold ones out of line immediately.
  // Inline very hot ones immediately.
  // These queries apply various tunable limits
  // to the above metrics in a systematic way.
  // Test for coldness before testing for hotness.
  bool is_cold() const;
  bool is_hot() const;

  // Force a warm call to be hot.  This worklists the call node for inlining.
  void make_hot();

  // Force a warm call to be cold.  This worklists the call node for out-of-lining.
  void make_cold();

  // A reproducible total ordering, in which heat is the major key.
  bool warmer_than(WarmCallInfo* that);

  // List management.  These methods are called with the list head,
  // and return the new list head, inserting or removing the receiver.
  WarmCallInfo* insert_into(WarmCallInfo* head);
  WarmCallInfo* remove_from(WarmCallInfo* head);

#ifndef PRODUCT
  void print() const;
  void print_all() const;
  int count_all() const;
#endif
};
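
// Illustrative sketch (assumed usage, not part of this header): how a compile
// might create, score, and queue a WarmCallInfo.  Here `C`, `cn`, `cg`, `jvms`,
// `callee`, `profile`, `prof_factor` and `head` stand in for values the
// surrounding code already has.
//
//   WarmCallInfo* ci = new (C) WarmCallInfo();     // lives in C->comp_arena()
//   ci->set_call(cn);
//   ci->set_hot_cg(cg);
//   ci->init(jvms, callee, profile, prof_factor);  // seed count/profit/work/size
//   ci->set_heat(ci->compute_heat());
//   if (ci->is_cold())      ci->make_cold();       // test coldness first
//   else if (ci->is_hot())  ci->make_hot();
//   else                    head = ci->insert_into(head);  // keep it pending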