src/share/vm/opto/compile.cpp

author       iveresov
date         Thu, 02 Dec 2010 17:21:12 -0800
changeset    2349:5ddfcf4b079e
parent       2314:f95d63e2154a
child        2350:2f644f85485d
permissions  -rw-r--r--

7003554: (tiered) assert(is_null_object() || handle() != NULL) failed: cannot embed null pointer
Summary: C1 with profiling doesn't check whether the MDO has really been allocated; the allocation can silently fail if the perm gen is full. The solution is to check whether the allocation failed and bail out of inlining or compilation.
Reviewed-by: kvn, never
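
As a rough illustration of the bailout the summary describes (this is a sketch, not the patch recorded in changeset 5ddfcf4b079e), a compiler that requests method data would verify that the request actually produced an MDO and give up cleanly otherwise. The exact check via ciMethod::method_data() / ciMethodData::is_empty() and the record_failure() bailout are assumptions modeled on the surrounding HotSpot code:

    // Illustrative sketch only -- not the actual change in this changeset.
    // Assumes ciMethod::method_data() yields an empty ciMethodData when the
    // methodDataOop could not be allocated (e.g. because the perm gen is full).
    if (ProfileTraps) {
      method()->ensure_method_data();
      ciMethodData* md = method()->method_data();
      if (md == NULL || md->is_empty()) {
        // The MDO was never allocated; bail out of the compilation rather
        // than embedding a null methodDataOop handle later on.
        record_failure("could not allocate method data");
        return;
      }
    }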

     1 /*
     2  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "asm/assembler.hpp"
    27 #include "classfile/systemDictionary.hpp"
    28 #include "code/exceptionHandlerTable.hpp"
    29 #include "code/nmethod.hpp"
    30 #include "compiler/compileLog.hpp"
    31 #include "compiler/oopMap.hpp"
    32 #include "opto/addnode.hpp"
    33 #include "opto/block.hpp"
    34 #include "opto/c2compiler.hpp"
    35 #include "opto/callGenerator.hpp"
    36 #include "opto/callnode.hpp"
    37 #include "opto/cfgnode.hpp"
    38 #include "opto/chaitin.hpp"
    39 #include "opto/compile.hpp"
    40 #include "opto/connode.hpp"
    41 #include "opto/divnode.hpp"
    42 #include "opto/escape.hpp"
    43 #include "opto/idealGraphPrinter.hpp"
    44 #include "opto/loopnode.hpp"
    45 #include "opto/machnode.hpp"
    46 #include "opto/macro.hpp"
    47 #include "opto/matcher.hpp"
    48 #include "opto/memnode.hpp"
    49 #include "opto/mulnode.hpp"
    50 #include "opto/node.hpp"
    51 #include "opto/opcodes.hpp"
    52 #include "opto/output.hpp"
    53 #include "opto/parse.hpp"
    54 #include "opto/phaseX.hpp"
    55 #include "opto/rootnode.hpp"
    56 #include "opto/runtime.hpp"
    57 #include "opto/stringopts.hpp"
    58 #include "opto/type.hpp"
    59 #include "opto/vectornode.hpp"
    60 #include "runtime/arguments.hpp"
    61 #include "runtime/signature.hpp"
    62 #include "runtime/stubRoutines.hpp"
    63 #include "runtime/timer.hpp"
    64 #include "utilities/copy.hpp"
    65 #ifdef TARGET_ARCH_MODEL_x86_32
    66 # include "adfiles/ad_x86_32.hpp"
    67 #endif
    68 #ifdef TARGET_ARCH_MODEL_x86_64
    69 # include "adfiles/ad_x86_64.hpp"
    70 #endif
    71 #ifdef TARGET_ARCH_MODEL_sparc
    72 # include "adfiles/ad_sparc.hpp"
    73 #endif
    74 #ifdef TARGET_ARCH_MODEL_zero
    75 # include "adfiles/ad_zero.hpp"
    76 #endif
    78 /// Support for intrinsics.
    80 // Return the index at which m must be inserted (or already exists).
    81 // The sort order is by the address of the ciMethod, with is_virtual as minor key.
    82 int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
    83 #ifdef ASSERT
    84   for (int i = 1; i < _intrinsics->length(); i++) {
    85     CallGenerator* cg1 = _intrinsics->at(i-1);
    86     CallGenerator* cg2 = _intrinsics->at(i);
    87     assert(cg1->method() != cg2->method()
    88            ? cg1->method()     < cg2->method()
    89            : cg1->is_virtual() < cg2->is_virtual(),
    90            "compiler intrinsics list must stay sorted");
    91   }
    92 #endif
    93   // Binary search sorted list, in decreasing intervals [lo, hi].
    94   int lo = 0, hi = _intrinsics->length()-1;
    95   while (lo <= hi) {
    96     int mid = (uint)(hi + lo) / 2;
    97     ciMethod* mid_m = _intrinsics->at(mid)->method();
    98     if (m < mid_m) {
    99       hi = mid-1;
   100     } else if (m > mid_m) {
   101       lo = mid+1;
   102     } else {
   103       // look at minor sort key
   104       bool mid_virt = _intrinsics->at(mid)->is_virtual();
   105       if (is_virtual < mid_virt) {
   106         hi = mid-1;
   107       } else if (is_virtual > mid_virt) {
   108         lo = mid+1;
   109       } else {
   110         return mid;  // exact match
   111       }
   112     }
   113   }
   114   return lo;  // inexact match
   115 }
   117 void Compile::register_intrinsic(CallGenerator* cg) {
   118   if (_intrinsics == NULL) {
   119     _intrinsics = new GrowableArray<CallGenerator*>(60);
   120   }
   121   // This code is stolen from ciObjectFactory::insert.
   122   // Really, GrowableArray should have methods for
   123   // insert_at, remove_at, and binary_search.
   124   int len = _intrinsics->length();
   125   int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
   126   if (index == len) {
   127     _intrinsics->append(cg);
   128   } else {
   129 #ifdef ASSERT
   130     CallGenerator* oldcg = _intrinsics->at(index);
   131     assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
   132 #endif
   133     _intrinsics->append(_intrinsics->at(len-1));
   134     int pos;
   135     for (pos = len-2; pos >= index; pos--) {
   136       _intrinsics->at_put(pos+1,_intrinsics->at(pos));
   137     }
   138     _intrinsics->at_put(index, cg);
   139   }
   140   assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
   141 }
   143 CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
   144   assert(m->is_loaded(), "don't try this on unloaded methods");
   145   if (_intrinsics != NULL) {
   146     int index = intrinsic_insertion_index(m, is_virtual);
   147     if (index < _intrinsics->length()
   148         && _intrinsics->at(index)->method() == m
   149         && _intrinsics->at(index)->is_virtual() == is_virtual) {
   150       return _intrinsics->at(index);
   151     }
   152   }
   153   // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
   154   if (m->intrinsic_id() != vmIntrinsics::_none &&
   155       m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
   156     CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
   157     if (cg != NULL) {
   158       // Save it for next time:
   159       register_intrinsic(cg);
   160       return cg;
   161     } else {
   162       gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
   163     }
   164   }
   165   return NULL;
   166 }
   168 // Compile:: register_library_intrinsics and make_vm_intrinsic are defined
   169 // in library_call.cpp.
   172 #ifndef PRODUCT
   173 // statistics gathering...
   175 juint  Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
   176 jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};
   178 bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
   179   assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
   180   int oflags = _intrinsic_hist_flags[id];
   181   assert(flags != 0, "what happened?");
   182   if (is_virtual) {
   183     flags |= _intrinsic_virtual;
   184   }
   185   bool changed = (flags != oflags);
   186   if ((flags & _intrinsic_worked) != 0) {
   187     juint count = (_intrinsic_hist_count[id] += 1);
   188     if (count == 1) {
   189       changed = true;           // first time
   190     }
   191     // increment the overall count also:
   192     _intrinsic_hist_count[vmIntrinsics::_none] += 1;
   193   }
   194   if (changed) {
   195     if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
   196       // Something changed about the intrinsic's virtuality.
   197       if ((flags & _intrinsic_virtual) != 0) {
   198         // This is the first use of this intrinsic as a virtual call.
   199         if (oflags != 0) {
   200           // We already saw it as a non-virtual, so note both cases.
   201           flags |= _intrinsic_both;
   202         }
   203       } else if ((oflags & _intrinsic_both) == 0) {
   204         // This is the first use of this intrinsic as a non-virtual
   205         flags |= _intrinsic_both;
   206       }
   207     }
   208     _intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
   209   }
   210   // update the overall flags also:
   211   _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
   212   return changed;
   213 }
   215 static char* format_flags(int flags, char* buf) {
   216   buf[0] = 0;
   217   if ((flags & Compile::_intrinsic_worked) != 0)    strcat(buf, ",worked");
   218   if ((flags & Compile::_intrinsic_failed) != 0)    strcat(buf, ",failed");
   219   if ((flags & Compile::_intrinsic_disabled) != 0)  strcat(buf, ",disabled");
   220   if ((flags & Compile::_intrinsic_virtual) != 0)   strcat(buf, ",virtual");
   221   if ((flags & Compile::_intrinsic_both) != 0)      strcat(buf, ",nonvirtual");
   222   if (buf[0] == 0)  strcat(buf, ",");
   223   assert(buf[0] == ',', "must be");
   224   return &buf[1];
   225 }
   227 void Compile::print_intrinsic_statistics() {
   228   char flagsbuf[100];
   229   ttyLocker ttyl;
   230   if (xtty != NULL)  xtty->head("statistics type='intrinsic'");
   231   tty->print_cr("Compiler intrinsic usage:");
   232   juint total = _intrinsic_hist_count[vmIntrinsics::_none];
   233   if (total == 0)  total = 1;  // avoid div0 in case of no successes
   234   #define PRINT_STAT_LINE(name, c, f) \
   235     tty->print_cr("  %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
   236   for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
   237     vmIntrinsics::ID id = (vmIntrinsics::ID) index;
   238     int   flags = _intrinsic_hist_flags[id];
   239     juint count = _intrinsic_hist_count[id];
   240     if ((flags | count) != 0) {
   241       PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
   242     }
   243   }
   244   PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
   245   if (xtty != NULL)  xtty->tail("statistics");
   246 }
   248 void Compile::print_statistics() {
   249   { ttyLocker ttyl;
   250     if (xtty != NULL)  xtty->head("statistics type='opto'");
   251     Parse::print_statistics();
   252     PhaseCCP::print_statistics();
   253     PhaseRegAlloc::print_statistics();
   254     Scheduling::print_statistics();
   255     PhasePeephole::print_statistics();
   256     PhaseIdealLoop::print_statistics();
   257     if (xtty != NULL)  xtty->tail("statistics");
   258   }
   259   if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
   260     // put this under its own <statistics> element.
   261     print_intrinsic_statistics();
   262   }
   263 }
   264 #endif //PRODUCT
   266 // Support for bundling info
   267 Bundle* Compile::node_bundling(const Node *n) {
   268   assert(valid_bundle_info(n), "oob");
   269   return &_node_bundling_base[n->_idx];
   270 }
   272 bool Compile::valid_bundle_info(const Node *n) {
   273   return (_node_bundling_limit > n->_idx);
   274 }
   277 void Compile::gvn_replace_by(Node* n, Node* nn) {
   278   for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
   279     Node* use = n->last_out(i);
   280     bool is_in_table = initial_gvn()->hash_delete(use);
   281     uint uses_found = 0;
   282     for (uint j = 0; j < use->len(); j++) {
   283       if (use->in(j) == n) {
   284         if (j < use->req())
   285           use->set_req(j, nn);
   286         else
   287           use->set_prec(j, nn);
   288         uses_found++;
   289       }
   290     }
   291     if (is_in_table) {
   292       // reinsert into table
   293       initial_gvn()->hash_find_insert(use);
   294     }
   295     record_for_igvn(use);
   296     i -= uses_found;    // we deleted 1 or more copies of this edge
   297   }
   298 }
   303 // Identify all nodes that are reachable from below, useful.
   304 // Use breadth-first pass that records state in a Unique_Node_List,
   305 // recursive traversal is slower.
   306 void Compile::identify_useful_nodes(Unique_Node_List &useful) {
   307   int estimated_worklist_size = unique();
   308   useful.map( estimated_worklist_size, NULL );  // preallocate space
   310   // Initialize worklist
   311   if (root() != NULL)     { useful.push(root()); }
   312   // If 'top' is cached, declare it useful to preserve cached node
   313   if( cached_top_node() ) { useful.push(cached_top_node()); }
   315   // Push all useful nodes onto the list, breadth-first
   316   for( uint next = 0; next < useful.size(); ++next ) {
   317     assert( next < unique(), "Unique useful nodes < total nodes");
   318     Node *n  = useful.at(next);
   319     uint max = n->len();
   320     for( uint i = 0; i < max; ++i ) {
   321       Node *m = n->in(i);
   322       if( m == NULL ) continue;
   323       useful.push(m);
   324     }
   325   }
   326 }
   328 // Disconnect all useless nodes by disconnecting those at the boundary.
   329 void Compile::remove_useless_nodes(Unique_Node_List &useful) {
   330   uint next = 0;
   331   while( next < useful.size() ) {
   332     Node *n = useful.at(next++);
   333     // Use raw traversal of out edges since this code removes out edges
   334     int max = n->outcnt();
   335     for (int j = 0; j < max; ++j ) {
   336       Node* child = n->raw_out(j);
   337       if( ! useful.member(child) ) {
   338         assert( !child->is_top() || child != top(),
   339                 "If top is cached in Compile object it is in useful list");
   340         // Only need to remove this out-edge to the useless node
   341         n->raw_del_out(j);
   342         --j;
   343         --max;
   344       }
   345     }
   346     if (n->outcnt() == 1 && n->has_special_unique_user()) {
   347       record_for_igvn( n->unique_out() );
   348     }
   349   }
   350   debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
   351 }
   353 //------------------------------frame_size_in_words-----------------------------
   354 // frame_slots in units of words
   355 int Compile::frame_size_in_words() const {
   356   // shift is 0 in LP32 and 1 in LP64
   357   const int shift = (LogBytesPerWord - LogBytesPerInt);
   358   int words = _frame_slots >> shift;
   359   assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
   360   return words;
   361 }
   363 // ============================================================================
   364 //------------------------------CompileWrapper---------------------------------
   365 class CompileWrapper : public StackObj {
   366   Compile *const _compile;
   367  public:
   368   CompileWrapper(Compile* compile);
   370   ~CompileWrapper();
   371 };
   373 CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
   374   // the Compile* pointer is stored in the current ciEnv:
   375   ciEnv* env = compile->env();
   376   assert(env == ciEnv::current(), "must already be a ciEnv active");
   377   assert(env->compiler_data() == NULL, "compile already active?");
   378   env->set_compiler_data(compile);
   379   assert(compile == Compile::current(), "sanity");
   381   compile->set_type_dict(NULL);
   382   compile->set_type_hwm(NULL);
   383   compile->set_type_last_size(0);
   384   compile->set_last_tf(NULL, NULL);
   385   compile->set_indexSet_arena(NULL);
   386   compile->set_indexSet_free_block_list(NULL);
   387   compile->init_type_arena();
   388   Type::Initialize(compile);
   389   _compile->set_scratch_buffer_blob(NULL);
   390   _compile->begin_method();
   391 }
   392 CompileWrapper::~CompileWrapper() {
   393   _compile->end_method();
   394   if (_compile->scratch_buffer_blob() != NULL)
   395     BufferBlob::free(_compile->scratch_buffer_blob());
   396   _compile->env()->set_compiler_data(NULL);
   397 }
   400 //----------------------------print_compile_messages---------------------------
   401 void Compile::print_compile_messages() {
   402 #ifndef PRODUCT
   403   // Check if recompiling
   404   if (_subsume_loads == false && PrintOpto) {
   405     // Recompiling without allowing machine instructions to subsume loads
   406     tty->print_cr("*********************************************************");
   407     tty->print_cr("** Bailout: Recompile without subsuming loads          **");
   408     tty->print_cr("*********************************************************");
   409   }
   410   if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {
   411     // Recompiling without escape analysis
   412     tty->print_cr("*********************************************************");
   413     tty->print_cr("** Bailout: Recompile without escape analysis          **");
   414     tty->print_cr("*********************************************************");
   415   }
   416   if (env()->break_at_compile()) {
   417     // Open the debugger when compiling this method.
   418     tty->print("### Breaking when compiling: ");
   419     method()->print_short_name();
   420     tty->cr();
   421     BREAKPOINT;
   422   }
   424   if( PrintOpto ) {
   425     if (is_osr_compilation()) {
   426       tty->print("[OSR]%3d", _compile_id);
   427     } else {
   428       tty->print("%3d", _compile_id);
   429     }
   430   }
   431 #endif
   432 }
   435 void Compile::init_scratch_buffer_blob() {
   436   if( scratch_buffer_blob() != NULL )  return;
   438   // Construct a temporary CodeBuffer to have it construct a BufferBlob
   439   // Cache this BufferBlob for this compile.
   440   ResourceMark rm;
   441   int size = (MAX_inst_size + MAX_stubs_size + MAX_const_size);
   442   BufferBlob* blob = BufferBlob::create("Compile::scratch_buffer", size);
   443   // Record the buffer blob for next time.
   444   set_scratch_buffer_blob(blob);
   445   // Have we run out of code space?
   446   if (scratch_buffer_blob() == NULL) {
   447     // Let CompilerBroker disable further compilations.
   448     record_failure("Not enough space for scratch buffer in CodeCache");
   449     return;
   450   }
   452   // Initialize the relocation buffers
   453   relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;
   454   set_scratch_locs_memory(locs_buf);
   455 }
   458 //-----------------------scratch_emit_size-------------------------------------
   459 // Helper function that computes size by emitting code
   460 uint Compile::scratch_emit_size(const Node* n) {
   461   // Emit into a trash buffer and count bytes emitted.
   462   // This is a pretty expensive way to compute a size,
   463   // but it works well enough if seldom used.
   464   // All common fixed-size instructions are given a size
   465   // method by the AD file.
   466   // Note that the scratch buffer blob and locs memory are
   467   // allocated at the beginning of the compile task, and
   468   // may be shared by several calls to scratch_emit_size.
   469   // The allocation of the scratch buffer blob is particularly
   470   // expensive, since it has to grab the code cache lock.
   471   BufferBlob* blob = this->scratch_buffer_blob();
   472   assert(blob != NULL, "Initialize BufferBlob at start");
   473   assert(blob->size() > MAX_inst_size, "sanity");
   474   relocInfo* locs_buf = scratch_locs_memory();
   475   address blob_begin = blob->content_begin();
   476   address blob_end   = (address)locs_buf;
   477   assert(blob->content_contains(blob_end), "sanity");
   478   CodeBuffer buf(blob_begin, blob_end - blob_begin);
   479   buf.initialize_consts_size(MAX_const_size);
   480   buf.initialize_stubs_size(MAX_stubs_size);
   481   assert(locs_buf != NULL, "sanity");
   482   int lsize = MAX_locs_size / 2;
   483   buf.insts()->initialize_shared_locs(&locs_buf[0],     lsize);
   484   buf.stubs()->initialize_shared_locs(&locs_buf[lsize], lsize);
   485   n->emit(buf, this->regalloc());
   486   return buf.insts_size();
   487 }
   490 // ============================================================================
   491 //------------------------------Compile standard-------------------------------
   492 debug_only( int Compile::_debug_idx = 100000; )
   494 // Compile a method.  entry_bci is -1 for normal compilations and indicates
   495 // the continuation bci for on stack replacement.
   498 Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads, bool do_escape_analysis )
   499                 : Phase(Compiler),
   500                   _env(ci_env),
   501                   _log(ci_env->log()),
   502                   _compile_id(ci_env->compile_id()),
   503                   _save_argument_registers(false),
   504                   _stub_name(NULL),
   505                   _stub_function(NULL),
   506                   _stub_entry_point(NULL),
   507                   _method(target),
   508                   _entry_bci(osr_bci),
   509                   _initial_gvn(NULL),
   510                   _for_igvn(NULL),
   511                   _warm_calls(NULL),
   512                   _subsume_loads(subsume_loads),
   513                   _do_escape_analysis(do_escape_analysis),
   514                   _failure_reason(NULL),
   515                   _code_buffer("Compile::Fill_buffer"),
   516                   _orig_pc_slot(0),
   517                   _orig_pc_slot_offset_in_bytes(0),
   518                   _has_method_handle_invokes(false),
   519                   _node_bundling_limit(0),
   520                   _node_bundling_base(NULL),
   521                   _java_calls(0),
   522                   _inner_loops(0),
   523 #ifndef PRODUCT
   524                   _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
   525                   _printer(IdealGraphPrinter::printer()),
   526 #endif
   527                   _congraph(NULL) {
   528   C = this;
   530   CompileWrapper cw(this);
   531 #ifndef PRODUCT
   532   if (TimeCompiler2) {
   533     tty->print(" ");
   534     target->holder()->name()->print();
   535     tty->print(".");
   536     target->print_short_name();
   537     tty->print("  ");
   538   }
   539   TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
   540   TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
   541   bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
   542   if (!print_opto_assembly) {
   543     bool print_assembly = (PrintAssembly || _method->should_print_assembly());
   544     if (print_assembly && !Disassembler::can_decode()) {
   545       tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
   546       print_opto_assembly = true;
   547     }
   548   }
   549   set_print_assembly(print_opto_assembly);
   550   set_parsed_irreducible_loop(false);
   551 #endif
   553   if (ProfileTraps) {
   554     // Make sure the method being compiled gets its own MDO,
   555     // so we can at least track the decompile_count().
   556     method()->ensure_method_data();
   557   }
   559   Init(::AliasLevel);
   562   print_compile_messages();
   564   if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) )
   565     _ilt = InlineTree::build_inline_tree_root();
   566   else
   567     _ilt = NULL;
   569   // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
   570   assert(num_alias_types() >= AliasIdxRaw, "");
   572 #define MINIMUM_NODE_HASH  1023
   573   // Node list that Iterative GVN will start with
   574   Unique_Node_List for_igvn(comp_arena());
   575   set_for_igvn(&for_igvn);
   577   // GVN that will be run immediately on new nodes
   578   uint estimated_size = method()->code_size()*4+64;
   579   estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
   580   PhaseGVN gvn(node_arena(), estimated_size);
   581   set_initial_gvn(&gvn);
   583   { // Scope for timing the parser
   584     TracePhase t3("parse", &_t_parser, true);
   586     // Put top into the hash table ASAP.
   587     initial_gvn()->transform_no_reclaim(top());
   589     // Set up tf(), start(), and find a CallGenerator.
   590     CallGenerator* cg;
   591     if (is_osr_compilation()) {
   592       const TypeTuple *domain = StartOSRNode::osr_domain();
   593       const TypeTuple *range = TypeTuple::make_range(method()->signature());
   594       init_tf(TypeFunc::make(domain, range));
   595       StartNode* s = new (this, 2) StartOSRNode(root(), domain);
   596       initial_gvn()->set_type_bottom(s);
   597       init_start(s);
   598       cg = CallGenerator::for_osr(method(), entry_bci());
   599     } else {
   600       // Normal case.
   601       init_tf(TypeFunc::make(method()));
   602       StartNode* s = new (this, 2) StartNode(root(), tf()->domain());
   603       initial_gvn()->set_type_bottom(s);
   604       init_start(s);
   605       float past_uses = method()->interpreter_invocation_count();
   606       float expected_uses = past_uses;
   607       cg = CallGenerator::for_inline(method(), expected_uses);
   608     }
   609     if (failing())  return;
   610     if (cg == NULL) {
   611       record_method_not_compilable_all_tiers("cannot parse method");
   612       return;
   613     }
   614     JVMState* jvms = build_start_state(start(), tf());
   615     if ((jvms = cg->generate(jvms)) == NULL) {
   616       record_method_not_compilable("method parse failed");
   617       return;
   618     }
   619     GraphKit kit(jvms);
   621     if (!kit.stopped()) {
   622       // Accept return values, and transfer control we know not where.
   623       // This is done by a special, unique ReturnNode bound to root.
   624       return_values(kit.jvms());
   625     }
   627     if (kit.has_exceptions()) {
   628       // Any exceptions that escape from this call must be rethrown
   629       // to whatever caller is dynamically above us on the stack.
   630       // This is done by a special, unique RethrowNode bound to root.
   631       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
   632     }
   634     if (!failing() && has_stringbuilder()) {
   635       {
   636         // remove useless nodes to make the usage analysis simpler
   637         ResourceMark rm;
   638         PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
   639       }
   641       {
   642         ResourceMark rm;
   643         print_method("Before StringOpts", 3);
   644         PhaseStringOpts pso(initial_gvn(), &for_igvn);
   645         print_method("After StringOpts", 3);
   646       }
   648       // now inline anything that we skipped the first time around
   649       while (_late_inlines.length() > 0) {
   650         CallGenerator* cg = _late_inlines.pop();
   651         cg->do_late_inline();
   652       }
   653     }
   654     assert(_late_inlines.length() == 0, "should have been processed");
   656     print_method("Before RemoveUseless", 3);
   658     // Remove clutter produced by parsing.
   659     if (!failing()) {
   660       ResourceMark rm;
   661       PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
   662     }
   663   }
   665   // Note:  Large methods are capped off in do_one_bytecode().
   666   if (failing())  return;
   668   // After parsing, node notes are no longer automagic.
   669   // They must be propagated by register_new_node_with_optimizer(),
   670   // clone(), or the like.
   671   set_default_node_notes(NULL);
   673   for (;;) {
   674     int successes = Inline_Warm();
   675     if (failing())  return;
   676     if (successes == 0)  break;
   677   }
   679   // Drain the list.
   680   Finish_Warm();
   681 #ifndef PRODUCT
   682   if (_printer) {
   683     _printer->print_inlining(this);
   684   }
   685 #endif
   687   if (failing())  return;
   688   NOT_PRODUCT( verify_graph_edges(); )
   690   // Now optimize
   691   Optimize();
   692   if (failing())  return;
   693   NOT_PRODUCT( verify_graph_edges(); )
   695 #ifndef PRODUCT
   696   if (PrintIdeal) {
   697     ttyLocker ttyl;  // keep the following output all in one block
   698     // This output goes directly to the tty, not the compiler log.
   699     // To enable tools to match it up with the compilation activity,
   700     // be sure to tag this tty output with the compile ID.
   701     if (xtty != NULL) {
   702       xtty->head("ideal compile_id='%d'%s", compile_id(),
   703                  is_osr_compilation()    ? " compile_kind='osr'" :
   704                  "");
   705     }
   706     root()->dump(9999);
   707     if (xtty != NULL) {
   708       xtty->tail("ideal");
   709     }
   710   }
   711 #endif
   713   // Now that we know the size of all the monitors we can add a fixed slot
   714   // for the original deopt pc.
   716   _orig_pc_slot =  fixed_slots();
   717   int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
   718   set_fixed_slots(next_slot);
   720   // Now generate code
   721   Code_Gen();
   722   if (failing())  return;
   724   // Check if we want to skip execution of all compiled code.
   725   {
   726 #ifndef PRODUCT
   727     if (OptoNoExecute) {
   728       record_method_not_compilable("+OptoNoExecute");  // Flag as failed
   729       return;
   730     }
   731     TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
   732 #endif
   734     if (is_osr_compilation()) {
   735       _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
   736       _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
   737     } else {
   738       _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
   739       _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
   740     }
   742     env()->register_method(_method, _entry_bci,
   743                            &_code_offsets,
   744                            _orig_pc_slot_offset_in_bytes,
   745                            code_buffer(),
   746                            frame_size_in_words(), _oop_map_set,
   747                            &_handler_table, &_inc_table,
   748                            compiler,
   749                            env()->comp_level(),
   750                            true, /*has_debug_info*/
   751                            has_unsafe_access()
   752                            );
   753   }
   754 }
   756 //------------------------------Compile----------------------------------------
   757 // Compile a runtime stub
   758 Compile::Compile( ciEnv* ci_env,
   759                   TypeFunc_generator generator,
   760                   address stub_function,
   761                   const char *stub_name,
   762                   int is_fancy_jump,
   763                   bool pass_tls,
   764                   bool save_arg_registers,
   765                   bool return_pc )
   766   : Phase(Compiler),
   767     _env(ci_env),
   768     _log(ci_env->log()),
   769     _compile_id(-1),
   770     _save_argument_registers(save_arg_registers),
   771     _method(NULL),
   772     _stub_name(stub_name),
   773     _stub_function(stub_function),
   774     _stub_entry_point(NULL),
   775     _entry_bci(InvocationEntryBci),
   776     _initial_gvn(NULL),
   777     _for_igvn(NULL),
   778     _warm_calls(NULL),
   779     _orig_pc_slot(0),
   780     _orig_pc_slot_offset_in_bytes(0),
   781     _subsume_loads(true),
   782     _do_escape_analysis(false),
   783     _failure_reason(NULL),
   784     _code_buffer("Compile::Fill_buffer"),
   785     _has_method_handle_invokes(false),
   786     _node_bundling_limit(0),
   787     _node_bundling_base(NULL),
   788     _java_calls(0),
   789     _inner_loops(0),
   790 #ifndef PRODUCT
   791     _trace_opto_output(TraceOptoOutput),
   792     _printer(NULL),
   793 #endif
   794     _congraph(NULL) {
   795   C = this;
   797 #ifndef PRODUCT
   798   TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
   799   TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
   800   set_print_assembly(PrintFrameConverterAssembly);
   801   set_parsed_irreducible_loop(false);
   802 #endif
   803   CompileWrapper cw(this);
   804   Init(/*AliasLevel=*/ 0);
   805   init_tf((*generator)());
   807   {
   808     // The following is a dummy for the sake of GraphKit::gen_stub
   809     Unique_Node_List for_igvn(comp_arena());
   810     set_for_igvn(&for_igvn);  // not used, but some GraphKit guys push on this
   811     PhaseGVN gvn(Thread::current()->resource_area(),255);
   812     set_initial_gvn(&gvn);    // not significant, but GraphKit guys use it pervasively
   813     gvn.transform_no_reclaim(top());
   815     GraphKit kit;
   816     kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
   817   }
   819   NOT_PRODUCT( verify_graph_edges(); )
   820   Code_Gen();
   821   if (failing())  return;
   824   // Entry point will be accessed using compile->stub_entry_point();
   825   if (code_buffer() == NULL) {
   826     Matcher::soft_match_failure();
   827   } else {
   828     if (PrintAssembly && (WizardMode || Verbose))
   829       tty->print_cr("### Stub::%s", stub_name);
   831     if (!failing()) {
   832       assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");
   834       // Make the NMethod
   835       // For now we mark the frame as never safe for profile stackwalking
   836       RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
   837                                                       code_buffer(),
   838                                                       CodeOffsets::frame_never_safe,
   839                                                       // _code_offsets.value(CodeOffsets::Frame_Complete),
   840                                                       frame_size_in_words(),
   841                                                       _oop_map_set,
   842                                                       save_arg_registers);
   843       assert(rs != NULL && rs->is_runtime_stub(), "sanity check");
   845       _stub_entry_point = rs->entry_point();
   846     }
   847   }
   848 }
   850 #ifndef PRODUCT
   851 void print_opto_verbose_signature( const TypeFunc *j_sig, const char *stub_name ) {
   852   if(PrintOpto && Verbose) {
   853     tty->print("%s   ", stub_name); j_sig->print_flattened(); tty->cr();
   854   }
   855 }
   856 #endif
   858 void Compile::print_codes() {
   859 }
   861 //------------------------------Init-------------------------------------------
   862 // Prepare for a single compilation
   863 void Compile::Init(int aliaslevel) {
   864   _unique  = 0;
   865   _regalloc = NULL;
   867   _tf      = NULL;  // filled in later
   868   _top     = NULL;  // cached later
   869   _matcher = NULL;  // filled in later
   870   _cfg     = NULL;  // filled in later
   872   set_24_bit_selection_and_mode(Use24BitFP, false);
   874   _node_note_array = NULL;
   875   _default_node_notes = NULL;
   877   _immutable_memory = NULL; // filled in at first inquiry
   879   // Globally visible Nodes
   880   // First set TOP to NULL to give safe behavior during creation of RootNode
   881   set_cached_top_node(NULL);
   882   set_root(new (this, 3) RootNode());
   883   // Now that you have a Root to point to, create the real TOP
   884   set_cached_top_node( new (this, 1) ConNode(Type::TOP) );
   885   set_recent_alloc(NULL, NULL);
   887   // Create Debug Information Recorder to record scopes, oopmaps, etc.
   888   env()->set_oop_recorder(new OopRecorder(comp_arena()));
   889   env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
   890   env()->set_dependencies(new Dependencies(env()));
   892   _fixed_slots = 0;
   893   set_has_split_ifs(false);
   894   set_has_loops(has_method() && method()->has_loops()); // first approximation
   895   set_has_stringbuilder(false);
   896   _trap_can_recompile = false;  // no traps emitted yet
   897   _major_progress = true; // start out assuming good things will happen
   898   set_has_unsafe_access(false);
   899   Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
   900   set_decompile_count(0);
   902   set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
   903   set_num_loop_opts(LoopOptsCount);
   904   set_do_inlining(Inline);
   905   set_max_inline_size(MaxInlineSize);
   906   set_freq_inline_size(FreqInlineSize);
   907   set_do_scheduling(OptoScheduling);
   908   set_do_count_invocations(false);
   909   set_do_method_data_update(false);
   911   if (debug_info()->recording_non_safepoints()) {
   912     set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
   913                         (comp_arena(), 8, 0, NULL));
   914     set_default_node_notes(Node_Notes::make(this));
   915   }
   917   // // -- Initialize types before each compile --
   918   // // Update cached type information
   919   // if( _method && _method->constants() )
   920   //   Type::update_loaded_types(_method, _method->constants());
   922   // Init alias_type map.
   923   if (!_do_escape_analysis && aliaslevel == 3)
   924     aliaslevel = 2;  // No unique types without escape analysis
   925   _AliasLevel = aliaslevel;
   926   const int grow_ats = 16;
   927   _max_alias_types = grow_ats;
   928   _alias_types   = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
   929   AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType,  grow_ats);
   930   Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
   931   {
   932     for (int i = 0; i < grow_ats; i++)  _alias_types[i] = &ats[i];
   933   }
   934   // Initialize the first few types.
   935   _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
   936   _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
   937   _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
   938   _num_alias_types = AliasIdxRaw+1;
   939   // Zero out the alias type cache.
   940   Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
   941   // A NULL adr_type hits in the cache right away.  Preload the right answer.
   942   probe_alias_cache(NULL)->_index = AliasIdxTop;
   944   _intrinsics = NULL;
   945   _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   946   _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   947   register_library_intrinsics();
   948 }
   950 //---------------------------init_start----------------------------------------
   951 // Install the StartNode on this compile object.
   952 void Compile::init_start(StartNode* s) {
   953   if (failing())
   954     return; // already failing
   955   assert(s == start(), "");
   956 }
   958 StartNode* Compile::start() const {
   959   assert(!failing(), "");
   960   for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {
   961     Node* start = root()->fast_out(i);
   962     if( start->is_Start() )
   963       return start->as_Start();
   964   }
   965   ShouldNotReachHere();
   966   return NULL;
   967 }
   969 //-------------------------------immutable_memory-------------------------------------
   970 // Access immutable memory
   971 Node* Compile::immutable_memory() {
   972   if (_immutable_memory != NULL) {
   973     return _immutable_memory;
   974   }
   975   StartNode* s = start();
   976   for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {
   977     Node *p = s->fast_out(i);
   978     if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {
   979       _immutable_memory = p;
   980       return _immutable_memory;
   981     }
   982   }
   983   ShouldNotReachHere();
   984   return NULL;
   985 }
   987 //----------------------set_cached_top_node------------------------------------
   988 // Install the cached top node, and make sure Node::is_top works correctly.
   989 void Compile::set_cached_top_node(Node* tn) {
   990   if (tn != NULL)  verify_top(tn);
   991   Node* old_top = _top;
   992   _top = tn;
   993   // Calling Node::setup_is_top allows the nodes the chance to adjust
   994   // their _out arrays.
   995   if (_top != NULL)     _top->setup_is_top();
   996   if (old_top != NULL)  old_top->setup_is_top();
   997   assert(_top == NULL || top()->is_top(), "");
   998 }
  1000 #ifndef PRODUCT
  1001 void Compile::verify_top(Node* tn) const {
  1002   if (tn != NULL) {
  1003     assert(tn->is_Con(), "top node must be a constant");
  1004     assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
  1005     assert(tn->in(0) != NULL, "must have live top node");
  1006   }
  1007 }
  1008 #endif
  1011 ///-------------------Managing Per-Node Debug & Profile Info-------------------
  1013 void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
  1014   guarantee(arr != NULL, "");
  1015   int num_blocks = arr->length();
  1016   if (grow_by < num_blocks)  grow_by = num_blocks;
  1017   int num_notes = grow_by * _node_notes_block_size;
  1018   Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);
  1019   Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));
  1020   while (num_notes > 0) {
  1021     arr->append(notes);
  1022     notes     += _node_notes_block_size;
  1023     num_notes -= _node_notes_block_size;
  1024   }
  1025   assert(num_notes == 0, "exact multiple, please");
  1026 }
  1028 bool Compile::copy_node_notes_to(Node* dest, Node* source) {
  1029   if (source == NULL || dest == NULL)  return false;
  1031   if (dest->is_Con())
  1032     return false;               // Do not push debug info onto constants.
  1034 #ifdef ASSERT
  1035   // Leave a bread crumb trail pointing to the original node:
  1036   if (dest != NULL && dest != source && dest->debug_orig() == NULL) {
  1037     dest->set_debug_orig(source);
  1038   }
  1039 #endif
  1041   if (node_note_array() == NULL)
  1042     return false;               // Not collecting any notes now.
  1044   // This is a copy onto a pre-existing node, which may already have notes.
  1045   // If both nodes have notes, do not overwrite any pre-existing notes.
  1046   Node_Notes* source_notes = node_notes_at(source->_idx);
  1047   if (source_notes == NULL || source_notes->is_clear())  return false;
  1048   Node_Notes* dest_notes   = node_notes_at(dest->_idx);
  1049   if (dest_notes == NULL || dest_notes->is_clear()) {
  1050     return set_node_notes_at(dest->_idx, source_notes);
  1051   }
  1053   Node_Notes merged_notes = (*source_notes);
  1054   // The order of operations here ensures that dest notes will win...
  1055   merged_notes.update_from(dest_notes);
  1056   return set_node_notes_at(dest->_idx, &merged_notes);
  1057 }
  1060 //--------------------------allow_range_check_smearing-------------------------
  1061 // Gating condition for coalescing similar range checks.
  1062 // Sometimes we try 'speculatively' replacing a series of a range checks by a
  1063 // single covering check that is at least as strong as any of them.
  1064 // If the optimization succeeds, the simplified (strengthened) range check
  1065 // will always succeed.  If it fails, we will deopt, and then give up
  1066 // on the optimization.
  1067 bool Compile::allow_range_check_smearing() const {
  1068   // If this method has already thrown a range-check,
  1069   // assume it was because we already tried range smearing
  1070   // and it failed.
  1071   uint already_trapped = trap_count(Deoptimization::Reason_range_check);
  1072   return !already_trapped;
  1073 }
  1076 //------------------------------flatten_alias_type-----------------------------
  1077 const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
  1078   int offset = tj->offset();
  1079   TypePtr::PTR ptr = tj->ptr();
  1081   // Known instance (scalarizable allocation) alias only with itself.
  1082   bool is_known_inst = tj->isa_oopptr() != NULL &&
  1083                        tj->is_oopptr()->is_known_instance();
  1085   // Process weird unsafe references.
  1086   if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
  1087     assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
  1088     assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
  1089     tj = TypeOopPtr::BOTTOM;
  1090     ptr = tj->ptr();
  1091     offset = tj->offset();
  1092   }
  1094   // Array pointers need some flattening
  1095   const TypeAryPtr *ta = tj->isa_aryptr();
  1096   if( ta && is_known_inst ) {
  1097     if ( offset != Type::OffsetBot &&
  1098          offset > arrayOopDesc::length_offset_in_bytes() ) {
  1099       offset = Type::OffsetBot; // Flatten constant access into array body only
  1100       tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
  1101     }
  1102   } else if( ta && _AliasLevel >= 2 ) {
  1103     // For arrays indexed by constant indices, we flatten the alias
  1104     // space to include all of the array body.  Only the header, klass
  1105     // and array length can be accessed un-aliased.
  1106     if( offset != Type::OffsetBot ) {
  1107       if( ta->const_oop() ) { // methodDataOop or methodOop
  1108         offset = Type::OffsetBot;   // Flatten constant access into array body
  1109         tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
  1110       } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
  1111         // range is OK as-is.
  1112         tj = ta = TypeAryPtr::RANGE;
  1113       } else if( offset == oopDesc::klass_offset_in_bytes() ) {
  1114         tj = TypeInstPtr::KLASS; // all klass loads look alike
  1115         ta = TypeAryPtr::RANGE; // generic ignored junk
  1116         ptr = TypePtr::BotPTR;
  1117       } else if( offset == oopDesc::mark_offset_in_bytes() ) {
  1118         tj = TypeInstPtr::MARK;
  1119         ta = TypeAryPtr::RANGE; // generic ignored junk
  1120         ptr = TypePtr::BotPTR;
  1121       } else {                  // Random constant offset into array body
  1122         offset = Type::OffsetBot;   // Flatten constant access into array body
  1123         tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
  1124       }
  1125     }
  1126     // Arrays of fixed size alias with arrays of unknown size.
  1127     if (ta->size() != TypeInt::POS) {
  1128       const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
  1129       tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
  1130     }
  1131     // Arrays of known objects become arrays of unknown objects.
  1132     if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
  1133       const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
  1134       tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
  1135     }
  1136     if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
  1137       const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
  1138       tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
  1139     }
  1140     // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
  1141     // cannot be distinguished by bytecode alone.
  1142     if (ta->elem() == TypeInt::BOOL) {
  1143       const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
  1144       ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
  1145       tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
  1146     }
  1147     // During the 2nd round of IterGVN, NotNull castings are removed.
  1148     // Make sure the Bottom and NotNull variants alias the same.
  1149     // Also, make sure exact and non-exact variants alias the same.
  1150     if( ptr == TypePtr::NotNull || ta->klass_is_exact() ) {
  1151       if (ta->const_oop()) {
  1152         tj = ta = TypeAryPtr::make(TypePtr::Constant,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
  1153       } else {
  1154         tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
  1155       }
  1156     }
  1157   }
  1159   // Oop pointers need some flattening
  1160   const TypeInstPtr *to = tj->isa_instptr();
  1161   if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
  1162     if( ptr == TypePtr::Constant ) {
  1163       // No constant oop pointers (such as Strings); they alias with
  1164       // unknown strings.
  1165       assert(!is_known_inst, "not scalarizable allocation");
  1166       tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
  1167     } else if( is_known_inst ) {
  1168       tj = to; // Keep NotNull and klass_is_exact for instance type
  1169     } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
  1170       // During the 2nd round of IterGVN, NotNull castings are removed.
  1171       // Make sure the Bottom and NotNull variants alias the same.
  1172       // Also, make sure exact and non-exact variants alias the same.
  1173       tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
  1174     }
  1175     // Canonicalize the holder of this field
  1176     ciInstanceKlass *k = to->klass()->as_instance_klass();
  1177     if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
  1178       // First handle header references such as a LoadKlassNode, even if the
  1179       // object's klass is unloaded at compile time (4965979).
  1180       if (!is_known_inst) { // Do it only for non-instance types
  1181         tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
  1182       }
  1183     } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
  1184       to = NULL;
  1185       tj = TypeOopPtr::BOTTOM;
  1186       offset = tj->offset();
  1187     } else {
  1188       ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
  1189       if (!k->equals(canonical_holder) || tj->offset() != offset) {
  1190         if( is_known_inst ) {
  1191           tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
  1192         } else {
  1193           tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
  1194         }
  1195       }
  1196     }
  1197   }
  1199   // Klass pointers to object array klasses need some flattening
  1200   const TypeKlassPtr *tk = tj->isa_klassptr();
  1201   if( tk ) {
  1202     // If we are referencing a field within a Klass, we need
  1203     // to assume the worst case of an Object.  Both exact and
  1204     // inexact types must flatten to the same alias class.
  1205     // Since the flattened result for a klass is defined to be
  1206     // precisely java.lang.Object, use a constant ptr.
  1207     if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
  1209       tj = tk = TypeKlassPtr::make(TypePtr::Constant,
  1210                                    TypeKlassPtr::OBJECT->klass(),
  1211                                    offset);
  1212     }
  1214     ciKlass* klass = tk->klass();
  1215     if( klass->is_obj_array_klass() ) {
  1216       ciKlass* k = TypeAryPtr::OOPS->klass();
  1217       if( !k || !k->is_loaded() )                  // Only fails for some -Xcomp runs
  1218         k = TypeInstPtr::BOTTOM->klass();
  1219       tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
  1220     }
  1222     // Check for precise loads from the primary supertype array and force them
  1223     // to the supertype cache alias index.  Check for generic array loads from
  1224     // the primary supertype array and also force them to the supertype cache
  1225     // alias index.  Since the same load can reach both, we need to merge
  1226     // these 2 disparate memories into the same alias class.  Since the
  1227     // primary supertype array is read-only, there's no chance of confusion
  1228     // where we bypass an array load and an array store.
  1229     uint off2 = offset - Klass::primary_supers_offset_in_bytes();
  1230     if( offset == Type::OffsetBot ||
  1231         off2 < Klass::primary_super_limit()*wordSize ) {
  1232       offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes();
  1233       tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
  1234     }
  1235   }
  1237   // Flatten all Raw pointers together.
  1238   if (tj->base() == Type::RawPtr)
  1239     tj = TypeRawPtr::BOTTOM;
  1241   if (tj->base() == Type::AnyPtr)
  1242     tj = TypePtr::BOTTOM;      // An error, which the caller must check for.
  1244   // Flatten all to bottom for now
  1245   switch( _AliasLevel ) {
  1246   case 0:
  1247     tj = TypePtr::BOTTOM;
  1248     break;
  1249   case 1:                       // Flatten to: oop, static, field or array
  1250     switch (tj->base()) {
  1251     //case Type::AryPtr: tj = TypeAryPtr::RANGE;    break;
  1252     case Type::RawPtr:   tj = TypeRawPtr::BOTTOM;   break;
  1253     case Type::AryPtr:   // do not distinguish arrays at all
  1254     case Type::InstPtr:  tj = TypeInstPtr::BOTTOM;  break;
  1255     case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
  1256     case Type::AnyPtr:   tj = TypePtr::BOTTOM;      break;  // caller checks it
  1257     default: ShouldNotReachHere();
  1258     }
  1259     break;
  1260   case 2:                       // No collapsing at level 2; keep all splits
  1261   case 3:                       // No collapsing at level 3; keep all splits
  1262     break;
  1263   default:
  1264     Unimplemented();
  1265   }
  1267   offset = tj->offset();
  1268   assert( offset != Type::OffsetTop, "Offset has fallen from constant" );
  1270   assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
  1271           (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
  1272           (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
  1273           (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
  1274           (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
  1275           (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
  1276           (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr)  ,
  1277           "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
  1278   assert( tj->ptr() != TypePtr::TopPTR &&
  1279           tj->ptr() != TypePtr::AnyNull &&
  1280           tj->ptr() != TypePtr::Null, "No imprecise addresses" );
  1281 //    assert( tj->ptr() != TypePtr::Constant ||
  1282 //            tj->base() == Type::RawPtr ||
  1283 //            tj->base() == Type::KlassPtr, "No constant oop addresses" );
  1285   return tj;
  1286 }
  1288 void Compile::AliasType::Init(int i, const TypePtr* at) {
  1289   _index = i;
  1290   _adr_type = at;
  1291   _field = NULL;
  1292   _is_rewritable = true; // default
  1293   const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
  1294   if (atoop != NULL && atoop->is_known_instance()) {
  1295     const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);
  1296     _general_index = Compile::current()->get_alias_index(gt);
  1297   } else {
  1298     _general_index = 0;
  1299   }
  1300 }
  1302 //---------------------------------print_on------------------------------------
  1303 #ifndef PRODUCT
  1304 void Compile::AliasType::print_on(outputStream* st) {
  1305   if (index() < 10)
  1306         st->print("@ <%d> ", index());
  1307   else  st->print("@ <%d>",  index());
  1308   st->print(is_rewritable() ? "   " : " RO");
  1309   int offset = adr_type()->offset();
  1310   if (offset == Type::OffsetBot)
  1311         st->print(" +any");
  1312   else  st->print(" +%-3d", offset);
  1313   st->print(" in ");
  1314   adr_type()->dump_on(st);
  1315   const TypeOopPtr* tjp = adr_type()->isa_oopptr();
  1316   if (field() != NULL && tjp) {
  1317     if (tjp->klass()  != field()->holder() ||
  1318         tjp->offset() != field()->offset_in_bytes()) {
  1319       st->print(" != ");
  1320       field()->print();
  1321       st->print(" ***");
  1326 void print_alias_types() {
  1327   Compile* C = Compile::current();
  1328   tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
  1329   for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
  1330     C->alias_type(idx)->print_on(tty);
  1331     tty->cr();
  1334 #endif
  1337 //----------------------------probe_alias_cache--------------------------------
  1338 Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
  1339   intptr_t key = (intptr_t) adr_type;
  1340   key ^= key >> logAliasCacheSize;
  1341   return &_alias_cache[key & right_n_bits(logAliasCacheSize)];
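// Editor's note: the probe above is a direct-mapped, pointer-keyed cache: the
// key is folded with a shift-xor and masked down to a power-of-two table size.
// Below is a minimal standalone sketch of the same hashing scheme; toy_log_size
// and the struct are hypothetical, and the block is not compiled (#if 0).
#if 0
#include <stdint.h>

struct ToyCacheEntry { const void* key; int value; };

static const int     toy_log_size = 6;                 // 64 entries, hypothetical
static ToyCacheEntry toy_cache[1 << toy_log_size];

static ToyCacheEntry* toy_probe(const void* key) {
  intptr_t k = (intptr_t) key;
  k ^= k >> toy_log_size;                              // fold high bits down
  return &toy_cache[k & ((1 << toy_log_size) - 1)];    // mask to a table index
}
#endif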
  1345 //-----------------------------grow_alias_types--------------------------------
  1346 void Compile::grow_alias_types() {
  1347   const int old_ats  = _max_alias_types; // how many before?
  1348   const int new_ats  = old_ats;          // how many more?
  1349   const int grow_ats = old_ats+new_ats;  // how many now?
  1350   _max_alias_types = grow_ats;
  1351   _alias_types =  REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
  1352   AliasType* ats =    NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
  1353   Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
  1354   for (int i = 0; i < new_ats; i++)  _alias_types[old_ats+i] = &ats[i];
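// Editor's note: grow_alias_types() doubles the capacity (new_ats == old_ats),
// reallocates the pointer array, and carves all the new AliasType objects out
// of one zeroed block.  A minimal standalone sketch of that doubling pattern
// with plain new[] instead of the HotSpot arena macros (names are hypothetical;
// earlier blocks intentionally leak here, much as arena storage persists):
#if 0
#include <cstring>

struct ToySlot { int index; };

static ToySlot** toy_slots    = NULL;
static int       toy_capacity = 0;

static void toy_grow() {
  int new_cap = (toy_capacity == 0) ? 4 : 2 * toy_capacity;    // double each time
  int added   = new_cap - toy_capacity;
  ToySlot** bigger = new ToySlot*[new_cap];                    // "realloc" the pointer array
  for (int i = 0; i < toy_capacity; i++) bigger[i] = toy_slots[i];
  delete[] toy_slots;
  toy_slots = bigger;
  ToySlot* block = new ToySlot[added];                         // one block for all new slots
  memset(block, 0, sizeof(ToySlot) * added);
  for (int i = 0; i < added; i++) toy_slots[toy_capacity + i] = &block[i];
  toy_capacity = new_cap;
}
#endif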
  1358 //--------------------------------find_alias_type------------------------------
  1359 Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create) {
  1360   if (_AliasLevel == 0)
  1361     return alias_type(AliasIdxBot);
  1363   AliasCacheEntry* ace = probe_alias_cache(adr_type);
  1364   if (ace->_adr_type == adr_type) {
  1365     return alias_type(ace->_index);
  1368   // Handle special cases.
  1369   if (adr_type == NULL)             return alias_type(AliasIdxTop);
  1370   if (adr_type == TypePtr::BOTTOM)  return alias_type(AliasIdxBot);
  1372   // Do it the slow way.
  1373   const TypePtr* flat = flatten_alias_type(adr_type);
  1375 #ifdef ASSERT
  1376   assert(flat == flatten_alias_type(flat), "idempotent");
  1377   assert(flat != TypePtr::BOTTOM,     "cannot alias-analyze an untyped ptr");
  1378   if (flat->isa_oopptr() && !flat->isa_klassptr()) {
  1379     const TypeOopPtr* foop = flat->is_oopptr();
  1380     // Scalarizable allocations have exact klass always.
  1381     bool exact = !foop->klass_is_exact() || foop->is_known_instance();
  1382     const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
  1383     assert(foop == flatten_alias_type(xoop), "exactness must not affect alias type");
  1385   assert(flat == flatten_alias_type(flat), "exact bit doesn't matter");
  1386 #endif
  1388   int idx = AliasIdxTop;
  1389   for (int i = 0; i < num_alias_types(); i++) {
  1390     if (alias_type(i)->adr_type() == flat) {
  1391       idx = i;
  1392       break;
  1396   if (idx == AliasIdxTop) {
  1397     if (no_create)  return NULL;
  1398     // Grow the array if necessary.
  1399     if (_num_alias_types == _max_alias_types)  grow_alias_types();
  1400     // Add a new alias type.
  1401     idx = _num_alias_types++;
  1402     _alias_types[idx]->Init(idx, flat);
  1403     if (flat == TypeInstPtr::KLASS)  alias_type(idx)->set_rewritable(false);
  1404     if (flat == TypeAryPtr::RANGE)   alias_type(idx)->set_rewritable(false);
  1405     if (flat->isa_instptr()) {
  1406       if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
  1407           && flat->is_instptr()->klass() == env()->Class_klass())
  1408         alias_type(idx)->set_rewritable(false);
  1410     if (flat->isa_klassptr()) {
  1411       if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc))
  1412         alias_type(idx)->set_rewritable(false);
  1413       if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc))
  1414         alias_type(idx)->set_rewritable(false);
  1415       if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc))
  1416         alias_type(idx)->set_rewritable(false);
  1417       if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc))
  1418         alias_type(idx)->set_rewritable(false);
  1420     // %%% (We would like to finalize JavaThread::threadObj_offset(),
  1421     // but the base pointer type is not distinctive enough to identify
  1422     // references into JavaThread.)
  1424     // Check for final instance fields.
  1425     const TypeInstPtr* tinst = flat->isa_instptr();
  1426     if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {
  1427       ciInstanceKlass *k = tinst->klass()->as_instance_klass();
  1428       ciField* field = k->get_field_by_offset(tinst->offset(), false);
  1429       // Set field() and is_rewritable() attributes.
  1430       if (field != NULL)  alias_type(idx)->set_field(field);
  1432     const TypeKlassPtr* tklass = flat->isa_klassptr();
  1433     // Check for final static fields.
  1434     if (tklass && tklass->klass()->is_instance_klass()) {
  1435       ciInstanceKlass *k = tklass->klass()->as_instance_klass();
  1436       ciField* field = k->get_field_by_offset(tklass->offset(), true);
  1437       // Set field() and is_rewritable() attributes.
  1438       if (field != NULL)   alias_type(idx)->set_field(field);
  1442   // Fill the cache for next time.
  1443   ace->_adr_type = adr_type;
  1444   ace->_index    = idx;
  1445   assert(alias_type(adr_type) == alias_type(idx),  "type must be installed");
  1447   // Might as well try to fill the cache for the flattened version, too.
  1448   AliasCacheEntry* face = probe_alias_cache(flat);
  1449   if (face->_adr_type == NULL) {
  1450     face->_adr_type = flat;
  1451     face->_index    = idx;
  1452     assert(alias_type(flat) == alias_type(idx), "flat type must work too");
  1455   return alias_type(idx);
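// Editor's note: find_alias_type() is a classic find-or-create: probe the cache,
// fall back to a linear scan of the (small) alias table, append a new entry if
// none matches and creation is allowed, then refill the cache.  A minimal
// standalone sketch of that shape using std::vector instead of arena arrays
// (names hypothetical, not compiled):
#if 0
#include <vector>
#include <string>

struct ToyAliasType { int index; std::string adr_type; };
static std::vector<ToyAliasType> toy_alias_table;

static int toy_find_or_create(const std::string& flat, bool no_create) {
  for (size_t i = 0; i < toy_alias_table.size(); i++) {
    if (toy_alias_table[i].adr_type == flat) return (int) i;   // linear scan
  }
  if (no_create) return -1;                                    // caller is only probing
  ToyAliasType at;                                             // append a new entry
  at.index    = (int) toy_alias_table.size();
  at.adr_type = flat;
  toy_alias_table.push_back(at);
  return at.index;
}
#endif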
  1459 Compile::AliasType* Compile::alias_type(ciField* field) {
  1460   const TypeOopPtr* t;
  1461   if (field->is_static())
  1462     t = TypeKlassPtr::make(field->holder());
  1463   else
  1464     t = TypeOopPtr::make_from_klass_raw(field->holder());
  1465   AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()));
  1466   assert(field->is_final() == !atp->is_rewritable(), "must get the rewritable bits correct");
  1467   return atp;
  1471 //------------------------------have_alias_type--------------------------------
  1472 bool Compile::have_alias_type(const TypePtr* adr_type) {
  1473   AliasCacheEntry* ace = probe_alias_cache(adr_type);
  1474   if (ace->_adr_type == adr_type) {
  1475     return true;
  1478   // Handle special cases.
  1479   if (adr_type == NULL)             return true;
  1480   if (adr_type == TypePtr::BOTTOM)  return true;
  1482   return find_alias_type(adr_type, true) != NULL;
  1485 //-----------------------------must_alias--------------------------------------
  1486 // True if all values of the given address type are in the given alias category.
  1487 bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
  1488   if (alias_idx == AliasIdxBot)         return true;  // the universal category
  1489   if (adr_type == NULL)                 return true;  // NULL serves as TypePtr::TOP
  1490   if (alias_idx == AliasIdxTop)         return false; // the empty category
  1491   if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins
  1493   // the only remaining possible overlap is identity
  1494   int adr_idx = get_alias_index(adr_type);
  1495   assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
  1496   assert(adr_idx == alias_idx ||
  1497          (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
  1498           && adr_type                       != TypeOopPtr::BOTTOM),
  1499          "should not be testing for overlap with an unsafe pointer");
  1500   return adr_idx == alias_idx;
  1503 //------------------------------can_alias--------------------------------------
  1504 // True if any values of the given address type are in the given alias category.
  1505 bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
  1506   if (alias_idx == AliasIdxTop)         return false; // the empty category
  1507   if (adr_type == NULL)                 return false; // NULL serves as TypePtr::TOP
  1508   if (alias_idx == AliasIdxBot)         return true;  // the universal category
  1509   if (adr_type->base() == Type::AnyPtr) return true;  // TypePtr::BOTTOM or its twins
  1511   // the only remaining possible overlap is identity
  1512   int adr_idx = get_alias_index(adr_type);
  1513   assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
  1514   return adr_idx == alias_idx;
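// Editor's note: must_alias() asks whether *all* addresses of a type fall into
// one category, can_alias() whether *any* do, so must_alias implies can_alias.
// A minimal standalone sketch with an address type modeled as the set of
// category indices its values may occupy (purely illustrative, not the
// compiler's representation):
#if 0
#include <set>

static bool toy_must_alias(const std::set<int>& categories, int idx) {
  return categories.size() == 1 && *categories.begin() == idx;
}

static bool toy_can_alias(const std::set<int>& categories, int idx) {
  return categories.count(idx) != 0;
}
#endif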
  1519 //---------------------------pop_warm_call-------------------------------------
  1520 WarmCallInfo* Compile::pop_warm_call() {
  1521   WarmCallInfo* wci = _warm_calls;
  1522   if (wci != NULL)  _warm_calls = wci->remove_from(wci);
  1523   return wci;
  1526 //----------------------------Inline_Warm--------------------------------------
  1527 int Compile::Inline_Warm() {
  1528   // If there is room, try to inline some more warm call sites.
  1529   // %%% Do a graph index compaction pass when we think we're out of space?
  1530   if (!InlineWarmCalls)  return 0;
  1532   int calls_made_hot = 0;
  1533   int room_to_grow   = NodeCountInliningCutoff - unique();
  1534   int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
  1535   int amount_grown   = 0;
  1536   WarmCallInfo* call;
  1537   while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
  1538     int est_size = (int)call->size();
  1539     if (est_size > (room_to_grow - amount_grown)) {
  1540       // This one won't fit anyway.  Get rid of it.
  1541       call->make_cold();
  1542       continue;
  1544     call->make_hot();
  1545     calls_made_hot++;
  1546     amount_grown   += est_size;
  1547     amount_to_grow -= est_size;
  1550   if (calls_made_hot > 0)  set_major_progress();
  1551   return calls_made_hot;
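// Editor's note: Inline_Warm() spends a node budget greedily: each warm call is
// made hot if its estimated size still fits, and dropped (made cold) otherwise.
// A minimal standalone sketch of that budgeting loop over a plain list of
// estimated sizes (hypothetical, no WarmCallInfo, not compiled):
#if 0
#include <vector>

static int toy_inline_warm(const std::vector<int>& call_sizes, int room_to_grow) {
  int made_hot     = 0;
  int amount_grown = 0;
  for (size_t i = 0; i < call_sizes.size(); i++) {
    int est_size = call_sizes[i];
    if (est_size > room_to_grow - amount_grown) continue;  // won't fit: "make cold"
    made_hot++;                                            // fits: "make hot"
    amount_grown += est_size;
  }
  return made_hot;
}
#endif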
  1555 //----------------------------Finish_Warm--------------------------------------
  1556 void Compile::Finish_Warm() {
  1557   if (!InlineWarmCalls)  return;
  1558   if (failing())  return;
  1559   if (warm_calls() == NULL)  return;
  1561   // Clean up loose ends, if we are out of space for inlining.
  1562   WarmCallInfo* call;
  1563   while ((call = pop_warm_call()) != NULL) {
  1564     call->make_cold();
  1568 //---------------------cleanup_loop_predicates-----------------------
  1569 // Remove the opaque nodes that protect the predicates so that all unused
  1570 // checks and uncommon_traps will be eliminated from the ideal graph
  1571 void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
  1572   if (predicate_count()==0) return;
  1573   for (int i = predicate_count(); i > 0; i--) {
  1574     Node * n = predicate_opaque1_node(i-1);
  1575     assert(n->Opcode() == Op_Opaque1, "must be");
  1576     igvn.replace_node(n, n->in(1));
  1578   assert(predicate_count()==0, "should be clean!");
  1579   igvn.optimize();
  1582 //------------------------------Optimize---------------------------------------
  1583 // Given a graph, optimize it.
  1584 void Compile::Optimize() {
  1585   TracePhase t1("optimizer", &_t_optimizer, true);
  1587 #ifndef PRODUCT
  1588   if (env()->break_at_compile()) {
  1589     BREAKPOINT;
  1592 #endif
  1594   ResourceMark rm;
  1595   int          loop_opts_cnt;
  1597   NOT_PRODUCT( verify_graph_edges(); )
  1599   print_method("After Parsing");
  1602   // Iterative Global Value Numbering, including ideal transforms
  1603   // Initialize IterGVN with types and values from parse-time GVN
  1604   PhaseIterGVN igvn(initial_gvn());
  1606     NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
  1607     igvn.optimize();
  1610   print_method("Iter GVN 1", 2);
  1612   if (failing())  return;
  1614   // Perform escape analysis
  1615   if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
  1616     TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, true);
  1617     ConnectionGraph::do_analysis(this, &igvn);
  1619     if (failing())  return;
  1621     igvn.optimize();
  1622     print_method("Iter GVN 3", 2);
  1624     if (failing())  return;
  1628   // Loop transforms on the ideal graph.  Range Check Elimination,
  1629   // peeling, unrolling, etc.
  1631   // Set loop opts counter
  1632   loop_opts_cnt = num_loop_opts();
  1633   if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
  1635       TracePhase t2("idealLoop", &_t_idealLoop, true);
  1636       PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate);
  1637       loop_opts_cnt--;
  1638       if (major_progress()) print_method("PhaseIdealLoop 1", 2);
  1639       if (failing())  return;
  1641     // Loop opts pass if partial peeling occurred in previous pass
  1642     if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
  1643       TracePhase t3("idealLoop", &_t_idealLoop, true);
  1644       PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
  1645       loop_opts_cnt--;
  1646       if (major_progress()) print_method("PhaseIdealLoop 2", 2);
  1647       if (failing())  return;
  1649     // Loop opts pass for loop-unrolling before CCP
  1650     if(major_progress() && (loop_opts_cnt > 0)) {
  1651       TracePhase t4("idealLoop", &_t_idealLoop, true);
  1652       PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
  1653       loop_opts_cnt--;
  1654       if (major_progress()) print_method("PhaseIdealLoop 3", 2);
  1656     if (!failing()) {
  1657       // Verify that last round of loop opts produced a valid graph
  1658       NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
  1659       PhaseIdealLoop::verify(igvn);
  1662   if (failing())  return;
  1664   // Conditional Constant Propagation;
  1665   PhaseCCP ccp( &igvn );
  1666   assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
  1668     TracePhase t2("ccp", &_t_ccp, true);
  1669     ccp.do_transform();
  1671   print_method("PhaseCCP 1", 2);
  1673   assert( true, "Break here to ccp.dump_old2new_map()");
  1675   // Iterative Global Value Numbering, including ideal transforms
  1677     NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
  1678     igvn = ccp;
  1679     igvn.optimize();
  1682   print_method("Iter GVN 2", 2);
  1684   if (failing())  return;
  1686   // Loop transforms on the ideal graph.  Range Check Elimination,
  1687   // peeling, unrolling, etc.
  1688   if(loop_opts_cnt > 0) {
  1689     debug_only( int cnt = 0; );
  1690     bool loop_predication = UseLoopPredicate;
  1691     while(major_progress() && (loop_opts_cnt > 0)) {
  1692       TracePhase t2("idealLoop", &_t_idealLoop, true);
  1693       assert( cnt++ < 40, "infinite cycle in loop optimization" );
  1694       PhaseIdealLoop ideal_loop( igvn, true, loop_predication);
  1695       loop_opts_cnt--;
  1696       if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
  1697       if (failing())  return;
  1698       // Perform loop predication optimization during first iteration after CCP.
  1699       // After that switch it off and cleanup unused loop predicates.
  1700       if (loop_predication) {
  1701         loop_predication = false;
  1702         cleanup_loop_predicates(igvn);
  1703         if (failing())  return;
  1709     // Verify that all previous optimizations produced a valid graph
  1710     // at least to this point, even if no loop optimizations were done.
  1711     NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
  1712     PhaseIdealLoop::verify(igvn);
  1716     NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
  1717     PhaseMacroExpand  mex(igvn);
  1718     if (mex.expand_macro_nodes()) {
  1719       assert(failing(), "must bail out w/ explicit message");
  1720       return;
  1724  } // (End scope of igvn; run destructor if necessary for asserts.)
  1726   // A method with only infinite loops has no edges entering loops from root
  1728     NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
  1729     if (final_graph_reshaping()) {
  1730       assert(failing(), "must bail out w/ explicit message");
  1731       return;
  1735   print_method("Optimize finished", 2);
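// Editor's note: the loop-optimization passes in Optimize() all share one driver
// shape: re-run the pass only while it reports major progress and a fixed budget
// (num_loop_opts) remains, so compilation cannot spin forever.  A minimal
// standalone sketch of that driver (toy_run_pass is a hypothetical stand-in):
#if 0
static int  toy_remaining_work = 3;   // pretend three rounds still make progress
static bool toy_run_pass() { return --toy_remaining_work > 0; }

static void toy_optimize_driver(int budget) {
  bool progress = true;
  while (progress && budget > 0) {
    progress = toy_run_pass();        // one round of "loop opts"
    budget--;                         // always consume budget, like loop_opts_cnt--
  }
}
#endif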
  1739 //------------------------------Code_Gen---------------------------------------
  1740 // Given a graph, generate code for it
  1741 void Compile::Code_Gen() {
  1742   if (failing())  return;
  1744   // Perform instruction selection.  You might think we could reclaim Matcher
  1745   // memory PDQ, but actually the Matcher is used in generating spill code.
  1746   // Internals of the Matcher (including some VectorSets) must remain live
  1747   // for a while - thus I cannot reclaim Matcher memory lest a VectorSet usage
  1748   // set a bit in reclaimed memory.
  1750   // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  1751   // nodes.  Mapping is only valid at the root of each matched subtree.
  1752   NOT_PRODUCT( verify_graph_edges(); )
  1754   Node_List proj_list;
  1755   Matcher m(proj_list);
  1756   _matcher = &m;
  1758     TracePhase t2("matcher", &_t_matcher, true);
  1759     m.match();
  1761   // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  1762   // nodes.  Mapping is only valid at the root of each matched subtree.
  1763   NOT_PRODUCT( verify_graph_edges(); )
  1765   // If you have too many nodes, or if matching has failed, bail out
  1766   check_node_count(0, "out of nodes matching instructions");
  1767   if (failing())  return;
  1769   // Build a proper-looking CFG
  1770   PhaseCFG cfg(node_arena(), root(), m);
  1771   _cfg = &cfg;
  1773     NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
  1774     cfg.Dominators();
  1775     if (failing())  return;
  1777     NOT_PRODUCT( verify_graph_edges(); )
  1779     cfg.Estimate_Block_Frequency();
  1780     cfg.GlobalCodeMotion(m,unique(),proj_list);
  1782     print_method("Global code motion", 2);
  1784     if (failing())  return;
  1785     NOT_PRODUCT( verify_graph_edges(); )
  1787     debug_only( cfg.verify(); )
  1789   NOT_PRODUCT( verify_graph_edges(); )
  1791   PhaseChaitin regalloc(unique(),cfg,m);
  1792   _regalloc = &regalloc;
  1794     TracePhase t2("regalloc", &_t_registerAllocation, true);
  1795     // Perform any platform dependent preallocation actions.  This is used,
  1796     // for example, to avoid taking an implicit null pointer exception
  1797     // using the frame pointer on win95.
  1798     _regalloc->pd_preallocate_hook();
  1800     // Perform register allocation.  After Chaitin, use-def chains are
  1801     // no longer accurate (at spill code) and so must be ignored.
  1802     // Node->LRG->reg mappings are still accurate.
  1803     _regalloc->Register_Allocate();
  1805     // Bail out if the allocator builds too many nodes
  1806     if (failing())  return;
  1809   // Prior to register allocation we kept empty basic blocks in case the
  1810   // allocator needed a place to spill.  After register allocation we
  1811   // are not adding any new instructions.  If any basic block is empty, we
  1812   // can now safely remove it.
  1814     NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
  1815     cfg.remove_empty();
  1816     if (do_freq_based_layout()) {
  1817       PhaseBlockLayout layout(cfg);
  1818     } else {
  1819       cfg.set_loop_alignment();
  1821     cfg.fixup_flow();
  1824   // Perform any platform dependent postallocation verifications.
  1825   debug_only( _regalloc->pd_postallocate_verify_hook(); )
  1827   // Apply peephole optimizations
  1828   if( OptoPeephole ) {
  1829     NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
  1830     PhasePeephole peep( _regalloc, cfg);
  1831     peep.do_transform();
  1834   // Convert Nodes to instruction bits in a buffer
  1836     // %%%% workspace merge brought two timers together for one job
  1837     TracePhase t2a("output", &_t_output, true);
  1838     NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
  1839     Output();
  1842   print_method("Final Code");
  1844   // He's dead, Jim.
  1845   _cfg     = (PhaseCFG*)0xdeadbeef;
  1846   _regalloc = (PhaseChaitin*)0xdeadbeef;
  1850 //------------------------------dump_asm---------------------------------------
  1851 // Dump formatted assembly
  1852 #ifndef PRODUCT
  1853 void Compile::dump_asm(int *pcs, uint pc_limit) {
  1854   bool cut_short = false;
  1855   tty->print_cr("#");
  1856   tty->print("#  ");  _tf->dump();  tty->cr();
  1857   tty->print_cr("#");
  1859   // For all blocks
  1860   int pc = 0x0;                 // Program counter
  1861   char starts_bundle = ' ';
  1862   _regalloc->dump_frame();
  1864   Node *n = NULL;
  1865   for( uint i=0; i<_cfg->_num_blocks; i++ ) {
  1866     if (VMThread::should_terminate()) { cut_short = true; break; }
  1867     Block *b = _cfg->_blocks[i];
  1868     if (b->is_connector() && !Verbose) continue;
  1869     n = b->_nodes[0];
  1870     if (pcs && n->_idx < pc_limit)
  1871       tty->print("%3.3x   ", pcs[n->_idx]);
  1872     else
  1873       tty->print("      ");
  1874     b->dump_head( &_cfg->_bbs );
  1875     if (b->is_connector()) {
  1876       tty->print_cr("        # Empty connector block");
  1877     } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
  1878       tty->print_cr("        # Block is sole successor of call");
  1881     // For all instructions
  1882     Node *delay = NULL;
  1883     for( uint j = 0; j<b->_nodes.size(); j++ ) {
  1884       if (VMThread::should_terminate()) { cut_short = true; break; }
  1885       n = b->_nodes[j];
  1886       if (valid_bundle_info(n)) {
  1887         Bundle *bundle = node_bundling(n);
  1888         if (bundle->used_in_unconditional_delay()) {
  1889           delay = n;
  1890           continue;
  1892         if (bundle->starts_bundle())
  1893           starts_bundle = '+';
  1896       if (WizardMode) n->dump();
  1898       if( !n->is_Region() &&    // Don't print in the Assembly
  1899           !n->is_Phi() &&       // a few noisily useless nodes
  1900           !n->is_Proj() &&
  1901           !n->is_MachTemp() &&
  1902           !n->is_SafePointScalarObject() &&
  1903           !n->is_Catch() &&     // Would be nice to print exception table targets
  1904           !n->is_MergeMem() &&  // Not very interesting
  1905           !n->is_top() &&       // Debug info table constants
  1906           !(n->is_Con() && !n->is_Mach()) // Debug info table constants
  1907           ) {
  1908         if (pcs && n->_idx < pc_limit)
  1909           tty->print("%3.3x", pcs[n->_idx]);
  1910         else
  1911           tty->print("   ");
  1912         tty->print(" %c ", starts_bundle);
  1913         starts_bundle = ' ';
  1914         tty->print("\t");
  1915         n->format(_regalloc, tty);
  1916         tty->cr();
  1919       // If we have an instruction with a delay slot, and have seen a delay,
  1920       // then back up and print it
  1921       if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
  1922         assert(delay != NULL, "no unconditional delay instruction");
  1923         if (WizardMode) delay->dump();
  1925         if (node_bundling(delay)->starts_bundle())
  1926           starts_bundle = '+';
  1927         if (pcs && n->_idx < pc_limit)
  1928           tty->print("%3.3x", pcs[n->_idx]);
  1929         else
  1930           tty->print("   ");
  1931         tty->print(" %c ", starts_bundle);
  1932         starts_bundle = ' ';
  1933         tty->print("\t");
  1934         delay->format(_regalloc, tty);
  1935         tty->print_cr("");
  1936         delay = NULL;
  1939       // Dump the exception table as well
  1940       if( n->is_Catch() && (Verbose || WizardMode) ) {
  1941         // Print the exception table for this offset
  1942         _handler_table.print_subtable_for(pc);
  1946     if (pcs && n->_idx < pc_limit)
  1947       tty->print_cr("%3.3x", pcs[n->_idx]);
  1948     else
  1949       tty->print_cr("");
  1951     assert(cut_short || delay == NULL, "no unconditional delay branch");
  1953   } // End of per-block dump
  1954   tty->print_cr("");
  1956   if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
  1958 #endif
  1960 //------------------------------Final_Reshape_Counts---------------------------
  1961 // This class defines counters to help identify when a method
  1962 // may/must be executed using hardware with only 24-bit precision.
  1963 struct Final_Reshape_Counts : public StackObj {
  1964   int  _call_count;             // count non-inlined 'common' calls
  1965   int  _float_count;            // count float ops requiring 24-bit precision
  1966   int  _double_count;           // count double ops requiring more precision
  1967   int  _java_call_count;        // count non-inlined 'java' calls
  1968   int  _inner_loop_count;       // count loops which need alignment
  1969   VectorSet _visited;           // Visitation flags
  1970   Node_List _tests;             // Set of IfNodes & PCTableNodes
  1972   Final_Reshape_Counts() :
  1973     _call_count(0), _float_count(0), _double_count(0),
  1974     _java_call_count(0), _inner_loop_count(0),
  1975     _visited( Thread::current()->resource_area() ) { }
  1977   void inc_call_count  () { _call_count  ++; }
  1978   void inc_float_count () { _float_count ++; }
  1979   void inc_double_count() { _double_count++; }
  1980   void inc_java_call_count() { _java_call_count++; }
  1981   void inc_inner_loop_count() { _inner_loop_count++; }
  1983   int  get_call_count  () const { return _call_count  ; }
  1984   int  get_float_count () const { return _float_count ; }
  1985   int  get_double_count() const { return _double_count; }
  1986   int  get_java_call_count() const { return _java_call_count; }
  1987   int  get_inner_loop_count() const { return _inner_loop_count; }
  1988 };
  1990 static bool oop_offset_is_sane(const TypeInstPtr* tp) {
  1991   ciInstanceKlass *k = tp->klass()->as_instance_klass();
  1992   // Make sure the offset goes inside the instance layout.
  1993   return k->contains_field_offset(tp->offset());
  1994   // Note that OffsetBot and OffsetTop are very negative.
  1997 //------------------------------final_graph_reshaping_impl----------------------
  1998 // Implement items 1-5 from final_graph_reshaping below.
  1999 static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
  2001   if ( n->outcnt() == 0 ) return; // dead node
  2002   uint nop = n->Opcode();
  2004   // Check for 2-input instruction with "last use" on right input.
  2005   // Swap to left input.  Implements item (2).
  2006   if( n->req() == 3 &&          // two-input instruction
  2007       n->in(1)->outcnt() > 1 && // left use is NOT a last use
  2008       (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
  2009       n->in(2)->outcnt() == 1 &&// right use IS a last use
  2010       !n->in(2)->is_Con() ) {   // right use is not a constant
  2011     // Check for commutative opcode
  2012     switch( nop ) {
  2013     case Op_AddI:  case Op_AddF:  case Op_AddD:  case Op_AddL:
  2014     case Op_MaxI:  case Op_MinI:
  2015     case Op_MulI:  case Op_MulF:  case Op_MulD:  case Op_MulL:
  2016     case Op_AndL:  case Op_XorL:  case Op_OrL:
  2017     case Op_AndI:  case Op_XorI:  case Op_OrI: {
  2018       // Move "last use" input to left by swapping inputs
  2019       n->swap_edges(1, 2);
  2020       break;
  2022     default:
  2023       break;
  2027 #ifdef ASSERT
  2028   if( n->is_Mem() ) {
  2029     Compile* C = Compile::current();
  2030     int alias_idx = C->get_alias_index(n->as_Mem()->adr_type());
  2031     assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw ||
  2032             // oop will be recorded in oop map if load crosses safepoint
  2033             n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() ||
  2034                              LoadNode::is_immutable_value(n->in(MemNode::Address))),
  2035             "raw memory operations should have control edge");
  2037 #endif
  2038   // Count FPU ops and common calls, implements item (3)
  2039   switch( nop ) {
  2040   // Count all float operations that may use FPU
  2041   case Op_AddF:
  2042   case Op_SubF:
  2043   case Op_MulF:
  2044   case Op_DivF:
  2045   case Op_NegF:
  2046   case Op_ModF:
  2047   case Op_ConvI2F:
  2048   case Op_ConF:
  2049   case Op_CmpF:
  2050   case Op_CmpF3:
  2051   // case Op_ConvL2F: // longs are split into 32-bit halves
  2052     frc.inc_float_count();
  2053     break;
  2055   case Op_ConvF2D:
  2056   case Op_ConvD2F:
  2057     frc.inc_float_count();
  2058     frc.inc_double_count();
  2059     break;
  2061   // Count all double operations that may use FPU
  2062   case Op_AddD:
  2063   case Op_SubD:
  2064   case Op_MulD:
  2065   case Op_DivD:
  2066   case Op_NegD:
  2067   case Op_ModD:
  2068   case Op_ConvI2D:
  2069   case Op_ConvD2I:
  2070   // case Op_ConvL2D: // handled by leaf call
  2071   // case Op_ConvD2L: // handled by leaf call
  2072   case Op_ConD:
  2073   case Op_CmpD:
  2074   case Op_CmpD3:
  2075     frc.inc_double_count();
  2076     break;
  2077   case Op_Opaque1:              // Remove Opaque Nodes before matching
  2078   case Op_Opaque2:              // Remove Opaque Nodes before matching
  2079     n->subsume_by(n->in(1));
  2080     break;
  2081   case Op_CallStaticJava:
  2082   case Op_CallJava:
  2083   case Op_CallDynamicJava:
  2084     frc.inc_java_call_count(); // Count java call site; fall through to the generic call handling
  2085   case Op_CallRuntime:
  2086   case Op_CallLeaf:
  2087   case Op_CallLeafNoFP: {
  2088     assert( n->is_Call(), "" );
  2089     CallNode *call = n->as_Call();
  2090     // Count call sites where the FP mode bit would have to be flipped.
  2091     // Do not count uncommon runtime calls:
  2092     // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
  2093     // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
  2094     if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
  2095       frc.inc_call_count();   // Count the call site
  2096     } else {                  // See if uncommon argument is shared
  2097       Node *n = call->in(TypeFunc::Parms);
  2098       int nop = n->Opcode();
  2099       // Clone shared simple arguments to uncommon calls, item (1).
  2100       if( n->outcnt() > 1 &&
  2101           !n->is_Proj() &&
  2102           nop != Op_CreateEx &&
  2103           nop != Op_CheckCastPP &&
  2104           nop != Op_DecodeN &&
  2105           !n->is_Mem() ) {
  2106         Node *x = n->clone();
  2107         call->set_req( TypeFunc::Parms, x );
  2110     break;
  2113   case Op_StoreD:
  2114   case Op_LoadD:
  2115   case Op_LoadD_unaligned:
  2116     frc.inc_double_count();
  2117     goto handle_mem;
  2118   case Op_StoreF:
  2119   case Op_LoadF:
  2120     frc.inc_float_count();
  2121     goto handle_mem;
  2123   case Op_StoreB:
  2124   case Op_StoreC:
  2125   case Op_StoreCM:
  2126   case Op_StorePConditional:
  2127   case Op_StoreI:
  2128   case Op_StoreL:
  2129   case Op_StoreIConditional:
  2130   case Op_StoreLConditional:
  2131   case Op_CompareAndSwapI:
  2132   case Op_CompareAndSwapL:
  2133   case Op_CompareAndSwapP:
  2134   case Op_CompareAndSwapN:
  2135   case Op_StoreP:
  2136   case Op_StoreN:
  2137   case Op_LoadB:
  2138   case Op_LoadUB:
  2139   case Op_LoadUS:
  2140   case Op_LoadI:
  2141   case Op_LoadUI2L:
  2142   case Op_LoadKlass:
  2143   case Op_LoadNKlass:
  2144   case Op_LoadL:
  2145   case Op_LoadL_unaligned:
  2146   case Op_LoadPLocked:
  2147   case Op_LoadLLocked:
  2148   case Op_LoadP:
  2149   case Op_LoadN:
  2150   case Op_LoadRange:
  2151   case Op_LoadS: {
  2152   handle_mem:
  2153 #ifdef ASSERT
  2154     if( VerifyOptoOopOffsets ) {
  2155       assert( n->is_Mem(), "" );
  2156       MemNode *mem  = (MemNode*)n;
  2157       // Check to see if address types have grounded out somehow.
  2158       const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
  2159       assert( !tp || oop_offset_is_sane(tp), "" );
  2161 #endif
  2162     break;
  2165   case Op_AddP: {               // Assert sane base pointers
  2166     Node *addp = n->in(AddPNode::Address);
  2167     assert( !addp->is_AddP() ||
  2168             addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
  2169             addp->in(AddPNode::Base) == n->in(AddPNode::Base),
  2170             "Base pointers must match" );
  2171 #ifdef _LP64
  2172     if (UseCompressedOops &&
  2173         addp->Opcode() == Op_ConP &&
  2174         addp == n->in(AddPNode::Base) &&
  2175         n->in(AddPNode::Offset)->is_Con()) {
  2176       // Use addressing with narrow klass to load with offset on x86.
  2177       // On sparc, loading a 32-bit constant and decoding it takes fewer
  2178       // instructions (4) than loading a 64-bit constant does (7).
  2179       // Do this transformation here since IGVN will convert ConN back to ConP.
  2180       const Type* t = addp->bottom_type();
  2181       if (t->isa_oopptr()) {
  2182         Node* nn = NULL;
  2184         // Look for existing ConN node of the same exact type.
  2185         Compile* C = Compile::current();
  2186         Node* r  = C->root();
  2187         uint cnt = r->outcnt();
  2188         for (uint i = 0; i < cnt; i++) {
  2189           Node* m = r->raw_out(i);
  2190           if (m != NULL && m->Opcode() == Op_ConN &&
  2191               m->bottom_type()->make_ptr() == t) {
  2192             nn = m;
  2193             break;
  2196         if (nn != NULL) {
  2197           // Decode a narrow oop to match address
  2198           // [R12 + narrow_oop_reg<<3 + offset]
  2199           nn = new (C,  2) DecodeNNode(nn, t);
  2200           n->set_req(AddPNode::Base, nn);
  2201           n->set_req(AddPNode::Address, nn);
  2202           if (addp->outcnt() == 0) {
  2203             addp->disconnect_inputs(NULL);
  2208 #endif
  2209     break;
  2212 #ifdef _LP64
  2213   case Op_CastPP:
  2214     if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
  2215       Compile* C = Compile::current();
  2216       Node* in1 = n->in(1);
  2217       const Type* t = n->bottom_type();
  2218       Node* new_in1 = in1->clone();
  2219       new_in1->as_DecodeN()->set_type(t);
  2221       if (!Matcher::narrow_oop_use_complex_address()) {
  2222         //
  2223         // x86, ARM and friends can handle 2 adds in addressing mode
  2224         // and Matcher can fold a DecodeN node into address by using
  2225         // a narrow oop directly and do implicit NULL check in address:
  2226         //
  2227         // [R12 + narrow_oop_reg<<3 + offset]
  2228         // NullCheck narrow_oop_reg
  2229         //
  2230         // On other platforms (Sparc) we have to keep new DecodeN node and
  2231         // use it to do implicit NULL check in address:
  2232         //
  2233         // decode_not_null narrow_oop_reg, base_reg
  2234         // [base_reg + offset]
  2235         // NullCheck base_reg
  2236         //
  2237         // Pin the new DecodeN node to the non-null path on these platforms (Sparc)
  2238         // to record which NULL check the new DecodeN node corresponds to,
  2239         // so that implicit_null_check() can use it as the value.
  2240         //
  2241         new_in1->set_req(0, n->in(0));
  2244       n->subsume_by(new_in1);
  2245       if (in1->outcnt() == 0) {
  2246         in1->disconnect_inputs(NULL);
  2249     break;
  2251   case Op_CmpP:
  2252     // Do this transformation here to preserve CmpPNode::sub() and
  2253     // other TypePtr related Ideal optimizations (for example, ptr nullness).
  2254     if (n->in(1)->is_DecodeN() || n->in(2)->is_DecodeN()) {
  2255       Node* in1 = n->in(1);
  2256       Node* in2 = n->in(2);
  2257       if (!in1->is_DecodeN()) {
  2258         in2 = in1;
  2259         in1 = n->in(2);
  2261       assert(in1->is_DecodeN(), "sanity");
  2263       Compile* C = Compile::current();
  2264       Node* new_in2 = NULL;
  2265       if (in2->is_DecodeN()) {
  2266         new_in2 = in2->in(1);
  2267       } else if (in2->Opcode() == Op_ConP) {
  2268         const Type* t = in2->bottom_type();
  2269         if (t == TypePtr::NULL_PTR) {
  2270           // Don't convert CmpP null check into CmpN if compressed
  2271           // oops implicit null check is not generated.
  2272           // This allows a normal oop implicit null check to be generated.
  2273           if (Matcher::gen_narrow_oop_implicit_null_checks())
  2274             new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
  2275           //
  2276           // This transformation, together with the CastPP transformation above,
  2277           // will generate code for implicit NULL checks for compressed oops.
  2278           //
  2279           // The original code after Optimize()
  2280           //
  2281           //    LoadN memory, narrow_oop_reg
  2282           //    decode narrow_oop_reg, base_reg
  2283           //    CmpP base_reg, NULL
  2284           //    CastPP base_reg // NotNull
  2285           //    Load [base_reg + offset], val_reg
  2286           //
  2287           // after these transformations will be
  2288           //
  2289           //    LoadN memory, narrow_oop_reg
  2290           //    CmpN narrow_oop_reg, NULL
  2291           //    decode_not_null narrow_oop_reg, base_reg
  2292           //    Load [base_reg + offset], val_reg
  2293           //
  2294           // and the uncommon path (== NULL) will use narrow_oop_reg directly
  2295           // since narrow oops can be used in debug info now (see the code in
  2296           // final_graph_reshaping_walk()).
  2297           //
  2298           // At the end the code will be matched to
  2299           // on x86:
  2300           //
  2301           //    Load_narrow_oop memory, narrow_oop_reg
  2302           //    Load [R12 + narrow_oop_reg<<3 + offset], val_reg
  2303           //    NullCheck narrow_oop_reg
  2304           //
  2305           // and on sparc:
  2306           //
  2307           //    Load_narrow_oop memory, narrow_oop_reg
  2308           //    decode_not_null narrow_oop_reg, base_reg
  2309           //    Load [base_reg + offset], val_reg
  2310           //    NullCheck base_reg
  2311           //
  2312         } else if (t->isa_oopptr()) {
  2313           new_in2 = ConNode::make(C, t->make_narrowoop());
  2316       if (new_in2 != NULL) {
  2317         Node* cmpN = new (C, 3) CmpNNode(in1->in(1), new_in2);
  2318         n->subsume_by( cmpN );
  2319         if (in1->outcnt() == 0) {
  2320           in1->disconnect_inputs(NULL);
  2322         if (in2->outcnt() == 0) {
  2323           in2->disconnect_inputs(NULL);
  2327     break;
  2329   case Op_DecodeN:
  2330     assert(!n->in(1)->is_EncodeP(), "should be optimized out");
  2331     // DecodeN could be pinned when it can't be folded into
  2332     // an address expression, see the code for Op_CastPP above.
  2333     assert(n->in(0) == NULL || !Matcher::narrow_oop_use_complex_address(), "no control");
  2334     break;
  2336   case Op_EncodeP: {
  2337     Node* in1 = n->in(1);
  2338     if (in1->is_DecodeN()) {
  2339       n->subsume_by(in1->in(1));
  2340     } else if (in1->Opcode() == Op_ConP) {
  2341       Compile* C = Compile::current();
  2342       const Type* t = in1->bottom_type();
  2343       if (t == TypePtr::NULL_PTR) {
  2344         n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
  2345       } else if (t->isa_oopptr()) {
  2346         n->subsume_by(ConNode::make(C, t->make_narrowoop()));
  2349     if (in1->outcnt() == 0) {
  2350       in1->disconnect_inputs(NULL);
  2352     break;
  2355   case Op_Proj: {
  2356     if (OptimizeStringConcat) {
  2357       ProjNode* p = n->as_Proj();
  2358       if (p->_is_io_use) {
  2359         // Separate projections were used for the exception path, which
  2360         // are normally removed by a late inline.  If the call wasn't inlined
  2361         // then they will hang around and should just be replaced with
  2362         // the original one.
  2363         Node* proj = NULL;
  2364         // Replace with just one
  2365         for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) {
  2366           Node *use = i.get();
  2367           if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) {
  2368             proj = use;
  2369             break;
  2372         assert(proj != NULL, "must be found");
  2373         p->subsume_by(proj);
  2376     break;
  2379   case Op_Phi:
  2380     if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
  2381       // The EncodeP optimization may create a Phi with the same edges
  2382       // for all paths, which the register allocator does not handle well.
  2383       Node* unique_in = n->in(1);
  2384       assert(unique_in != NULL, "");
  2385       uint cnt = n->req();
  2386       for (uint i = 2; i < cnt; i++) {
  2387         Node* m = n->in(i);
  2388         assert(m != NULL, "");
  2389         if (unique_in != m)
  2390           unique_in = NULL;
  2392       if (unique_in != NULL) {
  2393         n->subsume_by(unique_in);
  2396     break;
  2398 #endif
  2400   case Op_ModI:
  2401     if (UseDivMod) {
  2402       // Check if a%b and a/b both exist
  2403       Node* d = n->find_similar(Op_DivI);
  2404       if (d) {
  2405         // Replace them with a fused divmod if supported
  2406         Compile* C = Compile::current();
  2407         if (Matcher::has_match_rule(Op_DivModI)) {
  2408           DivModINode* divmod = DivModINode::make(C, n);
  2409           d->subsume_by(divmod->div_proj());
  2410           n->subsume_by(divmod->mod_proj());
  2411         } else {
  2412           // replace a%b with a-((a/b)*b); see the arithmetic sketch after this function
  2413           Node* mult = new (C, 3) MulINode(d, d->in(2));
  2414           Node* sub  = new (C, 3) SubINode(d->in(1), mult);
  2415           n->subsume_by( sub );
  2419     break;
  2421   case Op_ModL:
  2422     if (UseDivMod) {
  2423       // Check if a%b and a/b both exist
  2424       Node* d = n->find_similar(Op_DivL);
  2425       if (d) {
  2426         // Replace them with a fused divmod if supported
  2427         Compile* C = Compile::current();
  2428         if (Matcher::has_match_rule(Op_DivModL)) {
  2429           DivModLNode* divmod = DivModLNode::make(C, n);
  2430           d->subsume_by(divmod->div_proj());
  2431           n->subsume_by(divmod->mod_proj());
  2432         } else {
  2433           // replace a%b with a-((a/b)*b)
  2434           Node* mult = new (C, 3) MulLNode(d, d->in(2));
  2435           Node* sub  = new (C, 3) SubLNode(d->in(1), mult);
  2436           n->subsume_by( sub );
  2440     break;
  2442   case Op_Load16B:
  2443   case Op_Load8B:
  2444   case Op_Load4B:
  2445   case Op_Load8S:
  2446   case Op_Load4S:
  2447   case Op_Load2S:
  2448   case Op_Load8C:
  2449   case Op_Load4C:
  2450   case Op_Load2C:
  2451   case Op_Load4I:
  2452   case Op_Load2I:
  2453   case Op_Load2L:
  2454   case Op_Load4F:
  2455   case Op_Load2F:
  2456   case Op_Load2D:
  2457   case Op_Store16B:
  2458   case Op_Store8B:
  2459   case Op_Store4B:
  2460   case Op_Store8C:
  2461   case Op_Store4C:
  2462   case Op_Store2C:
  2463   case Op_Store4I:
  2464   case Op_Store2I:
  2465   case Op_Store2L:
  2466   case Op_Store4F:
  2467   case Op_Store2F:
  2468   case Op_Store2D:
  2469     break;
  2471   case Op_PackB:
  2472   case Op_PackS:
  2473   case Op_PackC:
  2474   case Op_PackI:
  2475   case Op_PackF:
  2476   case Op_PackL:
  2477   case Op_PackD:
  2478     if (n->req()-1 > 2) {
  2479       // Replace many operand PackNodes with a binary tree for matching
  2480       PackNode* p = (PackNode*) n;
  2481       Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
  2482       n->subsume_by(btp);
  2484     break;
  2485   case Op_Loop:
  2486   case Op_CountedLoop:
  2487     if (n->as_Loop()->is_inner_loop()) {
  2488       frc.inc_inner_loop_count();
  2490     break;
  2491   default:
  2492     assert( !n->is_Call(), "" );
  2493     assert( !n->is_Mem(), "" );
  2494     break;
  2497   // Collect CFG split points
  2498   if (n->is_MultiBranch())
  2499     frc._tests.push(n);
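// Editor's note: the Op_ModI/Op_ModL cases above rely on the integer identity
// a % b == a - (a / b) * b, which holds for truncating division as used by C++
// and Java, including negative operands.  A minimal standalone check of that
// identity (purely illustrative, not part of the reshaping code):
#if 0
#include <cassert>

static void toy_check_mod_rewrite() {
  int samples[]  = { 7, -7, 13, -13, 0, 42 };
  int divisors[] = { 3, -3, 5, -5, 1 };
  for (int i = 0; i < 6; i++) {
    for (int j = 0; j < 5; j++) {
      int a = samples[i], b = divisors[j];
      assert(a % b == a - (a / b) * b && "rewrite must preserve the result");
    }
  }
}
#endif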
  2502 //------------------------------final_graph_reshaping_walk---------------------
  2503 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
  2504 // requires that the walk visits a node's inputs before visiting the node.
  2505 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
  2506   ResourceArea *area = Thread::current()->resource_area();
  2507   Unique_Node_List sfpt(area);
  2509   frc._visited.set(root->_idx); // first, mark node as visited
  2510   uint cnt = root->req();
  2511   Node *n = root;
  2512   uint  i = 0;
  2513   while (true) {
  2514     if (i < cnt) {
  2515       // Place all non-visited non-null inputs onto stack
  2516       Node* m = n->in(i);
  2517       ++i;
  2518       if (m != NULL && !frc._visited.test_set(m->_idx)) {
  2519         if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
  2520           sfpt.push(m);
  2521         cnt = m->req();
  2522         nstack.push(n, i); // put on stack parent and next input's index
  2523         n = m;
  2524         i = 0;
  2526     } else {
  2527       // Now do post-visit work
  2528       final_graph_reshaping_impl( n, frc );
  2529       if (nstack.is_empty())
  2530         break;             // finished
  2531       n = nstack.node();   // Get node from stack
  2532       cnt = n->req();
  2533       i = nstack.index();
  2534       nstack.pop();        // Shift to the next node on stack
  2538   // Skip next transformation if compressed oops are not used.
  2539   if (!UseCompressedOops || !Matcher::gen_narrow_oop_implicit_null_checks())
  2540     return;
  2542   // Go over safepoints nodes to skip DecodeN nodes for debug edges.
  2543   // It could be done for uncommon traps or for any safepoints/calls
  2544   // if the DecodeN node is referenced only in debug info.
  2545   while (sfpt.size() > 0) {
  2546     n = sfpt.pop();
  2547     JVMState *jvms = n->as_SafePoint()->jvms();
  2548     assert(jvms != NULL, "sanity");
  2549     int start = jvms->debug_start();
  2550     int end   = n->req();
  2551     bool is_uncommon = (n->is_CallStaticJava() &&
  2552                         n->as_CallStaticJava()->uncommon_trap_request() != 0);
  2553     for (int j = start; j < end; j++) {
  2554       Node* in = n->in(j);
  2555       if (in->is_DecodeN()) {
  2556         bool safe_to_skip = true;
  2557         if (!is_uncommon ) {
  2558           // Is it safe to skip?
  2559           for (uint i = 0; i < in->outcnt(); i++) {
  2560             Node* u = in->raw_out(i);
  2561             if (!u->is_SafePoint() ||
  2562                 (u->is_Call() && u->as_Call()->has_non_debug_use(n))) {
  2563               safe_to_skip = false;
  2567         if (safe_to_skip) {
  2568           n->set_req(j, in->in(1));
  2570         if (in->outcnt() == 0) {
  2571           in->disconnect_inputs(NULL);
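// Editor's note: the walk above is an iterative post-order DFS driven by an
// explicit stack of (node, next-input-index) pairs, so inputs are processed
// before the node itself without recursing.  A minimal standalone sketch of
// that traversal over a toy adjacency list (hypothetical structures, standard
// containers instead of Node_Stack/VectorSet):
#if 0
#include <vector>
#include <utility>

// children[n] lists the inputs of node n; visit() is called in post-order.
static void toy_postorder_walk(const std::vector< std::vector<int> >& children,
                               int root,
                               void (*visit)(int)) {
  std::vector<bool> visited(children.size(), false);
  std::vector< std::pair<int, size_t> > stack;   // (node, next input index)
  visited[root] = true;
  stack.push_back(std::make_pair(root, (size_t) 0));
  while (!stack.empty()) {
    int    n = stack.back().first;
    size_t i = stack.back().second;
    if (i < children[n].size()) {
      stack.back().second++;                     // advance to the next input
      int m = children[n][i];
      if (!visited[m]) {
        visited[m] = true;
        stack.push_back(std::make_pair(m, (size_t) 0));
      }
    } else {
      visit(n);                                  // all inputs done: post-visit
      stack.pop_back();
    }
  }
}
#endif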
  2578 //------------------------------final_graph_reshaping--------------------------
  2579 // Final Graph Reshaping.
  2580 //
  2581 // (1) Clone simple inputs to uncommon calls, so they can be scheduled late
  2582 //     and not commoned up and forced early.  Must come after regular
  2583 //     optimizations to avoid GVN undoing the cloning.  Clone constant
  2584 //     inputs to Loop Phis; these will be split by the allocator anyways.
  2585 //     Remove Opaque nodes.
  2586 // (2) Move last-uses by commutative operations to the left input to encourage
  2587 //     Intel update-in-place two-address operations and better register usage
  2588 //     on RISCs.  Must come after regular optimizations to avoid GVN Ideal
  2589 //     calls canonicalizing them back.
  2590 // (3) Count the number of double-precision FP ops, single-precision FP ops
  2591 //     and call sites.  On Intel, we can get correct rounding either by
  2592 //     forcing singles to memory (requires extra stores and loads after each
  2593 //     FP bytecode) or we can set a rounding mode bit (requires setting and
  2594 //     clearing the mode bit around call sites).  The mode bit is only used
  2595 //     if the relative frequency of single FP ops to calls is low enough.
  2596 //     This is a key transform for SPEC mpeg_audio.
  2597 // (4) Detect infinite loops; blobs of code reachable from above but not
  2598 //     below.  Several of the Code_Gen algorithms fail on such code shapes,
  2599 //     so we simply bail out.  Happens a lot in ZKM.jar, but also happens
  2600 //     from time to time in other codes (such as -Xcomp finalizer loops, etc).
  2601 //     Detection is by looking for IfNodes where only 1 projection is
  2602 //     reachable from below or CatchNodes missing some targets.
  2603 // (5) Assert for insane oop offsets in debug mode.
  2605 bool Compile::final_graph_reshaping() {
  2606   // an infinite loop may have been eliminated by the optimizer,
  2607   // in which case the graph will be empty.
  2608   if (root()->req() == 1) {
  2609     record_method_not_compilable("trivial infinite loop");
  2610     return true;
  2613   Final_Reshape_Counts frc;
  2615   // Visit everybody reachable!
  2616   // Allocate stack of size C->unique()/2 to avoid frequent realloc
  2617   Node_Stack nstack(unique() >> 1);
  2618   final_graph_reshaping_walk(nstack, root(), frc);
  2620   // Check for unreachable (from below) code (i.e., infinite loops).
  2621   for( uint i = 0; i < frc._tests.size(); i++ ) {
  2622     MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
  2623     // Get number of CFG targets.
  2624     // Note that PCTables include exception targets after calls.
  2625     uint required_outcnt = n->required_outcnt();
  2626     if (n->outcnt() != required_outcnt) {
  2627       // Check for a few special cases.  Rethrow Nodes never take the
  2628       // 'fall-thru' path, so expected kids is 1 less.
  2629       if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
  2630         if (n->in(0)->in(0)->is_Call()) {
  2631           CallNode *call = n->in(0)->in(0)->as_Call();
  2632           if (call->entry_point() == OptoRuntime::rethrow_stub()) {
  2633             required_outcnt--;      // Rethrow always has 1 less kid
  2634           } else if (call->req() > TypeFunc::Parms &&
  2635                      call->is_CallDynamicJava()) {
  2636             // Check for null receiver. In such case, the optimizer has
  2637             // detected that the virtual call will always result in a null
  2638             // pointer exception. The fall-through projection of this CatchNode
  2639             // will not be populated.
  2640             Node *arg0 = call->in(TypeFunc::Parms);
  2641             if (arg0->is_Type() &&
  2642                 arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
  2643               required_outcnt--;
  2645           } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
  2646                      call->req() > TypeFunc::Parms+1 &&
  2647                      call->is_CallStaticJava()) {
  2648             // Check for negative array length. In such case, the optimizer has
  2649             // detected that the allocation attempt will always result in an
  2650     // exception. There is no fall-through projection of this CatchNode.
  2651             Node *arg1 = call->in(TypeFunc::Parms+1);
  2652             if (arg1->is_Type() &&
  2653                 arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
  2654               required_outcnt--;
  2659       // Recheck with a better notion of 'required_outcnt'
  2660       if (n->outcnt() != required_outcnt) {
  2661         record_method_not_compilable("malformed control flow");
  2662         return true;            // Not all targets reachable!
  2665     // Check that I actually visited all kids.  Unreached kids
  2666     // must be infinite loops.
  2667     for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
  2668       if (!frc._visited.test(n->fast_out(j)->_idx)) {
  2669         record_method_not_compilable("infinite loop");
  2670         return true;            // Found unvisited kid; must be unreach
  2674   // If the original bytecodes contained a mixture of floats and doubles,
  2675   // check if the optimizer has made it homogeneous, item (3).
  2676   if( Use24BitFPMode && Use24BitFP && UseSSE == 0 &&
  2677       frc.get_float_count() > 32 &&
  2678       frc.get_double_count() == 0 &&
  2679       (10 * frc.get_call_count() < frc.get_float_count()) ) {
  2680     set_24_bit_selection_and_mode( false,  true );
  2683   set_java_calls(frc.get_java_call_count());
  2684   set_inner_loops(frc.get_inner_loop_count());
  2686   // No infinite loops, no reason to bail out.
  2687   return false;
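// Editor's note: the 24-bit FP heuristic above flips to the rounding-mode bit
// only when single-precision ops clearly dominate calls: more than 32 float ops,
// no double ops, and fewer than one call per ten float ops.  A minimal
// standalone restatement of the counting part of that predicate, with a worked
// example (the Use24BitFPMode/Use24BitFP/UseSSE flag checks are omitted):
#if 0
static bool toy_use_24bit_mode_bit(int float_count, int double_count, int call_count) {
  return float_count > 32 &&
         double_count == 0 &&
         10 * call_count < float_count;
}

// Example: 100 float ops, 0 doubles, 5 calls  -> 10*5  = 50  < 100: use the mode bit.
//          100 float ops, 0 doubles, 20 calls -> 10*20 = 200 >= 100: keep storing
//          singles to memory instead.
#endif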
  2690 //-----------------------------too_many_traps----------------------------------
  2691 // Report if there are too many traps at the current method and bci.
  2692 // Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
  2693 bool Compile::too_many_traps(ciMethod* method,
  2694                              int bci,
  2695                              Deoptimization::DeoptReason reason) {
  2696   ciMethodData* md = method->method_data();
  2697   if (md->is_empty()) {
  2698     // Assume the trap has not occurred, or that it occurred only
  2699     // because of a transient condition during start-up in the interpreter.
  2700     return false;
  2702   if (md->has_trap_at(bci, reason) != 0) {
  2703     // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
  2704     // Also, if there are multiple reasons, or if there is no per-BCI record,
  2705     // assume the worst.
  2706     if (log())
  2707       log()->elem("observe trap='%s' count='%d'",
  2708                   Deoptimization::trap_reason_name(reason),
  2709                   md->trap_count(reason));
  2710     return true;
  2711   } else {
  2712     // Ignore method/bci and see if there have been too many globally.
  2713     return too_many_traps(reason, md);
  2717 // Less-accurate variant which does not require a method and bci.
  2718 bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
  2719                              ciMethodData* logmd) {
  2720   if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
  2721     // Too many traps globally.
  2722     // Note that we use cumulative trap_count, not just md->trap_count.
  2723     if (log()) {
  2724       int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
  2725       log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
  2726                   Deoptimization::trap_reason_name(reason),
  2727                   mcount, trap_count(reason));
  2729     return true;
  2730   } else {
  2731     // The coast is clear.
  2732     return false;
  2736 //--------------------------too_many_recompiles--------------------------------
  2737 // Report if there are too many recompiles at the current method and bci.
  2738 // Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
  2739 // Is not eager to return true, since this will cause the compiler to use
  2740 // Action_none for a trap point, to avoid too many recompilations.
  2741 bool Compile::too_many_recompiles(ciMethod* method,
  2742                                   int bci,
  2743                                   Deoptimization::DeoptReason reason) {
  2744   ciMethodData* md = method->method_data();
  2745   if (md->is_empty()) {
  2746     // Assume the trap has not occurred, or that it occurred only
  2747     // because of a transient condition during start-up in the interpreter.
  2748     return false;
  2749   }
  2750   // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
  2751   uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
  2752   uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
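  // For example, with a PerBytecodeRecompilationCutoff of 200 and a
  // PerMethodRecompilationCutoff of 400 (illustrative values), bc_cutoff is
  // 200 / 8 = 25 and m_cutoff is 400 / 2 + 1 = 201, so the tests below fire
  // well before either flag's real cutoff would be hit.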
  2753   Deoptimization::DeoptReason per_bc_reason
  2754     = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
  2755   if ((per_bc_reason == Deoptimization::Reason_none
  2756        || md->has_trap_at(bci, reason) != 0)
  2757       // The trap frequency measure we care about is the recompile count:
  2758       && md->trap_recompiled_at(bci)
  2759       && md->overflow_recompile_count() >= bc_cutoff) {
  2760     // Do not emit a trap here if it has already caused recompilations.
  2761     // Also, if there are multiple reasons, or if there is no per-BCI record,
  2762     // assume the worst.
  2763     if (log())
  2764       log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
  2765                   Deoptimization::trap_reason_name(reason),
  2766                   md->trap_count(reason),
  2767                   md->overflow_recompile_count());
  2768     return true;
  2769   } else if (trap_count(reason) != 0
  2770              && decompile_count() >= m_cutoff) {
  2771     // Too many recompiles globally, and we have seen this sort of trap.
  2772     // Use cumulative decompile_count, not just md->decompile_count.
  2773     if (log())
  2774       log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
  2775                   Deoptimization::trap_reason_name(reason),
  2776                   md->trap_count(reason), trap_count(reason),
  2777                   md->decompile_count(), decompile_count());
  2778     return true;
  2779   } else {
  2780     // The coast is clear.
  2781     return false;
  2782   }
  2783 }
  2786 #ifndef PRODUCT
  2787 //------------------------------verify_graph_edges---------------------------
  2788 // Walk the Graph and verify that there is a one-to-one correspondence
  2789 // between Use-Def edges and Def-Use edges in the graph.
  2790 void Compile::verify_graph_edges(bool no_dead_code) {
  2791   if (VerifyGraphEdges) {
  2792     ResourceArea *area = Thread::current()->resource_area();
  2793     Unique_Node_List visited(area);
  2794     // Call recursive graph walk to check edges
  2795     _root->verify_edges(visited);
  2796     if (no_dead_code) {
  2797       // Now make sure that no visited node is used by an unvisited node.
  2798       int dead_nodes = 0;
  2799       Unique_Node_List checked(area);
  2800       while (visited.size() > 0) {
  2801         Node* n = visited.pop();
  2802         checked.push(n);
  2803         for (uint i = 0; i < n->outcnt(); i++) {
  2804           Node* use = n->raw_out(i);
  2805           if (checked.member(use))  continue;  // already checked
  2806           if (visited.member(use))  continue;  // already in the graph
  2807           if (use->is_Con())        continue;  // a dead ConNode is OK
  2808           // At this point, we have found a dead node which is DU-reachable.
  2809           if (dead_nodes++ == 0)
  2810             tty->print_cr("*** Dead nodes reachable via DU edges:");
  2811           use->dump(2);
  2812           tty->print_cr("---");
  2813           checked.push(use);  // No repeats; pretend it is now checked.
  2814         }
  2815       }
  2816       assert(dead_nodes == 0, "using nodes must be reachable from root");
  2817     }
  2818   }
  2819 }
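// Usage sketch for the checker above (the call sites are illustrative, and
// the whole body is a no-op unless the debug flag VerifyGraphEdges is set):
//
//   verify_graph_edges(false);   // only check that UD and DU edges agree
//   verify_graph_edges(true);    // also report dead nodes still reachable via DU edges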
  2820 #endif
  2822 // The Compile object keeps track of failure reasons separately from the ciEnv.
  2823 // This is required because the relation between a ciEnv (and its compilation
  2824 // task) and the Compile object is not quite one-to-one.  Note that one
  2825 // ciEnv might use two Compile objects, if C2Compiler::compile_method decides
  2826 // to backtrack and retry without subsuming loads.  Other than this backtracking
  2827 // behavior, the Compile's failure reason is quietly copied up to the ciEnv
  2828 // by the logic in C2Compiler.
  2829 void Compile::record_failure(const char* reason) {
  2830   if (log() != NULL) {
  2831     log()->elem("failure reason='%s' phase='compile'", reason);
  2832   }
  2833   if (_failure_reason == NULL) {
  2834     // Record the first failure reason.
  2835     _failure_reason = reason;
  2836   }
  2837   if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
  2838     C->print_method(_failure_reason);
  2839   }
  2840   _root = NULL;  // flush the graph, too
  2841 }
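// Sketch of the bailout idiom this supports: a phase that hits trouble calls
// record_failure() (or record_method_not_compilable(), as above) and returns,
// and callers poll failing() before doing more work.  The condition and the
// reason string here are made up for illustration:
//
//   if (ran_out_of_nodes) {
//     record_failure("out of nodes");
//     return;
//   }
//   ...
//   if (failing())  return;   // propagate the bailout up the phase chain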
  2843 Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
  2844   : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false)
  2845 {
  2846   if (dolog) {
  2847     C = Compile::current();
  2848     _log = C->log();
  2849   } else {
  2850     C = NULL;
  2851     _log = NULL;
  2852   }
  2853   if (_log != NULL) {
  2854     _log->begin_head("phase name='%s' nodes='%d'", name, C->unique());
  2855     _log->stamp();
  2856     _log->end_head();
  2857   }
  2858 }
  2860 Compile::TracePhase::~TracePhase() {
  2861   if (_log != NULL) {
  2862     _log->done("phase nodes='%d'", C->unique());
  2863   }
  2864 }
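// TracePhase is intended as a stack-allocated RAII guard around a major
// phase: the constructor above opens a <phase ...> element in the compile
// log (when logging is enabled) and the destructor records the final node
// count.  A sketch of the idiom, with an illustrative phase name and timer:
//
//   {
//     TracePhase t("optimizer", &_t_optimizer, true);
//     Optimize();
//   }   // ~TracePhase runs here and closes out the phase entry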
