Thu, 20 Sep 2012 16:49:17 +0200
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
Summary: use shorter instruction sequences for atomic add and atomic exchange when possible.
Reviewed-by: kvn, jrose
1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "code/nmethod.hpp"
27 #include "compiler/compileBroker.hpp"
28 #include "opto/compile.hpp"
29 #include "opto/node.hpp"
30 #include "opto/phase.hpp"
// Out-of-class definitions of Phase's static bookkeeping members.
// Everything between an "#ifndef PRODUCT" and its "#endif" is defined only
// when PRODUCT is not defined. The values are read by Phase::print_timers();
// nothing in this part of the file starts, stops or increments them.
32 #ifndef PRODUCT
33 int Phase::_total_bytes_compiled = 0; // printed as "bytes" and used for the bytes-per-second figure
35 elapsedTimer Phase::_t_totalCompilation;  // reported as "Total compilation"
36 elapsedTimer Phase::_t_methodCompilation; // reported as "method compilation"
37 elapsedTimer Phase::_t_stubCompilation;   // reported as "stub compilation"
38 #endif
40 // The following timers are used for LogCompilation. They sit outside the
// PRODUCT guard, so they are defined in every build.
41 elapsedTimer Phase::_t_parser;
42 elapsedTimer Phase::_t_optimizer;
43 elapsedTimer Phase::_t_escapeAnalysis;
44 elapsedTimer Phase::_t_connectionGraph;
45 elapsedTimer Phase::_t_idealLoop;
46 elapsedTimer Phase::_t_ccp;
47 elapsedTimer Phase::_t_matcher;
48 elapsedTimer Phase::_t_registerAllocation;
49 elapsedTimer Phase::_t_output;
// The remaining timers are defined only when PRODUCT is not defined.
51 #ifndef PRODUCT
52 elapsedTimer Phase::_t_graphReshaping;
53 elapsedTimer Phase::_t_scheduler;
54 elapsedTimer Phase::_t_blockOrdering;
55 elapsedTimer Phase::_t_macroEliminate;
56 elapsedTimer Phase::_t_macroExpand;
57 elapsedTimer Phase::_t_peephole;
58 elapsedTimer Phase::_t_codeGeneration;
59 elapsedTimer Phase::_t_registerMethod; // reported as "install_code"
60 elapsedTimer Phase::_t_temporaryTimer1; // printed only when above minimum_reported_time
61 elapsedTimer Phase::_t_temporaryTimer2; // printed only when above minimum_reported_time
62 elapsedTimer Phase::_t_idealLoopVerify;
64 // Subtimers for _t_optimizer
65 elapsedTimer Phase::_t_iterGVN;
66 elapsedTimer Phase::_t_iterGVN2;
68 // Subtimers for _t_registerAllocation; print_timers() adds these up into
// the register allocation "subtotal" line.
69 elapsedTimer Phase::_t_ctorChaitin;
70 elapsedTimer Phase::_t_buildIFGphysical;
71 elapsedTimer Phase::_t_computeLive;
72 elapsedTimer Phase::_t_regAllocSplit;
73 elapsedTimer Phase::_t_postAllocCopyRemoval;
74 elapsedTimer Phase::_t_fixupSpills;
76 // Subtimers for _t_output
77 elapsedTimer Phase::_t_instrSched;   // reported as "isched"
78 elapsedTimer Phase::_t_buildOopMaps; // reported as "bldOopMaps"
79 #endif
81 //------------------------------Phase------------------------------------------
82 Phase::Phase( PhaseNumber pnum ) : _pnum(pnum), C( pnum == Compiler ? NULL : Compile::current()) {
83 // Poll for requests from shutdown mechanism to quiesce compiler (4448539, 4448544).
84 // This is an effective place to poll, since the compiler is full of phases.
85 // In particular, every inlining site uses a recursively created Parse phase.
86 CompileBroker::maybe_block();
87 }
89 #ifndef PRODUCT
90 static const double minimum_reported_time = 0.0001; // seconds
91 static const double expected_method_compile_coverage = 0.97; // %
92 static const double minimum_meaningful_method_compile = 2.00; // seconds
94 void Phase::print_timers() {
95 tty->print_cr ("Accumulated compiler times:");
96 tty->print_cr ("---------------------------");
97 tty->print_cr (" Total compilation: %3.3f sec.", Phase::_t_totalCompilation.seconds());
98 tty->print (" method compilation : %3.3f sec", Phase::_t_methodCompilation.seconds());
99 tty->print ("/%d bytes",_total_bytes_compiled);
100 tty->print_cr (" (%3.0f bytes per sec) ", Phase::_total_bytes_compiled / Phase::_t_methodCompilation.seconds());
101 tty->print_cr (" stub compilation : %3.3f sec.", Phase::_t_stubCompilation.seconds());
102 tty->print_cr (" Phases:");
103 tty->print_cr (" parse : %3.3f sec", Phase::_t_parser.seconds());
104 tty->print_cr (" optimizer : %3.3f sec", Phase::_t_optimizer.seconds());
105 if( Verbose || WizardMode ) {
106 if (DoEscapeAnalysis) {
107 // EA is part of Optimizer.
108 tty->print_cr (" escape analysis: %3.3f sec", Phase::_t_escapeAnalysis.seconds());
109 tty->print_cr (" connection graph: %3.3f sec", Phase::_t_connectionGraph.seconds());
110 tty->print_cr (" macroEliminate : %3.3f sec", Phase::_t_macroEliminate.seconds());
111 }
112 tty->print_cr (" iterGVN : %3.3f sec", Phase::_t_iterGVN.seconds());
113 tty->print_cr (" idealLoop : %3.3f sec", Phase::_t_idealLoop.seconds());
114 tty->print_cr (" idealLoopVerify: %3.3f sec", Phase::_t_idealLoopVerify.seconds());
115 tty->print_cr (" ccp : %3.3f sec", Phase::_t_ccp.seconds());
116 tty->print_cr (" iterGVN2 : %3.3f sec", Phase::_t_iterGVN2.seconds());
117 tty->print_cr (" macroExpand : %3.3f sec", Phase::_t_macroExpand.seconds());
118 tty->print_cr (" graphReshape : %3.3f sec", Phase::_t_graphReshaping.seconds());
119 double optimizer_subtotal = Phase::_t_iterGVN.seconds() + Phase::_t_iterGVN2.seconds() +
120 Phase::_t_escapeAnalysis.seconds() + Phase::_t_macroEliminate.seconds() +
121 Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
122 Phase::_t_macroExpand.seconds() + Phase::_t_graphReshaping.seconds();
123 double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0));
124 tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", optimizer_subtotal, percent_of_optimizer);
125 }
126 tty->print_cr (" matcher : %3.3f sec", Phase::_t_matcher.seconds());
127 tty->print_cr (" scheduler : %3.3f sec", Phase::_t_scheduler.seconds());
128 tty->print_cr (" regalloc : %3.3f sec", Phase::_t_registerAllocation.seconds());
129 if( Verbose || WizardMode ) {
130 tty->print_cr (" ctorChaitin : %3.3f sec", Phase::_t_ctorChaitin.seconds());
131 tty->print_cr (" buildIFG : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
132 tty->print_cr (" computeLive : %3.3f sec", Phase::_t_computeLive.seconds());
133 tty->print_cr (" regAllocSplit : %3.3f sec", Phase::_t_regAllocSplit.seconds());
134 tty->print_cr (" postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
135 tty->print_cr (" fixupSpills : %3.3f sec", Phase::_t_fixupSpills.seconds());
136 double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
137 Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
138 Phase::_t_regAllocSplit.seconds() + Phase::_t_fixupSpills.seconds() +
139 Phase::_t_postAllocCopyRemoval.seconds();
140 double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
141 tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc);
142 }
143 tty->print_cr (" blockOrdering : %3.3f sec", Phase::_t_blockOrdering.seconds());
144 tty->print_cr (" peephole : %3.3f sec", Phase::_t_peephole.seconds());
145 tty->print_cr (" codeGen : %3.3f sec", Phase::_t_codeGeneration.seconds());
146 tty->print_cr (" install_code : %3.3f sec", Phase::_t_registerMethod.seconds());
147 tty->print_cr (" -------------- : ----------");
148 double phase_subtotal = Phase::_t_parser.seconds() +
149 Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
150 Phase::_t_matcher.seconds() + Phase::_t_scheduler.seconds() +
151 Phase::_t_registerAllocation.seconds() + Phase::_t_blockOrdering.seconds() +
152 Phase::_t_codeGeneration.seconds() + Phase::_t_registerMethod.seconds();
153 double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;
154 // counters inside Compile::CodeGen include time for adapters and stubs
155 // so phase-total can be greater than 100%
156 tty->print_cr (" total : %3.3f sec, %3.2f %%", phase_subtotal, percent_of_method_compile);
158 assert( percent_of_method_compile > expected_method_compile_coverage ||
159 phase_subtotal < minimum_meaningful_method_compile,
160 "Must account for method compilation");
162 if( Phase::_t_temporaryTimer1.seconds() > minimum_reported_time ) {
163 tty->cr();
164 tty->print_cr (" temporaryTimer1: %3.3f sec", Phase::_t_temporaryTimer1.seconds());
165 }
166 if( Phase::_t_temporaryTimer2.seconds() > minimum_reported_time ) {
167 tty->cr();
168 tty->print_cr (" temporaryTimer2: %3.3f sec", Phase::_t_temporaryTimer2.seconds());
169 }
170 tty->print_cr (" output : %3.3f sec", Phase::_t_output.seconds());
171 tty->print_cr (" isched : %3.3f sec", Phase::_t_instrSched.seconds());
172 tty->print_cr (" bldOopMaps : %3.3f sec", Phase::_t_buildOopMaps.seconds());
173 }
174 #endif