Fri, 15 Jun 2012 01:25:19 -0700
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
1 /*
2 * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_OPTO_C2_GLOBALS_HPP
26 #define SHARE_VM_OPTO_C2_GLOBALS_HPP
28 #include "runtime/globals.hpp"
29 #ifdef TARGET_ARCH_x86
30 # include "c2_globals_x86.hpp"
31 #endif
32 #ifdef TARGET_ARCH_sparc
33 # include "c2_globals_sparc.hpp"
34 #endif
35 #ifdef TARGET_ARCH_arm
36 # include "c2_globals_arm.hpp"
37 #endif
38 #ifdef TARGET_OS_FAMILY_linux
39 # include "c2_globals_linux.hpp"
40 #endif
41 #ifdef TARGET_OS_FAMILY_solaris
42 # include "c2_globals_solaris.hpp"
43 #endif
44 #ifdef TARGET_OS_FAMILY_windows
45 # include "c2_globals_windows.hpp"
46 #endif
47 #ifdef TARGET_OS_FAMILY_bsd
48 # include "c2_globals_bsd.hpp"
49 #endif
51 //
52 // Defines all global flags used by the server compiler.
53 //
55 #define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct) \
56 \
57 notproduct(intx, CompileZapFirst, 0, \
58 "If +ZapDeadCompiledLocals, " \
59 "skip this many before compiling in zap calls") \
60 \
61 notproduct(intx, CompileZapLast, -1, \
62 "If +ZapDeadCompiledLocals, " \
63 "compile this many after skipping (incl. skip count, -1 = all)") \
64 \
65 notproduct(intx, ZapDeadCompiledLocalsFirst, 0, \
66 "If +ZapDeadCompiledLocals, " \
67 "skip this many before really doing it") \
68 \
69 notproduct(intx, ZapDeadCompiledLocalsLast, -1, \
70 "If +ZapDeadCompiledLocals, " \
71 "do this many after skipping (incl. skip count, -1 = all)") \
72 \
73 develop(intx, OptoPrologueNops, 0, \
74 "Insert this many extra nop instructions " \
75 "in the prologue of every nmethod") \
76 \
77 product_pd(intx, InteriorEntryAlignment, \
78 "Code alignment for interior entry points " \
79 "in generated code (in bytes)") \
80 \
81 product(intx, MaxLoopPad, (OptoLoopAlignment-1), \
82 "Align a loop if padding size in bytes is less or equal to this value") \
83 \
84 product(intx, MaxVectorSize, 32, \
85 "Max vector size in bytes, " \
86 "actual size could be less depending on elements type") \
87 \
88 product(bool, AlignVector, false, \
89 "Perform vector store/load alignment in loop") \
90 \
91 product(intx, NumberOfLoopInstrToAlign, 4, \
92 "Number of first instructions in a loop to align") \
93 \
94 notproduct(intx, IndexSetWatch, 0, \
95 "Trace all operations on this IndexSet (-1 means all, 0 none)") \
96 \
97 develop(intx, OptoNodeListSize, 4, \
98 "Starting allocation size of Node_List data structures") \
99 \
100 develop(intx, OptoBlockListSize, 8, \
101 "Starting allocation size of Block_List data structures") \
102 \
103 develop(intx, OptoPeepholeAt, -1, \
104 "Apply peephole optimizations to this peephole rule") \
105 \
106 notproduct(bool, PrintIdeal, false, \
107 "Print ideal graph before code generation") \
108 \
109 notproduct(bool, PrintOpto, false, \
110 "Print compiler2 attempts") \
111 \
112 notproduct(bool, PrintOptoInlining, false, \
113 "Print compiler2 inlining decisions") \
114 \
115 notproduct(bool, VerifyOpto, false, \
116 "Apply more time consuming verification during compilation") \
117 \
118 notproduct(bool, VerifyOptoOopOffsets, false, \
119 "Check types of base addresses in field references") \
120 \
121 develop(bool, IdealizedNumerics, false, \
122 "Check performance difference allowing FP " \
123 "associativity and commutativity...") \
124 \
125 develop(bool, OptoBreakpoint, false, \
126 "insert breakpoint at method entry") \
127 \
128 notproduct(bool, OptoBreakpointOSR, false, \
129 "insert breakpoint at osr method entry") \
130 \
131 notproduct(intx, BreakAtNode, 0, \
132 "Break at construction of this Node (either _idx or _debug_idx)") \
133 \
134 notproduct(bool, OptoBreakpointC2R, false, \
135 "insert breakpoint at runtime stub entry") \
136 \
137 notproduct(bool, OptoNoExecute, false, \
138 "Attempt to parse and compile but do not execute generated code") \
139 \
140 notproduct(bool, PrintOptoStatistics, false, \
141 "Print New compiler statistics") \
142 \
143 notproduct(bool, PrintOptoAssembly, false, \
144 "Print New compiler assembly output") \
145 \
146 develop_pd(bool, OptoPeephole, \
147 "Apply peephole optimizations after register allocation") \
148 \
149 develop(bool, OptoRemoveUseless, true, \
150 "Remove useless nodes after parsing") \
151 \
152 notproduct(bool, PrintFrameConverterAssembly, false, \
153 "Print New compiler assembly output for frame converters") \
154 \
155 notproduct(bool, PrintParseStatistics, false, \
156 "Print nodes, transforms and new values made per bytecode parsed")\
157 \
158 notproduct(bool, PrintOptoPeephole, false, \
159 "Print New compiler peephole replacements") \
160 \
161 develop(bool, PrintCFGBlockFreq, false, \
162 "Print CFG block frequencies") \
163 \
164 develop(bool, TraceOptoParse, false, \
165 "Trace bytecode parse and control-flow merge") \
166 \
167 product_pd(intx, LoopUnrollLimit, \
168 "Unroll loop bodies with node count less than this") \
169 \
170 product(intx, LoopUnrollMin, 4, \
171 "Minimum number of unroll loop bodies before checking progress " \
172 "of rounds of unroll,optimize,..") \
173 \
174 develop(intx, UnrollLimitForProfileCheck, 1, \
175 "Don't use profile_trip_cnt() to restrict unrolling until " \
176 "unrolling would push the number of unrolled iterations above " \
177 "UnrollLimitForProfileCheck. A higher value allows more " \
178 "unrolling. Zero acts as a very large value." ) \
179 \
180 product(intx, MultiArrayExpandLimit, 6, \
181 "Maximum number of individual allocations in an inline-expanded " \
182 "multianewarray instruction") \
183 \
184 notproduct(bool, TraceProfileTripCount, false, \
185 "Trace profile loop trip count information") \
186 \
187 product(bool, UseLoopPredicate, true, \
188 "Generate a predicate to select fast/slow loop versions") \
189 \
190 develop(bool, TraceLoopPredicate, false, \
191 "Trace generation of loop predicates") \
192 \
193 develop(bool, TraceLoopOpts, false, \
194 "Trace executed loop optimizations") \
195 \
196 diagnostic(bool, LoopLimitCheck, true, \
197 "Generate a loop limits check for overflow") \
198 \
199 develop(bool, TraceLoopLimitCheck, false, \
200 "Trace generation of loop limits checks") \
201 \
202 diagnostic(bool, RangeLimitCheck, true, \
203 "Additional overflow checks during range check elimination") \
204 \
205 develop(bool, TraceRangeLimitCheck, false, \
206 "Trace additional overflow checks in RCE") \
207 \
208 diagnostic(bool, UnrollLimitCheck, true, \
209 "Additional overflow checks during loop unroll") \
210 \
211 product(bool, OptimizeFill, true, \
212 "convert fill/copy loops into intrinsic") \
213 \
214 develop(bool, TraceOptimizeFill, false, \
215 "print detailed information about fill conversion") \
216 \
217 develop(bool, OptoCoalesce, true, \
218 "Use Conservative Copy Coalescing in the Register Allocator") \
219 \
220 develop(bool, UseUniqueSubclasses, true, \
221 "Narrow an abstract reference to the unique concrete subclass") \
222 \
223 develop(bool, UseExactTypes, true, \
224 "Use exact types to eliminate array store checks and v-calls") \
225 \
226 product(intx, TrackedInitializationLimit, 50, \
227 "When initializing fields, track up to this many words") \
228 \
229 product(bool, ReduceFieldZeroing, true, \
230 "When initializing fields, try to avoid needless zeroing") \
231 \
232 product(bool, ReduceInitialCardMarks, true, \
233 "When initializing fields, try to avoid needless card marks") \
234 \
235 product(bool, ReduceBulkZeroing, true, \
236 "When bulk-initializing, try to avoid needless zeroing") \
237 \
238 product(bool, UseFPUForSpilling, false, \
239 "Spill integer registers to FPU instead of stack when possible") \
240 \
241 develop_pd(intx, RegisterCostAreaRatio, \
242 "Spill selection in reg allocator: scale area by (X/64K) before " \
243 "adding cost") \
244 \
245 develop_pd(bool, UseCISCSpill, \
246 "Use ADLC supplied cisc instructions during allocation") \
247 \
248 notproduct(bool, VerifyGraphEdges , false, \
249 "Verify Bi-directional Edges") \
250 \
251 notproduct(bool, VerifyDUIterators, true, \
252 "Verify the safety of all iterations of Bi-directional Edges") \
253 \
254 notproduct(bool, VerifyHashTableKeys, true, \
255 "Verify the immutability of keys in the VN hash tables") \
256 \
257 notproduct(bool, VerifyRegisterAllocator , false, \
258 "Verify Register Allocator") \
259 \
260 develop_pd(intx, FLOATPRESSURE, \
261 "Number of float LRG's that constitute high register pressure") \
262 \
263 develop_pd(intx, INTPRESSURE, \
264 "Number of integer LRG's that constitute high register pressure") \
265 \
266 notproduct(bool, TraceOptoPipelining, false, \
267 "Trace pipelining information") \
268 \
269 notproduct(bool, TraceOptoOutput, false, \
270 "Trace pipelining information") \
271 \
272 product_pd(bool, OptoScheduling, \
273 "Instruction Scheduling after register allocation") \
274 \
275 product(bool, PartialPeelLoop, true, \
276 "Partial peel (rotate) loops") \
277 \
278 product(intx, PartialPeelNewPhiDelta, 0, \
279 "Additional phis that can be created by partial peeling") \
280 \
281 notproduct(bool, TracePartialPeeling, false, \
282 "Trace partial peeling (loop rotation) information") \
283 \
284 product(bool, PartialPeelAtUnsignedTests, true, \
285 "Partial peel at unsigned tests if no signed test exists") \
286 \
287 product(bool, ReassociateInvariants, true, \
288 "Enable reassociation of expressions with loop invariants.") \
289 \
290 product(bool, LoopUnswitching, true, \
291 "Enable loop unswitching (a form of invariant test hoisting)") \
292 \
293 notproduct(bool, TraceLoopUnswitching, false, \
294 "Trace loop unswitching") \
295 \
296 product(bool, UseSuperWord, true, \
297 "Transform scalar operations into superword operations") \
298 \
299 develop(bool, SuperWordRTDepCheck, false, \
300 "Enable runtime dependency checks.") \
301 \
302 product(bool, TraceSuperWord, false, \
303 "Trace superword transforms") \
304 \
305 product_pd(bool, OptoBundling, \
306 "Generate nops to fill i-cache lines") \
307 \
308 product_pd(intx, ConditionalMoveLimit, \
309 "Limit of ops to make speculative when using CMOVE") \
310 \
311 /* Set BranchOnRegister == false. See 4965987. */ \
312 product(bool, BranchOnRegister, false, \
313 "Use Sparc V9 branch-on-register opcodes") \
314 \
315 develop(bool, SparcV9RegsHiBitsZero, true, \
316 "Assume Sparc V9 I&L registers on V8+ systems are zero-extended") \
317 \
318 product(bool, UseRDPCForConstantTableBase, false, \
319 "Use Sparc RDPC instruction for the constant table base.") \
320 \
321 develop(intx, PrintIdealGraphLevel, 0, \
322 "Print ideal graph to XML file / network interface. " \
323 "By default attempts to connect to the visualizer on a socket.") \
324 \
325 develop(intx, PrintIdealGraphPort, 4444, \
326 "Ideal graph printer to network port") \
327 \
328 notproduct(ccstr, PrintIdealGraphAddress, "127.0.0.1", \
329 "IP address to connect to visualizer") \
330 \
331 notproduct(ccstr, PrintIdealGraphFile, NULL, \
332 "File to dump ideal graph to. If set overrides the " \
333 "use of the network") \
334 \
335 product(bool, UseOldInlining, true, \
336 "Enable the 1.3 inlining strategy") \
337 \
338 product(bool, UseBimorphicInlining, true, \
339 "Profiling based inlining for two receivers") \
340 \
341 product(bool, UseOnlyInlinedBimorphic, true, \
342 "Don't use BimorphicInlining if can't inline a second method") \
343 \
344 product(bool, InsertMemBarAfterArraycopy, true, \
345 "Insert memory barrier after arraycopy call") \
346 \
347 develop(bool, SubsumeLoads, true, \
348 "Attempt to compile while subsuming loads into machine instructions.") \
349 \
350 develop(bool, StressRecompilation, false, \
351 "Recompile each compiled method without subsuming loads or escape analysis.") \
352 \
353 /* controls for tier 1 compilations */ \
354 \
355 develop(bool, Tier1CountInvocations, true, \
356 "Generate code, during tier 1, to update invocation counter") \
357 \
357 product(intx, Tier1Inline, 0, \
358 "enable inlining during tier 1") \
360 \
361 product(intx, Tier1MaxInlineSize, 8, \
362 "maximum bytecode size of a method to be inlined, during tier 1") \
363 \
364 product(intx, Tier1FreqInlineSize, 35, \
365 "max bytecode size of a frequent method to be inlined, tier 1") \
366 \
367 develop(intx, ImplicitNullCheckThreshold, 3, \
368 "Don't do implicit null checks if NPE's in a method exceeds limit") \
369 \
370 /* controls for loop optimization */ \
371 product(intx, Tier1LoopOptsCount, 0, \
372 "Set level of loop optimization for tier 1 compiles") \
373 \
373 product(intx, LoopOptsCount, 43, \
374 "Set level of loop optimization") \
376 \
377 /* controls for heat-based inlining */ \
378 \
379 develop(intx, NodeCountInliningCutoff, 18000, \
380 "If parser node generation exceeds limit stop inlining") \
381 \
382 develop(intx, NodeCountInliningStep, 1000, \
383 "Target size of warm calls inlined between optimization passes") \
384 \
385 develop(bool, InlineWarmCalls, false, \
386 "Use a heat-based priority queue to govern inlining") \
387 \
388 develop(intx, HotCallCountThreshold, 999999, \
389 "large numbers of calls (per method invocation) force hotness") \
390 \
391 develop(intx, HotCallProfitThreshold, 999999, \
392 "highly profitable inlining opportunities force hotness") \
393 \
394 develop(intx, HotCallTrivialWork, -1, \
395 "trivial execution time (no larger than this) forces hotness") \
396 \
397 develop(intx, HotCallTrivialSize, -1, \
398 "trivial methods (no larger than this) force calls to be hot") \
399 \
400 develop(intx, WarmCallMinCount, -1, \
401 "number of calls (per method invocation) to enable inlining") \
402 \
402 develop(intx, WarmCallMinProfit, -1, \
403 "minimum inlining profit required to enable inlining") \
405 \
406 develop(intx, WarmCallMaxWork, 999999, \
407 "execution time of the largest inlinable method") \
408 \
409 develop(intx, WarmCallMaxSize, 999999, \
410 "size of the largest inlinable method") \
411 \
412 product(intx, MaxNodeLimit, 65000, \
413 "Maximum number of nodes") \
414 \
415 product(intx, NodeLimitFudgeFactor, 1000, \
416 "Fudge Factor for certain optimizations") \
417 \
418 product(bool, UseJumpTables, true, \
419 "Use JumpTables instead of a binary search tree for switches") \
420 \
421 product(bool, UseDivMod, true, \
422 "Use combined DivMod instruction if available") \
423 \
424 product(intx, MinJumpTableSize, 18, \
425 "Minimum number of targets in a generated jump table") \
426 \
427 product(intx, MaxJumpTableSize, 65000, \
428 "Maximum number of targets in a generated jump table") \
429 \
430 product(intx, MaxJumpTableSparseness, 5, \
431 "Maximum sparseness for jumptables") \
432 \
433 product(bool, EliminateLocks, true, \
434 "Coarsen locks when possible") \
435 \
436 product(bool, EliminateNestedLocks, true, \
437 "Eliminate nested locks of the same object when possible") \
438 \
439 notproduct(bool, PrintLockStatistics, false, \
440 "Print precise statistics on the dynamic lock usage") \
441 \
442 diagnostic(bool, PrintPreciseBiasedLockingStatistics, false, \
443 "Print per-lock-site statistics of biased locking in JVM") \
444 \
445 notproduct(bool, PrintEliminateLocks, false, \
446 "Print out when locks are eliminated") \
447 \
448 diagnostic(bool, EliminateAutoBox, false, \
449 "Private flag to control optimizations for autobox elimination") \
450 \
451 product(intx, AutoBoxCacheMax, 128, \
452 "Sets max value cached by the java.lang.Integer autobox cache") \
453 \
454 product(bool, DoEscapeAnalysis, true, \
455 "Perform escape analysis") \
456 \
457 notproduct(bool, PrintEscapeAnalysis, false, \
458 "Print the results of escape analysis") \
459 \
460 product(bool, EliminateAllocations, true, \
461 "Use escape analysis to eliminate allocations") \
462 \
463 notproduct(bool, PrintEliminateAllocations, false, \
464 "Print out when allocations are eliminated") \
465 \
466 product(intx, EliminateAllocationArraySizeLimit, 64, \
467 "Array size (number of elements) limit for scalar replacement") \
468 \
469 product(bool, OptimizePtrCompare, true, \
470 "Use escape analysis to optimize pointers compare") \
471 \
472 notproduct(bool, PrintOptimizePtrCompare, false, \
473 "Print information about optimized pointers compare") \
474 \
475 notproduct(bool, VerifyConnectionGraph , true, \
476 "Verify Connection Graph construction in Escape Analysis") \
477 \
478 product(bool, UseOptoBiasInlining, true, \
479 "Generate biased locking code in C2 ideal graph") \
480 \
481 product(bool, OptimizeStringConcat, true, \
482 "Optimize the construction of Strings by StringBuilder") \
483 \
484 notproduct(bool, PrintOptimizeStringConcat, false, \
485 "Print information about transformations performed on Strings") \
486 \
487 product(intx, ValueSearchLimit, 1000, \
488 "Recursion limit in PhaseMacroExpand::value_from_mem_phi") \
489 \
490 product(intx, MaxLabelRootDepth, 1100, \
491 "Maximum times call Label_Root to prevent stack overflow") \
492 \
493 diagnostic(intx, DominatorSearchLimit, 1000, \
494 "Iterations limit in Node::dominates") \
495 \
496 product(bool, BlockLayoutByFrequency, true, \
497 "Use edge frequencies to drive block ordering") \
498 \
498 product(intx, BlockLayoutMinDiamondPercentage, 20, \
499 "Minimum %% of a successor (predecessor) for which block layout "\
500 "will allow a fork (join) in a single chain") \
502 \
502 product(bool, BlockLayoutRotateLoops, true, \
503 "Allow back branches to be fall throughs in the block layout") \
506 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG) // expand the flag list above, passing one DECLARE_* macro per flag category (develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct)
508 #endif // SHARE_VM_OPTO_C2_GLOBALS_HPP