Tue, 02 Nov 2010 09:00:37 -0700
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
Summary: Use hardware DIV instruction for long division by constant when it is faster than code with multiply.
Reviewed-by: never
1 /*
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 # include "incls/_precompiled.incl"
26 # include "incls/_vm_version_sparc.cpp.incl"
28 int VM_Version::_features = VM_Version::unknown_m;
29 const char* VM_Version::_features_str = "";
31 bool VM_Version::is_niagara1_plus() {
32 // This is a placeholder until the real test is determined.
33 return is_niagara1() &&
34 (os::processor_count() > maximum_niagara1_processor_count());
35 }
37 void VM_Version::initialize() {
38 _features = determine_features();
39 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
40 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
41 PrefetchFieldsAhead = prefetch_fields_ahead();
43 // Allocation prefetch settings
44 intx cache_line_size = L1_data_cache_line_size();
45 if( cache_line_size > AllocatePrefetchStepSize )
46 AllocatePrefetchStepSize = cache_line_size;
47 if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
48 AllocatePrefetchLines = 3; // Optimistic value
49 assert( AllocatePrefetchLines > 0, "invalid value");
50 if( AllocatePrefetchLines < 1 ) // set valid value in product VM
51 AllocatePrefetchLines = 1; // Conservative value
53 AllocatePrefetchDistance = allocate_prefetch_distance();
54 AllocatePrefetchStyle = allocate_prefetch_style();
56 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
58 UseSSE = 0; // Only on x86 and x64
60 _supports_cx8 = has_v9();
62 if (is_niagara1()) {
63 // Indirect branch is the same cost as direct
64 if (FLAG_IS_DEFAULT(UseInlineCaches)) {
65 FLAG_SET_DEFAULT(UseInlineCaches, false);
66 }
67 #ifdef _LP64
68 // 32-bit oops don't make sense for the 64-bit VM on sparc
69 // since the 32-bit VM has the same registers and smaller objects.
70 Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
71 #endif // _LP64
72 #ifdef COMPILER2
73 // Indirect branch is the same cost as direct
74 if (FLAG_IS_DEFAULT(UseJumpTables)) {
75 FLAG_SET_DEFAULT(UseJumpTables, true);
76 }
77 // Single-issue, so entry and loop tops are
78 // aligned on a single instruction boundary
79 if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
80 FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
81 }
82 if (is_niagara1_plus()) {
83 if (has_blk_init() && AllocatePrefetchStyle > 0 &&
84 FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
85 // Use BIS instruction for allocation prefetch.
86 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
87 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
88 // Use smaller prefetch distance on N2 with BIS
89 FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
90 }
91 }
92 if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
93 // Use different prefetch distance without BIS
94 FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
95 }
96 }
97 #endif
98 if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
99 FLAG_SET_DEFAULT(OptoLoopAlignment, 4);
100 }
101 // When using CMS, we cannot use memset() in BOT updates because
102 // the sun4v/CMT version in libc_psr uses BIS which exposes
103 // "phantom zeros" to concurrent readers. See 6948537.
104 if (FLAG_IS_DEFAULT(UseMemSetInBOT) && UseConcMarkSweepGC) {
105 FLAG_SET_DEFAULT(UseMemSetInBOT, false);
106 }
107 }
109 // Use hardware population count instruction if available.
110 if (has_hardware_popc()) {
111 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
112 FLAG_SET_DEFAULT(UsePopCountInstruction, true);
113 }
114 }
116 #ifdef COMPILER2
117 // Currently not supported anywhere.
118 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
119 #endif
121 char buf[512];
122 jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
123 (has_v8() ? ", has_v8" : ""),
124 (has_v9() ? ", has_v9" : ""),
125 (has_hardware_popc() ? ", popc" : ""),
126 (has_vis1() ? ", has_vis1" : ""),
127 (has_vis2() ? ", has_vis2" : ""),
128 (has_blk_init() ? ", has_blk_init" : ""),
129 (is_ultra3() ? ", is_ultra3" : ""),
130 (is_sun4v() ? ", is_sun4v" : ""),
131 (is_niagara1() ? ", is_niagara1" : ""),
132 (is_niagara1_plus() ? ", is_niagara1_plus" : ""),
133 (is_sparc64() ? ", is_sparc64" : ""),
134 (!has_hardware_mul32() ? ", no-mul32" : ""),
135 (!has_hardware_div32() ? ", no-div32" : ""),
136 (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
138 // buf is started with ", " or is empty
139 _features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf);
141 #ifndef PRODUCT
142 if (PrintMiscellaneous && Verbose) {
143 tty->print("Allocation: ");
144 if (AllocatePrefetchStyle <= 0) {
145 tty->print_cr("no prefetching");
146 } else {
147 if (AllocatePrefetchLines > 1) {
148 tty->print_cr("PREFETCH %d, %d lines of size %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
149 } else {
150 tty->print_cr("PREFETCH %d, one line", AllocatePrefetchDistance);
151 }
152 }
153 if (PrefetchCopyIntervalInBytes > 0) {
154 tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
155 }
156 if (PrefetchScanIntervalInBytes > 0) {
157 tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
158 }
159 if (PrefetchFieldsAhead > 0) {
160 tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
161 }
162 }
163 #endif // PRODUCT
164 }
166 void VM_Version::print_features() {
167 tty->print_cr("Version:%s", cpu_features());
168 }
170 int VM_Version::determine_features() {
171 if (UseV8InstrsOnly) {
172 NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-V8");)
173 return generic_v8_m;
174 }
176 int features = platform_features(unknown_m); // platform_features() is os_arch specific
178 if (features == unknown_m) {
179 features = generic_v9_m;
180 warning("Cannot recognize SPARC version. Default to V9");
181 }
183 if (UseNiagaraInstrs) {
184 if (is_niagara1(features)) {
185 // Happy to accomodate...
186 } else {
187 NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Niagara");)
188 features = niagara1_m;
189 }
190 } else {
191 if (is_niagara1(features) && !FLAG_IS_DEFAULT(UseNiagaraInstrs)) {
192 NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Not-Niagara");)
193 features &= ~niagara1_unique_m;
194 } else {
195 // Happy to accomodate...
196 }
197 }
199 return features;
200 }
202 static int saved_features = 0;
204 void VM_Version::allow_all() {
205 saved_features = _features;
206 _features = all_features_m;
207 }
209 void VM_Version::revert() {
210 _features = saved_features;
211 }
213 unsigned int VM_Version::calc_parallel_worker_threads() {
214 unsigned int result;
215 if (is_niagara1_plus()) {
216 result = nof_parallel_worker_threads(5, 16, 8);
217 } else {
218 result = nof_parallel_worker_threads(5, 8, 8);
219 }
220 return result;
221 }