Tue, 02 Nov 2010 09:00:37 -0700
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
Summary: Use hardware DIV instruction for long division by constant when it is faster than code with multiply.
Reviewed-by: never
1 /*
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 class VM_Version: public Abstract_VM_Version {
26 protected:
27 enum Feature_Flag {
28 v8_instructions = 0,
29 hardware_mul32 = 1,
30 hardware_div32 = 2,
31 hardware_fsmuld = 3,
32 hardware_popc = 4,
33 v9_instructions = 5,
34 vis1_instructions = 6,
35 vis2_instructions = 7,
36 sun4v_instructions = 8,
37 blk_init_instructions = 9,
38 fmaf_instructions = 10
39 };
41 enum Feature_Flag_Set {
42 unknown_m = 0,
43 all_features_m = -1,
45 v8_instructions_m = 1 << v8_instructions,
46 hardware_mul32_m = 1 << hardware_mul32,
47 hardware_div32_m = 1 << hardware_div32,
48 hardware_fsmuld_m = 1 << hardware_fsmuld,
49 hardware_popc_m = 1 << hardware_popc,
50 v9_instructions_m = 1 << v9_instructions,
51 vis1_instructions_m = 1 << vis1_instructions,
52 vis2_instructions_m = 1 << vis2_instructions,
53 sun4v_m = 1 << sun4v_instructions,
54 blk_init_instructions_m = 1 << blk_init_instructions,
55 fmaf_instructions_m = 1 << fmaf_instructions,
57 generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
58 generic_v9_m = generic_v8_m | v9_instructions_m,
59 ultra3_m = generic_v9_m | vis1_instructions_m | vis2_instructions_m,
61 // Temporary until we have something more accurate
62 niagara1_unique_m = sun4v_m,
63 niagara1_m = generic_v9_m | niagara1_unique_m
64 };
66 static int _features;
67 static const char* _features_str;
69 static void print_features();
70 static int determine_features();
71 static int platform_features(int features);
73 static bool is_niagara1(int features) { return (features & sun4v_m) != 0; }
74 static bool is_sparc64(int features) { return (features & fmaf_instructions_m) != 0; }
76 static int maximum_niagara1_processor_count() { return 32; }
77 // Returns true if the platform is in the niagara line and
78 // newer than the niagara1.
79 static bool is_niagara1_plus();
81 public:
82 // Initialization
83 static void initialize();
85 // Instruction support
86 static bool has_v8() { return (_features & v8_instructions_m) != 0; }
87 static bool has_v9() { return (_features & v9_instructions_m) != 0; }
88 static bool has_hardware_mul32() { return (_features & hardware_mul32_m) != 0; }
89 static bool has_hardware_div32() { return (_features & hardware_div32_m) != 0; }
90 static bool has_hardware_fsmuld() { return (_features & hardware_fsmuld_m) != 0; }
91 static bool has_hardware_popc() { return (_features & hardware_popc_m) != 0; }
92 static bool has_vis1() { return (_features & vis1_instructions_m) != 0; }
93 static bool has_vis2() { return (_features & vis2_instructions_m) != 0; }
94 static bool has_blk_init() { return (_features & blk_init_instructions_m) != 0; }
96 static bool supports_compare_and_exchange()
97 { return has_v9(); }
99 static bool is_ultra3() { return (_features & ultra3_m) == ultra3_m; }
100 static bool is_sun4v() { return (_features & sun4v_m) != 0; }
101 static bool is_niagara1() { return is_niagara1(_features); }
102 static bool is_sparc64() { return is_sparc64(_features); }
104 static bool has_fast_fxtof() { return has_v9() && !is_ultra3(); }
105 static bool has_fast_idiv() { return is_niagara1_plus() || is_sparc64(); }
107 static const char* cpu_features() { return _features_str; }
109 static intx L1_data_cache_line_size() {
110 return 64; // default prefetch block size on sparc
111 }
113 // Prefetch
114 static intx prefetch_copy_interval_in_bytes() {
115 intx interval = PrefetchCopyIntervalInBytes;
116 return interval >= 0 ? interval : (has_v9() ? 512 : 0);
117 }
118 static intx prefetch_scan_interval_in_bytes() {
119 intx interval = PrefetchScanIntervalInBytes;
120 return interval >= 0 ? interval : (has_v9() ? 512 : 0);
121 }
122 static intx prefetch_fields_ahead() {
123 intx count = PrefetchFieldsAhead;
124 return count >= 0 ? count : (is_ultra3() ? 1 : 0);
125 }
127 static intx allocate_prefetch_distance() {
128 // This method should be called before allocate_prefetch_style().
129 intx count = AllocatePrefetchDistance;
130 if (count < 0) { // default is not defined ?
131 count = 512;
132 }
133 return count;
134 }
135 static intx allocate_prefetch_style() {
136 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
137 // Return 0 if AllocatePrefetchDistance was not defined.
138 return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
139 }
141 // Legacy
142 static bool v8_instructions_work() { return has_v8() && !has_v9(); }
143 static bool v9_instructions_work() { return has_v9(); }
145 // Assembler testing
146 static void allow_all();
147 static void revert();
149 // Override the Abstract_VM_Version implementation.
150 static uint page_size_count() { return is_sun4v() ? 4 : 2; }
152 // Calculates the number of parallel threads
153 static unsigned int calc_parallel_worker_threads();
154 };