Tue, 04 Feb 2020 18:13:14 +0800
Merge
goetz@6458 | 1 | /* |
gromero@9662 | 2 | * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. |
gromero@9662 | 3 | * Copyright 2012, 2018 SAP AG. All rights reserved. |
goetz@6458 | 4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
goetz@6458 | 5 | * |
goetz@6458 | 6 | * This code is free software; you can redistribute it and/or modify it |
goetz@6458 | 7 | * under the terms of the GNU General Public License version 2 only, as |
goetz@6458 | 8 | * published by the Free Software Foundation. |
goetz@6458 | 9 | * |
goetz@6458 | 10 | * This code is distributed in the hope that it will be useful, but WITHOUT |
goetz@6458 | 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
goetz@6458 | 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
goetz@6458 | 13 | * version 2 for more details (a copy is included in the LICENSE file that |
goetz@6458 | 14 | * accompanied this code). |
goetz@6458 | 15 | * |
goetz@6458 | 16 | * You should have received a copy of the GNU General Public License version |
goetz@6458 | 17 | * 2 along with this work; if not, write to the Free Software Foundation, |
goetz@6458 | 18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
goetz@6458 | 19 | * |
goetz@6458 | 20 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
goetz@6458 | 21 | * or visit www.oracle.com if you need additional information or have any |
goetz@6458 | 22 | * questions. |
goetz@6458 | 23 | * |
goetz@6458 | 24 | */ |
goetz@6458 | 25 | |
goetz@6458 | 26 | #include "precompiled.hpp" |
goetz@6511 | 27 | #include "asm/assembler.inline.hpp" |
goetz@6511 | 28 | #include "asm/macroAssembler.inline.hpp" |
goetz@6458 | 29 | #include "compiler/disassembler.hpp" |
goetz@6458 | 30 | #include "memory/resourceArea.hpp" |
goetz@6458 | 31 | #include "runtime/java.hpp" |
goetz@6458 | 32 | #include "runtime/stubCodeGenerator.hpp" |
goetz@6458 | 33 | #include "utilities/defaultStream.hpp" |
goetz@6458 | 34 | #include "vm_version_ppc.hpp" |
goetz@6458 | 35 | #ifdef TARGET_OS_FAMILY_aix |
goetz@6458 | 36 | # include "os_aix.inline.hpp" |
goetz@6458 | 37 | #endif |
goetz@6458 | 38 | #ifdef TARGET_OS_FAMILY_linux |
goetz@6458 | 39 | # include "os_linux.inline.hpp" |
goetz@6458 | 40 | #endif |
goetz@6458 | 41 | |
goetz@6458 | 42 | # include <sys/sysinfo.h> |
goetz@6458 | 43 | |
// Bitmask of detected CPU features; stays at unknown_m until determine_features() stores its result.
int VM_Version::_features = VM_Version::unknown_m;
// Cache line size in bytes; determine_features() overwrites it with the number of bytes one dcbz zeroes.
int VM_Version::_measured_cache_line_size = 128; // default value
// Printable feature list; replaced in initialize() once the features are known.
const char* VM_Version::_features_str = "";
// Set to true only while determine_features() executes the generated probe stub.
bool VM_Version::_is_determine_features_test_running = false;
// Data Stream Control Register value as read (and possibly adjusted) by config_dscr().
uint64_t VM_Version::_dscr_val = 0;
goetz@6458 | 49 | |
// Print a warning on the error stream that -XX:+<flag> requires -XX:+UseSIGTRAP
// and will be disabled. Nothing is printed unless the flag is currently on and
// was set explicitly (i.e. is not at its default).
// The body is wrapped in do { } while (0) so that 'MSG(x);' expands to exactly
// one statement and can be used safely inside an unbraced if/else.
#define MSG(flag)                                                        \
  do {                                                                   \
    if (flag && !FLAG_IS_DEFAULT(flag))                                  \
      jio_fprintf(defaultStream::error_stream(),                         \
                  "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n"   \
                  " -XX:+" #flag " will be disabled!\n");                \
  } while (0)
goetz@6458 | 55 | |
// Detect the CPU features and derive the platform dependent VM flag settings.
//
// Steps, in this order:
//   1. probe the supported instructions and measure the cache line size,
//   2. choose PowerArchitecturePPC64 (unless given explicitly) and validate it,
//   3. configure the DSCR when PowerArchitecturePPC64 >= 8,
//   4. switch off the trap based checks when UseSIGTRAP is off,
//   5. build the feature string,
//   6. tune the allocation prefetch flags from the measured cache line size,
//   7. enable/disable the CRC32, AES, GHASH, SHA and Montgomery intrinsics.
void VM_Version::initialize() {

  // Test which instructions are supported and measure cache line size.
  determine_features();

  // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
  // The checks go from the newest to the oldest level, so the first hit wins.
  if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
    if (VM_Version::has_lqarx()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
    } else if (VM_Version::has_popcntw()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
    } else if (VM_Version::has_cmpb()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
    } else if (VM_Version::has_popcntb()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 5);
    } else {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0);
    }
  }
  // Also rejects unsupported values that were passed explicitly on the command line.
  guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
            PowerArchitecturePPC64 == 8,
            "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");

  // Power 8: Configure Data Stream Control Register.
  if (PowerArchitecturePPC64 >= 8) {
    config_dscr();
  }

  // Without UseSIGTRAP the trap based checks are switched off; MSG warns
  // only for flags the user enabled explicitly.
  if (!UseSIGTRAP) {
    MSG(TrapBasedICMissChecks);
    MSG(TrapBasedNotEntrantChecks);
    MSG(TrapBasedNullChecks);
    FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false);
    FLAG_SET_ERGO(bool, TrapBasedNullChecks, false);
    FLAG_SET_ERGO(bool, TrapBasedICMissChecks, false);
  }

#ifdef COMPILER2
  // Same treatment for the range check flag handled in COMPILER2 builds.
  if (!UseSIGTRAP) {
    MSG(TrapBasedRangeChecks);
    FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
  }

  // On Power6 test for section size.
  if (PowerArchitecturePPC64 == 6) {
    determine_section_size();
  // TODO: PPC port } else {
  // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
  }

  MaxVectorSize = 8;
#endif

  // Create and print feature-string.
  char buf[(num_features+1) * 16]; // Max 16 chars per feature.
  jio_snprintf(buf, sizeof(buf),
               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               (has_fsqrt()   ? " fsqrt"   : ""),
               (has_isel()    ? " isel"    : ""),
               (has_lxarxeh() ? " lxarxeh" : ""),
               (has_cmpb()    ? " cmpb"    : ""),
               //(has_mftgpr()? " mftgpr"  : ""),
               (has_popcntb() ? " popcntb" : ""),
               (has_popcntw() ? " popcntw" : ""),
               (has_fcfids()  ? " fcfids"  : ""),
               (has_vand()    ? " vand"    : ""),
               (has_lqarx()   ? " lqarx"   : ""),
               (has_vcipher() ? " aes"     : ""),
               (has_vpmsumb() ? " vpmsumb" : ""),
               (has_mfdscr()  ? " mfdscr"  : ""),
               (has_vsx()     ? " vsx"     : ""),
               (has_vshasig() ? " sha"     : "")
               // Make sure number of %s matches num_features!
              );
  // buf is a stack array, so keep a heap copy for the lifetime of the VM.
  _features_str = strdup(buf);
  if (Verbose) {
    print_features();
  }

  // PPC64 supports 8-byte compare-exchange operations (see
  // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
  // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
  _supports_cx8 = true;

  UseSSE = 0; // Only on x86 and x64

  // Cache line size as measured by determine_features().
  intx cache_line_size = _measured_cache_line_size;

  if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;

  if (AllocatePrefetchStyle == 4) {
    AllocatePrefetchStepSize = cache_line_size; // Need exact value.
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
  } else {
    // Only ever enlarge the step size to the cache line size, never shrink it.
    if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
  }

  assert(AllocatePrefetchLines > 0, "invalid value");
  if (AllocatePrefetchLines < 1) { // Set valid value in product VM.
    AllocatePrefetchLines = 1; // Conservative value.
  }

  if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) {
    AllocatePrefetchStyle = 1; // Fall back if inappropriate.
  }

  assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");

  // Implementation does not use any of the vector instructions
  // available with Power8. Their exploitation is still pending.
  // CRC32 intrinsics are therefore switched on by default; an explicit
  // -XX:-UseCRC32Intrinsics is left untouched.
  if (!UseCRC32Intrinsics) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
    }
  }

  // The AES intrinsic stubs require AES instruction support.
  if (has_vcipher()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      UseAES = true;
    }
  } else if (UseAES) {
    // Warn only if the user asked for AES explicitly.
    if (!FLAG_IS_DEFAULT(UseAES))
      warning("AES instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseAES, false);
  }

  if (UseAES && has_vcipher()) {
    if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      UseAESIntrinsics = true;
    }
  } else if (UseAESIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
      warning("AES intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
  }

  // GHASH intrinsics are always rejected here.
  if (UseGHASHIntrinsics) {
    warning("GHASH intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  if (has_vshasig()) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    if (!FLAG_IS_DEFAULT(UseSHA))
      warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  // SHA-1 intrinsics are always rejected here.
  if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (UseSHA && has_vshasig()) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && has_vshasig()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  // UseSHA stays on only if at least one SHA intrinsic ended up enabled.
  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  // The Montgomery intrinsics are enabled unless the user set the flags explicitly.
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
}
goetz@6458 | 246 | |
goetz@6458 | 247 | void VM_Version::print_features() { |
drchase@6680 | 248 | tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size()); |
goetz@6458 | 249 | } |
goetz@6458 | 250 | |
#ifdef COMPILER2
// Determine section size on power6: If section size is 8 instructions,
// there should be a difference between the two testloops of ~15 %. If
// no difference is detected the section is assumed to be 32 instructions.
//
// Two stubs (test1 / test2) are generated into one CodeBuffer. Both contain
// the same kind of instructions, but grouped differently relative to the
// 8-instruction boundaries. Each stub is run once and timed with the thread
// CPU time; the relative difference is only printed (under PrintAssembly),
// the code that would store the result is still disabled (#if 0).
void VM_Version::determine_section_size() {

  // Number of times the 32-instruction pattern is emitted per loop body.
  int unroll = 80;

  // Two loops of unroll * 32 instructions plus some slack for prologue, alignment and epilogue.
  const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;

  // Allocate space for the code.
  ResourceMark rm;
  CodeBuffer cb("detect_section_size", code_size, 0);
  MacroAssembler* a = new MacroAssembler(&cb);

  // Start of the generated code, used below for disassembly.
  uint32_t *code = (uint32_t *)a->pc();
  // Emit code.
  void (*test1)() = (void(*)())(void *)a->function_entry();

  Label l1;

  // R4 = 1 << 28: loop counter, decremented once per emitted pattern.
  a->li(R4, 1);
  a->sldi(R4, R4, 28);
  a->b(l1);
  a->align(CodeEntryAlignment);

  a->bind(l1);

  for (int i = 0; i < unroll; i++) {
    // Loop 1
    // ------- sector 0 ------------
    // ;; 0
    a->nop();                   // 1
    a->fpnop0();                // 2
    a->fpnop1();                // 3
    a->addi(R4,R4, -1); // 4

    // ;;  1
    a->nop();                   // 5
    a->fmr(F6, F6);             // 6
    a->fmr(F7, F7);             // 7
    a->endgroup();              // 8
    // ------- sector 8 ------------

    // ;;  2
    a->nop();                   // 9
    a->nop();                   // 10
    a->fmr(F8, F8);             // 11
    a->fmr(F9, F9);             // 12

    // ;;  3
    a->nop();                   // 13
    a->fmr(F10, F10);           // 14
    a->fmr(F11, F11);           // 15
    a->endgroup();              // 16
    // -------- sector 16 -------------

    // ;;  4
    a->nop();                   // 17
    a->nop();                   // 18
    a->fmr(F15, F15);           // 19
    a->fmr(F16, F16);           // 20

    // ;;  5
    a->nop();                   // 21
    a->fmr(F17, F17);           // 22
    a->fmr(F18, F18);           // 23
    a->endgroup();              // 24
    // ------- sector 24  ------------

    // ;;  6
    a->nop();                   // 25
    a->nop();                   // 26
    a->fmr(F19, F19);           // 27
    a->fmr(F20, F20);           // 28

    // ;;  7
    a->nop();                   // 29
    a->fmr(F21, F21);           // 30
    a->fmr(F22, F22);           // 31
    a->brnop0();                // 32

    // ------- sector 32 ------------
  }

  // ;; 8
  // Repeat the unrolled body while the counter is still >= unroll.
  a->cmpdi(CCR0, R4, unroll);   // 33
  a->bge(CCR0, l1);             // 34
  a->blr();

  // Emit code.
  void (*test2)() = (void(*)())(void *)a->function_entry();
  // uint32_t *code = (uint32_t *)a->pc();

  Label l2;

  // Same counter setup as for the first loop.
  a->li(R4, 1);
  a->sldi(R4, R4, 28);
  a->b(l2);
  a->align(CodeEntryAlignment);

  a->bind(l2);

  for (int i = 0; i < unroll; i++) {
    // Loop 2
    // ------- sector 0 ------------
    // ;; 0
    a->brnop0();                  // 1
    a->nop();                     // 2
    //a->cmpdi(CCR0, R4, unroll);
    a->fpnop0();                  // 3
    a->fpnop1();                  // 4
    a->addi(R4,R4, -1);           // 5

    // ;; 1

    a->nop();                     // 6
    a->fmr(F6, F6);               // 7
    a->fmr(F7, F7);               // 8
    // ------- sector 8 ---------------

    // ;; 2
    a->endgroup();                // 9

    // ;; 3
    a->nop();                     // 10
    a->nop();                     // 11
    a->fmr(F8, F8);               // 12

    // ;; 4
    a->fmr(F9, F9);               // 13
    a->nop();                     // 14
    a->fmr(F10, F10);             // 15

    // ;; 5
    a->fmr(F11, F11);             // 16
    // -------- sector 16 -------------

    // ;; 6
    a->endgroup();                // 17

    // ;; 7
    a->nop();                     // 18
    a->nop();                     // 19
    a->fmr(F15, F15);             // 20

    // ;; 8
    a->fmr(F16, F16);             // 21
    a->nop();                     // 22
    a->fmr(F17, F17);             // 23

    // ;; 9
    a->fmr(F18, F18);             // 24
    // -------- sector 24 -------------

    // ;; 10
    a->endgroup();                // 25

    // ;; 11
    a->nop();                     // 26
    a->nop();                     // 27
    a->fmr(F19, F19);             // 28

    // ;; 12
    a->fmr(F20, F20);             // 29
    a->nop();                     // 30
    a->fmr(F21, F21);             // 31

    // ;; 13
    a->fmr(F22, F22);             // 32
  }

  // -------- sector 32 -------------
  // ;; 14
  a->cmpdi(CCR0, R4, unroll); // 33
  a->bge(CCR0, l2);           // 34

  a->blr();
  uint32_t *code_end = (uint32_t *)a->pc();
  a->flush();

  double loop1_seconds,loop2_seconds, rel_diff;
  uint64_t start1, stop1;

  // Time each stub with the CPU time of the current thread; the raw
  // difference is divided by 1e9 to get seconds.
  start1 = os::current_thread_cpu_time(false);
  (*test1)();
  stop1 = os::current_thread_cpu_time(false);
  loop1_seconds = (stop1- start1) / (1000 *1000 *1000.0);


  start1 = os::current_thread_cpu_time(false);
  (*test2)();
  stop1 = os::current_thread_cpu_time(false);

  loop2_seconds = (stop1 - start1) / (1000 *1000 *1000.0);

  // Relative slowdown of loop 2 versus loop 1 in percent.
  rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds *100;

  // NOTE: this dump runs after both loops were executed, although the
  // message text says "before execution".
  if (PrintAssembly) {
    ttyLocker ttyl;
    tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
    tty->print_cr("Time loop1 :%f", loop1_seconds);
    tty->print_cr("Time loop2 :%f", loop2_seconds);
    tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff);

    if (rel_diff > 12.0) {
      tty->print_cr("Section Size 8 Instructions");
    } else{
      tty->print_cr("Section Size 32 Instructions or Power5");
    }
  }

#if 0 // TODO: PPC port
  // Set sector size (if not set explicitly).
  if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) {
    if (rel_diff > 12.0) {
      PdScheduling::power6SectorSize = 0x20;
    } else {
      PdScheduling::power6SectorSize = 0x80;
    }
  } else if (Power6SectorSize128PPC64) {
    PdScheduling::power6SectorSize = 0x80;
  } else {
    PdScheduling::power6SectorSize = 0x20;
  }
#endif
  // The measurement result is not consumed yet, so the Power6 scheduler cannot be used.
  if (UsePower6SchedulerPPC64) Unimplemented();
}
#endif // COMPILER2
goetz@6458 | 481 | |
// Detect the optional instructions supported by this CPU and measure the
// cache line size.
//
// A stub holding one instruction per feature is generated and executed.
// According to the comment at the call site below, the signal handler
// replaces every illegal instruction by 0, so afterwards a non-zero
// instruction word means "feature present". A second stub executes dcbz on
// a 0xFF-filled buffer; the number of bytes it zeroes is taken as the cache
// line size. The result is stored in _features and _measured_cache_line_size.
void VM_Version::determine_features() {
#if defined(ABI_ELFv2)
  const int code_size = (num_features+1+2*7)*BytesPerInstWord; // TODO(asmundak): calculation is incorrect.
#else
  // 7 InstWords for each call (function descriptor + blr instruction).
  const int code_size = (num_features+1+2*7)*BytesPerInstWord;
#endif
  int features = 0;

  // create test area
  enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
  char test_area[BUFFER_SIZE];
  char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];

  // Allocate space for the code.
  ResourceMark rm;
  CodeBuffer cb("detect_cpu_features", code_size, 0);
  MacroAssembler* a = new MacroAssembler(&cb);

  // Must be set to true so we can generate the test code.
  _features = VM_Version::all_features_m;

  // Emit code.
  void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry();
  uint32_t *code = (uint32_t *)a->pc();
  // The order of the instructions below must match the order of the
  // 'features |= ..._m' checks further down, which walk code[] sequentially.
  // Don't use R0 in ldarx.
  // Keep R3_ARG1 unmodified, it contains &field (see below).
  // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
  a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
  a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
  a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarxeh_m
  a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb_m
  //a->mftgpr(R7, F3);                         // disabled, occupies no slot -> mftgpr
  a->popcntb(R7, R5);                          // code[5]  -> popcntb_m
  a->popcntw(R7, R5);                          // code[6]  -> popcntw_m
  a->fcfids(F3, F4);                           // code[7]  -> fcfids_m
  a->vand(VR0, VR0, VR0);                      // code[8]  -> vand_m
  a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
  a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher_m
  a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb_m
  a->mfdscr(R0);                               // code[12] -> mfdscr_m
  a->lxvd2x(VSR0, R3_ARG1);                    // code[13] -> vsx_m
  a->vshasigmaw(VR0, VR1, 1, 0xF);             // code[14] -> vshasig_m
  a->blr();

  // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
  void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
  a->dcbz(R3_ARG1); // R3_ARG1 = addr
  a->blr();

  uint32_t *code_end = (uint32_t *)a->pc();
  a->flush();
  // Code generation is done: drop the "everything is available" setting again.
  _features = VM_Version::unknown_m;

  // Print the detection code.
  if (PrintAssembly) {
    ttyLocker ttyl;
    tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
  }

  // Measure cache line size.
  memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
  (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
  int count = 0; // count zeroed bytes
  for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
  guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
  _measured_cache_line_size = count;

  // Execute code. Illegal instructions will be replaced by 0 in the signal handler.
  VM_Version::_is_determine_features_test_running = true;
  (*test)((address)mid_of_test_area, (uint64_t)0);
  VM_Version::_is_determine_features_test_running = false;

  // determine which instructions are legal.
  // A slot that is still non-zero was executed without a fault.
  int feature_cntr = 0;
  if (code[feature_cntr++]) features |= fsqrt_m;
  if (code[feature_cntr++]) features |= fsqrts_m;
  if (code[feature_cntr++]) features |= isel_m;
  if (code[feature_cntr++]) features |= lxarxeh_m;
  if (code[feature_cntr++]) features |= cmpb_m;
  //if(code[feature_cntr++])features |= mftgpr_m;
  if (code[feature_cntr++]) features |= popcntb_m;
  if (code[feature_cntr++]) features |= popcntw_m;
  if (code[feature_cntr++]) features |= fcfids_m;
  if (code[feature_cntr++]) features |= vand_m;
  if (code[feature_cntr++]) features |= lqarx_m;
  if (code[feature_cntr++]) features |= vcipher_m;
  if (code[feature_cntr++]) features |= vpmsumb_m;
  if (code[feature_cntr++]) features |= mfdscr_m;
  if (code[feature_cntr++]) features |= vsx_m;
  if (code[feature_cntr++]) features |= vshasig_m;

  // Print the detection code.
  if (PrintAssembly) {
    ttyLocker ttyl;
    tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code));
    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
  }

  _features = features;
}
goetz@6458 | 585 | |
gromero@9662 | 586 | // Power 8: Configure Data Stream Control Register. |
gromero@9662 | 587 | void VM_Version::config_dscr() { |
gromero@9662 | 588 | assert(has_lqarx(), "Only execute on Power 8 or later!"); |
gromero@9662 | 589 | |
gromero@9662 | 590 | // 7 InstWords for each call (function descriptor + blr instruction). |
gromero@9662 | 591 | const int code_size = (2+2*7)*BytesPerInstWord; |
gromero@9662 | 592 | |
gromero@9662 | 593 | // Allocate space for the code. |
gromero@9662 | 594 | ResourceMark rm; |
gromero@9662 | 595 | CodeBuffer cb("config_dscr", code_size, 0); |
gromero@9662 | 596 | MacroAssembler* a = new MacroAssembler(&cb); |
gromero@9662 | 597 | |
gromero@9662 | 598 | // Emit code. |
gromero@9662 | 599 | uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry(); |
gromero@9662 | 600 | uint32_t *code = (uint32_t *)a->pc(); |
gromero@9662 | 601 | a->mfdscr(R3); |
gromero@9662 | 602 | a->blr(); |
gromero@9662 | 603 | |
gromero@9662 | 604 | void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry(); |
gromero@9662 | 605 | a->mtdscr(R3); |
gromero@9662 | 606 | a->blr(); |
gromero@9662 | 607 | |
gromero@9662 | 608 | uint32_t *code_end = (uint32_t *)a->pc(); |
gromero@9662 | 609 | a->flush(); |
gromero@9662 | 610 | |
gromero@9662 | 611 | // Print the detection code. |
gromero@9662 | 612 | if (PrintAssembly) { |
gromero@9662 | 613 | ttyLocker ttyl; |
gromero@9662 | 614 | tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", p2i(code)); |
gromero@9662 | 615 | Disassembler::decode((u_char*)code, (u_char*)code_end, tty); |
gromero@9662 | 616 | } |
gromero@9662 | 617 | |
gromero@9662 | 618 | // Apply the configuration if needed. |
gromero@9662 | 619 | _dscr_val = (*get_dscr)(); |
gromero@9662 | 620 | if (Verbose) { |
gromero@9662 | 621 | tty->print_cr("dscr value was 0x%lx" , _dscr_val); |
gromero@9662 | 622 | } |
gromero@9662 | 623 | bool change_requested = false; |
gromero@9662 | 624 | if (DSCR_PPC64 != (uintx)-1) { |
gromero@9662 | 625 | _dscr_val = DSCR_PPC64; |
gromero@9662 | 626 | change_requested = true; |
gromero@9662 | 627 | } |
gromero@9662 | 628 | if (DSCR_DPFD_PPC64 <= 7) { |
gromero@9662 | 629 | uint64_t mask = 0x7; |
gromero@9662 | 630 | if ((_dscr_val & mask) != DSCR_DPFD_PPC64) { |
gromero@9662 | 631 | _dscr_val = (_dscr_val & ~mask) | (DSCR_DPFD_PPC64); |
gromero@9662 | 632 | change_requested = true; |
gromero@9662 | 633 | } |
gromero@9662 | 634 | } |
gromero@9662 | 635 | if (DSCR_URG_PPC64 <= 7) { |
gromero@9662 | 636 | uint64_t mask = 0x7 << 6; |
gromero@9662 | 637 | if ((_dscr_val & mask) != DSCR_DPFD_PPC64 << 6) { |
gromero@9662 | 638 | _dscr_val = (_dscr_val & ~mask) | (DSCR_URG_PPC64 << 6); |
gromero@9662 | 639 | change_requested = true; |
gromero@9662 | 640 | } |
gromero@9662 | 641 | } |
gromero@9662 | 642 | if (change_requested) { |
gromero@9662 | 643 | (*set_dscr)(_dscr_val); |
gromero@9662 | 644 | if (Verbose) { |
gromero@9662 | 645 | tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)()); |
gromero@9662 | 646 | } |
gromero@9662 | 647 | } |
gromero@9662 | 648 | } |
goetz@6458 | 649 | |
goetz@6458 | 650 | static int saved_features = 0; |
goetz@6458 | 651 | |
goetz@6458 | 652 | void VM_Version::allow_all() { |
goetz@6458 | 653 | saved_features = _features; |
goetz@6458 | 654 | _features = all_features_m; |
goetz@6458 | 655 | } |
goetz@6458 | 656 | |
goetz@6458 | 657 | void VM_Version::revert() { |
goetz@6458 | 658 | _features = saved_features; |
goetz@6458 | 659 | } |