goetz@6458: /* gromero@9662: * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. gromero@9662: * Copyright 2012, 2018 SAP AG. All rights reserved. goetz@6458: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. goetz@6458: * goetz@6458: * This code is free software; you can redistribute it and/or modify it goetz@6458: * under the terms of the GNU General Public License version 2 only, as goetz@6458: * published by the Free Software Foundation. goetz@6458: * goetz@6458: * This code is distributed in the hope that it will be useful, but WITHOUT goetz@6458: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or goetz@6458: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License goetz@6458: * version 2 for more details (a copy is included in the LICENSE file that goetz@6458: * accompanied this code). goetz@6458: * goetz@6458: * You should have received a copy of the GNU General Public License version goetz@6458: * 2 along with this work; if not, write to the Free Software Foundation, goetz@6458: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. goetz@6458: * goetz@6458: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA goetz@6458: * or visit www.oracle.com if you need additional information or have any goetz@6458: * questions. goetz@6458: * goetz@6458: */ goetz@6458: goetz@6458: #include "precompiled.hpp" goetz@6511: #include "asm/assembler.inline.hpp" goetz@6511: #include "asm/macroAssembler.inline.hpp" goetz@6458: #include "compiler/disassembler.hpp" goetz@6458: #include "memory/resourceArea.hpp" goetz@6458: #include "runtime/java.hpp" goetz@6458: #include "runtime/stubCodeGenerator.hpp" goetz@6458: #include "utilities/defaultStream.hpp" goetz@6458: #include "vm_version_ppc.hpp" goetz@6458: #ifdef TARGET_OS_FAMILY_aix goetz@6458: # include "os_aix.inline.hpp" goetz@6458: #endif goetz@6458: #ifdef TARGET_OS_FAMILY_linux goetz@6458: # include "os_linux.inline.hpp" goetz@6458: #endif goetz@6458: goetz@6458: # include goetz@6458: goetz@6458: int VM_Version::_features = VM_Version::unknown_m; goetz@6458: int VM_Version::_measured_cache_line_size = 128; // default value goetz@6458: const char* VM_Version::_features_str = ""; goetz@6458: bool VM_Version::_is_determine_features_test_running = false; gromero@9662: uint64_t VM_Version::_dscr_val = 0; goetz@6458: goetz@6458: #define MSG(flag) \ goetz@6458: if (flag && !FLAG_IS_DEFAULT(flag)) \ goetz@6458: jio_fprintf(defaultStream::error_stream(), \ goetz@6458: "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n" \ goetz@6458: " -XX:+" #flag " will be disabled!\n"); goetz@6458: goetz@6458: void VM_Version::initialize() { goetz@6458: goetz@6458: // Test which instructions are supported and measure cache line size. goetz@6458: determine_features(); goetz@6458: goetz@6458: // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features. goetz@6458: if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) { gromero@9662: if (VM_Version::has_lqarx()) { gromero@9662: FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8); gromero@9662: } else if (VM_Version::has_popcntw()) { goetz@6458: FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7); goetz@6458: } else if (VM_Version::has_cmpb()) { goetz@6458: FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6); goetz@6458: } else if (VM_Version::has_popcntb()) { goetz@6458: FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 5); goetz@6458: } else { goetz@6458: FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0); goetz@6458: } goetz@6458: } goetz@6458: guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 || gromero@9662: PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 || gromero@9662: PowerArchitecturePPC64 == 8, gromero@9662: "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8"); gromero@9662: gromero@9662: // Power 8: Configure Data Stream Control Register. gromero@9662: if (PowerArchitecturePPC64 >= 8) { gromero@9662: config_dscr(); gromero@9662: } goetz@6458: goetz@6458: if (!UseSIGTRAP) { goetz@6458: MSG(TrapBasedICMissChecks); goetz@6458: MSG(TrapBasedNotEntrantChecks); goetz@6458: MSG(TrapBasedNullChecks); goetz@6458: FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false); goetz@6458: FLAG_SET_ERGO(bool, TrapBasedNullChecks, false); goetz@6458: FLAG_SET_ERGO(bool, TrapBasedICMissChecks, false); goetz@6458: } goetz@6458: goetz@6458: #ifdef COMPILER2 goetz@6490: if (!UseSIGTRAP) { goetz@6490: MSG(TrapBasedRangeChecks); goetz@6490: FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false); goetz@6490: } goetz@6490: goetz@6458: // On Power6 test for section size. goetz@6495: if (PowerArchitecturePPC64 == 6) { goetz@6458: determine_section_size(); goetz@6495: // TODO: PPC port } else { goetz@6458: // TODO: PPC port PdScheduling::power6SectorSize = 0x20; goetz@6495: } goetz@6458: goetz@6458: MaxVectorSize = 8; goetz@6458: #endif goetz@6458: goetz@6458: // Create and print feature-string. goetz@6495: char buf[(num_features+1) * 16]; // Max 16 chars per feature. goetz@6458: jio_snprintf(buf, sizeof(buf), ogatak@9713: "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s", goetz@6458: (has_fsqrt() ? " fsqrt" : ""), goetz@6458: (has_isel() ? " isel" : ""), goetz@6458: (has_lxarxeh() ? " lxarxeh" : ""), goetz@6458: (has_cmpb() ? " cmpb" : ""), goetz@6458: //(has_mftgpr()? " mftgpr" : ""), goetz@6458: (has_popcntb() ? " popcntb" : ""), goetz@6458: (has_popcntw() ? " popcntw" : ""), goetz@6458: (has_fcfids() ? " fcfids" : ""), simonis@8608: (has_vand() ? " vand" : ""), gromero@9662: (has_lqarx() ? " lqarx" : ""), mdoerr@9497: (has_vcipher() ? " aes" : ""), gromero@9662: (has_vpmsumb() ? " vpmsumb" : ""), gromero@9662: (has_mfdscr() ? " mfdscr" : ""), ogatak@9713: (has_vsx() ? " vsx" : ""), ogatak@9713: (has_vshasig() ? " sha" : "") goetz@6458: // Make sure number of %s matches num_features! goetz@6458: ); goetz@6458: _features_str = strdup(buf); gromero@9662: if (Verbose) { gromero@9662: print_features(); gromero@9662: } goetz@6458: goetz@6458: // PPC64 supports 8-byte compare-exchange operations (see goetz@6458: // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr) goetz@6458: // and 'atomic long memory ops' (see Unsafe_GetLongVolatile). goetz@6458: _supports_cx8 = true; goetz@6458: goetz@6458: UseSSE = 0; // Only on x86 and x64 goetz@6458: goetz@6458: intx cache_line_size = _measured_cache_line_size; goetz@6458: goetz@6458: if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1; goetz@6458: goetz@6458: if (AllocatePrefetchStyle == 4) { goetz@6495: AllocatePrefetchStepSize = cache_line_size; // Need exact value. goetz@6495: if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default. goetz@6495: if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined? goetz@6458: } else { goetz@6458: if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size; goetz@6495: if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value. goetz@6495: if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined? goetz@6458: } goetz@6458: goetz@6458: assert(AllocatePrefetchLines > 0, "invalid value"); goetz@7424: if (AllocatePrefetchLines < 1) { // Set valid value in product VM. goetz@6495: AllocatePrefetchLines = 1; // Conservative value. goetz@7424: } goetz@6458: goetz@7424: if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) { goetz@6495: AllocatePrefetchStyle = 1; // Fall back if inappropriate. goetz@7424: } goetz@6458: goetz@6458: assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); goetz@7424: gromero@9496: // Implementation does not use any of the vector instructions gromero@9496: // available with Power8. Their exploitation is still pending. gromero@9496: if (!UseCRC32Intrinsics) { gromero@9496: if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { gromero@9496: FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); gromero@9496: } goetz@7424: } goetz@7424: goetz@7424: // The AES intrinsic stubs require AES instruction support. simonis@8608: if (has_vcipher()) { simonis@8608: if (FLAG_IS_DEFAULT(UseAES)) { simonis@8608: UseAES = true; simonis@8608: } simonis@8608: } else if (UseAES) { simonis@8608: if (!FLAG_IS_DEFAULT(UseAES)) simonis@8608: warning("AES instructions are not available on this CPU"); simonis@8608: FLAG_SET_DEFAULT(UseAES, false); simonis@8608: } simonis@8608: simonis@8608: if (UseAES && has_vcipher()) { simonis@8608: if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { simonis@8608: UseAESIntrinsics = true; simonis@8608: } simonis@8608: } else if (UseAESIntrinsics) { simonis@8608: if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) simonis@8608: warning("AES intrinsics are not available on this CPU"); simonis@8608: FLAG_SET_DEFAULT(UseAESIntrinsics, false); simonis@8608: } simonis@8608: ascarpino@9788: if (UseGHASHIntrinsics) { ascarpino@9788: warning("GHASH intrinsics are not available on this CPU"); ascarpino@9788: FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); ascarpino@9788: } ascarpino@9788: ogatak@9713: if (has_vshasig()) { ogatak@9713: if (FLAG_IS_DEFAULT(UseSHA)) { ogatak@9713: UseSHA = true; ogatak@9713: } ogatak@9713: } else if (UseSHA) { ogatak@9713: if (!FLAG_IS_DEFAULT(UseSHA)) ogatak@9713: warning("SHA instructions are not available on this CPU"); goetz@7424: FLAG_SET_DEFAULT(UseSHA, false); goetz@7424: } ogatak@9713: ogatak@9713: if (UseSHA1Intrinsics) { ogatak@9713: warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); goetz@7424: FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ogatak@9713: } ogatak@9713: ogatak@9713: if (UseSHA && has_vshasig()) { ogatak@9713: if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ogatak@9713: FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ogatak@9713: } ogatak@9713: } else if (UseSHA256Intrinsics) { ogatak@9713: warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); goetz@7424: FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ogatak@9713: } ogatak@9713: ogatak@9713: if (UseSHA && has_vshasig()) { ogatak@9713: if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { ogatak@9713: FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); ogatak@9713: } ogatak@9713: } else if (UseSHA512Intrinsics) { ogatak@9713: warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); goetz@7424: FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); goetz@7424: } goetz@7424: ogatak@9713: if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ogatak@9713: FLAG_SET_DEFAULT(UseSHA, false); ogatak@9713: } ogatak@9713: mdoerr@8903: if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { mdoerr@8903: UseMontgomeryMultiplyIntrinsic = true; mdoerr@8903: } mdoerr@8903: if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { mdoerr@8903: UseMontgomerySquareIntrinsic = true; mdoerr@8903: } goetz@6458: } goetz@6458: goetz@6458: void VM_Version::print_features() { drchase@6680: tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size()); goetz@6458: } goetz@6458: goetz@6458: #ifdef COMPILER2 goetz@6458: // Determine section size on power6: If section size is 8 instructions, goetz@6458: // there should be a difference between the two testloops of ~15 %. If goetz@6458: // no difference is detected the section is assumed to be 32 instructions. goetz@6458: void VM_Version::determine_section_size() { goetz@6458: goetz@6458: int unroll = 80; goetz@6458: goetz@6458: const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord; goetz@6458: goetz@6495: // Allocate space for the code. goetz@6458: ResourceMark rm; goetz@6458: CodeBuffer cb("detect_section_size", code_size, 0); goetz@6458: MacroAssembler* a = new MacroAssembler(&cb); goetz@6458: goetz@6458: uint32_t *code = (uint32_t *)a->pc(); goetz@6495: // Emit code. goetz@6511: void (*test1)() = (void(*)())(void *)a->function_entry(); goetz@6458: goetz@6458: Label l1; goetz@6458: goetz@6458: a->li(R4, 1); goetz@6458: a->sldi(R4, R4, 28); goetz@6458: a->b(l1); goetz@6458: a->align(CodeEntryAlignment); goetz@6458: goetz@6458: a->bind(l1); goetz@6458: goetz@6458: for (int i = 0; i < unroll; i++) { goetz@6458: // Schleife 1 goetz@6458: // ------- sector 0 ------------ goetz@6458: // ;; 0 goetz@6458: a->nop(); // 1 goetz@6458: a->fpnop0(); // 2 goetz@6458: a->fpnop1(); // 3 goetz@6458: a->addi(R4,R4, -1); // 4 goetz@6458: goetz@6458: // ;; 1 goetz@6458: a->nop(); // 5 goetz@6495: a->fmr(F6, F6); // 6 goetz@6495: a->fmr(F7, F7); // 7 goetz@6458: a->endgroup(); // 8 goetz@6458: // ------- sector 8 ------------ goetz@6458: goetz@6458: // ;; 2 goetz@6458: a->nop(); // 9 goetz@6458: a->nop(); // 10 goetz@6495: a->fmr(F8, F8); // 11 goetz@6495: a->fmr(F9, F9); // 12 goetz@6458: goetz@6458: // ;; 3 goetz@6458: a->nop(); // 13 goetz@6495: a->fmr(F10, F10); // 14 goetz@6495: a->fmr(F11, F11); // 15 goetz@6458: a->endgroup(); // 16 goetz@6458: // -------- sector 16 ------------- goetz@6458: goetz@6458: // ;; 4 goetz@6458: a->nop(); // 17 goetz@6458: a->nop(); // 18 goetz@6495: a->fmr(F15, F15); // 19 goetz@6495: a->fmr(F16, F16); // 20 goetz@6458: goetz@6458: // ;; 5 goetz@6458: a->nop(); // 21 goetz@6495: a->fmr(F17, F17); // 22 goetz@6495: a->fmr(F18, F18); // 23 goetz@6458: a->endgroup(); // 24 goetz@6458: // ------- sector 24 ------------ goetz@6458: goetz@6458: // ;; 6 goetz@6458: a->nop(); // 25 goetz@6458: a->nop(); // 26 goetz@6495: a->fmr(F19, F19); // 27 goetz@6495: a->fmr(F20, F20); // 28 goetz@6458: goetz@6458: // ;; 7 goetz@6458: a->nop(); // 29 goetz@6495: a->fmr(F21, F21); // 30 goetz@6495: a->fmr(F22, F22); // 31 goetz@6458: a->brnop0(); // 32 goetz@6458: goetz@6458: // ------- sector 32 ------------ goetz@6458: } goetz@6458: goetz@6458: // ;; 8 goetz@6495: a->cmpdi(CCR0, R4, unroll); // 33 goetz@6495: a->bge(CCR0, l1); // 34 goetz@6458: a->blr(); goetz@6458: goetz@6495: // Emit code. goetz@6511: void (*test2)() = (void(*)())(void *)a->function_entry(); goetz@6458: // uint32_t *code = (uint32_t *)a->pc(); goetz@6458: goetz@6458: Label l2; goetz@6458: goetz@6458: a->li(R4, 1); goetz@6458: a->sldi(R4, R4, 28); goetz@6458: a->b(l2); goetz@6458: a->align(CodeEntryAlignment); goetz@6458: goetz@6458: a->bind(l2); goetz@6458: goetz@6458: for (int i = 0; i < unroll; i++) { goetz@6458: // Schleife 2 goetz@6458: // ------- sector 0 ------------ goetz@6458: // ;; 0 goetz@6458: a->brnop0(); // 1 goetz@6458: a->nop(); // 2 goetz@6458: //a->cmpdi(CCR0, R4, unroll); goetz@6458: a->fpnop0(); // 3 goetz@6458: a->fpnop1(); // 4 goetz@6458: a->addi(R4,R4, -1); // 5 goetz@6458: goetz@6458: // ;; 1 goetz@6458: goetz@6458: a->nop(); // 6 goetz@6458: a->fmr(F6, F6); // 7 goetz@6458: a->fmr(F7, F7); // 8 goetz@6458: // ------- sector 8 --------------- goetz@6458: goetz@6458: // ;; 2 goetz@6458: a->endgroup(); // 9 goetz@6458: goetz@6458: // ;; 3 goetz@6458: a->nop(); // 10 goetz@6458: a->nop(); // 11 goetz@6458: a->fmr(F8, F8); // 12 goetz@6458: goetz@6458: // ;; 4 goetz@6458: a->fmr(F9, F9); // 13 goetz@6458: a->nop(); // 14 goetz@6458: a->fmr(F10, F10); // 15 goetz@6458: goetz@6458: // ;; 5 goetz@6458: a->fmr(F11, F11); // 16 goetz@6458: // -------- sector 16 ------------- goetz@6458: goetz@6458: // ;; 6 goetz@6458: a->endgroup(); // 17 goetz@6458: goetz@6458: // ;; 7 goetz@6458: a->nop(); // 18 goetz@6458: a->nop(); // 19 goetz@6458: a->fmr(F15, F15); // 20 goetz@6458: goetz@6458: // ;; 8 goetz@6458: a->fmr(F16, F16); // 21 goetz@6458: a->nop(); // 22 goetz@6458: a->fmr(F17, F17); // 23 goetz@6458: goetz@6458: // ;; 9 goetz@6458: a->fmr(F18, F18); // 24 goetz@6458: // -------- sector 24 ------------- goetz@6458: goetz@6458: // ;; 10 goetz@6458: a->endgroup(); // 25 goetz@6458: goetz@6458: // ;; 11 goetz@6458: a->nop(); // 26 goetz@6458: a->nop(); // 27 goetz@6458: a->fmr(F19, F19); // 28 goetz@6458: goetz@6458: // ;; 12 goetz@6458: a->fmr(F20, F20); // 29 goetz@6458: a->nop(); // 30 goetz@6458: a->fmr(F21, F21); // 31 goetz@6458: goetz@6458: // ;; 13 goetz@6458: a->fmr(F22, F22); // 32 goetz@6458: } goetz@6458: goetz@6458: // -------- sector 32 ------------- goetz@6458: // ;; 14 goetz@6458: a->cmpdi(CCR0, R4, unroll); // 33 goetz@6458: a->bge(CCR0, l2); // 34 goetz@6458: goetz@6458: a->blr(); goetz@6458: uint32_t *code_end = (uint32_t *)a->pc(); goetz@6458: a->flush(); goetz@6458: goetz@6458: double loop1_seconds,loop2_seconds, rel_diff; goetz@6458: uint64_t start1, stop1; goetz@6458: goetz@6458: start1 = os::current_thread_cpu_time(false); goetz@6458: (*test1)(); goetz@6458: stop1 = os::current_thread_cpu_time(false); goetz@6458: loop1_seconds = (stop1- start1) / (1000 *1000 *1000.0); goetz@6458: goetz@6458: goetz@6458: start1 = os::current_thread_cpu_time(false); goetz@6458: (*test2)(); goetz@6458: stop1 = os::current_thread_cpu_time(false); goetz@6458: goetz@6458: loop2_seconds = (stop1 - start1) / (1000 *1000 *1000.0); goetz@6458: goetz@6458: rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds *100; goetz@6458: goetz@6458: if (PrintAssembly) { goetz@6458: ttyLocker ttyl; coleenp@7358: tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", p2i(code)); goetz@6458: Disassembler::decode((u_char*)code, (u_char*)code_end, tty); goetz@6458: tty->print_cr("Time loop1 :%f", loop1_seconds); goetz@6458: tty->print_cr("Time loop2 :%f", loop2_seconds); goetz@6458: tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff); goetz@6458: goetz@6458: if (rel_diff > 12.0) { goetz@6458: tty->print_cr("Section Size 8 Instructions"); goetz@6458: } else{ goetz@6458: tty->print_cr("Section Size 32 Instructions or Power5"); goetz@6458: } goetz@6458: } goetz@6458: goetz@6458: #if 0 // TODO: PPC port goetz@6458: // Set sector size (if not set explicitly). goetz@6458: if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) { goetz@6458: if (rel_diff > 12.0) { goetz@6458: PdScheduling::power6SectorSize = 0x20; goetz@6458: } else { goetz@6458: PdScheduling::power6SectorSize = 0x80; goetz@6458: } goetz@6458: } else if (Power6SectorSize128PPC64) { goetz@6458: PdScheduling::power6SectorSize = 0x80; goetz@6458: } else { goetz@6458: PdScheduling::power6SectorSize = 0x20; goetz@6458: } goetz@6458: #endif goetz@6458: if (UsePower6SchedulerPPC64) Unimplemented(); goetz@6458: } goetz@6458: #endif // COMPILER2 goetz@6458: goetz@6458: void VM_Version::determine_features() { goetz@6511: #if defined(ABI_ELFv2) goetz@6511: const int code_size = (num_features+1+2*7)*BytesPerInstWord; // TODO(asmundak): calculation is incorrect. goetz@6511: #else goetz@6495: // 7 InstWords for each call (function descriptor + blr instruction). goetz@6495: const int code_size = (num_features+1+2*7)*BytesPerInstWord; goetz@6511: #endif goetz@6458: int features = 0; goetz@6458: goetz@6458: // create test area goetz@6495: enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size). goetz@6458: char test_area[BUFFER_SIZE]; goetz@6458: char *mid_of_test_area = &test_area[BUFFER_SIZE>>1]; goetz@6458: goetz@6495: // Allocate space for the code. goetz@6458: ResourceMark rm; goetz@6458: CodeBuffer cb("detect_cpu_features", code_size, 0); goetz@6458: MacroAssembler* a = new MacroAssembler(&cb); goetz@6458: goetz@6515: // Must be set to true so we can generate the test code. goetz@6515: _features = VM_Version::all_features_m; goetz@6515: goetz@6495: // Emit code. goetz@6511: void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry(); goetz@6458: uint32_t *code = (uint32_t *)a->pc(); goetz@6458: // Don't use R0 in ldarx. goetz@6495: // Keep R3_ARG1 unmodified, it contains &field (see below). goetz@6495: // Keep R4_ARG2 unmodified, it contains offset = 0 (see below). goetz@6495: a->fsqrt(F3, F4); // code[0] -> fsqrt_m goetz@6515: a->fsqrts(F3, F4); // code[1] -> fsqrts_m goetz@6515: a->isel(R7, R5, R6, 0); // code[2] -> isel_m goetz@6515: a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m goetz@6515: a->cmpb(R7, R5, R6); // code[4] -> bcmp goetz@6515: //a->mftgpr(R7, F3); // code[5] -> mftgpr goetz@6515: a->popcntb(R7, R5); // code[6] -> popcntb goetz@6515: a->popcntw(R7, R5); // code[7] -> popcntw goetz@6515: a->fcfids(F3, F4); // code[8] -> fcfids goetz@6515: a->vand(VR0, VR0, VR0); // code[9] -> vand gromero@9662: a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[10] -> lqarx_m gromero@9662: a->vcipher(VR0, VR1, VR2); // code[11] -> vcipher gromero@9662: a->vpmsumb(VR0, VR1, VR2); // code[12] -> vpmsumb gromero@9662: a->mfdscr(R0); // code[13] -> mfdscr gromero@9684: a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx ogatak@9713: a->vshasigmaw(VR0, VR1, 1, 0xF); // code[15] -> vshasig goetz@6458: a->blr(); goetz@6458: goetz@6495: // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. goetz@6511: void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry(); goetz@6495: a->dcbz(R3_ARG1); // R3_ARG1 = addr goetz@6458: a->blr(); goetz@6458: goetz@6458: uint32_t *code_end = (uint32_t *)a->pc(); goetz@6458: a->flush(); goetz@6515: _features = VM_Version::unknown_m; goetz@6458: goetz@6458: // Print the detection code. goetz@6458: if (PrintAssembly) { goetz@6458: ttyLocker ttyl; coleenp@7358: tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code)); goetz@6458: Disassembler::decode((u_char*)code, (u_char*)code_end, tty); goetz@6458: } goetz@6458: goetz@6458: // Measure cache line size. goetz@6495: memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF. goetz@6495: (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle. goetz@6458: int count = 0; // count zeroed bytes goetz@6458: for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++; goetz@6458: guarantee(is_power_of_2(count), "cache line size needs to be a power of 2"); goetz@6458: _measured_cache_line_size = count; goetz@6458: goetz@6458: // Execute code. Illegal instructions will be replaced by 0 in the signal handler. goetz@6458: VM_Version::_is_determine_features_test_running = true; goetz@6458: (*test)((address)mid_of_test_area, (uint64_t)0); goetz@6458: VM_Version::_is_determine_features_test_running = false; goetz@6458: goetz@6458: // determine which instructions are legal. goetz@6458: int feature_cntr = 0; goetz@6458: if (code[feature_cntr++]) features |= fsqrt_m; goetz@6515: if (code[feature_cntr++]) features |= fsqrts_m; goetz@6458: if (code[feature_cntr++]) features |= isel_m; goetz@6458: if (code[feature_cntr++]) features |= lxarxeh_m; goetz@6458: if (code[feature_cntr++]) features |= cmpb_m; goetz@6458: //if(code[feature_cntr++])features |= mftgpr_m; goetz@6458: if (code[feature_cntr++]) features |= popcntb_m; goetz@6458: if (code[feature_cntr++]) features |= popcntw_m; goetz@6458: if (code[feature_cntr++]) features |= fcfids_m; goetz@6458: if (code[feature_cntr++]) features |= vand_m; gromero@9662: if (code[feature_cntr++]) features |= lqarx_m; simonis@8608: if (code[feature_cntr++]) features |= vcipher_m; mdoerr@9497: if (code[feature_cntr++]) features |= vpmsumb_m; gromero@9662: if (code[feature_cntr++]) features |= mfdscr_m; gromero@9662: if (code[feature_cntr++]) features |= vsx_m; ogatak@9713: if (code[feature_cntr++]) features |= vshasig_m; goetz@6458: goetz@6458: // Print the detection code. goetz@6458: if (PrintAssembly) { goetz@6458: ttyLocker ttyl; coleenp@7358: tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code)); goetz@6458: Disassembler::decode((u_char*)code, (u_char*)code_end, tty); goetz@6458: } goetz@6458: goetz@6458: _features = features; goetz@6458: } goetz@6458: gromero@9662: // Power 8: Configure Data Stream Control Register. gromero@9662: void VM_Version::config_dscr() { gromero@9662: assert(has_lqarx(), "Only execute on Power 8 or later!"); gromero@9662: gromero@9662: // 7 InstWords for each call (function descriptor + blr instruction). gromero@9662: const int code_size = (2+2*7)*BytesPerInstWord; gromero@9662: gromero@9662: // Allocate space for the code. gromero@9662: ResourceMark rm; gromero@9662: CodeBuffer cb("config_dscr", code_size, 0); gromero@9662: MacroAssembler* a = new MacroAssembler(&cb); gromero@9662: gromero@9662: // Emit code. gromero@9662: uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry(); gromero@9662: uint32_t *code = (uint32_t *)a->pc(); gromero@9662: a->mfdscr(R3); gromero@9662: a->blr(); gromero@9662: gromero@9662: void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry(); gromero@9662: a->mtdscr(R3); gromero@9662: a->blr(); gromero@9662: gromero@9662: uint32_t *code_end = (uint32_t *)a->pc(); gromero@9662: a->flush(); gromero@9662: gromero@9662: // Print the detection code. gromero@9662: if (PrintAssembly) { gromero@9662: ttyLocker ttyl; gromero@9662: tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", p2i(code)); gromero@9662: Disassembler::decode((u_char*)code, (u_char*)code_end, tty); gromero@9662: } gromero@9662: gromero@9662: // Apply the configuration if needed. gromero@9662: _dscr_val = (*get_dscr)(); gromero@9662: if (Verbose) { gromero@9662: tty->print_cr("dscr value was 0x%lx" , _dscr_val); gromero@9662: } gromero@9662: bool change_requested = false; gromero@9662: if (DSCR_PPC64 != (uintx)-1) { gromero@9662: _dscr_val = DSCR_PPC64; gromero@9662: change_requested = true; gromero@9662: } gromero@9662: if (DSCR_DPFD_PPC64 <= 7) { gromero@9662: uint64_t mask = 0x7; gromero@9662: if ((_dscr_val & mask) != DSCR_DPFD_PPC64) { gromero@9662: _dscr_val = (_dscr_val & ~mask) | (DSCR_DPFD_PPC64); gromero@9662: change_requested = true; gromero@9662: } gromero@9662: } gromero@9662: if (DSCR_URG_PPC64 <= 7) { gromero@9662: uint64_t mask = 0x7 << 6; gromero@9662: if ((_dscr_val & mask) != DSCR_DPFD_PPC64 << 6) { gromero@9662: _dscr_val = (_dscr_val & ~mask) | (DSCR_URG_PPC64 << 6); gromero@9662: change_requested = true; gromero@9662: } gromero@9662: } gromero@9662: if (change_requested) { gromero@9662: (*set_dscr)(_dscr_val); gromero@9662: if (Verbose) { gromero@9662: tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)()); gromero@9662: } gromero@9662: } gromero@9662: } goetz@6458: goetz@6458: static int saved_features = 0; goetz@6458: goetz@6458: void VM_Version::allow_all() { goetz@6458: saved_features = _features; goetz@6458: _features = all_features_m; goetz@6458: } goetz@6458: goetz@6458: void VM_Version::revert() { goetz@6458: _features = saved_features; goetz@6458: }