Thu, 03 Jan 2013 16:30:47 -0800
8005544: Use 256bit YMM registers in arraycopy stubs on x86
Summary: Use YMM registers in arraycopy and array_fill stubs.
Reviewed-by: roland, twisti
duke@435 | 1 | /* |
twisti@4020 | 2 | * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
stefank@2314 | 25 | #ifndef CPU_X86_VM_GLOBALS_X86_HPP |
stefank@2314 | 26 | #define CPU_X86_VM_GLOBALS_X86_HPP |
stefank@2314 | 27 | |
stefank@2314 | 28 | #include "utilities/globalDefinitions.hpp" |
stefank@2314 | 29 | #include "utilities/macros.hpp" |
stefank@2314 | 30 | |
duke@435 | 31 | // Sets the default values for platform dependent flags used by the runtime system. |
duke@435 | 32 | // (see globals.hpp) |
duke@435 | 33 | |
phh@1499 | 34 | define_pd_global(bool, ConvertSleepToYield, true); |
phh@1499 | 35 | define_pd_global(bool, ShareVtableStubs, true); |
phh@1499 | 36 | define_pd_global(bool, CountInterpCalls, true); |
phh@1499 | 37 | define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this |
duke@435 | 38 | |
phh@1499 | 39 | define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks |
phh@1499 | 40 | define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast |
duke@435 | 41 | |
duke@435 | 42 | // See 4827828 for this change. There is no globals_core_i486.hpp. I can't |
duke@435 | 43 | // assign a different value for C2 without touching a number of files. Use |
duke@435 | 44 | // #ifdef to minimize the change as it's late in Mantis. -- FIXME. |
duke@435 | 45 | // c1 doesn't have this problem because the fix to 4858033 assures us |
duke@435 | 46 | // that the vep is aligned at CodeEntryAlignment whereas c2 only aligns |
duke@435 | 47 | // the uep and the vep doesn't get real alignment but just slops on by |
duke@435 | 48 | // only assured that the entry instruction meets the 5 byte size requirement. |
duke@435 | 49 | #ifdef COMPILER2 |
phh@1499 | 50 | define_pd_global(intx, CodeEntryAlignment, 32); |
duke@435 | 51 | #else |
phh@1499 | 52 | define_pd_global(intx, CodeEntryAlignment, 16); |
duke@435 | 53 | #endif // COMPILER2 |
kvn@1800 | 54 | define_pd_global(intx, OptoLoopAlignment, 16); |
phh@1499 | 55 | define_pd_global(intx, InlineFrequencyCount, 100); |
phh@1499 | 56 | define_pd_global(intx, InlineSmallCode, 1000); |
duke@435 | 57 | |
phh@1499 | 58 | define_pd_global(intx, StackYellowPages, 2); |
phh@1499 | 59 | define_pd_global(intx, StackRedPages, 1); |
duke@435 | 60 | #ifdef AMD64 |
duke@435 | 61 | // Very large C++ stack frames using solaris-amd64 optimized builds |
duke@435 | 62 | // due to lack of optimization caused by C++ compiler bugs |
coleenp@3548 | 63 | define_pd_global(intx, StackShadowPages, NOT_WIN64(20) WIN64_ONLY(6) DEBUG_ONLY(+2)); |
duke@435 | 64 | #else |
never@3569 | 65 | define_pd_global(intx, StackShadowPages, 4 DEBUG_ONLY(+5)); |
duke@435 | 66 | #endif // AMD64 |
duke@435 | 67 | |
phh@1499 | 68 | define_pd_global(intx, PreInflateSpin, 10); |
duke@435 | 69 | |
duke@435 | 70 | define_pd_global(bool, RewriteBytecodes, true); |
duke@435 | 71 | define_pd_global(bool, RewriteFrequentPairs, true); |
bobv@2223 | 72 | |
never@3156 | 73 | #ifdef _ALLBSD_SOURCE |
never@3156 | 74 | define_pd_global(bool, UseMembar, true); |
never@3156 | 75 | #else |
bobv@2223 | 76 | define_pd_global(bool, UseMembar, false); |
never@3156 | 77 | #endif |
stefank@2314 | 78 | |
ysr@2650 | 79 | // GC Ergo Flags |
ysr@2650 | 80 | define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread |
twisti@4020 | 81 | |
twisti@4020 | 82 | #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ |
twisti@4020 | 83 | \ |
twisti@4020 | 84 | develop(bool, IEEEPrecision, true, \ |
twisti@4020 | 85 | "Enables IEEE precision (for INTEL only)") \ |
twisti@4020 | 86 | \ |
twisti@4020 | 87 | product(intx, FenceInstruction, 0, \ |
twisti@4020 | 88 | "(Unsafe,Unstable) Experimental") \ |
twisti@4020 | 89 | \ |
twisti@4020 | 90 | product(intx, ReadPrefetchInstr, 0, \ |
twisti@4020 | 91 | "Prefetch instruction to prefetch ahead") \ |
twisti@4020 | 92 | \ |
twisti@4020 | 93 | product(bool, UseStoreImmI16, true, \ |
twisti@4020 | 94 | "Use store immediate 16-bits value instruction on x86") \ |
twisti@4020 | 95 | \ |
twisti@4020 | 96 | product(intx, UseAVX, 99, \ |
twisti@4020 | 97 | "Highest supported AVX instructions set on x86/x64") \ |
twisti@4020 | 98 | \ |
twisti@4020 | 99 | diagnostic(bool, UseIncDec, true, \ |
twisti@4020 | 100 | "Use INC, DEC instructions on x86") \ |
twisti@4020 | 101 | \ |
twisti@4020 | 102 | product(bool, UseNewLongLShift, false, \ |
twisti@4020 | 103 | "Use optimized bitwise shift left") \ |
twisti@4020 | 104 | \ |
twisti@4020 | 105 | product(bool, UseAddressNop, false, \ |
twisti@4020 | 106 | "Use '0F 1F [addr]' NOP instructions on x86 cpus") \ |
twisti@4020 | 107 | \ |
twisti@4020 | 108 | product(bool, UseXmmLoadAndClearUpper, true, \ |
twisti@4020 | 109 | "Load low part of XMM register and clear upper part") \ |
twisti@4020 | 110 | \ |
twisti@4020 | 111 | product(bool, UseXmmRegToRegMoveAll, false, \ |
twisti@4020 | 112 | "Copy all XMM register bits when moving value between registers") \ |
twisti@4020 | 113 | \ |
twisti@4020 | 114 | product(bool, UseXmmI2D, false, \ |
twisti@4020 | 115 | "Use SSE2 CVTDQ2PD instruction to convert Integer to Double") \ |
twisti@4020 | 116 | \ |
twisti@4020 | 117 | product(bool, UseXmmI2F, false, \ |
twisti@4020 | 118 | "Use SSE2 CVTDQ2PS instruction to convert Integer to Float") \ |
twisti@4020 | 119 | \ |
twisti@4020 | 120 | product(bool, UseUnalignedLoadStores, false, \ |
twisti@4020 | 121 | "Use SSE2 MOVDQU instruction for Arraycopy") \ |
twisti@4020 | 122 | \ |
kvn@4410 | 123 | product(bool, UseFastStosb, false, \ |
kvn@4410 | 124 | "Use fast-string operation for zeroing: rep stosb") \ |
kvn@4410 | 125 | \ |
twisti@4020 | 126 | /* assembler */ \ |
twisti@4020 | 127 | product(bool, Use486InstrsOnly, false, \ |
twisti@4020 | 128 | "Use 80486 Compliant instruction subset") \ |
twisti@4020 | 129 | \ |
twisti@4020 | 130 | product(bool, UseCountLeadingZerosInstruction, false, \ |
twisti@4020 | 131 | "Use count leading zeros instruction") \ |
twisti@4020 | 132 | |
stefank@2314 | 133 | #endif // CPU_X86_VM_GLOBALS_X86_HPP |