Thu, 03 Jan 2013 16:30:47 -0800
8005544: Use 256bit YMM registers in arraycopy stubs on x86
Summary: Use YMM registers in arraycopy and array_fill stubs.
Reviewed-by: roland, twisti
duke@435 | 1 | /* |
twisti@2103 | 2 | * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
stefank@2314 | 25 | #include "precompiled.hpp" |
twisti@4318 | 26 | #include "asm/macroAssembler.hpp" |
stefank@2314 | 27 | #include "memory/resourceArea.hpp" |
stefank@2314 | 28 | #include "prims/jniFastGetField.hpp" |
stefank@2314 | 29 | #include "prims/jvm_misc.hpp" |
stefank@2314 | 30 | #include "runtime/safepoint.hpp" |
duke@435 | 31 | |
// Shorthand used by the generators below: "__ insn(...)" expands to
// "masm-> insn(...)", so a local named masm must be in scope at each use.
duke@435 | 32 | #define __ masm-> |
duke@435 | 33 | |
// Size passed to BufferBlob::create() for each generated stub.
// NOTE(review): the expansion is not parenthesized; it is only safe because
// the macro is used as a complete function argument in this file.
duke@435 | 34 | #define BUFFER_SIZE 30*wordSize |
duke@435 | 35 | |
duke@435 | 36 | // Instead of issuing an lfence as the LoadLoad barrier, we create a data dependency |
duke@435 | 37 | // between the loads, which is more efficient than an lfence. |
duke@435 | 38 | |
duke@435 | 39 | // Common register usage: |
duke@435 | 40 | // rax/xmm0: result |
duke@435 | 41 | // c_rarg0: jni env |
duke@435 | 42 | // c_rarg1: obj |
duke@435 | 43 | // c_rarg2: jfield id |
duke@435 | 44 | |
// Scratch registers used by the generated stubs:
//   robj          - the obj argument, then the value loaded through it
//   rcounter      - the safepoint counter value read on entry
//   roffset       - the field offset derived from the jfield id
//   rcounter_addr - the address of the safepoint counter for the re-check
// roffset and rcounter_addr are both r11: in the generators below roffset is
// last read by the speculative field load and rcounter_addr is only written
// after that load, so the two uses do not overlap.
duke@435 | 45 | static const Register robj = r9; |
duke@435 | 46 | static const Register rcounter = r10; |
duke@435 | 47 | static const Register roffset = r11; |
duke@435 | 48 | static const Register rcounter_addr = r11; |
duke@435 | 49 | |
duke@435 | 50 | // Warning: do not use rip relative addressing after the first counter load |
duke@435 | 51 | // since that may scratch r10! |
duke@435 | 52 | |
// Emits the fast-path stub for the integral JNI Get<Type>Field accessors
// (T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_LONG) into a BufferBlob of
// BUFFER_SIZE bytes and returns the stub's entry address. Any other type
// hits ShouldNotReachHere().
//
// Shape of the generated code:
//   1. Load the safepoint counter; if its low bit is set, go to the slow case.
//   2. Load *obj, compute the field offset from the jfield id, and load the
//      field into rax (speculatively).
//   3. Re-read the safepoint counter; if it differs from the value read in
//      step 1, go to the slow case, otherwise return with the result in rax.
//   Slow case: tail-jump to the address returned by jni_Get<Type>Field_addr().
//
// Side effects: records the pc of the speculative load and the pc of the
// slow-case entry in speculative_load_pclist[count] and
// slowcase_entry_pclist[count], then increments count.
duke@435 | 53 | address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { |
duke@435 | 54 | const char *name; |
duke@435 | 55 | switch (type) { |
duke@435 | 56 | case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; |
duke@435 | 57 | case T_BYTE: name = "jni_fast_GetByteField"; break; |
duke@435 | 58 | case T_CHAR: name = "jni_fast_GetCharField"; break; |
duke@435 | 59 | case T_SHORT: name = "jni_fast_GetShortField"; break; |
duke@435 | 60 | case T_INT: name = "jni_fast_GetIntField"; break; |
duke@435 | 61 | case T_LONG: name = "jni_fast_GetLongField"; break; |
duke@435 | 62 | default: ShouldNotReachHere(); |
duke@435 | 63 | } |
duke@435 | 64 | ResourceMark rm; |
twisti@2103 | 65 | BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); |
twisti@2103 | 66 | CodeBuffer cbuf(blob); |
duke@435 | 67 | MacroAssembler* masm = new MacroAssembler(&cbuf); |
twisti@2103 | 68 | address fast_entry = __ pc(); |
duke@435 | 69 | |
duke@435 | 70 | Label slow; |
duke@435 | 71 | |
// First read of the safepoint counter; a set low bit sends us to the slow case.
duke@435 | 72 | ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); |
duke@435 | 73 | __ mov32 (rcounter, counter); |
never@739 | 74 | __ mov (robj, c_rarg1); |
duke@435 | 75 | __ testb (rcounter, 1); |
duke@435 | 76 | __ jcc (Assembler::notZero, slow); |
duke@435 | 77 | if (os::is_MP()) { |
never@739 | 78 | __ xorptr(robj, rcounter); |
never@739 | 79 | __ xorptr(robj, rcounter); // obj, since |
duke@435 | 80 | // robj ^ rcounter ^ rcounter == robj |
duke@435 | 81 | // robj is data dependent on rcounter. |
duke@435 | 82 | } |
never@739 | 83 | __ movptr(robj, Address(robj, 0)); // *obj (load through the obj argument) |
never@739 | 84 | __ mov (roffset, c_rarg2); |
never@739 | 85 | __ shrptr(roffset, 2); // offset = jfield id >> 2 |
duke@435 | 86 | |
// Remember where the speculative field load starts. The matching slow-case pc
// is stored at the same index further down.
duke@435 | 87 | assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); |
duke@435 | 88 | speculative_load_pclist[count] = __ pc(); |
duke@435 | 89 | switch (type) { |
duke@435 | 90 | case T_BOOLEAN: __ movzbl (rax, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 91 | case T_BYTE: __ movsbl (rax, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 92 | case T_CHAR: __ movzwl (rax, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 93 | case T_SHORT: __ movswl (rax, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 94 | case T_INT: __ movl (rax, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 95 | case T_LONG: __ movq (rax, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 96 | default: ShouldNotReachHere(); |
duke@435 | 97 | } |
duke@435 | 98 | |
// Second read of the safepoint counter, compared against the first read.
// rcounter_addr reuses r11, which held roffset up to the load above.
duke@435 | 99 | if (os::is_MP()) { |
duke@435 | 100 | __ lea(rcounter_addr, counter); |
duke@435 | 101 | // counter address is data dependent on rax. |
never@739 | 102 | __ xorptr(rcounter_addr, rax); |
never@739 | 103 | __ xorptr(rcounter_addr, rax); |
duke@435 | 104 | __ cmpl (rcounter, Address(rcounter_addr, 0)); |
duke@435 | 105 | } else { |
duke@435 | 106 | __ cmp32 (rcounter, counter); |
duke@435 | 107 | } |
duke@435 | 108 | __ jcc (Assembler::notEqual, slow); |
duke@435 | 109 | |
duke@435 | 110 | __ ret (0); |
duke@435 | 111 | |
duke@435 | 112 | slowcase_entry_pclist[count++] = __ pc(); |
duke@435 | 113 | __ bind (slow); |
// NOTE(review): this switch has no default case, so slow_case_addr would be
// left unset for any other type. The earlier switches on type call
// ShouldNotReachHere() for those values.
duke@435 | 114 | address slow_case_addr; |
duke@435 | 115 | switch (type) { |
duke@435 | 116 | case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; |
duke@435 | 117 | case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; |
duke@435 | 118 | case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; |
duke@435 | 119 | case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; |
duke@435 | 120 | case T_INT: slow_case_addr = jni_GetIntField_addr(); break; |
duke@435 | 121 | case T_LONG: slow_case_addr = jni_GetLongField_addr(); |
duke@435 | 122 | } |
duke@435 | 123 | // tail call to the address selected above |
duke@435 | 124 | __ jump (ExternalAddress(slow_case_addr)); |
duke@435 | 125 | |
duke@435 | 126 | __ flush (); |
duke@435 | 127 | |
duke@435 | 128 | return fast_entry; |
duke@435 | 129 | } |
duke@435 | 130 | |
// Returns the entry address of a newly generated stub for boolean fields
// (delegates to generate_fast_get_int_field0 with T_BOOLEAN).
duke@435 | 131 | address JNI_FastGetField::generate_fast_get_boolean_field() { |
duke@435 | 132 | return generate_fast_get_int_field0(T_BOOLEAN); |
duke@435 | 133 | } |
duke@435 | 134 | |
// Returns the entry address of a newly generated stub for byte fields
// (delegates to generate_fast_get_int_field0 with T_BYTE).
duke@435 | 135 | address JNI_FastGetField::generate_fast_get_byte_field() { |
duke@435 | 136 | return generate_fast_get_int_field0(T_BYTE); |
duke@435 | 137 | } |
duke@435 | 138 | |
// Returns the entry address of a newly generated stub for char fields
// (delegates to generate_fast_get_int_field0 with T_CHAR).
duke@435 | 139 | address JNI_FastGetField::generate_fast_get_char_field() { |
duke@435 | 140 | return generate_fast_get_int_field0(T_CHAR); |
duke@435 | 141 | } |
duke@435 | 142 | |
// Returns the entry address of a newly generated stub for short fields
// (delegates to generate_fast_get_int_field0 with T_SHORT).
duke@435 | 143 | address JNI_FastGetField::generate_fast_get_short_field() { |
duke@435 | 144 | return generate_fast_get_int_field0(T_SHORT); |
duke@435 | 145 | } |
duke@435 | 146 | |
// Returns the entry address of a newly generated stub for int fields
// (delegates to generate_fast_get_int_field0 with T_INT).
duke@435 | 147 | address JNI_FastGetField::generate_fast_get_int_field() { |
duke@435 | 148 | return generate_fast_get_int_field0(T_INT); |
duke@435 | 149 | } |
duke@435 | 150 | |
// Returns the entry address of a newly generated stub for long fields.
// Long shares the integral generator (generate_fast_get_int_field0 with
// T_LONG), which loads the field with a 64-bit movq into rax.
duke@435 | 151 | address JNI_FastGetField::generate_fast_get_long_field() { |
duke@435 | 152 | return generate_fast_get_int_field0(T_LONG); |
duke@435 | 153 | } |
duke@435 | 154 | |
// Emits the fast-path stub for the floating-point JNI Get<Type>Field accessors
// (T_FLOAT, T_DOUBLE) into a BufferBlob of BUFFER_SIZE bytes and returns the
// stub's entry address. Any other type hits ShouldNotReachHere().
//
// The generated code follows the same scheme as the integral stub: read the
// safepoint counter (slow case if its low bit is set), load the field
// speculatively into xmm0, re-read the counter and take the slow case if it
// changed, otherwise return. The slow case tail-jumps to the address returned
// by jni_GetFloatField_addr() / jni_GetDoubleField_addr().
//
// Side effects: records the pc of the speculative load and the pc of the
// slow-case entry in speculative_load_pclist[count] and
// slowcase_entry_pclist[count], then increments count.
duke@435 | 155 | address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { |
duke@435 | 156 | const char *name; |
duke@435 | 157 | switch (type) { |
duke@435 | 158 | case T_FLOAT: name = "jni_fast_GetFloatField"; break; |
duke@435 | 159 | case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; |
duke@435 | 160 | default: ShouldNotReachHere(); |
duke@435 | 161 | } |
duke@435 | 162 | ResourceMark rm; |
twisti@2103 | 163 | BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); |
twisti@2103 | 164 | CodeBuffer cbuf(blob); |
duke@435 | 165 | MacroAssembler* masm = new MacroAssembler(&cbuf); |
twisti@2103 | 166 | address fast_entry = __ pc(); |
duke@435 | 167 | |
duke@435 | 168 | Label slow; |
duke@435 | 169 | |
// First read of the safepoint counter; a set low bit sends us to the slow case.
duke@435 | 170 | ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); |
duke@435 | 171 | __ mov32 (rcounter, counter); |
never@739 | 172 | __ mov (robj, c_rarg1); |
duke@435 | 173 | __ testb (rcounter, 1); |
duke@435 | 174 | __ jcc (Assembler::notZero, slow); |
duke@435 | 175 | if (os::is_MP()) { |
never@739 | 176 | __ xorptr(robj, rcounter); |
never@739 | 177 | __ xorptr(robj, rcounter); // obj, since |
duke@435 | 178 | // robj ^ rcounter ^ rcounter == robj |
duke@435 | 179 | // robj is data dependent on rcounter. |
duke@435 | 180 | } |
never@739 | 181 | __ movptr(robj, Address(robj, 0)); // *obj (load through the obj argument) |
never@739 | 182 | __ mov (roffset, c_rarg2); |
never@739 | 183 | __ shrptr(roffset, 2); // offset = jfield id >> 2 |
duke@435 | 184 | |
// Remember where the speculative field load starts. The matching slow-case pc
// is stored at the same index further down.
duke@435 | 185 | assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); |
duke@435 | 186 | speculative_load_pclist[count] = __ pc(); |
duke@435 | 187 | switch (type) { |
duke@435 | 188 | case T_FLOAT: __ movflt (xmm0, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 189 | case T_DOUBLE: __ movdbl (xmm0, Address(robj, roffset, Address::times_1)); break; |
duke@435 | 190 | default: ShouldNotReachHere(); |
duke@435 | 191 | } |
duke@435 | 192 | |
// Second read of the safepoint counter, compared against the first read.
// The loaded value is copied from xmm0 into rax so that it can be xor-ed into
// the counter address. rcounter_addr reuses r11, which held roffset up to the
// load above.
duke@435 | 193 | if (os::is_MP()) { |
duke@435 | 194 | __ lea(rcounter_addr, counter); |
duke@435 | 195 | __ movdq (rax, xmm0); |
duke@435 | 196 | // counter address is data dependent on xmm0. |
never@739 | 197 | __ xorptr(rcounter_addr, rax); |
never@739 | 198 | __ xorptr(rcounter_addr, rax); |
duke@435 | 199 | __ cmpl (rcounter, Address(rcounter_addr, 0)); |
duke@435 | 200 | } else { |
duke@435 | 201 | __ cmp32 (rcounter, counter); |
duke@435 | 202 | } |
duke@435 | 203 | __ jcc (Assembler::notEqual, slow); |
duke@435 | 204 | |
duke@435 | 205 | __ ret (0); |
duke@435 | 206 | |
duke@435 | 207 | slowcase_entry_pclist[count++] = __ pc(); |
duke@435 | 208 | __ bind (slow); |
// NOTE(review): this switch has no default case, so slow_case_addr would be
// left unset for any other type. The earlier switches on type call
// ShouldNotReachHere() for those values.
duke@435 | 209 | address slow_case_addr; |
duke@435 | 210 | switch (type) { |
duke@435 | 211 | case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; |
duke@435 | 212 | case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); |
duke@435 | 213 | } |
duke@435 | 214 | // tail call to the address selected above |
duke@435 | 215 | __ jump (ExternalAddress(slow_case_addr)); |
duke@435 | 216 | |
duke@435 | 217 | __ flush (); |
duke@435 | 218 | |
duke@435 | 219 | return fast_entry; |
duke@435 | 220 | } |
duke@435 | 221 | |
// Returns the entry address of a newly generated stub for float fields
// (delegates to generate_fast_get_float_field0 with T_FLOAT).
duke@435 | 222 | address JNI_FastGetField::generate_fast_get_float_field() { |
duke@435 | 223 | return generate_fast_get_float_field0(T_FLOAT); |
duke@435 | 224 | } |
duke@435 | 225 | |
// Returns the entry address of a newly generated stub for double fields
// (delegates to generate_fast_get_float_field0 with T_DOUBLE).
duke@435 | 226 | address JNI_FastGetField::generate_fast_get_double_field() { |
duke@435 | 227 | return generate_fast_get_float_field0(T_DOUBLE); |
duke@435 | 228 | } |