Tue, 02 Sep 2014 12:48:45 -0700
8055494: Add C2 x86 intrinsic for BigInteger::multiplyToLen() method
Summary: Add new C2 intrinsic for BigInteger::multiplyToLen() on x86 in 64-bit VM.
Reviewed-by: roland
duke@435 | 1 | /* |
twisti@2103 | 2 | * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
stefank@2314 | 25 | #include "precompiled.hpp" |
twisti@4318 | 26 | #include "asm/macroAssembler.hpp" |
stefank@2314 | 27 | #include "memory/resourceArea.hpp" |
stefank@2314 | 28 | #include "prims/jniFastGetField.hpp" |
stefank@2314 | 29 | #include "prims/jvm_misc.hpp" |
stefank@2314 | 30 | #include "runtime/safepoint.hpp" |
duke@435 | 31 | |
// Assembler shorthand: a line written as "__ insn(...)" emits through the
// local MacroAssembler* named 'masm' of the enclosing generator.
#define __ masm->

// Size, in words, of the BufferBlob requested for each generated accessor
// (the generators pass BUFFER_SIZE*wordSize to BufferBlob::create).
#define BUFFER_SIZE 30

#ifdef _WINDOWS
// Definitions of the static entry-point pointers. Each one is assigned the
// generated fast entry by the matching generate_fast_get_*_field*() routine
// in this file.
GetBooleanField_t JNI_FastGetField::jni_fast_GetBooleanField_fp;
GetByteField_t    JNI_FastGetField::jni_fast_GetByteField_fp;
GetCharField_t    JNI_FastGetField::jni_fast_GetCharField_fp;
GetShortField_t   JNI_FastGetField::jni_fast_GetShortField_fp;
GetIntField_t     JNI_FastGetField::jni_fast_GetIntField_fp;
GetLongField_t    JNI_FastGetField::jni_fast_GetLongField_fp;
GetFloatField_t   JNI_FastGetField::jni_fast_GetFloatField_fp;
GetDoubleField_t  JNI_FastGetField::jni_fast_GetDoubleField_fp;
#endif
duke@435 | 46 | |
duke@435 | 47 | // Instead of issuing lfence for LoadLoad barrier, we create data dependency |
duke@435 | 48 | // between loads, which is much more efficient than lfence. |
duke@435 | 49 | |
duke@435 | 50 | address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { |
duke@435 | 51 | const char *name; |
duke@435 | 52 | switch (type) { |
duke@435 | 53 | case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; |
duke@435 | 54 | case T_BYTE: name = "jni_fast_GetByteField"; break; |
duke@435 | 55 | case T_CHAR: name = "jni_fast_GetCharField"; break; |
duke@435 | 56 | case T_SHORT: name = "jni_fast_GetShortField"; break; |
duke@435 | 57 | case T_INT: name = "jni_fast_GetIntField"; break; |
duke@435 | 58 | default: ShouldNotReachHere(); |
duke@435 | 59 | } |
duke@435 | 60 | ResourceMark rm; |
twisti@2103 | 61 | BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); |
twisti@2103 | 62 | CodeBuffer cbuf(blob); |
duke@435 | 63 | MacroAssembler* masm = new MacroAssembler(&cbuf); |
twisti@2103 | 64 | address fast_entry = __ pc(); |
duke@435 | 65 | |
duke@435 | 66 | Label slow; |
duke@435 | 67 | |
duke@435 | 68 | // stack layout: offset from rsp (in words): |
duke@435 | 69 | // return pc 0 |
duke@435 | 70 | // jni env 1 |
duke@435 | 71 | // obj 2 |
duke@435 | 72 | // jfieldID 3 |
duke@435 | 73 | |
duke@435 | 74 | ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); |
duke@435 | 75 | __ mov32 (rcx, counter); |
duke@435 | 76 | __ testb (rcx, 1); |
duke@435 | 77 | __ jcc (Assembler::notZero, slow); |
duke@435 | 78 | if (os::is_MP()) { |
never@739 | 79 | __ mov(rax, rcx); |
never@739 | 80 | __ andptr(rax, 1); // rax, must end up 0 |
never@739 | 81 | __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); |
duke@435 | 82 | // obj, notice rax, is 0. |
duke@435 | 83 | // rdx is data dependent on rcx. |
duke@435 | 84 | } else { |
never@739 | 85 | __ movptr (rdx, Address(rsp, 2*wordSize)); // obj |
duke@435 | 86 | } |
never@739 | 87 | __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID |
never@739 | 88 | __ movptr(rdx, Address(rdx, 0)); // *obj |
never@739 | 89 | __ shrptr (rax, 2); // offset |
duke@435 | 90 | |
duke@435 | 91 | assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); |
duke@435 | 92 | speculative_load_pclist[count] = __ pc(); |
duke@435 | 93 | switch (type) { |
never@739 | 94 | case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break; |
never@739 | 95 | case T_BYTE: __ movsbl (rax, Address(rdx, rax, Address::times_1)); break; |
never@739 | 96 | case T_CHAR: __ movzwl (rax, Address(rdx, rax, Address::times_1)); break; |
never@739 | 97 | case T_SHORT: __ movswl (rax, Address(rdx, rax, Address::times_1)); break; |
duke@435 | 98 | case T_INT: __ movl (rax, Address(rdx, rax, Address::times_1)); break; |
duke@435 | 99 | default: ShouldNotReachHere(); |
duke@435 | 100 | } |
duke@435 | 101 | |
duke@435 | 102 | Address ca1; |
duke@435 | 103 | if (os::is_MP()) { |
duke@435 | 104 | __ lea(rdx, counter); |
never@739 | 105 | __ xorptr(rdx, rax); |
never@739 | 106 | __ xorptr(rdx, rax); |
duke@435 | 107 | __ cmp32(rcx, Address(rdx, 0)); |
duke@435 | 108 | // ca1 is the same as ca because |
duke@435 | 109 | // rax, ^ counter_addr ^ rax, = address |
duke@435 | 110 | // ca1 is data dependent on rax,. |
duke@435 | 111 | } else { |
duke@435 | 112 | __ cmp32(rcx, counter); |
duke@435 | 113 | } |
duke@435 | 114 | __ jcc (Assembler::notEqual, slow); |
duke@435 | 115 | |
duke@435 | 116 | #ifndef _WINDOWS |
duke@435 | 117 | __ ret (0); |
duke@435 | 118 | #else |
duke@435 | 119 | // __stdcall calling convention |
duke@435 | 120 | __ ret (3*wordSize); |
duke@435 | 121 | #endif |
duke@435 | 122 | |
duke@435 | 123 | slowcase_entry_pclist[count++] = __ pc(); |
duke@435 | 124 | __ bind (slow); |
duke@435 | 125 | address slow_case_addr; |
duke@435 | 126 | switch (type) { |
duke@435 | 127 | case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; |
duke@435 | 128 | case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; |
duke@435 | 129 | case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; |
duke@435 | 130 | case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; |
duke@435 | 131 | case T_INT: slow_case_addr = jni_GetIntField_addr(); |
duke@435 | 132 | } |
duke@435 | 133 | // tail call |
duke@435 | 134 | __ jump (ExternalAddress(slow_case_addr)); |
duke@435 | 135 | |
duke@435 | 136 | __ flush (); |
duke@435 | 137 | |
duke@435 | 138 | #ifndef _WINDOWS |
duke@435 | 139 | return fast_entry; |
duke@435 | 140 | #else |
duke@435 | 141 | switch (type) { |
twisti@2103 | 142 | case T_BOOLEAN: jni_fast_GetBooleanField_fp = (GetBooleanField_t) fast_entry; break; |
twisti@2103 | 143 | case T_BYTE: jni_fast_GetByteField_fp = (GetByteField_t) fast_entry; break; |
twisti@2103 | 144 | case T_CHAR: jni_fast_GetCharField_fp = (GetCharField_t) fast_entry; break; |
twisti@2103 | 145 | case T_SHORT: jni_fast_GetShortField_fp = (GetShortField_t) fast_entry; break; |
twisti@2103 | 146 | case T_INT: jni_fast_GetIntField_fp = (GetIntField_t) fast_entry; break; |
duke@435 | 147 | } |
duke@435 | 148 | return os::win32::fast_jni_accessor_wrapper(type); |
duke@435 | 149 | #endif |
duke@435 | 150 | } |
duke@435 | 151 | |
duke@435 | 152 | address JNI_FastGetField::generate_fast_get_boolean_field() { |
duke@435 | 153 | return generate_fast_get_int_field0(T_BOOLEAN); |
duke@435 | 154 | } |
duke@435 | 155 | |
duke@435 | 156 | address JNI_FastGetField::generate_fast_get_byte_field() { |
duke@435 | 157 | return generate_fast_get_int_field0(T_BYTE); |
duke@435 | 158 | } |
duke@435 | 159 | |
duke@435 | 160 | address JNI_FastGetField::generate_fast_get_char_field() { |
duke@435 | 161 | return generate_fast_get_int_field0(T_CHAR); |
duke@435 | 162 | } |
duke@435 | 163 | |
duke@435 | 164 | address JNI_FastGetField::generate_fast_get_short_field() { |
duke@435 | 165 | return generate_fast_get_int_field0(T_SHORT); |
duke@435 | 166 | } |
duke@435 | 167 | |
duke@435 | 168 | address JNI_FastGetField::generate_fast_get_int_field() { |
duke@435 | 169 | return generate_fast_get_int_field0(T_INT); |
duke@435 | 170 | } |
duke@435 | 171 | |
duke@435 | 172 | address JNI_FastGetField::generate_fast_get_long_field() { |
duke@435 | 173 | const char *name = "jni_fast_GetLongField"; |
duke@435 | 174 | ResourceMark rm; |
twisti@2103 | 175 | BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); |
twisti@2103 | 176 | CodeBuffer cbuf(blob); |
duke@435 | 177 | MacroAssembler* masm = new MacroAssembler(&cbuf); |
twisti@2103 | 178 | address fast_entry = __ pc(); |
duke@435 | 179 | |
duke@435 | 180 | Label slow; |
duke@435 | 181 | |
duke@435 | 182 | // stack layout: offset from rsp (in words): |
duke@435 | 183 | // old rsi 0 |
duke@435 | 184 | // return pc 1 |
duke@435 | 185 | // jni env 2 |
duke@435 | 186 | // obj 3 |
duke@435 | 187 | // jfieldID 4 |
duke@435 | 188 | |
duke@435 | 189 | ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); |
duke@435 | 190 | |
never@739 | 191 | __ push (rsi); |
duke@435 | 192 | __ mov32 (rcx, counter); |
duke@435 | 193 | __ testb (rcx, 1); |
duke@435 | 194 | __ jcc (Assembler::notZero, slow); |
duke@435 | 195 | if (os::is_MP()) { |
never@739 | 196 | __ mov(rax, rcx); |
never@739 | 197 | __ andptr(rax, 1); // rax, must end up 0 |
never@739 | 198 | __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize)); |
duke@435 | 199 | // obj, notice rax, is 0. |
duke@435 | 200 | // rdx is data dependent on rcx. |
duke@435 | 201 | } else { |
never@739 | 202 | __ movptr(rdx, Address(rsp, 3*wordSize)); // obj |
duke@435 | 203 | } |
never@739 | 204 | __ movptr(rsi, Address(rsp, 4*wordSize)); // jfieldID |
never@739 | 205 | __ movptr(rdx, Address(rdx, 0)); // *obj |
never@739 | 206 | __ shrptr(rsi, 2); // offset |
duke@435 | 207 | |
duke@435 | 208 | assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small"); |
duke@435 | 209 | speculative_load_pclist[count++] = __ pc(); |
never@739 | 210 | __ movptr(rax, Address(rdx, rsi, Address::times_1)); |
never@739 | 211 | #ifndef _LP64 |
duke@435 | 212 | speculative_load_pclist[count] = __ pc(); |
never@739 | 213 | __ movl(rdx, Address(rdx, rsi, Address::times_1, 4)); |
never@739 | 214 | #endif // _LP64 |
duke@435 | 215 | |
duke@435 | 216 | if (os::is_MP()) { |
never@739 | 217 | __ lea(rsi, counter); |
never@739 | 218 | __ xorptr(rsi, rdx); |
never@739 | 219 | __ xorptr(rsi, rax); |
never@739 | 220 | __ xorptr(rsi, rdx); |
never@739 | 221 | __ xorptr(rsi, rax); |
duke@435 | 222 | __ cmp32(rcx, Address(rsi, 0)); |
duke@435 | 223 | // ca1 is the same as ca because |
duke@435 | 224 | // rax, ^ rdx ^ counter_addr ^ rax, ^ rdx = address |
duke@435 | 225 | // ca1 is data dependent on both rax, and rdx. |
duke@435 | 226 | } else { |
duke@435 | 227 | __ cmp32(rcx, counter); |
duke@435 | 228 | } |
duke@435 | 229 | __ jcc (Assembler::notEqual, slow); |
duke@435 | 230 | |
never@739 | 231 | __ pop (rsi); |
duke@435 | 232 | |
duke@435 | 233 | #ifndef _WINDOWS |
duke@435 | 234 | __ ret (0); |
duke@435 | 235 | #else |
duke@435 | 236 | // __stdcall calling convention |
duke@435 | 237 | __ ret (3*wordSize); |
duke@435 | 238 | #endif |
duke@435 | 239 | |
duke@435 | 240 | slowcase_entry_pclist[count-1] = __ pc(); |
duke@435 | 241 | slowcase_entry_pclist[count++] = __ pc(); |
duke@435 | 242 | __ bind (slow); |
never@739 | 243 | __ pop (rsi); |
duke@435 | 244 | address slow_case_addr = jni_GetLongField_addr();; |
duke@435 | 245 | // tail call |
duke@435 | 246 | __ jump (ExternalAddress(slow_case_addr)); |
duke@435 | 247 | |
duke@435 | 248 | __ flush (); |
duke@435 | 249 | |
duke@435 | 250 | #ifndef _WINDOWS |
duke@435 | 251 | return fast_entry; |
duke@435 | 252 | #else |
twisti@2103 | 253 | jni_fast_GetLongField_fp = (GetLongField_t) fast_entry; |
duke@435 | 254 | return os::win32::fast_jni_accessor_wrapper(T_LONG); |
duke@435 | 255 | #endif |
duke@435 | 256 | } |
duke@435 | 257 | |
duke@435 | 258 | address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { |
duke@435 | 259 | const char *name; |
duke@435 | 260 | switch (type) { |
duke@435 | 261 | case T_FLOAT: name = "jni_fast_GetFloatField"; break; |
duke@435 | 262 | case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; |
duke@435 | 263 | default: ShouldNotReachHere(); |
duke@435 | 264 | } |
duke@435 | 265 | ResourceMark rm; |
twisti@2103 | 266 | BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); |
twisti@2103 | 267 | CodeBuffer cbuf(blob); |
duke@435 | 268 | MacroAssembler* masm = new MacroAssembler(&cbuf); |
twisti@2103 | 269 | address fast_entry = __ pc(); |
duke@435 | 270 | |
duke@435 | 271 | Label slow_with_pop, slow; |
duke@435 | 272 | |
duke@435 | 273 | // stack layout: offset from rsp (in words): |
duke@435 | 274 | // return pc 0 |
duke@435 | 275 | // jni env 1 |
duke@435 | 276 | // obj 2 |
duke@435 | 277 | // jfieldID 3 |
duke@435 | 278 | |
duke@435 | 279 | ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); |
duke@435 | 280 | |
duke@435 | 281 | __ mov32 (rcx, counter); |
duke@435 | 282 | __ testb (rcx, 1); |
duke@435 | 283 | __ jcc (Assembler::notZero, slow); |
duke@435 | 284 | if (os::is_MP()) { |
never@739 | 285 | __ mov(rax, rcx); |
never@739 | 286 | __ andptr(rax, 1); // rax, must end up 0 |
never@739 | 287 | __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); |
duke@435 | 288 | // obj, notice rax, is 0. |
duke@435 | 289 | // rdx is data dependent on rcx. |
duke@435 | 290 | } else { |
never@739 | 291 | __ movptr(rdx, Address(rsp, 2*wordSize)); // obj |
duke@435 | 292 | } |
never@739 | 293 | __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID |
never@739 | 294 | __ movptr(rdx, Address(rdx, 0)); // *obj |
never@739 | 295 | __ shrptr(rax, 2); // offset |
duke@435 | 296 | |
duke@435 | 297 | assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); |
duke@435 | 298 | speculative_load_pclist[count] = __ pc(); |
duke@435 | 299 | switch (type) { |
never@739 | 300 | #ifndef _LP64 |
duke@435 | 301 | case T_FLOAT: __ fld_s (Address(rdx, rax, Address::times_1)); break; |
duke@435 | 302 | case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break; |
never@739 | 303 | #else |
never@739 | 304 | case T_FLOAT: __ movflt (xmm0, Address(robj, roffset, Address::times_1)); break; |
never@739 | 305 | case T_DOUBLE: __ movdbl (xmm0, Address(robj, roffset, Address::times_1)); break; |
never@739 | 306 | #endif // _LP64 |
duke@435 | 307 | default: ShouldNotReachHere(); |
duke@435 | 308 | } |
duke@435 | 309 | |
duke@435 | 310 | Address ca1; |
duke@435 | 311 | if (os::is_MP()) { |
duke@435 | 312 | __ fst_s (Address(rsp, -4)); |
duke@435 | 313 | __ lea(rdx, counter); |
duke@435 | 314 | __ movl (rax, Address(rsp, -4)); |
never@739 | 315 | // garbage hi-order bits on 64bit are harmless. |
never@739 | 316 | __ xorptr(rdx, rax); |
never@739 | 317 | __ xorptr(rdx, rax); |
duke@435 | 318 | __ cmp32(rcx, Address(rdx, 0)); |
duke@435 | 319 | // rax, ^ counter_addr ^ rax, = address |
duke@435 | 320 | // ca1 is data dependent on the field |
duke@435 | 321 | // access. |
duke@435 | 322 | } else { |
duke@435 | 323 | __ cmp32(rcx, counter); |
duke@435 | 324 | } |
duke@435 | 325 | __ jcc (Assembler::notEqual, slow_with_pop); |
duke@435 | 326 | |
duke@435 | 327 | #ifndef _WINDOWS |
duke@435 | 328 | __ ret (0); |
duke@435 | 329 | #else |
duke@435 | 330 | // __stdcall calling convention |
duke@435 | 331 | __ ret (3*wordSize); |
duke@435 | 332 | #endif |
duke@435 | 333 | |
duke@435 | 334 | __ bind (slow_with_pop); |
duke@435 | 335 | // invalid load. pop FPU stack. |
duke@435 | 336 | __ fstp_d (0); |
duke@435 | 337 | |
duke@435 | 338 | slowcase_entry_pclist[count++] = __ pc(); |
duke@435 | 339 | __ bind (slow); |
duke@435 | 340 | address slow_case_addr; |
duke@435 | 341 | switch (type) { |
duke@435 | 342 | case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; |
duke@435 | 343 | case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; |
duke@435 | 344 | default: ShouldNotReachHere(); |
duke@435 | 345 | } |
duke@435 | 346 | // tail call |
duke@435 | 347 | __ jump (ExternalAddress(slow_case_addr)); |
duke@435 | 348 | |
duke@435 | 349 | __ flush (); |
duke@435 | 350 | |
duke@435 | 351 | #ifndef _WINDOWS |
duke@435 | 352 | return fast_entry; |
duke@435 | 353 | #else |
duke@435 | 354 | switch (type) { |
twisti@2103 | 355 | case T_FLOAT: jni_fast_GetFloatField_fp = (GetFloatField_t) fast_entry; break; |
twisti@2103 | 356 | case T_DOUBLE: jni_fast_GetDoubleField_fp = (GetDoubleField_t) fast_entry; break; |
duke@435 | 357 | } |
duke@435 | 358 | return os::win32::fast_jni_accessor_wrapper(type); |
duke@435 | 359 | #endif |
duke@435 | 360 | } |
duke@435 | 361 | |
duke@435 | 362 | address JNI_FastGetField::generate_fast_get_float_field() { |
duke@435 | 363 | return generate_fast_get_float_field0(T_FLOAT); |
duke@435 | 364 | } |
duke@435 | 365 | |
duke@435 | 366 | address JNI_FastGetField::generate_fast_get_double_field() { |
duke@435 | 367 | return generate_fast_get_float_field0(T_DOUBLE); |
duke@435 | 368 | } |