4763 bind(L); |
4763 bind(L); |
4764 } |
4764 } |
4765 pop_CPU_state(); |
4765 pop_CPU_state(); |
4766 } |
4766 } |
4767 |
4767 |
|
// Emits the instructions that run after a JNI call returns in order to
// restore or check CPU control state: MXCSR (when SSE is supported), the
// upper YMM bits (when AVX is supported) and, on 32-bit builds only, the
// x87 FPU control word.
4768 void MacroAssembler::restore_cpu_control_state_after_jni() { |

4769 // Either restore the MXCSR register after returning from the JNI Call |

4770 // or verify that it wasn't changed (with -Xcheck:jni flag). |
// RestoreMXCSROnJNICalls takes precedence: the verify stub is only called
// when restoring is disabled and CheckJNICalls is set.

4771 if (VM_Version::supports_sse()) { |

4772 if (RestoreMXCSROnJNICalls) { |

4773 ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std())); |

4774 } else if (CheckJNICalls) { |

4775 call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry())); |

4776 } |

4777 } |

4778 if (VM_Version::supports_avx()) { |

4779 // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. |

4780 vzeroupper(); |

4781 } |

4782 |

4783 #ifndef _LP64 |

4784 // 32-bit only: verify that the x87 floating point control word wasn't |

4785 // changed by the JNI call. This is emitted only when CheckJNICalls is set. |
// NOTE(review): only the verify stub is called here; whether that stub also
// restores the control word cannot be told from this block - confirm.

4786 if (CheckJNICalls) { |

4787 call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); |

4788 } |

4789 #endif // !_LP64 |

4790 } |
|
4791 |
|
4792 |
4768 void MacroAssembler::load_klass(Register dst, Register src) { |
4793 void MacroAssembler::load_klass(Register dst, Register src) { |
4769 #ifdef _LP64 |
4794 #ifdef _LP64 |
4770 if (UseCompressedKlassPointers) { |
4795 if (UseCompressedKlassPointers) { |
4771 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); |
4796 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); |
4772 decode_klass_not_null(dst); |
4797 decode_klass_not_null(dst); |
5757 vptest(vec1, vec1); |
5782 vptest(vec1, vec1); |
5758 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
5783 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
5759 addptr(result, stride2); |
5784 addptr(result, stride2); |
5760 subl(cnt2, stride2); |
5785 subl(cnt2, stride2); |
5761 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
5786 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
|
5787 // clean upper bits of YMM registers |
|
5788 vzeroupper(); |
5762 |
5789 |
5763 // compare wide vectors tail |
5790 // compare wide vectors tail |
5764 bind(COMPARE_WIDE_TAIL); |
5791 bind(COMPARE_WIDE_TAIL); |
5765 testptr(result, result); |
5792 testptr(result, result); |
5766 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
5793 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
5770 negptr(result); |
5797 negptr(result); |
5771 jmpb(COMPARE_WIDE_VECTORS_LOOP); |
5798 jmpb(COMPARE_WIDE_VECTORS_LOOP); |
5772 |
5799 |
5773 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. |
5800 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. |
5774 bind(VECTOR_NOT_EQUAL); |
5801 bind(VECTOR_NOT_EQUAL); |
|
5802 // clean upper bits of YMM registers |
|
5803 vzeroupper(); |
5775 lea(str1, Address(str1, result, scale)); |
5804 lea(str1, Address(str1, result, scale)); |
5776 lea(str2, Address(str2, result, scale)); |
5805 lea(str2, Address(str2, result, scale)); |
5777 jmp(COMPARE_16_CHARS); |
5806 jmp(COMPARE_16_CHARS); |
5778 |
5807 |
5779 // Compare tail chars, length between 1 to 15 chars |
5808 // Compare tail chars, length between 1 to 15 chars |
6026 bind(FALSE_LABEL); |
6055 bind(FALSE_LABEL); |
6027 xorl(result, result); // return false |
6056 xorl(result, result); // return false |
6028 |
6057 |
6029 // That's it |
6058 // That's it |
6030 bind(DONE); |
6059 bind(DONE); |
|
6060 if (UseAVX >= 2) { |
|
6061 // clean upper bits of YMM registers |
|
6062 vzeroupper(); |
|
6063 } |
6031 } |
6064 } |
6032 |
6065 |
6033 void MacroAssembler::generate_fill(BasicType t, bool aligned, |
6066 void MacroAssembler::generate_fill(BasicType t, bool aligned, |
6034 Register to, Register value, Register count, |
6067 Register to, Register value, Register count, |
6035 Register rtmp, XMMRegister xtmp) { |
6068 Register rtmp, XMMRegister xtmp) { |
6155 addl(count, 8 << shift); |
6188 addl(count, 8 << shift); |
6156 jccb(Assembler::less, L_check_fill_8_bytes); |
6189 jccb(Assembler::less, L_check_fill_8_bytes); |
6157 vmovdqu(Address(to, 0), xtmp); |
6190 vmovdqu(Address(to, 0), xtmp); |
6158 addptr(to, 32); |
6191 addptr(to, 32); |
6159 subl(count, 8 << shift); |
6192 subl(count, 8 << shift); |
|
6193 |
|
6194 BIND(L_check_fill_8_bytes); |
|
6195 // clean upper bits of YMM registers |
|
6196 vzeroupper(); |
6160 } else { |
6197 } else { |
6161 // Fill 32-byte chunks |
6198 // Fill 32-byte chunks |
6162 pshufd(xtmp, xtmp, 0); |
6199 pshufd(xtmp, xtmp, 0); |
6163 |
6200 |
6164 subl(count, 8 << shift); |
6201 subl(count, 8 << shift); |
6178 } |
6215 } |
6179 |
6216 |
6180 addptr(to, 32); |
6217 addptr(to, 32); |
6181 subl(count, 8 << shift); |
6218 subl(count, 8 << shift); |
6182 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
6219 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
|
6220 |
|
6221 BIND(L_check_fill_8_bytes); |
6183 } |
6222 } |
6184 BIND(L_check_fill_8_bytes); |
|
6185 addl(count, 8 << shift); |
6223 addl(count, 8 << shift); |
6186 jccb(Assembler::zero, L_exit); |
6224 jccb(Assembler::zero, L_exit); |
6187 jmpb(L_fill_8_bytes); |
6225 jmpb(L_fill_8_bytes); |
6188 |
6226 |
6189 // |
6227 // |
6314 bind(L_chars_16_check); |
6352 bind(L_chars_16_check); |
6315 addptr(len, 16); |
6353 addptr(len, 16); |
6316 jccb(Assembler::lessEqual, L_copy_16_chars); |
6354 jccb(Assembler::lessEqual, L_copy_16_chars); |
6317 |
6355 |
6318 bind(L_copy_16_chars_exit); |
6356 bind(L_copy_16_chars_exit); |
|
6357 if (UseAVX >= 2) { |
|
6358 // clean upper bits of YMM registers |
|
6359 vzeroupper(); |
|
6360 } |
6319 subptr(len, 8); |
6361 subptr(len, 8); |
6320 jccb(Assembler::greater, L_copy_8_chars_exit); |
6362 jccb(Assembler::greater, L_copy_8_chars_exit); |
6321 |
6363 |
6322 bind(L_copy_8_chars); |
6364 bind(L_copy_8_chars); |
6323 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16)); |
6365 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16)); |