6009 } |
6009 } |
6010 BIND(L_fill_32_bytes); |
6010 BIND(L_fill_32_bytes); |
6011 { |
6011 { |
6012 assert( UseSSE >= 2, "supported cpu only" ); |
6012 assert( UseSSE >= 2, "supported cpu only" ); |
6013 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; |
6013 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; |
6014 // Fill 32-byte chunks |
|
6015 movdl(xtmp, value); |
6014 movdl(xtmp, value); |
6016 pshufd(xtmp, xtmp, 0); |
6015 if (UseAVX >= 2 && UseUnalignedLoadStores) { |
6017 |
6016 // Fill 64-byte chunks |
6018 subl(count, 8 << shift); |
6017 Label L_fill_64_bytes_loop, L_check_fill_32_bytes; |
6019 jcc(Assembler::less, L_check_fill_8_bytes); |
6018 vpbroadcastd(xtmp, xtmp); |
6020 align(16); |
6019 |
6021 |
6020 subl(count, 16 << shift); |
6022 BIND(L_fill_32_bytes_loop); |
6021 jcc(Assembler::less, L_check_fill_32_bytes); |
6023 |
6022 align(16); |
6024 if (UseUnalignedLoadStores) { |
6023 |
6025 movdqu(Address(to, 0), xtmp); |
6024 BIND(L_fill_64_bytes_loop); |
6026 movdqu(Address(to, 16), xtmp); |
6025 vmovdqu(Address(to, 0), xtmp); |
|
6026 vmovdqu(Address(to, 32), xtmp); |
|
6027 addptr(to, 64); |
|
6028 subl(count, 16 << shift); |
|
6029 jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); |
|
6030 |
|
6031 BIND(L_check_fill_32_bytes); |
|
6032 addl(count, 8 << shift); |
|
6033 jccb(Assembler::less, L_check_fill_8_bytes); |
|
6034 vmovdqu(Address(to, 0), xtmp); |
|
6035 addptr(to, 32); |
|
6036 subl(count, 8 << shift); |
6027 } else { |
6037 } else { |
6028 movq(Address(to, 0), xtmp); |
6038 // Fill 32-byte chunks |
6029 movq(Address(to, 8), xtmp); |
6039 pshufd(xtmp, xtmp, 0); |
6030 movq(Address(to, 16), xtmp); |
6040 |
6031 movq(Address(to, 24), xtmp); |
6041 subl(count, 8 << shift); |
|
6042 jcc(Assembler::less, L_check_fill_8_bytes); |
|
6043 align(16); |
|
6044 |
|
6045 BIND(L_fill_32_bytes_loop); |
|
6046 |
|
6047 if (UseUnalignedLoadStores) { |
|
6048 movdqu(Address(to, 0), xtmp); |
|
6049 movdqu(Address(to, 16), xtmp); |
|
6050 } else { |
|
6051 movq(Address(to, 0), xtmp); |
|
6052 movq(Address(to, 8), xtmp); |
|
6053 movq(Address(to, 16), xtmp); |
|
6054 movq(Address(to, 24), xtmp); |
|
6055 } |
|
6056 |
|
6057 addptr(to, 32); |
|
6058 subl(count, 8 << shift); |
|
6059 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
6032 } |
6060 } |
6033 |
|
6034 addptr(to, 32); |
|
6035 subl(count, 8 << shift); |
|
6036 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
|
6037 BIND(L_check_fill_8_bytes); |
6061 BIND(L_check_fill_8_bytes); |
6038 addl(count, 8 << shift); |
6062 addl(count, 8 << shift); |
6039 jccb(Assembler::zero, L_exit); |
6063 jccb(Assembler::zero, L_exit); |
6040 jmpb(L_fill_8_bytes); |
6064 jmpb(L_fill_8_bytes); |
6041 |
6065 |