2172 // c_rarg0 - source byte array address |
2172 // c_rarg0 - source byte array address |
2173 // c_rarg1 - destination byte array address |
2173 // c_rarg1 - destination byte array address |
2174 // c_rarg2 - K (key) in little endian int array |
2174 // c_rarg2 - K (key) in little endian int array |
2175 // |
2175 // |
2176 address generate_aescrypt_encryptBlock() { |
2176 address generate_aescrypt_encryptBlock() { |
2177 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); |
2177 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2178 __ align(CodeEntryAlignment); |
2178 __ align(CodeEntryAlignment); |
2179 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); |
2179 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); |
2180 Label L_doLast; |
2180 Label L_doLast; |
2181 address start = __ pc(); |
2181 address start = __ pc(); |
2182 |
2182 |
2183 const Register from = rsi; // source array address |
2183 const Register from = rdx; // source array address |
2184 const Register to = rdx; // destination array address |
2184 const Register to = rdx; // destination array address |
2185 const Register key = rcx; // key array address |
2185 const Register key = rcx; // key array address |
2186 const Register keylen = rax; |
2186 const Register keylen = rax; |
2187 const Address from_param(rbp, 8+0); |
2187 const Address from_param(rbp, 8+0); |
2188 const Address to_param (rbp, 8+4); |
2188 const Address to_param (rbp, 8+4); |
2189 const Address key_param (rbp, 8+8); |
2189 const Address key_param (rbp, 8+8); |
2190 |
2190 |
2191 const XMMRegister xmm_result = xmm0; |
2191 const XMMRegister xmm_result = xmm0; |
2192 const XMMRegister xmm_temp = xmm1; |
2192 const XMMRegister xmm_key_shuf_mask = xmm1; |
2193 const XMMRegister xmm_key_shuf_mask = xmm2; |
2193 const XMMRegister xmm_temp1 = xmm2; |
2194 |
2194 const XMMRegister xmm_temp2 = xmm3; |
2195 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2195 const XMMRegister xmm_temp3 = xmm4; |
2196 __ push(rsi); |
2196 const XMMRegister xmm_temp4 = xmm5; |
2197 __ movptr(from , from_param); |
2197 |
2198 __ movptr(to , to_param); |
2198 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2199 __ movptr(key , key_param); |
2199 __ movptr(from, from_param); |
2200 |
2200 __ movptr(key, key_param); |
|
2201 |
|
2202 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
2201 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2203 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2202 // keylen = # of 32-bit words, convert to 128-bit words |
|
2203 __ shrl(keylen, 2); |
|
2204 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more |
|
2205 |
2204 |
2206 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2205 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2207 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input |
2206 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input |
|
2207 __ movptr(to, to_param); |
2208 |
2208 |
2209 // For encryption, the java expanded key ordering is just what we need |
2209 // For encryption, the java expanded key ordering is just what we need |
2210 |
2210 |
2211 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); |
2211 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); |
2212 __ pxor(xmm_result, xmm_temp); |
2212 __ pxor(xmm_result, xmm_temp1); |
2213 for (int offset = 0x10; offset <= 0x90; offset += 0x10) { |
2213 |
2214 aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); |
2214 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2215 } |
2215 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
2216 load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); |
2216 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
2217 __ cmpl(keylen, 0); |
2217 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
2218 __ jcc(Assembler::equal, L_doLast); |
2218 |
2219 __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys |
2219 __ aesenc(xmm_result, xmm_temp1); |
2220 aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); |
2220 __ aesenc(xmm_result, xmm_temp2); |
2221 load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); |
2221 __ aesenc(xmm_result, xmm_temp3); |
2222 __ subl(keylen, 2); |
2222 __ aesenc(xmm_result, xmm_temp4); |
2223 __ jcc(Assembler::equal, L_doLast); |
2223 |
2224 __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys |
2224 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
2225 aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); |
2225 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
2226 load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); |
2226 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
|
2227 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
|
2228 |
|
2229 __ aesenc(xmm_result, xmm_temp1); |
|
2230 __ aesenc(xmm_result, xmm_temp2); |
|
2231 __ aesenc(xmm_result, xmm_temp3); |
|
2232 __ aesenc(xmm_result, xmm_temp4); |
|
2233 |
|
2234 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
|
2235 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
|
2236 |
|
2237 __ cmpl(keylen, 44); |
|
2238 __ jccb(Assembler::equal, L_doLast); |
|
2239 |
|
2240 __ aesenc(xmm_result, xmm_temp1); |
|
2241 __ aesenc(xmm_result, xmm_temp2); |
|
2242 |
|
2243 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
|
2244 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
|
2245 |
|
2246 __ cmpl(keylen, 52); |
|
2247 __ jccb(Assembler::equal, L_doLast); |
|
2248 |
|
2249 __ aesenc(xmm_result, xmm_temp1); |
|
2250 __ aesenc(xmm_result, xmm_temp2); |
|
2251 |
|
2252 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
|
2253 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
2227 |
2254 |
2228 __ BIND(L_doLast); |
2255 __ BIND(L_doLast); |
2229 __ aesenclast(xmm_result, xmm_temp); |
2256 __ aesenc(xmm_result, xmm_temp1); |
|
2257 __ aesenclast(xmm_result, xmm_temp2); |
2230 __ movdqu(Address(to, 0), xmm_result); // store the result |
2258 __ movdqu(Address(to, 0), xmm_result); // store the result |
2231 __ xorptr(rax, rax); // return 0 |
2259 __ xorptr(rax, rax); // return 0 |
2232 __ pop(rsi); |
|
2233 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2260 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2234 __ ret(0); |
2261 __ ret(0); |
2235 |
2262 |
2236 return start; |
2263 return start; |
2237 } |
2264 } |
2243 // c_rarg0 - source byte array address |
2270 // c_rarg0 - source byte array address |
2244 // c_rarg1 - destination byte array address |
2271 // c_rarg1 - destination byte array address |
2245 // c_rarg2 - K (key) in little endian int array |
2272 // c_rarg2 - K (key) in little endian int array |
2246 // |
2273 // |
2247 address generate_aescrypt_decryptBlock() { |
2274 address generate_aescrypt_decryptBlock() { |
2248 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); |
2275 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2249 __ align(CodeEntryAlignment); |
2276 __ align(CodeEntryAlignment); |
2250 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); |
2277 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); |
2251 Label L_doLast; |
2278 Label L_doLast; |
2252 address start = __ pc(); |
2279 address start = __ pc(); |
2253 |
2280 |
2254 const Register from = rsi; // source array address |
2281 const Register from = rdx; // source array address |
2255 const Register to = rdx; // destination array address |
2282 const Register to = rdx; // destination array address |
2256 const Register key = rcx; // key array address |
2283 const Register key = rcx; // key array address |
2257 const Register keylen = rax; |
2284 const Register keylen = rax; |
2258 const Address from_param(rbp, 8+0); |
2285 const Address from_param(rbp, 8+0); |
2259 const Address to_param (rbp, 8+4); |
2286 const Address to_param (rbp, 8+4); |
2260 const Address key_param (rbp, 8+8); |
2287 const Address key_param (rbp, 8+8); |
2261 |
2288 |
2262 const XMMRegister xmm_result = xmm0; |
2289 const XMMRegister xmm_result = xmm0; |
2263 const XMMRegister xmm_temp = xmm1; |
2290 const XMMRegister xmm_key_shuf_mask = xmm1; |
2264 const XMMRegister xmm_key_shuf_mask = xmm2; |
2291 const XMMRegister xmm_temp1 = xmm2; |
|
2292 const XMMRegister xmm_temp2 = xmm3; |
|
2293 const XMMRegister xmm_temp3 = xmm4; |
|
2294 const XMMRegister xmm_temp4 = xmm5; |
2265 |
2295 |
2266 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2296 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2267 __ push(rsi); |
2297 __ movptr(from, from_param); |
2268 __ movptr(from , from_param); |
2298 __ movptr(key, key_param); |
2269 __ movptr(to , to_param); |
2299 |
2270 __ movptr(key , key_param); |
2300 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
2271 |
|
2272 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2301 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2273 // keylen = # of 32-bit words, convert to 128-bit words |
|
2274 __ shrl(keylen, 2); |
|
2275 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more |
|
2276 |
2302 |
2277 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2303 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2278 __ movdqu(xmm_result, Address(from, 0)); |
2304 __ movdqu(xmm_result, Address(from, 0)); |
|
2305 __ movptr(to, to_param); |
2279 |
2306 |
2280 // for decryption java expanded key ordering is rotated one position from what we want |
2307 // for decryption java expanded key ordering is rotated one position from what we want |
2281 // so we start from 0x10 here and hit 0x00 last |
2308 // so we start from 0x10 here and hit 0x00 last |
2282 // we don't know if the key is aligned, hence not using load-execute form |
2309 // we don't know if the key is aligned, hence not using load-execute form |
2283 load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); |
2310 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2284 __ pxor (xmm_result, xmm_temp); |
2311 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
2285 for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { |
2312 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
2286 aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); |
2313 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
2287 } |
2314 |
2288 __ cmpl(keylen, 0); |
2315 __ pxor (xmm_result, xmm_temp1); |
2289 __ jcc(Assembler::equal, L_doLast); |
2316 __ aesdec(xmm_result, xmm_temp2); |
2290 // only in 192 and 256 bit keys |
2317 __ aesdec(xmm_result, xmm_temp3); |
2291 aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); |
2318 __ aesdec(xmm_result, xmm_temp4); |
2292 aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); |
2319 |
2293 __ subl(keylen, 2); |
2320 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
2294 __ jcc(Assembler::equal, L_doLast); |
2321 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
2295 // only in 256 bit keys |
2322 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
2296 aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); |
2323 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
2297 aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); |
2324 |
|
2325 __ aesdec(xmm_result, xmm_temp1); |
|
2326 __ aesdec(xmm_result, xmm_temp2); |
|
2327 __ aesdec(xmm_result, xmm_temp3); |
|
2328 __ aesdec(xmm_result, xmm_temp4); |
|
2329 |
|
2330 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
|
2331 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
|
2332 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); |
|
2333 |
|
2334 __ cmpl(keylen, 44); |
|
2335 __ jccb(Assembler::equal, L_doLast); |
|
2336 |
|
2337 __ aesdec(xmm_result, xmm_temp1); |
|
2338 __ aesdec(xmm_result, xmm_temp2); |
|
2339 |
|
2340 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
|
2341 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
|
2342 |
|
2343 __ cmpl(keylen, 52); |
|
2344 __ jccb(Assembler::equal, L_doLast); |
|
2345 |
|
2346 __ aesdec(xmm_result, xmm_temp1); |
|
2347 __ aesdec(xmm_result, xmm_temp2); |
|
2348 |
|
2349 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
|
2350 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
2298 |
2351 |
2299 __ BIND(L_doLast); |
2352 __ BIND(L_doLast); |
|
2353 __ aesdec(xmm_result, xmm_temp1); |
|
2354 __ aesdec(xmm_result, xmm_temp2); |
|
2355 |
2300 // for decryption the aesdeclast operation is always on key+0x00 |
2356 // for decryption the aesdeclast operation is always on key+0x00 |
2301 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); |
2357 __ aesdeclast(xmm_result, xmm_temp3); |
2302 __ aesdeclast(xmm_result, xmm_temp); |
|
2303 |
|
2304 __ movdqu(Address(to, 0), xmm_result); // store the result |
2358 __ movdqu(Address(to, 0), xmm_result); // store the result |
2305 |
|
2306 __ xorptr(rax, rax); // return 0 |
2359 __ xorptr(rax, rax); // return 0 |
2307 __ pop(rsi); |
|
2308 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2360 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2309 __ ret(0); |
2361 __ ret(0); |
2310 |
2362 |
2311 return start; |
2363 return start; |
2312 } |
2364 } |