3302 if (UseBlockZeroing) { |
3302 if (UseBlockZeroing) { |
3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); |
3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); |
3304 } |
3304 } |
3305 } |
3305 } |
3306 |
3306 |
|
3307 address generate_aescrypt_encryptBlock() { |
|
3308 __ align(CodeEntryAlignment); |
|
3309 StubCodeMark mark(this, "StubRoutines", "aesencryptBlock"); |
|
3310 Label L_doLast128bit, L_storeOutput; |
|
3311 address start = __ pc(); |
|
3312 Register from = O0; // source byte array |
|
3313 Register to = O1; // destination byte array |
|
3314 Register key = O2; // expanded key array |
|
3315 const Register keylen = O4; //reg for storing expanded key array length |
|
3316 |
|
3317 // read expanded key length |
|
3318 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); |
|
3319 |
|
3320 // load input into F54-F56; F30-F31 used as temp |
|
3321 __ ldf(FloatRegisterImpl::S, from, 0, F30); |
|
3322 __ ldf(FloatRegisterImpl::S, from, 4, F31); |
|
3323 __ fmov(FloatRegisterImpl::D, F30, F54); |
|
3324 __ ldf(FloatRegisterImpl::S, from, 8, F30); |
|
3325 __ ldf(FloatRegisterImpl::S, from, 12, F31); |
|
3326 __ fmov(FloatRegisterImpl::D, F30, F56); |
|
3327 |
|
3328 // load expanded key |
|
3329 for ( int i = 0; i <= 38; i += 2 ) { |
|
3330 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i)); |
|
3331 } |
|
3332 |
|
3333 // perform cipher transformation |
|
3334 __ fxor(FloatRegisterImpl::D, F0, F54, F54); |
|
3335 __ fxor(FloatRegisterImpl::D, F2, F56, F56); |
|
3336 // rounds 1 through 8 |
|
3337 for ( int i = 4; i <= 28; i += 8 ) { |
|
3338 __ aes_eround01(as_FloatRegister(i), F54, F56, F58); |
|
3339 __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60); |
|
3340 __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54); |
|
3341 __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56); |
|
3342 } |
|
3343 __ aes_eround01(F36, F54, F56, F58); //round 9 |
|
3344 __ aes_eround23(F38, F54, F56, F60); |
|
3345 |
|
3346 // 128-bit original key size |
|
3347 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit); |
|
3348 |
|
3349 for ( int i = 40; i <= 50; i += 2 ) { |
|
3350 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) ); |
|
3351 } |
|
3352 __ aes_eround01(F40, F58, F60, F54); //round 10 |
|
3353 __ aes_eround23(F42, F58, F60, F56); |
|
3354 __ aes_eround01(F44, F54, F56, F58); //round 11 |
|
3355 __ aes_eround23(F46, F54, F56, F60); |
|
3356 |
|
3357 // 192-bit original key size |
|
3358 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput); |
|
3359 |
|
3360 __ ldf(FloatRegisterImpl::D, key, 208, F52); |
|
3361 __ aes_eround01(F48, F58, F60, F54); //round 12 |
|
3362 __ aes_eround23(F50, F58, F60, F56); |
|
3363 __ ldf(FloatRegisterImpl::D, key, 216, F46); |
|
3364 __ ldf(FloatRegisterImpl::D, key, 224, F48); |
|
3365 __ ldf(FloatRegisterImpl::D, key, 232, F50); |
|
3366 __ aes_eround01(F52, F54, F56, F58); //round 13 |
|
3367 __ aes_eround23(F46, F54, F56, F60); |
|
3368 __ br(Assembler::always, false, Assembler::pt, L_storeOutput); |
|
3369 __ delayed()->nop(); |
|
3370 |
|
3371 __ BIND(L_doLast128bit); |
|
3372 __ ldf(FloatRegisterImpl::D, key, 160, F48); |
|
3373 __ ldf(FloatRegisterImpl::D, key, 168, F50); |
|
3374 |
|
3375 __ BIND(L_storeOutput); |
|
3376 // perform last round of encryption common for all key sizes |
|
3377 __ aes_eround01_l(F48, F58, F60, F54); //last round |
|
3378 __ aes_eround23_l(F50, F58, F60, F56); |
|
3379 |
|
3380 // store output into the destination array, F0-F1 used as temp |
|
3381 __ fmov(FloatRegisterImpl::D, F54, F0); |
|
3382 __ stf(FloatRegisterImpl::S, F0, to, 0); |
|
3383 __ stf(FloatRegisterImpl::S, F1, to, 4); |
|
3384 __ fmov(FloatRegisterImpl::D, F56, F0); |
|
3385 __ stf(FloatRegisterImpl::S, F0, to, 8); |
|
3386 __ retl(); |
|
3387 __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); |
|
3388 |
|
3389 return start; |
|
3390 } |
|
3391 |
|
3392 address generate_aescrypt_decryptBlock() { |
|
3393 __ align(CodeEntryAlignment); |
|
3394 StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock"); |
|
3395 address start = __ pc(); |
|
3396 Label L_expand192bit, L_expand256bit, L_common_transform; |
|
3397 Register from = O0; // source byte array |
|
3398 Register to = O1; // destination byte array |
|
3399 Register key = O2; // expanded key array |
|
3400 Register original_key = O3; // original key array only required during decryption |
|
3401 const Register keylen = O4; // reg for storing expanded key array length |
|
3402 |
|
3403 // read expanded key array length |
|
3404 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); |
|
3405 |
|
3406 // load input into F52-F54; F30,F31 used as temp |
|
3407 __ ldf(FloatRegisterImpl::S, from, 0, F30); |
|
3408 __ ldf(FloatRegisterImpl::S, from, 4, F31); |
|
3409 __ fmov(FloatRegisterImpl::D, F30, F52); |
|
3410 __ ldf(FloatRegisterImpl::S, from, 8, F30); |
|
3411 __ ldf(FloatRegisterImpl::S, from, 12, F31); |
|
3412 __ fmov(FloatRegisterImpl::D, F30, F54); |
|
3413 |
|
3414 // load original key from SunJCE expanded decryption key |
|
3415 for ( int i = 0; i <= 3; i++ ) { |
|
3416 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); |
|
3417 } |
|
3418 |
|
3419 // 256-bit original key size |
|
3420 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); |
|
3421 |
|
3422 // 192-bit original key size |
|
3423 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); |
|
3424 |
|
3425 // 128-bit original key size |
|
3426 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions |
|
3427 for ( int i = 0; i <= 36; i += 4 ) { |
|
3428 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); |
|
3429 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); |
|
3430 } |
|
3431 |
|
3432 // perform 128-bit key specific inverse cipher transformation |
|
3433 __ fxor(FloatRegisterImpl::D, F42, F54, F54); |
|
3434 __ fxor(FloatRegisterImpl::D, F40, F52, F52); |
|
3435 __ br(Assembler::always, false, Assembler::pt, L_common_transform); |
|
3436 __ delayed()->nop(); |
|
3437 |
|
3438 __ BIND(L_expand192bit); |
|
3439 |
|
3440 // start loading rest of the 192-bit key |
|
3441 __ ldf(FloatRegisterImpl::S, original_key, 16, F4); |
|
3442 __ ldf(FloatRegisterImpl::S, original_key, 20, F5); |
|
3443 |
|
3444 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions |
|
3445 for ( int i = 0; i <= 36; i += 6 ) { |
|
3446 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); |
|
3447 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); |
|
3448 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); |
|
3449 } |
|
3450 __ aes_kexpand1(F42, F46, 7, F48); |
|
3451 __ aes_kexpand2(F44, F48, F50); |
|
3452 |
|
3453 // perform 192-bit key specific inverse cipher transformation |
|
3454 __ fxor(FloatRegisterImpl::D, F50, F54, F54); |
|
3455 __ fxor(FloatRegisterImpl::D, F48, F52, F52); |
|
3456 __ aes_dround23(F46, F52, F54, F58); |
|
3457 __ aes_dround01(F44, F52, F54, F56); |
|
3458 __ aes_dround23(F42, F56, F58, F54); |
|
3459 __ aes_dround01(F40, F56, F58, F52); |
|
3460 __ br(Assembler::always, false, Assembler::pt, L_common_transform); |
|
3461 __ delayed()->nop(); |
|
3462 |
|
3463 __ BIND(L_expand256bit); |
|
3464 |
|
3465 // load rest of the 256-bit key |
|
3466 for ( int i = 4; i <= 7; i++ ) { |
|
3467 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); |
|
3468 } |
|
3469 |
|
3470 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions |
|
3471 for ( int i = 0; i <= 40; i += 8 ) { |
|
3472 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); |
|
3473 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); |
|
3474 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); |
|
3475 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); |
|
3476 } |
|
3477 __ aes_kexpand1(F48, F54, 6, F56); |
|
3478 __ aes_kexpand2(F50, F56, F58); |
|
3479 |
|
3480 for ( int i = 0; i <= 6; i += 2 ) { |
|
3481 __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i)); |
|
3482 } |
|
3483 |
|
3484 // load input into F52-F54 |
|
3485 __ ldf(FloatRegisterImpl::D, from, 0, F52); |
|
3486 __ ldf(FloatRegisterImpl::D, from, 8, F54); |
|
3487 |
|
3488 // perform 256-bit key specific inverse cipher transformation |
|
3489 __ fxor(FloatRegisterImpl::D, F0, F54, F54); |
|
3490 __ fxor(FloatRegisterImpl::D, F2, F52, F52); |
|
3491 __ aes_dround23(F4, F52, F54, F58); |
|
3492 __ aes_dround01(F6, F52, F54, F56); |
|
3493 __ aes_dround23(F50, F56, F58, F54); |
|
3494 __ aes_dround01(F48, F56, F58, F52); |
|
3495 __ aes_dround23(F46, F52, F54, F58); |
|
3496 __ aes_dround01(F44, F52, F54, F56); |
|
3497 __ aes_dround23(F42, F56, F58, F54); |
|
3498 __ aes_dround01(F40, F56, F58, F52); |
|
3499 |
|
3500 for ( int i = 0; i <= 7; i++ ) { |
|
3501 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); |
|
3502 } |
|
3503 |
|
3504 // perform inverse cipher transformations common for all key sizes |
|
3505 __ BIND(L_common_transform); |
|
3506 for ( int i = 38; i >= 6; i -= 8 ) { |
|
3507 __ aes_dround23(as_FloatRegister(i), F52, F54, F58); |
|
3508 __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56); |
|
3509 if ( i != 6) { |
|
3510 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54); |
|
3511 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52); |
|
3512 } else { |
|
3513 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54); |
|
3514 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52); |
|
3515 } |
|
3516 } |
|
3517 |
|
3518 // store output to destination array, F0-F1 used as temp |
|
3519 __ fmov(FloatRegisterImpl::D, F52, F0); |
|
3520 __ stf(FloatRegisterImpl::S, F0, to, 0); |
|
3521 __ stf(FloatRegisterImpl::S, F1, to, 4); |
|
3522 __ fmov(FloatRegisterImpl::D, F54, F0); |
|
3523 __ stf(FloatRegisterImpl::S, F0, to, 8); |
|
3524 __ retl(); |
|
3525 __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); |
|
3526 |
|
3527 return start; |
|
3528 } |
|
3529 |
|
3530 address generate_cipherBlockChaining_encryptAESCrypt() { |
|
3531 __ align(CodeEntryAlignment); |
|
3532 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); |
|
3533 Label L_cbcenc128, L_cbcenc192, L_cbcenc256; |
|
3534 address start = __ pc(); |
|
3535 Register from = O0; // source byte array |
|
3536 Register to = O1; // destination byte array |
|
3537 Register key = O2; // expanded key array |
|
3538 Register rvec = O3; // init vector |
|
3539 const Register len_reg = O4; // cipher length |
|
3540 const Register keylen = O5; // reg for storing expanded key array length |
|
3541 |
|
3542 // save cipher len to return in the end |
|
3543 __ mov(len_reg, L1); |
|
3544 |
|
3545 // read expanded key length |
|
3546 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); |
|
3547 |
|
3548 // load init vector |
|
3549 __ ldf(FloatRegisterImpl::D, rvec, 0, F60); |
|
3550 __ ldf(FloatRegisterImpl::D, rvec, 8, F62); |
|
3551 __ ldx(key,0,G1); |
|
3552 __ ldx(key,8,G2); |
|
3553 |
|
3554 // start loading expanded key |
|
3555 for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) { |
|
3556 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); |
|
3557 } |
|
3558 |
|
3559 // 128-bit original key size |
|
3560 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128); |
|
3561 |
|
3562 for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) { |
|
3563 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); |
|
3564 } |
|
3565 |
|
3566 // 192-bit original key size |
|
3567 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192); |
|
3568 |
|
3569 for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) { |
|
3570 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); |
|
3571 } |
|
3572 |
|
3573 // 256-bit original key size |
|
3574 __ br(Assembler::always, false, Assembler::pt, L_cbcenc256); |
|
3575 __ delayed()->nop(); |
|
3576 |
|
3577 __ align(OptoLoopAlignment); |
|
3578 __ BIND(L_cbcenc128); |
|
3579 __ ldx(from,0,G3); |
|
3580 __ ldx(from,8,G4); |
|
3581 __ xor3(G1,G3,G3); |
|
3582 __ xor3(G2,G4,G4); |
|
3583 __ movxtod(G3,F56); |
|
3584 __ movxtod(G4,F58); |
|
3585 __ fxor(FloatRegisterImpl::D, F60, F56, F60); |
|
3586 __ fxor(FloatRegisterImpl::D, F62, F58, F62); |
|
3587 |
|
3588 // TEN_EROUNDS |
|
3589 for ( int i = 0; i <= 32; i += 8 ) { |
|
3590 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); |
|
3591 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); |
|
3592 if (i != 32 ) { |
|
3593 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); |
|
3594 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); |
|
3595 } else { |
|
3596 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); |
|
3597 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); |
|
3598 } |
|
3599 } |
|
3600 |
|
3601 __ stf(FloatRegisterImpl::D, F60, to, 0); |
|
3602 __ stf(FloatRegisterImpl::D, F62, to, 8); |
|
3603 __ add(from, 16, from); |
|
3604 __ add(to, 16, to); |
|
3605 __ subcc(len_reg, 16, len_reg); |
|
3606 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128); |
|
3607 __ delayed()->nop(); |
|
3608 __ stf(FloatRegisterImpl::D, F60, rvec, 0); |
|
3609 __ stf(FloatRegisterImpl::D, F62, rvec, 8); |
|
3610 __ retl(); |
|
3611 __ delayed()->mov(L1, O0); |
|
3612 |
|
3613 __ align(OptoLoopAlignment); |
|
3614 __ BIND(L_cbcenc192); |
|
3615 __ ldx(from,0,G3); |
|
3616 __ ldx(from,8,G4); |
|
3617 __ xor3(G1,G3,G3); |
|
3618 __ xor3(G2,G4,G4); |
|
3619 __ movxtod(G3,F56); |
|
3620 __ movxtod(G4,F58); |
|
3621 __ fxor(FloatRegisterImpl::D, F60, F56, F60); |
|
3622 __ fxor(FloatRegisterImpl::D, F62, F58, F62); |
|
3623 |
|
3624 // TWELEVE_EROUNDS |
|
3625 for ( int i = 0; i <= 40; i += 8 ) { |
|
3626 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); |
|
3627 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); |
|
3628 if (i != 40 ) { |
|
3629 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); |
|
3630 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); |
|
3631 } else { |
|
3632 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); |
|
3633 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); |
|
3634 } |
|
3635 } |
|
3636 |
|
3637 __ stf(FloatRegisterImpl::D, F60, to, 0); |
|
3638 __ stf(FloatRegisterImpl::D, F62, to, 8); |
|
3639 __ add(from, 16, from); |
|
3640 __ subcc(len_reg, 16, len_reg); |
|
3641 __ add(to, 16, to); |
|
3642 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192); |
|
3643 __ delayed()->nop(); |
|
3644 __ stf(FloatRegisterImpl::D, F60, rvec, 0); |
|
3645 __ stf(FloatRegisterImpl::D, F62, rvec, 8); |
|
3646 __ retl(); |
|
3647 __ delayed()->mov(L1, O0); |
|
3648 |
|
3649 __ align(OptoLoopAlignment); |
|
3650 __ BIND(L_cbcenc256); |
|
3651 __ ldx(from,0,G3); |
|
3652 __ ldx(from,8,G4); |
|
3653 __ xor3(G1,G3,G3); |
|
3654 __ xor3(G2,G4,G4); |
|
3655 __ movxtod(G3,F56); |
|
3656 __ movxtod(G4,F58); |
|
3657 __ fxor(FloatRegisterImpl::D, F60, F56, F60); |
|
3658 __ fxor(FloatRegisterImpl::D, F62, F58, F62); |
|
3659 |
|
3660 // FOURTEEN_EROUNDS |
|
3661 for ( int i = 0; i <= 48; i += 8 ) { |
|
3662 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); |
|
3663 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); |
|
3664 if (i != 48 ) { |
|
3665 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); |
|
3666 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); |
|
3667 } else { |
|
3668 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); |
|
3669 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); |
|
3670 } |
|
3671 } |
|
3672 |
|
3673 __ stf(FloatRegisterImpl::D, F60, to, 0); |
|
3674 __ stf(FloatRegisterImpl::D, F62, to, 8); |
|
3675 __ add(from, 16, from); |
|
3676 __ subcc(len_reg, 16, len_reg); |
|
3677 __ add(to, 16, to); |
|
3678 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256); |
|
3679 __ delayed()->nop(); |
|
3680 __ stf(FloatRegisterImpl::D, F60, rvec, 0); |
|
3681 __ stf(FloatRegisterImpl::D, F62, rvec, 8); |
|
3682 __ retl(); |
|
3683 __ delayed()->mov(L1, O0); |
|
3684 |
|
3685 return start; |
|
3686 } |
|
3687 |
|
3688 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { |
|
3689 __ align(CodeEntryAlignment); |
|
3690 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); |
|
3691 Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start; |
|
3692 Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256; |
|
3693 address start = __ pc(); |
|
3694 Register from = I0; // source byte array |
|
3695 Register to = I1; // destination byte array |
|
3696 Register key = I2; // expanded key array |
|
3697 Register rvec = I3; // init vector |
|
3698 const Register len_reg = I4; // cipher length |
|
3699 const Register original_key = I5; // original key array only required during decryption |
|
3700 const Register keylen = L6; // reg for storing expanded key array length |
|
3701 |
|
3702 // save cipher len before save_frame, to return in the end |
|
3703 __ mov(O4, L0); |
|
3704 __ save_frame(0); //args are read from I* registers since we save the frame in the beginning |
|
3705 |
|
3706 // load original key from SunJCE expanded decryption key |
|
3707 for ( int i = 0; i <= 3; i++ ) { |
|
3708 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); |
|
3709 } |
|
3710 |
|
3711 // load initial vector |
|
3712 __ ldx(rvec,0,L0); |
|
3713 __ ldx(rvec,8,L1); |
|
3714 |
|
3715 // read expanded key array length |
|
3716 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); |
|
3717 |
|
3718 // 256-bit original key size |
|
3719 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); |
|
3720 |
|
3721 // 192-bit original key size |
|
3722 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); |
|
3723 |
|
3724 // 128-bit original key size |
|
3725 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions |
|
3726 for ( int i = 0; i <= 36; i += 4 ) { |
|
3727 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); |
|
3728 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); |
|
3729 } |
|
3730 |
|
3731 // load expanded key[last-1] and key[last] elements |
|
3732 __ movdtox(F40,L2); |
|
3733 __ movdtox(F42,L3); |
|
3734 |
|
3735 __ and3(len_reg, 16, L4); |
|
3736 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128); |
|
3737 __ delayed()->nop(); |
|
3738 |
|
3739 __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); |
|
3740 __ delayed()->nop(); |
|
3741 |
|
3742 __ BIND(L_expand192bit); |
|
3743 // load rest of the 192-bit key |
|
3744 __ ldf(FloatRegisterImpl::S, original_key, 16, F4); |
|
3745 __ ldf(FloatRegisterImpl::S, original_key, 20, F5); |
|
3746 |
|
3747 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions |
|
3748 for ( int i = 0; i <= 36; i += 6 ) { |
|
3749 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); |
|
3750 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); |
|
3751 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); |
|
3752 } |
|
3753 __ aes_kexpand1(F42, F46, 7, F48); |
|
3754 __ aes_kexpand2(F44, F48, F50); |
|
3755 |
|
3756 // load expanded key[last-1] and key[last] elements |
|
3757 __ movdtox(F48,L2); |
|
3758 __ movdtox(F50,L3); |
|
3759 |
|
3760 __ and3(len_reg, 16, L4); |
|
3761 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192); |
|
3762 __ delayed()->nop(); |
|
3763 |
|
3764 __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); |
|
3765 __ delayed()->nop(); |
|
3766 |
|
3767 __ BIND(L_expand256bit); |
|
3768 // load rest of the 256-bit key |
|
3769 for ( int i = 4; i <= 7; i++ ) { |
|
3770 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); |
|
3771 } |
|
3772 |
|
3773 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions |
|
3774 for ( int i = 0; i <= 40; i += 8 ) { |
|
3775 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); |
|
3776 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); |
|
3777 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); |
|
3778 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); |
|
3779 } |
|
3780 __ aes_kexpand1(F48, F54, 6, F56); |
|
3781 __ aes_kexpand2(F50, F56, F58); |
|
3782 |
|
3783 // load expanded key[last-1] and key[last] elements |
|
3784 __ movdtox(F56,L2); |
|
3785 __ movdtox(F58,L3); |
|
3786 |
|
3787 __ and3(len_reg, 16, L4); |
|
3788 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256); |
|
3789 __ delayed()->nop(); |
|
3790 |
|
3791 __ BIND(L_dec_first_block_start); |
|
3792 __ ldx(from,0,L4); |
|
3793 __ ldx(from,8,L5); |
|
3794 __ xor3(L2,L4,G1); |
|
3795 __ movxtod(G1,F60); |
|
3796 __ xor3(L3,L5,G1); |
|
3797 __ movxtod(G1,F62); |
|
3798 |
|
3799 // 128-bit original key size |
|
3800 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128); |
|
3801 |
|
3802 // 192-bit original key size |
|
3803 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192); |
|
3804 |
|
3805 __ aes_dround23(F54, F60, F62, F58); |
|
3806 __ aes_dround01(F52, F60, F62, F56); |
|
3807 __ aes_dround23(F50, F56, F58, F62); |
|
3808 __ aes_dround01(F48, F56, F58, F60); |
|
3809 |
|
3810 __ BIND(L_dec_first_block192); |
|
3811 __ aes_dround23(F46, F60, F62, F58); |
|
3812 __ aes_dround01(F44, F60, F62, F56); |
|
3813 __ aes_dround23(F42, F56, F58, F62); |
|
3814 __ aes_dround01(F40, F56, F58, F60); |
|
3815 |
|
3816 __ BIND(L_dec_first_block128); |
|
3817 for ( int i = 38; i >= 6; i -= 8 ) { |
|
3818 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); |
|
3819 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); |
|
3820 if ( i != 6) { |
|
3821 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); |
|
3822 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); |
|
3823 } else { |
|
3824 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); |
|
3825 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); |
|
3826 } |
|
3827 } |
|
3828 |
|
3829 __ movxtod(L0,F56); |
|
3830 __ movxtod(L1,F58); |
|
3831 __ mov(L4,L0); |
|
3832 __ mov(L5,L1); |
|
3833 __ fxor(FloatRegisterImpl::D, F56, F60, F60); |
|
3834 __ fxor(FloatRegisterImpl::D, F58, F62, F62); |
|
3835 |
|
3836 __ stf(FloatRegisterImpl::D, F60, to, 0); |
|
3837 __ stf(FloatRegisterImpl::D, F62, to, 8); |
|
3838 |
|
3839 __ add(from, 16, from); |
|
3840 __ add(to, 16, to); |
|
3841 __ subcc(len_reg, 16, len_reg); |
|
3842 __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end); |
|
3843 __ delayed()->nop(); |
|
3844 |
|
3845 // 256-bit original key size |
|
3846 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256); |
|
3847 |
|
3848 // 192-bit original key size |
|
3849 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192); |
|
3850 |
|
3851 __ align(OptoLoopAlignment); |
|
3852 __ BIND(L_dec_next2_blocks128); |
|
3853 __ nop(); |
|
3854 |
|
3855 // F40:F42 used for first 16-bytes |
|
3856 __ ldx(from,0,G4); |
|
3857 __ ldx(from,8,G5); |
|
3858 __ xor3(L2,G4,G1); |
|
3859 __ movxtod(G1,F40); |
|
3860 __ xor3(L3,G5,G1); |
|
3861 __ movxtod(G1,F42); |
|
3862 |
|
3863 // F60:F62 used for next 16-bytes |
|
3864 __ ldx(from,16,L4); |
|
3865 __ ldx(from,24,L5); |
|
3866 __ xor3(L2,L4,G1); |
|
3867 __ movxtod(G1,F60); |
|
3868 __ xor3(L3,L5,G1); |
|
3869 __ movxtod(G1,F62); |
|
3870 |
|
3871 for ( int i = 38; i >= 6; i -= 8 ) { |
|
3872 __ aes_dround23(as_FloatRegister(i), F40, F42, F44); |
|
3873 __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46); |
|
3874 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); |
|
3875 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); |
|
3876 if (i != 6 ) { |
|
3877 __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42); |
|
3878 __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40); |
|
3879 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); |
|
3880 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); |
|
3881 } else { |
|
3882 __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42); |
|
3883 __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40); |
|
3884 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); |
|
3885 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); |
|
3886 } |
|
3887 } |
|
3888 |
|
3889 __ movxtod(L0,F46); |
|
3890 __ movxtod(L1,F44); |
|
3891 __ fxor(FloatRegisterImpl::D, F46, F40, F40); |
|
3892 __ fxor(FloatRegisterImpl::D, F44, F42, F42); |
|
3893 |
|
3894 __ stf(FloatRegisterImpl::D, F40, to, 0); |
|
3895 __ stf(FloatRegisterImpl::D, F42, to, 8); |
|
3896 |
|
3897 __ movxtod(G4,F56); |
|
3898 __ movxtod(G5,F58); |
|
3899 __ mov(L4,L0); |
|
3900 __ mov(L5,L1); |
|
3901 __ fxor(FloatRegisterImpl::D, F56, F60, F60); |
|
3902 __ fxor(FloatRegisterImpl::D, F58, F62, F62); |
|
3903 |
|
3904 __ stf(FloatRegisterImpl::D, F60, to, 16); |
|
3905 __ stf(FloatRegisterImpl::D, F62, to, 24); |
|
3906 |
|
3907 __ add(from, 32, from); |
|
3908 __ add(to, 32, to); |
|
3909 __ subcc(len_reg, 32, len_reg); |
|
3910 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128); |
|
3911 __ delayed()->nop(); |
|
3912 __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); |
|
3913 __ delayed()->nop(); |
|
3914 |
|
3915 __ align(OptoLoopAlignment); |
|
3916 __ BIND(L_dec_next2_blocks192); |
|
3917 __ nop(); |
|
3918 |
|
3919 // F48:F50 used for first 16-bytes |
|
3920 __ ldx(from,0,G4); |
|
3921 __ ldx(from,8,G5); |
|
3922 __ xor3(L2,G4,G1); |
|
3923 __ movxtod(G1,F48); |
|
3924 __ xor3(L3,G5,G1); |
|
3925 __ movxtod(G1,F50); |
|
3926 |
|
3927 // F60:F62 used for next 16-bytes |
|
3928 __ ldx(from,16,L4); |
|
3929 __ ldx(from,24,L5); |
|
3930 __ xor3(L2,L4,G1); |
|
3931 __ movxtod(G1,F60); |
|
3932 __ xor3(L3,L5,G1); |
|
3933 __ movxtod(G1,F62); |
|
3934 |
|
3935 for ( int i = 46; i >= 6; i -= 8 ) { |
|
3936 __ aes_dround23(as_FloatRegister(i), F48, F50, F52); |
|
3937 __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54); |
|
3938 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); |
|
3939 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); |
|
3940 if (i != 6 ) { |
|
3941 __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50); |
|
3942 __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48); |
|
3943 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); |
|
3944 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); |
|
3945 } else { |
|
3946 __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50); |
|
3947 __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48); |
|
3948 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); |
|
3949 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); |
|
3950 } |
|
3951 } |
|
3952 |
|
3953 __ movxtod(L0,F54); |
|
3954 __ movxtod(L1,F52); |
|
3955 __ fxor(FloatRegisterImpl::D, F54, F48, F48); |
|
3956 __ fxor(FloatRegisterImpl::D, F52, F50, F50); |
|
3957 |
|
3958 __ stf(FloatRegisterImpl::D, F48, to, 0); |
|
3959 __ stf(FloatRegisterImpl::D, F50, to, 8); |
|
3960 |
|
3961 __ movxtod(G4,F56); |
|
3962 __ movxtod(G5,F58); |
|
3963 __ mov(L4,L0); |
|
3964 __ mov(L5,L1); |
|
3965 __ fxor(FloatRegisterImpl::D, F56, F60, F60); |
|
3966 __ fxor(FloatRegisterImpl::D, F58, F62, F62); |
|
3967 |
|
3968 __ stf(FloatRegisterImpl::D, F60, to, 16); |
|
3969 __ stf(FloatRegisterImpl::D, F62, to, 24); |
|
3970 |
|
3971 __ add(from, 32, from); |
|
3972 __ add(to, 32, to); |
|
3973 __ subcc(len_reg, 32, len_reg); |
|
3974 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192); |
|
3975 __ delayed()->nop(); |
|
3976 __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); |
|
3977 __ delayed()->nop(); |
|
3978 |
|
3979 __ align(OptoLoopAlignment); |
|
3980 __ BIND(L_dec_next2_blocks256); |
|
3981 __ nop(); |
|
3982 |
|
3983 // F0:F2 used for first 16-bytes |
|
3984 __ ldx(from,0,G4); |
|
3985 __ ldx(from,8,G5); |
|
3986 __ xor3(L2,G4,G1); |
|
3987 __ movxtod(G1,F0); |
|
3988 __ xor3(L3,G5,G1); |
|
3989 __ movxtod(G1,F2); |
|
3990 |
|
3991 // F60:F62 used for next 16-bytes |
|
3992 __ ldx(from,16,L4); |
|
3993 __ ldx(from,24,L5); |
|
3994 __ xor3(L2,L4,G1); |
|
3995 __ movxtod(G1,F60); |
|
3996 __ xor3(L3,L5,G1); |
|
3997 __ movxtod(G1,F62); |
|
3998 |
|
3999 __ aes_dround23(F54, F0, F2, F4); |
|
4000 __ aes_dround01(F52, F0, F2, F6); |
|
4001 __ aes_dround23(F54, F60, F62, F58); |
|
4002 __ aes_dround01(F52, F60, F62, F56); |
|
4003 __ aes_dround23(F50, F6, F4, F2); |
|
4004 __ aes_dround01(F48, F6, F4, F0); |
|
4005 __ aes_dround23(F50, F56, F58, F62); |
|
4006 __ aes_dround01(F48, F56, F58, F60); |
|
4007 // save F48:F54 in temp registers |
|
4008 __ movdtox(F54,G2); |
|
4009 __ movdtox(F52,G3); |
|
4010 __ movdtox(F50,G6); |
|
4011 __ movdtox(F48,G1); |
|
4012 for ( int i = 46; i >= 14; i -= 8 ) { |
|
4013 __ aes_dround23(as_FloatRegister(i), F0, F2, F4); |
|
4014 __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6); |
|
4015 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); |
|
4016 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); |
|
4017 __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2); |
|
4018 __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0); |
|
4019 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); |
|
4020 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); |
|
4021 } |
|
4022 // init F48:F54 with F0:F6 values (original key) |
|
4023 __ ldf(FloatRegisterImpl::D, original_key, 0, F48); |
|
4024 __ ldf(FloatRegisterImpl::D, original_key, 8, F50); |
|
4025 __ ldf(FloatRegisterImpl::D, original_key, 16, F52); |
|
4026 __ ldf(FloatRegisterImpl::D, original_key, 24, F54); |
|
4027 __ aes_dround23(F54, F0, F2, F4); |
|
4028 __ aes_dround01(F52, F0, F2, F6); |
|
4029 __ aes_dround23(F54, F60, F62, F58); |
|
4030 __ aes_dround01(F52, F60, F62, F56); |
|
4031 __ aes_dround23_l(F50, F6, F4, F2); |
|
4032 __ aes_dround01_l(F48, F6, F4, F0); |
|
4033 __ aes_dround23_l(F50, F56, F58, F62); |
|
4034 __ aes_dround01_l(F48, F56, F58, F60); |
|
4035 // re-init F48:F54 with their original values |
|
4036 __ movxtod(G2,F54); |
|
4037 __ movxtod(G3,F52); |
|
4038 __ movxtod(G6,F50); |
|
4039 __ movxtod(G1,F48); |
|
4040 |
|
4041 __ movxtod(L0,F6); |
|
4042 __ movxtod(L1,F4); |
|
4043 __ fxor(FloatRegisterImpl::D, F6, F0, F0); |
|
4044 __ fxor(FloatRegisterImpl::D, F4, F2, F2); |
|
4045 |
|
4046 __ stf(FloatRegisterImpl::D, F0, to, 0); |
|
4047 __ stf(FloatRegisterImpl::D, F2, to, 8); |
|
4048 |
|
4049 __ movxtod(G4,F56); |
|
4050 __ movxtod(G5,F58); |
|
4051 __ mov(L4,L0); |
|
4052 __ mov(L5,L1); |
|
4053 __ fxor(FloatRegisterImpl::D, F56, F60, F60); |
|
4054 __ fxor(FloatRegisterImpl::D, F58, F62, F62); |
|
4055 |
|
4056 __ stf(FloatRegisterImpl::D, F60, to, 16); |
|
4057 __ stf(FloatRegisterImpl::D, F62, to, 24); |
|
4058 |
|
4059 __ add(from, 32, from); |
|
4060 __ add(to, 32, to); |
|
4061 __ subcc(len_reg, 32, len_reg); |
|
4062 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256); |
|
4063 __ delayed()->nop(); |
|
4064 |
|
4065 __ BIND(L_cbcdec_end); |
|
4066 __ stx(L0, rvec, 0); |
|
4067 __ stx(L1, rvec, 8); |
|
4068 __ restore(); |
|
4069 __ mov(L0, O0); |
|
4070 __ retl(); |
|
4071 __ delayed()->nop(); |
|
4072 |
|
4073 return start; |
|
4074 } |
|
4075 |
3307 void generate_initial() { |
4076 void generate_initial() { |
3308 // Generates all stubs and initializes the entry points |
4077 // Generates all stubs and initializes the entry points |
3309 |
4078 |
3310 //------------------------------------------------------------------------------------------------------------------------ |
4079 //------------------------------------------------------------------------------------------------------------------------ |
3311 // entry points that exist in all platforms |
4080 // entry points that exist in all platforms |