Wed, 30 Apr 2014 14:14:01 -0700
8035936: SIGBUS in StubRoutines::aesencryptBlock, solaris-sparc
Summary: Fix the arbitrary alignment issue in SPARC AES crypto stub routines.
Reviewed-by: kvn, iveresov
Contributed-by: shrinivas.joshi@oracle.com
1.1 --- a/src/cpu/sparc/vm/assembler_sparc.hpp Thu May 01 15:02:46 2014 -0700 1.2 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Wed Apr 30 14:14:01 2014 -0700 1.3 @@ -1,5 +1,5 @@ 1.4 /* 1.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 1.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 * 1.9 * This code is free software; you can redistribute it and/or modify it 1.10 @@ -123,8 +123,13 @@ 1.11 fpop2_op3 = 0x35, 1.12 impdep1_op3 = 0x36, 1.13 aes3_op3 = 0x36, 1.14 + alignaddr_op3 = 0x36, 1.15 + faligndata_op3 = 0x36, 1.16 flog3_op3 = 0x36, 1.17 + edge_op3 = 0x36, 1.18 + fsrc_op3 = 0x36, 1.19 impdep2_op3 = 0x37, 1.20 + stpartialf_op3 = 0x37, 1.21 jmpl_op3 = 0x38, 1.22 rett_op3 = 0x39, 1.23 trap_op3 = 0x3a, 1.24 @@ -175,17 +180,23 @@ 1.25 1.26 enum opfs { 1.27 // selected opfs 1.28 + edge8n_opf = 0x01, 1.29 + 1.30 fmovs_opf = 0x01, 1.31 fmovd_opf = 0x02, 1.32 1.33 fnegs_opf = 0x05, 1.34 fnegd_opf = 0x06, 1.35 1.36 + alignaddr_opf = 0x18, 1.37 + 1.38 fadds_opf = 0x41, 1.39 faddd_opf = 0x42, 1.40 fsubs_opf = 0x45, 1.41 fsubd_opf = 0x46, 1.42 1.43 + faligndata_opf = 0x48, 1.44 + 1.45 fmuls_opf = 0x49, 1.46 fmuld_opf = 0x4a, 1.47 fdivs_opf = 0x4d, 1.48 @@ -348,6 +359,8 @@ 1.49 ASI_PRIMARY = 0x80, 1.50 ASI_PRIMARY_NOFAULT = 0x82, 1.51 ASI_PRIMARY_LITTLE = 0x88, 1.52 + // 8x8-bit partial store 1.53 + ASI_PST8_PRIMARY = 0xC0, 1.54 // Block initializing store 1.55 ASI_ST_BLKINIT_PRIMARY = 0xE2, 1.56 // Most-Recently-Used (MRU) BIS variant 1.57 @@ -585,6 +598,9 @@ 1.58 // instruction only in VIS1 1.59 static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); } 1.60 1.61 + // instruction only in VIS2 1.62 + static void vis2_only() { assert( VM_Version::has_vis2(), "This instruction only works on SPARC with VIS2"); } 1.63 + 1.64 // instruction only in VIS3 1.65 static void vis3_only() { assert( 
VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); } 1.66 1.67 @@ -1164,6 +1180,20 @@ 1.68 inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); } 1.69 1.70 1.71 + // VIS1 instructions 1.72 + 1.73 + void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); } 1.74 + 1.75 + void faligndata( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D)); } 1.76 + 1.77 + void fsrc2( FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w)); } 1.78 + 1.79 + void stpartialf( Register s1, Register s2, FloatRegister d, int ia = -1 ) { vis1_only(); emit_int32( op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); } 1.80 + 1.81 + // VIS2 instructions 1.82 + 1.83 + void edge8n( Register s1, Register s2, Register d ) { vis2_only(); emit_int32( op(arith_op) | rd(d) | op3(edge_op3) | rs1(s1) | opf(edge8n_opf) | rs2(s2)); } 1.84 + 1.85 // VIS3 instructions 1.86 1.87 void movstosw( FloatRegister s, Register d ) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
2.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu May 01 15:02:46 2014 -0700 2.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Apr 30 14:14:01 2014 -0700 2.3 @@ -1,5 +1,5 @@ 2.4 /* 2.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 2.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 2.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.8 * 2.9 * This code is free software; you can redistribute it and/or modify it 2.10 @@ -3305,9 +3305,12 @@ 2.11 } 2.12 2.13 address generate_aescrypt_encryptBlock() { 2.14 + // required since we read expanded key 'int' array starting first element without alignment considerations 2.15 + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, 2.16 + "the following code assumes that first element of an int array is aligned to 8 bytes"); 2.17 __ align(CodeEntryAlignment); 2.18 - StubCodeMark mark(this, "StubRoutines", "aesencryptBlock"); 2.19 - Label L_doLast128bit, L_storeOutput; 2.20 + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); 2.21 + Label L_load_misaligned_input, L_load_expanded_key, L_doLast128bit, L_storeOutput, L_store_misaligned_output; 2.22 address start = __ pc(); 2.23 Register from = O0; // source byte array 2.24 Register to = O1; // destination byte array 2.25 @@ -3317,15 +3320,33 @@ 2.26 // read expanded key length 2.27 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); 2.28 2.29 - // load input into F54-F56; F30-F31 used as temp 2.30 - __ ldf(FloatRegisterImpl::S, from, 0, F30); 2.31 - __ ldf(FloatRegisterImpl::S, from, 4, F31); 2.32 - __ fmov(FloatRegisterImpl::D, F30, F54); 2.33 - __ ldf(FloatRegisterImpl::S, from, 8, F30); 2.34 - __ ldf(FloatRegisterImpl::S, from, 12, F31); 2.35 - __ fmov(FloatRegisterImpl::D, F30, F56); 2.36 - 2.37 - // load expanded key 2.38 + // Method to address arbitrary alignment for load instructions: 2.39 + // 
Check last 3 bits of 'from' address to see if it is aligned to 8-byte boundary 2.40 + // If zero/aligned then continue with double FP load instructions 2.41 + // If not zero/mis-aligned then alignaddr will set GSR.align with number of bytes to skip during faligndata 2.42 + // alignaddr will also convert arbitrary aligned 'from' address to nearest 8-byte aligned address 2.43 + // load 3 * 8-byte components (to read 16 bytes input) in 3 different FP regs starting at this aligned address 2.44 + // faligndata will then extract (based on GSR.align value) the appropriate 8 bytes from the 2 source regs 2.45 + 2.46 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.47 + __ andcc(from, 7, G0); 2.48 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input); 2.49 + __ delayed()->alignaddr(from, G0, from); 2.50 + 2.51 + // aligned case: load input into F54-F56 2.52 + __ ldf(FloatRegisterImpl::D, from, 0, F54); 2.53 + __ ldf(FloatRegisterImpl::D, from, 8, F56); 2.54 + __ ba_short(L_load_expanded_key); 2.55 + 2.56 + __ BIND(L_load_misaligned_input); 2.57 + __ ldf(FloatRegisterImpl::D, from, 0, F54); 2.58 + __ ldf(FloatRegisterImpl::D, from, 8, F56); 2.59 + __ ldf(FloatRegisterImpl::D, from, 16, F58); 2.60 + __ faligndata(F54, F56, F54); 2.61 + __ faligndata(F56, F58, F56); 2.62 + 2.63 + __ BIND(L_load_expanded_key); 2.64 + // Since we load expanded key buffers starting first element, 8-byte alignment is guaranteed 2.65 for ( int i = 0; i <= 38; i += 2 ) { 2.66 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i)); 2.67 } 2.68 @@ -3365,8 +3386,7 @@ 2.69 __ ldf(FloatRegisterImpl::D, key, 232, F50); 2.70 __ aes_eround01(F52, F54, F56, F58); //round 13 2.71 __ aes_eround23(F46, F54, F56, F60); 2.72 - __ br(Assembler::always, false, Assembler::pt, L_storeOutput); 2.73 - __ delayed()->nop(); 2.74 + __ ba_short(L_storeOutput); 2.75 2.76 __ BIND(L_doLast128bit); 2.77 __ ldf(FloatRegisterImpl::D, 
key, 160, F48); 2.78 @@ -3377,23 +3397,62 @@ 2.79 __ aes_eround01_l(F48, F58, F60, F54); //last round 2.80 __ aes_eround23_l(F50, F58, F60, F56); 2.81 2.82 - // store output into the destination array, F0-F1 used as temp 2.83 - __ fmov(FloatRegisterImpl::D, F54, F0); 2.84 - __ stf(FloatRegisterImpl::S, F0, to, 0); 2.85 - __ stf(FloatRegisterImpl::S, F1, to, 4); 2.86 - __ fmov(FloatRegisterImpl::D, F56, F0); 2.87 - __ stf(FloatRegisterImpl::S, F0, to, 8); 2.88 + // Method to address arbitrary alignment for store instructions: 2.89 + // Check last 3 bits of 'dest' address to see if it is aligned to 8-byte boundary 2.90 + // If zero/aligned then continue with double FP store instructions 2.91 + // If not zero/mis-aligned then edge8n will generate edge mask in result reg (O3 in below case) 2.92 + // Example: If dest address is 0x07 and nearest 8-byte aligned address is 0x00 then edge mask will be 00000001 2.93 + // Compute (8-n) where n is # of bytes skipped by partial store(stpartialf) inst from edge mask, n=7 in this case 2.94 + // We get the value of n from the andcc that checks 'dest' alignment. n is available in O5 in below case. 
2.95 + // Set GSR.align to (8-n) using alignaddr 2.96 + // Circular byte shift store values by n places so that the original bytes are at correct position for stpartialf 2.97 + // Set the arbitrarily aligned 'dest' address to nearest 8-byte aligned address 2.98 + // Store (partial) the original first (8-n) bytes starting at the original 'dest' address 2.99 + // Negate the edge mask so that the subsequent stpartialf can store the original (8-n-1)th through 8th bytes at appropriate address 2.100 + // We need to execute this process for both the 8-byte result values 2.101 + 2.102 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.103 + __ andcc(to, 7, O5); 2.104 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output); 2.105 + __ delayed()->edge8n(to, G0, O3); 2.106 + 2.107 + // aligned case: store output into the destination array 2.108 + __ stf(FloatRegisterImpl::D, F54, to, 0); 2.109 __ retl(); 2.110 - __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); 2.111 + __ delayed()->stf(FloatRegisterImpl::D, F56, to, 8); 2.112 + 2.113 + __ BIND(L_store_misaligned_output); 2.114 + __ add(to, 8, O4); 2.115 + __ mov(8, O2); 2.116 + __ sub(O2, O5, O2); 2.117 + __ alignaddr(O2, G0, O2); 2.118 + __ faligndata(F54, F54, F54); 2.119 + __ faligndata(F56, F56, F56); 2.120 + __ and3(to, -8, to); 2.121 + __ and3(O4, -8, O4); 2.122 + __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY); 2.123 + __ stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY); 2.124 + __ add(to, 8, to); 2.125 + __ add(O4, 8, O4); 2.126 + __ orn(G0, O3, O3); 2.127 + __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY); 2.128 + __ retl(); 2.129 + __ delayed()->stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY); 2.130 2.131 return start; 2.132 } 2.133 2.134 address generate_aescrypt_decryptBlock() { 2.135 + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, 2.136 + "the following code assumes that first element of an 
int array is aligned to 8 bytes"); 2.137 + // required since we read original key 'byte' array as well in the decryption stubs 2.138 + assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, 2.139 + "the following code assumes that first element of a byte array is aligned to 8 bytes"); 2.140 __ align(CodeEntryAlignment); 2.141 - StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock"); 2.142 + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); 2.143 address start = __ pc(); 2.144 - Label L_expand192bit, L_expand256bit, L_common_transform; 2.145 + Label L_load_misaligned_input, L_load_original_key, L_expand192bit, L_expand256bit, L_reload_misaligned_input; 2.146 + Label L_256bit_transform, L_common_transform, L_store_misaligned_output; 2.147 Register from = O0; // source byte array 2.148 Register to = O1; // destination byte array 2.149 Register key = O2; // expanded key array 2.150 @@ -3403,15 +3462,29 @@ 2.151 // read expanded key array length 2.152 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); 2.153 2.154 - // load input into F52-F54; F30,F31 used as temp 2.155 - __ ldf(FloatRegisterImpl::S, from, 0, F30); 2.156 - __ ldf(FloatRegisterImpl::S, from, 4, F31); 2.157 - __ fmov(FloatRegisterImpl::D, F30, F52); 2.158 - __ ldf(FloatRegisterImpl::S, from, 8, F30); 2.159 - __ ldf(FloatRegisterImpl::S, from, 12, F31); 2.160 - __ fmov(FloatRegisterImpl::D, F30, F54); 2.161 - 2.162 + // save 'from' since we may need to recheck alignment in case of 256-bit decryption 2.163 + __ mov(from, G1); 2.164 + 2.165 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.166 + __ andcc(from, 7, G0); 2.167 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input); 2.168 + __ delayed()->alignaddr(from, G0, from); 2.169 + 2.170 + // aligned case: load input into F52-F54 2.171 + __ ldf(FloatRegisterImpl::D, from, 0, 
F52); 2.172 + __ ldf(FloatRegisterImpl::D, from, 8, F54); 2.173 + __ ba_short(L_load_original_key); 2.174 + 2.175 + __ BIND(L_load_misaligned_input); 2.176 + __ ldf(FloatRegisterImpl::D, from, 0, F52); 2.177 + __ ldf(FloatRegisterImpl::D, from, 8, F54); 2.178 + __ ldf(FloatRegisterImpl::D, from, 16, F56); 2.179 + __ faligndata(F52, F54, F52); 2.180 + __ faligndata(F54, F56, F54); 2.181 + 2.182 + __ BIND(L_load_original_key); 2.183 // load original key from SunJCE expanded decryption key 2.184 + // Since we load original key buffer starting first element, 8-byte alignment is guaranteed 2.185 for ( int i = 0; i <= 3; i++ ) { 2.186 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); 2.187 } 2.188 @@ -3432,8 +3505,7 @@ 2.189 // perform 128-bit key specific inverse cipher transformation 2.190 __ fxor(FloatRegisterImpl::D, F42, F54, F54); 2.191 __ fxor(FloatRegisterImpl::D, F40, F52, F52); 2.192 - __ br(Assembler::always, false, Assembler::pt, L_common_transform); 2.193 - __ delayed()->nop(); 2.194 + __ ba_short(L_common_transform); 2.195 2.196 __ BIND(L_expand192bit); 2.197 2.198 @@ -3457,8 +3529,7 @@ 2.199 __ aes_dround01(F44, F52, F54, F56); 2.200 __ aes_dround23(F42, F56, F58, F54); 2.201 __ aes_dround01(F40, F56, F58, F52); 2.202 - __ br(Assembler::always, false, Assembler::pt, L_common_transform); 2.203 - __ delayed()->nop(); 2.204 + __ ba_short(L_common_transform); 2.205 2.206 __ BIND(L_expand256bit); 2.207 2.208 @@ -3478,14 +3549,31 @@ 2.209 __ aes_kexpand2(F50, F56, F58); 2.210 2.211 for ( int i = 0; i <= 6; i += 2 ) { 2.212 - __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i)); 2.213 + __ fsrc2(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i)); 2.214 } 2.215 2.216 - // load input into F52-F54 2.217 + // reload original 'from' address 2.218 + __ mov(G1, from); 2.219 + 2.220 + // re-check 8-byte alignment 2.221 + __ andcc(from, 7, G0); 2.222 + __ br(Assembler::notZero, true, Assembler::pn, 
L_reload_misaligned_input); 2.223 + __ delayed()->alignaddr(from, G0, from); 2.224 + 2.225 + // aligned case: load input into F52-F54 2.226 __ ldf(FloatRegisterImpl::D, from, 0, F52); 2.227 __ ldf(FloatRegisterImpl::D, from, 8, F54); 2.228 + __ ba_short(L_256bit_transform); 2.229 + 2.230 + __ BIND(L_reload_misaligned_input); 2.231 + __ ldf(FloatRegisterImpl::D, from, 0, F52); 2.232 + __ ldf(FloatRegisterImpl::D, from, 8, F54); 2.233 + __ ldf(FloatRegisterImpl::D, from, 16, F56); 2.234 + __ faligndata(F52, F54, F52); 2.235 + __ faligndata(F54, F56, F54); 2.236 2.237 // perform 256-bit key specific inverse cipher transformation 2.238 + __ BIND(L_256bit_transform); 2.239 __ fxor(FloatRegisterImpl::D, F0, F54, F54); 2.240 __ fxor(FloatRegisterImpl::D, F2, F52, F52); 2.241 __ aes_dround23(F4, F52, F54, F58); 2.242 @@ -3515,43 +3603,71 @@ 2.243 } 2.244 } 2.245 2.246 - // store output to destination array, F0-F1 used as temp 2.247 - __ fmov(FloatRegisterImpl::D, F52, F0); 2.248 - __ stf(FloatRegisterImpl::S, F0, to, 0); 2.249 - __ stf(FloatRegisterImpl::S, F1, to, 4); 2.250 - __ fmov(FloatRegisterImpl::D, F54, F0); 2.251 - __ stf(FloatRegisterImpl::S, F0, to, 8); 2.252 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.253 + __ andcc(to, 7, O5); 2.254 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output); 2.255 + __ delayed()->edge8n(to, G0, O3); 2.256 + 2.257 + // aligned case: store output into the destination array 2.258 + __ stf(FloatRegisterImpl::D, F52, to, 0); 2.259 __ retl(); 2.260 - __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); 2.261 + __ delayed()->stf(FloatRegisterImpl::D, F54, to, 8); 2.262 + 2.263 + __ BIND(L_store_misaligned_output); 2.264 + __ add(to, 8, O4); 2.265 + __ mov(8, O2); 2.266 + __ sub(O2, O5, O2); 2.267 + __ alignaddr(O2, G0, O2); 2.268 + __ faligndata(F52, F52, F52); 2.269 + __ faligndata(F54, F54, F54); 2.270 + __ and3(to, -8, to); 2.271 + __ and3(O4, 
-8, O4); 2.272 + __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY); 2.273 + __ stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY); 2.274 + __ add(to, 8, to); 2.275 + __ add(O4, 8, O4); 2.276 + __ orn(G0, O3, O3); 2.277 + __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY); 2.278 + __ retl(); 2.279 + __ delayed()->stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY); 2.280 2.281 return start; 2.282 } 2.283 2.284 address generate_cipherBlockChaining_encryptAESCrypt() { 2.285 + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, 2.286 + "the following code assumes that first element of an int array is aligned to 8 bytes"); 2.287 + assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, 2.288 + "the following code assumes that first element of a byte array is aligned to 8 bytes"); 2.289 __ align(CodeEntryAlignment); 2.290 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); 2.291 - Label L_cbcenc128, L_cbcenc192, L_cbcenc256; 2.292 + Label L_cbcenc128, L_load_misaligned_input_128bit, L_128bit_transform, L_store_misaligned_output_128bit; 2.293 + Label L_check_loop_end_128bit, L_cbcenc192, L_load_misaligned_input_192bit, L_192bit_transform; 2.294 + Label L_store_misaligned_output_192bit, L_check_loop_end_192bit, L_cbcenc256, L_load_misaligned_input_256bit; 2.295 + Label L_256bit_transform, L_store_misaligned_output_256bit, L_check_loop_end_256bit; 2.296 address start = __ pc(); 2.297 - Register from = O0; // source byte array 2.298 - Register to = O1; // destination byte array 2.299 - Register key = O2; // expanded key array 2.300 - Register rvec = O3; // init vector 2.301 - const Register len_reg = O4; // cipher length 2.302 - const Register keylen = O5; // reg for storing expanded key array length 2.303 - 2.304 - // save cipher len to return in the end 2.305 - __ mov(len_reg, L1); 2.306 + Register from = I0; // source byte array 2.307 + Register to = I1; // destination byte array 2.308 + Register key = I2; // 
expanded key array 2.309 + Register rvec = I3; // init vector 2.310 + const Register len_reg = I4; // cipher length 2.311 + const Register keylen = I5; // reg for storing expanded key array length 2.312 + 2.313 + // save cipher len before save_frame, to return in the end 2.314 + __ mov(O4, L0); 2.315 + __ save_frame(0); 2.316 2.317 // read expanded key length 2.318 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); 2.319 2.320 - // load init vector 2.321 + // load initial vector, 8-byte alignment is guranteed 2.322 __ ldf(FloatRegisterImpl::D, rvec, 0, F60); 2.323 __ ldf(FloatRegisterImpl::D, rvec, 8, F62); 2.324 + // load key, 8-byte alignment is guranteed 2.325 __ ldx(key,0,G1); 2.326 - __ ldx(key,8,G2); 2.327 - 2.328 - // start loading expanded key 2.329 + __ ldx(key,8,G5); 2.330 + 2.331 + // start loading expanded key, 8-byte alignment is guranteed 2.332 for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) { 2.333 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); 2.334 } 2.335 @@ -3571,15 +3687,35 @@ 2.336 } 2.337 2.338 // 256-bit original key size 2.339 - __ br(Assembler::always, false, Assembler::pt, L_cbcenc256); 2.340 - __ delayed()->nop(); 2.341 + __ ba_short(L_cbcenc256); 2.342 2.343 __ align(OptoLoopAlignment); 2.344 __ BIND(L_cbcenc128); 2.345 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.346 + __ andcc(from, 7, G0); 2.347 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_128bit); 2.348 + __ delayed()->mov(from, L1); // save original 'from' address before alignaddr 2.349 + 2.350 + // aligned case: load input into G3 and G4 2.351 __ ldx(from,0,G3); 2.352 __ ldx(from,8,G4); 2.353 + __ ba_short(L_128bit_transform); 2.354 + 2.355 + __ BIND(L_load_misaligned_input_128bit); 2.356 + // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption 2.357 + __ alignaddr(from, 
G0, from); 2.358 + __ ldf(FloatRegisterImpl::D, from, 0, F48); 2.359 + __ ldf(FloatRegisterImpl::D, from, 8, F50); 2.360 + __ ldf(FloatRegisterImpl::D, from, 16, F52); 2.361 + __ faligndata(F48, F50, F48); 2.362 + __ faligndata(F50, F52, F50); 2.363 + __ movdtox(F48, G3); 2.364 + __ movdtox(F50, G4); 2.365 + __ mov(L1, from); 2.366 + 2.367 + __ BIND(L_128bit_transform); 2.368 __ xor3(G1,G3,G3); 2.369 - __ xor3(G2,G4,G4); 2.370 + __ xor3(G5,G4,G4); 2.371 __ movxtod(G3,F56); 2.372 __ movxtod(G4,F58); 2.373 __ fxor(FloatRegisterImpl::D, F60, F56, F60); 2.374 @@ -3598,24 +3734,81 @@ 2.375 } 2.376 } 2.377 2.378 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.379 + __ andcc(to, 7, L1); 2.380 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_128bit); 2.381 + __ delayed()->edge8n(to, G0, L2); 2.382 + 2.383 + // aligned case: store output into the destination array 2.384 __ stf(FloatRegisterImpl::D, F60, to, 0); 2.385 __ stf(FloatRegisterImpl::D, F62, to, 8); 2.386 + __ ba_short(L_check_loop_end_128bit); 2.387 + 2.388 + __ BIND(L_store_misaligned_output_128bit); 2.389 + __ add(to, 8, L3); 2.390 + __ mov(8, L4); 2.391 + __ sub(L4, L1, L4); 2.392 + __ alignaddr(L4, G0, L4); 2.393 + // save cipher text before circular right shift 2.394 + // as it needs to be stored as iv for next block (see code before next retl) 2.395 + __ movdtox(F60, L6); 2.396 + __ movdtox(F62, L7); 2.397 + __ faligndata(F60, F60, F60); 2.398 + __ faligndata(F62, F62, F62); 2.399 + __ mov(to, L5); 2.400 + __ and3(to, -8, to); 2.401 + __ and3(L3, -8, L3); 2.402 + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); 2.403 + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); 2.404 + __ add(to, 8, to); 2.405 + __ add(L3, 8, L3); 2.406 + __ orn(G0, L2, L2); 2.407 + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); 2.408 + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); 2.409 + __ mov(L5, to); 
2.410 + __ movxtod(L6, F60); 2.411 + __ movxtod(L7, F62); 2.412 + 2.413 + __ BIND(L_check_loop_end_128bit); 2.414 __ add(from, 16, from); 2.415 __ add(to, 16, to); 2.416 __ subcc(len_reg, 16, len_reg); 2.417 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128); 2.418 __ delayed()->nop(); 2.419 + // re-init intial vector for next block, 8-byte alignment is guaranteed 2.420 __ stf(FloatRegisterImpl::D, F60, rvec, 0); 2.421 __ stf(FloatRegisterImpl::D, F62, rvec, 8); 2.422 + __ restore(); 2.423 __ retl(); 2.424 - __ delayed()->mov(L1, O0); 2.425 + __ delayed()->mov(L0, O0); 2.426 2.427 __ align(OptoLoopAlignment); 2.428 __ BIND(L_cbcenc192); 2.429 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.430 + __ andcc(from, 7, G0); 2.431 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_192bit); 2.432 + __ delayed()->mov(from, L1); // save original 'from' address before alignaddr 2.433 + 2.434 + // aligned case: load input into G3 and G4 2.435 __ ldx(from,0,G3); 2.436 __ ldx(from,8,G4); 2.437 + __ ba_short(L_192bit_transform); 2.438 + 2.439 + __ BIND(L_load_misaligned_input_192bit); 2.440 + // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption 2.441 + __ alignaddr(from, G0, from); 2.442 + __ ldf(FloatRegisterImpl::D, from, 0, F48); 2.443 + __ ldf(FloatRegisterImpl::D, from, 8, F50); 2.444 + __ ldf(FloatRegisterImpl::D, from, 16, F52); 2.445 + __ faligndata(F48, F50, F48); 2.446 + __ faligndata(F50, F52, F50); 2.447 + __ movdtox(F48, G3); 2.448 + __ movdtox(F50, G4); 2.449 + __ mov(L1, from); 2.450 + 2.451 + __ BIND(L_192bit_transform); 2.452 __ xor3(G1,G3,G3); 2.453 - __ xor3(G2,G4,G4); 2.454 + __ xor3(G5,G4,G4); 2.455 __ movxtod(G3,F56); 2.456 __ movxtod(G4,F58); 2.457 __ fxor(FloatRegisterImpl::D, F60, F56, F60); 2.458 @@ -3634,24 +3827,81 @@ 2.459 } 2.460 } 2.461 2.462 + // check for 8-byte alignment since dest byte array may have arbitrary 
alignment if offset mod 8 is non-zero 2.463 + __ andcc(to, 7, L1); 2.464 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_192bit); 2.465 + __ delayed()->edge8n(to, G0, L2); 2.466 + 2.467 + // aligned case: store output into the destination array 2.468 __ stf(FloatRegisterImpl::D, F60, to, 0); 2.469 __ stf(FloatRegisterImpl::D, F62, to, 8); 2.470 + __ ba_short(L_check_loop_end_192bit); 2.471 + 2.472 + __ BIND(L_store_misaligned_output_192bit); 2.473 + __ add(to, 8, L3); 2.474 + __ mov(8, L4); 2.475 + __ sub(L4, L1, L4); 2.476 + __ alignaddr(L4, G0, L4); 2.477 + __ movdtox(F60, L6); 2.478 + __ movdtox(F62, L7); 2.479 + __ faligndata(F60, F60, F60); 2.480 + __ faligndata(F62, F62, F62); 2.481 + __ mov(to, L5); 2.482 + __ and3(to, -8, to); 2.483 + __ and3(L3, -8, L3); 2.484 + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); 2.485 + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); 2.486 + __ add(to, 8, to); 2.487 + __ add(L3, 8, L3); 2.488 + __ orn(G0, L2, L2); 2.489 + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); 2.490 + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); 2.491 + __ mov(L5, to); 2.492 + __ movxtod(L6, F60); 2.493 + __ movxtod(L7, F62); 2.494 + 2.495 + __ BIND(L_check_loop_end_192bit); 2.496 __ add(from, 16, from); 2.497 __ subcc(len_reg, 16, len_reg); 2.498 __ add(to, 16, to); 2.499 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192); 2.500 __ delayed()->nop(); 2.501 + // re-init intial vector for next block, 8-byte alignment is guaranteed 2.502 __ stf(FloatRegisterImpl::D, F60, rvec, 0); 2.503 __ stf(FloatRegisterImpl::D, F62, rvec, 8); 2.504 + __ restore(); 2.505 __ retl(); 2.506 - __ delayed()->mov(L1, O0); 2.507 + __ delayed()->mov(L0, O0); 2.508 2.509 __ align(OptoLoopAlignment); 2.510 __ BIND(L_cbcenc256); 2.511 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.512 + __ andcc(from, 7, G0); 2.513 + __ 
br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_256bit); 2.514 + __ delayed()->mov(from, L1); // save original 'from' address before alignaddr 2.515 + 2.516 + // aligned case: load input into G3 and G4 2.517 __ ldx(from,0,G3); 2.518 __ ldx(from,8,G4); 2.519 + __ ba_short(L_256bit_transform); 2.520 + 2.521 + __ BIND(L_load_misaligned_input_256bit); 2.522 + // cannot clobber F48, F50 and F52. F56, F58 can be used though 2.523 + __ alignaddr(from, G0, from); 2.524 + __ movdtox(F60, L2); // save F60 before overwriting 2.525 + __ ldf(FloatRegisterImpl::D, from, 0, F56); 2.526 + __ ldf(FloatRegisterImpl::D, from, 8, F58); 2.527 + __ ldf(FloatRegisterImpl::D, from, 16, F60); 2.528 + __ faligndata(F56, F58, F56); 2.529 + __ faligndata(F58, F60, F58); 2.530 + __ movdtox(F56, G3); 2.531 + __ movdtox(F58, G4); 2.532 + __ mov(L1, from); 2.533 + __ movxtod(L2, F60); 2.534 + 2.535 + __ BIND(L_256bit_transform); 2.536 __ xor3(G1,G3,G3); 2.537 - __ xor3(G2,G4,G4); 2.538 + __ xor3(G5,G4,G4); 2.539 __ movxtod(G3,F56); 2.540 __ movxtod(G4,F58); 2.541 __ fxor(FloatRegisterImpl::D, F60, F56, F60); 2.542 @@ -3670,26 +3920,69 @@ 2.543 } 2.544 } 2.545 2.546 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.547 + __ andcc(to, 7, L1); 2.548 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_256bit); 2.549 + __ delayed()->edge8n(to, G0, L2); 2.550 + 2.551 + // aligned case: store output into the destination array 2.552 __ stf(FloatRegisterImpl::D, F60, to, 0); 2.553 __ stf(FloatRegisterImpl::D, F62, to, 8); 2.554 + __ ba_short(L_check_loop_end_256bit); 2.555 + 2.556 + __ BIND(L_store_misaligned_output_256bit); 2.557 + __ add(to, 8, L3); 2.558 + __ mov(8, L4); 2.559 + __ sub(L4, L1, L4); 2.560 + __ alignaddr(L4, G0, L4); 2.561 + __ movdtox(F60, L6); 2.562 + __ movdtox(F62, L7); 2.563 + __ faligndata(F60, F60, F60); 2.564 + __ faligndata(F62, F62, F62); 2.565 + __ mov(to, L5); 2.566 + __ 
and3(to, -8, to); 2.567 + __ and3(L3, -8, L3); 2.568 + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); 2.569 + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); 2.570 + __ add(to, 8, to); 2.571 + __ add(L3, 8, L3); 2.572 + __ orn(G0, L2, L2); 2.573 + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); 2.574 + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); 2.575 + __ mov(L5, to); 2.576 + __ movxtod(L6, F60); 2.577 + __ movxtod(L7, F62); 2.578 + 2.579 + __ BIND(L_check_loop_end_256bit); 2.580 __ add(from, 16, from); 2.581 __ subcc(len_reg, 16, len_reg); 2.582 __ add(to, 16, to); 2.583 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256); 2.584 __ delayed()->nop(); 2.585 + // re-init intial vector for next block, 8-byte alignment is guaranteed 2.586 __ stf(FloatRegisterImpl::D, F60, rvec, 0); 2.587 __ stf(FloatRegisterImpl::D, F62, rvec, 8); 2.588 + __ restore(); 2.589 __ retl(); 2.590 - __ delayed()->mov(L1, O0); 2.591 + __ delayed()->mov(L0, O0); 2.592 2.593 return start; 2.594 } 2.595 2.596 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { 2.597 + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, 2.598 + "the following code assumes that first element of an int array is aligned to 8 bytes"); 2.599 + assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, 2.600 + "the following code assumes that first element of a byte array is aligned to 8 bytes"); 2.601 __ align(CodeEntryAlignment); 2.602 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); 2.603 Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start; 2.604 Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256; 2.605 + Label L_load_misaligned_input_first_block, L_transform_first_block, L_load_misaligned_next2_blocks128, L_transform_next2_blocks128; 2.606 + Label L_load_misaligned_next2_blocks192, L_transform_next2_blocks192, 
L_load_misaligned_next2_blocks256, L_transform_next2_blocks256; 2.607 + Label L_store_misaligned_output_first_block, L_check_decrypt_end, L_store_misaligned_output_next2_blocks128; 2.608 + Label L_check_decrypt_loop_end128, L_store_misaligned_output_next2_blocks192, L_check_decrypt_loop_end192; 2.609 + Label L_store_misaligned_output_next2_blocks256, L_check_decrypt_loop_end256; 2.610 address start = __ pc(); 2.611 Register from = I0; // source byte array 2.612 Register to = I1; // destination byte array 2.613 @@ -3704,11 +3997,12 @@ 2.614 __ save_frame(0); //args are read from I* registers since we save the frame in the beginning 2.615 2.616 // load original key from SunJCE expanded decryption key 2.617 + // Since we load original key buffer starting first element, 8-byte alignment is guaranteed 2.618 for ( int i = 0; i <= 3; i++ ) { 2.619 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); 2.620 } 2.621 2.622 - // load initial vector 2.623 + // load initial vector, 8-byte alignment is guaranteed 2.624 __ ldx(rvec,0,L0); 2.625 __ ldx(rvec,8,L1); 2.626 2.627 @@ -3733,11 +4027,10 @@ 2.628 __ movdtox(F42,L3); 2.629 2.630 __ and3(len_reg, 16, L4); 2.631 - __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128); 2.632 - __ delayed()->nop(); 2.633 - 2.634 - __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); 2.635 - __ delayed()->nop(); 2.636 + __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks128); 2.637 + __ nop(); 2.638 + 2.639 + __ ba_short(L_dec_first_block_start); 2.640 2.641 __ BIND(L_expand192bit); 2.642 // load rest of the 192-bit key 2.643 @@ -3758,11 +4051,10 @@ 2.644 __ movdtox(F50,L3); 2.645 2.646 __ and3(len_reg, 16, L4); 2.647 - __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192); 2.648 - __ delayed()->nop(); 2.649 - 2.650 - __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); 2.651 - __ delayed()->nop(); 2.652 + __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks192); 2.653 + 
__ nop(); 2.654 + 2.655 + __ ba_short(L_dec_first_block_start); 2.656 2.657 __ BIND(L_expand256bit); 2.658 // load rest of the 256-bit key 2.659 @@ -3785,12 +4077,32 @@ 2.660 __ movdtox(F58,L3); 2.661 2.662 __ and3(len_reg, 16, L4); 2.663 - __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256); 2.664 - __ delayed()->nop(); 2.665 + __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks256); 2.666 2.667 __ BIND(L_dec_first_block_start); 2.668 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.669 + __ andcc(from, 7, G0); 2.670 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_first_block); 2.671 + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr 2.672 + 2.673 + // aligned case: load input into L4 and L5 2.674 __ ldx(from,0,L4); 2.675 __ ldx(from,8,L5); 2.676 + __ ba_short(L_transform_first_block); 2.677 + 2.678 + __ BIND(L_load_misaligned_input_first_block); 2.679 + __ alignaddr(from, G0, from); 2.680 + // F58, F60, F62 can be clobbered 2.681 + __ ldf(FloatRegisterImpl::D, from, 0, F58); 2.682 + __ ldf(FloatRegisterImpl::D, from, 8, F60); 2.683 + __ ldf(FloatRegisterImpl::D, from, 16, F62); 2.684 + __ faligndata(F58, F60, F58); 2.685 + __ faligndata(F60, F62, F60); 2.686 + __ movdtox(F58, L4); 2.687 + __ movdtox(F60, L5); 2.688 + __ mov(G1, from); 2.689 + 2.690 + __ BIND(L_transform_first_block); 2.691 __ xor3(L2,L4,G1); 2.692 __ movxtod(G1,F60); 2.693 __ xor3(L3,L5,G1); 2.694 @@ -3833,9 +4145,36 @@ 2.695 __ fxor(FloatRegisterImpl::D, F56, F60, F60); 2.696 __ fxor(FloatRegisterImpl::D, F58, F62, F62); 2.697 2.698 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.699 + __ andcc(to, 7, G1); 2.700 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_first_block); 2.701 + __ delayed()->edge8n(to, G0, G2); 2.702 + 2.703 + // aligned case: store output into the 
destination array 2.704 __ stf(FloatRegisterImpl::D, F60, to, 0); 2.705 __ stf(FloatRegisterImpl::D, F62, to, 8); 2.706 - 2.707 + __ ba_short(L_check_decrypt_end); 2.708 + 2.709 + __ BIND(L_store_misaligned_output_first_block); 2.710 + __ add(to, 8, G3); 2.711 + __ mov(8, G4); 2.712 + __ sub(G4, G1, G4); 2.713 + __ alignaddr(G4, G0, G4); 2.714 + __ faligndata(F60, F60, F60); 2.715 + __ faligndata(F62, F62, F62); 2.716 + __ mov(to, G1); 2.717 + __ and3(to, -8, to); 2.718 + __ and3(G3, -8, G3); 2.719 + __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY); 2.720 + __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY); 2.721 + __ add(to, 8, to); 2.722 + __ add(G3, 8, G3); 2.723 + __ orn(G0, G2, G2); 2.724 + __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY); 2.725 + __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY); 2.726 + __ mov(G1, to); 2.727 + 2.728 + __ BIND(L_check_decrypt_end); 2.729 __ add(from, 16, from); 2.730 __ add(to, 16, to); 2.731 __ subcc(len_reg, 16, len_reg); 2.732 @@ -3852,17 +4191,44 @@ 2.733 __ BIND(L_dec_next2_blocks128); 2.734 __ nop(); 2.735 2.736 - // F40:F42 used for first 16-bytes 2.737 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.738 + __ andcc(from, 7, G0); 2.739 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks128); 2.740 + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr 2.741 + 2.742 + // aligned case: load input into G4, G5, L4 and L5 2.743 __ ldx(from,0,G4); 2.744 __ ldx(from,8,G5); 2.745 + __ ldx(from,16,L4); 2.746 + __ ldx(from,24,L5); 2.747 + __ ba_short(L_transform_next2_blocks128); 2.748 + 2.749 + __ BIND(L_load_misaligned_next2_blocks128); 2.750 + __ alignaddr(from, G0, from); 2.751 + // F40, F42, F58, F60, F62 can be clobbered 2.752 + __ ldf(FloatRegisterImpl::D, from, 0, F40); 2.753 + __ ldf(FloatRegisterImpl::D, from, 8, F42); 2.754 + __ ldf(FloatRegisterImpl::D, from, 16, F60); 
2.755 + __ ldf(FloatRegisterImpl::D, from, 24, F62); 2.756 + __ ldf(FloatRegisterImpl::D, from, 32, F58); 2.757 + __ faligndata(F40, F42, F40); 2.758 + __ faligndata(F42, F60, F42); 2.759 + __ faligndata(F60, F62, F60); 2.760 + __ faligndata(F62, F58, F62); 2.761 + __ movdtox(F40, G4); 2.762 + __ movdtox(F42, G5); 2.763 + __ movdtox(F60, L4); 2.764 + __ movdtox(F62, L5); 2.765 + __ mov(G1, from); 2.766 + 2.767 + __ BIND(L_transform_next2_blocks128); 2.768 + // F40:F42 used for first 16-bytes 2.769 __ xor3(L2,G4,G1); 2.770 __ movxtod(G1,F40); 2.771 __ xor3(L3,G5,G1); 2.772 __ movxtod(G1,F42); 2.773 2.774 // F60:F62 used for next 16-bytes 2.775 - __ ldx(from,16,L4); 2.776 - __ ldx(from,24,L5); 2.777 __ xor3(L2,L4,G1); 2.778 __ movxtod(G1,F60); 2.779 __ xor3(L3,L5,G1); 2.780 @@ -3891,9 +4257,6 @@ 2.781 __ fxor(FloatRegisterImpl::D, F46, F40, F40); 2.782 __ fxor(FloatRegisterImpl::D, F44, F42, F42); 2.783 2.784 - __ stf(FloatRegisterImpl::D, F40, to, 0); 2.785 - __ stf(FloatRegisterImpl::D, F42, to, 8); 2.786 - 2.787 __ movxtod(G4,F56); 2.788 __ movxtod(G5,F58); 2.789 __ mov(L4,L0); 2.790 @@ -3901,32 +4264,93 @@ 2.791 __ fxor(FloatRegisterImpl::D, F56, F60, F60); 2.792 __ fxor(FloatRegisterImpl::D, F58, F62, F62); 2.793 2.794 + // For mis-aligned store of 32 bytes of result we can do: 2.795 + // Circular right-shift all 4 FP registers so that 'head' and 'tail' 2.796 + // parts that need to be stored starting at mis-aligned address are in a FP reg 2.797 + // the other 3 FP regs can thus be stored using regular store 2.798 + // we then use the edge + partial-store mechanism to store the 'head' and 'tail' parts 2.799 + 2.800 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.801 + __ andcc(to, 7, G1); 2.802 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks128); 2.803 + __ delayed()->edge8n(to, G0, G2); 2.804 + 2.805 + // aligned case: store output into the destination array 
2.806 + __ stf(FloatRegisterImpl::D, F40, to, 0); 2.807 + __ stf(FloatRegisterImpl::D, F42, to, 8); 2.808 __ stf(FloatRegisterImpl::D, F60, to, 16); 2.809 __ stf(FloatRegisterImpl::D, F62, to, 24); 2.810 - 2.811 + __ ba_short(L_check_decrypt_loop_end128); 2.812 + 2.813 + __ BIND(L_store_misaligned_output_next2_blocks128); 2.814 + __ mov(8, G4); 2.815 + __ sub(G4, G1, G4); 2.816 + __ alignaddr(G4, G0, G4); 2.817 + __ faligndata(F40, F42, F56); // F56 can be clobbered 2.818 + __ faligndata(F42, F60, F42); 2.819 + __ faligndata(F60, F62, F60); 2.820 + __ faligndata(F62, F40, F40); 2.821 + __ mov(to, G1); 2.822 + __ and3(to, -8, to); 2.823 + __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY); 2.824 + __ stf(FloatRegisterImpl::D, F56, to, 8); 2.825 + __ stf(FloatRegisterImpl::D, F42, to, 16); 2.826 + __ stf(FloatRegisterImpl::D, F60, to, 24); 2.827 + __ add(to, 32, to); 2.828 + __ orn(G0, G2, G2); 2.829 + __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY); 2.830 + __ mov(G1, to); 2.831 + 2.832 + __ BIND(L_check_decrypt_loop_end128); 2.833 __ add(from, 32, from); 2.834 __ add(to, 32, to); 2.835 __ subcc(len_reg, 32, len_reg); 2.836 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128); 2.837 __ delayed()->nop(); 2.838 - __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); 2.839 - __ delayed()->nop(); 2.840 + __ ba_short(L_cbcdec_end); 2.841 2.842 __ align(OptoLoopAlignment); 2.843 __ BIND(L_dec_next2_blocks192); 2.844 __ nop(); 2.845 2.846 - // F48:F50 used for first 16-bytes 2.847 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.848 + __ andcc(from, 7, G0); 2.849 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks192); 2.850 + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr 2.851 + 2.852 + // aligned case: load input into G4, G5, L4 and L5 2.853 __ ldx(from,0,G4); 2.854 __ ldx(from,8,G5); 2.855 + __ 
ldx(from,16,L4); 2.856 + __ ldx(from,24,L5); 2.857 + __ ba_short(L_transform_next2_blocks192); 2.858 + 2.859 + __ BIND(L_load_misaligned_next2_blocks192); 2.860 + __ alignaddr(from, G0, from); 2.861 + // F48, F50, F52, F60, F62 can be clobbered 2.862 + __ ldf(FloatRegisterImpl::D, from, 0, F48); 2.863 + __ ldf(FloatRegisterImpl::D, from, 8, F50); 2.864 + __ ldf(FloatRegisterImpl::D, from, 16, F60); 2.865 + __ ldf(FloatRegisterImpl::D, from, 24, F62); 2.866 + __ ldf(FloatRegisterImpl::D, from, 32, F52); 2.867 + __ faligndata(F48, F50, F48); 2.868 + __ faligndata(F50, F60, F50); 2.869 + __ faligndata(F60, F62, F60); 2.870 + __ faligndata(F62, F52, F62); 2.871 + __ movdtox(F48, G4); 2.872 + __ movdtox(F50, G5); 2.873 + __ movdtox(F60, L4); 2.874 + __ movdtox(F62, L5); 2.875 + __ mov(G1, from); 2.876 + 2.877 + __ BIND(L_transform_next2_blocks192); 2.878 + // F48:F50 used for first 16-bytes 2.879 __ xor3(L2,G4,G1); 2.880 __ movxtod(G1,F48); 2.881 __ xor3(L3,G5,G1); 2.882 __ movxtod(G1,F50); 2.883 2.884 // F60:F62 used for next 16-bytes 2.885 - __ ldx(from,16,L4); 2.886 - __ ldx(from,24,L5); 2.887 __ xor3(L2,L4,G1); 2.888 __ movxtod(G1,F60); 2.889 __ xor3(L3,L5,G1); 2.890 @@ -3955,9 +4379,6 @@ 2.891 __ fxor(FloatRegisterImpl::D, F54, F48, F48); 2.892 __ fxor(FloatRegisterImpl::D, F52, F50, F50); 2.893 2.894 - __ stf(FloatRegisterImpl::D, F48, to, 0); 2.895 - __ stf(FloatRegisterImpl::D, F50, to, 8); 2.896 - 2.897 __ movxtod(G4,F56); 2.898 __ movxtod(G5,F58); 2.899 __ mov(L4,L0); 2.900 @@ -3965,32 +4386,87 @@ 2.901 __ fxor(FloatRegisterImpl::D, F56, F60, F60); 2.902 __ fxor(FloatRegisterImpl::D, F58, F62, F62); 2.903 2.904 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.905 + __ andcc(to, 7, G1); 2.906 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks192); 2.907 + __ delayed()->edge8n(to, G0, G2); 2.908 + 2.909 + // aligned case: store output into the destination array 
2.910 + __ stf(FloatRegisterImpl::D, F48, to, 0); 2.911 + __ stf(FloatRegisterImpl::D, F50, to, 8); 2.912 __ stf(FloatRegisterImpl::D, F60, to, 16); 2.913 __ stf(FloatRegisterImpl::D, F62, to, 24); 2.914 - 2.915 + __ ba_short(L_check_decrypt_loop_end192); 2.916 + 2.917 + __ BIND(L_store_misaligned_output_next2_blocks192); 2.918 + __ mov(8, G4); 2.919 + __ sub(G4, G1, G4); 2.920 + __ alignaddr(G4, G0, G4); 2.921 + __ faligndata(F48, F50, F56); // F56 can be clobbered 2.922 + __ faligndata(F50, F60, F50); 2.923 + __ faligndata(F60, F62, F60); 2.924 + __ faligndata(F62, F48, F48); 2.925 + __ mov(to, G1); 2.926 + __ and3(to, -8, to); 2.927 + __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY); 2.928 + __ stf(FloatRegisterImpl::D, F56, to, 8); 2.929 + __ stf(FloatRegisterImpl::D, F50, to, 16); 2.930 + __ stf(FloatRegisterImpl::D, F60, to, 24); 2.931 + __ add(to, 32, to); 2.932 + __ orn(G0, G2, G2); 2.933 + __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY); 2.934 + __ mov(G1, to); 2.935 + 2.936 + __ BIND(L_check_decrypt_loop_end192); 2.937 __ add(from, 32, from); 2.938 __ add(to, 32, to); 2.939 __ subcc(len_reg, 32, len_reg); 2.940 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192); 2.941 __ delayed()->nop(); 2.942 - __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); 2.943 - __ delayed()->nop(); 2.944 + __ ba_short(L_cbcdec_end); 2.945 2.946 __ align(OptoLoopAlignment); 2.947 __ BIND(L_dec_next2_blocks256); 2.948 __ nop(); 2.949 2.950 - // F0:F2 used for first 16-bytes 2.951 + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero 2.952 + __ andcc(from, 7, G0); 2.953 + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks256); 2.954 + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr 2.955 + 2.956 + // aligned case: load input into G4, G5, L4 and L5 2.957 __ ldx(from,0,G4); 2.958 __ ldx(from,8,G5); 2.959 + __ 
ldx(from,16,L4); 2.960 + __ ldx(from,24,L5); 2.961 + __ ba_short(L_transform_next2_blocks256); 2.962 + 2.963 + __ BIND(L_load_misaligned_next2_blocks256); 2.964 + __ alignaddr(from, G0, from); 2.965 + // F0, F2, F4, F60, F62 can be clobbered 2.966 + __ ldf(FloatRegisterImpl::D, from, 0, F0); 2.967 + __ ldf(FloatRegisterImpl::D, from, 8, F2); 2.968 + __ ldf(FloatRegisterImpl::D, from, 16, F60); 2.969 + __ ldf(FloatRegisterImpl::D, from, 24, F62); 2.970 + __ ldf(FloatRegisterImpl::D, from, 32, F4); 2.971 + __ faligndata(F0, F2, F0); 2.972 + __ faligndata(F2, F60, F2); 2.973 + __ faligndata(F60, F62, F60); 2.974 + __ faligndata(F62, F4, F62); 2.975 + __ movdtox(F0, G4); 2.976 + __ movdtox(F2, G5); 2.977 + __ movdtox(F60, L4); 2.978 + __ movdtox(F62, L5); 2.979 + __ mov(G1, from); 2.980 + 2.981 + __ BIND(L_transform_next2_blocks256); 2.982 + // F0:F2 used for first 16-bytes 2.983 __ xor3(L2,G4,G1); 2.984 __ movxtod(G1,F0); 2.985 __ xor3(L3,G5,G1); 2.986 __ movxtod(G1,F2); 2.987 2.988 // F60:F62 used for next 16-bytes 2.989 - __ ldx(from,16,L4); 2.990 - __ ldx(from,24,L5); 2.991 __ xor3(L2,L4,G1); 2.992 __ movxtod(G1,F60); 2.993 __ xor3(L3,L5,G1); 2.994 @@ -4043,9 +4519,6 @@ 2.995 __ fxor(FloatRegisterImpl::D, F6, F0, F0); 2.996 __ fxor(FloatRegisterImpl::D, F4, F2, F2); 2.997 2.998 - __ stf(FloatRegisterImpl::D, F0, to, 0); 2.999 - __ stf(FloatRegisterImpl::D, F2, to, 8); 2.1000 - 2.1001 __ movxtod(G4,F56); 2.1002 __ movxtod(G5,F58); 2.1003 __ mov(L4,L0); 2.1004 @@ -4053,9 +4526,38 @@ 2.1005 __ fxor(FloatRegisterImpl::D, F56, F60, F60); 2.1006 __ fxor(FloatRegisterImpl::D, F58, F62, F62); 2.1007 2.1008 + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero 2.1009 + __ andcc(to, 7, G1); 2.1010 + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks256); 2.1011 + __ delayed()->edge8n(to, G0, G2); 2.1012 + 2.1013 + // aligned case: store output into the destination array 2.1014 + __ 
stf(FloatRegisterImpl::D, F0, to, 0); 2.1015 + __ stf(FloatRegisterImpl::D, F2, to, 8); 2.1016 __ stf(FloatRegisterImpl::D, F60, to, 16); 2.1017 __ stf(FloatRegisterImpl::D, F62, to, 24); 2.1018 - 2.1019 + __ ba_short(L_check_decrypt_loop_end256); 2.1020 + 2.1021 + __ BIND(L_store_misaligned_output_next2_blocks256); 2.1022 + __ mov(8, G4); 2.1023 + __ sub(G4, G1, G4); 2.1024 + __ alignaddr(G4, G0, G4); 2.1025 + __ faligndata(F0, F2, F56); // F56 can be clobbered 2.1026 + __ faligndata(F2, F60, F2); 2.1027 + __ faligndata(F60, F62, F60); 2.1028 + __ faligndata(F62, F0, F0); 2.1029 + __ mov(to, G1); 2.1030 + __ and3(to, -8, to); 2.1031 + __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY); 2.1032 + __ stf(FloatRegisterImpl::D, F56, to, 8); 2.1033 + __ stf(FloatRegisterImpl::D, F2, to, 16); 2.1034 + __ stf(FloatRegisterImpl::D, F60, to, 24); 2.1035 + __ add(to, 32, to); 2.1036 + __ orn(G0, G2, G2); 2.1037 + __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY); 2.1038 + __ mov(G1, to); 2.1039 + 2.1040 + __ BIND(L_check_decrypt_loop_end256); 2.1041 __ add(from, 32, from); 2.1042 __ add(to, 32, to); 2.1043 __ subcc(len_reg, 32, len_reg); 2.1044 @@ -4063,6 +4565,7 @@ 2.1045 __ delayed()->nop(); 2.1046 2.1047 __ BIND(L_cbcdec_end); 2.1048 + // re-init initial vector for next block, 8-byte alignment is guaranteed 2.1049 __ stx(L0, rvec, 0); 2.1050 __ stx(L1, rvec, 8); 2.1051 __ restore();
3.1 --- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp Thu May 01 15:02:46 2014 -0700 3.2 +++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp Wed Apr 30 14:14:01 2014 -0700 3.3 @@ -41,7 +41,7 @@ 3.4 enum /* platform_dependent_constants */ { 3.5 // %%%%%%%% May be able to shrink this a lot 3.6 code_size1 = 20000, // simply increase if too small (assembler will crash if too small) 3.7 - code_size2 = 20000 // simply increase if too small (assembler will crash if too small) 3.8 + code_size2 = 22000 // simply increase if too small (assembler will crash if too small) 3.9 }; 3.10 3.11 class Sparc {
4.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Thu May 01 15:02:46 2014 -0700 4.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Apr 30 14:14:01 2014 -0700 4.3 @@ -1,5 +1,5 @@ 4.4 /* 4.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 4.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 4.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.8 * 4.9 * This code is free software; you can redistribute it and/or modify it 4.10 @@ -266,9 +266,9 @@ 4.11 if (!has_vis1()) // Drop to 0 if no VIS1 support 4.12 UseVIS = 0; 4.13 4.14 - // T2 and above should have support for AES instructions 4.15 + // SPARC T4 and above should have support for AES instructions 4.16 if (has_aes()) { 4.17 - if (UseVIS > 0) { // AES intrinsics use FXOR instruction which is VIS1 4.18 + if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3 4.19 if (FLAG_IS_DEFAULT(UseAES)) { 4.20 FLAG_SET_DEFAULT(UseAES, true); 4.21 } 4.22 @@ -282,7 +282,7 @@ 4.23 } 4.24 } else { 4.25 if (UseAES || UseAESIntrinsics) { 4.26 - warning("SPARC AES intrinsics require VIS1 instruction support. Intrinsics will be disabled."); 4.27 + warning("SPARC AES intrinsics require VIS3 instruction support. Intrinsics will be disabled."); 4.28 if (UseAES) { 4.29 FLAG_SET_DEFAULT(UseAES, false); 4.30 }
5.1 --- a/src/share/vm/classfile/vmSymbols.hpp Thu May 01 15:02:46 2014 -0700 5.2 +++ b/src/share/vm/classfile/vmSymbols.hpp Wed Apr 30 14:14:01 2014 -0700 5.3 @@ -774,7 +774,7 @@ 5.4 /* java/lang/ref/Reference */ \ 5.5 do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ 5.6 \ 5.7 - /* support for com.sum.crypto.provider.AESCrypt and some of its callers */ \ 5.8 + /* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \ 5.9 do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \ 5.10 do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \ 5.11 do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
6.1 --- a/src/share/vm/opto/runtime.cpp Thu May 01 15:02:46 2014 -0700 6.2 +++ b/src/share/vm/opto/runtime.cpp Wed Apr 30 14:14:01 2014 -0700 6.3 @@ -1,5 +1,5 @@ 6.4 /* 6.5 - * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. 6.6 + * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. 6.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6.8 * 6.9 * This code is free software; you can redistribute it and/or modify it 6.10 @@ -870,7 +870,7 @@ 6.11 return TypeFunc::make(domain, range); 6.12 } 6.13 6.14 -// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void 6.15 +// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int 6.16 const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { 6.17 // create input type (domain) 6.18 int num_args = 5;
7.1 --- a/test/compiler/7184394/TestAESBase.java Thu May 01 15:02:46 2014 -0700 7.2 +++ b/test/compiler/7184394/TestAESBase.java Wed Apr 30 14:14:01 2014 -0700 7.3 @@ -1,5 +1,5 @@ 7.4 /* 7.5 - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 7.6 + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. 7.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 7.8 * 7.9 * This code is free software; you can redistribute it and/or modify it 7.10 @@ -40,9 +40,20 @@ 7.11 int msgSize = Integer.getInteger("msgSize", 646); 7.12 boolean checkOutput = Boolean.getBoolean("checkOutput"); 7.13 boolean noReinit = Boolean.getBoolean("noReinit"); 7.14 + boolean testingMisalignment; 7.15 + private static final int ALIGN = 8; 7.16 + int encInputOffset = Integer.getInteger("encInputOffset", 0) % ALIGN; 7.17 + int encOutputOffset = Integer.getInteger("encOutputOffset", 0) % ALIGN; 7.18 + int decOutputOffset = Integer.getInteger("decOutputOffset", 0) % ALIGN; 7.19 + int lastChunkSize = Integer.getInteger("lastChunkSize", 32); 7.20 int keySize = Integer.getInteger("keySize", 128); 7.21 + int inputLength; 7.22 + int encodeLength; 7.23 + int decodeLength; 7.24 + int decodeMsgSize; 7.25 String algorithm = System.getProperty("algorithm", "AES"); 7.26 String mode = System.getProperty("mode", "CBC"); 7.27 + String paddingStr = System.getProperty("paddingStr", "PKCS5Padding"); 7.28 byte[] input; 7.29 byte[] encode; 7.30 byte[] expectedEncode; 7.31 @@ -51,7 +62,6 @@ 7.32 Random random = new Random(0); 7.33 Cipher cipher; 7.34 Cipher dCipher; 7.35 - String paddingStr = "PKCS5Padding"; 7.36 AlgorithmParameters algParams; 7.37 SecretKey key; 7.38 7.39 @@ -67,7 +77,10 @@ 7.40 7.41 public void prepare() { 7.42 try { 7.43 - System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput); 7.44 + System.out.println("\nalgorithm=" + 
algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" + encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize ); 7.45 + 7.46 + if (encInputOffset % ALIGN != 0 || encOutputOffset % ALIGN != 0 || decOutputOffset % ALIGN !=0 ) 7.47 + testingMisalignment = true; 7.48 7.49 int keyLenBytes = (keySize == 0 ? 16 : keySize/8); 7.50 byte keyBytes[] = new byte[keyLenBytes]; 7.51 @@ -81,10 +94,6 @@ 7.52 System.out.println("Algorithm: " + key.getAlgorithm() + "(" 7.53 + key.getEncoded().length * 8 + "bit)"); 7.54 } 7.55 - input = new byte[msgSize]; 7.56 - for (int i=0; i<input.length; i++) { 7.57 - input[i] = (byte) (i & 0xff); 7.58 - } 7.59 7.60 cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); 7.61 dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); 7.62 @@ -103,10 +112,35 @@ 7.63 childShowCipher(); 7.64 } 7.65 7.66 + inputLength = msgSize + encInputOffset; 7.67 + if (testingMisalignment) { 7.68 + encodeLength = cipher.getOutputSize(msgSize - lastChunkSize) + encOutputOffset; 7.69 + encodeLength += cipher.getOutputSize(lastChunkSize); 7.70 + decodeLength = dCipher.getOutputSize(encodeLength - lastChunkSize) + decOutputOffset; 7.71 + decodeLength += dCipher.getOutputSize(lastChunkSize); 7.72 + } else { 7.73 + encodeLength = cipher.getOutputSize(msgSize) + encOutputOffset; 7.74 + decodeLength = dCipher.getOutputSize(encodeLength) + decOutputOffset; 7.75 + } 7.76 + 7.77 + input = new byte[inputLength]; 7.78 + for (int i=encInputOffset, j=0; i<inputLength; i++, j++) { 7.79 + input[i] = (byte) (j & 0xff); 7.80 + } 7.81 + 7.82 // do one encode and decode in preparation 7.83 - // this will also create the encode buffer and decode buffer 7.84 - encode = cipher.doFinal(input); 7.85 - decode = 
dCipher.doFinal(encode); 7.86 + encode = new byte[encodeLength]; 7.87 + decode = new byte[decodeLength]; 7.88 + if (testingMisalignment) { 7.89 + decodeMsgSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset); 7.90 + decodeMsgSize += cipher.doFinal(input, (encInputOffset + msgSize - lastChunkSize), lastChunkSize, encode, (encOutputOffset + decodeMsgSize)); 7.91 + 7.92 + int tempSize = dCipher.update(encode, encOutputOffset, (decodeMsgSize - lastChunkSize), decode, decOutputOffset); 7.93 + dCipher.doFinal(encode, (encOutputOffset + decodeMsgSize - lastChunkSize), lastChunkSize, decode, (decOutputOffset + tempSize)); 7.94 + } else { 7.95 + decodeMsgSize = cipher.doFinal(input, encInputOffset, msgSize, encode, encOutputOffset); 7.96 + dCipher.doFinal(encode, encOutputOffset, decodeMsgSize, decode, decOutputOffset); 7.97 + } 7.98 if (checkOutput) { 7.99 expectedEncode = (byte[]) encode.clone(); 7.100 expectedDecode = (byte[]) decode.clone();
8.1 --- a/test/compiler/7184394/TestAESDecode.java Thu May 01 15:02:46 2014 -0700 8.2 +++ b/test/compiler/7184394/TestAESDecode.java Wed Apr 30 14:14:01 2014 -0700 8.3 @@ -1,5 +1,5 @@ 8.4 /* 8.5 - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 8.6 + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. 8.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 8.8 * 8.9 * This code is free software; you can redistribute it and/or modify it 8.10 @@ -33,14 +33,15 @@ 8.11 public void run() { 8.12 try { 8.13 if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams); 8.14 + decode = new byte[decodeLength]; 8.15 + if (testingMisalignment) { 8.16 + int tempSize = dCipher.update(encode, encOutputOffset, (decodeMsgSize - lastChunkSize), decode, decOutputOffset); 8.17 + dCipher.doFinal(encode, (encOutputOffset + decodeMsgSize - lastChunkSize), lastChunkSize, decode, (decOutputOffset + tempSize)); 8.18 + } else { 8.19 + dCipher.doFinal(encode, encOutputOffset, decodeMsgSize, decode, decOutputOffset); 8.20 + } 8.21 if (checkOutput) { 8.22 - // checked version creates new output buffer each time 8.23 - decode = dCipher.doFinal(encode, 0, encode.length); 8.24 compareArrays(decode, expectedDecode); 8.25 - } else { 8.26 - // non-checked version outputs to existing encode buffer for maximum speed 8.27 - decode = new byte[dCipher.getOutputSize(encode.length)]; 8.28 - dCipher.doFinal(encode, 0, encode.length, decode); 8.29 } 8.30 } 8.31 catch (Exception e) {
9.1 --- a/test/compiler/7184394/TestAESEncode.java Thu May 01 15:02:46 2014 -0700 9.2 +++ b/test/compiler/7184394/TestAESEncode.java Wed Apr 30 14:14:01 2014 -0700 9.3 @@ -1,5 +1,5 @@ 9.4 /* 9.5 - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 9.6 + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. 9.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 9.8 * 9.9 * This code is free software; you can redistribute it and/or modify it 9.10 @@ -33,14 +33,15 @@ 9.11 public void run() { 9.12 try { 9.13 if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams); 9.14 + encode = new byte[encodeLength]; 9.15 + if (testingMisalignment) { 9.16 + int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset); 9.17 + cipher.doFinal(input, (encInputOffset + msgSize - lastChunkSize), lastChunkSize, encode, (encOutputOffset + tempSize)); 9.18 + } else { 9.19 + cipher.doFinal(input, encInputOffset, msgSize, encode, encOutputOffset); 9.20 + } 9.21 if (checkOutput) { 9.22 - // checked version creates new output buffer each time 9.23 - encode = cipher.doFinal(input, 0, msgSize); 9.24 compareArrays(encode, expectedEncode); 9.25 - } else { 9.26 - // non-checked version outputs to existing encode buffer for maximum speed 9.27 - encode = new byte[cipher.getOutputSize(msgSize)]; 9.28 - cipher.doFinal(input, 0, msgSize, encode); 9.29 } 9.30 } 9.31 catch (Exception e) {
10.1 --- a/test/compiler/7184394/TestAESMain.java Thu May 01 15:02:46 2014 -0700 10.2 +++ b/test/compiler/7184394/TestAESMain.java Wed Apr 30 14:14:01 2014 -0700 10.3 @@ -1,5 +1,5 @@ 10.4 /* 10.5 - * Copyright (c) 2012, 2014 Oracle and/or its affiliates. All rights reserved. 10.6 + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. 10.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 10.8 * 10.9 * This code is free software; you can redistribute it and/or modify it 10.10 @@ -28,7 +28,19 @@ 10.11 * @summary add intrinsics to use AES instructions 10.12 * 10.13 * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain 10.14 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 TestAESMain 10.15 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencOutputOffset=1 TestAESMain 10.16 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DdecOutputOffset=1 TestAESMain 10.17 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 -DencOutputOffset=1 TestAESMain 10.18 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain 10.19 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding TestAESMain 10.20 * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain 10.21 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 TestAESMain 10.22 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencOutputOffset=1 TestAESMain 10.23 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DdecOutputOffset=1 TestAESMain 10.24 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 
-DencOutputOffset=1 TestAESMain 10.25 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain 10.26 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding TestAESMain 10.27 * 10.28 * @author Tom Deneau 10.29 */ 10.30 @@ -36,12 +48,13 @@ 10.31 public class TestAESMain { 10.32 public static void main(String[] args) { 10.33 int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 1000000); 10.34 + int warmupIters = (args.length > 1 ? Integer.valueOf(args[1]) : 20000); 10.35 System.out.println(iters + " iterations"); 10.36 TestAESEncode etest = new TestAESEncode(); 10.37 etest.prepare(); 10.38 - // warm-up for 20K iterations 10.39 + // warm-up 10.40 System.out.println("Starting encryption warm-up"); 10.41 - for (int i=0; i<20000; i++) { 10.42 + for (int i=0; i<warmupIters; i++) { 10.43 etest.run(); 10.44 } 10.45 System.out.println("Finished encryption warm-up"); 10.46 @@ -54,9 +67,9 @@ 10.47 10.48 TestAESDecode dtest = new TestAESDecode(); 10.49 dtest.prepare(); 10.50 - // warm-up for 20K iterations 10.51 + // warm-up 10.52 System.out.println("Starting decryption warm-up"); 10.53 - for (int i=0; i<20000; i++) { 10.54 + for (int i=0; i<warmupIters; i++) { 10.55 dtest.run(); 10.56 } 10.57 System.out.println("Finished decryption warm-up");