Thu, 02 Dec 2010 01:02:55 -0800
6998985: faulty generic arraycopy on Windows x86_64: 4th arg overwritten with oop
Reviewed-by: kvn, never
src/cpu/x86/vm/stubGenerator_x86_64.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Dec 01 15:47:52 2010 -0800 1.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Dec 02 01:02:55 2010 -0800 1.3 @@ -2197,9 +2197,6 @@ 1.4 1.5 __ enter(); // required for proper stackwalking of RuntimeStub frame 1.6 1.7 - checkcast_copy_entry = __ pc(); 1.8 - BLOCK_COMMENT("Entry:"); 1.9 - 1.10 #ifdef ASSERT 1.11 // caller guarantees that the arrays really are different 1.12 // otherwise, we would have to make conjoint checks 1.13 @@ -2210,26 +2207,28 @@ 1.14 } 1.15 #endif //ASSERT 1.16 1.17 - // allocate spill slots for r13, r14 1.18 - enum { 1.19 - saved_r13_offset, 1.20 - saved_r14_offset, 1.21 - saved_rbp_offset, 1.22 - saved_rip_offset, 1.23 - saved_rarg0_offset 1.24 - }; 1.25 - __ subptr(rsp, saved_rbp_offset * wordSize); 1.26 - __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); 1.27 - __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); 1.28 setup_arg_regs(4); // from => rdi, to => rsi, length => rdx 1.29 // ckoff => rcx, ckval => r8 1.30 // r9 and r10 may be used to save non-volatile registers 1.31 #ifdef _WIN64 1.32 // last argument (#4) is on stack on Win64 1.33 - const int ckval_offset = saved_rarg0_offset + 4; 1.34 - __ movptr(ckval, Address(rsp, ckval_offset * wordSize)); 1.35 + __ movptr(ckval, Address(rsp, 6 * wordSize)); 1.36 #endif 1.37 1.38 + // Caller of this entry point must set up the argument registers. 
1.39 + checkcast_copy_entry = __ pc(); 1.40 + BLOCK_COMMENT("Entry:"); 1.41 + 1.42 + // allocate spill slots for r13, r14 1.43 + enum { 1.44 + saved_r13_offset, 1.45 + saved_r14_offset, 1.46 + saved_rbp_offset 1.47 + }; 1.48 + __ subptr(rsp, saved_rbp_offset * wordSize); 1.49 + __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); 1.50 + __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); 1.51 + 1.52 // check that int operands are properly extended to size_t 1.53 assert_clean_int(length, rax); 1.54 assert_clean_int(ckoff, rax); 1.55 @@ -2443,11 +2442,10 @@ 1.56 const Register src_pos = c_rarg1; // source position 1.57 const Register dst = c_rarg2; // destination array oop 1.58 const Register dst_pos = c_rarg3; // destination position 1.59 - // elements count is on stack on Win64 1.60 -#ifdef _WIN64 1.61 -#define C_RARG4 Address(rsp, 6 * wordSize) 1.62 +#ifndef _WIN64 1.63 + const Register length = c_rarg4; 1.64 #else 1.65 -#define C_RARG4 c_rarg4 1.66 + const Address length(rsp, 6 * wordSize); // elements count is on stack on Win64 1.67 #endif 1.68 1.69 { int modulus = CodeEntryAlignment; 1.70 @@ -2514,27 +2512,27 @@ 1.71 // registers used as temp 1.72 const Register r11_length = r11; // elements count to copy 1.73 const Register r10_src_klass = r10; // array klass 1.74 - const Register r9_dst_klass = r9; // dest array klass 1.75 1.76 // if (length < 0) return -1; 1.77 - __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value) 1.78 + __ movl(r11_length, length); // length (elements count, 32-bits value) 1.79 __ testl(r11_length, r11_length); 1.80 __ jccb(Assembler::negative, L_failed_0); 1.81 1.82 __ load_klass(r10_src_klass, src); 1.83 #ifdef ASSERT 1.84 // assert(src->klass() != NULL); 1.85 - BLOCK_COMMENT("assert klasses not null"); 1.86 - { Label L1, L2; 1.87 + { 1.88 + BLOCK_COMMENT("assert klasses not null {"); 1.89 + Label L1, L2; 1.90 __ testptr(r10_src_klass, r10_src_klass); 1.91 __ jcc(Assembler::notZero, L2); // it is broken if 
klass is NULL 1.92 __ bind(L1); 1.93 __ stop("broken null klass"); 1.94 __ bind(L2); 1.95 - __ load_klass(r9_dst_klass, dst); 1.96 - __ cmpq(r9_dst_klass, 0); 1.97 + __ load_klass(rax, dst); 1.98 + __ cmpq(rax, 0); 1.99 __ jcc(Assembler::equal, L1); // this would be broken also 1.100 - BLOCK_COMMENT("assert done"); 1.101 + BLOCK_COMMENT("} assert klasses not null done"); 1.102 } 1.103 #endif 1.104 1.105 @@ -2546,34 +2544,36 @@ 1.106 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 1.107 // 1.108 1.109 - int lh_offset = klassOopDesc::header_size() * HeapWordSize + 1.110 - Klass::layout_helper_offset_in_bytes(); 1.111 + const int lh_offset = klassOopDesc::header_size() * HeapWordSize + 1.112 + Klass::layout_helper_offset_in_bytes(); 1.113 + 1.114 + // Handle objArrays completely differently... 1.115 + const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 1.116 + __ cmpl(Address(r10_src_klass, lh_offset), objArray_lh); 1.117 + __ jcc(Assembler::equal, L_objArray); 1.118 + 1.119 + // if (src->klass() != dst->klass()) return -1; 1.120 + __ load_klass(rax, dst); 1.121 + __ cmpq(r10_src_klass, rax); 1.122 + __ jcc(Assembler::notEqual, L_failed); 1.123 1.124 const Register rax_lh = rax; // layout helper 1.125 - 1.126 __ movl(rax_lh, Address(r10_src_klass, lh_offset)); 1.127 1.128 - // Handle objArrays completely differently... 1.129 - jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 1.130 - __ cmpl(rax_lh, objArray_lh); 1.131 - __ jcc(Assembler::equal, L_objArray); 1.132 - 1.133 - // if (src->klass() != dst->klass()) return -1; 1.134 - __ load_klass(r9_dst_klass, dst); 1.135 - __ cmpq(r10_src_klass, r9_dst_klass); 1.136 - __ jcc(Assembler::notEqual, L_failed); 1.137 - 1.138 // if (!src->is_Array()) return -1; 1.139 __ cmpl(rax_lh, Klass::_lh_neutral_value); 1.140 __ jcc(Assembler::greaterEqual, L_failed); 1.141 1.142 // At this point, it is known to be a typeArray (array_tag 0x3). 
1.143 #ifdef ASSERT 1.144 - { Label L; 1.145 + { 1.146 + BLOCK_COMMENT("assert primitive array {"); 1.147 + Label L; 1.148 __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); 1.149 __ jcc(Assembler::greaterEqual, L); 1.150 __ stop("must be a primitive array"); 1.151 __ bind(L); 1.152 + BLOCK_COMMENT("} assert primitive array done"); 1.153 } 1.154 #endif 1.155 1.156 @@ -2631,11 +2631,14 @@ 1.157 1.158 __ BIND(L_copy_longs); 1.159 #ifdef ASSERT 1.160 - { Label L; 1.161 + { 1.162 + BLOCK_COMMENT("assert long copy {"); 1.163 + Label L; 1.164 __ cmpl(rax_elsize, LogBytesPerLong); 1.165 __ jcc(Assembler::equal, L); 1.166 __ stop("must be long copy, but elsize is wrong"); 1.167 __ bind(L); 1.168 + BLOCK_COMMENT("} assert long copy done"); 1.169 } 1.170 #endif 1.171 __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr 1.172 @@ -2645,12 +2648,12 @@ 1.173 1.174 // objArrayKlass 1.175 __ BIND(L_objArray); 1.176 - // live at this point: r10_src_klass, src[_pos], dst[_pos] 1.177 + // live at this point: r10_src_klass, r11_length, src[_pos], dst[_pos] 1.178 1.179 Label L_plain_copy, L_checkcast_copy; 1.180 // test array classes for subtyping 1.181 - __ load_klass(r9_dst_klass, dst); 1.182 - __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality 1.183 + __ load_klass(rax, dst); 1.184 + __ cmpq(r10_src_klass, rax); // usual case is exact equality 1.185 __ jcc(Assembler::notEqual, L_checkcast_copy); 1.186 1.187 // Identically typed arrays can be copied without element-wise checks. 
1.188 @@ -2666,41 +2669,33 @@ 1.189 __ jump(RuntimeAddress(oop_copy_entry)); 1.190 1.191 __ BIND(L_checkcast_copy); 1.192 - // live at this point: r10_src_klass, !r11_length 1.193 + // live at this point: r10_src_klass, r11_length, rax (dst_klass) 1.194 { 1.195 - // assert(r11_length == C_RARG4); // will reload from here 1.196 - Register r11_dst_klass = r11; 1.197 - __ load_klass(r11_dst_klass, dst); 1.198 - 1.199 // Before looking at dst.length, make sure dst is also an objArray. 1.200 - __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh); 1.201 + __ cmpl(Address(rax, lh_offset), objArray_lh); 1.202 __ jcc(Assembler::notEqual, L_failed); 1.203 1.204 // It is safe to examine both src.length and dst.length. 1.205 -#ifndef _WIN64 1.206 - arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4, 1.207 - rax, L_failed); 1.208 -#else 1.209 - __ movl(r11_length, C_RARG4); // reload 1.210 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 1.211 rax, L_failed); 1.212 + 1.213 + const Register r11_dst_klass = r11; 1.214 __ load_klass(r11_dst_klass, dst); // reload 1.215 -#endif 1.216 1.217 // Marshal the base address arguments now, freeing registers. 1.218 __ lea(from, Address(src, src_pos, TIMES_OOP, 1.219 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 1.220 __ lea(to, Address(dst, dst_pos, TIMES_OOP, 1.221 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 1.222 - __ movl(count, C_RARG4); // length (reloaded) 1.223 + __ movl(count, length); // length (reloaded) 1.224 Register sco_temp = c_rarg3; // this register is free now 1.225 assert_different_registers(from, to, count, sco_temp, 1.226 r11_dst_klass, r10_src_klass); 1.227 assert_clean_int(count, sco_temp); 1.228 1.229 // Generate the type check. 
1.230 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 1.231 - Klass::super_check_offset_offset_in_bytes()); 1.232 + const int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 1.233 + Klass::super_check_offset_offset_in_bytes()); 1.234 __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 1.235 assert_clean_int(sco_temp, rax); 1.236 generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); 1.237 @@ -2709,12 +2704,14 @@ 1.238 int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 1.239 objArrayKlass::element_klass_offset_in_bytes()); 1.240 __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); 1.241 - __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 1.242 + __ movl( sco_temp, Address(r11_dst_klass, sco_offset)); 1.243 assert_clean_int(sco_temp, rax); 1.244 1.245 // the checkcast_copy loop needs two extra arguments: 1.246 assert(c_rarg3 == sco_temp, "#3 already in place"); 1.247 - __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass 1.248 + // Set up arguments for checkcast_copy_entry. 1.249 + setup_arg_regs(4); 1.250 + __ movptr(r8, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris 1.251 __ jump(RuntimeAddress(checkcast_copy_entry)); 1.252 } 1.253 1.254 @@ -2727,8 +2724,6 @@ 1.255 return start; 1.256 } 1.257 1.258 -#undef length_arg 1.259 - 1.260 void generate_arraycopy_stubs() { 1.261 // Call the conjoint generation methods immediately after 1.262 // the disjoint ones so that short branches from the former