Wed, 23 Dec 2009 02:57:31 -0800
Merge
--- a/src/cpu/x86/vm/assembler_x86.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -7666,7 +7666,7 @@
 
 #ifdef ASSERT
   Label L;
-  testl(tmp, tmp);
+  testptr(tmp, tmp);
   jccb(Assembler::notZero, L);
   hlt();
   bind(L);
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -196,6 +196,9 @@
   } else {
     assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
     movl(reg, Address(rsi, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
     assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
     notl(reg);  // convert to plain index
   }
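Both this 32-bit hunk and the 64-bit code below rely on the same invariant: a secondary (invokedynamic) constant-pool-cache index is stored as the bitwise complement of the plain index, so a single NOT instruction decodes it. A minimal standalone sketch of that invariant (illustrative, not HotSpot source):

    #include <cassert>

    // Assumed encoding: a secondary index x is stored as ~x, so decode is also ~.
    static int encode_secondary_index(int index) { return ~index; }
    static int decode_secondary_index(int code)  { return ~code; }

    int main() {
      // Mirrors the guard assert above: decode_secondary_index(~123) == 123.
      assert(decode_secondary_index(encode_secondary_index(123)) == 123);
      assert(decode_secondary_index(~123) == 123);
      return 0;
    }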
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -185,12 +185,30 @@
 }
 
 
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
+                                                       int bcp_offset,
+                                                       bool giant_index) {
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  if (!giant_index) {
+    load_unsigned_short(index, Address(r13, bcp_offset));
+  } else {
+    assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
+    movl(index, Address(r13, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
+    assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
+    notl(index);  // convert to plain index
+  }
+}
+
+
 void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
                                                            Register index,
-                                                           int bcp_offset) {
-  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+                                                           int bcp_offset,
+                                                           bool giant_index) {
   assert(cache != index, "must use different registers");
-  load_unsigned_short(index, Address(r13, bcp_offset));
+  get_cache_index_at_bcp(index, bcp_offset, giant_index);
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
@@ -200,10 +218,10 @@
 
 void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
                                                                Register tmp,
-                                                               int bcp_offset) {
-  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+                                                               int bcp_offset,
+                                                               bool giant_index) {
   assert(cache != tmp, "must use different register");
-  load_unsigned_short(tmp, Address(r13, bcp_offset));
+  get_cache_index_at_bcp(tmp, bcp_offset, giant_index);
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
   // and from word offset to byte offset
@@ -1236,7 +1254,8 @@
 
 void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
                                                      Register mdp,
-                                                     Register reg2) {
+                                                     Register reg2,
+                                                     bool receiver_can_be_null) {
   if (ProfileInterpreter) {
     Label profile_continue;
 
@@ -1246,8 +1265,15 @@
     // We are making a call.  Increment the count.
     increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
 
+    Label skip_receiver_profile;
+    if (receiver_can_be_null) {
+      testptr(receiver, receiver);
+      jcc(Assembler::zero, skip_receiver_profile);
+    }
+
     // Record the receiver type.
     record_klass_in_profile(receiver, mdp, reg2);
+    bind(skip_receiver_profile);
 
     // The method data pointer needs to be updated to reflect the new target.
     update_mdp_by_constant(mdp,
--- a/src/cpu/x86/vm/interp_masm_x86_64.hpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp	Wed Dec 23 02:57:31 2009 -0800
@@ -95,9 +95,10 @@
 
   void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
   void get_cache_and_index_at_bcp(Register cache, Register index,
-                                  int bcp_offset);
+                                  int bcp_offset, bool giant_index = false);
   void get_cache_entry_pointer_at_bcp(Register cache, Register tmp,
-                                      int bcp_offset);
+                                      int bcp_offset, bool giant_index = false);
+  void get_cache_index_at_bcp(Register index, int bcp_offset, bool giant_index = false);
 
 
   void pop_ptr(Register r = rax);
@@ -236,7 +237,8 @@
   void profile_call(Register mdp);
   void profile_final_call(Register mdp);
   void profile_virtual_call(Register receiver, Register mdp,
-                            Register scratch2);
+                            Register scratch2,
+                            bool receiver_can_be_null = false);
   void profile_ret(Register return_bci, Register mdp);
   void profile_null_seen(Register mdp);
   void profile_typecheck(Register mdp, Register klass, Register scratch);
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -277,12 +277,11 @@
   address entry_point = __ pc();
 
   // abstract method entry
-  // remove return address. Not really needed, since exception
-  // handling throws away expression stack
-  __ pop(rbx);
 
-  // adjust stack to what a normal return would do
-  __ mov(rsp, r13);
+  // pop return address, reset last_sp to NULL
+  __ empty_expression_stack();
+  __ restore_bcp();      // rsi must be correct for exception handler (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
 
   // throw exception
   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
@@ -300,7 +299,10 @@
   if (!EnableMethodHandles) {
     return generate_abstract_entry();
   }
-  return generate_abstract_entry(); //6815692//
+
+  address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm);
+
+  return entry_point;
 }
 
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -448,7 +448,7 @@
                              rbx_index, Address::times_ptr,
                              base + vtableEntry::method_offset_in_bytes());
       Register rbx_method = rbx_temp;
-      __ movl(rbx_method, vtable_entry_addr);
+      __ movptr(rbx_method, vtable_entry_addr);
 
       __ verify_oop(rbx_method);
       __ jmp(rbx_method_fie);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -2935,6 +2935,16 @@
 
     // arraycopy stubs used by compilers
     generate_arraycopy_stubs();
+
+    // generic method handle stubs
+    if (EnableMethodHandles && SystemDictionary::MethodHandle_klass() != NULL) {
+      for (MethodHandles::EntryKind ek = MethodHandles::_EK_FIRST;
+           ek < MethodHandles::_EK_LIMIT;
+           ek = MethodHandles::EntryKind(1 + (int)ek)) {
+        StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek));
+        MethodHandles::generate_method_handle_stub(_masm, ek);
+      }
+    }
   }
 
  public:
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -100,21 +100,26 @@
   return entry;
 }
 
-// Arguments are: required type in rarg1, failing object (or NULL) in rarg2
+// Arguments are: required type at TOS+8, failing object (or NULL) at TOS+4.
 address TemplateInterpreterGenerator::generate_WrongMethodType_handler() {
   address entry = __ pc();
 
   __ pop(c_rarg2);              // failing object is at TOS
   __ pop(c_rarg1);              // required type is at TOS+8
 
-  // expression stack must be empty before entering the VM if an
-  // exception happened
+  __ verify_oop(c_rarg1);
+  __ verify_oop(c_rarg2);
+
+  // Various method handle types use interpreter registers as temps.
+  __ restore_bcp();
+  __ restore_locals();
+
+  // Expression stack must be empty before entering the VM for an exception.
   __ empty_expression_stack();
 
   __ call_VM(noreg,
              CAST_FROM_FN_PTR(address,
-                              InterpreterRuntime::
-                              throw_WrongMethodTypeException),
+                              InterpreterRuntime::throw_WrongMethodTypeException),
              // pass required type, failing object (or NULL)
              c_rarg1, c_rarg2);
   return entry;
@@ -182,15 +187,29 @@
   __ restore_bcp();
   __ restore_locals();
 
-  __ get_cache_and_index_at_bcp(rbx, rcx, 1);
+  Label L_got_cache, L_giant_index;
+  if (EnableInvokeDynamic) {
+    __ cmpb(Address(r13, 0), Bytecodes::_invokedynamic);
+    __ jcc(Assembler::equal, L_giant_index);
+  }
+  __ get_cache_and_index_at_bcp(rbx, rcx, 1, false);
+  __ bind(L_got_cache);
   __ movl(rbx, Address(rbx, rcx,
-                       Address::times_8,
+                       Address::times_ptr,
                        in_bytes(constantPoolCacheOopDesc::base_offset()) +
                        3 * wordSize));
   __ andl(rbx, 0xFF);
   if (TaggedStackInterpreter) __ shll(rbx, 1);  // 2 slots per parameter.
   __ lea(rsp, Address(rsp, rbx, Address::times_8));
   __ dispatch_next(state, step);
+
+  // out of the main line of code...
+  if (EnableInvokeDynamic) {
+    __ bind(L_giant_index);
+    __ get_cache_and_index_at_bcp(rbx, rcx, 1, true);
+    __ jmp(L_got_cache);
+  }
+
   return entry;
 }
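The return-entry change branches on the bytecode at the current bcp: an invokedynamic call site carries a 4-byte ("giant") cache index where every other invoke carries a 2-byte one. A hedged C++ sketch of that decision; the function name and the native-order operands are read off the assembly above and are assumptions, not HotSpot API:

    #include <cstdint>
    #include <cstring>

    static const uint8_t BC_INVOKEDYNAMIC = 0xba;  // JVM opcode for invokedynamic

    // Returns the plain constant-pool-cache index for the invoke at bcp.
    static uint32_t cache_index_at_bcp(const uint8_t* bcp) {
      if (bcp[0] == BC_INVOKEDYNAMIC) {
        uint32_t raw;
        std::memcpy(&raw, bcp + 1, sizeof(raw));  // movl(index, Address(r13, 1))
        return ~raw;                              // notl(index): undo the ~x encoding
      }
      uint16_t index;
      std::memcpy(&index, bcp + 1, sizeof(index));  // load_unsigned_short(...)
      return index;
    }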
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -3146,7 +3146,6 @@
     __ profile_call(rsi);
   }
 
-  Label handle_unlinked_site;
   __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx)));
   __ null_check(rcx);
   __ prepare_to_jump_from_interpreted();
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -203,18 +203,15 @@
     __ jcc(Assembler::notEqual, fast_patch);
     __ get_method(scratch);
     // Let breakpoint table handling rewrite to quicker bytecode
-    __ call_VM(noreg,
-               CAST_FROM_FN_PTR(address,
-                                InterpreterRuntime::set_original_bytecode_at),
-               scratch, r13, bc);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, r13, bc);
 #ifndef ASSERT
     __ jmpb(patch_done);
+#else
+    __ jmp(patch_done);
+#endif
     __ bind(fast_patch);
   }
-#else
-    __ jmp(patch_done);
-    __ bind(fast_patch);
-  }
+#ifdef ASSERT
   Label okay;
   __ load_unsigned_byte(scratch, at_bcp(0));
   __ cmpl(scratch, (int) Bytecodes::java_code(bytecode));
@@ -2054,26 +2051,28 @@
   }
 }
 
-void TemplateTable::resolve_cache_and_index(int byte_no,
-                                            Register Rcache,
-                                            Register index) {
+void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) {
   assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
+  bool is_invokedynamic = (bytecode() == Bytecodes::_invokedynamic);
 
   const Register temp = rbx;
   assert_different_registers(Rcache, index, temp);
 
   const int shift_count = (1 + byte_no) * BitsPerByte;
   Label resolved;
-  __ get_cache_and_index_at_bcp(Rcache, index, 1);
-  __ movl(temp, Address(Rcache,
-                        index, Address::times_8,
-                        constantPoolCacheOopDesc::base_offset() +
-                        ConstantPoolCacheEntry::indices_offset()));
-  __ shrl(temp, shift_count);
-  // have we resolved this bytecode?
-  __ andl(temp, 0xFF);
-  __ cmpl(temp, (int) bytecode());
-  __ jcc(Assembler::equal, resolved);
+  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
+  if (is_invokedynamic) {
+    // we are resolved if the f1 field contains a non-null CallSite object
+    __ cmpptr(Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()), (int32_t) NULL_WORD);
+    __ jcc(Assembler::notEqual, resolved);
+  } else {
+    __ movl(temp, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+    __ shrl(temp, shift_count);
+    // have we resolved this bytecode?
+    __ andl(temp, 0xFF);
+    __ cmpl(temp, (int) bytecode());
+    __ jcc(Assembler::equal, resolved);
+  }
 
   // resolve first time through
   address entry;
@@ -2090,6 +2089,9 @@
   case Bytecodes::_invokeinterface:
     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
     break;
+  case Bytecodes::_invokedynamic:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
+    break;
   default:
     ShouldNotReachHere();
     break;
@@ -2098,7 +2100,7 @@
   __ call_VM(noreg, entry, temp);
 
   // Update registers with resolved info
-  __ get_cache_and_index_at_bcp(Rcache, index, 1);
+  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
   __ bind(resolved);
 }
 
@@ -2832,15 +2834,14 @@
   ShouldNotReachHere();
 }
 
-void TemplateTable::prepare_invoke(Register method,
-                                   Register index,
-                                   int byte_no,
-                                   Bytecodes::Code code) {
+void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) {
   // determine flags
+  Bytecodes::Code code = bytecode();
   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
-  const bool load_receiver       = code != Bytecodes::_invokestatic;
+  const bool load_receiver       = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic);
   const bool receiver_null_check = is_invokespecial;
   const bool save_flags = is_invokeinterface || is_invokevirtual;
   // setup registers & access constant pool cache
@@ -2858,9 +2859,13 @@
     __ movl(recv, flags);
     __ andl(recv, 0xFF);
     if (TaggedStackInterpreter) __ shll(recv, 1);  // index*2
-    __ movptr(recv, Address(rsp, recv, Address::times_8,
-                            -Interpreter::expr_offset_in_bytes(1)));
-    __ verify_oop(recv);
+    Address recv_addr(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1));
+    if (is_invokedynamic) {
+      __ lea(recv, recv_addr);
+    } else {
+      __ movptr(recv, recv_addr);
+      __ verify_oop(recv);
+    }
   }
 
   // do null check if needed
@@ -2878,10 +2883,14 @@
   ConstantPoolCacheEntry::verify_tosBits();
   // load return address
   {
-    ExternalAddress return_5((address)Interpreter::return_5_addrs_by_index_table());
-    ExternalAddress return_3((address)Interpreter::return_3_addrs_by_index_table());
-    __ lea(rscratch1, (is_invokeinterface ? return_5 : return_3));
-    __ movptr(flags, Address(rscratch1, flags, Address::times_8));
+    address table_addr;
+    if (is_invokeinterface || is_invokedynamic)
+      table_addr = (address)Interpreter::return_5_addrs_by_index_table();
+    else
+      table_addr = (address)Interpreter::return_3_addrs_by_index_table();
+    ExternalAddress table(table_addr);
+    __ lea(rscratch1, table);
+    __ movptr(flags, Address(rscratch1, flags, Address::times_ptr));
   }
 
   // push return address
@@ -2947,7 +2956,7 @@
 
 void TemplateTable::invokevirtual(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no, bytecode());
+  prepare_invoke(rbx, noreg, byte_no);
 
   // rbx: index
   // rcx: receiver
@@ -2959,7 +2968,7 @@
 
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no, bytecode());
+  prepare_invoke(rbx, noreg, byte_no);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -2969,7 +2978,7 @@
 
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no, bytecode());
+  prepare_invoke(rbx, noreg, byte_no);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -2983,7 +2992,7 @@
 
 void TemplateTable::invokeinterface(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rax, rbx, byte_no, bytecode());
+  prepare_invoke(rax, rbx, byte_no);
 
   // rax: Interface
   // rbx: index
@@ -3072,7 +3081,24 @@
     return;
   }
 
-  __ stop("invokedynamic NYI");//6815692//
+  prepare_invoke(rax, rbx, byte_no);
+
+  // rax: CallSite object (f1)
+  // rbx: unused (f2)
+  // rcx: receiver address
+  // rdx: flags (unused)
+
+  if (ProfileInterpreter) {
+    Label L;
+    // %%% should make a type profile for any invokedynamic that takes a ref argument
+    // profile this call
+    __ profile_call(r13);
+  }
+
+  __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx)));
+  __ null_check(rcx);
+  __ prepare_to_jump_from_interpreted();
+  __ jump_to_method_handle_entry(rcx, rdx);
 }
 
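resolve_cache_and_index now has two distinct "already resolved?" tests: an invokedynamic site is resolved once its f1 word holds a non-null CallSite oop, while the other invokes still compare the per-bytecode byte packed into the indices word. A sketch of the two predicates, with an illustrative struct standing in for ConstantPoolCacheEntry (layout here is an assumption):

    #include <cstddef>
    #include <cstdint>

    struct CacheEntrySketch {
      uintptr_t indices;  // resolved invoke bytecodes, packed one per byte
      void*     f1;       // non-null CallSite oop once an invokedynamic is resolved
    };

    static bool is_resolved(const CacheEntrySketch& e, int byte_no,
                            uint8_t bytecode, bool is_invokedynamic) {
      if (is_invokedynamic) {
        return e.f1 != NULL;  // cmpptr(f1, NULL_WORD); jcc(notEqual, resolved)
      }
      const int shift_count = (1 + byte_no) * 8;  // (1 + byte_no) * BitsPerByte
      return ((e.indices >> shift_count) & 0xFF) == bytecode;  // shrl; andl; cmpl
    }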
--- a/src/cpu/x86/vm/templateTable_x86_64.hpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_64.hpp	Wed Dec 23 02:57:31 2009 -0800
@@ -22,8 +22,7 @@
  *
  */
 
-  static void prepare_invoke(Register method, Register index, int byte_no,
-                             Bytecodes::Code code);
+  static void prepare_invoke(Register method, Register index, int byte_no);
   static void invokevirtual_helper(Register index, Register recv,
                                    Register flags);
   static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
--- a/src/share/vm/classfile/classFileParser.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/classfile/classFileParser.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -2511,23 +2511,12 @@
         fac_ptr->nonstatic_byte_count -= 1;
         (*fields_ptr)->ushort_at_put(i + instanceKlass::signature_index_offset,
                                      word_sig_index);
-        if (wordSize == jintSize) {
-          fac_ptr->nonstatic_word_count += 1;
-        } else {
-          fac_ptr->nonstatic_double_count += 1;
-        }
-
-        FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i+4);
+        fac_ptr->nonstatic_word_count += 1;
+
+        FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i + instanceKlass::low_offset);
         assert(atype == NONSTATIC_BYTE, "");
         FieldAllocationType new_atype = NONSTATIC_WORD;
-        if (wordSize > jintSize) {
-          if (Universe::field_type_should_be_aligned(T_LONG)) {
-            atype = NONSTATIC_ALIGNED_DOUBLE;
-          } else {
-            atype = NONSTATIC_DOUBLE;
-          }
-        }
-        (*fields_ptr)->ushort_at_put(i+4, new_atype);
+        (*fields_ptr)->ushort_at_put(i + instanceKlass::low_offset, new_atype);
 
         found_vmentry = true;
         break;
@@ -3085,7 +3074,7 @@
   int len = fields->length();
   for (int i = 0; i < len; i += instanceKlass::next_offset) {
     int real_offset;
-    FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i+4);
+    FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i + instanceKlass::low_offset);
     switch (atype) {
       case STATIC_OOP:
         real_offset = next_static_oop_offset;
@@ -3173,8 +3162,8 @@
       default:
         ShouldNotReachHere();
     }
-    fields->short_at_put(i+4, extract_low_short_from_int(real_offset) );
-    fields->short_at_put(i+5, extract_high_short_from_int(real_offset) );
+    fields->short_at_put(i + instanceKlass::low_offset,  extract_low_short_from_int(real_offset));
+    fields->short_at_put(i + instanceKlass::high_offset, extract_high_short_from_int(real_offset));
   }
 
   // Size of instances
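The i+4 and i+5 magic numbers give way to instanceKlass::low_offset and high_offset, which name the two 16-bit halves in which a 32-bit field offset is stored in the fields array. A round-trip sketch; only the helper names come from the hunk above, the bodies are assumptions:

    #include <cassert>
    #include <cstdint>

    static int16_t extract_low_short_from_int(int32_t x)  { return (int16_t)(x & 0xFFFF); }
    static int16_t extract_high_short_from_int(int32_t x) { return (int16_t)((x >> 16) & 0xFFFF); }

    static int32_t rebuild_offset(uint16_t low, uint16_t high) {
      return (int32_t)(((uint32_t)high << 16) | low);
    }

    int main() {
      int32_t real_offset = 0x00012344;
      int16_t lo = extract_low_short_from_int(real_offset);   // stored at i + low_offset
      int16_t hi = extract_high_short_from_int(real_offset);  // stored at i + high_offset
      assert(rebuild_offset((uint16_t)lo, (uint16_t)hi) == real_offset);
      return 0;
    }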
--- a/src/share/vm/code/nmethod.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/code/nmethod.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -414,9 +414,8 @@
 }
 
 const char* nmethod::compile_kind() const {
-  if (method() == NULL)    return "unloaded";
-  if (is_native_method())  return "c2n";
   if (is_osr_method())     return "osr";
+  if (method() != NULL && is_native_method())  return "c2n";
   return NULL;
 }
 
@@ -1127,6 +1126,9 @@
   }
   flags.state = unloaded;
 
+  // Log the unloading.
+  log_state_change();
+
   // The methodOop is gone at this point
   assert(_method == NULL, "Tautology");
 
@@ -1137,8 +1139,6 @@
 
 void nmethod::invalidate_osr_method() {
   assert(_entry_bci != InvocationEntryBci, "wrong kind of nmethod");
-  if (_entry_bci != InvalidOSREntryBci)
-    inc_decompile_count();
   // Remove from list of active nmethods
   if (method() != NULL)
     instanceKlass::cast(method()->method_holder())->remove_osr_nmethod(this);
@@ -1146,59 +1146,63 @@
   _entry_bci = InvalidOSREntryBci;
 }
 
-void nmethod::log_state_change(int state) const {
+void nmethod::log_state_change() const {
   if (LogCompilation) {
     if (xtty != NULL) {
       ttyLocker ttyl;  // keep the following output all in one block
-      xtty->begin_elem("make_not_entrant %sthread='" UINTX_FORMAT "'",
-                       (state == zombie ? "zombie='1' " : ""),
-                       os::current_thread_id());
+      if (flags.state == unloaded) {
+        xtty->begin_elem("make_unloaded thread='" UINTX_FORMAT "'",
+                         os::current_thread_id());
+      } else {
+        xtty->begin_elem("make_not_entrant thread='" UINTX_FORMAT "'%s",
+                         os::current_thread_id(),
+                         (flags.state == zombie ? " zombie='1'" : ""));
+      }
       log_identity(xtty);
       xtty->stamp();
       xtty->end_elem();
     }
   }
-  if (PrintCompilation) {
-    print_on(tty, state == zombie ? "made zombie " : "made not entrant ");
+  if (PrintCompilation && flags.state != unloaded) {
+    print_on(tty, flags.state == zombie ? "made zombie " : "made not entrant ");
     tty->cr();
   }
 }
 
 // Common functionality for both make_not_entrant and make_zombie
-void nmethod::make_not_entrant_or_zombie(int state) {
+bool nmethod::make_not_entrant_or_zombie(int state) {
   assert(state == zombie || state == not_entrant, "must be zombie or not_entrant");
 
-  // Code for an on-stack-replacement nmethod is removed when a class gets unloaded.
-  // They never become zombie/non-entrant, so the nmethod sweeper will never remove
-  // them. Instead the entry_bci is set to InvalidOSREntryBci, so the osr nmethod
-  // will never be used anymore. That the nmethods only gets removed when class unloading
-  // happens, make life much simpler, since the nmethods are not just going to disappear
-  // out of the blue.
-  if (is_osr_method()) {
-    if (osr_entry_bci() != InvalidOSREntryBci) {
-      // only log this once
-      log_state_change(state);
-    }
-    invalidate_osr_method();
-    return;
+  // If the method is already zombie there is nothing to do
+  if (is_zombie()) {
+    return false;
   }
 
-  // If the method is already zombie or set to the state we want, nothing to do
-  if (is_zombie() || (state == not_entrant && is_not_entrant())) {
-    return;
-  }
-
-  log_state_change(state);
-
   // Make sure the nmethod is not flushed in case of a safepoint in code below.
   nmethodLocker nml(this);
 
   {
+    // invalidate osr nmethod before acquiring the patching lock since
+    // they both acquire leaf locks and we don't want a deadlock.
+    // This logic is equivalent to the logic below for patching the
+    // verified entry point of regular methods.
+    if (is_osr_method()) {
+      // this effectively makes the osr nmethod not entrant
+      invalidate_osr_method();
+    }
+
     // Enter critical section.  Does not block for safepoint.
     MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+
+    if (flags.state == state) {
+      // another thread already performed this transition so nothing
+      // to do, but return false to indicate this.
+      return false;
+    }
+
     // The caller can be calling the method statically or through an inline
     // cache call.
-    if (!is_not_entrant()) {
+    if (!is_osr_method() && !is_not_entrant()) {
       NativeJump::patch_verified_entry(entry_point(), verified_entry_point(),
                   SharedRuntime::get_handle_wrong_method_stub());
       assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, "");
@@ -1217,6 +1221,10 @@
 
     // Change state
     flags.state = state;
+
+    // Log the transition once
+    log_state_change();
+
   } // leave critical region under Patching_lock
 
   if (state == not_entrant) {
@@ -1240,7 +1248,6 @@
     // It's a true state change, so mark the method as decompiled.
     inc_decompile_count();
 
-
     // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event
     // and it hasn't already been reported for this nmethod then report it now.
     // (the event may have been reported earilier if the GC marked it for unloading).
@@ -1268,7 +1275,7 @@
 
   // Check whether method got unloaded at a safepoint before this,
   // if so we can skip the flushing steps below
-  if (method() == NULL) return;
+  if (method() == NULL) return true;
 
   // Remove nmethod from method.
   // We need to check if both the _code and _from_compiled_code_entry_point
@@ -1282,6 +1289,8 @@
     HandleMark hm;
     method()->clear_code();
   }
+
+  return true;
 }
 
--- a/src/share/vm/code/nmethod.hpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/code/nmethod.hpp	Wed Dec 23 02:57:31 2009 -0800
@@ -252,7 +252,9 @@
   void* operator new(size_t size, int nmethod_size);
 
   const char* reloc_string_for(u_char* begin, u_char* end);
-  void make_not_entrant_or_zombie(int state);
+  // Returns true if this thread changed the state of the nmethod or
+  // false if another thread performed the transition.
+  bool make_not_entrant_or_zombie(int state);
   void inc_decompile_count();
 
   // used to check that writes to nmFlags are done consistently.
@@ -375,10 +377,12 @@
   bool  is_zombie() const                         { return flags.state == zombie; }
   bool  is_unloaded() const                       { return flags.state == unloaded; }
 
-  // Make the nmethod non entrant. The nmethod will continue to be alive.
-  // It is used when an uncommon trap happens.
-  void  make_not_entrant()                        { make_not_entrant_or_zombie(not_entrant); }
-  void  make_zombie()                             { make_not_entrant_or_zombie(zombie); }
+  // Make the nmethod non entrant. The nmethod will continue to be
+  // alive.  It is used when an uncommon trap happens.  Returns true
+  // if this thread changed the state of the nmethod or false if
+  // another thread performed the transition.
+  bool  make_not_entrant()                        { return make_not_entrant_or_zombie(not_entrant); }
+  bool  make_zombie()                             { return make_not_entrant_or_zombie(zombie); }
 
   // used by jvmti to track if the unload event has been reported
   bool  unload_reported()                         { return _unload_reported; }
@@ -563,7 +567,7 @@
   // Logging
   void log_identity(xmlStream* log) const;
   void log_new_nmethod() const;
-  void log_state_change(int state) const;
+  void log_state_change() const;
 
   // Prints a comment for one native instruction (reloc info, pc desc)
   void print_code_comment_on(outputStream* st, int column, address begin, address end);
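With the bool return, a caller can tell whether it won the state transition or lost the race to another thread. A hypothetical caller, not part of this changeset:

    // Minimal stand-in for the nmethod interface above (sketch only).
    struct NMethodLike {
      bool make_not_entrant();  // true iff this thread performed the transition
    };

    void deoptimize_once(NMethodLike* nm) {
      if (nm->make_not_entrant()) {
        // Won the race: one-time bookkeeping (counters, events) goes here.
      } else {
        // Another thread already transitioned the nmethod; skip duplicate work.
      }
    }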
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -42,28 +42,49 @@
   _n_periods(0),
   _threads(NULL), _n_threads(0)
 {
-  if (G1ConcRefine) {
-    _n_threads = (int)thread_num();
-    if (_n_threads > 0) {
-      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
-      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
-      ConcurrentG1RefineThread *next = NULL;
-      for (int i = _n_threads - 1; i >= 0; i--) {
-        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
-        assert(t != NULL, "Conc refine should have been created");
-        assert(t->cg1r() == this, "Conc refine thread should refer to this");
-        _threads[i] = t;
-        next = t;
-      }
-    }
+
+  // Ergonomically select initial concurrent refinement parameters
+  if (FLAG_IS_DEFAULT(G1ConcRefineGreenZone)) {
+    FLAG_SET_DEFAULT(G1ConcRefineGreenZone, MAX2<int>(ParallelGCThreads, 1));
+  }
+  set_green_zone(G1ConcRefineGreenZone);
+
+  if (FLAG_IS_DEFAULT(G1ConcRefineYellowZone)) {
+    FLAG_SET_DEFAULT(G1ConcRefineYellowZone, green_zone() * 3);
+  }
+  set_yellow_zone(MAX2<int>(G1ConcRefineYellowZone, green_zone()));
+
+  if (FLAG_IS_DEFAULT(G1ConcRefineRedZone)) {
+    FLAG_SET_DEFAULT(G1ConcRefineRedZone, yellow_zone() * 2);
+  }
+  set_red_zone(MAX2<int>(G1ConcRefineRedZone, yellow_zone()));
+  _n_worker_threads = thread_num();
+  // We need one extra thread to do the young gen rset size sampling.
+  _n_threads = _n_worker_threads + 1;
+  reset_threshold_step();
+
+  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
+  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
+  ConcurrentG1RefineThread *next = NULL;
+  for (int i = _n_threads - 1; i >= 0; i--) {
+    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
+    assert(t != NULL, "Conc refine should have been created");
+    assert(t->cg1r() == this, "Conc refine thread should refer to this");
+    _threads[i] = t;
+    next = t;
   }
 }
 
-size_t ConcurrentG1Refine::thread_num() {
-  if (G1ConcRefine) {
-    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
+void ConcurrentG1Refine::reset_threshold_step() {
+  if (FLAG_IS_DEFAULT(G1ConcRefineThresholdStep)) {
+    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
+  } else {
+    _thread_threshold_step = G1ConcRefineThresholdStep;
   }
-  return 0;
+}
+
+int ConcurrentG1Refine::thread_num() {
+  return MAX2<int>((G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads, 1);
 }
 
 void ConcurrentG1Refine::init() {
@@ -123,6 +144,15 @@
   }
 }
 
+void ConcurrentG1Refine::reinitialize_threads() {
+  reset_threshold_step();
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      _threads[i]->initialize();
+    }
+  }
+}
+
 ConcurrentG1Refine::~ConcurrentG1Refine() {
   if (G1ConcRSLogCacheSize > 0) {
     assert(_card_counts != NULL, "Logic");
@@ -384,4 +414,3 @@
     st->cr();
   }
 }
-
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Dec 23 02:57:31 2009 -0800
@@ -29,6 +29,31 @@
 class ConcurrentG1Refine: public CHeapObj {
   ConcurrentG1RefineThread** _threads;
   int _n_threads;
+  int _n_worker_threads;
+ /*
+  * The value of the update buffer queue length falls into one of 3 zones:
+  * green, yellow, red. If the value is in [0, green) nothing is
+  * done, the buffers are left unprocessed to enable the caching effect of the
+  * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
+  * threads are gradually activated. In [yellow, red) all threads are
+  * running. If the length becomes red (max queue length) the mutators start
+  * processing the buffers.
+  *
+  * There are some interesting cases (with G1AdaptiveConcRefine turned off):
+  * 1) green = yellow = red = 0. In this case the mutator will process all
+  *    buffers. Except for those that are created by the deferred updates
+  *    machinery during a collection.
+  * 2) green = 0. Means no caching. Can be a good way to minimize the
+  *    amount of time spent updating rsets during a collection.
+  */
+  int _green_zone;
+  int _yellow_zone;
+  int _red_zone;
+
+  int _thread_threshold_step;
+
+  // Reset the threshold step value based on the current zone boundaries.
+  void reset_threshold_step();
 
   // The cache for card refinement.
   bool     _use_cache;
@@ -147,6 +172,8 @@
   void init(); // Accomplish some initialization that has to wait.
   void stop();
 
+  void reinitialize_threads();
+
   // Iterate over the conc refine threads
   void threads_do(ThreadClosure *tc);
 
@@ -178,7 +205,20 @@
 
   void clear_and_record_card_counts();
 
-  static size_t thread_num();
+  static int thread_num();
 
   void print_worker_threads_on(outputStream* st) const;
+
+  void set_green_zone(int x)  { _green_zone = x;  }
+  void set_yellow_zone(int x) { _yellow_zone = x; }
+  void set_red_zone(int x)    { _red_zone = x;    }
+
+  int green_zone() const      { return _green_zone;  }
+  int yellow_zone() const     { return _yellow_zone; }
+  int red_zone() const        { return _red_zone;    }
+
+  int total_thread_num() const  { return _n_threads;        }
+  int worker_thread_num() const { return _n_worker_threads; }
+
+  int thread_threshold_step() const { return _thread_threshold_step; }
 };
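The zones also induce the per-worker activation and deactivation thresholds computed in ConcurrentG1RefineThread::initialize() below. A worked example, assuming green = 4, yellow = 12 and 3 worker threads:

    #include <algorithm>
    #include <cstdio>

    int main() {
      int green = 4, yellow = 12, workers = 3;
      int step = (yellow - green) / (workers + 1);  // reset_threshold_step() default: 2
      for (int id = 0; id < workers; id++) {
        int threshold  = std::min(step * (id + 1) + green, yellow);
        int deactivate = std::max(threshold - step, green);
        // Prints: worker 0 activates at 6 (deactivates at 4), worker 1 at 8 (6),
        // worker 2 at 10 (8), so workers come online as the queue grows.
        std::printf("worker %d: activate at %d, deactivate at %d\n", id, threshold, deactivate);
      }
      return 0;
    }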
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -25,10 +25,6 @@
 #include "incls/_precompiled.incl"
 #include "incls/_concurrentG1RefineThread.cpp.incl"
 
-// ======= Concurrent Mark Thread ========
-
-// The CM thread is created when the G1 garbage collector is used
-
 ConcurrentG1RefineThread::
 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
                          int worker_id_offset, int worker_id) :
@@ -37,19 +33,42 @@
   _worker_id(worker_id),
   _active(false),
   _next(next),
+  _monitor(NULL),
   _cg1r(cg1r),
-  _vtime_accum(0.0),
-  _interval_ms(5.0)
+  _vtime_accum(0.0)
 {
+
+  // Each thread has its own monitor. The i-th thread is responsible for signalling
+  // to thread i+1 if the number of buffers in the queue exceeds a threshold for this
+  // thread. Monitors are also used to wake up the threads during termination.
+  // The 0th worker is notified by mutator threads and has a special monitor.
+  // The last worker is used for young gen rset size sampling.
+  if (worker_id > 0) {
+    _monitor = new Monitor(Mutex::nonleaf, "Refinement monitor", true);
+  } else {
+    _monitor = DirtyCardQ_CBL_mon;
+  }
+  initialize();
   create_and_start();
 }
 
+void ConcurrentG1RefineThread::initialize() {
+  if (_worker_id < cg1r()->worker_thread_num()) {
+    // Current thread activation threshold
+    _threshold = MIN2<int>(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(),
+                           cg1r()->yellow_zone());
+    // A thread deactivates once the number of buffers reaches a deactivation threshold
+    _deactivation_threshold = MAX2<int>(_threshold - cg1r()->thread_threshold_step(), cg1r()->green_zone());
+  } else {
+    set_active(true);
+  }
+}
+
 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
   if (g1p->adaptive_young_list_length()) {
     int regions_visited = 0;
-
     g1h->young_list_rs_length_sampling_init();
     while (g1h->young_list_rs_length_sampling_more()) {
       g1h->young_list_rs_length_sampling_next();
@@ -70,99 +89,121 @@
   }
 }
 
+void ConcurrentG1RefineThread::run_young_rs_sampling() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  _vtime_start = os::elapsedVTime();
+  while(!_should_terminate) {
+    _sts.join();
+    sample_young_list_rs_lengths();
+    _sts.leave();
+
+    if (os::supports_vtime()) {
+      _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    } else {
+      _vtime_accum = 0.0;
+    }
+
+    MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+    if (_should_terminate) {
+      break;
+    }
+    _monitor->wait(Mutex::_no_safepoint_check_flag, G1ConcRefineServiceInterval);
+  }
+}
+
+void ConcurrentG1RefineThread::wait_for_completed_buffers() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+  while (!_should_terminate && !is_active()) {
+    _monitor->wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+bool ConcurrentG1RefineThread::is_active() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  return _worker_id > 0 ? _active : dcqs.process_completed_buffers();
+}
+
+void ConcurrentG1RefineThread::activate() {
+  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+  if (_worker_id > 0) {
+    if (G1TraceConcurrentRefinement) {
+      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+      gclog_or_tty->print_cr("G1-Refine-activated worker %d, on threshold %d, current %d",
+                             _worker_id, _threshold, (int)dcqs.completed_buffers_num());
+    }
+    set_active(true);
+  } else {
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    dcqs.set_process_completed(true);
+  }
+  _monitor->notify();
+}
+
+void ConcurrentG1RefineThread::deactivate() {
+  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+  if (_worker_id > 0) {
+    if (G1TraceConcurrentRefinement) {
+      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+      gclog_or_tty->print_cr("G1-Refine-deactivated worker %d, off threshold %d, current %d",
                             _worker_id, _deactivation_threshold, (int)dcqs.completed_buffers_num());
+    }
+    set_active(false);
+  } else {
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    dcqs.set_process_completed(false);
+  }
+}
+
 void ConcurrentG1RefineThread::run() {
   initialize_in_thread();
-  _vtime_start = os::elapsedVTime();
   wait_for_universe_init();
 
+  if (_worker_id >= cg1r()->worker_thread_num()) {
+    run_young_rs_sampling();
+    terminate();
+  }
+
+  _vtime_start = os::elapsedVTime();
   while (!_should_terminate) {
     DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-    // Wait for completed log buffers to exist.
-    {
-      MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
-      while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
-              (_worker_id > 0 && !is_active())) &&
-             !_should_terminate) {
-        DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
-      }
+
+    // Wait for work
+    wait_for_completed_buffers();
+
+    if (_should_terminate) {
+      break;
     }
 
-    if (_should_terminate) {
-      return;
-    }
+    _sts.join();
 
-    // Now we take them off (this doesn't hold locks while it applies
-    // closures.)  (If we did a full collection, then we'll do a full
-    // traversal.
-    _sts.join();
-    int n_logs = 0;
-    int lower_limit = 0;
-    double start_vtime_sec; // only used when G1SmoothConcRefine is on
-    int prev_buffer_num; // only used when G1SmoothConcRefine is on
-    // This thread activation threshold
-    int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id;
-    // Next thread activation threshold
-    int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold;
-    int deactivation_threshold = MAX2<int>(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0);
+    do {
+      int curr_buffer_num = (int)dcqs.completed_buffers_num();
+      // If the number of the buffers falls down into the yellow zone,
+      // that means that the transition period after the evacuation pause has ended.
+      if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) {
+        dcqs.set_completed_queue_padding(0);
+      }
 
-    if (G1SmoothConcRefine) {
-      lower_limit = 0;
-      start_vtime_sec = os::elapsedVTime();
-      prev_buffer_num = (int) dcqs.completed_buffers_num();
-    } else {
-      lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now.
-    }
-    while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
-      double end_vtime_sec;
-      double elapsed_vtime_sec;
-      int elapsed_vtime_ms;
-      int curr_buffer_num = (int) dcqs.completed_buffers_num();
-
-      if (G1SmoothConcRefine) {
-        end_vtime_sec = os::elapsedVTime();
-        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
-        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
-
-        if (curr_buffer_num > prev_buffer_num ||
-            curr_buffer_num > next_threshold) {
-          decreaseInterval(elapsed_vtime_ms);
-        } else if (curr_buffer_num < prev_buffer_num) {
-          increaseInterval(elapsed_vtime_ms);
-        }
-      }
-      if (_worker_id == 0) {
-        sample_young_list_rs_lengths();
-      } else if (curr_buffer_num < deactivation_threshold) {
+      if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) {
         // If the number of the buffer has fallen below our threshold
         // we should deactivate. The predecessor will reactivate this
         // thread should the number of the buffers cross the threshold again.
-        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
         deactivate();
-        if (G1TraceConcurrentRefinement) {
-          gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
-        }
         break;
       }
 
       // Check if we need to activate the next thread.
-      if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
-        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+      if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
        _next->activate();
-        DirtyCardQ_CBL_mon->notify_all();
-        if (G1TraceConcurrentRefinement) {
-          gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
-        }
      }
+    } while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, cg1r()->green_zone()));
 
-      if (G1SmoothConcRefine) {
-        prev_buffer_num = curr_buffer_num;
-        _sts.leave();
-        os::sleep(Thread::current(), (jlong) _interval_ms, false);
-        _sts.join();
-        start_vtime_sec = os::elapsedVTime();
-      }
-      n_logs++;
+    // We can exit the loop above while being active if there was a yield request.
+    if (is_active()) {
+      deactivate();
     }
+
     _sts.leave();
 
     if (os::supports_vtime()) {
@@ -172,7 +213,6 @@
     }
   }
   assert(_should_terminate, "just checking");
-
   terminate();
 }
 
@@ -191,8 +231,8 @@
   }
 
   {
-    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
-    DirtyCardQ_CBL_mon->notify_all();
+    MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+    _monitor->notify();
  }
 
  {
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Dec 23 02:57:31 2009 -0800
@@ -40,42 +40,36 @@
   // when the number of the rset update buffer crosses a certain threshold. A successor
   // would self-deactivate when the number of the buffers falls below the threshold.
   bool _active;
-  ConcurrentG1RefineThread *      _next;
- public:
-  virtual void run();
+  ConcurrentG1RefineThread* _next;
+  Monitor* _monitor;
+  ConcurrentG1Refine* _cg1r;
 
-  bool is_active()  { return _active;  }
-  void activate()   { _active = true;  }
-  void deactivate() { _active = false; }
+  int _thread_threshold_step;
+  // This thread activation threshold
+  int _threshold;
+  // This thread deactivation threshold
+  int _deactivation_threshold;
 
- private:
-  ConcurrentG1Refine*              _cg1r;
+  void sample_young_list_rs_lengths();
+  void run_young_rs_sampling();
+  void wait_for_completed_buffers();
 
-  double _interval_ms;
-
-  void decreaseInterval(int processing_time_ms) {
-    double min_interval_ms = (double) processing_time_ms;
-    _interval_ms = 0.8 * _interval_ms;
-    if (_interval_ms < min_interval_ms)
-      _interval_ms = min_interval_ms;
-  }
-  void increaseInterval(int processing_time_ms) {
-    double max_interval_ms = 9.0 * (double) processing_time_ms;
-    _interval_ms = 1.1 * _interval_ms;
-    if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
-      _interval_ms = max_interval_ms;
-  }
-
-  void sleepBeforeNextCycle();
+  void set_active(bool x) { _active = x; }
+  bool is_active();
+  void activate();
+  void deactivate();
 
   // For use by G1CollectedHeap, which is a friend.
   static SuspendibleThreadSet* sts() { return &_sts; }
 
- public:
+public:
+  virtual void run();
   // Constructor
   ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
                            int worker_id_offset, int worker_id);
 
+  void initialize();
+
   // Printing
   void print() const;
   void print_on(outputStream* st) const;
@@ -83,13 +77,10 @@
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
 
-  ConcurrentG1Refine* cg1r() { return _cg1r;     }
-
-  void sample_young_list_rs_lengths();
+  ConcurrentG1Refine* cg1r() { return _cg1r; }
 
   // Yield for GC
-  void yield();
-
+  void yield();
   // shutdown
   void stop();
 };
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -760,7 +760,6 @@
   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
   satb_mq_set.set_active_all_threads(true);
 
   // update_g1_committed() will be called at the end of an evac pause
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -61,8 +61,8 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-DirtyCardQueueSet::DirtyCardQueueSet() :
-  PtrQueueSet(true /*notify_when_complete*/),
+DirtyCardQueueSet::DirtyCardQueueSet(bool notify_when_complete) :
+  PtrQueueSet(notify_when_complete),
   _closure(NULL),
   _shared_dirty_card_queue(this, true /*perm*/),
   _free_ids(NULL),
@@ -77,12 +77,12 @@
 }
 
 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                   int process_completed_threshold,
                                    int max_completed_queue,
                                    Mutex* lock, PtrQueueSet* fl_owner) {
-  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner);
+  PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold,
+                          max_completed_queue, fl_owner);
   set_buffer_size(G1UpdateBufferSize);
-  set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold);
-
   _shared_dirty_card_queue.set_lock(lock);
   _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
 }
@@ -154,9 +154,10 @@
   return b;
 }
 
-DirtyCardQueueSet::CompletedBufferNode*
+
+BufferNode*
 DirtyCardQueueSet::get_completed_buffer(int stop_at) {
-  CompletedBufferNode* nd = NULL;
+  BufferNode* nd = NULL;
   MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 
   if ((int)_n_completed_buffers <= stop_at) {
@@ -166,10 +167,11 @@
 
   if (_completed_buffers_head != NULL) {
     nd = _completed_buffers_head;
-    _completed_buffers_head = nd->next;
+    _completed_buffers_head = nd->next();
     if (_completed_buffers_head == NULL)
       _completed_buffers_tail = NULL;
     _n_completed_buffers--;
+    assert(_n_completed_buffers >= 0, "Invariant");
   }
   debug_only(assert_completed_buffer_list_len_correct_locked());
   return nd;
@@ -177,20 +179,19 @@
 
 bool DirtyCardQueueSet::
 apply_closure_to_completed_buffer_helper(int worker_i,
-                                         CompletedBufferNode* nd) {
+                                         BufferNode* nd) {
   if (nd != NULL) {
+    void **buf = BufferNode::make_buffer_from_node(nd);
+    size_t index = nd->index();
     bool b =
-      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf,
-                                              nd->index, _sz,
+      DirtyCardQueue::apply_closure_to_buffer(_closure, buf,
+                                              index, _sz,
                                               true, worker_i);
-    void** buf = nd->buf;
-    size_t index = nd->index;
-    delete nd;
     if (b) {
       deallocate_buffer(buf);
       return true;  // In normal case, go on to next buffer.
     } else {
-      enqueue_complete_buffer(buf, index, true);
+      enqueue_complete_buffer(buf, index);
      return false;
    }
  } else {
@@ -203,32 +204,33 @@
                                                  bool during_pause)
 {
   assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
-  CompletedBufferNode* nd = get_completed_buffer(stop_at);
+  BufferNode* nd = get_completed_buffer(stop_at);
   bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
   if (res) Atomic::inc(&_processed_buffers_rs_thread);
   return res;
 }
 
 void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
-  CompletedBufferNode* nd = _completed_buffers_head;
+  BufferNode* nd = _completed_buffers_head;
   while (nd != NULL) {
     bool b =
-      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz,
-                                              false);
+      DirtyCardQueue::apply_closure_to_buffer(_closure,
+                                              BufferNode::make_buffer_from_node(nd),
+                                              0, _sz, false);
     guarantee(b, "Should not stop early.");
-    nd = nd->next;
+    nd = nd->next();
   }
 }
 
 void DirtyCardQueueSet::abandon_logs() {
   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
-  CompletedBufferNode* buffers_to_delete = NULL;
+  BufferNode* buffers_to_delete = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
     while (_completed_buffers_head != NULL) {
-      CompletedBufferNode* nd = _completed_buffers_head;
-      _completed_buffers_head = nd->next;
-      nd->next = buffers_to_delete;
+      BufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next();
+      nd->set_next(buffers_to_delete);
       buffers_to_delete = nd;
     }
     _n_completed_buffers = 0;
@@ -236,10 +238,9 @@
     debug_only(assert_completed_buffer_list_len_correct_locked());
   }
   while (buffers_to_delete != NULL) {
-    CompletedBufferNode* nd = buffers_to_delete;
-    buffers_to_delete = nd->next;
-    deallocate_buffer(nd->buf);
-    delete nd;
+    BufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next();
+    deallocate_buffer(BufferNode::make_buffer_from_node(nd));
   }
   // Since abandon is done only at safepoints, we can safely manipulate
   // these queues.
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Wed Dec 23 02:57:31 2009 -0800
@@ -84,11 +84,12 @@
   jint _processed_buffers_rs_thread;
 
 public:
-  DirtyCardQueueSet();
+  DirtyCardQueueSet(bool notify_when_complete = true);
 
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
-                  int max_completed_queue = 0,
-                  Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL);
+                  int process_completed_threshold,
+                  int max_completed_queue,
+                  Mutex* lock, PtrQueueSet* fl_owner = NULL);
 
   // The number of parallel ids that can be claimed to allow collector or
   // mutator threads to do card-processing work.
@@ -123,9 +124,9 @@
                                          bool during_pause = false);
 
   bool apply_closure_to_completed_buffer_helper(int worker_i,
-                                                CompletedBufferNode* nd);
+                                                BufferNode* nd);
 
-  CompletedBufferNode* get_completed_buffer(int stop_at);
+  BufferNode* get_completed_buffer(int stop_at);
 
   // Applies the current closure to all completed buffers,
   // non-consumptively.
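The switch from CompletedBufferNode (heap-allocated and explicitly deleted) to BufferNode suggests the list node now lives inside the buffer storage itself, which is why the delete calls disappear in the .cpp hunks above. A hedged sketch of one such layout; this is an assumption, not the HotSpot definition:

    #include <cstddef>

    class BufferNode {
      BufferNode* _next;
      size_t      _index;
    public:
      BufferNode() : _next(NULL), _index(0) {}
      BufferNode* next() const     { return _next; }
      void set_next(BufferNode* n) { _next = n; }
      size_t index() const         { return _index; }
      void set_index(size_t i)     { _index = i; }
      // Reinterpret the node as its buffer and back, assuming the node header
      // occupies the first slots of the buffer allocation.
      static void** make_buffer_from_node(BufferNode* node) { return (void**)node; }
      static BufferNode* make_node_from_buffer(void** buf)  { return (BufferNode*)buf; }
    };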
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Dec 22 16:35:08 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Dec 23 02:57:31 2009 -0800
@@ -1375,6 +1375,7 @@
 G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
   SharedHeap(policy_),
   _g1_policy(policy_),
+  _dirty_card_queue_set(false),
   _ref_processor(NULL),
   _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   _bot_shared(NULL),
@@ -1460,8 +1461,6 @@
   Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
   Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
 
-  // We allocate this in any case, but only do no work if the command line
-  // param is off.
   _cg1r = new ConcurrentG1Refine();
 
   // Reserve the maximum.
@@ -1594,18 +1593,20 @@
 
   JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
                                                SATB_Q_FL_lock,
-                                               0,
+                                               G1SATBProcessCompletedThreshold,
                                                Shared_SATB_Q_lock);
 
   JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
                                                 DirtyCardQ_FL_lock,
-                                                G1UpdateBufferQueueMaxLength,
+                                                concurrent_g1_refine()->yellow_zone(),
+                                                concurrent_g1_refine()->red_zone(),
                                                 Shared_DirtyCardQ_lock);
 
   if (G1DeferredRSUpdate) {
     dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
                                       DirtyCardQ_FL_lock,
-                                      0,
+                                      -1, // never trigger processing
+                                      -1, // no limit on length
                                       Shared_DirtyCardQ_lock,
                                       &JavaThread::dirty_card_queue_set());
   }
@@ -4239,10 +4240,11 @@
     RedirtyLoggedCardTableEntryFastClosure redirty;
     dirty_card_queue_set().set_closure(&redirty);
     dirty_card_queue_set().apply_closure_to_all_completed_buffers();
-    JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set());
+
+    DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
+    dcq.merge_bufferlists(&dirty_card_queue_set());
     assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   }
-
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
23.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Tue Dec 22 16:35:08 2009 -0800 23.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Dec 23 02:57:31 2009 -0800 23.3 @@ -1914,6 +1914,10 @@ 23.4 calculate_young_list_min_length(); 23.5 calculate_young_list_target_config(); 23.6 23.7 + // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 23.8 + double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSUpdatePauseFractionPercent / 100.0; 23.9 + adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms); 23.10 + 23.11 // </NEW PREDICTION> 23.12 23.13 _target_pause_time_ms = -1.0; 23.14 @@ -1921,6 +1925,47 @@ 23.15 23.16 // <NEW PREDICTION> 23.17 23.18 +void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time, 23.19 + double update_rs_processed_buffers, 23.20 + double goal_ms) { 23.21 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 23.22 + ConcurrentG1Refine *cg1r = G1CollectedHeap::heap()->concurrent_g1_refine(); 23.23 + 23.24 + if (G1AdaptiveConcRefine) { 23.25 + const int k_gy = 3, k_gr = 6; 23.26 + const double inc_k = 1.1, dec_k = 0.9; 23.27 + 23.28 + int g = cg1r->green_zone(); 23.29 + if (update_rs_time > goal_ms) { 23.30 + g = (int)(g * dec_k); // Can become 0, that's OK. That would mean mutator-only processing. 23.31 + } else { 23.32 + if (update_rs_time < goal_ms && update_rs_processed_buffers > g) { 23.33 + g = (int)MAX2(g * inc_k, g + 1.0); 23.34 + } 23.35 + } 23.36 + // Change the refinement threads' params 23.37 + cg1r->set_green_zone(g); 23.38 + cg1r->set_yellow_zone(g * k_gy); 23.39 + cg1r->set_red_zone(g * k_gr); 23.40 + cg1r->reinitialize_threads(); 23.41 + 23.42 + int processing_threshold_delta = MAX2((int)(cg1r->green_zone() * sigma()), 1); 23.43 + int processing_threshold = MIN2(cg1r->green_zone() + processing_threshold_delta, 23.44 + cg1r->yellow_zone()); 23.45 + // Change the barrier params 23.46 + dcqs.set_process_completed_threshold(processing_threshold); 23.47 + dcqs.set_max_completed_queue(cg1r->red_zone()); 23.48 + } 23.49 + 23.50 + int curr_queue_size = dcqs.completed_buffers_num(); 23.51 + if (curr_queue_size >= cg1r->yellow_zone()) { 23.52 + dcqs.set_completed_queue_padding(curr_queue_size); 23.53 + } else { 23.54 + dcqs.set_completed_queue_padding(0); 23.55 + } 23.56 + dcqs.notify_if_necessary(); 23.57 +} 23.58 + 23.59 double 23.60 G1CollectorPolicy:: 23.61 predict_young_collection_elapsed_time_ms(size_t adjustment) {
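A worked example of the zone arithmetic above, assuming a starting green zone of 6 and an RS-update pause that overshot its goal: green becomes (int)(6 * 0.9) = 5, yellow 5 * 3 = 15, and red 5 * 6 = 30; in the undershoot case with plenty of processed buffers, green would instead grow to MAX2(6 * 1.1, 7.0) = 7. A self-contained sketch (the constants are copied from the hunk; everything else is simplified):

#include <algorithm>
#include <cstdio>

// Recomputes the green zone the same way as adjust_concurrent_refinement()
// above; the caller derives the yellow and red zones from it.
int adjust_green_zone(int green, double update_rs_time_ms, double goal_ms,
                      double processed_buffers) {
  const double inc_k = 1.1, dec_k = 0.9;
  if (update_rs_time_ms > goal_ms) {
    green = (int)(green * dec_k);  // may reach 0: mutator-only processing
  } else if (update_rs_time_ms < goal_ms && processed_buffers > green) {
    green = (int)std::max(green * inc_k, green + 1.0);
  }
  return green;
}

int main() {
  const int k_gy = 3, k_gr = 6;
  int g = adjust_green_zone(6, 12.0, 10.0, 20.0);  // 12 ms spent vs. a 10 ms goal
  std::printf("green=%d yellow=%d red=%d\n", g, g * k_gy, g * k_gr);
  // prints: green=5 yellow=15 red=30
  return 0;
}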
24.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Tue Dec 22 16:35:08 2009 -0800 24.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Wed Dec 23 02:57:31 2009 -0800 24.3 @@ -316,6 +316,10 @@ 24.4 bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group); 24.5 #endif // PRODUCT 24.6 24.7 + void adjust_concurrent_refinement(double update_rs_time, 24.8 + double update_rs_processed_buffers, 24.9 + double goal_ms); 24.10 + 24.11 protected: 24.12 double _pause_time_target_ms; 24.13 double _recorded_young_cset_choice_time_ms;
25.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Dec 22 16:35:08 2009 -0800 25.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed Dec 23 02:57:31 2009 -0800 25.3 @@ -85,7 +85,7 @@ 25.4 diagnostic(bool, G1SummarizeZFStats, false, \ 25.5 "Summarize zero-filling info") \ 25.6 \ 25.7 - develop(bool, G1TraceConcurrentRefinement, false, \ 25.8 + diagnostic(bool, G1TraceConcurrentRefinement, false, \ 25.9 "Trace G1 concurrent refinement") \ 25.10 \ 25.11 product(intx, G1MarkStackSize, 2 * 1024 * 1024, \ 25.12 @@ -94,19 +94,6 @@ 25.13 product(intx, G1MarkRegionStackSize, 1024 * 1024, \ 25.14 "Size of the region stack for concurrent marking.") \ 25.15 \ 25.16 - develop(bool, G1ConcRefine, true, \ 25.17 - "If true, run concurrent rem set refinement for G1") \ 25.18 - \ 25.19 - develop(intx, G1ConcRefineTargTraversals, 4, \ 25.20 - "Number of concurrent refinement we try to achieve") \ 25.21 - \ 25.22 - develop(intx, G1ConcRefineInitialDelta, 4, \ 25.23 - "Number of heap regions of alloc ahead of starting collection " \ 25.24 - "pause to start concurrent refinement (initially)") \ 25.25 - \ 25.26 - develop(bool, G1SmoothConcRefine, true, \ 25.27 - "Attempts to smooth out the overhead of concurrent refinement") \ 25.28 - \ 25.29 develop(bool, G1ConcZeroFill, true, \ 25.30 "If true, run concurrent zero-filling thread") \ 25.31 \ 25.32 @@ -178,13 +165,38 @@ 25.33 product(intx, G1UpdateBufferSize, 256, \ 25.34 "Size of an update buffer") \ 25.35 \ 25.36 - product(intx, G1UpdateBufferQueueProcessingThreshold, 5, \ 25.37 + product(intx, G1ConcRefineYellowZone, 0, \ 25.38 "Number of enqueued update buffers that will " \ 25.39 - "trigger concurrent processing") \ 25.40 + "trigger concurrent processing. Will be selected ergonomically " \ 25.41 + "by default.") \ 25.42 \ 25.43 - product(intx, G1UpdateBufferQueueMaxLength, 30, \ 25.44 + product(intx, G1ConcRefineRedZone, 0, \ 25.45 "Maximum number of enqueued update buffers before mutator " \ 25.46 - "threads start processing new ones instead of enqueueing them") \ 25.47 + "threads start processing new ones instead of enqueueing them. " \ 25.48 + "Will be selected ergonomically by default. Zero will disable " \ 25.49 + "concurrent processing.") \ 25.50 + \ 25.51 + product(intx, G1ConcRefineGreenZone, 0, \ 25.52 + "The number of update buffers that are left in the queue by the " \ 25.53 + "concurrent processing threads. Will be selected ergonomically " \ 25.54 + "by default.") \ 25.55 + \ 25.56 + product(intx, G1ConcRefineServiceInterval, 300, \ 25.57 + "The last concurrent refinement thread wakes up every " \ 25.58 + "specified number of milliseconds to do miscellaneous work.") \ 25.59 + \ 25.60 + product(intx, G1ConcRefineThresholdStep, 0, \ 25.61 + "Each time the rset update queue increases by this amount " \ 25.62 + "activate the next refinement thread if available. " \ 25.63 + "Will be selected ergonomically by default.") \ 25.64 + \ 25.65 + product(intx, G1RSUpdatePauseFractionPercent, 10, \ 25.66 + "A target percentage of time that is allowed to be spent on " \ 25.67 + "processing RS update buffers during the collection pause.") \ 25.68 + \ 25.69 + product(bool, G1AdaptiveConcRefine, true, \ 25.70 + "Select green, yellow and red zones adaptively to meet the " \ 25.71 + "pause requirements.") \ 25.72 \ 25.73 develop(intx, G1ConcRSLogCacheSize, 10, \ 25.74 "Log base 2 of the length of conc RS hot-card cache.") \
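As a usage illustration only (G1 itself was still experimental at this point, and the surrounding options are quoted from memory rather than from this changeset), a build containing this change should accept pinning the zones manually and tracing refinement roughly along these lines:

java -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC \
     -XX:-G1AdaptiveConcRefine \
     -XX:G1ConcRefineGreenZone=16 \
     -XX:G1ConcRefineYellowZone=48 \
     -XX:G1ConcRefineRedZone=96 \
     -XX:+UnlockDiagnosticVMOptions -XX:+G1TraceConcurrentRefinement \
     MyApp

Note that G1TraceConcurrentRefinement is promoted from develop to diagnostic by this patch, which is why -XX:+UnlockDiagnosticVMOptions appears in the invocation.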
26.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Tue Dec 22 16:35:08 2009 -0800 26.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Wed Dec 23 02:57:31 2009 -0800 26.3 @@ -64,8 +64,8 @@ 26.4 while (_index == 0) { 26.5 handle_zero_index(); 26.6 } 26.7 + 26.8 assert(_index > 0, "postcondition"); 26.9 - 26.10 _index -= oopSize; 26.11 _buf[byte_index_to_index((int)_index)] = ptr; 26.12 assert(0 <= _index && _index <= _sz, "Invariant."); 26.13 @@ -99,95 +99,110 @@ 26.14 assert(_sz > 0, "Didn't set a buffer size."); 26.15 MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); 26.16 if (_fl_owner->_buf_free_list != NULL) { 26.17 - void** res = _fl_owner->_buf_free_list; 26.18 - _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; 26.19 + void** res = BufferNode::make_buffer_from_node(_fl_owner->_buf_free_list); 26.20 + _fl_owner->_buf_free_list = _fl_owner->_buf_free_list->next(); 26.21 _fl_owner->_buf_free_list_sz--; 26.22 - // Just override the next pointer with NULL, just in case we scan this part 26.23 - // of the buffer. 26.24 - res[0] = NULL; 26.25 return res; 26.26 } else { 26.27 - return (void**) NEW_C_HEAP_ARRAY(char, _sz); 26.28 + // Allocate space for the BufferNode in front of the buffer. 26.29 + char *b = NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size()); 26.30 + return BufferNode::make_buffer_from_block(b); 26.31 } 26.32 } 26.33 26.34 void PtrQueueSet::deallocate_buffer(void** buf) { 26.35 assert(_sz > 0, "Didn't set a buffer size."); 26.36 MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); 26.37 - buf[0] = (void*)_fl_owner->_buf_free_list; 26.38 - _fl_owner->_buf_free_list = buf; 26.39 + BufferNode *node = BufferNode::make_node_from_buffer(buf); 26.40 + node->set_next(_fl_owner->_buf_free_list); 26.41 + _fl_owner->_buf_free_list = node; 26.42 _fl_owner->_buf_free_list_sz++; 26.43 } 26.44 26.45 void PtrQueueSet::reduce_free_list() { 26.46 + assert(_fl_owner == this, "Free list reduction is allowed only for the owner"); 26.47 // For now we'll adopt the strategy of deleting half. 26.48 MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); 26.49 size_t n = _buf_free_list_sz / 2; 26.50 while (n > 0) { 26.51 assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong."); 26.52 - void** head = _buf_free_list; 26.53 - _buf_free_list = (void**)_buf_free_list[0]; 26.54 - FREE_C_HEAP_ARRAY(char, head); 26.55 + void* b = BufferNode::make_block_from_node(_buf_free_list); 26.56 + _buf_free_list = _buf_free_list->next(); 26.57 + FREE_C_HEAP_ARRAY(char, b); 26.58 _buf_free_list_sz --; 26.59 n--; 26.60 } 26.61 } 26.62 26.63 -void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) { 26.64 - // I use explicit locking here because there's a bailout in the middle. 26.65 - _cbl_mon->lock_without_safepoint_check(); 26.66 +void PtrQueue::handle_zero_index() { 26.67 + assert(0 == _index, "Precondition."); 26.68 + // This thread records the full buffer and allocates a new one (while 26.69 + // holding the lock if there is one). 26.70 + if (_buf != NULL) { 26.71 + if (_lock) { 26.72 + locking_enqueue_completed_buffer(_buf); 26.73 + } else { 26.74 + if (qset()->process_or_enqueue_complete_buffer(_buf)) { 26.75 + // Recycle the buffer. No allocation. 
26.76 + _sz = qset()->buffer_size(); 26.77 + _index = _sz; 26.78 + return; 26.79 + } 26.80 + } 26.81 + } 26.82 + // Reallocate the buffer 26.83 + _buf = qset()->allocate_buffer(); 26.84 + _sz = qset()->buffer_size(); 26.85 + _index = _sz; 26.86 + assert(0 <= _index && _index <= _sz, "Invariant."); 26.87 +} 26.88 26.89 - Thread* thread = Thread::current(); 26.90 - assert( ignore_max_completed || 26.91 - thread->is_Java_thread() || 26.92 - SafepointSynchronize::is_at_safepoint(), 26.93 - "invariant" ); 26.94 - ignore_max_completed = ignore_max_completed || !thread->is_Java_thread(); 26.95 +bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) { 26.96 + if (Thread::current()->is_Java_thread()) { 26.97 + // We don't lock. It is fine to be epsilon-precise here. 26.98 + if (_max_completed_queue == 0 || _max_completed_queue > 0 && 26.99 + _n_completed_buffers >= _max_completed_queue + _completed_queue_padding) { 26.100 + bool b = mut_process_buffer(buf); 26.101 + if (b) { 26.102 + // True here means that the buffer hasn't been deallocated and the caller may reuse it. 26.103 + return true; 26.104 + } 26.105 + } 26.106 + } 26.107 + // The buffer will be enqueued. The caller will have to get a new one. 26.108 + enqueue_complete_buffer(buf); 26.109 + return false; 26.110 +} 26.111 26.112 - if (!ignore_max_completed && _max_completed_queue > 0 && 26.113 - _n_completed_buffers >= (size_t) _max_completed_queue) { 26.114 - _cbl_mon->unlock(); 26.115 - bool b = mut_process_buffer(buf); 26.116 - if (b) { 26.117 - deallocate_buffer(buf); 26.118 - return; 26.119 - } 26.120 - 26.121 - // Otherwise, go ahead and enqueue the buffer. Must reaquire the lock. 26.122 - _cbl_mon->lock_without_safepoint_check(); 26.123 - } 26.124 - 26.125 - // Here we still hold the _cbl_mon. 
26.126 - CompletedBufferNode* cbn = new CompletedBufferNode; 26.127 - cbn->buf = buf; 26.128 - cbn->next = NULL; 26.129 - cbn->index = index; 26.130 +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) { 26.131 + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 26.132 + BufferNode* cbn = BufferNode::new_from_buffer(buf); 26.133 + cbn->set_index(index); 26.134 if (_completed_buffers_tail == NULL) { 26.135 assert(_completed_buffers_head == NULL, "Well-formedness"); 26.136 _completed_buffers_head = cbn; 26.137 _completed_buffers_tail = cbn; 26.138 } else { 26.139 - _completed_buffers_tail->next = cbn; 26.140 + _completed_buffers_tail->set_next(cbn); 26.141 _completed_buffers_tail = cbn; 26.142 } 26.143 _n_completed_buffers++; 26.144 26.145 - if (!_process_completed && 26.146 + if (!_process_completed && _process_completed_threshold >= 0 && 26.147 _n_completed_buffers >= _process_completed_threshold) { 26.148 _process_completed = true; 26.149 if (_notify_when_complete) 26.150 - _cbl_mon->notify_all(); 26.151 + _cbl_mon->notify(); 26.152 } 26.153 debug_only(assert_completed_buffer_list_len_correct_locked()); 26.154 - _cbl_mon->unlock(); 26.155 } 26.156 26.157 int PtrQueueSet::completed_buffers_list_length() { 26.158 int n = 0; 26.159 - CompletedBufferNode* cbn = _completed_buffers_head; 26.160 + BufferNode* cbn = _completed_buffers_head; 26.161 while (cbn != NULL) { 26.162 n++; 26.163 - cbn = cbn->next; 26.164 + cbn = cbn->next(); 26.165 } 26.166 return n; 26.167 } 26.168 @@ -198,7 +213,7 @@ 26.169 } 26.170 26.171 void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() { 26.172 - guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers, 26.173 + guarantee(completed_buffers_list_length() == _n_completed_buffers, 26.174 "Completed buffer length is wrong."); 26.175 } 26.176 26.177 @@ -207,12 +222,8 @@ 26.178 _sz = sz * oopSize; 26.179 } 26.180 26.181 -void PtrQueueSet::set_process_completed_threshold(size_t sz) { 26.182 - _process_completed_threshold = sz; 26.183 -} 26.184 - 26.185 -// Merge lists of buffers. Notify waiting threads if the length of the list 26.186 -// exceeds threshold. The source queue is emptied as a result. The queues 26.187 +// Merge lists of buffers. Notify the processing threads. 26.188 +// The source queue is emptied as a result. The queues 26.189 // must share the monitor. 
26.190 void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { 26.191 assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); 26.192 @@ -224,7 +235,7 @@ 26.193 } else { 26.194 assert(_completed_buffers_head != NULL, "Well formedness"); 26.195 if (src->_completed_buffers_head != NULL) { 26.196 - _completed_buffers_tail->next = src->_completed_buffers_head; 26.197 + _completed_buffers_tail->set_next(src->_completed_buffers_head); 26.198 _completed_buffers_tail = src->_completed_buffers_tail; 26.199 } 26.200 } 26.201 @@ -237,31 +248,13 @@ 26.202 assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || 26.203 _completed_buffers_head != NULL && _completed_buffers_tail != NULL, 26.204 "Sanity"); 26.205 +} 26.206 26.207 - if (!_process_completed && 26.208 - _n_completed_buffers >= _process_completed_threshold) { 26.209 +void PtrQueueSet::notify_if_necessary() { 26.210 + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 26.211 + if (_n_completed_buffers >= _process_completed_threshold || _max_completed_queue == 0) { 26.212 _process_completed = true; 26.213 if (_notify_when_complete) 26.214 - _cbl_mon->notify_all(); 26.215 + _cbl_mon->notify(); 26.216 } 26.217 } 26.218 - 26.219 -// Merge free lists of the two queues. The free list of the source 26.220 -// queue is emptied as a result. The queues must share the same 26.221 -// mutex that guards free lists. 26.222 -void PtrQueueSet::merge_freelists(PtrQueueSet* src) { 26.223 - assert(_fl_lock == src->_fl_lock, "Should share the same lock"); 26.224 - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); 26.225 - if (_buf_free_list != NULL) { 26.226 - void **p = _buf_free_list; 26.227 - while (*p != NULL) { 26.228 - p = (void**)*p; 26.229 - } 26.230 - *p = src->_buf_free_list; 26.231 - } else { 26.232 - _buf_free_list = src->_buf_free_list; 26.233 - } 26.234 - _buf_free_list_sz += src->_buf_free_list_sz; 26.235 - src->_buf_free_list = NULL; 26.236 - src->_buf_free_list_sz = 0; 26.237 -}
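The key behavioral point in the new PtrQueue::handle_zero_index() is that a successful mut_process_buffer() lets the thread keep its buffer: only the index is reset, and no allocation happens. A toy model of that decision under simplified stand-in types (the real code also handles the shared, lock-protected queue via locking_enqueue_completed_buffer()):

#include <cstddef>
#include <cstdlib>

// Toy model of the recycle-vs-reallocate decision in handle_zero_index().
struct ToyQueue {
  void** _buf;
  size_t _index;
  size_t _sz;

  // Placeholder for qset()->process_or_enqueue_complete_buffer(_buf):
  // returns true when the mutator processed the buffer in place.
  bool process_or_enqueue(void** /*buf*/) { return false; }  // stub: always enqueue

  void handle_zero_index() {
    if (_buf != NULL && process_or_enqueue(_buf)) {
      _index = _sz;               // recycle: same storage, no allocation
      return;
    }
    _buf = (void**)malloc(_sz);   // stands in for qset()->allocate_buffer()
    _index = _sz;                 // a full buffer counts the index down toward 0
  }
};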
27.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Tue Dec 22 16:35:08 2009 -0800 27.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Wed Dec 23 02:57:31 2009 -0800 27.3 @@ -27,8 +27,10 @@ 27.4 // the addresses of modified old-generation objects. This type supports 27.5 // this operation. 27.6 27.7 +// The definition of placement operator new(size_t, void*) is in <new>. 27.8 +#include <new> 27.9 + 27.10 class PtrQueueSet; 27.11 - 27.12 class PtrQueue VALUE_OBJ_CLASS_SPEC { 27.13 27.14 protected: 27.15 @@ -77,7 +79,7 @@ 27.16 else enqueue_known_active(ptr); 27.17 } 27.18 27.19 - inline void handle_zero_index(); 27.20 + void handle_zero_index(); 27.21 void locking_enqueue_completed_buffer(void** buf); 27.22 27.23 void enqueue_known_active(void* ptr); 27.24 @@ -126,34 +128,65 @@ 27.25 27.26 }; 27.27 27.28 +class BufferNode { 27.29 + size_t _index; 27.30 + BufferNode* _next; 27.31 +public: 27.32 + BufferNode() : _index(0), _next(NULL) { } 27.33 + BufferNode* next() const { return _next; } 27.34 + void set_next(BufferNode* n) { _next = n; } 27.35 + size_t index() const { return _index; } 27.36 + void set_index(size_t i) { _index = i; } 27.37 + 27.38 + // Align the size of the structure to the size of the pointer 27.39 + static size_t aligned_size() { 27.40 + static const size_t alignment = round_to(sizeof(BufferNode), sizeof(void*)); 27.41 + return alignment; 27.42 + } 27.43 + 27.44 + // BufferNode is allocated before the buffer. 27.45 + // The chunk of memory that holds both of them is a block. 27.46 + 27.47 + // Produce a new BufferNode given a buffer. 27.48 + static BufferNode* new_from_buffer(void** buf) { 27.49 + return new (make_block_from_buffer(buf)) BufferNode; 27.50 + } 27.51 + 27.52 + // The following are the required conversion routines: 27.53 + static BufferNode* make_node_from_buffer(void** buf) { 27.54 + return (BufferNode*)make_block_from_buffer(buf); 27.55 + } 27.56 + static void** make_buffer_from_node(BufferNode *node) { 27.57 + return make_buffer_from_block(node); 27.58 + } 27.59 + static void* make_block_from_node(BufferNode *node) { 27.60 + return (void*)node; 27.61 + } 27.62 + static void** make_buffer_from_block(void* p) { 27.63 + return (void**)((char*)p + aligned_size()); 27.64 + } 27.65 + static void* make_block_from_buffer(void** p) { 27.66 + return (void*)((char*)p - aligned_size()); 27.67 + } 27.68 +}; 27.69 + 27.70 // A PtrQueueSet represents resources common to a set of pointer queues. 27.71 // In particular, the individual queues allocate buffers from this shared 27.72 // set, and return completed buffers to the set. 27.73 // All these variables are protected by the TLOQ_CBL_mon. XXX ??? 27.74 class PtrQueueSet VALUE_OBJ_CLASS_SPEC { 27.75 - 27.76 protected: 27.77 - 27.78 - class CompletedBufferNode: public CHeapObj { 27.79 - public: 27.80 - void** buf; 27.81 - size_t index; 27.82 - CompletedBufferNode* next; 27.83 - CompletedBufferNode() : buf(NULL), 27.84 - index(0), next(NULL){ } 27.85 - }; 27.86 - 27.87 Monitor* _cbl_mon; // Protects the fields below.
27.88 - CompletedBufferNode* _completed_buffers_head; 27.89 - CompletedBufferNode* _completed_buffers_tail; 27.90 - size_t _n_completed_buffers; 27.91 - size_t _process_completed_threshold; 27.92 + BufferNode* _completed_buffers_head; 27.93 + BufferNode* _completed_buffers_tail; 27.94 + int _n_completed_buffers; 27.95 + int _process_completed_threshold; 27.96 volatile bool _process_completed; 27.97 27.98 // This (and the interpretation of the first element as a "next" 27.99 // pointer) are protected by the TLOQ_FL_lock. 27.100 Mutex* _fl_lock; 27.101 - void** _buf_free_list; 27.102 + BufferNode* _buf_free_list; 27.103 size_t _buf_free_list_sz; 27.104 // Queue set can share a freelist. The _fl_owner variable 27.105 // specifies the owner. It is set to "this" by default. 27.106 @@ -170,6 +203,7 @@ 27.107 // Maximum number of elements allowed on completed queue: after that, 27.108 // enqueuer does the work itself. Zero indicates no maximum. 27.109 int _max_completed_queue; 27.110 + int _completed_queue_padding; 27.111 27.112 int completed_buffers_list_length(); 27.113 void assert_completed_buffer_list_len_correct_locked(); 27.114 @@ -191,9 +225,12 @@ 27.115 // Because of init-order concerns, we can't pass these as constructor 27.116 // arguments. 27.117 void initialize(Monitor* cbl_mon, Mutex* fl_lock, 27.118 - int max_completed_queue = 0, 27.119 + int process_completed_threshold, 27.120 + int max_completed_queue, 27.121 PtrQueueSet *fl_owner = NULL) { 27.122 _max_completed_queue = max_completed_queue; 27.123 + _process_completed_threshold = process_completed_threshold; 27.124 + _completed_queue_padding = 0; 27.125 assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); 27.126 _cbl_mon = cbl_mon; 27.127 _fl_lock = fl_lock; 27.128 @@ -208,14 +245,17 @@ 27.129 void deallocate_buffer(void** buf); 27.130 27.131 // Declares that "buf" is a complete buffer. 27.132 - void enqueue_complete_buffer(void** buf, size_t index = 0, 27.133 - bool ignore_max_completed = false); 27.134 + void enqueue_complete_buffer(void** buf, size_t index = 0); 27.135 + 27.136 + // To be invoked by the mutator. 27.137 + bool process_or_enqueue_complete_buffer(void** buf); 27.138 27.139 bool completed_buffers_exist_dirty() { 27.140 return _n_completed_buffers > 0; 27.141 } 27.142 27.143 bool process_completed_buffers() { return _process_completed; } 27.144 + void set_process_completed(bool x) { _process_completed = x; } 27.145 27.146 bool active() { return _all_active; } 27.147 27.148 @@ -226,15 +266,24 @@ 27.149 // Get the buffer size. 27.150 size_t buffer_size() { return _sz; } 27.151 27.152 - // Set the number of completed buffers that triggers log processing. 27.153 - void set_process_completed_threshold(size_t sz); 27.154 + // Get/Set the number of completed buffers that triggers log processing. 27.155 + void set_process_completed_threshold(int sz) { _process_completed_threshold = sz; } 27.156 + int process_completed_threshold() const { return _process_completed_threshold; } 27.157 27.158 // Must only be called at a safe point. Indicates that the buffer free 27.159 // list size may be reduced, if that is deemed desirable. 
27.160 void reduce_free_list(); 27.161 27.162 - size_t completed_buffers_num() { return _n_completed_buffers; } 27.163 + int completed_buffers_num() { return _n_completed_buffers; } 27.164 27.165 void merge_bufferlists(PtrQueueSet* src); 27.166 - void merge_freelists(PtrQueueSet* src); 27.167 + 27.168 + void set_max_completed_queue(int m) { _max_completed_queue = m; } 27.169 + int max_completed_queue() { return _max_completed_queue; } 27.170 + 27.171 + void set_completed_queue_padding(int padding) { _completed_queue_padding = padding; } 27.172 + int completed_queue_padding() { return _completed_queue_padding; } 27.173 + 27.174 + // Notify the consumer if the number of buffers crossed the threshold 27.175 + void notify_if_necessary(); 27.176 };
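The BufferNode declared in this header removes the separate CompletedBufferNode allocation by carving one C-heap block into a node header followed by the buffer itself, so every node/buffer/block conversion is constant pointer arithmetic. A self-contained sketch of that layout, assuming plain malloc in place of NEW_C_HEAP_ARRAY and inlining the round_to() alignment helper:

#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <new>

// Simplified mirror of BufferNode; HotSpot's round_to() is inlined below.
struct Node {
  size_t _index;
  Node*  _next;
  static size_t aligned_size() {
    const size_t al = sizeof(void*);
    return (sizeof(Node) + al - 1) / al * al;  // round up to pointer alignment
  }
};

int main() {
  const size_t buffer_bytes = 256 * sizeof(void*);
  // block = [ Node | buffer ], allocated in one piece as in allocate_buffer().
  char*  block = (char*)malloc(buffer_bytes + Node::aligned_size());
  void** buf   = (void**)(block + Node::aligned_size());  // make_buffer_from_block
  Node*  node  = new (block) Node();                      // new_from_buffer
  // The three views must round-trip exactly:
  assert((void*)node == (void*)block);                    // make_block_from_node
  assert((char*)buf - (char*)node == (ptrdiff_t)Node::aligned_size());
  free(block);
  return 0;
}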
28.1 --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp Tue Dec 22 16:35:08 2009 -0800 28.2 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp Wed Dec 23 02:57:31 2009 -0800 28.3 @@ -67,9 +67,9 @@ 28.4 {} 28.5 28.6 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, 28.7 - int max_completed_queue, 28.8 + int process_completed_threshold, 28.9 Mutex* lock) { 28.10 - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); 28.11 + PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1); 28.12 _shared_satb_queue.set_lock(lock); 28.13 if (ParallelGCThreads > 0) { 28.14 _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads); 28.15 @@ -122,12 +122,12 @@ 28.16 28.17 bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, 28.18 int worker) { 28.19 - CompletedBufferNode* nd = NULL; 28.20 + BufferNode* nd = NULL; 28.21 { 28.22 MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 28.23 if (_completed_buffers_head != NULL) { 28.24 nd = _completed_buffers_head; 28.25 - _completed_buffers_head = nd->next; 28.26 + _completed_buffers_head = nd->next(); 28.27 if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; 28.28 _n_completed_buffers--; 28.29 if (_n_completed_buffers == 0) _process_completed = false; 28.30 @@ -135,9 +135,9 @@ 28.31 } 28.32 ObjectClosure* cl = (par ? _par_closures[worker] : _closure); 28.33 if (nd != NULL) { 28.34 - ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz); 28.35 - deallocate_buffer(nd->buf); 28.36 - delete nd; 28.37 + void **buf = BufferNode::make_buffer_from_node(nd); 28.38 + ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz); 28.39 + deallocate_buffer(buf); 28.40 return true; 28.41 } else { 28.42 return false; 28.43 @@ -145,13 +145,13 @@ 28.44 } 28.45 28.46 void SATBMarkQueueSet::abandon_partial_marking() { 28.47 - CompletedBufferNode* buffers_to_delete = NULL; 28.48 + BufferNode* buffers_to_delete = NULL; 28.49 { 28.50 MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 28.51 while (_completed_buffers_head != NULL) { 28.52 - CompletedBufferNode* nd = _completed_buffers_head; 28.53 - _completed_buffers_head = nd->next; 28.54 - nd->next = buffers_to_delete; 28.55 + BufferNode* nd = _completed_buffers_head; 28.56 + _completed_buffers_head = nd->next(); 28.57 + nd->set_next(buffers_to_delete); 28.58 buffers_to_delete = nd; 28.59 } 28.60 _completed_buffers_tail = NULL; 28.61 @@ -159,10 +159,9 @@ 28.62 DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked()); 28.63 } 28.64 while (buffers_to_delete != NULL) { 28.65 - CompletedBufferNode* nd = buffers_to_delete; 28.66 - buffers_to_delete = nd->next; 28.67 - deallocate_buffer(nd->buf); 28.68 - delete nd; 28.69 + BufferNode* nd = buffers_to_delete; 28.70 + buffers_to_delete = nd->next(); 28.71 + deallocate_buffer(BufferNode::make_buffer_from_node(nd)); 28.72 } 28.73 assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); 28.74 // So we can safely manipulate these queues.
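The SATB changes follow the same shape as the dirty-card ones: pop one node under _cbl_mon, convert it back to a buffer, apply the closure, and return the block to the free list. A hedged sketch with stub types (the buffer is assumed to sit directly after the node, a simplification of the aligned layout above):

#include <cstddef>

// Stubbed-out stand-ins for the real types and helpers.
struct BufferNode {
  BufferNode* _next;
  BufferNode* next() const { return _next; }
};
static void** make_buffer_from_node(BufferNode* nd) {
  return (void**)(nd + 1);  // simplified layout: buffer directly after node
}
static void apply_closure_to_buffer(void** /*buf*/) { /* walk the entries */ }
static void deallocate_buffer(void** /*buf*/) { /* return block to free list */ }

// Mirrors apply_closure_to_completed_buffer_work(): returns false when the
// completed list is empty. In HotSpot the pop is done under _cbl_mon.
bool consume_one(BufferNode*& head, BufferNode*& tail, int& n_completed) {
  BufferNode* nd = head;
  if (nd == NULL) return false;
  head = nd->next();
  if (head == NULL) tail = NULL;   // list became empty
  n_completed--;
  void** buf = make_buffer_from_node(nd);
  apply_closure_to_buffer(buf);
  deallocate_buffer(buf);
  return true;
}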
29.1 --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp Tue Dec 22 16:35:08 2009 -0800 29.2 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Wed Dec 23 02:57:31 2009 -0800 29.3 @@ -60,8 +60,8 @@ 29.4 SATBMarkQueueSet(); 29.5 29.6 void initialize(Monitor* cbl_mon, Mutex* fl_lock, 29.7 - int max_completed_queue = 0, 29.8 - Mutex* lock = NULL); 29.9 + int process_completed_threshold, 29.10 + Mutex* lock); 29.11 29.12 static void handle_zero_index_for_thread(JavaThread* t); 29.13
30.1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 Tue Dec 22 16:35:08 2009 -0800 30.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Wed Dec 23 02:57:31 2009 -0800 30.3 @@ -109,7 +109,6 @@ 30.4 dirtyCardQueue.cpp dirtyCardQueue.hpp 30.5 dirtyCardQueue.cpp heapRegionRemSet.hpp 30.6 dirtyCardQueue.cpp mutexLocker.hpp 30.7 -dirtyCardQueue.cpp ptrQueue.inline.hpp 30.8 dirtyCardQueue.cpp safepoint.hpp 30.9 dirtyCardQueue.cpp thread.hpp 30.10 dirtyCardQueue.cpp thread_<os_family>.inline.hpp 30.11 @@ -319,7 +318,6 @@ 30.12 ptrQueue.cpp mutex.hpp 30.13 ptrQueue.cpp mutexLocker.hpp 30.14 ptrQueue.cpp ptrQueue.hpp 30.15 -ptrQueue.cpp ptrQueue.inline.hpp 30.16 ptrQueue.cpp thread_<os_family>.inline.hpp 30.17 30.18 ptrQueue.hpp allocation.hpp 30.19 @@ -329,7 +327,6 @@ 30.20 30.21 satbQueue.cpp allocation.inline.hpp 30.22 satbQueue.cpp mutexLocker.hpp 30.23 -satbQueue.cpp ptrQueue.inline.hpp 30.24 satbQueue.cpp satbQueue.hpp 30.25 satbQueue.cpp sharedHeap.hpp 30.26 satbQueue.cpp thread.hpp