changeset 1549:9749fbc4859b
parent    1542:920875ae1277
parent    1548:7ac7d558e895
child     1564:a5a6adfca6ec
author    trims
date      Wed, 23 Dec 2009 02:57:31 -0800
summary   Merge

     1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Tue Dec 22 16:35:08 2009 -0800
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Dec 23 02:57:31 2009 -0800
     1.3 @@ -7666,7 +7666,7 @@
     1.4  
     1.5  #ifdef ASSERT
     1.6    Label L;
     1.7 -  testl(tmp, tmp);
     1.8 +  testptr(tmp, tmp);
     1.9    jccb(Assembler::notZero, L);
    1.10    hlt();
    1.11    bind(L);
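
The testl-to-testptr change matters on x86_64: testl looks only at the low 32 bits of the register, so a non-null 64-bit pointer whose low half happens to be zero would pass the null check. A minimal standalone sketch of the difference (plain C++, not HotSpot's assembler; the helper names are made up):

    #include <cassert>
    #include <cstdint>

    // Toy model of a 64-bit register holding a pointer value.
    // is_zero_32 mimics testl (low 32 bits only); is_zero_64 mimics
    // testptr on x86_64 (all 64 bits).
    static bool is_zero_32(uint64_t reg) { return static_cast<uint32_t>(reg) == 0; }
    static bool is_zero_64(uint64_t reg) { return reg == 0; }

    int main() {
      uint64_t p = uint64_t(1) << 32;  // non-null, but low 32 bits are zero
      assert(is_zero_32(p));           // 32-bit test wrongly says "null"
      assert(!is_zero_64(p));          // full-width test is correct
      return 0;
    }
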
     2.1 --- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Tue Dec 22 16:35:08 2009 -0800
     2.2 +++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed Dec 23 02:57:31 2009 -0800
     2.3 @@ -196,6 +196,9 @@
     2.4    } else {
     2.5      assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
     2.6      movl(reg, Address(rsi, bcp_offset));
     2.7 +    // Check if the secondary index definition is still ~x, otherwise
     2.8 +    // we have to change the following assembler code to calculate the
     2.9 +    // plain index.
    2.10      assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
    2.11      notl(reg);  // convert to plain index
    2.12    }
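
The assert only holds as long as decode_secondary_index is still bitwise NOT, which is what lets the single notl recover the plain index. A standalone sketch of that round trip, using hypothetical encode/decode helpers rather than the constantPoolCacheOopDesc API:

    #include <cassert>
    #include <cstdint>

    // Hypothetical model of the secondary ("giant") index encoding assumed by
    // the interpreter: a 4-byte cache index is stored as ~index in the
    // bytecode stream, so one bitwise NOT turns it back into the plain index.
    static int32_t encode_secondary_index(int32_t index)   { return ~index; }
    static int32_t decode_secondary_index(int32_t encoded) { return ~encoded; }

    int main() {
      assert(decode_secondary_index(~123) == 123);  // mirrors the assert in the diff
      int32_t raw = encode_secondary_index(42);
      assert(decode_secondary_index(raw) == 42);    // NOT is its own inverse
      return 0;
    }
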
     3.1 --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
     3.2 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
     3.3 @@ -185,12 +185,30 @@
     3.4  }
     3.5  
     3.6  
     3.7 +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
     3.8 +                                                       int bcp_offset,
     3.9 +                                                       bool giant_index) {
    3.10 +  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
    3.11 +  if (!giant_index) {
    3.12 +    load_unsigned_short(index, Address(r13, bcp_offset));
    3.13 +  } else {
    3.14 +    assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
    3.15 +    movl(index, Address(r13, bcp_offset));
    3.16 +    // Check if the secondary index definition is still ~x, otherwise
    3.17 +    // we have to change the following assembler code to calculate the
    3.18 +    // plain index.
    3.19 +    assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
    3.20 +    notl(index);  // convert to plain index
    3.21 +  }
    3.22 +}
    3.23 +
    3.24 +
    3.25  void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
    3.26                                                             Register index,
    3.27 -                                                           int bcp_offset) {
    3.28 -  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
    3.29 +                                                           int bcp_offset,
    3.30 +                                                           bool giant_index) {
    3.31    assert(cache != index, "must use different registers");
    3.32 -  load_unsigned_short(index, Address(r13, bcp_offset));
    3.33 +  get_cache_index_at_bcp(index, bcp_offset, giant_index);
    3.34    movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
    3.35    assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
    3.36    // convert from field index to ConstantPoolCacheEntry index
    3.37 @@ -200,10 +218,10 @@
    3.38  
    3.39  void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
    3.40                                                                 Register tmp,
    3.41 -                                                               int bcp_offset) {
    3.42 -  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
    3.43 +                                                               int bcp_offset,
    3.44 +                                                               bool giant_index) {
    3.45    assert(cache != tmp, "must use different register");
    3.46 -  load_unsigned_short(tmp, Address(r13, bcp_offset));
    3.47 +  get_cache_index_at_bcp(tmp, bcp_offset, giant_index);
    3.48    assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
    3.49    // convert from field index to ConstantPoolCacheEntry index
    3.50    // and from word offset to byte offset
    3.51 @@ -1236,7 +1254,8 @@
    3.52  
    3.53  void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
    3.54                                                       Register mdp,
    3.55 -                                                     Register reg2) {
    3.56 +                                                     Register reg2,
    3.57 +                                                     bool receiver_can_be_null) {
    3.58    if (ProfileInterpreter) {
    3.59      Label profile_continue;
    3.60  
    3.61 @@ -1246,8 +1265,15 @@
    3.62      // We are making a call.  Increment the count.
    3.63      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
    3.64  
    3.65 +    Label skip_receiver_profile;
    3.66 +    if (receiver_can_be_null) {
    3.67 +      testptr(receiver, receiver);
    3.68 +      jcc(Assembler::zero, skip_receiver_profile);
    3.69 +    }
    3.70 +
    3.71      // Record the receiver type.
    3.72      record_klass_in_profile(receiver, mdp, reg2);
    3.73 +    bind(skip_receiver_profile);
    3.74  
    3.75      // The method data pointer needs to be updated to reflect the new target.
    3.76      update_mdp_by_constant(mdp,
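
profile_virtual_call can now tolerate a null receiver (used by the method handle call paths): the call count is always incremented, but the receiver klass is recorded only when the receiver is non-null. A small sketch of that control flow with hypothetical profiling types, not the real MDO structures:

    #include <cstddef>

    // Hypothetical stand-ins for the interpreter's profiling state.
    struct Klass;
    struct CallProfile {
      long count = 0;
      const Klass* receiver_klass = nullptr;
    };

    // Mirrors the shape of the new profile_virtual_call logic: always count
    // the call, but skip the receiver-type record when a null receiver is
    // allowed and actually observed.
    void profile_virtual_call(CallProfile& mdo, const Klass* receiver,
                              bool receiver_can_be_null) {
      mdo.count++;                                  // increment_mdp_data_at(count)
      if (receiver_can_be_null && receiver == nullptr) {
        return;                                     // skip_receiver_profile
      }
      mdo.receiver_klass = receiver;                // record_klass_in_profile
    }
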
     4.1 --- a/src/cpu/x86/vm/interp_masm_x86_64.hpp	Tue Dec 22 16:35:08 2009 -0800
     4.2 +++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp	Wed Dec 23 02:57:31 2009 -0800
     4.3 @@ -95,9 +95,10 @@
     4.4  
     4.5    void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
     4.6    void get_cache_and_index_at_bcp(Register cache, Register index,
     4.7 -                                  int bcp_offset);
     4.8 +                                  int bcp_offset, bool giant_index = false);
     4.9    void get_cache_entry_pointer_at_bcp(Register cache, Register tmp,
    4.10 -                                      int bcp_offset);
    4.11 +                                      int bcp_offset, bool giant_index = false);
    4.12 +  void get_cache_index_at_bcp(Register index, int bcp_offset, bool giant_index = false);
    4.13  
    4.14  
    4.15    void pop_ptr(Register r = rax);
    4.16 @@ -236,7 +237,8 @@
    4.17    void profile_call(Register mdp);
    4.18    void profile_final_call(Register mdp);
    4.19    void profile_virtual_call(Register receiver, Register mdp,
    4.20 -                            Register scratch2);
    4.21 +                            Register scratch2,
    4.22 +                            bool receiver_can_be_null = false);
    4.23    void profile_ret(Register return_bci, Register mdp);
    4.24    void profile_null_seen(Register mdp);
    4.25    void profile_typecheck(Register mdp, Register klass, Register scratch);
     5.1 --- a/src/cpu/x86/vm/interpreter_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
     5.2 +++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
     5.3 @@ -277,12 +277,11 @@
     5.4    address entry_point = __ pc();
     5.5  
     5.6    // abstract method entry
     5.7 -  // remove return address. Not really needed, since exception
     5.8 -  // handling throws away expression stack
     5.9 -  __ pop(rbx);
    5.10  
    5.11 -  // adjust stack to what a normal return would do
    5.12 -  __ mov(rsp, r13);
    5.13 +  //  pop return address, reset last_sp to NULL
    5.14 +  __ empty_expression_stack();
    5.15 +  __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
    5.16 +  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
    5.17  
    5.18    // throw exception
    5.19    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
    5.20 @@ -300,7 +299,10 @@
    5.21    if (!EnableMethodHandles) {
    5.22      return generate_abstract_entry();
    5.23    }
    5.24 -  return generate_abstract_entry(); //6815692//
    5.25 +
    5.26 +  address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm);
    5.27 +
    5.28 +  return entry_point;
    5.29  }
    5.30  
    5.31  
     6.1 --- a/src/cpu/x86/vm/methodHandles_x86.cpp	Tue Dec 22 16:35:08 2009 -0800
     6.2 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Dec 23 02:57:31 2009 -0800
     6.3 @@ -448,7 +448,7 @@
     6.4                                  rbx_index, Address::times_ptr,
     6.5                                  base + vtableEntry::method_offset_in_bytes());
     6.6        Register rbx_method = rbx_temp;
     6.7 -      __ movl(rbx_method, vtable_entry_addr);
     6.8 +      __ movptr(rbx_method, vtable_entry_addr);
     6.9  
    6.10        __ verify_oop(rbx_method);
    6.11        __ jmp(rbx_method_fie);
     7.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
     7.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
     7.3 @@ -2935,6 +2935,16 @@
     7.4  
     7.5      // arraycopy stubs used by compilers
     7.6      generate_arraycopy_stubs();
     7.7 +
     7.8 +    // generic method handle stubs
     7.9 +    if (EnableMethodHandles && SystemDictionary::MethodHandle_klass() != NULL) {
    7.10 +      for (MethodHandles::EntryKind ek = MethodHandles::_EK_FIRST;
    7.11 +           ek < MethodHandles::_EK_LIMIT;
    7.12 +           ek = MethodHandles::EntryKind(1 + (int)ek)) {
    7.13 +        StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek));
    7.14 +        MethodHandles::generate_method_handle_stub(_masm, ek);
    7.15 +      }
    7.16 +    }
    7.17    }
    7.18  
    7.19   public:
     8.1 --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
     8.2 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
     8.3 @@ -100,21 +100,26 @@
     8.4    return entry;
     8.5  }
     8.6  
     8.7 -// Arguments are: required type in rarg1, failing object (or NULL) in rarg2
     8.8 +// Arguments are: required type at TOS+8, failing object (or NULL) at TOS+4.
     8.9  address TemplateInterpreterGenerator::generate_WrongMethodType_handler() {
    8.10    address entry = __ pc();
    8.11  
    8.12    __ pop(c_rarg2);              // failing object is at TOS
    8.13    __ pop(c_rarg1);              // required type is at TOS+8
    8.14  
    8.15 -  // expression stack must be empty before entering the VM if an
    8.16 -  // exception happened
    8.17 +  __ verify_oop(c_rarg1);
    8.18 +  __ verify_oop(c_rarg2);
    8.19 +
    8.20 +  // Various method handle types use interpreter registers as temps.
    8.21 +  __ restore_bcp();
    8.22 +  __ restore_locals();
    8.23 +
    8.24 +  // Expression stack must be empty before entering the VM for an exception.
    8.25    __ empty_expression_stack();
    8.26  
    8.27    __ call_VM(noreg,
    8.28               CAST_FROM_FN_PTR(address,
    8.29 -                              InterpreterRuntime::
    8.30 -                              throw_WrongMethodTypeException),
    8.31 +                              InterpreterRuntime::throw_WrongMethodTypeException),
    8.32               // pass required type, failing object (or NULL)
    8.33               c_rarg1, c_rarg2);
    8.34    return entry;
    8.35 @@ -182,15 +187,29 @@
    8.36    __ restore_bcp();
    8.37    __ restore_locals();
    8.38  
    8.39 -  __ get_cache_and_index_at_bcp(rbx, rcx, 1);
    8.40 +  Label L_got_cache, L_giant_index;
    8.41 +  if (EnableInvokeDynamic) {
    8.42 +    __ cmpb(Address(r13, 0), Bytecodes::_invokedynamic);
    8.43 +    __ jcc(Assembler::equal, L_giant_index);
    8.44 +  }
    8.45 +  __ get_cache_and_index_at_bcp(rbx, rcx, 1, false);
    8.46 +  __ bind(L_got_cache);
    8.47    __ movl(rbx, Address(rbx, rcx,
    8.48 -                       Address::times_8,
    8.49 +                       Address::times_ptr,
    8.50                         in_bytes(constantPoolCacheOopDesc::base_offset()) +
    8.51                         3 * wordSize));
    8.52    __ andl(rbx, 0xFF);
    8.53    if (TaggedStackInterpreter) __ shll(rbx, 1); // 2 slots per parameter.
    8.54    __ lea(rsp, Address(rsp, rbx, Address::times_8));
    8.55    __ dispatch_next(state, step);
    8.56 +
    8.57 +  // out of the main line of code...
    8.58 +  if (EnableInvokeDynamic) {
    8.59 +    __ bind(L_giant_index);
    8.60 +    __ get_cache_and_index_at_bcp(rbx, rcx, 1, true);
    8.61 +    __ jmp(L_got_cache);
    8.62 +  }
    8.63 +
    8.64    return entry;
    8.65  }
    8.66  
     9.1 --- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Tue Dec 22 16:35:08 2009 -0800
     9.2 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Dec 23 02:57:31 2009 -0800
     9.3 @@ -3146,7 +3146,6 @@
     9.4      __ profile_call(rsi);
     9.5    }
     9.6  
     9.7 -  Label handle_unlinked_site;
     9.8    __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx)));
     9.9    __ null_check(rcx);
    9.10    __ prepare_to_jump_from_interpreted();
    10.1 --- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Tue Dec 22 16:35:08 2009 -0800
    10.2 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Dec 23 02:57:31 2009 -0800
    10.3 @@ -203,18 +203,15 @@
    10.4      __ jcc(Assembler::notEqual, fast_patch);
    10.5      __ get_method(scratch);
    10.6      // Let breakpoint table handling rewrite to quicker bytecode
    10.7 -    __ call_VM(noreg,
    10.8 -               CAST_FROM_FN_PTR(address,
    10.9 -                                InterpreterRuntime::set_original_bytecode_at),
   10.10 -               scratch, r13, bc);
   10.11 +    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, r13, bc);
   10.12  #ifndef ASSERT
   10.13      __ jmpb(patch_done);
   10.14 +#else
   10.15 +    __ jmp(patch_done);
   10.16 +#endif
   10.17      __ bind(fast_patch);
   10.18    }
   10.19 -#else
   10.20 -    __ jmp(patch_done);
   10.21 -    __ bind(fast_patch);
   10.22 -  }
   10.23 +#ifdef ASSERT
   10.24    Label okay;
   10.25    __ load_unsigned_byte(scratch, at_bcp(0));
   10.26    __ cmpl(scratch, (int) Bytecodes::java_code(bytecode));
   10.27 @@ -2054,26 +2051,28 @@
   10.28    }
   10.29  }
   10.30  
   10.31 -void TemplateTable::resolve_cache_and_index(int byte_no,
   10.32 -                                            Register Rcache,
   10.33 -                                            Register index) {
   10.34 +void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) {
   10.35    assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
   10.36 +  bool is_invokedynamic = (bytecode() == Bytecodes::_invokedynamic);
   10.37  
   10.38    const Register temp = rbx;
   10.39    assert_different_registers(Rcache, index, temp);
   10.40  
   10.41    const int shift_count = (1 + byte_no) * BitsPerByte;
   10.42    Label resolved;
   10.43 -  __ get_cache_and_index_at_bcp(Rcache, index, 1);
   10.44 -  __ movl(temp, Address(Rcache,
   10.45 -                        index, Address::times_8,
   10.46 -                        constantPoolCacheOopDesc::base_offset() +
   10.47 -                        ConstantPoolCacheEntry::indices_offset()));
   10.48 -  __ shrl(temp, shift_count);
   10.49 -  // have we resolved this bytecode?
   10.50 -  __ andl(temp, 0xFF);
   10.51 -  __ cmpl(temp, (int) bytecode());
   10.52 -  __ jcc(Assembler::equal, resolved);
   10.53 +  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
   10.54 +  if (is_invokedynamic) {
   10.55 +    // we are resolved if the f1 field contains a non-null CallSite object
   10.56 +    __ cmpptr(Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()), (int32_t) NULL_WORD);
   10.57 +    __ jcc(Assembler::notEqual, resolved);
   10.58 +  } else {
   10.59 +    __ movl(temp, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
   10.60 +    __ shrl(temp, shift_count);
   10.61 +    // have we resolved this bytecode?
   10.62 +    __ andl(temp, 0xFF);
   10.63 +    __ cmpl(temp, (int) bytecode());
   10.64 +    __ jcc(Assembler::equal, resolved);
   10.65 +  }
   10.66  
   10.67    // resolve first time through
   10.68    address entry;
   10.69 @@ -2090,6 +2089,9 @@
   10.70    case Bytecodes::_invokeinterface:
   10.71      entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
   10.72      break;
   10.73 +  case Bytecodes::_invokedynamic:
   10.74 +    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
   10.75 +    break;
   10.76    default:
   10.77      ShouldNotReachHere();
   10.78      break;
   10.79 @@ -2098,7 +2100,7 @@
   10.80    __ call_VM(noreg, entry, temp);
   10.81  
   10.82    // Update registers with resolved info
   10.83 -  __ get_cache_and_index_at_bcp(Rcache, index, 1);
   10.84 +  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
   10.85    __ bind(resolved);
   10.86  }
   10.87  
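
For invokedynamic the entry is considered resolved as soon as f1 holds a non-null CallSite, whereas the other invokes still compare the rewritten bytecode stored in the indices word. A flattened, hypothetical model of those two checks (not the real ConstantPoolCacheEntry layout):

    #include <cstdint>

    // Hypothetical, flattened constant pool cache entry, just to show the two
    // "already resolved?" tests used above.
    struct CacheEntry {
      uint32_t indices;   // packed bytecodes + original constant pool index
      void*    f1;        // CallSite oop for invokedynamic, methodOop etc. otherwise
    };

    bool is_resolved(const CacheEntry& e, int byte_no, uint8_t bytecode,
                     bool is_invokedynamic) {
      if (is_invokedynamic) {
        return e.f1 != nullptr;               // non-null CallSite => resolved
      }
      int shift_count = (1 + byte_no) * 8;    // (1 + byte_no) * BitsPerByte
      uint8_t resolved_code = (e.indices >> shift_count) & 0xFF;
      return resolved_code == bytecode;       // have we resolved this bytecode?
    }
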
   10.88 @@ -2832,15 +2834,14 @@
   10.89    ShouldNotReachHere();
   10.90  }
   10.91  
   10.92 -void TemplateTable::prepare_invoke(Register method,
   10.93 -                                   Register index,
   10.94 -                                   int byte_no,
   10.95 -                                   Bytecodes::Code code) {
   10.96 +void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) {
   10.97    // determine flags
   10.98 +  Bytecodes::Code code = bytecode();
   10.99    const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  10.100 +  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
  10.101    const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  10.102    const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  10.103 -  const bool load_receiver       = code != Bytecodes::_invokestatic;
  10.104 +  const bool load_receiver      = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic);
  10.105    const bool receiver_null_check = is_invokespecial;
  10.106    const bool save_flags = is_invokeinterface || is_invokevirtual;
  10.107    // setup registers & access constant pool cache
  10.108 @@ -2858,9 +2859,13 @@
  10.109      __ movl(recv, flags);
  10.110      __ andl(recv, 0xFF);
  10.111      if (TaggedStackInterpreter) __ shll(recv, 1);  // index*2
  10.112 -    __ movptr(recv, Address(rsp, recv, Address::times_8,
  10.113 -                                 -Interpreter::expr_offset_in_bytes(1)));
  10.114 -    __ verify_oop(recv);
  10.115 +    Address recv_addr(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1));
  10.116 +    if (is_invokedynamic) {
  10.117 +      __ lea(recv, recv_addr);
  10.118 +    } else {
  10.119 +      __ movptr(recv, recv_addr);
  10.120 +      __ verify_oop(recv);
  10.121 +    }
  10.122    }
  10.123  
  10.124    // do null check if needed
  10.125 @@ -2878,10 +2883,14 @@
  10.126    ConstantPoolCacheEntry::verify_tosBits();
  10.127    // load return address
  10.128    {
  10.129 -    ExternalAddress return_5((address)Interpreter::return_5_addrs_by_index_table());
  10.130 -    ExternalAddress return_3((address)Interpreter::return_3_addrs_by_index_table());
  10.131 -    __ lea(rscratch1, (is_invokeinterface ? return_5 : return_3));
  10.132 -    __ movptr(flags, Address(rscratch1, flags, Address::times_8));
  10.133 +    address table_addr;
  10.134 +    if (is_invokeinterface || is_invokedynamic)
  10.135 +      table_addr = (address)Interpreter::return_5_addrs_by_index_table();
  10.136 +    else
  10.137 +      table_addr = (address)Interpreter::return_3_addrs_by_index_table();
  10.138 +    ExternalAddress table(table_addr);
  10.139 +    __ lea(rscratch1, table);
  10.140 +    __ movptr(flags, Address(rscratch1, flags, Address::times_ptr));
  10.141    }
  10.142  
  10.143    // push return address
  10.144 @@ -2947,7 +2956,7 @@
  10.145  
  10.146  void TemplateTable::invokevirtual(int byte_no) {
  10.147    transition(vtos, vtos);
  10.148 -  prepare_invoke(rbx, noreg, byte_no, bytecode());
  10.149 +  prepare_invoke(rbx, noreg, byte_no);
  10.150  
  10.151    // rbx: index
  10.152    // rcx: receiver
  10.153 @@ -2959,7 +2968,7 @@
  10.154  
  10.155  void TemplateTable::invokespecial(int byte_no) {
  10.156    transition(vtos, vtos);
  10.157 -  prepare_invoke(rbx, noreg, byte_no, bytecode());
  10.158 +  prepare_invoke(rbx, noreg, byte_no);
  10.159    // do the call
  10.160    __ verify_oop(rbx);
  10.161    __ profile_call(rax);
  10.162 @@ -2969,7 +2978,7 @@
  10.163  
  10.164  void TemplateTable::invokestatic(int byte_no) {
  10.165    transition(vtos, vtos);
  10.166 -  prepare_invoke(rbx, noreg, byte_no, bytecode());
  10.167 +  prepare_invoke(rbx, noreg, byte_no);
  10.168    // do the call
  10.169    __ verify_oop(rbx);
  10.170    __ profile_call(rax);
  10.171 @@ -2983,7 +2992,7 @@
  10.172  
  10.173  void TemplateTable::invokeinterface(int byte_no) {
  10.174    transition(vtos, vtos);
  10.175 -  prepare_invoke(rax, rbx, byte_no, bytecode());
  10.176 +  prepare_invoke(rax, rbx, byte_no);
  10.177  
  10.178    // rax: Interface
  10.179    // rbx: index
  10.180 @@ -3072,7 +3081,24 @@
  10.181      return;
  10.182    }
  10.183  
  10.184 -  __ stop("invokedynamic NYI");//6815692//
  10.185 +  prepare_invoke(rax, rbx, byte_no);
  10.186 +
  10.187 +  // rax: CallSite object (f1)
  10.188 +  // rbx: unused (f2)
  10.189 +  // rcx: receiver address
  10.190 +  // rdx: flags (unused)
  10.191 +
  10.192 +  if (ProfileInterpreter) {
  10.193 +    Label L;
  10.194 +    // %%% should make a type profile for any invokedynamic that takes a ref argument
  10.195 +    // profile this call
  10.196 +    __ profile_call(r13);
  10.197 +  }
  10.198 +
  10.199 +  __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx)));
  10.200 +  __ null_check(rcx);
  10.201 +  __ prepare_to_jump_from_interpreted();
  10.202 +  __ jump_to_method_handle_entry(rcx, rdx);
  10.203  }
  10.204  
  10.205  
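
Once resolved, invokedynamic loads the CallSite's target method handle, null-checks it, and jumps to the method handle entry. A toy model of that dispatch path, with made-up MethodHandle/CallSite structs standing in for the real oops:

    #include <stdexcept>

    // Hypothetical, simplified model of the invokedynamic dispatch wired up
    // above: f1 is a CallSite whose target is a method handle; the
    // interpreter null-checks the target and jumps to its entry.
    struct MethodHandle { void (*entry)(); };
    struct CallSite     { MethodHandle* target; };

    void dispatch_invokedynamic(const CallSite* site) {
      MethodHandle* mh = site->target;                  // load CallSite.target (rcx)
      if (mh == nullptr)
        throw std::runtime_error("unlinked call site"); // null_check(rcx)
      mh->entry();                                      // jump_to_method_handle_entry
    }
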
    11.1 --- a/src/cpu/x86/vm/templateTable_x86_64.hpp	Tue Dec 22 16:35:08 2009 -0800
    11.2 +++ b/src/cpu/x86/vm/templateTable_x86_64.hpp	Wed Dec 23 02:57:31 2009 -0800
    11.3 @@ -22,8 +22,7 @@
    11.4   *
    11.5   */
    11.6  
    11.7 -  static void prepare_invoke(Register method, Register index, int byte_no,
    11.8 -                             Bytecodes::Code code);
    11.9 +  static void prepare_invoke(Register method, Register index, int byte_no);
   11.10    static void invokevirtual_helper(Register index, Register recv,
   11.11                                     Register flags);
   11.12    static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
    12.1 --- a/src/share/vm/classfile/classFileParser.cpp	Tue Dec 22 16:35:08 2009 -0800
    12.2 +++ b/src/share/vm/classfile/classFileParser.cpp	Wed Dec 23 02:57:31 2009 -0800
    12.3 @@ -2511,23 +2511,12 @@
    12.4        fac_ptr->nonstatic_byte_count -= 1;
    12.5        (*fields_ptr)->ushort_at_put(i + instanceKlass::signature_index_offset,
    12.6                                     word_sig_index);
    12.7 -      if (wordSize == jintSize) {
    12.8 -        fac_ptr->nonstatic_word_count += 1;
    12.9 -      } else {
   12.10 -        fac_ptr->nonstatic_double_count += 1;
   12.11 -      }
   12.12 -
   12.13 -      FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i+4);
   12.14 +      fac_ptr->nonstatic_word_count += 1;
   12.15 +
   12.16 +      FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i + instanceKlass::low_offset);
   12.17        assert(atype == NONSTATIC_BYTE, "");
   12.18        FieldAllocationType new_atype = NONSTATIC_WORD;
   12.19 -      if (wordSize > jintSize) {
   12.20 -        if (Universe::field_type_should_be_aligned(T_LONG)) {
   12.21 -          atype = NONSTATIC_ALIGNED_DOUBLE;
   12.22 -        } else {
   12.23 -          atype = NONSTATIC_DOUBLE;
   12.24 -        }
   12.25 -      }
   12.26 -      (*fields_ptr)->ushort_at_put(i+4, new_atype);
   12.27 +      (*fields_ptr)->ushort_at_put(i + instanceKlass::low_offset, new_atype);
   12.28  
   12.29        found_vmentry = true;
   12.30        break;
   12.31 @@ -3085,7 +3074,7 @@
   12.32      int len = fields->length();
   12.33      for (int i = 0; i < len; i += instanceKlass::next_offset) {
   12.34        int real_offset;
   12.35 -      FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i+4);
   12.36 +      FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i + instanceKlass::low_offset);
   12.37        switch (atype) {
   12.38          case STATIC_OOP:
   12.39            real_offset = next_static_oop_offset;
   12.40 @@ -3173,8 +3162,8 @@
   12.41          default:
   12.42            ShouldNotReachHere();
   12.43        }
   12.44 -      fields->short_at_put(i+4, extract_low_short_from_int(real_offset) );
   12.45 -      fields->short_at_put(i+5, extract_high_short_from_int(real_offset) );
   12.46 +      fields->short_at_put(i + instanceKlass::low_offset,  extract_low_short_from_int(real_offset));
   12.47 +      fields->short_at_put(i + instanceKlass::high_offset, extract_high_short_from_int(real_offset));
   12.48      }
   12.49  
   12.50      // Size of instances
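
The computed field offset is stored across two adjacent 16-bit slots of the fields array (low_offset/high_offset). A small sketch of that split and later recombination, with hypothetical helpers mirroring the roles of extract_low_short_from_int / extract_high_short_from_int:

    #include <cassert>
    #include <cstdint>

    // Hypothetical equivalents of the low/high short extraction: a 32-bit
    // field offset is stored across two adjacent 16-bit slots.
    static uint16_t low_short(int32_t v)  { return (uint16_t)(v & 0xFFFF); }
    static uint16_t high_short(int32_t v) { return (uint16_t)((uint32_t)v >> 16); }
    static int32_t  combine(uint16_t lo, uint16_t hi) {
      return (int32_t)(((uint32_t)hi << 16) | lo);
    }

    int main() {
      int32_t real_offset = 0x00010040;              // some computed field offset
      uint16_t slot_low  = low_short(real_offset);   // short_at_put(i + low_offset, ...)
      uint16_t slot_high = high_short(real_offset);  // short_at_put(i + high_offset, ...)
      assert(combine(slot_low, slot_high) == real_offset);
      return 0;
    }
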
    13.1 --- a/src/share/vm/code/nmethod.cpp	Tue Dec 22 16:35:08 2009 -0800
    13.2 +++ b/src/share/vm/code/nmethod.cpp	Wed Dec 23 02:57:31 2009 -0800
    13.3 @@ -414,9 +414,8 @@
    13.4  }
    13.5  
    13.6  const char* nmethod::compile_kind() const {
    13.7 -  if (method() == NULL)    return "unloaded";
    13.8 -  if (is_native_method())  return "c2n";
    13.9    if (is_osr_method())     return "osr";
   13.10 +  if (method() != NULL && is_native_method())  return "c2n";
   13.11    return NULL;
   13.12  }
   13.13  
   13.14 @@ -1127,6 +1126,9 @@
   13.15    }
   13.16    flags.state = unloaded;
   13.17  
   13.18 +  // Log the unloading.
   13.19 +  log_state_change();
   13.20 +
   13.21    // The methodOop is gone at this point
   13.22    assert(_method == NULL, "Tautology");
   13.23  
   13.24 @@ -1137,8 +1139,6 @@
   13.25  
   13.26  void nmethod::invalidate_osr_method() {
   13.27    assert(_entry_bci != InvocationEntryBci, "wrong kind of nmethod");
   13.28 -  if (_entry_bci != InvalidOSREntryBci)
   13.29 -    inc_decompile_count();
   13.30    // Remove from list of active nmethods
   13.31    if (method() != NULL)
   13.32      instanceKlass::cast(method()->method_holder())->remove_osr_nmethod(this);
   13.33 @@ -1146,59 +1146,63 @@
   13.34    _entry_bci = InvalidOSREntryBci;
   13.35  }
   13.36  
   13.37 -void nmethod::log_state_change(int state) const {
   13.38 +void nmethod::log_state_change() const {
   13.39    if (LogCompilation) {
   13.40      if (xtty != NULL) {
   13.41        ttyLocker ttyl;  // keep the following output all in one block
   13.42 -      xtty->begin_elem("make_not_entrant %sthread='" UINTX_FORMAT "'",
   13.43 -                       (state == zombie ? "zombie='1' " : ""),
   13.44 -                       os::current_thread_id());
   13.45 +      if (flags.state == unloaded) {
   13.46 +        xtty->begin_elem("make_unloaded thread='" UINTX_FORMAT "'",
   13.47 +                         os::current_thread_id());
   13.48 +      } else {
   13.49 +        xtty->begin_elem("make_not_entrant thread='" UINTX_FORMAT "'%s",
   13.50 +                         os::current_thread_id(),
   13.51 +                         (flags.state == zombie ? " zombie='1'" : ""));
   13.52 +      }
   13.53        log_identity(xtty);
   13.54        xtty->stamp();
   13.55        xtty->end_elem();
   13.56      }
   13.57    }
   13.58 -  if (PrintCompilation) {
   13.59 -    print_on(tty, state == zombie ? "made zombie " : "made not entrant ");
   13.60 +  if (PrintCompilation && flags.state != unloaded) {
   13.61 +    print_on(tty, flags.state == zombie ? "made zombie " : "made not entrant ");
   13.62      tty->cr();
   13.63    }
   13.64  }
   13.65  
   13.66  // Common functionality for both make_not_entrant and make_zombie
   13.67 -void nmethod::make_not_entrant_or_zombie(int state) {
   13.68 +bool nmethod::make_not_entrant_or_zombie(int state) {
   13.69    assert(state == zombie || state == not_entrant, "must be zombie or not_entrant");
   13.70  
   13.71 -  // Code for an on-stack-replacement nmethod is removed when a class gets unloaded.
   13.72 -  // They never become zombie/non-entrant, so the nmethod sweeper will never remove
   13.73 -  // them. Instead the entry_bci is set to InvalidOSREntryBci, so the osr nmethod
   13.74 -  // will never be used anymore. That the nmethods only gets removed when class unloading
   13.75 -  // happens, make life much simpler, since the nmethods are not just going to disappear
   13.76 -  // out of the blue.
   13.77 -  if (is_osr_method()) {
   13.78 -    if (osr_entry_bci() != InvalidOSREntryBci) {
   13.79 -      // only log this once
   13.80 -      log_state_change(state);
   13.81 -    }
   13.82 -    invalidate_osr_method();
   13.83 -    return;
   13.84 +  // If the method is already zombie there is nothing to do
   13.85 +  if (is_zombie()) {
   13.86 +    return false;
   13.87    }
   13.88  
   13.89 -  // If the method is already zombie or set to the state we want, nothing to do
   13.90 -  if (is_zombie() || (state == not_entrant && is_not_entrant())) {
   13.91 -    return;
   13.92 -  }
   13.93 -
   13.94 -  log_state_change(state);
   13.95 -
   13.96    // Make sure the nmethod is not flushed in case of a safepoint in code below.
   13.97    nmethodLocker nml(this);
   13.98  
   13.99    {
  13.100 +    // invalidate osr nmethod before acquiring the patching lock since
  13.101 +    // they both acquire leaf locks and we don't want a deadlock.
  13.102 +    // This logic is equivalent to the logic below for patching the
  13.103 +    // verified entry point of regular methods.
  13.104 +    if (is_osr_method()) {
  13.105 +      // this effectively makes the osr nmethod not entrant
  13.106 +      invalidate_osr_method();
  13.107 +    }
  13.108 +
  13.109      // Enter critical section.  Does not block for safepoint.
  13.110      MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
  13.111 +
  13.112 +    if (flags.state == state) {
  13.113 +      // another thread already performed this transition so nothing
  13.114 +      // to do, but return false to indicate this.
  13.115 +      return false;
  13.116 +    }
  13.117 +
  13.118      // The caller can be calling the method statically or through an inline
  13.119      // cache call.
  13.120 -    if (!is_not_entrant()) {
  13.121 +    if (!is_osr_method() && !is_not_entrant()) {
  13.122        NativeJump::patch_verified_entry(entry_point(), verified_entry_point(),
  13.123                    SharedRuntime::get_handle_wrong_method_stub());
  13.124        assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, "");
  13.125 @@ -1217,6 +1221,10 @@
  13.126  
  13.127      // Change state
  13.128      flags.state = state;
  13.129 +
  13.130 +    // Log the transition once
  13.131 +    log_state_change();
  13.132 +
  13.133    } // leave critical region under Patching_lock
  13.134  
  13.135    if (state == not_entrant) {
  13.136 @@ -1240,7 +1248,6 @@
  13.137    // It's a true state change, so mark the method as decompiled.
  13.138    inc_decompile_count();
  13.139  
  13.140 -
  13.141    // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event
  13.142    // and it hasn't already been reported for this nmethod then report it now.
  13.143    // (the event may have been reported earilier if the GC marked it for unloading).
  13.144 @@ -1268,7 +1275,7 @@
  13.145  
  13.146    // Check whether method got unloaded at a safepoint before this,
  13.147    // if so we can skip the flushing steps below
  13.148 -  if (method() == NULL) return;
  13.149 +  if (method() == NULL) return true;
  13.150  
  13.151    // Remove nmethod from method.
  13.152    // We need to check if both the _code and _from_compiled_code_entry_point
  13.153 @@ -1282,6 +1289,8 @@
  13.154      HandleMark hm;
  13.155      method()->clear_code();
  13.156    }
  13.157 +
  13.158 +  return true;
  13.159  }
  13.160  
  13.161  
    14.1 --- a/src/share/vm/code/nmethod.hpp	Tue Dec 22 16:35:08 2009 -0800
    14.2 +++ b/src/share/vm/code/nmethod.hpp	Wed Dec 23 02:57:31 2009 -0800
    14.3 @@ -252,7 +252,9 @@
    14.4    void* operator new(size_t size, int nmethod_size);
    14.5  
    14.6    const char* reloc_string_for(u_char* begin, u_char* end);
    14.7 -  void make_not_entrant_or_zombie(int state);
    14.8 +  // Returns true if this thread changed the state of the nmethod or
    14.9 +  // false if another thread performed the transition.
   14.10 +  bool make_not_entrant_or_zombie(int state);
   14.11    void inc_decompile_count();
   14.12  
   14.13    // used to check that writes to nmFlags are done consistently.
   14.14 @@ -375,10 +377,12 @@
   14.15    bool  is_zombie() const                         { return flags.state == zombie; }
   14.16    bool  is_unloaded() const                       { return flags.state == unloaded;   }
   14.17  
   14.18 -  // Make the nmethod non entrant. The nmethod will continue to be alive.
   14.19 -  // It is used when an uncommon trap happens.
   14.20 -  void  make_not_entrant()                        { make_not_entrant_or_zombie(not_entrant); }
   14.21 -  void  make_zombie()                             { make_not_entrant_or_zombie(zombie); }
   14.22 +  // Make the nmethod non entrant. The nmethod will continue to be
   14.23 +  // alive.  It is used when an uncommon trap happens.  Returns true
   14.24 +  // if this thread changed the state of the nmethod or false if
   14.25 +  // another thread performed the transition.
   14.26 +  bool  make_not_entrant()                        { return make_not_entrant_or_zombie(not_entrant); }
   14.27 +  bool  make_zombie()                             { return make_not_entrant_or_zombie(zombie); }
   14.28  
   14.29    // used by jvmti to track if the unload event has been reported
   14.30    bool  unload_reported()                         { return _unload_reported; }
   14.31 @@ -563,7 +567,7 @@
   14.32    // Logging
   14.33    void log_identity(xmlStream* log) const;
   14.34    void log_new_nmethod() const;
   14.35 -  void log_state_change(int state) const;
   14.36 +  void log_state_change() const;
   14.37  
   14.38    // Prints a comment for one native instruction (reloc info, pc desc)
   14.39    void print_code_comment_on(outputStream* st, int column, address begin, address end);
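
make_not_entrant_or_zombie now reports whether the calling thread actually performed the state change; a racing caller that finds the nmethod already in the target state gets false. A minimal sketch of that idiom, with std::mutex standing in for the Patching_lock and a toy state enum rather than the nmethod API:

    #include <mutex>

    // Toy model of the "return false if another thread already performed the
    // transition" protocol added to make_not_entrant_or_zombie().
    enum State { in_use, not_entrant, zombie };

    struct Method {
      State state = in_use;
      std::mutex patching_lock;

      bool make_not_entrant_or_zombie(State target) {
        if (state == zombie) return false;           // nothing left to do
        std::lock_guard<std::mutex> guard(patching_lock);
        if (state == target) return false;           // another thread got here first
        // ... patch entry points, invalidate OSR entry, etc. ...
        state = target;                              // change state
        // ... log the transition once, while still holding the lock ...
        return true;                                 // this thread did the transition
      }
    };
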
    15.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Dec 22 16:35:08 2009 -0800
    15.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Dec 23 02:57:31 2009 -0800
    15.3 @@ -42,28 +42,49 @@
    15.4    _n_periods(0),
    15.5    _threads(NULL), _n_threads(0)
    15.6  {
    15.7 -  if (G1ConcRefine) {
    15.8 -    _n_threads = (int)thread_num();
    15.9 -    if (_n_threads > 0) {
   15.10 -      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
   15.11 -      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
   15.12 -      ConcurrentG1RefineThread *next = NULL;
   15.13 -      for (int i = _n_threads - 1; i >= 0; i--) {
   15.14 -        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
   15.15 -        assert(t != NULL, "Conc refine should have been created");
   15.16 -        assert(t->cg1r() == this, "Conc refine thread should refer to this");
   15.17 -        _threads[i] = t;
   15.18 -        next = t;
   15.19 -      }
   15.20 -    }
   15.21 +
    15.22 +  // Ergonomically select initial concurrent refinement parameters
   15.23 +  if (FLAG_IS_DEFAULT(G1ConcRefineGreenZone)) {
   15.24 +    FLAG_SET_DEFAULT(G1ConcRefineGreenZone, MAX2<int>(ParallelGCThreads, 1));
   15.25 +  }
   15.26 +  set_green_zone(G1ConcRefineGreenZone);
   15.27 +
   15.28 +  if (FLAG_IS_DEFAULT(G1ConcRefineYellowZone)) {
   15.29 +    FLAG_SET_DEFAULT(G1ConcRefineYellowZone, green_zone() * 3);
   15.30 +  }
   15.31 +  set_yellow_zone(MAX2<int>(G1ConcRefineYellowZone, green_zone()));
   15.32 +
   15.33 +  if (FLAG_IS_DEFAULT(G1ConcRefineRedZone)) {
   15.34 +    FLAG_SET_DEFAULT(G1ConcRefineRedZone, yellow_zone() * 2);
   15.35 +  }
   15.36 +  set_red_zone(MAX2<int>(G1ConcRefineRedZone, yellow_zone()));
   15.37 +  _n_worker_threads = thread_num();
   15.38 +  // We need one extra thread to do the young gen rset size sampling.
   15.39 +  _n_threads = _n_worker_threads + 1;
   15.40 +  reset_threshold_step();
   15.41 +
   15.42 +  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
   15.43 +  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
   15.44 +  ConcurrentG1RefineThread *next = NULL;
   15.45 +  for (int i = _n_threads - 1; i >= 0; i--) {
   15.46 +    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
   15.47 +    assert(t != NULL, "Conc refine should have been created");
   15.48 +    assert(t->cg1r() == this, "Conc refine thread should refer to this");
   15.49 +    _threads[i] = t;
   15.50 +    next = t;
   15.51    }
   15.52  }
   15.53  
   15.54 -size_t ConcurrentG1Refine::thread_num() {
   15.55 -  if (G1ConcRefine) {
   15.56 -    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
   15.57 +void ConcurrentG1Refine::reset_threshold_step() {
   15.58 +  if (FLAG_IS_DEFAULT(G1ConcRefineThresholdStep)) {
   15.59 +    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
   15.60 +  } else {
   15.61 +    _thread_threshold_step = G1ConcRefineThresholdStep;
   15.62    }
   15.63 -  return 0;
   15.64 +}
   15.65 +
   15.66 +int ConcurrentG1Refine::thread_num() {
   15.67 +  return MAX2<int>((G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads, 1);
   15.68  }
   15.69  
   15.70  void ConcurrentG1Refine::init() {
   15.71 @@ -123,6 +144,15 @@
   15.72    }
   15.73  }
   15.74  
   15.75 +void ConcurrentG1Refine::reinitialize_threads() {
   15.76 +  reset_threshold_step();
   15.77 +  if (_threads != NULL) {
   15.78 +    for (int i = 0; i < _n_threads; i++) {
   15.79 +      _threads[i]->initialize();
   15.80 +    }
   15.81 +  }
   15.82 +}
   15.83 +
   15.84  ConcurrentG1Refine::~ConcurrentG1Refine() {
   15.85    if (G1ConcRSLogCacheSize > 0) {
   15.86      assert(_card_counts != NULL, "Logic");
   15.87 @@ -384,4 +414,3 @@
   15.88      st->cr();
   15.89    }
   15.90  }
   15.91 -
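
When the G1ConcRefine* zone flags are left at their defaults, the zones are picked ergonomically: green from ParallelGCThreads, yellow as 3x green, red as 2x yellow, each clamped to be at least the zone below it. A standalone sketch of that selection, with plain ints standing in for the flags and -1 playing the role of FLAG_IS_DEFAULT:

    #include <algorithm>
    #include <cstdio>

    // Standalone sketch of the ergonomic zone selection above; the three
    // "flag" parameters model G1ConcRefineGreenZone/YellowZone/RedZone.
    struct Zones { int green, yellow, red; };

    Zones select_zones(int parallel_gc_threads,
                       int green_flag = -1, int yellow_flag = -1, int red_flag = -1) {
      int green  = (green_flag  >= 0) ? green_flag  : std::max(parallel_gc_threads, 1);
      int yellow = (yellow_flag >= 0) ? yellow_flag : green * 3;
      yellow = std::max(yellow, green);              // yellow never below green
      int red = (red_flag >= 0) ? red_flag : yellow * 2;
      red = std::max(red, yellow);                   // red never below yellow
      return { green, yellow, red };
    }

    int main() {
      Zones z = select_zones(4);                     // e.g. ParallelGCThreads == 4
      std::printf("green=%d yellow=%d red=%d\n", z.green, z.yellow, z.red);
      return 0;
    }
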
    16.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Dec 22 16:35:08 2009 -0800
    16.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Dec 23 02:57:31 2009 -0800
    16.3 @@ -29,6 +29,31 @@
    16.4  class ConcurrentG1Refine: public CHeapObj {
    16.5    ConcurrentG1RefineThread** _threads;
    16.6    int _n_threads;
    16.7 +  int _n_worker_threads;
    16.8 + /*
    16.9 +  * The value of the update buffer queue length falls into one of 3 zones:
   16.10 +  * green, yellow, red. If the value is in [0, green) nothing is
   16.11 +  * done, the buffers are left unprocessed to enable the caching effect of the
   16.12 +  * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
   16.13 +  * threads are gradually activated. In [yellow, red) all threads are
   16.14 +  * running. If the length becomes red (max queue length) the mutators start
   16.15 +  * processing the buffers.
   16.16 +  *
   16.17 +  * There are some interesting cases (with G1AdaptiveConcRefine turned off):
   16.18 +  * 1) green = yellow = red = 0. In this case the mutator will process all
   16.19 +  *    buffers. Except for those that are created by the deferred updates
   16.20 +  *    machinery during a collection.
   16.21 +  * 2) green = 0. Means no caching. Can be a good way to minimize the
   16.22 +  *    amount of time spent updating rsets during a collection.
   16.23 +  */
   16.24 +  int _green_zone;
   16.25 +  int _yellow_zone;
   16.26 +  int _red_zone;
   16.27 +
   16.28 +  int _thread_threshold_step;
   16.29 +
   16.30 +  // Reset the threshold step value based of the current zone boundaries.
   16.31 +  void reset_threshold_step();
   16.32  
   16.33    // The cache for card refinement.
   16.34    bool   _use_cache;
   16.35 @@ -147,6 +172,8 @@
   16.36    void init(); // Accomplish some initialization that has to wait.
   16.37    void stop();
   16.38  
   16.39 +  void reinitialize_threads();
   16.40 +
   16.41    // Iterate over the conc refine threads
   16.42    void threads_do(ThreadClosure *tc);
   16.43  
   16.44 @@ -178,7 +205,20 @@
   16.45  
   16.46    void clear_and_record_card_counts();
   16.47  
   16.48 -  static size_t thread_num();
   16.49 +  static int thread_num();
   16.50  
   16.51    void print_worker_threads_on(outputStream* st) const;
   16.52 +
   16.53 +  void set_green_zone(int x)  { _green_zone = x;  }
   16.54 +  void set_yellow_zone(int x) { _yellow_zone = x; }
   16.55 +  void set_red_zone(int x)    { _red_zone = x;    }
   16.56 +
   16.57 +  int green_zone() const      { return _green_zone;  }
   16.58 +  int yellow_zone() const     { return _yellow_zone; }
   16.59 +  int red_zone() const        { return _red_zone;    }
   16.60 +
   16.61 +  int total_thread_num() const  { return _n_threads;        }
   16.62 +  int worker_thread_num() const { return _n_worker_threads; }
   16.63 +
   16.64 +  int thread_threshold_step() const { return _thread_threshold_step; }
   16.65  };
    17.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Tue Dec 22 16:35:08 2009 -0800
    17.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Dec 23 02:57:31 2009 -0800
    17.3 @@ -25,10 +25,6 @@
    17.4  #include "incls/_precompiled.incl"
    17.5  #include "incls/_concurrentG1RefineThread.cpp.incl"
    17.6  
    17.7 -// ======= Concurrent Mark Thread ========
    17.8 -
    17.9 -// The CM thread is created when the G1 garbage collector is used
   17.10 -
   17.11  ConcurrentG1RefineThread::
   17.12  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
   17.13                           int worker_id_offset, int worker_id) :
   17.14 @@ -37,19 +33,42 @@
   17.15    _worker_id(worker_id),
   17.16    _active(false),
   17.17    _next(next),
   17.18 +  _monitor(NULL),
   17.19    _cg1r(cg1r),
   17.20 -  _vtime_accum(0.0),
   17.21 -  _interval_ms(5.0)
   17.22 +  _vtime_accum(0.0)
   17.23  {
   17.24 +
   17.25 +  // Each thread has its own monitor. The i-th thread is responsible for signalling
    17.26 +  // to thread i+1 if the number of buffers in the queue exceeds a threshold for this
    17.27 +  // thread. Monitors are also used to wake up the threads during termination.
    17.28 +  // The 0th worker is notified by mutator threads and has a special monitor.
   17.29 +  // The last worker is used for young gen rset size sampling.
   17.30 +  if (worker_id > 0) {
   17.31 +    _monitor = new Monitor(Mutex::nonleaf, "Refinement monitor", true);
   17.32 +  } else {
   17.33 +    _monitor = DirtyCardQ_CBL_mon;
   17.34 +  }
   17.35 +  initialize();
   17.36    create_and_start();
   17.37  }
   17.38  
   17.39 +void ConcurrentG1RefineThread::initialize() {
   17.40 +  if (_worker_id < cg1r()->worker_thread_num()) {
   17.41 +    // Current thread activation threshold
   17.42 +    _threshold = MIN2<int>(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(),
   17.43 +                           cg1r()->yellow_zone());
    17.44 +    // A thread deactivates once the number of buffers reaches a deactivation threshold
   17.45 +    _deactivation_threshold = MAX2<int>(_threshold - cg1r()->thread_threshold_step(), cg1r()->green_zone());
   17.46 +  } else {
   17.47 +    set_active(true);
   17.48 +  }
   17.49 +}
   17.50 +
   17.51  void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
   17.52    G1CollectedHeap* g1h = G1CollectedHeap::heap();
   17.53    G1CollectorPolicy* g1p = g1h->g1_policy();
   17.54    if (g1p->adaptive_young_list_length()) {
   17.55      int regions_visited = 0;
   17.56 -
   17.57      g1h->young_list_rs_length_sampling_init();
   17.58      while (g1h->young_list_rs_length_sampling_more()) {
   17.59        g1h->young_list_rs_length_sampling_next();
   17.60 @@ -70,99 +89,121 @@
   17.61    }
   17.62  }
   17.63  
   17.64 +void ConcurrentG1RefineThread::run_young_rs_sampling() {
   17.65 +  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   17.66 +  _vtime_start = os::elapsedVTime();
   17.67 +  while(!_should_terminate) {
   17.68 +    _sts.join();
   17.69 +    sample_young_list_rs_lengths();
   17.70 +    _sts.leave();
   17.71 +
   17.72 +    if (os::supports_vtime()) {
   17.73 +      _vtime_accum = (os::elapsedVTime() - _vtime_start);
   17.74 +    } else {
   17.75 +      _vtime_accum = 0.0;
   17.76 +    }
   17.77 +
   17.78 +    MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
   17.79 +    if (_should_terminate) {
   17.80 +      break;
   17.81 +    }
   17.82 +    _monitor->wait(Mutex::_no_safepoint_check_flag, G1ConcRefineServiceInterval);
   17.83 +  }
   17.84 +}
   17.85 +
   17.86 +void ConcurrentG1RefineThread::wait_for_completed_buffers() {
   17.87 +  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   17.88 +  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
   17.89 +  while (!_should_terminate && !is_active()) {
   17.90 +    _monitor->wait(Mutex::_no_safepoint_check_flag);
   17.91 +  }
   17.92 +}
   17.93 +
   17.94 +bool ConcurrentG1RefineThread::is_active() {
   17.95 +  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   17.96 +  return _worker_id > 0 ? _active : dcqs.process_completed_buffers();
   17.97 +}
   17.98 +
   17.99 +void ConcurrentG1RefineThread::activate() {
  17.100 +  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
  17.101 +  if (_worker_id > 0) {
  17.102 +    if (G1TraceConcurrentRefinement) {
  17.103 +      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  17.104 +      gclog_or_tty->print_cr("G1-Refine-activated worker %d, on threshold %d, current %d",
  17.105 +                             _worker_id, _threshold, (int)dcqs.completed_buffers_num());
  17.106 +    }
  17.107 +    set_active(true);
  17.108 +  } else {
  17.109 +    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  17.110 +    dcqs.set_process_completed(true);
  17.111 +  }
  17.112 +  _monitor->notify();
  17.113 +}
  17.114 +
  17.115 +void ConcurrentG1RefineThread::deactivate() {
  17.116 +  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
  17.117 +  if (_worker_id > 0) {
  17.118 +    if (G1TraceConcurrentRefinement) {
  17.119 +      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  17.120 +      gclog_or_tty->print_cr("G1-Refine-deactivated worker %d, off threshold %d, current %d",
  17.121 +                             _worker_id, _deactivation_threshold, (int)dcqs.completed_buffers_num());
  17.122 +    }
  17.123 +    set_active(false);
  17.124 +  } else {
  17.125 +    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  17.126 +    dcqs.set_process_completed(false);
  17.127 +  }
  17.128 +}
  17.129 +
  17.130  void ConcurrentG1RefineThread::run() {
  17.131    initialize_in_thread();
  17.132 -  _vtime_start = os::elapsedVTime();
  17.133    wait_for_universe_init();
  17.134  
  17.135 +  if (_worker_id >= cg1r()->worker_thread_num()) {
  17.136 +    run_young_rs_sampling();
  17.137 +    terminate();
  17.138 +  }
  17.139 +
  17.140 +  _vtime_start = os::elapsedVTime();
  17.141    while (!_should_terminate) {
  17.142      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  17.143 -    // Wait for completed log buffers to exist.
  17.144 -    {
  17.145 -      MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
  17.146 -      while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
  17.147 -              (_worker_id > 0 && !is_active())) &&
  17.148 -             !_should_terminate) {
  17.149 -         DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
  17.150 -      }
  17.151 +
  17.152 +    // Wait for work
  17.153 +    wait_for_completed_buffers();
  17.154 +
  17.155 +    if (_should_terminate) {
  17.156 +      break;
  17.157      }
  17.158  
  17.159 -    if (_should_terminate) {
  17.160 -      return;
  17.161 -    }
  17.162 +    _sts.join();
  17.163  
  17.164 -    // Now we take them off (this doesn't hold locks while it applies
  17.165 -    // closures.)  (If we did a full collection, then we'll do a full
  17.166 -    // traversal.
  17.167 -    _sts.join();
  17.168 -    int n_logs = 0;
  17.169 -    int lower_limit = 0;
  17.170 -    double start_vtime_sec; // only used when G1SmoothConcRefine is on
  17.171 -    int prev_buffer_num; // only used when G1SmoothConcRefine is on
  17.172 -    // This thread activation threshold
  17.173 -    int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id;
  17.174 -    // Next thread activation threshold
  17.175 -    int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold;
  17.176 -    int deactivation_threshold = MAX2<int>(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0);
  17.177 +    do {
  17.178 +      int curr_buffer_num = (int)dcqs.completed_buffers_num();
  17.179 +      // If the number of the buffers falls down into the yellow zone,
  17.180 +      // that means that the transition period after the evacuation pause has ended.
  17.181 +      if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) {
  17.182 +        dcqs.set_completed_queue_padding(0);
  17.183 +      }
  17.184  
  17.185 -    if (G1SmoothConcRefine) {
  17.186 -      lower_limit = 0;
  17.187 -      start_vtime_sec = os::elapsedVTime();
  17.188 -      prev_buffer_num = (int) dcqs.completed_buffers_num();
  17.189 -    } else {
  17.190 -      lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now.
  17.191 -    }
  17.192 -    while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
  17.193 -      double end_vtime_sec;
  17.194 -      double elapsed_vtime_sec;
  17.195 -      int elapsed_vtime_ms;
  17.196 -      int curr_buffer_num = (int) dcqs.completed_buffers_num();
  17.197 -
  17.198 -      if (G1SmoothConcRefine) {
  17.199 -        end_vtime_sec = os::elapsedVTime();
  17.200 -        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  17.201 -        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
  17.202 -
  17.203 -        if (curr_buffer_num > prev_buffer_num ||
  17.204 -            curr_buffer_num > next_threshold) {
  17.205 -          decreaseInterval(elapsed_vtime_ms);
  17.206 -        } else if (curr_buffer_num < prev_buffer_num) {
  17.207 -          increaseInterval(elapsed_vtime_ms);
  17.208 -        }
  17.209 -      }
  17.210 -      if (_worker_id == 0) {
  17.211 -        sample_young_list_rs_lengths();
  17.212 -      } else if (curr_buffer_num < deactivation_threshold) {
  17.213 +      if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) {
  17.214          // If the number of the buffer has fallen below our threshold
  17.215          // we should deactivate. The predecessor will reactivate this
  17.216          // thread should the number of the buffers cross the threshold again.
  17.217 -        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
  17.218          deactivate();
  17.219 -        if (G1TraceConcurrentRefinement) {
  17.220 -          gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
  17.221 -        }
  17.222          break;
  17.223        }
  17.224  
  17.225        // Check if we need to activate the next thread.
  17.226 -      if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
  17.227 -        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
  17.228 +      if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
  17.229          _next->activate();
  17.230 -        DirtyCardQ_CBL_mon->notify_all();
  17.231 -        if (G1TraceConcurrentRefinement) {
  17.232 -          gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
  17.233 -        }
  17.234        }
  17.235 +    } while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, cg1r()->green_zone()));
  17.236  
  17.237 -      if (G1SmoothConcRefine) {
  17.238 -        prev_buffer_num = curr_buffer_num;
  17.239 -        _sts.leave();
  17.240 -        os::sleep(Thread::current(), (jlong) _interval_ms, false);
  17.241 -        _sts.join();
  17.242 -        start_vtime_sec = os::elapsedVTime();
  17.243 -      }
  17.244 -      n_logs++;
  17.245 +    // We can exit the loop above while being active if there was a yield request.
  17.246 +    if (is_active()) {
  17.247 +      deactivate();
  17.248      }
  17.249 +
  17.250      _sts.leave();
  17.251  
  17.252      if (os::supports_vtime()) {
  17.253 @@ -172,7 +213,6 @@
  17.254      }
  17.255    }
  17.256    assert(_should_terminate, "just checking");
  17.257 -
  17.258    terminate();
  17.259  }
  17.260  
  17.261 @@ -191,8 +231,8 @@
  17.262    }
  17.263  
  17.264    {
  17.265 -    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
  17.266 -    DirtyCardQ_CBL_mon->notify_all();
  17.267 +    MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
  17.268 +    _monitor->notify();
  17.269    }
  17.270  
  17.271    {
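
Each refinement worker gets an activation threshold spread between the green and yellow zones in steps of thread_threshold_step, and a deactivation threshold one step lower; worker i wakes worker i+1 once the completed-buffer count passes i+1's threshold, and parks itself again when the count drops to its own deactivation threshold. A standalone sketch of just the threshold arithmetic from initialize() and reset_threshold_step() (no monitors or threads):

    #include <algorithm>
    #include <cstdio>

    // Standalone sketch of the per-worker activation/deactivation thresholds
    // set up in ConcurrentG1RefineThread::initialize(); "step" models
    // thread_threshold_step(). Worker 0 is driven by the mutator notification
    // path, workers 1..n-1 by their predecessor.
    struct Thresholds { int activate, deactivate; };

    Thresholds worker_thresholds(int worker_id, int green, int yellow, int step) {
      int activate   = std::min(step * (worker_id + 1) + green, yellow);
      int deactivate = std::max(activate - step, green);
      return { activate, deactivate };
    }

    int main() {
      const int green = 4, yellow = 12, workers = 3;
      const int step = (yellow - green) / (workers + 1);   // reset_threshold_step()
      for (int i = 0; i < workers; i++) {
        Thresholds t = worker_thresholds(i, green, yellow, step);
        std::printf("worker %d: activate at %d buffers, deactivate below %d\n",
                    i, t.activate, t.deactivate);
      }
      return 0;
    }
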
    18.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Tue Dec 22 16:35:08 2009 -0800
    18.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Dec 23 02:57:31 2009 -0800
    18.3 @@ -40,42 +40,36 @@
    18.4    // when the number of the rset update buffer crosses a certain threshold. A successor
    18.5    // would self-deactivate when the number of the buffers falls below the threshold.
    18.6    bool _active;
    18.7 -  ConcurrentG1RefineThread *       _next;
    18.8 - public:
    18.9 -  virtual void run();
   18.10 +  ConcurrentG1RefineThread* _next;
   18.11 +  Monitor* _monitor;
   18.12 +  ConcurrentG1Refine* _cg1r;
   18.13  
   18.14 -  bool is_active()  { return _active;  }
   18.15 -  void activate()   { _active = true;  }
   18.16 -  void deactivate() { _active = false; }
   18.17 +  int _thread_threshold_step;
   18.18 +  // This thread activation threshold
   18.19 +  int _threshold;
   18.20 +  // This thread deactivation threshold
   18.21 +  int _deactivation_threshold;
   18.22  
   18.23 - private:
   18.24 -  ConcurrentG1Refine*              _cg1r;
   18.25 +  void sample_young_list_rs_lengths();
   18.26 +  void run_young_rs_sampling();
   18.27 +  void wait_for_completed_buffers();
   18.28  
   18.29 -  double                           _interval_ms;
   18.30 -
   18.31 -  void decreaseInterval(int processing_time_ms) {
   18.32 -    double min_interval_ms = (double) processing_time_ms;
   18.33 -    _interval_ms = 0.8 * _interval_ms;
   18.34 -    if (_interval_ms < min_interval_ms)
   18.35 -      _interval_ms = min_interval_ms;
   18.36 -  }
   18.37 -  void increaseInterval(int processing_time_ms) {
   18.38 -    double max_interval_ms = 9.0 * (double) processing_time_ms;
   18.39 -    _interval_ms = 1.1 * _interval_ms;
   18.40 -    if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
   18.41 -      _interval_ms = max_interval_ms;
   18.42 -  }
   18.43 -
   18.44 -  void sleepBeforeNextCycle();
   18.45 +  void set_active(bool x) { _active = x; }
   18.46 +  bool is_active();
   18.47 +  void activate();
   18.48 +  void deactivate();
   18.49  
   18.50    // For use by G1CollectedHeap, which is a friend.
   18.51    static SuspendibleThreadSet* sts() { return &_sts; }
   18.52  
   18.53 - public:
   18.54 +public:
   18.55 +  virtual void run();
   18.56    // Constructor
   18.57    ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
   18.58                             int worker_id_offset, int worker_id);
   18.59  
   18.60 +  void initialize();
   18.61 +
   18.62    // Printing
   18.63    void print() const;
   18.64    void print_on(outputStream* st) const;
   18.65 @@ -83,13 +77,10 @@
   18.66    // Total virtual time so far.
   18.67    double vtime_accum() { return _vtime_accum; }
   18.68  
   18.69 -  ConcurrentG1Refine* cg1r()                     { return _cg1r;     }
   18.70 -
   18.71 -  void            sample_young_list_rs_lengths();
   18.72 +  ConcurrentG1Refine* cg1r() { return _cg1r;     }
   18.73  
   18.74    // Yield for GC
   18.75 -  void            yield();
   18.76 -
   18.77 +  void yield();
   18.78    // shutdown
   18.79    void stop();
   18.80  };
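
The header above replaces the fixed sleep-interval scheme (_interval_ms and its increase/decrease helpers) with per-thread activation and deactivation thresholds spaced by _thread_threshold_step: each worker in the _next chain is woken only once the update-buffer queue grows past its own threshold, and it parks itself again when the queue drops below its deactivation threshold. How initialize() derives those thresholds is not shown in this excerpt, so the standalone sketch below uses an assumed formula (green zone plus a per-worker multiple of the step) purely to illustrate the staggered-activation idea; none of the names are HotSpot APIs.

#include <cstdio>

// Hypothetical per-worker thresholds: worker 0 is recruited first, and each
// later worker needs the queue to be one "step" longer before it is woken.
// The formula is an assumption for illustration, not the HotSpot one.
struct RefinementWorker {
  int activation_threshold;    // wake this worker at/above this queue length
  int deactivation_threshold;  // the worker parks itself below this length
};

static void compute_thresholds(RefinementWorker* workers, int n_workers,
                               int green_zone, int step) {
  for (int i = 0; i < n_workers; i++) {
    workers[i].activation_threshold   = green_zone + (i + 1) * step;
    workers[i].deactivation_threshold = green_zone + i * step;
  }
}

int main() {
  RefinementWorker w[4];
  compute_thresholds(w, 4, /*green_zone=*/6, /*step=*/2);
  for (int i = 0; i < 4; i++) {
    std::printf("worker %d: activate at %d buffers, deactivate below %d\n",
                i, w[i].activation_threshold, w[i].deactivation_threshold);
  }
  return 0;
}

Staggering the thresholds means extra workers are recruited only while the queue keeps growing, instead of every thread polling on a timer as the removed _interval_ms logic did.
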
    19.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Dec 22 16:35:08 2009 -0800
    19.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Dec 23 02:57:31 2009 -0800
    19.3 @@ -760,7 +760,6 @@
    19.4    rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
    19.5  
    19.6    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    19.7 -  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
    19.8    satb_mq_set.set_active_all_threads(true);
    19.9  
   19.10    // update_g1_committed() will be called at the end of an evac pause
    20.1 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Tue Dec 22 16:35:08 2009 -0800
    20.2 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Dec 23 02:57:31 2009 -0800
    20.3 @@ -61,8 +61,8 @@
    20.4  #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
    20.5  #endif // _MSC_VER
    20.6  
    20.7 -DirtyCardQueueSet::DirtyCardQueueSet() :
    20.8 -  PtrQueueSet(true /*notify_when_complete*/),
    20.9 +DirtyCardQueueSet::DirtyCardQueueSet(bool notify_when_complete) :
   20.10 +  PtrQueueSet(notify_when_complete),
   20.11    _closure(NULL),
   20.12    _shared_dirty_card_queue(this, true /*perm*/),
   20.13    _free_ids(NULL),
   20.14 @@ -77,12 +77,12 @@
   20.15  }
   20.16  
   20.17  void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
   20.18 +                                   int process_completed_threshold,
   20.19                                     int max_completed_queue,
   20.20                                     Mutex* lock, PtrQueueSet* fl_owner) {
   20.21 -  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner);
   20.22 +  PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold,
   20.23 +                          max_completed_queue, fl_owner);
   20.24    set_buffer_size(G1UpdateBufferSize);
   20.25 -  set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold);
   20.26 -
   20.27    _shared_dirty_card_queue.set_lock(lock);
   20.28    _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
   20.29  }
   20.30 @@ -154,9 +154,10 @@
   20.31    return b;
   20.32  }
   20.33  
   20.34 -DirtyCardQueueSet::CompletedBufferNode*
   20.35 +
   20.36 +BufferNode*
   20.37  DirtyCardQueueSet::get_completed_buffer(int stop_at) {
   20.38 -  CompletedBufferNode* nd = NULL;
   20.39 +  BufferNode* nd = NULL;
   20.40    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
   20.41  
   20.42    if ((int)_n_completed_buffers <= stop_at) {
   20.43 @@ -166,10 +167,11 @@
   20.44  
   20.45    if (_completed_buffers_head != NULL) {
   20.46      nd = _completed_buffers_head;
   20.47 -    _completed_buffers_head = nd->next;
   20.48 +    _completed_buffers_head = nd->next();
   20.49      if (_completed_buffers_head == NULL)
   20.50        _completed_buffers_tail = NULL;
   20.51      _n_completed_buffers--;
   20.52 +    assert(_n_completed_buffers >= 0, "Invariant");
   20.53    }
   20.54    debug_only(assert_completed_buffer_list_len_correct_locked());
   20.55    return nd;
   20.56 @@ -177,20 +179,19 @@
   20.57  
   20.58  bool DirtyCardQueueSet::
   20.59  apply_closure_to_completed_buffer_helper(int worker_i,
   20.60 -                                         CompletedBufferNode* nd) {
   20.61 +                                         BufferNode* nd) {
   20.62    if (nd != NULL) {
   20.63 +    void **buf = BufferNode::make_buffer_from_node(nd);
   20.64 +    size_t index = nd->index();
   20.65      bool b =
   20.66 -      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf,
   20.67 -                                              nd->index, _sz,
   20.68 +      DirtyCardQueue::apply_closure_to_buffer(_closure, buf,
   20.69 +                                              index, _sz,
   20.70                                                true, worker_i);
   20.71 -    void** buf = nd->buf;
   20.72 -    size_t index = nd->index;
   20.73 -    delete nd;
   20.74      if (b) {
   20.75        deallocate_buffer(buf);
   20.76        return true;  // In normal case, go on to next buffer.
   20.77      } else {
   20.78 -      enqueue_complete_buffer(buf, index, true);
   20.79 +      enqueue_complete_buffer(buf, index);
   20.80        return false;
   20.81      }
   20.82    } else {
   20.83 @@ -203,32 +204,33 @@
   20.84                                                            bool during_pause)
   20.85  {
   20.86    assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
   20.87 -  CompletedBufferNode* nd = get_completed_buffer(stop_at);
   20.88 +  BufferNode* nd = get_completed_buffer(stop_at);
   20.89    bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
   20.90    if (res) Atomic::inc(&_processed_buffers_rs_thread);
   20.91    return res;
   20.92  }
   20.93  
   20.94  void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
   20.95 -  CompletedBufferNode* nd = _completed_buffers_head;
   20.96 +  BufferNode* nd = _completed_buffers_head;
   20.97    while (nd != NULL) {
   20.98      bool b =
   20.99 -      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz,
  20.100 -                                              false);
  20.101 +      DirtyCardQueue::apply_closure_to_buffer(_closure,
  20.102 +                                              BufferNode::make_buffer_from_node(nd),
  20.103 +                                              0, _sz, false);
  20.104      guarantee(b, "Should not stop early.");
  20.105 -    nd = nd->next;
  20.106 +    nd = nd->next();
  20.107    }
  20.108  }
  20.109  
  20.110  void DirtyCardQueueSet::abandon_logs() {
  20.111    assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
  20.112 -  CompletedBufferNode* buffers_to_delete = NULL;
  20.113 +  BufferNode* buffers_to_delete = NULL;
  20.114    {
  20.115      MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  20.116      while (_completed_buffers_head != NULL) {
  20.117 -      CompletedBufferNode* nd = _completed_buffers_head;
  20.118 -      _completed_buffers_head = nd->next;
  20.119 -      nd->next = buffers_to_delete;
  20.120 +      BufferNode* nd = _completed_buffers_head;
  20.121 +      _completed_buffers_head = nd->next();
  20.122 +      nd->set_next(buffers_to_delete);
  20.123        buffers_to_delete = nd;
  20.124      }
  20.125      _n_completed_buffers = 0;
  20.126 @@ -236,10 +238,9 @@
  20.127      debug_only(assert_completed_buffer_list_len_correct_locked());
  20.128    }
  20.129    while (buffers_to_delete != NULL) {
  20.130 -    CompletedBufferNode* nd = buffers_to_delete;
  20.131 -    buffers_to_delete = nd->next;
  20.132 -    deallocate_buffer(nd->buf);
  20.133 -    delete nd;
  20.134 +    BufferNode* nd = buffers_to_delete;
  20.135 +    buffers_to_delete = nd->next();
  20.136 +    deallocate_buffer(BufferNode::make_buffer_from_node(nd));
  20.137    }
  20.138    // Since abandon is done only at safepoints, we can safely manipulate
  20.139    // these queues.
    21.1 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Tue Dec 22 16:35:08 2009 -0800
    21.2 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Wed Dec 23 02:57:31 2009 -0800
    21.3 @@ -84,11 +84,12 @@
    21.4    jint _processed_buffers_rs_thread;
    21.5  
    21.6  public:
    21.7 -  DirtyCardQueueSet();
    21.8 +  DirtyCardQueueSet(bool notify_when_complete = true);
    21.9  
   21.10    void initialize(Monitor* cbl_mon, Mutex* fl_lock,
   21.11 -                  int max_completed_queue = 0,
   21.12 -                  Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL);
   21.13 +                  int process_completed_threshold,
   21.14 +                  int max_completed_queue,
   21.15 +                  Mutex* lock, PtrQueueSet* fl_owner = NULL);
   21.16  
   21.17    // The number of parallel ids that can be claimed to allow collector or
   21.18    // mutator threads to do card-processing work.
   21.19 @@ -123,9 +124,9 @@
   21.20                                           bool during_pause = false);
   21.21  
   21.22    bool apply_closure_to_completed_buffer_helper(int worker_i,
   21.23 -                                                CompletedBufferNode* nd);
   21.24 +                                                BufferNode* nd);
   21.25  
   21.26 -  CompletedBufferNode* get_completed_buffer(int stop_at);
   21.27 +  BufferNode* get_completed_buffer(int stop_at);
   21.28  
   21.29    // Applies the current closure to all completed buffers,
   21.30    // non-consumptively.
    22.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Dec 22 16:35:08 2009 -0800
    22.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Dec 23 02:57:31 2009 -0800
    22.3 @@ -1375,6 +1375,7 @@
    22.4  G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
    22.5    SharedHeap(policy_),
    22.6    _g1_policy(policy_),
    22.7 +  _dirty_card_queue_set(false),
    22.8    _ref_processor(NULL),
    22.9    _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   22.10    _bot_shared(NULL),
   22.11 @@ -1460,8 +1461,6 @@
   22.12    Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
   22.13    Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
   22.14  
   22.15 -  // We allocate this in any case, but only do no work if the command line
   22.16 -  // param is off.
   22.17    _cg1r = new ConcurrentG1Refine();
   22.18  
   22.19    // Reserve the maximum.
   22.20 @@ -1594,18 +1593,20 @@
   22.21  
   22.22    JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
   22.23                                                 SATB_Q_FL_lock,
   22.24 -                                               0,
   22.25 +                                               G1SATBProcessCompletedThreshold,
   22.26                                                 Shared_SATB_Q_lock);
   22.27  
   22.28    JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
   22.29                                                  DirtyCardQ_FL_lock,
   22.30 -                                                G1UpdateBufferQueueMaxLength,
   22.31 +                                                concurrent_g1_refine()->yellow_zone(),
   22.32 +                                                concurrent_g1_refine()->red_zone(),
   22.33                                                  Shared_DirtyCardQ_lock);
   22.34  
   22.35    if (G1DeferredRSUpdate) {
   22.36      dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
   22.37                                        DirtyCardQ_FL_lock,
   22.38 -                                      0,
   22.39 +                                      -1, // never trigger processing
   22.40 +                                      -1, // no limit on length
   22.41                                        Shared_DirtyCardQ_lock,
   22.42                                        &JavaThread::dirty_card_queue_set());
   22.43    }
   22.44 @@ -4239,10 +4240,11 @@
   22.45      RedirtyLoggedCardTableEntryFastClosure redirty;
   22.46      dirty_card_queue_set().set_closure(&redirty);
   22.47      dirty_card_queue_set().apply_closure_to_all_completed_buffers();
   22.48 -    JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set());
   22.49 +
   22.50 +    DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
   22.51 +    dcq.merge_bufferlists(&dirty_card_queue_set());
   22.52      assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   22.53    }
   22.54 -
   22.55    COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
   22.56  }
   22.57  
    23.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Dec 22 16:35:08 2009 -0800
    23.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Dec 23 02:57:31 2009 -0800
    23.3 @@ -1914,6 +1914,10 @@
    23.4    calculate_young_list_min_length();
    23.5    calculate_young_list_target_config();
    23.6  
    23.7 +  // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
    23.8 +  double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSUpdatePauseFractionPercent / 100.0;
    23.9 +  adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms);
   23.10 +
   23.11    // </NEW PREDICTION>
   23.12  
   23.13    _target_pause_time_ms = -1.0;
   23.14 @@ -1921,6 +1925,47 @@
   23.15  
   23.16  // <NEW PREDICTION>
   23.17  
   23.18 +void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time,
   23.19 +                                                     double update_rs_processed_buffers,
   23.20 +                                                     double goal_ms) {
   23.21 +  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   23.22 +  ConcurrentG1Refine *cg1r = G1CollectedHeap::heap()->concurrent_g1_refine();
   23.23 +
   23.24 +  if (G1AdaptiveConcRefine) {
   23.25 +    const int k_gy = 3, k_gr = 6;
   23.26 +    const double inc_k = 1.1, dec_k = 0.9;
   23.27 +
   23.28 +    int g = cg1r->green_zone();
   23.29 +    if (update_rs_time > goal_ms) {
   23.30 +      g = (int)(g * dec_k);  // Can become 0, that's OK. That would mean mutator-only processing.
   23.31 +    } else {
   23.32 +      if (update_rs_time < goal_ms && update_rs_processed_buffers > g) {
   23.33 +        g = (int)MAX2(g * inc_k, g + 1.0);
   23.34 +      }
   23.35 +    }
   23.36 +    // Change the refinement threads params
   23.37 +    cg1r->set_green_zone(g);
   23.38 +    cg1r->set_yellow_zone(g * k_gy);
   23.39 +    cg1r->set_red_zone(g * k_gr);
   23.40 +    cg1r->reinitialize_threads();
   23.41 +
   23.42 +    int processing_threshold_delta = MAX2((int)(cg1r->green_zone() * sigma()), 1);
   23.43 +    int processing_threshold = MIN2(cg1r->green_zone() + processing_threshold_delta,
   23.44 +                                    cg1r->yellow_zone());
   23.45 +    // Change the barrier params
   23.46 +    dcqs.set_process_completed_threshold(processing_threshold);
   23.47 +    dcqs.set_max_completed_queue(cg1r->red_zone());
   23.48 +  }
   23.49 +
   23.50 +  int curr_queue_size = dcqs.completed_buffers_num();
   23.51 +  if (curr_queue_size >= cg1r->yellow_zone()) {
   23.52 +    dcqs.set_completed_queue_padding(curr_queue_size);
   23.53 +  } else {
   23.54 +    dcqs.set_completed_queue_padding(0);
   23.55 +  }
   23.56 +  dcqs.notify_if_necessary();
   23.57 +}
   23.58 +
   23.59  double
   23.60  G1CollectorPolicy::
   23.61  predict_young_collection_elapsed_time_ms(size_t adjustment) {
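
The new adjust_concurrent_refinement() closes a feedback loop after each pause: when the update-RS phase overran its time goal the green zone is scaled down by dec_k, when it finished early with more than a green zone's worth of buffers processed it is grown by max(1.1*g, g+1), and the yellow and red zones are rederived as 3x and 6x green, with the mutator processing threshold clamped to the yellow zone. The self-contained sketch below mirrors that arithmetic on plain values so the loop can be traced outside the VM; sigma here is only a stand-in for the policy's confidence term and the inputs in main() are made up.

#include <algorithm>
#include <cstdio>

// Mirror of the zone-adjustment arithmetic shown in the
// G1CollectorPolicy::adjust_concurrent_refinement() hunk (sketch only).
struct Zones { int green, yellow, red, processing_threshold; };

static Zones adjust(int green, double update_rs_time_ms, double goal_ms,
                    double processed_buffers, double sigma) {
  const int k_gy = 3, k_gr = 6;
  const double inc_k = 1.1, dec_k = 0.9;

  if (update_rs_time_ms > goal_ms) {
    green = (int)(green * dec_k);                        // over the goal: shrink
  } else if (update_rs_time_ms < goal_ms && processed_buffers > green) {
    green = (int)std::max(green * inc_k, green + 1.0);   // under the goal: grow
  }

  Zones z;
  z.green  = green;
  z.yellow = green * k_gy;
  z.red    = green * k_gr;
  int delta = std::max((int)(green * sigma), 1);
  z.processing_threshold = std::min(green + delta, z.yellow);
  return z;
}

int main() {
  Zones z = adjust(/*green=*/10, /*update_rs_time_ms=*/12.0, /*goal_ms=*/20.0,
                   /*processed_buffers=*/25.0, /*sigma=*/0.5);
  std::printf("green=%d yellow=%d red=%d processing_threshold=%d\n",
              z.green, z.yellow, z.red, z.processing_threshold);
  return 0;
}

With these example inputs the green zone grows from 10 to 11 and the sketch prints yellow=33, red=66 and a processing threshold of 16, which is what the pause code then feeds into the dirty-card queue set through set_process_completed_threshold() and set_max_completed_queue().
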
    24.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Dec 22 16:35:08 2009 -0800
    24.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Dec 23 02:57:31 2009 -0800
    24.3 @@ -316,6 +316,10 @@
    24.4    bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group);
    24.5  #endif // PRODUCT
    24.6  
    24.7 +  void adjust_concurrent_refinement(double update_rs_time,
    24.8 +                                    double update_rs_processed_buffers,
    24.9 +                                    double goal_ms);
   24.10 +
   24.11  protected:
   24.12    double _pause_time_target_ms;
   24.13    double _recorded_young_cset_choice_time_ms;
    25.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Dec 22 16:35:08 2009 -0800
    25.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Dec 23 02:57:31 2009 -0800
    25.3 @@ -85,7 +85,7 @@
    25.4    diagnostic(bool, G1SummarizeZFStats, false,                               \
    25.5            "Summarize zero-filling info")                                    \
    25.6                                                                              \
    25.7 -  develop(bool, G1TraceConcurrentRefinement, false,                         \
    25.8 +  diagnostic(bool, G1TraceConcurrentRefinement, false,                      \
    25.9            "Trace G1 concurrent refinement")                                 \
   25.10                                                                              \
   25.11    product(intx, G1MarkStackSize, 2 * 1024 * 1024,                           \
   25.12 @@ -94,19 +94,6 @@
   25.13    product(intx, G1MarkRegionStackSize, 1024 * 1024,                         \
   25.14            "Size of the region stack for concurrent marking.")               \
   25.15                                                                              \
   25.16 -  develop(bool, G1ConcRefine, true,                                         \
   25.17 -          "If true, run concurrent rem set refinement for G1")              \
   25.18 -                                                                            \
   25.19 -  develop(intx, G1ConcRefineTargTraversals, 4,                              \
   25.20 -          "Number of concurrent refinement we try to achieve")              \
   25.21 -                                                                            \
   25.22 -  develop(intx, G1ConcRefineInitialDelta, 4,                                \
   25.23 -          "Number of heap regions of alloc ahead of starting collection "   \
   25.24 -          "pause to start concurrent refinement (initially)")               \
   25.25 -                                                                            \
   25.26 -  develop(bool, G1SmoothConcRefine, true,                                   \
   25.27 -          "Attempts to smooth out the overhead of concurrent refinement")   \
   25.28 -                                                                            \
   25.29    develop(bool, G1ConcZeroFill, true,                                       \
   25.30            "If true, run concurrent zero-filling thread")                    \
   25.31                                                                              \
   25.32 @@ -178,13 +165,38 @@
   25.33    product(intx, G1UpdateBufferSize, 256,                                    \
   25.34            "Size of an update buffer")                                       \
   25.35                                                                              \
   25.36 -  product(intx, G1UpdateBufferQueueProcessingThreshold, 5,                  \
   25.37 +  product(intx, G1ConcRefineYellowZone, 0,                                  \
   25.38            "Number of enqueued update buffers that will "                    \
   25.39 -          "trigger concurrent processing")                                  \
   25.40 +          "trigger concurrent processing. Will be selected ergonomically "  \
   25.41 +          "by default.")                                                    \
   25.42                                                                              \
   25.43 -  product(intx, G1UpdateBufferQueueMaxLength, 30,                           \
   25.44 +  product(intx, G1ConcRefineRedZone, 0,                                     \
   25.45            "Maximum number of enqueued update buffers before mutator "       \
   25.46 -          "threads start processing new ones instead of enqueueing them")   \
   25.47 +          "threads start processing new ones instead of enqueueing them. "  \
   25.48 +          "Will be selected ergonomically by default. Zero will disable "   \
   25.49 +          "concurrent processing.")                                         \
   25.50 +                                                                            \
   25.51 +  product(intx, G1ConcRefineGreenZone, 0,                                   \
   25.52 +          "The number of update buffers that are left in the queue by the " \
   25.53 +          "concurrent processing threads. Will be selected ergonomically "  \
   25.54 +          "by default.")                                                    \
   25.55 +                                                                            \
   25.56 +  product(intx, G1ConcRefineServiceInterval, 300,                           \
   25.57 +          "The last concurrent refinement thread wakes up every "           \
   25.58 +          "specified number of milliseconds to do miscellaneous work.")     \
   25.59 +                                                                            \
   25.60 +  product(intx, G1ConcRefineThresholdStep, 0,                               \
   25.61 +          "Each time the rset update queue increases by this amount, "      \
   25.62 +          "activate the next refinement thread if available. "              \
   25.63 +          "Will be selected ergonomically by default.")                     \
   25.64 +                                                                            \
   25.65 +  product(intx, G1RSUpdatePauseFractionPercent, 10,                         \
   25.66 +          "A target percentage of time that is allowed to be spent on "     \
   25.67 +          "processing RS update buffers during the collection pause.")      \
   25.68 +                                                                            \
   25.69 +  product(bool, G1AdaptiveConcRefine, true,                                 \
   25.70 +          "Select green, yellow and red zones adaptively to meet the "      \
   25.71 +          "pause requirements.")                                             \
   25.72                                                                              \
   25.73    develop(intx, G1ConcRSLogCacheSize, 10,                                   \
   25.74            "Log base 2 of the length of conc RS hot-card cache.")            \
    26.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Tue Dec 22 16:35:08 2009 -0800
    26.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Dec 23 02:57:31 2009 -0800
    26.3 @@ -64,8 +64,8 @@
    26.4    while (_index == 0) {
    26.5      handle_zero_index();
    26.6    }
    26.7 +
    26.8    assert(_index > 0, "postcondition");
    26.9 -
   26.10    _index -= oopSize;
   26.11    _buf[byte_index_to_index((int)_index)] = ptr;
   26.12    assert(0 <= _index && _index <= _sz, "Invariant.");
   26.13 @@ -99,95 +99,110 @@
   26.14    assert(_sz > 0, "Didn't set a buffer size.");
   26.15    MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
   26.16    if (_fl_owner->_buf_free_list != NULL) {
   26.17 -    void** res = _fl_owner->_buf_free_list;
   26.18 -    _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0];
   26.19 +    void** res = BufferNode::make_buffer_from_node(_fl_owner->_buf_free_list);
   26.20 +    _fl_owner->_buf_free_list = _fl_owner->_buf_free_list->next();
   26.21      _fl_owner->_buf_free_list_sz--;
   26.22 -    // Just override the next pointer with NULL, just in case we scan this part
   26.23 -    // of the buffer.
   26.24 -    res[0] = NULL;
   26.25      return res;
   26.26    } else {
   26.27 -    return (void**) NEW_C_HEAP_ARRAY(char, _sz);
   26.28 +    // Allocate space for the BufferNode in front of the buffer.
   26.29 +    char *b =  NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size());
   26.30 +    return BufferNode::make_buffer_from_block(b);
   26.31    }
   26.32  }
   26.33  
   26.34  void PtrQueueSet::deallocate_buffer(void** buf) {
   26.35    assert(_sz > 0, "Didn't set a buffer size.");
   26.36    MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
   26.37 -  buf[0] = (void*)_fl_owner->_buf_free_list;
   26.38 -  _fl_owner->_buf_free_list = buf;
   26.39 +  BufferNode *node = BufferNode::make_node_from_buffer(buf);
   26.40 +  node->set_next(_fl_owner->_buf_free_list);
   26.41 +  _fl_owner->_buf_free_list = node;
   26.42    _fl_owner->_buf_free_list_sz++;
   26.43  }
   26.44  
   26.45  void PtrQueueSet::reduce_free_list() {
   26.46 +  assert(_fl_owner == this, "Free list reduction is allowed only for the owner");
   26.47    // For now we'll adopt the strategy of deleting half.
   26.48    MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
   26.49    size_t n = _buf_free_list_sz / 2;
   26.50    while (n > 0) {
   26.51      assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong.");
   26.52 -    void** head = _buf_free_list;
   26.53 -    _buf_free_list = (void**)_buf_free_list[0];
   26.54 -    FREE_C_HEAP_ARRAY(char, head);
   26.55 +    void* b = BufferNode::make_block_from_node(_buf_free_list);
   26.56 +    _buf_free_list = _buf_free_list->next();
   26.57 +    FREE_C_HEAP_ARRAY(char, b);
   26.58      _buf_free_list_sz --;
   26.59      n--;
   26.60    }
   26.61  }
   26.62  
   26.63 -void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) {
   26.64 -  // I use explicit locking here because there's a bailout in the middle.
   26.65 -  _cbl_mon->lock_without_safepoint_check();
   26.66 +void PtrQueue::handle_zero_index() {
   26.67 +  assert(0 == _index, "Precondition.");
   26.68 +  // This thread records the full buffer and allocates a new one (while
   26.69 +  // holding the lock if there is one).
   26.70 +  if (_buf != NULL) {
   26.71 +    if (_lock) {
   26.72 +      locking_enqueue_completed_buffer(_buf);
   26.73 +    } else {
   26.74 +      if (qset()->process_or_enqueue_complete_buffer(_buf)) {
   26.75 +        // Recycle the buffer. No allocation.
   26.76 +        _sz = qset()->buffer_size();
   26.77 +        _index = _sz;
   26.78 +        return;
   26.79 +      }
   26.80 +    }
   26.81 +  }
   26.82 +  // Reallocate the buffer
   26.83 +  _buf = qset()->allocate_buffer();
   26.84 +  _sz = qset()->buffer_size();
   26.85 +  _index = _sz;
   26.86 +  assert(0 <= _index && _index <= _sz, "Invariant.");
   26.87 +}
   26.88  
   26.89 -  Thread* thread = Thread::current();
   26.90 -  assert( ignore_max_completed ||
   26.91 -          thread->is_Java_thread() ||
   26.92 -          SafepointSynchronize::is_at_safepoint(),
   26.93 -          "invariant" );
   26.94 -  ignore_max_completed = ignore_max_completed || !thread->is_Java_thread();
   26.95 +bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) {
   26.96 +  if (Thread::current()->is_Java_thread()) {
   26.97 +    // We don't lock. It is fine to be epsilon-precise here.
   26.98 +    if (_max_completed_queue == 0 || _max_completed_queue > 0 &&
   26.99 +        _n_completed_buffers >= _max_completed_queue + _completed_queue_padding) {
  26.100 +      bool b = mut_process_buffer(buf);
  26.101 +      if (b) {
  26.102 +        // True here means that the buffer hasn't been deallocated and the caller may reuse it.
  26.103 +        return true;
  26.104 +      }
  26.105 +    }
  26.106 +  }
  26.107 +  // The buffer will be enqueued. The caller will have to get a new one.
  26.108 +  enqueue_complete_buffer(buf);
  26.109 +  return false;
  26.110 +}
  26.111  
  26.112 -  if (!ignore_max_completed && _max_completed_queue > 0 &&
  26.113 -      _n_completed_buffers >= (size_t) _max_completed_queue) {
  26.114 -    _cbl_mon->unlock();
  26.115 -    bool b = mut_process_buffer(buf);
  26.116 -    if (b) {
  26.117 -      deallocate_buffer(buf);
  26.118 -      return;
  26.119 -    }
  26.120 -
  26.121 -    // Otherwise, go ahead and enqueue the buffer.  Must reaquire the lock.
  26.122 -    _cbl_mon->lock_without_safepoint_check();
  26.123 -  }
  26.124 -
  26.125 -  // Here we still hold the _cbl_mon.
  26.126 -  CompletedBufferNode* cbn = new CompletedBufferNode;
  26.127 -  cbn->buf = buf;
  26.128 -  cbn->next = NULL;
  26.129 -  cbn->index = index;
  26.130 +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) {
  26.131 +  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  26.132 +  BufferNode* cbn = BufferNode::new_from_buffer(buf);
  26.133 +  cbn->set_index(index);
  26.134    if (_completed_buffers_tail == NULL) {
  26.135      assert(_completed_buffers_head == NULL, "Well-formedness");
  26.136      _completed_buffers_head = cbn;
  26.137      _completed_buffers_tail = cbn;
  26.138    } else {
  26.139 -    _completed_buffers_tail->next = cbn;
  26.140 +    _completed_buffers_tail->set_next(cbn);
  26.141      _completed_buffers_tail = cbn;
  26.142    }
  26.143    _n_completed_buffers++;
  26.144  
  26.145 -  if (!_process_completed &&
  26.146 +  if (!_process_completed && _process_completed_threshold >= 0 &&
  26.147        _n_completed_buffers >= _process_completed_threshold) {
  26.148      _process_completed = true;
  26.149      if (_notify_when_complete)
  26.150 -      _cbl_mon->notify_all();
  26.151 +      _cbl_mon->notify();
  26.152    }
  26.153    debug_only(assert_completed_buffer_list_len_correct_locked());
  26.154 -  _cbl_mon->unlock();
  26.155  }
  26.156  
  26.157  int PtrQueueSet::completed_buffers_list_length() {
  26.158    int n = 0;
  26.159 -  CompletedBufferNode* cbn = _completed_buffers_head;
  26.160 +  BufferNode* cbn = _completed_buffers_head;
  26.161    while (cbn != NULL) {
  26.162      n++;
  26.163 -    cbn = cbn->next;
  26.164 +    cbn = cbn->next();
  26.165    }
  26.166    return n;
  26.167  }
  26.168 @@ -198,7 +213,7 @@
  26.169  }
  26.170  
  26.171  void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() {
  26.172 -  guarantee((size_t)completed_buffers_list_length() ==  _n_completed_buffers,
  26.173 +  guarantee(completed_buffers_list_length() ==  _n_completed_buffers,
  26.174              "Completed buffer length is wrong.");
  26.175  }
  26.176  
  26.177 @@ -207,12 +222,8 @@
  26.178    _sz = sz * oopSize;
  26.179  }
  26.180  
  26.181 -void PtrQueueSet::set_process_completed_threshold(size_t sz) {
  26.182 -  _process_completed_threshold = sz;
  26.183 -}
  26.184 -
  26.185 -// Merge lists of buffers. Notify waiting threads if the length of the list
  26.186 -// exceeds threshold. The source queue is emptied as a result. The queues
  26.187 +// Merge lists of buffers. Notify the processing threads.
  26.188 +// The source queue is emptied as a result. The queues
  26.189  // must share the monitor.
  26.190  void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
  26.191    assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
  26.192 @@ -224,7 +235,7 @@
  26.193    } else {
  26.194      assert(_completed_buffers_head != NULL, "Well formedness");
  26.195      if (src->_completed_buffers_head != NULL) {
  26.196 -      _completed_buffers_tail->next = src->_completed_buffers_head;
  26.197 +      _completed_buffers_tail->set_next(src->_completed_buffers_head);
  26.198        _completed_buffers_tail = src->_completed_buffers_tail;
  26.199      }
  26.200    }
  26.201 @@ -237,31 +248,13 @@
  26.202    assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
  26.203           _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
  26.204           "Sanity");
  26.205 +}
  26.206  
  26.207 -  if (!_process_completed &&
  26.208 -      _n_completed_buffers >= _process_completed_threshold) {
  26.209 +void PtrQueueSet::notify_if_necessary() {
  26.210 +  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  26.211 +  if (_n_completed_buffers >= _process_completed_threshold || _max_completed_queue == 0) {
  26.212      _process_completed = true;
  26.213      if (_notify_when_complete)
  26.214 -      _cbl_mon->notify_all();
  26.215 +      _cbl_mon->notify();
  26.216    }
  26.217  }
  26.218 -
  26.219 -// Merge free lists of the two queues. The free list of the source
  26.220 -// queue is emptied as a result. The queues must share the same
  26.221 -// mutex that guards free lists.
  26.222 -void PtrQueueSet::merge_freelists(PtrQueueSet* src) {
  26.223 -  assert(_fl_lock == src->_fl_lock, "Should share the same lock");
  26.224 -  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
  26.225 -  if (_buf_free_list != NULL) {
  26.226 -    void **p = _buf_free_list;
  26.227 -    while (*p != NULL) {
  26.228 -      p = (void**)*p;
  26.229 -    }
  26.230 -    *p = src->_buf_free_list;
  26.231 -  } else {
  26.232 -    _buf_free_list = src->_buf_free_list;
  26.233 -  }
  26.234 -  _buf_free_list_sz += src->_buf_free_list_sz;
  26.235 -  src->_buf_free_list = NULL;
  26.236 -  src->_buf_free_list_sz = 0;
  26.237 -}
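
With the enqueue path rewritten above, a Java thread whose buffer fills up no longer always enqueues it: process_or_enqueue_complete_buffer() lets the mutator refine the buffer itself once the completed-buffer list reaches _max_completed_queue plus the current padding (a maximum of zero means always), and otherwise the buffer is enqueued, with a single refinement thread notified once _process_completed_threshold is crossed. Negative values act as sentinels, meaning never notify and no length limit, which is how the G1DeferredRSUpdate queue set is configured in g1CollectedHeap.cpp. The standalone sketch below condenses that decision; the real code also lets the mutator attempt fail (mut_process_buffer), falling back to an enqueue.

#include <cstdio>

// Decision taken when a mutator's buffer fills up, condensed from
// PtrQueueSet::process_or_enqueue_complete_buffer() and
// PtrQueueSet::enqueue_complete_buffer() (names simplified).
struct QueueSetState {
  int n_completed;        // current length of the completed-buffer list
  int process_threshold;  // notify a refinement thread at this length; -1 = never
  int max_completed;      // mutator helps out above this length; -1 = no limit
  int padding;            // extra slack (set after a pause that left a long queue)
};

enum Action { MUTATOR_PROCESSES, ENQUEUE_AND_NOTIFY, ENQUEUE_ONLY };

static Action on_full_buffer(const QueueSetState& s) {
  // The mutator refines the buffer itself if the queue is saturated
  // (max == 0 means "always", max < 0 means "never").
  if (s.max_completed == 0 ||
      (s.max_completed > 0 && s.n_completed >= s.max_completed + s.padding)) {
    return MUTATOR_PROCESSES;
  }
  // Otherwise enqueue; wake a refinement thread once the threshold is crossed.
  if (s.process_threshold >= 0 && s.n_completed + 1 >= s.process_threshold) {
    return ENQUEUE_AND_NOTIFY;
  }
  return ENQUEUE_ONLY;
}

int main() {
  QueueSetState dcqs     = { 40, 15, 30, 0 };  // saturated: mutator refines
  QueueSetState deferred = { 40, -1, -1, 0 };  // sentinels: always just enqueue
  std::printf("dcqs: %d, deferred: %d\n",
              (int)on_full_buffer(dcqs), (int)on_full_buffer(deferred));
  return 0;
}
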
    27.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Tue Dec 22 16:35:08 2009 -0800
    27.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Wed Dec 23 02:57:31 2009 -0800
    27.3 @@ -27,8 +27,10 @@
    27.4  // the addresses of modified old-generation objects.  This type supports
    27.5  // this operation.
    27.6  
    27.7 +// The definition of the placement operator new(size_t, void*) is in <new>.
    27.8 +#include <new>
    27.9 +
   27.10  class PtrQueueSet;
   27.11 -
   27.12  class PtrQueue VALUE_OBJ_CLASS_SPEC {
   27.13  
   27.14  protected:
   27.15 @@ -77,7 +79,7 @@
   27.16      else enqueue_known_active(ptr);
   27.17    }
   27.18  
   27.19 -  inline void handle_zero_index();
   27.20 +  void handle_zero_index();
   27.21    void locking_enqueue_completed_buffer(void** buf);
   27.22  
   27.23    void enqueue_known_active(void* ptr);
   27.24 @@ -126,34 +128,65 @@
   27.25  
   27.26  };
   27.27  
   27.28 +class BufferNode {
   27.29 +  size_t _index;
   27.30 +  BufferNode* _next;
   27.31 +public:
   27.32 +  BufferNode() : _index(0), _next(NULL) { }
   27.33 +  BufferNode* next() const     { return _next;  }
   27.34 +  void set_next(BufferNode* n) { _next = n;     }
   27.35 +  size_t index() const         { return _index; }
   27.36 +  void set_index(size_t i)     { _index = i;    }
   27.37 +
   27.38 +  // Align the size of the structure to the size of the pointer
   27.39 +  static size_t aligned_size() {
   27.40 +    static const size_t alignment = round_to(sizeof(BufferNode), sizeof(void*));
   27.41 +    return alignment;
   27.42 +  }
   27.43 +
   27.44 +  // BufferNode is allocated before the buffer.
   27.45 +  // The chunk of memory that holds both of them is a block.
   27.46 +
   27.47 +  // Produce a new BufferNode given a buffer.
   27.48 +  static BufferNode* new_from_buffer(void** buf) {
   27.49 +    return new (make_block_from_buffer(buf)) BufferNode;
   27.50 +  }
   27.51 +
   27.52 +  // The following are the required conversion routines:
   27.53 +  static BufferNode* make_node_from_buffer(void** buf) {
   27.54 +    return (BufferNode*)make_block_from_buffer(buf);
   27.55 +  }
   27.56 +  static void** make_buffer_from_node(BufferNode *node) {
   27.57 +    return make_buffer_from_block(node);
   27.58 +  }
   27.59 +  static void* make_block_from_node(BufferNode *node) {
   27.60 +    return (void*)node;
   27.61 +  }
   27.62 +  static void** make_buffer_from_block(void* p) {
   27.63 +    return (void**)((char*)p + aligned_size());
   27.64 +  }
   27.65 +  static void* make_block_from_buffer(void** p) {
   27.66 +    return (void*)((char*)p - aligned_size());
   27.67 +  }
   27.68 +};
   27.69 +
   27.70  // A PtrQueueSet represents resources common to a set of pointer queues.
   27.71  // In particular, the individual queues allocate buffers from this shared
   27.72  // set, and return completed buffers to the set.
   27.73  // All these variables are are protected by the TLOQ_CBL_mon. XXX ???
   27.74  class PtrQueueSet VALUE_OBJ_CLASS_SPEC {
   27.75 -
   27.76  protected:
   27.77 -
   27.78 -  class CompletedBufferNode: public CHeapObj {
   27.79 -  public:
   27.80 -    void** buf;
   27.81 -    size_t index;
   27.82 -    CompletedBufferNode* next;
   27.83 -    CompletedBufferNode() : buf(NULL),
   27.84 -      index(0), next(NULL){ }
   27.85 -  };
   27.86 -
   27.87    Monitor* _cbl_mon;  // Protects the fields below.
   27.88 -  CompletedBufferNode* _completed_buffers_head;
   27.89 -  CompletedBufferNode* _completed_buffers_tail;
   27.90 -  size_t _n_completed_buffers;
   27.91 -  size_t _process_completed_threshold;
   27.92 +  BufferNode* _completed_buffers_head;
   27.93 +  BufferNode* _completed_buffers_tail;
   27.94 +  int _n_completed_buffers;
   27.95 +  int _process_completed_threshold;
   27.96    volatile bool _process_completed;
   27.97  
   27.98    // This (and the interpretation of the first element as a "next"
   27.99    // pointer) are protected by the TLOQ_FL_lock.
  27.100    Mutex* _fl_lock;
  27.101 -  void** _buf_free_list;
  27.102 +  BufferNode* _buf_free_list;
  27.103    size_t _buf_free_list_sz;
  27.104    // Queue set can share a freelist. The _fl_owner variable
  27.105    // specifies the owner. It is set to "this" by default.
  27.106 @@ -170,6 +203,7 @@
  27.107    // Maximum number of elements allowed on completed queue: after that,
  27.108    // enqueuer does the work itself.  Zero indicates no maximum.
  27.109    int _max_completed_queue;
  27.110 +  int _completed_queue_padding;
  27.111  
  27.112    int completed_buffers_list_length();
  27.113    void assert_completed_buffer_list_len_correct_locked();
  27.114 @@ -191,9 +225,12 @@
  27.115    // Because of init-order concerns, we can't pass these as constructor
  27.116    // arguments.
  27.117    void initialize(Monitor* cbl_mon, Mutex* fl_lock,
  27.118 -                  int max_completed_queue = 0,
  27.119 +                  int process_completed_threshold,
  27.120 +                  int max_completed_queue,
  27.121                    PtrQueueSet *fl_owner = NULL) {
  27.122      _max_completed_queue = max_completed_queue;
  27.123 +    _process_completed_threshold = process_completed_threshold;
  27.124 +    _completed_queue_padding = 0;
  27.125      assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?");
  27.126      _cbl_mon = cbl_mon;
  27.127      _fl_lock = fl_lock;
  27.128 @@ -208,14 +245,17 @@
  27.129    void deallocate_buffer(void** buf);
  27.130  
  27.131    // Declares that "buf" is a complete buffer.
  27.132 -  void enqueue_complete_buffer(void** buf, size_t index = 0,
  27.133 -                               bool ignore_max_completed = false);
  27.134 +  void enqueue_complete_buffer(void** buf, size_t index = 0);
  27.135 +
  27.136 +  // To be invoked by the mutator.
  27.137 +  bool process_or_enqueue_complete_buffer(void** buf);
  27.138  
  27.139    bool completed_buffers_exist_dirty() {
  27.140      return _n_completed_buffers > 0;
  27.141    }
  27.142  
  27.143    bool process_completed_buffers() { return _process_completed; }
  27.144 +  void set_process_completed(bool x) { _process_completed = x; }
  27.145  
  27.146    bool active() { return _all_active; }
  27.147  
  27.148 @@ -226,15 +266,24 @@
  27.149    // Get the buffer size.
  27.150    size_t buffer_size() { return _sz; }
  27.151  
  27.152 -  // Set the number of completed buffers that triggers log processing.
  27.153 -  void set_process_completed_threshold(size_t sz);
  27.154 +  // Get/Set the number of completed buffers that triggers log processing.
  27.155 +  void set_process_completed_threshold(int sz) { _process_completed_threshold = sz; }
  27.156 +  int process_completed_threshold() const { return _process_completed_threshold; }
  27.157  
  27.158    // Must only be called at a safe point.  Indicates that the buffer free
  27.159    // list size may be reduced, if that is deemed desirable.
  27.160    void reduce_free_list();
  27.161  
  27.162 -  size_t completed_buffers_num() { return _n_completed_buffers; }
  27.163 +  int completed_buffers_num() { return _n_completed_buffers; }
  27.164  
  27.165    void merge_bufferlists(PtrQueueSet* src);
  27.166 -  void merge_freelists(PtrQueueSet* src);
  27.167 +
  27.168 +  void set_max_completed_queue(int m) { _max_completed_queue = m; }
  27.169 +  int max_completed_queue() { return _max_completed_queue; }
  27.170 +
  27.171 +  void set_completed_queue_padding(int padding) { _completed_queue_padding = padding; }
  27.172 +  int completed_queue_padding() { return _completed_queue_padding; }
  27.173 +
  27.174 +  // Notify the consumer if the number of buffers crossed the threshold
  27.175 +  void notify_if_necessary();
  27.176  };
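
The BufferNode class above replaces the separately allocated CompletedBufferNode: the node header now lives in the same C-heap block as the buffer, immediately in front of it, and the make_* helpers convert between the block, node and buffer views with plain pointer arithmetic, aligned_size() rounding sizeof(BufferNode) up to a multiple of the pointer size so the buffer itself stays pointer-aligned. A minimal standalone sketch of the same layout trick (ordinary C++ types, not the HotSpot ones):

#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <new>

// Header stored immediately in front of the buffer; a single malloc'd block
// holds both, mirroring the BufferNode layout introduced above.
struct Node {
  size_t index;
  Node*  next;

  static size_t aligned_size() {
    // Round the header size up to a multiple of the pointer size.
    return (sizeof(Node) + sizeof(void*) - 1) / sizeof(void*) * sizeof(void*);
  }
  static void** buffer_from_block(void* block) {
    return (void**)((char*)block + aligned_size());
  }
  static Node* node_from_buffer(void** buf) {
    return (Node*)((char*)buf - aligned_size());
  }
};

int main() {
  const size_t buffer_bytes = 256 * sizeof(void*);
  // One allocation for header + buffer, as allocate_buffer() now does.
  void*  block = std::malloc(Node::aligned_size() + buffer_bytes);
  Node*  node  = new (block) Node();   // placement-new the header into the block
  void** buf   = Node::buffer_from_block(block);

  node->index = 0;
  std::printf("recovered node from buffer: %s\n",
              Node::node_from_buffer(buf) == node ? "ok" : "broken");
  std::free(block);
  return 0;
}

Because the header travels with its buffer, enqueueing a completed buffer no longer needs a separate heap allocation, and the free list can thread spare buffers through their headers instead of overwriting the first buffer slot, which is why the old res[0] = NULL fixup in allocate_buffer() could be dropped.
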
    28.1 --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Dec 22 16:35:08 2009 -0800
    28.2 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Dec 23 02:57:31 2009 -0800
    28.3 @@ -67,9 +67,9 @@
    28.4  {}
    28.5  
    28.6  void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
    28.7 -                                  int max_completed_queue,
    28.8 +                                  int process_completed_threshold,
    28.9                                    Mutex* lock) {
   28.10 -  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
   28.11 +  PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1);
   28.12    _shared_satb_queue.set_lock(lock);
   28.13    if (ParallelGCThreads > 0) {
   28.14      _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads);
   28.15 @@ -122,12 +122,12 @@
   28.16  
   28.17  bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
   28.18                                                                int worker) {
   28.19 -  CompletedBufferNode* nd = NULL;
   28.20 +  BufferNode* nd = NULL;
   28.21    {
   28.22      MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
   28.23      if (_completed_buffers_head != NULL) {
   28.24        nd = _completed_buffers_head;
   28.25 -      _completed_buffers_head = nd->next;
   28.26 +      _completed_buffers_head = nd->next();
   28.27        if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL;
   28.28        _n_completed_buffers--;
   28.29        if (_n_completed_buffers == 0) _process_completed = false;
   28.30 @@ -135,9 +135,9 @@
   28.31    }
   28.32    ObjectClosure* cl = (par ? _par_closures[worker] : _closure);
   28.33    if (nd != NULL) {
   28.34 -    ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz);
   28.35 -    deallocate_buffer(nd->buf);
   28.36 -    delete nd;
   28.37 +    void **buf = BufferNode::make_buffer_from_node(nd);
   28.38 +    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
   28.39 +    deallocate_buffer(buf);
   28.40      return true;
   28.41    } else {
   28.42      return false;
   28.43 @@ -145,13 +145,13 @@
   28.44  }
   28.45  
   28.46  void SATBMarkQueueSet::abandon_partial_marking() {
   28.47 -  CompletedBufferNode* buffers_to_delete = NULL;
   28.48 +  BufferNode* buffers_to_delete = NULL;
   28.49    {
   28.50      MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
   28.51      while (_completed_buffers_head != NULL) {
   28.52 -      CompletedBufferNode* nd = _completed_buffers_head;
   28.53 -      _completed_buffers_head = nd->next;
   28.54 -      nd->next = buffers_to_delete;
   28.55 +      BufferNode* nd = _completed_buffers_head;
   28.56 +      _completed_buffers_head = nd->next();
   28.57 +      nd->set_next(buffers_to_delete);
   28.58        buffers_to_delete = nd;
   28.59      }
   28.60      _completed_buffers_tail = NULL;
   28.61 @@ -159,10 +159,9 @@
   28.62      DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked());
   28.63    }
   28.64    while (buffers_to_delete != NULL) {
   28.65 -    CompletedBufferNode* nd = buffers_to_delete;
   28.66 -    buffers_to_delete = nd->next;
   28.67 -    deallocate_buffer(nd->buf);
   28.68 -    delete nd;
   28.69 +    BufferNode* nd = buffers_to_delete;
   28.70 +    buffers_to_delete = nd->next();
   28.71 +    deallocate_buffer(BufferNode::make_buffer_from_node(nd));
   28.72    }
   28.73    assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
   28.74    // So we can safely manipulate these queues.
    29.1 --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Dec 22 16:35:08 2009 -0800
    29.2 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Dec 23 02:57:31 2009 -0800
    29.3 @@ -60,8 +60,8 @@
    29.4    SATBMarkQueueSet();
    29.5  
    29.6    void initialize(Monitor* cbl_mon, Mutex* fl_lock,
    29.7 -                  int max_completed_queue = 0,
    29.8 -                  Mutex* lock = NULL);
    29.9 +                  int process_completed_threshold,
   29.10 +                  Mutex* lock);
   29.11  
   29.12    static void handle_zero_index_for_thread(JavaThread* t);
   29.13  
    30.1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1	Tue Dec 22 16:35:08 2009 -0800
    30.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Wed Dec 23 02:57:31 2009 -0800
    30.3 @@ -109,7 +109,6 @@
    30.4  dirtyCardQueue.cpp                      dirtyCardQueue.hpp
    30.5  dirtyCardQueue.cpp			heapRegionRemSet.hpp
    30.6  dirtyCardQueue.cpp                      mutexLocker.hpp
    30.7 -dirtyCardQueue.cpp                      ptrQueue.inline.hpp
    30.8  dirtyCardQueue.cpp                      safepoint.hpp
    30.9  dirtyCardQueue.cpp                      thread.hpp
   30.10  dirtyCardQueue.cpp                      thread_<os_family>.inline.hpp
   30.11 @@ -319,7 +318,6 @@
   30.12  ptrQueue.cpp                            mutex.hpp
   30.13  ptrQueue.cpp                            mutexLocker.hpp
   30.14  ptrQueue.cpp                            ptrQueue.hpp
   30.15 -ptrQueue.cpp                            ptrQueue.inline.hpp
   30.16  ptrQueue.cpp                            thread_<os_family>.inline.hpp
   30.17  
   30.18  ptrQueue.hpp                            allocation.hpp
   30.19 @@ -329,7 +327,6 @@
   30.20  
   30.21  satbQueue.cpp                           allocation.inline.hpp
   30.22  satbQueue.cpp                           mutexLocker.hpp
   30.23 -satbQueue.cpp                           ptrQueue.inline.hpp
   30.24  satbQueue.cpp                           satbQueue.hpp
   30.25  satbQueue.cpp                           sharedHeap.hpp
   30.26  satbQueue.cpp                           thread.hpp
