1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Sat Apr 09 21:16:12 2011 -0700 1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Apr 14 13:45:41 2011 -0700 1.3 @@ -6902,26 +6902,39 @@ 1.4 #ifndef SERIALGC 1.5 1.6 void MacroAssembler::g1_write_barrier_pre(Register obj, 1.7 -#ifndef _LP64 1.8 + Register pre_val, 1.9 Register thread, 1.10 -#endif 1.11 Register tmp, 1.12 - Register tmp2, 1.13 - bool tosca_live) { 1.14 - LP64_ONLY(Register thread = r15_thread;) 1.15 + bool tosca_live, 1.16 + bool expand_call) { 1.17 + 1.18 + // If expand_call is true then we expand the call_VM_leaf macro 1.19 + // directly to skip generating the check by 1.20 + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 1.21 + 1.22 +#ifdef _LP64 1.23 + assert(thread == r15_thread, "must be"); 1.24 +#endif // _LP64 1.25 + 1.26 + Label done; 1.27 + Label runtime; 1.28 + 1.29 + assert(pre_val != noreg, "check this code"); 1.30 + 1.31 + if (obj != noreg) { 1.32 + assert_different_registers(obj, pre_val, tmp); 1.33 + assert(pre_val != rax, "check this code"); 1.34 + } 1.35 + 1.36 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 1.37 PtrQueue::byte_offset_of_active())); 1.38 - 1.39 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 1.40 PtrQueue::byte_offset_of_index())); 1.41 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 1.42 PtrQueue::byte_offset_of_buf())); 1.43 1.44 1.45 - Label done; 1.46 - Label runtime; 1.47 - 1.48 - // if (!marking_in_progress) goto done; 1.49 + // Is marking active? 1.50 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 1.51 cmpl(in_progress, 0); 1.52 } else { 1.53 @@ -6930,65 +6943,92 @@ 1.54 } 1.55 jcc(Assembler::equal, done); 1.56 1.57 - // if (x.f == NULL) goto done; 1.58 -#ifdef _LP64 1.59 - load_heap_oop(tmp2, Address(obj, 0)); 1.60 -#else 1.61 - movptr(tmp2, Address(obj, 0)); 1.62 -#endif 1.63 - cmpptr(tmp2, (int32_t) NULL_WORD); 1.64 + // Do we need to load the previous value? 
1.65 + if (obj != noreg) { 1.66 + load_heap_oop(pre_val, Address(obj, 0)); 1.67 + } 1.68 + 1.69 + // Is the previous value null? 1.70 + cmpptr(pre_val, (int32_t) NULL_WORD); 1.71 jcc(Assembler::equal, done); 1.72 1.73 // Can we store original value in the thread's buffer? 1.74 - 1.75 -#ifdef _LP64 1.76 - movslq(tmp, index); 1.77 - cmpq(tmp, 0); 1.78 -#else 1.79 - cmpl(index, 0); 1.80 -#endif 1.81 - jcc(Assembler::equal, runtime); 1.82 -#ifdef _LP64 1.83 - subq(tmp, wordSize); 1.84 - movl(index, tmp); 1.85 - addq(tmp, buffer); 1.86 -#else 1.87 - subl(index, wordSize); 1.88 - movl(tmp, buffer); 1.89 - addl(tmp, index); 1.90 -#endif 1.91 - movptr(Address(tmp, 0), tmp2); 1.92 + // Is index == 0? 1.93 + // (The index field is typed as size_t.) 1.94 + 1.95 + movptr(tmp, index); // tmp := *index_adr 1.96 + cmpptr(tmp, 0); // tmp == 0? 1.97 + jcc(Assembler::equal, runtime); // If yes, goto runtime 1.98 + 1.99 + subptr(tmp, wordSize); // tmp := tmp - wordSize 1.100 + movptr(index, tmp); // *index_adr := tmp 1.101 + addptr(tmp, buffer); // tmp := tmp + *buffer_adr 1.102 + 1.103 + // Record the previous value 1.104 + movptr(Address(tmp, 0), pre_val); 1.105 jmp(done); 1.106 + 1.107 bind(runtime); 1.108 // save the live input values 1.109 if(tosca_live) push(rax); 1.110 - push(obj); 1.111 -#ifdef _LP64 1.112 - call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread); 1.113 -#else 1.114 - push(thread); 1.115 - call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread); 1.116 - pop(thread); 1.117 -#endif 1.118 - pop(obj); 1.119 + 1.120 + if (obj != noreg && obj != rax) 1.121 + push(obj); 1.122 + 1.123 + if (pre_val != rax) 1.124 + push(pre_val); 1.125 + 1.126 + // Calling the runtime using the regular call_VM_leaf mechanism generates 1.127 + // code (generated by InterpreterMacroAssembler::call_VM_leaf_base) 1.128 + // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 
1.129 + // 1.130 + // If we are generating the pre-barrier without a frame (e.g. in the 1.131 + // intrinsified Reference.get() routine) then ebp might be pointing to 1.132 + // the caller frame and so this check will most likely fail at runtime. 1.133 + // 1.134 + // Expanding the call directly bypasses the generation of the check. 1.135 + // So when we do not have a full interpreter frame on the stack 1.136 + // expand_call should be passed true. 1.137 + 1.138 + NOT_LP64( push(thread); ) 1.139 + 1.140 + if (expand_call) { 1.141 + LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 1.142 + pass_arg1(this, thread); 1.143 + pass_arg0(this, pre_val); 1.144 + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); 1.145 + } else { 1.146 + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); 1.147 + } 1.148 + 1.149 + NOT_LP64( pop(thread); ) 1.150 + 1.151 + // restore the live input values 1.152 + if (pre_val != rax) 1.153 + pop(pre_val); 1.154 + 1.155 + if (obj != noreg && obj != rax) 1.156 + pop(obj); 1.157 + 1.158 if(tosca_live) pop(rax); 1.159 + 1.160 bind(done); 1.161 - 1.162 } 1.163 1.164 void MacroAssembler::g1_write_barrier_post(Register store_addr, 1.165 Register new_val, 1.166 -#ifndef _LP64 1.167 Register thread, 1.168 -#endif 1.169 Register tmp, 1.170 Register tmp2) { 1.171 - 1.172 - LP64_ONLY(Register thread = r15_thread;) 1.173 +#ifdef _LP64 1.174 + assert(thread == r15_thread, "must be"); 1.175 +#endif // _LP64 1.176 + 1.177 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 1.178 PtrQueue::byte_offset_of_index())); 1.179 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 1.180 PtrQueue::byte_offset_of_buf())); 1.181 + 1.182 BarrierSet* bs = Universe::heap()->barrier_set(); 1.183 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 1.184 Label done; 1.185 @@ -7067,7 +7107,6 @@ 1.186 pop(store_addr); 1.187 1.188 bind(done); 1.189 - 1.190 } 
1.191 1.192 #endif // SERIALGC