--- a/src/cpu/mips/vm/macroAssembler_mips.cpp	Fri Nov 03 15:51:07 2017 +0800
+++ b/src/cpu/mips/vm/macroAssembler_mips.cpp	Mon Oct 23 17:07:19 2017 +0800
@@ -1474,28 +1474,313 @@
   sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
 }
 
+
 //////////////////////////////////////////////////////////////////////////////////
 #if INCLUDE_ALL_GCS
 
+// G1 SATB pre-write barrier.
+//
+// While the thread's SATB mark queue is active, records the value that is
+// about to be overwritten: into the thread-local queue buffer when the queue
+// index is non-zero, otherwise through a leaf call to SharedRuntime::g1_wb_pre.
+// NULL previous values are not recorded.
+//
+//   obj         - if not noreg, the previous value is loaded from Address(obj, 0)
+//                 into pre_val; if noreg, pre_val must already hold it
+//   pre_val     - register for the previous value; must not be noreg
+//   thread      - current thread register (must be TREG on LP64)
+//   tmp         - scratch register
+//   tosca_live  - if true, V0 is saved and restored around the runtime call
+//   expand_call - if true, emit the runtime call directly via call_VM_leaf_base
+//                 instead of call_VM_leaf
+//
+// AT is clobbered, so obj, pre_val and tmp must not be AT.
 void MacroAssembler::g1_write_barrier_pre(Register obj,
-#ifndef _LP64
+                                          Register pre_val,
                                           Register thread,
-#endif
                                           Register tmp,
-                                          Register tmp2,
-                                          bool tosca_live) {
-  Unimplemented();
+                                          bool tosca_live,
+                                          bool expand_call) {
+
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+#ifdef _LP64
+  assert(thread == TREG, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert(pre_val != noreg, "check this code");
+
+  if (obj != noreg) {
+    assert_different_registers(obj, pre_val, tmp);
+    assert(pre_val != V0, "check this code");
+  }
+  // AT is clobbered as scratch below, and tmp must stay live next to pre_val.
+  assert_different_registers(pre_val, tmp, AT);
+  assert(obj != AT, "AT is used as scratch");
+
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                 PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                  PtrQueue::byte_offset_of_buf()));
+
+
+  // Is marking active?
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    //cmpl(in_progress, 0);
+    lw(AT, in_progress);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    //cmpb(in_progress, 0);
+    lb(AT, in_progress);
+  }
+  //jcc(Assembler::equal, done);
+  beq(AT, R0, done);
+  nop();
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  //cmpptr(pre_val, (int32_t) NULL_WORD);
+  //jcc(Assembler::equal, done);
+  beq(pre_val, R0, done);
+  nop();
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  //movptr(tmp, index); // tmp := *index_adr
+  ld(tmp, index);
+  //cmpptr(tmp, 0); // tmp == 0?
+  //jcc(Assembler::equal, runtime); // If yes, goto runtime
+  beq(tmp, R0, runtime);
+  nop();
+
+  //subptr(tmp, wordSize); // tmp := tmp - wordSize
+  //movptr(index, tmp); // *index_adr := tmp
+  //addptr(tmp, buffer); // tmp := tmp + *buffer_adr
+  daddiu(tmp, tmp, -1 * wordSize);
+  sd(tmp, index);
+  ld(AT, buffer);
+  daddu(tmp, tmp, AT);
+
+  // Record the previous value
+  //movptr(Address(tmp, 0), pre_val);
+  //jmp(done);
+  sd(pre_val, tmp, 0);
+  beq(R0, R0, done);
+  nop();
+
+  bind(runtime);
+  // save the live input values
+  //if(tosca_live) push(rax);
+  if(tosca_live) push(V0);
+
+  //if (obj != noreg && obj != rax)
+  if (obj != noreg && obj != V0)
+    push(obj);
+
+  //if (pre_val != rax)
+  if (pre_val != V0)
+    push(pre_val);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then ebp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
+
+  NOT_LP64( push(thread); )
+
+  if (expand_call) {
+    //LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+    LP64_ONLY( assert(pre_val != A1, "smashed arg"); )
+    //pass_arg1(this, thread);
+    if (thread != A1) move(A1, thread);
+    //pass_arg0(this, pre_val);
+    if (pre_val != A0) move(A0, pre_val);
+    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  NOT_LP64( pop(thread); )
+
+  // restore the live input values
+  //if (pre_val != rax)
+  if (pre_val != V0)
+    pop(pre_val);
+
+  //if (obj != noreg && obj != rax)
+  if (obj != noreg && obj != V0)
+    pop(obj);
+
+  //if(tosca_live) pop(rax);
+  if(tosca_live) pop(V0);
+
+  bind(done);
 }
 
+// G1 post-write barrier.
+//
+// Filters out stores that stay within one heap region, stores of NULL, and
+// stores whose card is already young or dirty. Otherwise dirties the card and
+// enqueues its address in the thread's dirty card queue, or passes it to
+// SharedRuntime::g1_wb_post when the queue index is zero.
+//
+//   store_addr - address that was stored to
+//   new_val    - value that was stored
+//   thread     - current thread register (must be TREG on LP64)
+//   tmp, tmp2  - scratch registers; distinct from each other and from AT
+//
+// AT is clobbered, so store_addr and new_val must not be AT.
 void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                            Register new_val,
-#ifndef _LP64
                                            Register thread,
-#endif
                                            Register tmp,
                                            Register tmp2) {
-
-  Unimplemented();
+  // card_addr (tmp) and cardtable (tmp2) are live at the same time, and AT is
+  // overwritten as scratch while card_addr is still needed, so the three
+  // registers must be pairwise distinct.
+  assert_different_registers(tmp, tmp2, AT);
+  assert(store_addr != AT && new_val != AT, "AT is used as scratch");
+#ifdef _LP64
+  assert(thread == TREG, "must be");
+#endif // _LP64
+
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                  PtrQueue::byte_offset_of_buf()));
+
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  //movptr(tmp, store_addr);
+  //xorptr(tmp, new_val);
+  //shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
+  //jcc(Assembler::equal, done);
+  xorr(AT, store_addr, new_val);
+  dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes);
+  beq(AT, R0, done);
+  nop();
+
+
+  // crosses regions, storing NULL?
+
+  //cmpptr(new_val, (int32_t) NULL_WORD);
+  //jcc(Assembler::equal, done);
+  beq(new_val, R0, done);
+  nop();
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  const Register card_addr = tmp;
+  const Register cardtable = tmp2;
+
+  //movptr(card_addr, store_addr);
+  //shrptr(card_addr, CardTableModRefBS::card_shift);
+  move(card_addr, store_addr);
+  dsrl(card_addr, card_addr, CardTableModRefBS::card_shift);
+  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+  // a valid address and therefore is not properly handled by the relocation code.
+  //movptr(cardtable, (intptr_t)ct->byte_map_base);
+  //addptr(card_addr, cardtable);
+  set64(cardtable, (intptr_t)ct->byte_map_base);
+  daddu(card_addr, card_addr, cardtable);
+
+  //cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  //jcc(Assembler::equal, done);
+  lb(AT, card_addr, 0);
+  daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  beq(AT, R0, done);
+  nop();
+
+  //membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+  //cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
+  //jcc(Assembler::equal, done);
+  sync();
+  lb(AT, card_addr, 0);
+  daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val());
+  beq(AT, R0, done);
+  nop();
+
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  //movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
+  move(AT, (int)CardTableModRefBS::dirty_card_val());
+  sb(AT, card_addr, 0);
+
+  //cmpl(queue_index, 0);
+  //jcc(Assembler::equal, runtime);
+  //subl(queue_index, wordSize);
+  //movptr(tmp2, buffer);
+  // NOTE(review): the pre-barrier treats the queue index as a size_t (ld/sd),
+  // but it is accessed with 32-bit lw/sw here, mirroring the x86 cmpl/subl.
+  // Presumably relies on little-endian layout and a small index -- TODO confirm.
+  lw(AT, queue_index);
+  beq(AT, R0, runtime);
+  nop();
+  daddiu(AT, AT, -1 * wordSize);
+  sw(AT, queue_index);
+  ld(tmp2, buffer);
+#ifdef _LP64
+  //movslq(rscratch1, queue_index);
+  //addq(tmp2, rscratch1);
+  //movq(Address(tmp2, 0), card_addr);
+  ld(AT, queue_index);
+  daddu(tmp2, tmp2, AT);
+  sd(card_addr, tmp2, 0);
+#else
+  //addl(tmp2, queue_index);
+  //movl(Address(tmp2, 0), card_addr);
+  lw(AT, queue_index);
+  addu32(tmp2, tmp2, AT);
+  sw(card_addr, tmp2, 0);
+#endif
+  //jmp(done);
+  beq(R0, R0, done);
+  nop();
+
+  bind(runtime);
+  // save the live input values
+  push(store_addr);
+  push(new_val);
+#ifdef _LP64
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  pop(thread);
+#endif
+  pop(new_val);
+  pop(store_addr);
+
+  bind(done);
 }
 
 #endif // INCLUDE_ALL_GCS