|
1 /* |
|
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. |
|
3 * Copyright (c) 2017, Loongson Technology. All rights reserved. |
|
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
5 * |
|
6 * This code is free software; you can redistribute it and/or modify it |
|
7 * under the terms of the GNU General Public License version 2 only, as |
|
8 * published by the Free Software Foundation. |
|
9 * |
|
10 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 * version 2 for more details (a copy is included in the LICENSE file that |
|
14 * accompanied this code). |
|
15 * |
|
16 * You should have received a copy of the GNU General Public License version |
|
17 * 2 along with this work; if not, write to the Free Software Foundation, |
|
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 * |
|
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
21 * or visit www.oracle.com if you need additional information or have any |
|
22 * questions. |
|
23 * |
|
24 */ |
|
25 |
|
26 #include "precompiled.hpp" |
|
27 #include "asm/assembler.hpp" |
|
28 #include "asm/assembler.inline.hpp" |
|
29 #include "asm/macroAssembler.inline.hpp" |
|
30 #include "compiler/disassembler.hpp" |
|
31 #include "gc_interface/collectedHeap.inline.hpp" |
|
32 #include "interpreter/interpreter.hpp" |
|
33 #include "memory/cardTableModRefBS.hpp" |
|
34 #include "memory/resourceArea.hpp" |
|
35 #include "memory/universe.hpp" |
|
36 #include "prims/methodHandles.hpp" |
|
37 #include "runtime/biasedLocking.hpp" |
|
38 #include "runtime/interfaceSupport.hpp" |
|
39 #include "runtime/objectMonitor.hpp" |
|
40 #include "runtime/os.hpp" |
|
41 #include "runtime/sharedRuntime.hpp" |
|
42 #include "runtime/stubRoutines.hpp" |
|
43 #include "utilities/macros.hpp" |
|
44 #if INCLUDE_ALL_GCS |
|
45 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" |
|
46 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" |
|
47 #include "gc_implementation/g1/heapRegion.hpp" |
|
48 #endif // INCLUDE_ALL_GCS |
|
49 |
|
50 // Implementation of MacroAssembler |
|
51 |
|
52 intptr_t MacroAssembler::i[32] = {0}; |
|
53 float MacroAssembler::f[32] = {0.0}; |
|
54 |
|
55 void MacroAssembler::print(outputStream *s) { |
|
56 unsigned int k; |
|
57 for(k=0; k<sizeof(i)/sizeof(i[0]); k++) { |
|
58 s->print_cr("i%d = 0x%.16lx", k, i[k]); |
|
59 } |
|
60 s->cr(); |
|
61 |
|
62 for(k=0; k<sizeof(f)/sizeof(f[0]); k++) { |
|
63 s->print_cr("f%d = %f", k, f[k]); |
|
64 } |
|
65 s->cr(); |
|
66 } |
|
67 |
|
68 int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } |
|
69 int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } |
|
70 |
|
71 void MacroAssembler::save_registers(MacroAssembler *masm) { |
|
72 #define __ masm-> |
|
73 for(int k=0; k<32; k++) { |
|
74 __ sw (as_Register(k), A0, i_offset(k)); |
|
75 } |
|
76 |
|
77 for(int k=0; k<32; k++) { |
|
78 __ swc1 (as_FloatRegister(k), A0, f_offset(k)); |
|
79 } |
|
80 #undef __ |
|
81 } |
|
82 |
|
83 void MacroAssembler::restore_registers(MacroAssembler *masm) { |
|
84 #define __ masm-> |
|
85 for(int k=0; k<32; k++) { |
|
86 __ lw (as_Register(k), A0, i_offset(k)); |
|
87 } |
|
88 |
|
89 for(int k=0; k<32; k++) { |
|
90 __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); |
|
91 } |
|
92 #undef __ |
|
93 } |
|
94 |
|
95 |
|
96 void MacroAssembler::pd_patch_instruction(address branch, address target) { |
|
97 jint& stub_inst = *(jint*) branch; |
|
98 |
|
99 /* * |
|
100 move(AT, RA); // dadd |
|
101 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); |
|
102 nop(); |
|
103 lui(T9, 0); // to be patched |
|
104 ori(T9, 0); |
|
105 daddu(T9, T9, RA); |
|
106 move(RA, AT); |
|
107 jr(T9); |
|
108 */ |
|
109 if(special(stub_inst) == dadd_op) { |
|
110 jint *pc = (jint *)branch; |
|
111 |
|
112 assert(opcode(pc[3]) == lui_op |
|
113 && opcode(pc[4]) == ori_op |
|
114 && special(pc[5]) == daddu_op, "Not a branch label patch"); |
|
115 if(!(opcode(pc[3]) == lui_op |
|
116 && opcode(pc[4]) == ori_op |
|
117 && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } |
|
118 |
|
119 int offset = target - branch; |
|
120 if (!is_simm16(offset)) |
|
121 { |
|
122 pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); |
|
123 pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); |
|
124 } |
|
125 else |
|
126 { |
|
127 /* revert to "beq + nop" */ |
|
128 CodeBuffer cb(branch, 4 * 10); |
|
129 MacroAssembler masm(&cb); |
|
130 #define __ masm. |
|
131 __ b(target); |
|
132 __ nop(); |
|
133 __ nop(); |
|
134 __ nop(); |
|
135 __ nop(); |
|
136 __ nop(); |
|
137 __ nop(); |
|
138 __ nop(); |
|
139 } |
|
140 return; |
|
141 } |
|
142 |
|
143 #ifndef PRODUCT |
|
144 if (!is_simm16((target - branch - 4) >> 2)) |
|
145 { |
|
146 tty->print_cr("Illegal patching: target=0x%lx", target); |
|
147 int *p = (int *)branch; |
|
148 for (int i = -10; i < 10; i++) |
|
149 { |
|
150 tty->print("0x%lx, ", p[i]); |
|
151 } |
|
152 tty->print_cr(""); |
|
153 } |
|
154 #endif |
|
155 |
|
156 stub_inst = patched_branch(target - branch, stub_inst, 0); |
|
157 } |
|
158 |
|
159 static inline address first_cache_address() { |
|
160 return CodeCache::low_bound() + sizeof(HeapBlock::Header); |
|
161 } |
|
162 |
|
163 static inline address last_cache_address() { |
|
164 return CodeCache::high_bound() - Assembler::InstructionSize; |
|
165 } |
|
166 |
|
167 int MacroAssembler::call_size(address target, bool far, bool patchable) { |
|
168 if (patchable) return 6 << Assembler::LogInstructionSize; |
|
169 if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop |
|
170 return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; |
|
171 } |
|
172 |
|
173 // Can we reach target using jal/j from anywhere |
|
174 // in the code cache (because code can be relocated)? |
|
175 bool MacroAssembler::reachable_from_cache(address target) { |
|
176 address cl = first_cache_address(); |
|
177 address ch = last_cache_address(); |
|
178 |
|
179 return fit_in_jal(target, cl) && fit_in_jal(target, ch); |
|
180 } |
|
181 |
|
182 void MacroAssembler::general_jump(address target) { |
|
183 if (reachable_from_cache(target)) { |
|
184 j(target); |
|
185 nop(); |
|
186 } else { |
|
187 set64(T9, (long)target); |
|
188 jr(T9); |
|
189 nop(); |
|
190 } |
|
191 } |
|
192 |
|
193 int MacroAssembler::insts_for_general_jump(address target) { |
|
194 if (reachable_from_cache(target)) { |
|
195 //j(target); |
|
196 //nop(); |
|
197 return 2; |
|
198 } else { |
|
199 //set64(T9, (long)target); |
|
200 //jr(T9); |
|
201 //nop(); |
|
202 return insts_for_set64((jlong)target) + 2; |
|
203 } |
|
204 } |
|
205 |
|
206 void MacroAssembler::patchable_jump(address target) { |
|
207 if (reachable_from_cache(target)) { |
|
208 nop(); |
|
209 nop(); |
|
210 nop(); |
|
211 nop(); |
|
212 j(target); |
|
213 nop(); |
|
214 } else { |
|
215 patchable_set48(T9, (long)target); |
|
216 jr(T9); |
|
217 nop(); |
|
218 } |
|
219 } |
|
220 |
|
221 int MacroAssembler::insts_for_patchable_jump(address target) { |
|
222 return 6; |
|
223 } |
|
224 |
|
225 void MacroAssembler::general_call(address target) { |
|
226 if (reachable_from_cache(target)) { |
|
227 jal(target); |
|
228 nop(); |
|
229 } else { |
|
230 set64(T9, (long)target); |
|
231 jalr(T9); |
|
232 nop(); |
|
233 } |
|
234 } |
|
235 |
|
236 int MacroAssembler::insts_for_general_call(address target) { |
|
237 if (reachable_from_cache(target)) { |
|
238 //jal(target); |
|
239 //nop(); |
|
240 return 2; |
|
241 } else { |
|
242 //set64(T9, (long)target); |
|
243 //jalr(T9); |
|
244 //nop(); |
|
245 return insts_for_set64((jlong)target) + 2; |
|
246 } |
|
247 } |
|
248 |
|
249 void MacroAssembler::patchable_call(address target) { |
|
250 if (reachable_from_cache(target)) { |
|
251 nop(); |
|
252 nop(); |
|
253 nop(); |
|
254 nop(); |
|
255 jal(target); |
|
256 nop(); |
|
257 } else { |
|
258 patchable_set48(T9, (long)target); |
|
259 jalr(T9); |
|
260 nop(); |
|
261 } |
|
262 } |
|
263 |
|
264 int MacroAssembler::insts_for_patchable_call(address target) { |
|
265 return 6; |
|
266 } |
|
267 |
|
268 void MacroAssembler::beq_far(Register rs, Register rt, address entry) |
|
269 { |
|
270 u_char * cur_pc = pc(); |
|
271 |
|
272 /* Jin: Near/Far jump */ |
|
273 if(is_simm16((entry - pc() - 4) / 4)) |
|
274 { |
|
275 Assembler::beq(rs, rt, offset(entry)); |
|
276 } |
|
277 else |
|
278 { |
|
279 Label not_jump; |
|
280 bne(rs, rt, not_jump); |
|
281 delayed()->nop(); |
|
282 |
|
283 b_far(entry); |
|
284 delayed()->nop(); |
|
285 |
|
286 bind(not_jump); |
|
287 has_delay_slot(); |
|
288 } |
|
289 } |
|
290 |
|
291 void MacroAssembler::beq_far(Register rs, Register rt, Label& L) |
|
292 { |
|
293 if (L.is_bound()) { |
|
294 beq_far(rs, rt, target(L)); |
|
295 } else { |
|
296 u_char * cur_pc = pc(); |
|
297 Label not_jump; |
|
298 bne(rs, rt, not_jump); |
|
299 delayed()->nop(); |
|
300 |
|
301 b_far(L); |
|
302 delayed()->nop(); |
|
303 |
|
304 bind(not_jump); |
|
305 has_delay_slot(); |
|
306 } |
|
307 } |
|
308 |
|
309 void MacroAssembler::bne_far(Register rs, Register rt, address entry) |
|
310 { |
|
311 u_char * cur_pc = pc(); |
|
312 |
|
313 /* Jin: Near/Far jump */ |
|
314 if(is_simm16((entry - pc() - 4) / 4)) |
|
315 { |
|
316 Assembler::bne(rs, rt, offset(entry)); |
|
317 } |
|
318 else |
|
319 { |
|
320 Label not_jump; |
|
321 beq(rs, rt, not_jump); |
|
322 delayed()->nop(); |
|
323 |
|
324 b_far(entry); |
|
325 delayed()->nop(); |
|
326 |
|
327 bind(not_jump); |
|
328 has_delay_slot(); |
|
329 } |
|
330 } |
|
331 |
|
332 void MacroAssembler::bne_far(Register rs, Register rt, Label& L) |
|
333 { |
|
334 if (L.is_bound()) { |
|
335 bne_far(rs, rt, target(L)); |
|
336 } else { |
|
337 u_char * cur_pc = pc(); |
|
338 Label not_jump; |
|
339 beq(rs, rt, not_jump); |
|
340 delayed()->nop(); |
|
341 |
|
342 b_far(L); |
|
343 delayed()->nop(); |
|
344 |
|
345 bind(not_jump); |
|
346 has_delay_slot(); |
|
347 } |
|
348 } |
|
349 |
|
350 void MacroAssembler::b_far(Label& L) |
|
351 { |
|
352 if (L.is_bound()) { |
|
353 b_far(target(L)); |
|
354 } else { |
|
355 volatile address dest = target(L); |
|
356 /* |
|
357 MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 |
|
358 0x00000055651ed514: dadd at, ra, zero |
|
359 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 |
|
360 |
|
361 0x00000055651ed51c: sll zero, zero, 0 |
|
362 0x00000055651ed520: lui t9, 0x0 |
|
363 0x00000055651ed524: ori t9, t9, 0x21b8 |
|
364 0x00000055651ed528: daddu t9, t9, ra |
|
365 0x00000055651ed52c: dadd ra, at, zero |
|
366 0x00000055651ed530: jr t9 |
|
367 0x00000055651ed534: sll zero, zero, 0 |
|
368 */ |
|
369 move(AT, RA); |
|
370 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); |
|
371 nop(); |
|
372 lui(T9, 0); // to be patched |
|
373 ori(T9, T9, 0); |
|
374 daddu(T9, T9, RA); |
|
375 move(RA, AT); |
|
376 jr(T9); |
|
377 } |
|
378 } |
|
379 |
|
380 void MacroAssembler::b_far(address entry) |
|
381 { |
|
382 u_char * cur_pc = pc(); |
|
383 |
|
384 /* Jin: Near/Far jump */ |
|
385 if(is_simm16((entry - pc() - 4) / 4)) |
|
386 { |
|
387 b(offset(entry)); |
|
388 } |
|
389 else |
|
390 { |
|
391 /* address must be bounded */ |
|
392 move(AT, RA); |
|
393 emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); |
|
394 nop(); |
|
395 li32(T9, entry - pc()); |
|
396 daddu(T9, T9, RA); |
|
397 move(RA, AT); |
|
398 jr(T9); |
|
399 } |
|
400 } |
|
401 |
|
402 void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) { |
|
403 addu_long(AT, base, offset); |
|
404 ld_ptr(rt, 0, AT); |
|
405 } |
|
406 |
|
407 void MacroAssembler::st_ptr(Register rt, Register offset, Register base) { |
|
408 addu_long(AT, base, offset); |
|
409 st_ptr(rt, 0, AT); |
|
410 } |
|
411 |
|
412 void MacroAssembler::ld_long(Register rt, Register offset, Register base) { |
|
413 addu_long(AT, base, offset); |
|
414 ld_long(rt, 0, AT); |
|
415 } |
|
416 |
|
417 void MacroAssembler::st_long(Register rt, Register offset, Register base) { |
|
418 addu_long(AT, base, offset); |
|
419 st_long(rt, 0, AT); |
|
420 } |
|
421 |
|
422 Address MacroAssembler::as_Address(AddressLiteral adr) { |
|
423 return Address(adr.target(), adr.rspec()); |
|
424 } |
|
425 |
|
426 Address MacroAssembler::as_Address(ArrayAddress adr) { |
|
427 return Address::make_array(adr); |
|
428 } |
|
429 |
|
430 // tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). |
|
431 void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { |
|
432 Label again; |
|
433 |
|
434 li(tmp_reg1, counter_addr); |
|
435 bind(again); |
|
436 if(!Use3A2000) sync(); |
|
437 ll(tmp_reg2, tmp_reg1, 0); |
|
438 addi(tmp_reg2, tmp_reg2, inc); |
|
439 sc(tmp_reg2, tmp_reg1, 0); |
|
440 beq(tmp_reg2, R0, again); |
|
441 delayed()->nop(); |
|
442 } |
|
443 |
|
444 int MacroAssembler::biased_locking_enter(Register lock_reg, |
|
445 Register obj_reg, |
|
446 Register swap_reg, |
|
447 Register tmp_reg, |
|
448 bool swap_reg_contains_mark, |
|
449 Label& done, |
|
450 Label* slow_case, |
|
451 BiasedLockingCounters* counters) { |
|
452 assert(UseBiasedLocking, "why call this otherwise?"); |
|
453 bool need_tmp_reg = false; |
|
454 if (tmp_reg == noreg) { |
|
455 need_tmp_reg = true; |
|
456 tmp_reg = T9; |
|
457 } |
|
458 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); |
|
459 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); |
|
460 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); |
|
461 Address saved_mark_addr(lock_reg, 0); |
|
462 |
|
463 // Biased locking |
|
464 // See whether the lock is currently biased toward our thread and |
|
465 // whether the epoch is still valid |
|
466 // Note that the runtime guarantees sufficient alignment of JavaThread |
|
467 // pointers to allow age to be placed into low bits |
|
468 // First check to see whether biasing is even enabled for this object |
|
469 Label cas_label; |
|
470 int null_check_offset = -1; |
|
471 if (!swap_reg_contains_mark) { |
|
472 null_check_offset = offset(); |
|
473 ld_ptr(swap_reg, mark_addr); |
|
474 } |
|
475 |
|
476 if (need_tmp_reg) { |
|
477 push(tmp_reg); |
|
478 } |
|
479 move(tmp_reg, swap_reg); |
|
480 andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); |
|
481 #ifdef _LP64 |
|
482 daddi(AT, R0, markOopDesc::biased_lock_pattern); |
|
483 dsub(AT, AT, tmp_reg); |
|
484 #else |
|
485 addi(AT, R0, markOopDesc::biased_lock_pattern); |
|
486 sub(AT, AT, tmp_reg); |
|
487 #endif |
|
488 if (need_tmp_reg) { |
|
489 pop(tmp_reg); |
|
490 } |
|
491 |
|
492 bne(AT, R0, cas_label); |
|
493 delayed()->nop(); |
|
494 |
|
495 |
|
496 // The bias pattern is present in the object's header. Need to check |
|
497 // whether the bias owner and the epoch are both still current. |
|
498 // Note that because there is no current thread register on MIPS we |
|
499 // need to store off the mark word we read out of the object to |
|
500 // avoid reloading it and needing to recheck invariants below. This |
|
501 // store is unfortunate but it makes the overall code shorter and |
|
502 // simpler. |
|
503 st_ptr(swap_reg, saved_mark_addr); |
|
504 if (need_tmp_reg) { |
|
505 push(tmp_reg); |
|
506 } |
|
507 if (swap_reg_contains_mark) { |
|
508 null_check_offset = offset(); |
|
509 } |
|
510 load_prototype_header(tmp_reg, obj_reg); |
|
511 xorr(tmp_reg, tmp_reg, swap_reg); |
|
512 get_thread(swap_reg); |
|
513 xorr(swap_reg, swap_reg, tmp_reg); |
|
514 |
|
515 move(AT, ~((int) markOopDesc::age_mask_in_place)); |
|
516 andr(swap_reg, swap_reg, AT); |
|
517 |
|
518 if (PrintBiasedLockingStatistics) { |
|
519 Label L; |
|
520 bne(swap_reg, R0, L); |
|
521 delayed()->nop(); |
|
522 push(tmp_reg); |
|
523 push(A0); |
|
524 atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); |
|
525 pop(A0); |
|
526 pop(tmp_reg); |
|
527 bind(L); |
|
528 } |
|
529 if (need_tmp_reg) { |
|
530 pop(tmp_reg); |
|
531 } |
|
532 beq(swap_reg, R0, done); |
|
533 delayed()->nop(); |
|
534 Label try_revoke_bias; |
|
535 Label try_rebias; |
|
536 |
|
537 // At this point we know that the header has the bias pattern and |
|
538 // that we are not the bias owner in the current epoch. We need to |
|
539 // figure out more details about the state of the header in order to |
|
540 // know what operations can be legally performed on the object's |
|
541 // header. |
|
542 |
|
543 // If the low three bits in the xor result aren't clear, that means |
|
544 // the prototype header is no longer biased and we have to revoke |
|
545 // the bias on this object. |
|
546 |
|
547 move(AT, markOopDesc::biased_lock_mask_in_place); |
|
548 andr(AT, swap_reg, AT); |
|
549 bne(AT, R0, try_revoke_bias); |
|
550 delayed()->nop(); |
|
551 // Biasing is still enabled for this data type. See whether the |
|
552 // epoch of the current bias is still valid, meaning that the epoch |
|
553 // bits of the mark word are equal to the epoch bits of the |
|
554 // prototype header. (Note that the prototype header's epoch bits |
|
555 // only change at a safepoint.) If not, attempt to rebias the object |
|
556 // toward the current thread. Note that we must be absolutely sure |
|
557 // that the current epoch is invalid in order to do this because |
|
558 // otherwise the manipulations it performs on the mark word are |
|
559 // illegal. |
|
560 |
|
561 move(AT, markOopDesc::epoch_mask_in_place); |
|
562 andr(AT,swap_reg, AT); |
|
563 bne(AT, R0, try_rebias); |
|
564 delayed()->nop(); |
|
565 // The epoch of the current bias is still valid but we know nothing |
|
566 // about the owner; it might be set or it might be clear. Try to |
|
567 // acquire the bias of the object using an atomic operation. If this |
|
568 // fails we will go in to the runtime to revoke the object's bias. |
|
569 // Note that we first construct the presumed unbiased header so we |
|
570 // don't accidentally blow away another thread's valid bias. |
|
571 |
|
572 ld_ptr(swap_reg, saved_mark_addr); |
|
573 |
|
574 move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); |
|
575 andr(swap_reg, swap_reg, AT); |
|
576 |
|
577 if (need_tmp_reg) { |
|
578 push(tmp_reg); |
|
579 } |
|
580 get_thread(tmp_reg); |
|
581 orr(tmp_reg, tmp_reg, swap_reg); |
|
582 //if (os::is_MP()) { |
|
583 // sync(); |
|
584 //} |
|
585 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); |
|
586 if (need_tmp_reg) { |
|
587 pop(tmp_reg); |
|
588 } |
|
589 // If the biasing toward our thread failed, this means that |
|
590 // another thread succeeded in biasing it toward itself and we |
|
591 // need to revoke that bias. The revocation will occur in the |
|
592 // interpreter runtime in the slow case. |
|
593 if (PrintBiasedLockingStatistics) { |
|
594 Label L; |
|
595 bne(AT, R0, L); |
|
596 delayed()->nop(); |
|
597 push(tmp_reg); |
|
598 push(A0); |
|
599 atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); |
|
600 pop(A0); |
|
601 pop(tmp_reg); |
|
602 bind(L); |
|
603 } |
|
604 if (slow_case != NULL) { |
|
605 beq_far(AT, R0, *slow_case); |
|
606 delayed()->nop(); |
|
607 } |
|
608 b(done); |
|
609 delayed()->nop(); |
|
610 |
|
611 bind(try_rebias); |
|
612 // At this point we know the epoch has expired, meaning that the |
|
613 // current "bias owner", if any, is actually invalid. Under these |
|
614 // circumstances _only_, we are allowed to use the current header's |
|
615 // value as the comparison value when doing the cas to acquire the |
|
616 // bias in the current epoch. In other words, we allow transfer of |
|
617 // the bias from one thread to another directly in this situation. |
|
618 // |
|
619 // FIXME: due to a lack of registers we currently blow away the age |
|
620 // bits in this situation. Should attempt to preserve them. |
|
621 if (need_tmp_reg) { |
|
622 push(tmp_reg); |
|
623 } |
|
624 load_prototype_header(tmp_reg, obj_reg); |
|
625 get_thread(swap_reg); |
|
626 orr(tmp_reg, tmp_reg, swap_reg); |
|
627 ld_ptr(swap_reg, saved_mark_addr); |
|
628 |
|
629 //if (os::is_MP()) { |
|
630 // sync(); |
|
631 //} |
|
632 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); |
|
633 if (need_tmp_reg) { |
|
634 pop(tmp_reg); |
|
635 } |
|
636 // If the biasing toward our thread failed, then another thread |
|
637 // succeeded in biasing it toward itself and we need to revoke that |
|
638 // bias. The revocation will occur in the runtime in the slow case. |
|
639 if (PrintBiasedLockingStatistics) { |
|
640 Label L; |
|
641 bne(AT, R0, L); |
|
642 delayed()->nop(); |
|
643 push(AT); |
|
644 push(tmp_reg); |
|
645 atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); |
|
646 pop(tmp_reg); |
|
647 pop(AT); |
|
648 bind(L); |
|
649 } |
|
650 if (slow_case != NULL) { |
|
651 beq_far(AT, R0, *slow_case); |
|
652 delayed()->nop(); |
|
653 } |
|
654 |
|
655 b(done); |
|
656 delayed()->nop(); |
|
657 bind(try_revoke_bias); |
|
658 // The prototype mark in the klass doesn't have the bias bit set any |
|
659 // more, indicating that objects of this data type are not supposed |
|
660 // to be biased any more. We are going to try to reset the mark of |
|
661 // this object to the prototype value and fall through to the |
|
662 // CAS-based locking scheme. Note that if our CAS fails, it means |
|
663 // that another thread raced us for the privilege of revoking the |
|
664 // bias of this particular object, so it's okay to continue in the |
|
665 // normal locking code. |
|
666 // |
|
667 // FIXME: due to a lack of registers we currently blow away the age |
|
668 // bits in this situation. Should attempt to preserve them. |
|
669 ld_ptr(swap_reg, saved_mark_addr); |
|
670 |
|
671 if (need_tmp_reg) { |
|
672 push(tmp_reg); |
|
673 } |
|
674 load_prototype_header(tmp_reg, obj_reg); |
|
675 //if (os::is_MP()) { |
|
676 // lock(); |
|
677 //} |
|
678 cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); |
|
679 if (need_tmp_reg) { |
|
680 pop(tmp_reg); |
|
681 } |
|
682 // Fall through to the normal CAS-based lock, because no matter what |
|
683 // the result of the above CAS, some thread must have succeeded in |
|
684 // removing the bias bit from the object's header. |
|
685 if (PrintBiasedLockingStatistics) { |
|
686 Label L; |
|
687 bne(AT, R0, L); |
|
688 delayed()->nop(); |
|
689 push(AT); |
|
690 push(tmp_reg); |
|
691 atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); |
|
692 pop(tmp_reg); |
|
693 pop(AT); |
|
694 bind(L); |
|
695 } |
|
696 |
|
697 bind(cas_label); |
|
698 return null_check_offset; |
|
699 } |
|
700 |
|
701 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { |
|
702 assert(UseBiasedLocking, "why call this otherwise?"); |
|
703 |
|
704 // Check for biased locking unlock case, which is a no-op |
|
705 // Note: we do not have to check the thread ID for two reasons. |
|
706 // First, the interpreter checks for IllegalMonitorStateException at |
|
707 // a higher level. Second, if the bias was revoked while we held the |
|
708 // lock, the object could not be rebiased toward another thread, so |
|
709 // the bias bit would be clear. |
|
710 #ifdef _LP64 |
|
711 ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); |
|
712 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); |
|
713 daddi(AT, R0, markOopDesc::biased_lock_pattern); |
|
714 #else |
|
715 lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); |
|
716 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); |
|
717 addi(AT, R0, markOopDesc::biased_lock_pattern); |
|
718 #endif |
|
719 |
|
720 beq(AT, temp_reg, done); |
|
721 delayed()->nop(); |
|
722 } |
|
723 |
|
724 // NOTE: we dont increment the SP after call like the x86 version, maybe this is a problem, FIXME. |
|
725 // the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf |
|
726 // this method will handle the stack problem, you need not to preserve the stack space for the argument now |
|
727 void MacroAssembler::call_VM_leaf_base(address entry_point, |
|
728 int number_of_arguments) { |
|
729 //call(RuntimeAddress(entry_point)); |
|
730 //increment(rsp, number_of_arguments * wordSize); |
|
731 Label L, E; |
|
732 |
|
733 assert(number_of_arguments <= 4, "just check"); |
|
734 |
|
735 andi(AT, SP, 0xf); |
|
736 beq(AT, R0, L); |
|
737 delayed()->nop(); |
|
738 daddi(SP, SP, -8); |
|
739 call(entry_point, relocInfo::runtime_call_type); |
|
740 delayed()->nop(); |
|
741 daddi(SP, SP, 8); |
|
742 b(E); |
|
743 delayed()->nop(); |
|
744 |
|
745 bind(L); |
|
746 call(entry_point, relocInfo::runtime_call_type); |
|
747 delayed()->nop(); |
|
748 bind(E); |
|
749 } |
|
750 |
|
751 |
|
752 void MacroAssembler::jmp(address entry) { |
|
753 patchable_set48(T9, (long)entry); |
|
754 jr(T9); |
|
755 } |
|
756 |
|
757 void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { |
|
758 switch (rtype) { |
|
759 case relocInfo::runtime_call_type: |
|
760 case relocInfo::none: |
|
761 jmp(entry); |
|
762 break; |
|
763 default: |
|
764 { |
|
765 InstructionMark im(this); |
|
766 relocate(rtype); |
|
767 patchable_set48(T9, (long)entry); |
|
768 jr(T9); |
|
769 } |
|
770 break; |
|
771 } |
|
772 } |
|
773 |
|
774 void MacroAssembler::call(address entry) { |
|
775 // c/c++ code assume T9 is entry point, so we just always move entry to t9 |
|
776 // maybe there is some more graceful method to handle this. FIXME |
|
777 // For more info, see class NativeCall. |
|
778 #ifndef _LP64 |
|
779 move(T9, (int)entry); |
|
780 #else |
|
781 patchable_set48(T9, (long)entry); |
|
782 #endif |
|
783 jalr(T9); |
|
784 } |
|
785 |
|
786 void MacroAssembler::call(address entry, relocInfo::relocType rtype) { |
|
787 switch (rtype) { |
|
788 case relocInfo::runtime_call_type: |
|
789 case relocInfo::none: |
|
790 call(entry); |
|
791 break; |
|
792 default: |
|
793 { |
|
794 InstructionMark im(this); |
|
795 relocate(rtype); |
|
796 call(entry); |
|
797 } |
|
798 break; |
|
799 } |
|
800 } |
|
801 |
|
802 void MacroAssembler::call(address entry, RelocationHolder& rh) |
|
803 { |
|
804 switch (rh.type()) { |
|
805 case relocInfo::runtime_call_type: |
|
806 case relocInfo::none: |
|
807 call(entry); |
|
808 break; |
|
809 default: |
|
810 { |
|
811 InstructionMark im(this); |
|
812 relocate(rh); |
|
813 call(entry); |
|
814 } |
|
815 break; |
|
816 } |
|
817 } |
|
818 |
|
819 void MacroAssembler::ic_call(address entry) { |
|
820 RelocationHolder rh = virtual_call_Relocation::spec(pc()); |
|
821 patchable_set48(IC_Klass, (long)Universe::non_oop_word()); |
|
822 assert(entry != NULL, "call most probably wrong"); |
|
823 InstructionMark im(this); |
|
824 relocate(rh); |
|
825 patchable_call(entry); |
|
826 } |
|
827 |
|
828 void MacroAssembler::c2bool(Register r) { |
|
829 Label L; |
|
830 Assembler::beq(r, R0, L); |
|
831 delayed()->nop(); |
|
832 move(r, 1); |
|
833 bind(L); |
|
834 } |
|
835 |
|
836 #ifndef PRODUCT |
|
837 extern "C" void findpc(intptr_t x); |
|
838 #endif |
|
839 |
|
840 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { |
|
841 // In order to get locks to work, we need to fake a in_VM state |
|
842 JavaThread* thread = JavaThread::current(); |
|
843 JavaThreadState saved_state = thread->thread_state(); |
|
844 thread->set_thread_state(_thread_in_vm); |
|
845 if (ShowMessageBoxOnError) { |
|
846 JavaThread* thread = JavaThread::current(); |
|
847 JavaThreadState saved_state = thread->thread_state(); |
|
848 thread->set_thread_state(_thread_in_vm); |
|
849 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { |
|
850 ttyLocker ttyl; |
|
851 BytecodeCounter::print(); |
|
852 } |
|
853 // To see where a verify_oop failed, get $ebx+40/X for this frame. |
|
854 // This is the value of eip which points to where verify_oop will return. |
|
855 if (os::message_box(msg, "Execution stopped, print registers?")) { |
|
856 ttyLocker ttyl; |
|
857 tty->print_cr("eip = 0x%08x", eip); |
|
858 #ifndef PRODUCT |
|
859 tty->cr(); |
|
860 findpc(eip); |
|
861 tty->cr(); |
|
862 #endif |
|
863 tty->print_cr("rax, = 0x%08x", rax); |
|
864 tty->print_cr("rbx, = 0x%08x", rbx); |
|
865 tty->print_cr("rcx = 0x%08x", rcx); |
|
866 tty->print_cr("rdx = 0x%08x", rdx); |
|
867 tty->print_cr("rdi = 0x%08x", rdi); |
|
868 tty->print_cr("rsi = 0x%08x", rsi); |
|
869 tty->print_cr("rbp, = 0x%08x", rbp); |
|
870 tty->print_cr("rsp = 0x%08x", rsp); |
|
871 BREAKPOINT; |
|
872 } |
|
873 } else { |
|
874 ttyLocker ttyl; |
|
875 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); |
|
876 assert(false, "DEBUG MESSAGE"); |
|
877 } |
|
878 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); |
|
879 } |
|
880 |
|
881 void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { |
|
882 if ( ShowMessageBoxOnError ) { |
|
883 JavaThreadState saved_state = JavaThread::current()->thread_state(); |
|
884 JavaThread::current()->set_thread_state(_thread_in_vm); |
|
885 { |
|
886 // In order to get locks work, we need to fake a in_VM state |
|
887 ttyLocker ttyl; |
|
888 ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); |
|
889 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { |
|
890 BytecodeCounter::print(); |
|
891 } |
|
892 |
|
893 // if (os::message_box(msg, "Execution stopped, print registers?")) |
|
894 // regs->print(::tty); |
|
895 } |
|
896 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); |
|
897 } |
|
898 else |
|
899 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); |
|
900 } |
|
901 |
|
902 |
|
903 void MacroAssembler::stop(const char* msg) { |
|
904 li(A0, (long)msg); |
|
905 #ifndef _LP64 |
|
906 //reserver space for argument. added by yjl 7/10/2005 |
|
907 addiu(SP, SP, - 1 * wordSize); |
|
908 #endif |
|
909 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); |
|
910 delayed()->nop(); |
|
911 #ifndef _LP64 |
|
912 //restore space for argument |
|
913 addiu(SP, SP, 1 * wordSize); |
|
914 #endif |
|
915 brk(17); |
|
916 } |
|
917 |
|
918 void MacroAssembler::warn(const char* msg) { |
|
919 #ifdef _LP64 |
|
920 pushad(); |
|
921 li(A0, (long)msg); |
|
922 push(S2); |
|
923 move(AT, -(StackAlignmentInBytes)); |
|
924 move(S2, SP); // use S2 as a sender SP holder |
|
925 andr(SP, SP, AT); // align stack as required by ABI |
|
926 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); |
|
927 delayed()->nop(); |
|
928 move(SP, S2); // use S2 as a sender SP holder |
|
929 pop(S2); |
|
930 popad(); |
|
931 #else |
|
932 pushad(); |
|
933 addi(SP, SP, -4); |
|
934 sw(A0, SP, -1 * wordSize); |
|
935 li(A0, (long)msg); |
|
936 addi(SP, SP, -1 * wordSize); |
|
937 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); |
|
938 delayed()->nop(); |
|
939 addi(SP, SP, 1 * wordSize); |
|
940 lw(A0, SP, -1 * wordSize); |
|
941 addi(SP, SP, 4); |
|
942 popad(); |
|
943 #endif |
|
944 } |
|
945 |
|
946 void MacroAssembler::print_reg(Register reg) { |
|
947 /* |
|
948 char *s = getenv("PRINT_REG"); |
|
949 if (s == NULL) |
|
950 return; |
|
951 if (strcmp(s, "1") != 0) |
|
952 return; |
|
953 */ |
|
954 void * cur_pc = pc(); |
|
955 pushad(); |
|
956 NOT_LP64(push(FP);) |
|
957 |
|
958 li(A0, (long)reg->name()); |
|
959 if (reg == SP) |
|
960 addiu(A1, SP, wordSize * 23); //23 registers saved in pushad() |
|
961 else if (reg == A0) |
|
962 ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code! |
|
963 else |
|
964 move(A1, reg); |
|
965 li(A2, (long)cur_pc); |
|
966 push(S2); |
|
967 move(AT, -(StackAlignmentInBytes)); |
|
968 move(S2, SP); // use S2 as a sender SP holder |
|
969 andr(SP, SP, AT); // align stack as required by ABI |
|
970 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type); |
|
971 delayed()->nop(); |
|
972 move(SP, S2); // use S2 as a sender SP holder |
|
973 pop(S2); |
|
974 NOT_LP64(pop(FP);) |
|
975 popad(); |
|
976 |
|
977 /* |
|
978 pushad(); |
|
979 #ifdef _LP64 |
|
980 if (reg == SP) |
|
981 addiu(A0, SP, wordSize * 23); //23 registers saved in pushad() |
|
982 else |
|
983 move(A0, reg); |
|
984 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type); |
|
985 delayed()->nop(); |
|
986 #else |
|
987 push(FP); |
|
988 move(A0, reg); |
|
989 dsrl32(A1, reg, 0); |
|
990 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type); |
|
991 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type); |
|
992 delayed()->nop(); |
|
993 pop(FP); |
|
994 #endif |
|
995 popad(); |
|
996 pushad(); |
|
997 NOT_LP64(push(FP);) |
|
998 char b[50]; |
|
999 sprintf((char *)b, " pc: %p\n",cur_pc); |
|
1000 li(A0, (long)(char *)b); |
|
1001 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); |
|
1002 delayed()->nop(); |
|
1003 NOT_LP64(pop(FP);) |
|
1004 popad(); |
|
1005 */ |
|
1006 } |
|
1007 |
|
1008 void MacroAssembler::print_reg(FloatRegister reg) { |
|
1009 void * cur_pc = pc(); |
|
1010 pushad(); |
|
1011 NOT_LP64(push(FP);) |
|
1012 li(A0, (long)reg->name()); |
|
1013 push(S2); |
|
1014 move(AT, -(StackAlignmentInBytes)); |
|
1015 move(S2, SP); // use S2 as a sender SP holder |
|
1016 andr(SP, SP, AT); // align stack as required by ABI |
|
1017 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); |
|
1018 delayed()->nop(); |
|
1019 move(SP, S2); // use S2 as a sender SP holder |
|
1020 pop(S2); |
|
1021 NOT_LP64(pop(FP);) |
|
1022 popad(); |
|
1023 |
|
1024 pushad(); |
|
1025 NOT_LP64(push(FP);) |
|
1026 #if 1 |
|
1027 move(FP, SP); |
|
1028 move(AT, -(StackAlignmentInBytes)); |
|
1029 andr(SP , SP , AT); |
|
1030 mov_d(F12, reg); |
|
1031 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type); |
|
1032 delayed()->nop(); |
|
1033 move(SP, FP); |
|
1034 #else |
|
1035 mov_s(F12, reg); |
|
1036 //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type); |
|
1037 //delayed()->nop(); |
|
1038 #endif |
|
1039 NOT_LP64(pop(FP);) |
|
1040 popad(); |
|
1041 |
|
1042 #if 0 |
|
1043 pushad(); |
|
1044 NOT_LP64(push(FP);) |
|
1045 char* b = new char[50]; |
|
1046 sprintf(b, " pc: %p\n", cur_pc); |
|
1047 li(A0, (long)b); |
|
1048 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); |
|
1049 delayed()->nop(); |
|
1050 NOT_LP64(pop(FP);) |
|
1051 popad(); |
|
1052 #endif |
|
1053 } |
|
1054 |
|
1055 void MacroAssembler::increment(Register reg, int imm) { |
|
1056 if (!imm) return; |
|
1057 if (is_simm16(imm)) { |
|
1058 #ifdef _LP64 |
|
1059 daddiu(reg, reg, imm); |
|
1060 #else |
|
1061 addiu(reg, reg, imm); |
|
1062 #endif |
|
1063 } else { |
|
1064 move(AT, imm); |
|
1065 #ifdef _LP64 |
|
1066 daddu(reg, reg, AT); |
|
1067 #else |
|
1068 addu(reg, reg, AT); |
|
1069 #endif |
|
1070 } |
|
1071 } |
|
1072 |
|
1073 void MacroAssembler::decrement(Register reg, int imm) { |
|
1074 increment(reg, -imm); |
|
1075 } |
|
1076 |
|
1077 |
|
1078 void MacroAssembler::call_VM(Register oop_result, |
|
1079 address entry_point, |
|
1080 bool check_exceptions) { |
|
1081 call_VM_helper(oop_result, entry_point, 0, check_exceptions); |
|
1082 } |
|
1083 |
|
1084 void MacroAssembler::call_VM(Register oop_result, |
|
1085 address entry_point, |
|
1086 Register arg_1, |
|
1087 bool check_exceptions) { |
|
1088 if (arg_1!=A1) move(A1, arg_1); |
|
1089 call_VM_helper(oop_result, entry_point, 1, check_exceptions); |
|
1090 } |
|
1091 |
|
1092 void MacroAssembler::call_VM(Register oop_result, |
|
1093 address entry_point, |
|
1094 Register arg_1, |
|
1095 Register arg_2, |
|
1096 bool check_exceptions) { |
|
1097 if (arg_1!=A1) move(A1, arg_1); |
|
1098 if (arg_2!=A2) move(A2, arg_2); |
|
1099 assert(arg_2 != A1, "smashed argument"); |
|
1100 call_VM_helper(oop_result, entry_point, 2, check_exceptions); |
|
1101 } |
|
1102 |
|
1103 void MacroAssembler::call_VM(Register oop_result, |
|
1104 address entry_point, |
|
1105 Register arg_1, |
|
1106 Register arg_2, |
|
1107 Register arg_3, |
|
1108 bool check_exceptions) { |
|
1109 if (arg_1!=A1) move(A1, arg_1); |
|
1110 if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); |
|
1111 if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); |
|
1112 call_VM_helper(oop_result, entry_point, 3, check_exceptions); |
|
1113 } |
|
1114 |
|
1115 void MacroAssembler::call_VM(Register oop_result, |
|
1116 Register last_java_sp, |
|
1117 address entry_point, |
|
1118 int number_of_arguments, |
|
1119 bool check_exceptions) { |
|
1120 call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); |
|
1121 } |
|
1122 |
|
1123 void MacroAssembler::call_VM(Register oop_result, |
|
1124 Register last_java_sp, |
|
1125 address entry_point, |
|
1126 Register arg_1, |
|
1127 bool check_exceptions) { |
|
1128 if (arg_1 != A1) move(A1, arg_1); |
|
1129 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); |
|
1130 } |
|
1131 |
|
1132 void MacroAssembler::call_VM(Register oop_result, |
|
1133 Register last_java_sp, |
|
1134 address entry_point, |
|
1135 Register arg_1, |
|
1136 Register arg_2, |
|
1137 bool check_exceptions) { |
|
1138 if (arg_1 != A1) move(A1, arg_1); |
|
1139 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); |
|
1140 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); |
|
1141 } |
|
1142 |
|
1143 void MacroAssembler::call_VM(Register oop_result, |
|
1144 Register last_java_sp, |
|
1145 address entry_point, |
|
1146 Register arg_1, |
|
1147 Register arg_2, |
|
1148 Register arg_3, |
|
1149 bool check_exceptions) { |
|
1150 if (arg_1 != A1) move(A1, arg_1); |
|
1151 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); |
|
1152 if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); |
|
1153 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); |
|
1154 } |
|
1155 |
|
1156 void MacroAssembler::call_VM_base(Register oop_result, |
|
1157 Register java_thread, |
|
1158 Register last_java_sp, |
|
1159 address entry_point, |
|
1160 int number_of_arguments, |
|
1161 bool check_exceptions) { |
|
1162 |
|
1163 address before_call_pc; |
|
1164 // determine java_thread register |
|
1165 if (!java_thread->is_valid()) { |
|
1166 #ifndef OPT_THREAD |
|
1167 java_thread = T2; |
|
1168 get_thread(java_thread); |
|
1169 #else |
|
1170 java_thread = TREG; |
|
1171 #endif |
|
1172 } |
|
1173 // determine last_java_sp register |
|
1174 if (!last_java_sp->is_valid()) { |
|
1175 last_java_sp = SP; |
|
1176 } |
|
1177 // debugging support |
|
1178 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); |
|
1179 assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); |
|
1180 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); |
|
1181 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); |
|
1182 |
|
1183 assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp"); |
|
1184 |
|
1185 // set last Java frame before call |
|
1186 before_call_pc = (address)pc(); |
|
1187 set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); |
|
1188 |
|
1189 // do the call |
|
1190 move(A0, java_thread); |
|
1191 call(entry_point, relocInfo::runtime_call_type); |
|
1192 delayed()->nop(); |
|
1193 |
|
1194 // restore the thread (cannot use the pushed argument since arguments |
|
1195 // may be overwritten by C code generated by an optimizing compiler); |
|
1196 // however can use the register value directly if it is callee saved. |
|
1197 #ifndef OPT_THREAD |
|
1198 if (java_thread >=S0 && java_thread <=S7) { |
|
1199 #ifdef ASSERT |
|
1200 { Label L; |
|
1201 get_thread(AT); |
|
1202 beq(java_thread, AT, L); |
|
1203 delayed()->nop(); |
|
1204 stop("MacroAssembler::call_VM_base: edi not callee saved?"); |
|
1205 bind(L); |
|
1206 } |
|
1207 #endif |
|
1208 } else { |
|
1209 get_thread(java_thread); |
|
1210 } |
|
1211 #endif |
|
1212 |
|
1213 // discard thread and arguments |
|
1214 ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); |
|
1215 // reset last Java frame |
|
1216 reset_last_Java_frame(java_thread, false, true); |
|
1217 |
|
1218 check_and_handle_popframe(java_thread); |
|
1219 check_and_handle_earlyret(java_thread); |
|
1220 if (check_exceptions) { |
|
1221 // check for pending exceptions (java_thread is set upon return) |
|
1222 Label L; |
|
1223 #ifdef _LP64 |
|
1224 ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); |
|
1225 #else |
|
1226 lw(AT, java_thread, in_bytes(Thread::pending_exception_offset())); |
|
1227 #endif |
|
1228 beq(AT, R0, L); |
|
1229 delayed()->nop(); |
|
1230 li(AT, before_call_pc); |
|
1231 push(AT); |
|
1232 jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); |
|
1233 delayed()->nop(); |
|
1234 bind(L); |
|
1235 } |
|
1236 |
|
1237 // get oop result if there is one and reset the value in the thread |
|
1238 if (oop_result->is_valid()) { |
|
1239 #ifdef _LP64 |
|
1240 ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); |
|
1241 sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); |
|
1242 #else |
|
1243 lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); |
|
1244 sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); |
|
1245 #endif |
|
1246 verify_oop(oop_result); |
|
1247 } |
|
1248 } |
|
1249 |
|
1250 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { |
|
1251 |
|
1252 move(V0, SP); |
|
1253 //we also reserve space for java_thread here |
|
1254 #ifndef _LP64 |
|
1255 daddi(SP, SP, (1 + number_of_arguments) * (- wordSize)); |
|
1256 #endif |
|
1257 move(AT, -(StackAlignmentInBytes)); |
|
1258 andr(SP, SP, AT); |
|
1259 call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); |
|
1260 |
|
1261 } |
|
1262 |
|
1263 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { |
|
1264 call_VM_leaf_base(entry_point, number_of_arguments); |
|
1265 } |
|
1266 |
|
1267 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { |
|
1268 if (arg_0 != A0) move(A0, arg_0); |
|
1269 call_VM_leaf(entry_point, 1); |
|
1270 } |
|
1271 |
|
1272 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { |
|
1273 if (arg_0 != A0) move(A0, arg_0); |
|
1274 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); |
|
1275 call_VM_leaf(entry_point, 2); |
|
1276 } |
|
1277 |
|
1278 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { |
|
1279 if (arg_0 != A0) move(A0, arg_0); |
|
1280 if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); |
|
1281 if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); |
|
1282 call_VM_leaf(entry_point, 3); |
|
1283 } |
|
1284 void MacroAssembler::super_call_VM_leaf(address entry_point) { |
|
1285 MacroAssembler::call_VM_leaf_base(entry_point, 0); |
|
1286 } |
|
1287 |
|
1288 |
|
1289 void MacroAssembler::super_call_VM_leaf(address entry_point, |
|
1290 Register arg_1) { |
|
1291 if (arg_1 != A0) move(A0, arg_1); |
|
1292 MacroAssembler::call_VM_leaf_base(entry_point, 1); |
|
1293 } |
|
1294 |
|
1295 |
|
1296 void MacroAssembler::super_call_VM_leaf(address entry_point, |
|
1297 Register arg_1, |
|
1298 Register arg_2) { |
|
1299 if (arg_1 != A0) move(A0, arg_1); |
|
1300 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); |
|
1301 MacroAssembler::call_VM_leaf_base(entry_point, 2); |
|
1302 } |
|
1303 void MacroAssembler::super_call_VM_leaf(address entry_point, |
|
1304 Register arg_1, |
|
1305 Register arg_2, |
|
1306 Register arg_3) { |
|
1307 if (arg_1 != A0) move(A0, arg_1); |
|
1308 if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); |
|
1309 if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); |
|
1310 MacroAssembler::call_VM_leaf_base(entry_point, 3); |
|
1311 } |
|
1312 |
|
1313 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { |
|
1314 } |
|
1315 |
|
1316 void MacroAssembler::check_and_handle_popframe(Register java_thread) { |
|
1317 } |
|
1318 |
|
1319 void MacroAssembler::null_check(Register reg, int offset) { |
|
1320 if (needs_explicit_null_check(offset)) { |
|
1321 // provoke OS NULL exception if reg = NULL by |
|
1322 // accessing M[reg] w/o changing any (non-CC) registers |
|
1323 // NOTE: cmpl is plenty here to provoke a segv |
|
1324 lw(AT, reg, 0); |
|
1325 /* Jin |
|
1326 nop(); |
|
1327 nop(); |
|
1328 nop(); |
|
1329 */ |
|
1330 // Note: should probably use testl(rax, Address(reg, 0)); |
|
1331 // may be shorter code (however, this version of |
|
1332 // testl needs to be implemented first) |
|
1333 } else { |
|
1334 // nothing to do, (later) access of M[reg + offset] |
|
1335 // will provoke OS NULL exception if reg = NULL |
|
1336 } |
|
1337 } |
|
1338 |
|
1339 void MacroAssembler::enter() { |
|
1340 push2(RA, FP); |
|
1341 move(FP, SP); |
|
1342 } |
|
1343 |
|
1344 void MacroAssembler::leave() { |
|
1345 #ifndef _LP64 |
|
1346 //move(SP, FP); |
|
1347 //pop2(FP, RA); |
|
1348 addi(SP, FP, 2 * wordSize); |
|
1349 lw(RA, SP, - 1 * wordSize); |
|
1350 lw(FP, SP, - 2 * wordSize); |
|
1351 #else |
|
1352 daddi(SP, FP, 2 * wordSize); |
|
1353 ld(RA, SP, - 1 * wordSize); |
|
1354 ld(FP, SP, - 2 * wordSize); |
|
1355 #endif |
|
1356 } |
|
1357 /* |
|
1358 void MacroAssembler::os_breakpoint() { |
|
1359 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability |
|
1360 // (e.g., MSVC can't call ps() otherwise) |
|
1361 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); |
|
1362 } |
|
1363 */ |
|
1364 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { |
|
1365 // determine java_thread register |
|
1366 if (!java_thread->is_valid()) { |
|
1367 #ifndef OPT_THREAD |
|
1368 java_thread = T1; |
|
1369 get_thread(java_thread); |
|
1370 #else |
|
1371 java_thread = TREG; |
|
1372 #endif |
|
1373 } |
|
1374 // we must set sp to zero to clear frame |
|
1375 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); |
|
1376 // must clear fp, so that compiled frames are not confused; it is possible |
|
1377 // that we need it only for debugging |
|
1378 if(clear_fp) |
|
1379 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); |
|
1380 |
|
1381 if (clear_pc) |
|
1382 st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); |
|
1383 } |
|
1384 |
|
1385 void MacroAssembler::reset_last_Java_frame(bool clear_fp, |
|
1386 bool clear_pc) { |
|
1387 Register thread = TREG; |
|
1388 #ifndef OPT_THREAD |
|
1389 get_thread(thread); |
|
1390 #endif |
|
1391 // we must set sp to zero to clear frame |
|
1392 sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); |
|
1393 // must clear fp, so that compiled frames are not confused; it is |
|
1394 // possible that we need it only for debugging |
|
1395 if (clear_fp) { |
|
1396 sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); |
|
1397 } |
|
1398 |
|
1399 if (clear_pc) { |
|
1400 sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); |
|
1401 } |
|
1402 } |
|
1403 |
|
1404 // Write serialization page so VM thread can do a pseudo remote membar. |
|
1405 // We use the current thread pointer to calculate a thread specific |
|
1406 // offset to write to within the page. This minimizes bus traffic |
|
1407 // due to cache line collision. |
|
1408 void MacroAssembler::serialize_memory(Register thread, Register tmp) { |
|
1409 move(tmp, thread); |
|
1410 srl(tmp, tmp,os::get_serialize_page_shift_count()); |
|
1411 move(AT, (os::vm_page_size() - sizeof(int))); |
|
1412 andr(tmp, tmp,AT); |
|
1413 sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page())); |
|
1414 } |
|
1415 |
|
1416 // Calls to C land |
|
1417 // |
|
1418 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded |
|
1419 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp |
|
1420 // has to be reset to 0. This is required to allow proper stack traversal. |
|
1421 void MacroAssembler::set_last_Java_frame(Register java_thread, |
|
1422 Register last_java_sp, |
|
1423 Register last_java_fp, |
|
1424 address last_java_pc) { |
|
1425 // determine java_thread register |
|
1426 if (!java_thread->is_valid()) { |
|
1427 #ifndef OPT_THREAD |
|
1428 java_thread = T2; |
|
1429 get_thread(java_thread); |
|
1430 #else |
|
1431 java_thread = TREG; |
|
1432 #endif |
|
1433 } |
|
1434 // determine last_java_sp register |
|
1435 if (!last_java_sp->is_valid()) { |
|
1436 last_java_sp = SP; |
|
1437 } |
|
1438 |
|
1439 // last_java_fp is optional |
|
1440 |
|
1441 if (last_java_fp->is_valid()) { |
|
1442 st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); |
|
1443 } |
|
1444 |
|
1445 // last_java_pc is optional |
|
1446 |
|
1447 if (last_java_pc != NULL) { |
|
1448 relocate(relocInfo::internal_pc_type); |
|
1449 patchable_set48(AT, (long)last_java_pc); |
|
1450 st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); |
|
1451 } |
|
1452 st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); |
|
1453 } |
|
1454 |
|
1455 void MacroAssembler::set_last_Java_frame(Register last_java_sp, |
|
1456 Register last_java_fp, |
|
1457 address last_java_pc) { |
|
1458 // determine last_java_sp register |
|
1459 if (!last_java_sp->is_valid()) { |
|
1460 last_java_sp = SP; |
|
1461 } |
|
1462 |
|
1463 Register thread = TREG; |
|
1464 #ifndef OPT_THREAD |
|
1465 get_thread(thread); |
|
1466 #endif |
|
1467 // last_java_fp is optional |
|
1468 if (last_java_fp->is_valid()) { |
|
1469 sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); |
|
1470 } |
|
1471 |
|
1472 // last_java_pc is optional |
|
1473 if (last_java_pc != NULL) { |
|
1474 Address java_pc(thread, |
|
1475 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); |
|
1476 li(AT, (intptr_t)(last_java_pc)); |
|
1477 sd(AT, java_pc); |
|
1478 } |
|
1479 |
|
1480 sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); |
|
1481 } |
|
1482 |
|
1483 ////////////////////////////////////////////////////////////////////////////////// |
|
1484 #if INCLUDE_ALL_GCS |
|
1485 |
|
1486 void MacroAssembler::g1_write_barrier_pre(Register obj, |
|
1487 #ifndef _LP64 |
|
1488 Register thread, |
|
1489 #endif |
|
1490 Register tmp, |
|
1491 Register tmp2, |
|
1492 bool tosca_live) { |
|
1493 Unimplemented(); |
|
1494 } |
|
1495 |
|
1496 void MacroAssembler::g1_write_barrier_post(Register store_addr, |
|
1497 Register new_val, |
|
1498 #ifndef _LP64 |
|
1499 Register thread, |
|
1500 #endif |
|
1501 Register tmp, |
|
1502 Register tmp2) { |
|
1503 |
|
1504 Unimplemented(); |
|
1505 } |
|
1506 |
|
1507 #endif // INCLUDE_ALL_GCS |
|
1508 ////////////////////////////////////////////////////////////////////////////////// |
|
1509 |
|
1510 |
|
1511 void MacroAssembler::store_check(Register obj) { |
|
1512 // Does a store check for the oop in register obj. The content of |
|
1513 // register obj is destroyed afterwards. |
|
1514 store_check_part_1(obj); |
|
1515 store_check_part_2(obj); |
|
1516 } |
|
1517 |
|
1518 void MacroAssembler::store_check(Register obj, Address dst) { |
|
1519 store_check(obj); |
|
1520 } |
|
1521 |
|
1522 |
|
1523 // split the store check operation so that other instructions can be scheduled inbetween |
|
1524 void MacroAssembler::store_check_part_1(Register obj) { |
|
1525 BarrierSet* bs = Universe::heap()->barrier_set(); |
|
1526 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); |
|
1527 #ifdef _LP64 |
|
1528 dsrl(obj, obj, CardTableModRefBS::card_shift); |
|
1529 #else |
|
1530 shr(obj, CardTableModRefBS::card_shift); |
|
1531 #endif |
|
1532 } |
|
1533 |
|
1534 void MacroAssembler::store_check_part_2(Register obj) { |
|
1535 BarrierSet* bs = Universe::heap()->barrier_set(); |
|
1536 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); |
|
1537 CardTableModRefBS* ct = (CardTableModRefBS*)bs; |
|
1538 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); |
|
1539 |
|
1540 li(AT, (long)ct->byte_map_base); |
|
1541 #ifdef _LP64 |
|
1542 dadd(AT, AT, obj); |
|
1543 #else |
|
1544 add(AT, AT, obj); |
|
1545 #endif |
|
1546 sb(R0, AT, 0); |
|
1547 sync(); |
|
1548 } |
|
1549 |
|
1550 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. |
|
1551 void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, |
|
1552 Register t1, Register t2, Label& slow_case) { |
|
1553 assert_different_registers(obj, var_size_in_bytes, t1, t2, AT); |
|
1554 |
|
1555 Register end = t2; |
|
1556 #ifndef OPT_THREAD |
|
1557 Register thread = t1; |
|
1558 get_thread(thread); |
|
1559 #else |
|
1560 Register thread = TREG; |
|
1561 #endif |
|
1562 verify_tlab(t1, t2);//blows t1&t2 |
|
1563 |
|
1564 ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset())); |
|
1565 |
|
1566 if (var_size_in_bytes == NOREG) { |
|
1567 // i dont think we need move con_size_in_bytes to a register first. |
|
1568 // by yjl 8/17/2005 |
|
1569 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first"); |
|
1570 addi(end, obj, con_size_in_bytes); |
|
1571 } else { |
|
1572 add(end, obj, var_size_in_bytes); |
|
1573 } |
|
1574 |
|
1575 ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset())); |
|
1576 sltu(AT, AT, end); |
|
1577 bne_far(AT, R0, slow_case); |
|
1578 delayed()->nop(); |
|
1579 |
|
1580 |
|
1581 // update the tlab top pointer |
|
1582 st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset())); |
|
1583 |
|
1584 // recover var_size_in_bytes if necessary |
|
1585 /*if (var_size_in_bytes == end) { |
|
1586 sub(var_size_in_bytes, end, obj); |
|
1587 }*/ |
|
1588 |
|
1589 verify_tlab(t1, t2); |
|
1590 } |
|
1591 |
|
1592 // Defines obj, preserves var_size_in_bytes |
|
1593 void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, |
|
1594 Register t1, Register t2, Label& slow_case) { |
|
1595 assert_different_registers(obj, var_size_in_bytes, t1, AT); |
|
1596 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq |
|
1597 // No allocation in the shared eden. |
|
1598 b_far(slow_case); |
|
1599 delayed()->nop(); |
|
1600 } else { |
|
1601 |
|
1602 #ifndef _LP64 |
|
1603 Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr())); |
|
1604 lui(t1, split_high((intptr_t)Universe::heap()->top_addr())); |
|
1605 #else |
|
1606 Address heap_top(t1); |
|
1607 li(t1, (long)Universe::heap()->top_addr()); |
|
1608 #endif |
|
1609 ld_ptr(obj, heap_top); |
|
1610 |
|
1611 Register end = t2; |
|
1612 Label retry; |
|
1613 |
|
1614 bind(retry); |
|
1615 if (var_size_in_bytes == NOREG) { |
|
1616 // i dont think we need move con_size_in_bytes to a register first. |
|
1617 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first"); |
|
1618 addi(end, obj, con_size_in_bytes); |
|
1619 } else { |
|
1620 add(end, obj, var_size_in_bytes); |
|
1621 } |
|
1622 // if end < obj then we wrapped around => object too long => slow case |
|
1623 sltu(AT, end, obj); |
|
1624 bne_far(AT, R0, slow_case); |
|
1625 delayed()->nop(); |
|
1626 |
|
1627 li(AT, (long)Universe::heap()->end_addr()); |
|
1628 sltu(AT, AT, end); |
|
1629 bne_far(AT, R0, slow_case); |
|
1630 delayed()->nop(); |
|
1631 // Compare obj with the top addr, and if still equal, store the new top addr in |
|
1632 // end at the address of the top addr pointer. Sets ZF if was equal, and clears |
|
1633 // it otherwise. Use lock prefix for atomicity on MPs. |
|
1634 //if (os::is_MP()) { |
|
1635 // sync(); |
|
1636 //} |
|
1637 |
|
1638 // if someone beat us on the allocation, try again, otherwise continue |
|
1639 cmpxchg(end, heap_top, obj); |
|
1640 beq_far(AT, R0, retry); //by yyq |
|
1641 delayed()->nop(); |
|
1642 |
|
1643 } |
|
1644 } |
|
1645 |
|
1646 // C2 doesn't invoke this one. |
|
1647 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { |
|
1648 Register top = T0; |
|
1649 Register t1 = T1; |
|
1650 /* Jin: tlab_refill() is called in |
|
1651 |
|
1652 [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id); |
|
1653 |
|
1654 In generate_code_for(), T2 has been assigned as a register(length), which is used |
|
1655 after calling tlab_refill(); |
|
1656 Therefore, tlab_refill() should not use T2. |
|
1657 |
|
1658 Source: |
|
1659 |
|
1660 Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException |
|
1661 at java.lang.System.arraycopy(Native Method) |
|
1662 at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array |
|
1663 at sun.misc.Resource.getBytes(Resource.java:117) |
|
1664 at java.net.URLClassLoader.defineClass(URLClassLoader.java:273) |
|
1665 at java.net.URLClassLoader.findClass(URLClassLoader.java:205) |
|
1666 at java.lang.ClassLoader.loadClass(ClassLoader.java:321) |
|
1667 */ |
|
1668 Register t2 = T9; |
|
1669 Register t3 = T3; |
|
1670 Register thread_reg = T8; |
|
1671 Label do_refill, discard_tlab; |
|
1672 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq |
|
1673 // No allocation in the shared eden. |
|
1674 b(slow_case); |
|
1675 delayed()->nop(); |
|
1676 } |
|
1677 |
|
1678 get_thread(thread_reg); |
|
1679 |
|
1680 ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); |
|
1681 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset())); |
|
1682 |
|
1683 // calculate amount of free space |
|
1684 sub(t1, t1, top); |
|
1685 shr(t1, LogHeapWordSize); |
|
1686 |
|
1687 // Retain tlab and allocate object in shared space if |
|
1688 // the amount free in the tlab is too large to discard. |
|
1689 ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); |
|
1690 slt(AT, t2, t1); |
|
1691 beq(AT, R0, discard_tlab); |
|
1692 delayed()->nop(); |
|
1693 |
|
1694 // Retain |
|
1695 |
|
1696 #ifndef _LP64 |
|
1697 move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); |
|
1698 #else |
|
1699 li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); |
|
1700 #endif |
|
1701 add(t2, t2, AT); |
|
1702 st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); |
|
1703 |
|
1704 if (TLABStats) { |
|
1705 // increment number of slow_allocations |
|
1706 lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); |
|
1707 addiu(AT, AT, 1); |
|
1708 sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); |
|
1709 } |
|
1710 b(try_eden); |
|
1711 delayed()->nop(); |
|
1712 |
|
1713 bind(discard_tlab); |
|
1714 if (TLABStats) { |
|
1715 // increment number of refills |
|
1716 lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); |
|
1717 addi(AT, AT, 1); |
|
1718 sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); |
|
1719 // accumulate wastage -- t1 is amount free in tlab |
|
1720 lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); |
|
1721 add(AT, AT, t1); |
|
1722 sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); |
|
1723 } |
|
1724 |
|
1725 // if tlab is currently allocated (top or end != null) then |
|
1726 // fill [top, end + alignment_reserve) with array object |
|
1727 beq(top, R0, do_refill); |
|
1728 delayed()->nop(); |
|
1729 |
|
1730 // set up the mark word |
|
1731 li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2)); |
|
1732 st_ptr(AT, top, oopDesc::mark_offset_in_bytes()); |
|
1733 |
|
1734 // set the length to the remaining space |
|
1735 addi(t1, t1, - typeArrayOopDesc::header_size(T_INT)); |
|
1736 addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve()); |
|
1737 shl(t1, log2_intptr(HeapWordSize/sizeof(jint))); |
|
1738 sw(t1, top, arrayOopDesc::length_offset_in_bytes()); |
|
1739 |
|
1740 // set klass to intArrayKlass |
|
1741 #ifndef _LP64 |
|
1742 lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr())); |
|
1743 lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr())); |
|
1744 #else |
|
1745 li(AT, (intptr_t)Universe::intArrayKlassObj_addr()); |
|
1746 ld_ptr(t1, AT, 0); |
|
1747 #endif |
|
1748 //st_ptr(t1, top, oopDesc::klass_offset_in_bytes()); |
|
1749 store_klass(top, t1); |
|
1750 |
|
1751 // refill the tlab with an eden allocation |
|
1752 bind(do_refill); |
|
1753 ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset())); |
|
1754 shl(t1, LogHeapWordSize); |
|
1755 // add object_size ?? |
|
1756 eden_allocate(top, t1, 0, t2, t3, slow_case); |
|
1757 |
|
1758 // Check that t1 was preserved in eden_allocate. |
|
1759 #ifdef ASSERT |
|
1760 if (UseTLAB) { |
|
1761 Label ok; |
|
1762 assert_different_registers(thread_reg, t1); |
|
1763 ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset())); |
|
1764 shl(AT, LogHeapWordSize); |
|
1765 beq(AT, t1, ok); |
|
1766 delayed()->nop(); |
|
1767 stop("assert(t1 != tlab size)"); |
|
1768 should_not_reach_here(); |
|
1769 |
|
1770 bind(ok); |
|
1771 } |
|
1772 #endif |
|
1773 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset())); |
|
1774 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); |
|
1775 add(top, top, t1); |
|
1776 addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); |
|
1777 st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset())); |
|
1778 verify_tlab(t1, t2); |
|
1779 b(retry); |
|
1780 delayed()->nop(); |
|
1781 } |
|
1782 |
|
// File-local constant with the value 0.7853981633974483 (pi / 4 as a double).
static const double pi_4 = 0.7853981633974483;
|
1784 |
|
1785 // the x86 version is to clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME |
|
1786 // must get argument(a double) in F12/F13 |
|
1787 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { |
|
1788 //We need to preseve the register which maybe modified during the Call @Jerome |
|
1789 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { |
|
1790 //save all modified register here |
|
1791 // if (preserve_cpu_regs) { |
|
1792 // } |
|
1793 //FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9 |
|
1794 pushad(); |
|
1795 //we should preserve the stack space before we call |
|
1796 addi(SP, SP, -wordSize * 2); |
|
1797 switch (trig){ |
|
1798 case 's' : |
|
1799 call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); |
|
1800 delayed()->nop(); |
|
1801 break; |
|
1802 case 'c': |
|
1803 call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); |
|
1804 delayed()->nop(); |
|
1805 break; |
|
1806 case 't': |
|
1807 call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); |
|
1808 delayed()->nop(); |
|
1809 break; |
|
1810 default:assert (false, "bad intrinsic"); |
|
1811 break; |
|
1812 |
|
1813 } |
|
1814 |
|
1815 addi(SP, SP, wordSize * 2); |
|
1816 popad(); |
|
1817 // if (preserve_cpu_regs) { |
|
1818 // } |
|
1819 } |
|
1820 |
|
1821 #ifdef _LP64 |
|
1822 void MacroAssembler::li(Register rd, long imm) { |
|
1823 if (imm <= max_jint && imm >= min_jint) { |
|
1824 li32(rd, (int)imm); |
|
1825 } else if (julong(imm) <= 0xFFFFFFFF) { |
|
1826 assert_not_delayed(); |
|
1827 // lui sign-extends, so we can't use that. |
|
1828 ori(rd, R0, julong(imm) >> 16); |
|
1829 dsll(rd, rd, 16); |
|
1830 ori(rd, rd, split_low(imm)); |
|
1831 //aoqi_test |
|
1832 //} else if ((imm > 0) && ((imm >> 48) == 0)) { |
|
1833 } else if ((imm > 0) && is_simm16(imm >> 32)) { |
|
1834 /* A 48-bit address */ |
|
1835 li48(rd, imm); |
|
1836 } else { |
|
1837 li64(rd, imm); |
|
1838 } |
|
1839 } |
|
1840 #else |
|
1841 void MacroAssembler::li(Register rd, long imm) { |
|
1842 li32(rd, (int)imm); |
|
1843 } |
|
1844 #endif |
|
1845 |
|
1846 void MacroAssembler::li32(Register reg, int imm) { |
|
1847 if (is_simm16(imm)) { |
|
1848 /* Jin: for imm < 0, we should use addi instead of addiu. |
|
1849 * |
|
1850 * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint) |
|
1851 * |
|
1852 * 78 move [int:-1|I] [a0|I] |
|
1853 * : daddi a0, zero, 0xffffffff (correct) |
|
1854 * : daddiu a0, zero, 0xffffffff (incorrect) |
|
1855 */ |
|
1856 if (imm >= 0) |
|
1857 addiu(reg, R0, imm); |
|
1858 else |
|
1859 addi(reg, R0, imm); |
|
1860 } else { |
|
1861 lui(reg, split_low(imm >> 16)); |
|
1862 if (split_low(imm)) |
|
1863 ori(reg, reg, split_low(imm)); |
|
1864 } |
|
1865 } |
|
1866 |
|
1867 #ifdef _LP64 |
|
1868 void MacroAssembler::set64(Register d, jlong value) { |
|
1869 assert_not_delayed(); |
|
1870 |
|
1871 int hi = (int)(value >> 32); |
|
1872 int lo = (int)(value & ~0); |
|
1873 |
|
1874 if (value == lo) { // 32-bit integer |
|
1875 if (is_simm16(value)) { |
|
1876 daddiu(d, R0, value); |
|
1877 } else { |
|
1878 lui(d, split_low(value >> 16)); |
|
1879 if (split_low(value)) { |
|
1880 ori(d, d, split_low(value)); |
|
1881 } |
|
1882 } |
|
1883 } else if (hi == 0) { // hardware zero-extends to upper 32 |
|
1884 ori(d, R0, julong(value) >> 16); |
|
1885 dsll(d, d, 16); |
|
1886 if (split_low(value)) { |
|
1887 ori(d, d, split_low(value)); |
|
1888 } |
|
1889 } else if ((value> 0) && is_simm16(value >> 32)) { // li48 |
|
1890 // 4 insts |
|
1891 li48(d, value); |
|
1892 } else { // li64 |
|
1893 // 6 insts |
|
1894 li64(d, value); |
|
1895 } |
|
1896 } |
|
1897 |
|
1898 |
|
1899 int MacroAssembler::insts_for_set64(jlong value) { |
|
1900 int hi = (int)(value >> 32); |
|
1901 int lo = (int)(value & ~0); |
|
1902 |
|
1903 int count = 0; |
|
1904 |
|
1905 if (value == lo) { // 32-bit integer |
|
1906 if (is_simm16(value)) { |
|
1907 //daddiu(d, R0, value); |
|
1908 count++; |
|
1909 } else { |
|
1910 //lui(d, split_low(value >> 16)); |
|
1911 count++; |
|
1912 if (split_low(value)) { |
|
1913 //ori(d, d, split_low(value)); |
|
1914 count++; |
|
1915 } |
|
1916 } |
|
1917 } else if (hi == 0) { // hardware zero-extends to upper 32 |
|
1918 //ori(d, R0, julong(value) >> 16); |
|
1919 //dsll(d, d, 16); |
|
1920 count += 2; |
|
1921 if (split_low(value)) { |
|
1922 //ori(d, d, split_low(value)); |
|
1923 count++; |
|
1924 } |
|
1925 } else if ((value> 0) && is_simm16(value >> 32)) { // li48 |
|
1926 // 4 insts |
|
1927 //li48(d, value); |
|
1928 count += 4; |
|
1929 } else { // li64 |
|
1930 // 6 insts |
|
1931 //li64(d, value); |
|
1932 count += 6; |
|
1933 } |
|
1934 |
|
1935 return count; |
|
1936 } |
|
1937 |
|
1938 void MacroAssembler::patchable_set48(Register d, jlong value) { |
|
1939 assert_not_delayed(); |
|
1940 |
|
1941 int hi = (int)(value >> 32); |
|
1942 int lo = (int)(value & ~0); |
|
1943 |
|
1944 int count = 0; |
|
1945 |
|
1946 if (value == lo) { // 32-bit integer |
|
1947 if (is_simm16(value)) { |
|
1948 daddiu(d, R0, value); |
|
1949 count += 1; |
|
1950 } else { |
|
1951 lui(d, split_low(value >> 16)); |
|
1952 count += 1; |
|
1953 if (split_low(value)) { |
|
1954 ori(d, d, split_low(value)); |
|
1955 count += 1; |
|
1956 } |
|
1957 } |
|
1958 } else if (hi == 0) { // hardware zero-extends to upper 32 |
|
1959 ori(d, R0, julong(value) >> 16); |
|
1960 dsll(d, d, 16); |
|
1961 count += 2; |
|
1962 if (split_low(value)) { |
|
1963 ori(d, d, split_low(value)); |
|
1964 count += 1; |
|
1965 } |
|
1966 } else if ((value> 0) && is_simm16(value >> 32)) { // li48 |
|
1967 // 4 insts |
|
1968 li48(d, value); |
|
1969 count += 4; |
|
1970 } else { // li64 |
|
1971 tty->print_cr("value = 0x%x", value); |
|
1972 guarantee(false, "Not supported yet !"); |
|
1973 } |
|
1974 |
|
1975 for (count; count < 4; count++) { |
|
1976 nop(); |
|
1977 } |
|
1978 } |
|
1979 |
|
1980 void MacroAssembler::patchable_set32(Register d, jlong value) { |
|
1981 assert_not_delayed(); |
|
1982 |
|
1983 int hi = (int)(value >> 32); |
|
1984 int lo = (int)(value & ~0); |
|
1985 |
|
1986 int count = 0; |
|
1987 |
|
1988 if (value == lo) { // 32-bit integer |
|
1989 if (is_simm16(value)) { |
|
1990 daddiu(d, R0, value); |
|
1991 count += 1; |
|
1992 } else { |
|
1993 lui(d, split_low(value >> 16)); |
|
1994 count += 1; |
|
1995 if (split_low(value)) { |
|
1996 ori(d, d, split_low(value)); |
|
1997 count += 1; |
|
1998 } |
|
1999 } |
|
2000 } else if (hi == 0) { // hardware zero-extends to upper 32 |
|
2001 ori(d, R0, julong(value) >> 16); |
|
2002 dsll(d, d, 16); |
|
2003 count += 2; |
|
2004 if (split_low(value)) { |
|
2005 ori(d, d, split_low(value)); |
|
2006 count += 1; |
|
2007 } |
|
2008 } else { |
|
2009 tty->print_cr("value = 0x%x", value); |
|
2010 guarantee(false, "Not supported yet !"); |
|
2011 } |
|
2012 |
|
2013 for (count; count < 3; count++) { |
|
2014 nop(); |
|
2015 } |
|
2016 } |
|
2017 |
|
2018 void MacroAssembler::patchable_call32(Register d, jlong value) { |
|
2019 assert_not_delayed(); |
|
2020 |
|
2021 int hi = (int)(value >> 32); |
|
2022 int lo = (int)(value & ~0); |
|
2023 |
|
2024 int count = 0; |
|
2025 |
|
2026 if (value == lo) { // 32-bit integer |
|
2027 if (is_simm16(value)) { |
|
2028 daddiu(d, R0, value); |
|
2029 count += 1; |
|
2030 } else { |
|
2031 lui(d, split_low(value >> 16)); |
|
2032 count += 1; |
|
2033 if (split_low(value)) { |
|
2034 ori(d, d, split_low(value)); |
|
2035 count += 1; |
|
2036 } |
|
2037 } |
|
2038 } else { |
|
2039 tty->print_cr("value = 0x%x", value); |
|
2040 guarantee(false, "Not supported yet !"); |
|
2041 } |
|
2042 |
|
2043 for (count; count < 2; count++) { |
|
2044 nop(); |
|
2045 } |
|
2046 } |
|
2047 |
|
2048 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { |
|
2049 assert(UseCompressedClassPointers, "should only be used for compressed header"); |
|
2050 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
|
2051 |
|
2052 int klass_index = oop_recorder()->find_index(k); |
|
2053 RelocationHolder rspec = metadata_Relocation::spec(klass_index); |
|
2054 long narrowKlass = (long)Klass::encode_klass(k); |
|
2055 |
|
2056 relocate(rspec, Assembler::narrow_oop_operand); |
|
2057 patchable_set48(dst, narrowKlass); |
|
2058 } |
|
2059 |
|
2060 |
|
2061 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { |
|
2062 assert(UseCompressedOops, "should only be used for compressed header"); |
|
2063 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
|
2064 |
|
2065 int oop_index = oop_recorder()->find_index(obj); |
|
2066 RelocationHolder rspec = oop_Relocation::spec(oop_index); |
|
2067 |
|
2068 relocate(rspec, Assembler::narrow_oop_operand); |
|
2069 patchable_set48(dst, oop_index); |
|
2070 } |
|
2071 |
|
2072 void MacroAssembler::li64(Register rd, long imm) { |
|
2073 assert_not_delayed(); |
|
2074 lui(rd, imm >> 48); |
|
2075 ori(rd, rd, split_low(imm >> 32)); |
|
2076 dsll(rd, rd, 16); |
|
2077 ori(rd, rd, split_low(imm >> 16)); |
|
2078 dsll(rd, rd, 16); |
|
2079 ori(rd, rd, split_low(imm)); |
|
2080 } |
|
2081 |
|
2082 void MacroAssembler::li48(Register rd, long imm) { |
|
2083 assert_not_delayed(); |
|
2084 assert(is_simm16(imm >> 32), "Not a 48-bit address"); |
|
2085 lui(rd, imm >> 32); |
|
2086 ori(rd, rd, split_low(imm >> 16)); |
|
2087 dsll(rd, rd, 16); |
|
2088 ori(rd, rd, split_low(imm)); |
|
2089 } |
|
2090 #endif |
|
2091 // NOTE: i dont push eax as i486. |
|
2092 // the x86 save eax for it use eax as the jump register |
|
2093 void MacroAssembler::verify_oop(Register reg, const char* s) { |
|
2094 /* |
|
2095 if (!VerifyOops) return; |
|
2096 |
|
2097 // Pass register number to verify_oop_subroutine |
|
2098 char* b = new char[strlen(s) + 50]; |
|
2099 sprintf(b, "verify_oop: %s: %s", reg->name(), s); |
|
2100 push(rax); // save rax, |
|
2101 push(reg); // pass register argument |
|
2102 ExternalAddress buffer((address) b); |
|
2103 // avoid using pushptr, as it modifies scratch registers |
|
2104 // and our contract is not to modify anything |
|
2105 movptr(rax, buffer.addr()); |
|
2106 push(rax); |
|
2107 // call indirectly to solve generation ordering problem |
|
2108 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); |
|
2109 call(rax); |
|
2110 */ |
|
2111 if (!VerifyOops) return; |
|
2112 const char * b = NULL; |
|
2113 stringStream ss; |
|
2114 ss.print("verify_oop: %s: %s", reg->name(), s); |
|
2115 b = code_string(ss.as_string()); |
|
2116 #ifdef _LP64 |
|
2117 pushad(); |
|
2118 move(A1, reg); |
|
2119 li(A0, (long)b); |
|
2120 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); |
|
2121 ld(T9, AT, 0); |
|
2122 jalr(T9); |
|
2123 delayed()->nop(); |
|
2124 popad(); |
|
2125 #else |
|
2126 // Pass register number to verify_oop_subroutine |
|
2127 sw(T0, SP, - wordSize); |
|
2128 sw(T1, SP, - 2*wordSize); |
|
2129 sw(RA, SP, - 3*wordSize); |
|
2130 sw(A0, SP ,- 4*wordSize); |
|
2131 sw(A1, SP ,- 5*wordSize); |
|
2132 sw(AT, SP ,- 6*wordSize); |
|
2133 sw(T9, SP ,- 7*wordSize); |
|
2134 addiu(SP, SP, - 7 * wordSize); |
|
2135 move(A1, reg); |
|
2136 li(A0, (long)b); |
|
2137 // call indirectly to solve generation ordering problem |
|
2138 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); |
|
2139 lw(T9, AT, 0); |
|
2140 jalr(T9); |
|
2141 delayed()->nop(); |
|
2142 lw(T0, SP, 6* wordSize); |
|
2143 lw(T1, SP, 5* wordSize); |
|
2144 lw(RA, SP, 4* wordSize); |
|
2145 lw(A0, SP, 3* wordSize); |
|
2146 lw(A1, SP, 2* wordSize); |
|
2147 lw(AT, SP, 1* wordSize); |
|
2148 lw(T9, SP, 0* wordSize); |
|
2149 addiu(SP, SP, 7 * wordSize); |
|
2150 #endif |
|
2151 } |
|
2152 |
|
2153 |
|
2154 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { |
|
2155 if (!VerifyOops) { |
|
2156 nop(); |
|
2157 return; |
|
2158 } |
|
2159 // Pass register number to verify_oop_subroutine |
|
2160 const char * b = NULL; |
|
2161 stringStream ss; |
|
2162 ss.print("verify_oop_addr: %s", s); |
|
2163 b = code_string(ss.as_string()); |
|
2164 |
|
2165 st_ptr(T0, SP, - wordSize); |
|
2166 st_ptr(T1, SP, - 2*wordSize); |
|
2167 st_ptr(RA, SP, - 3*wordSize); |
|
2168 st_ptr(A0, SP, - 4*wordSize); |
|
2169 st_ptr(A1, SP, - 5*wordSize); |
|
2170 st_ptr(AT, SP, - 6*wordSize); |
|
2171 st_ptr(T9, SP, - 7*wordSize); |
|
2172 ld_ptr(A1, addr); // addr may use SP, so load from it before change SP |
|
2173 addiu(SP, SP, - 7 * wordSize); |
|
2174 |
|
2175 li(A0, (long)b); |
|
2176 // call indirectly to solve generation ordering problem |
|
2177 li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); |
|
2178 ld_ptr(T9, AT, 0); |
|
2179 jalr(T9); |
|
2180 delayed()->nop(); |
|
2181 ld_ptr(T0, SP, 6* wordSize); |
|
2182 ld_ptr(T1, SP, 5* wordSize); |
|
2183 ld_ptr(RA, SP, 4* wordSize); |
|
2184 ld_ptr(A0, SP, 3* wordSize); |
|
2185 ld_ptr(A1, SP, 2* wordSize); |
|
2186 ld_ptr(AT, SP, 1* wordSize); |
|
2187 ld_ptr(T9, SP, 0* wordSize); |
|
2188 addiu(SP, SP, 7 * wordSize); |
|
2189 } |
|
2190 |
|
2191 // used registers : T0, T1 |
|
2192 void MacroAssembler::verify_oop_subroutine() { |
|
2193 // RA: ra |
|
2194 // A0: char* error message |
|
2195 // A1: oop object to verify |
|
2196 |
|
2197 Label exit, error; |
|
2198 // increment counter |
|
2199 li(T0, (long)StubRoutines::verify_oop_count_addr()); |
|
2200 lw(AT, T0, 0); |
|
2201 #ifdef _LP64 |
|
2202 daddi(AT, AT, 1); |
|
2203 #else |
|
2204 addi(AT, AT, 1); |
|
2205 #endif |
|
2206 sw(AT, T0, 0); |
|
2207 |
|
2208 // make sure object is 'reasonable' |
|
2209 beq(A1, R0, exit); // if obj is NULL it is ok |
|
2210 delayed()->nop(); |
|
2211 |
|
2212 // Check if the oop is in the right area of memory |
|
2213 //const int oop_mask = Universe::verify_oop_mask(); |
|
2214 //const int oop_bits = Universe::verify_oop_bits(); |
|
2215 const uintptr_t oop_mask = Universe::verify_oop_mask(); |
|
2216 const uintptr_t oop_bits = Universe::verify_oop_bits(); |
|
2217 li(AT, oop_mask); |
|
2218 andr(T0, A1, AT); |
|
2219 li(AT, oop_bits); |
|
2220 bne(T0, AT, error); |
|
2221 delayed()->nop(); |
|
2222 |
|
2223 // make sure klass is 'reasonable' |
|
2224 //add for compressedoops |
|
2225 reinit_heapbase(); |
|
2226 //add for compressedoops |
|
2227 load_klass(T0, A1); |
|
2228 beq(T0, R0, error); // if klass is NULL it is broken |
|
2229 delayed()->nop(); |
|
2230 #if 0 |
|
2231 //FIXME:wuhui. |
|
2232 // Check if the klass is in the right area of memory |
|
2233 //const int klass_mask = Universe::verify_klass_mask(); |
|
2234 //const int klass_bits = Universe::verify_klass_bits(); |
|
2235 const uintptr_t klass_mask = Universe::verify_klass_mask(); |
|
2236 const uintptr_t klass_bits = Universe::verify_klass_bits(); |
|
2237 |
|
2238 li(AT, klass_mask); |
|
2239 andr(T1, T0, AT); |
|
2240 li(AT, klass_bits); |
|
2241 bne(T1, AT, error); |
|
2242 delayed()->nop(); |
|
2243 // make sure klass' klass is 'reasonable' |
|
2244 //add for compressedoops |
|
2245 load_klass(T0, T0); |
|
2246 beq(T0, R0, error); // if klass' klass is NULL it is broken |
|
2247 delayed()->nop(); |
|
2248 |
|
2249 li(AT, klass_mask); |
|
2250 andr(T1, T0, AT); |
|
2251 li(AT, klass_bits); |
|
2252 bne(T1, AT, error); |
|
2253 delayed()->nop(); // if klass not in right area of memory it is broken too. |
|
2254 #endif |
|
2255 // return if everything seems ok |
|
2256 bind(exit); |
|
2257 |
|
2258 jr(RA); |
|
2259 delayed()->nop(); |
|
2260 |
|
2261 // handle errors |
|
2262 bind(error); |
|
2263 pushad(); |
|
2264 #ifndef _LP64 |
|
2265 addi(SP, SP, (-1) * wordSize); |
|
2266 #endif |
|
2267 call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); |
|
2268 delayed()->nop(); |
|
2269 #ifndef _LP64 |
|
2270 addiu(SP, SP, 1 * wordSize); |
|
2271 #endif |
|
2272 popad(); |
|
2273 jr(RA); |
|
2274 delayed()->nop(); |
|
2275 } |
|
2276 |
|
2277 void MacroAssembler::verify_tlab(Register t1, Register t2) { |
|
2278 #ifdef ASSERT |
|
2279 assert_different_registers(t1, t2, AT); |
|
2280 if (UseTLAB && VerifyOops) { |
|
2281 Label next, ok; |
|
2282 |
|
2283 get_thread(t1); |
|
2284 |
|
2285 ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); |
|
2286 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); |
|
2287 sltu(AT, t2, AT); |
|
2288 beq(AT, R0, next); |
|
2289 delayed()->nop(); |
|
2290 |
|
2291 stop("assert(top >= start)"); |
|
2292 |
|
2293 bind(next); |
|
2294 ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); |
|
2295 sltu(AT, AT, t2); |
|
2296 beq(AT, R0, ok); |
|
2297 delayed()->nop(); |
|
2298 |
|
2299 stop("assert(top <= end)"); |
|
2300 |
|
2301 bind(ok); |
|
2302 |
|
2303 } |
|
2304 #endif |
|
2305 } |
|
2306 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, |
|
2307 Register tmp, |
|
2308 int offset) { |
|
2309 intptr_t value = *delayed_value_addr; |
|
2310 if (value != 0) |
|
2311 return RegisterOrConstant(value + offset); |
|
2312 AddressLiteral a(delayed_value_addr); |
|
2313 // load indirectly to solve generation ordering problem |
|
2314 //movptr(tmp, ExternalAddress((address) delayed_value_addr)); |
|
2315 //ld(tmp, a); |
|
2316 if (offset != 0) |
|
2317 daddi(tmp,tmp, offset); |
|
2318 |
|
2319 return RegisterOrConstant(tmp); |
|
2320 } |
|
2321 |
|
2322 void MacroAssembler::hswap(Register reg) { |
|
2323 //short |
|
2324 //andi(reg, reg, 0xffff); |
|
2325 srl(AT, reg, 8); |
|
2326 sll(reg, reg, 24); |
|
2327 sra(reg, reg, 16); |
|
2328 orr(reg, reg, AT); |
|
2329 } |
|
2330 |
|
2331 void MacroAssembler::huswap(Register reg) { |
|
2332 #ifdef _LP64 |
|
2333 dsrl(AT, reg, 8); |
|
2334 dsll(reg, reg, 24); |
|
2335 dsrl(reg, reg, 16); |
|
2336 orr(reg, reg, AT); |
|
2337 andi(reg, reg, 0xffff); |
|
2338 #else |
|
2339 //andi(reg, reg, 0xffff); |
|
2340 srl(AT, reg, 8); |
|
2341 sll(reg, reg, 24); |
|
2342 srl(reg, reg, 16); |
|
2343 orr(reg, reg, AT); |
|
2344 #endif |
|
2345 } |
|
2346 |
|
2347 // something funny to do this will only one more register AT |
|
2348 // 32 bits |
|
2349 void MacroAssembler::swap(Register reg) { |
|
2350 srl(AT, reg, 8); |
|
2351 sll(reg, reg, 24); |
|
2352 orr(reg, reg, AT); |
|
2353 //reg : 4 1 2 3 |
|
2354 srl(AT, AT, 16); |
|
2355 xorr(AT, AT, reg); |
|
2356 andi(AT, AT, 0xff); |
|
2357 //AT : 0 0 0 1^3); |
|
2358 xorr(reg, reg, AT); |
|
2359 //reg : 4 1 2 1 |
|
2360 sll(AT, AT, 16); |
|
2361 xorr(reg, reg, AT); |
|
2362 //reg : 4 3 2 1 |
|
2363 } |
|
2364 |
|
2365 #ifdef _LP64 |
|
2366 |
|
2367 /* do 32-bit CAS using MIPS64 lld/scd |
|
2368 |
|
2369 Jin: cas_int should only compare 32-bits of the memory value. |
|
2370 However, lld/scd will do 64-bit operation, which violates the intention of cas_int. |
|
2371 To simulate a 32-bit atomic operation, the value loaded with LLD should be split into |
|
2372 two halves, and only the low-32 bits are compared. If equal, the low-32 bits of newval, |
|
2373 plus the high-32 bits of the memory value, are stored together with SCD. |
|
2374 |
|
2375 Example: |
|
2376 |
|
2377 double d = 3.1415926; |
|
2378 System.err.println("hello" + d); |
|
2379 |
|
2380 sun.misc.FloatingDecimal$1.<init>() |
|
2381 | |
|
2382 `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() |
|
2383 |
|
2384 38 cas_int [a7a7|J] [a0|I] [a6|I] |
|
2385 // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 |
|
2386 // a6: 0x4ab325aa |
|
2387 |
|
2388 again: |
|
2389 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" |
|
2390 |
|
2391 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) |
|
2392 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits |
|
2393 0x00000055647f3c68: dsll32 t8, t8, 0 |
|
2394 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal |
|
2395 0x00000055647f3c70: sll zero, zero, 0 |
|
2396 |
|
2397 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) |
|
2398 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; |
|
2399 0x00000055647f3c7c: ori v1, v1, 0xffffffff |
|
2400 0x00000055647f3c80: and v1, a6, v1 |
|
2401 0x00000055647f3c84: or at, t8, v1 |
|
2402 0x00000055647f3c88: scd at, 0x0(a7) |
|
2403 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again |
|
2404 0x00000055647f3c90: sll zero, zero, 0 |
|
2405 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done |
|
2406 0x00000055647f3c98: sll zero, zero, 0 |
|
2407 nequal: |
|
2408 0x00000055647f45a4: dadd a0, t9, zero |
|
2409 0x00000055647f45a8: dadd at, zero, zero |
|
2410 done: |
|
2411 */ |
|
2412 |
|
2413 void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { |
|
2414 /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */ |
|
2415 Label done, again, nequal; |
|
2416 |
|
2417 bind(again); |
|
2418 |
|
2419 if(!Use3A2000) sync(); |
|
2420 ll(AT, dest); |
|
2421 bne(AT, c_reg, nequal); |
|
2422 delayed()->nop(); |
|
2423 |
|
2424 move(AT, x_reg); |
|
2425 sc(AT, dest); |
|
2426 beq(AT, R0, again); |
|
2427 delayed()->nop(); |
|
2428 b(done); |
|
2429 delayed()->nop(); |
|
2430 |
|
2431 // not xchged |
|
2432 bind(nequal); |
|
2433 sync(); |
|
2434 move(c_reg, AT); |
|
2435 move(AT, R0); |
|
2436 |
|
2437 bind(done); |
|
2438 } |
|
2439 #endif // cmpxchg32 |
|
2440 |
|
2441 void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) { |
|
2442 Label done, again, nequal; |
|
2443 |
|
2444 bind(again); |
|
2445 #ifdef _LP64 |
|
2446 if(!Use3A2000) sync(); |
|
2447 lld(AT, dest); |
|
2448 #else |
|
2449 if(!Use3A2000) sync(); |
|
2450 ll(AT, dest); |
|
2451 #endif |
|
2452 bne(AT, c_reg, nequal); |
|
2453 delayed()->nop(); |
|
2454 |
|
2455 move(AT, x_reg); |
|
2456 #ifdef _LP64 |
|
2457 scd(AT, dest); |
|
2458 #else |
|
2459 sc(AT, dest); |
|
2460 #endif |
|
2461 beq(AT, R0, again); |
|
2462 delayed()->nop(); |
|
2463 b(done); |
|
2464 delayed()->nop(); |
|
2465 |
|
2466 // not xchged |
|
2467 bind(nequal); |
|
2468 sync(); |
|
2469 move(c_reg, AT); |
|
2470 move(AT, R0); |
|
2471 |
|
2472 bind(done); |
|
2473 } |
|
2474 |
|
2475 void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { |
|
2476 Label done, again, nequal; |
|
2477 |
|
2478 Register x_reg = x_regLo; |
|
2479 dsll32(x_regHi, x_regHi, 0); |
|
2480 dsll32(x_regLo, x_regLo, 0); |
|
2481 dsrl32(x_regLo, x_regLo, 0); |
|
2482 orr(x_reg, x_regLo, x_regHi); |
|
2483 |
|
2484 Register c_reg = c_regLo; |
|
2485 dsll32(c_regHi, c_regHi, 0); |
|
2486 dsll32(c_regLo, c_regLo, 0); |
|
2487 dsrl32(c_regLo, c_regLo, 0); |
|
2488 orr(c_reg, c_regLo, c_regHi); |
|
2489 |
|
2490 bind(again); |
|
2491 |
|
2492 if(!Use3A2000) sync(); |
|
2493 lld(AT, dest); |
|
2494 bne(AT, c_reg, nequal); |
|
2495 delayed()->nop(); |
|
2496 |
|
2497 //move(AT, x_reg); |
|
2498 dadd(AT, x_reg, R0); |
|
2499 scd(AT, dest); |
|
2500 beq(AT, R0, again); |
|
2501 delayed()->nop(); |
|
2502 b(done); |
|
2503 delayed()->nop(); |
|
2504 |
|
2505 // not xchged |
|
2506 bind(nequal); |
|
2507 sync(); |
|
2508 //move(c_reg, AT); |
|
2509 //move(AT, R0); |
|
2510 dadd(c_reg, AT, R0); |
|
2511 dadd(AT, R0, R0); |
|
2512 bind(done); |
|
2513 } |
|
2514 |
|
2515 // be sure the three register is different |
|
2516 void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { |
|
2517 assert_different_registers(tmp, fs, ft); |
|
2518 div_s(tmp, fs, ft); |
|
2519 trunc_l_s(tmp, tmp); |
|
2520 cvt_s_l(tmp, tmp); |
|
2521 mul_s(tmp, tmp, ft); |
|
2522 sub_s(fd, fs, tmp); |
|
2523 } |
|
2524 |
|
2525 // be sure the three register is different |
|
2526 void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { |
|
2527 assert_different_registers(tmp, fs, ft); |
|
2528 div_d(tmp, fs, ft); |
|
2529 trunc_l_d(tmp, tmp); |
|
2530 cvt_d_l(tmp, tmp); |
|
2531 mul_d(tmp, tmp, ft); |
|
2532 sub_d(fd, fs, tmp); |
|
2533 } |
|
2534 |
|
2535 // Fast_Lock and Fast_Unlock used by C2 |
|
2536 |
|
2537 // Because the transitions from emitted code to the runtime |
|
2538 // monitorenter/exit helper stubs are so slow it's critical that |
|
2539 // we inline both the stack-locking fast-path and the inflated fast path. |
|
2540 // |
|
2541 // See also: cmpFastLock and cmpFastUnlock. |
|
2542 // |
|
2543 // What follows is a specialized inline transliteration of the code |
|
2544 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat |
|
2545 // another option would be to emit TrySlowEnter and TrySlowExit methods |
|
2546 // at startup-time. These methods would accept arguments as |
|
2547 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure |
|
2548 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply |
|
2549 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. |
|
2550 // In practice, however, the # of lock sites is bounded and is usually small. |
|
2551 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer |
|
2552 // if the processor uses simple bimodal branch predictors keyed by EIP |
|
2553 // Since the helper routines would be called from multiple synchronization |
|
2554 // sites. |
|
2555 // |
|
2556 // An even better approach would be write "MonitorEnter()" and "MonitorExit()" |
|
2557 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites |
|
2558 // to those specialized methods. That'd give us a mostly platform-independent |
|
2559 // implementation that the JITs could optimize and inline at their pleasure. |
|
2560 // Done correctly, the only time we'd need to cross to native code would be |
|
2561 // to park() or unpark() threads. We'd also need a few more unsafe operators |
|
2562 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and |
|
2563 // (b) explicit barriers or fence operations. |
|
2564 // |
|
2565 // TODO: |
|
2566 // |
|
2567 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). |
|
2568 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. |
|
2569 // Given TLAB allocation, Self is usually manifested in a register, so passing it into |
|
2570 // the lock operators would typically be faster than reifying Self. |
|
2571 // |
|
2572 // * Ideally I'd define the primitives as: |
|
2573 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. |
|
2574 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED |
|
2575 // Unfortunately ADLC bugs prevent us from expressing the ideal form. |
|
2576 // Instead, we're stuck with a rather awkward and brittle register assignments below. |
|
2577 // Furthermore the register assignments are overconstrained, possibly resulting in |
|
2578 // sub-optimal code near the synchronization site. |
|
2579 // |
|
2580 // * Eliminate the sp-proximity tests and just use "== Self" tests instead. |
|
2581 // Alternately, use a better sp-proximity test. |
|
2582 // |
|
2583 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. |
|
2584 // Either one is sufficient to uniquely identify a thread. |
|
2585 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead. |
|
2586 // |
|
2587 // * Intrinsify notify() and notifyAll() for the common cases where the |
|
2588 // object is locked by the calling thread but the waitlist is empty. |
|
2589 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). |
|
2590 // |
|
2591 // * use jccb and jmpb instead of jcc and jmp to improve code density. |
|
2592 // But beware of excessive branch density on AMD Opterons. |
|
2593 // |
|
2594 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success |
|
2595 // or failure of the fast-path. If the fast-path fails then we pass |
|
2596 // control to the slow-path, typically in C. In Fast_Lock and |
|
2597 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2 |
|
2598 // will emit a conditional branch immediately after the node. |
|
2599 // So we have branches to branches and lots of ICC.ZF games. |
|
2600 // Instead, it might be better to have C2 pass a "FailureLabel" |
|
2601 // into Fast_Lock and Fast_Unlock. In the case of success, control |
|
2602 // will drop through the node. ICC.ZF is undefined at exit. |
|
2603 // In the case of failure, the node will branch directly to the |
|
2604 // FailureLabel |
|
2605 |
|
2606 |
|
2607 // obj: object to lock |
|
2608 // box: on-stack box address (displaced header location) - KILLED |
|
2609 // rax,: tmp -- KILLED |
|
2610 // scr: tmp -- KILLED |
|
2611 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) { |
|
2612 |
|
2613 // Ensure the register assignents are disjoint |
|
2614 guarantee (objReg != boxReg, "") ; |
|
2615 guarantee (objReg != tmpReg, "") ; |
|
2616 guarantee (objReg != scrReg, "") ; |
|
2617 guarantee (boxReg != tmpReg, "") ; |
|
2618 guarantee (boxReg != scrReg, "") ; |
|
2619 |
|
2620 |
|
2621 block_comment("FastLock"); |
|
2622 /* |
|
2623 move(AT, 0x0); |
|
2624 return; |
|
2625 */ |
|
2626 if (PrintBiasedLockingStatistics) { |
|
2627 push(tmpReg); |
|
2628 atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg); |
|
2629 pop(tmpReg); |
|
2630 } |
|
2631 |
|
2632 if (EmitSync & 1) { |
|
2633 move(AT, 0x0); |
|
2634 return; |
|
2635 } else |
|
2636 if (EmitSync & 2) { |
|
2637 Label DONE_LABEL ; |
|
2638 if (UseBiasedLocking) { |
|
2639 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. |
|
2640 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); |
|
2641 } |
|
2642 |
|
2643 ld(tmpReg, Address(objReg, 0)) ; // fetch markword |
|
2644 ori(tmpReg, tmpReg, 0x1); |
|
2645 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS |
|
2646 |
|
2647 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg |
|
2648 bne(AT, R0, DONE_LABEL); |
|
2649 delayed()->nop(); |
|
2650 |
|
2651 // Recursive locking |
|
2652 dsubu(tmpReg, tmpReg, SP); |
|
2653 li(AT, (7 - os::vm_page_size() )); |
|
2654 andr(tmpReg, tmpReg, AT); |
|
2655 sd(tmpReg, Address(boxReg, 0)); |
|
2656 bind(DONE_LABEL) ; |
|
2657 } else { |
|
2658 // Possible cases that we'll encounter in fast_lock |
|
2659 // ------------------------------------------------ |
|
2660 // * Inflated |
|
2661 // -- unlocked |
|
2662 // -- Locked |
|
2663 // = by self |
|
2664 // = by other |
|
2665 // * biased |
|
2666 // -- by Self |
|
2667 // -- by other |
|
2668 // * neutral |
|
2669 // * stack-locked |
|
2670 // -- by self |
|
2671 // = sp-proximity test hits |
|
2672 // = sp-proximity test generates false-negative |
|
2673 // -- by other |
|
2674 // |
|
2675 |
|
2676 Label IsInflated, DONE_LABEL, PopDone ; |
|
2677 |
|
2678 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage |
|
2679 // order to reduce the number of conditional branches in the most common cases. |
|
2680 // Beware -- there's a subtle invariant that fetch of the markword |
|
2681 // at [FETCH], below, will never observe a biased encoding (*101b). |
|
2682 // If this invariant is not held we risk exclusion (safety) failure. |
|
2683 if (UseBiasedLocking && !UseOptoBiasInlining) { |
|
2684 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); |
|
2685 } |
|
2686 |
|
2687 ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object. |
|
2688 andi(AT, tmpReg, markOopDesc::monitor_value); |
|
2689 bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias |
|
2690 delayed()->nop(); |
|
2691 |
|
2692 // Attempt stack-locking ... |
|
2693 ori (tmpReg, tmpReg, markOopDesc::unlocked_value); |
|
2694 sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS |
|
2695 //if (os::is_MP()) { |
|
2696 // sync(); |
|
2697 //} |
|
2698 |
|
2699 cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg |
|
2700 //AT == 1: unlocked |
|
2701 |
|
2702 if (PrintBiasedLockingStatistics) { |
|
2703 Label L; |
|
2704 beq(AT, R0, L); |
|
2705 delayed()->nop(); |
|
2706 push(T0); |
|
2707 push(T1); |
|
2708 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); |
|
2709 pop(T1); |
|
2710 pop(T0); |
|
2711 bind(L); |
|
2712 } |
|
2713 bne(AT, R0, DONE_LABEL); |
|
2714 delayed()->nop(); |
|
2715 |
|
2716 // Recursive locking |
|
2717 // The object is stack-locked: markword contains stack pointer to BasicLock. |
|
2718 // Locked by current thread if difference with current SP is less than one page. |
|
2719 dsubu(tmpReg, tmpReg, SP); |
|
2720 li(AT, 7 - os::vm_page_size() ); |
|
2721 andr(tmpReg, tmpReg, AT); |
|
2722 sd(tmpReg, Address(boxReg, 0)); |
|
2723 if (PrintBiasedLockingStatistics) { |
|
2724 Label L; |
|
2725 // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ |
|
2726 bne(tmpReg, R0, L); |
|
2727 delayed()->nop(); |
|
2728 push(T0); |
|
2729 push(T1); |
|
2730 atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); |
|
2731 pop(T1); |
|
2732 pop(T0); |
|
2733 bind(L); |
|
2734 } |
|
2735 sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */ |
|
2736 |
|
2737 b(DONE_LABEL) ; |
|
2738 delayed()->nop(); |
|
2739 |
|
2740 bind(IsInflated) ; |
|
2741 // The object's monitor m is unlocked iff m->owner == NULL, |
|
2742 // otherwise m->owner may contain a thread or a stack address. |
|
2743 |
|
2744 // TODO: someday avoid the ST-before-CAS penalty by |
|
2745 // relocating (deferring) the following ST. |
|
2746 // We should also think about trying a CAS without having |
|
2747 // fetched _owner. If the CAS is successful we may |
|
2748 // avoid an RTO->RTS upgrade on the $line. |
|
2749 // Without cast to int32_t a movptr will destroy r10 which is typically obj |
|
2750 li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); |
|
2751 sd(AT, Address(boxReg, 0)); |
|
2752 |
|
2753 move(boxReg, tmpReg) ; |
|
2754 ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; |
|
2755 // if (m->owner != 0) => AT = 0, goto slow path. |
|
2756 move(AT, R0); |
|
2757 bne(tmpReg, R0, DONE_LABEL); |
|
2758 delayed()->nop(); |
|
2759 |
|
2760 #ifndef OPT_THREAD |
|
2761 get_thread (TREG) ; |
|
2762 #endif |
|
2763 // It's inflated and appears unlocked |
|
2764 //if (os::is_MP()) { |
|
2765 // sync(); |
|
2766 //} |
|
2767 cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ; |
|
2768 // Intentional fall-through into DONE_LABEL ... |
|
2769 |
|
2770 |
|
2771 // DONE_LABEL is a hot target - we'd really like to place it at the |
|
2772 // start of cache line by padding with NOPs. |
|
2773 // See the AMD and Intel software optimization manuals for the |
|
2774 // most efficient "long" NOP encodings. |
|
2775 // Unfortunately none of our alignment mechanisms suffice. |
|
2776 bind(DONE_LABEL); |
|
2777 |
|
2778 // At DONE_LABEL the AT is set as follows ... |
|
2779 // Fast_Unlock uses the same protocol. |
|
2780 // AT == 1 -> Success |
|
2781 // AT == 0 -> Failure - force control through the slow-path |
|
2782 |
|
2783 // Avoid branch-to-branch on AMD processors |
|
2784 // This appears to be superstition. |
|
2785 if (EmitSync & 32) nop() ; |
|
2786 |
|
2787 } |
|
2788 } |
|
2789 |
|
2790 // obj: object to unlock |
|
2791 // box: box address (displaced header location), killed. Must be EAX. |
|
2792 // rbx,: killed tmp; cannot be obj nor box. |
|
2793 // |
|
2794 // Some commentary on balanced locking: |
|
2795 // |
|
2796 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. |
|
2797 // Methods that don't have provably balanced locking are forced to run in the |
|
2798 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. |
|
2799 // The interpreter provides two properties: |
|
2800 // I1: At return-time the interpreter automatically and quietly unlocks any |
|
2801 // objects acquired by the current activation (frame). Recall that the |
|
2802 // interpreter maintains an on-stack list of locks currently held by |
|
2803 // a frame. |
|
2804 // I2: If a method attempts to unlock an object that is not held by the |
|
2805 // frame the interpreter throws IMSX. |
|
2806 // |
|
2807 // Lets say A(), which has provably balanced locking, acquires O and then calls B(). |
|
2808 // B() doesn't have provably balanced locking so it runs in the interpreter. |
|
2809 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O |
|
2810 // is still locked by A(). |
|
2811 // |
|
2812 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
|
2813 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
|
2814 // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
|
2815 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
|
2816 |
|
2817 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) { |
|
2818 |
|
2819 guarantee (objReg != boxReg, "") ; |
|
2820 guarantee (objReg != tmpReg, "") ; |
|
2821 guarantee (boxReg != tmpReg, "") ; |
|
2822 |
|
2823 |
|
2824 |
|
2825 block_comment("FastUnlock"); |
|
2826 |
|
2827 |
|
2828 if (EmitSync & 4) { |
|
2829 // Disable - inhibit all inlining. Force control through the slow-path |
|
2830 move(AT, 0x0); |
|
2831 return; |
|
2832 } else |
|
2833 if (EmitSync & 8) { |
|
2834 Label DONE_LABEL ; |
|
2835 if (UseBiasedLocking) { |
|
2836 biased_locking_exit(objReg, tmpReg, DONE_LABEL); |
|
2837 } |
|
2838 // classic stack-locking code ... |
|
2839 ld(tmpReg, Address(boxReg, 0)) ; |
|
2840 beq(tmpReg, R0, DONE_LABEL) ; |
|
2841 move(AT, 0x1); // delay slot |
|
2842 |
|
2843 cmpxchg(tmpReg, Address(objReg, 0), boxReg); // Uses EAX which is box |
|
2844 bind(DONE_LABEL); |
|
2845 } else { |
|
2846 Label DONE_LABEL, Stacked, CheckSucc, Inflated ; |
|
2847 |
|
2848 // Critically, the biased locking test must have precedence over |
|
2849 // and appear before the (box->dhw == 0) recursive stack-lock test. |
|
2850 if (UseBiasedLocking && !UseOptoBiasInlining) { |
|
2851 biased_locking_exit(objReg, tmpReg, DONE_LABEL); |
|
2852 } |
|
2853 |
|
2854 ld(AT, Address(boxReg, 0)) ; // Examine the displaced header |
|
2855 beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock |
|
2856 delayed()->daddiu(AT, R0, 0x1); |
|
2857 |
|
2858 ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword |
|
2859 andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated? |
|
2860 beq(AT, R0, Stacked) ; // Inflated? |
|
2861 delayed()->nop(); |
|
2862 |
|
2863 bind(Inflated) ; |
|
2864 // It's inflated. |
|
2865 // Despite our balanced locking property we still check that m->_owner == Self |
|
2866 // as java routines or native JNI code called by this thread might |
|
2867 // have released the lock. |
|
2868 // Refer to the comments in synchronizer.cpp for how we might encode extra |
|
2869 // state in _succ so we can avoid fetching EntryList|cxq. |
|
2870 // |
|
2871 // I'd like to add more cases in fast_lock() and fast_unlock() -- |
|
2872 // such as recursive enter and exit -- but we have to be wary of |
|
2873 // I$ bloat, T$ effects and BP$ effects. |
|
2874 // |
|
2875 // If there's no contention try a 1-0 exit. That is, exit without |
|
2876 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how |
|
2877 // we detect and recover from the race that the 1-0 exit admits. |
|
2878 // |
|
2879 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier |
|
2880 // before it STs null into _owner, releasing the lock. Updates |
|
2881 // to data protected by the critical section must be visible before |
|
2882 // we drop the lock (and thus before any other thread could acquire |
|
2883 // the lock and observe the fields protected by the lock). |
|
2884 // IA32's memory-model is SPO, so STs are ordered with respect to |
|
2885 // each other and there's no need for an explicit barrier (fence). |
|
2886 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. |
|
2887 #ifndef OPT_THREAD |
|
2888 get_thread (TREG) ; |
|
2889 #endif |
|
2890 |
|
2891 // It's inflated |
|
2892 ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; |
|
2893 xorr(boxReg, boxReg, TREG); |
|
2894 |
|
2895 ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; |
|
2896 orr(boxReg, boxReg, AT); |
|
2897 |
|
2898 move(AT, R0); |
|
2899 bne(boxReg, R0, DONE_LABEL); |
|
2900 delayed()->nop(); |
|
2901 |
|
2902 ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; |
|
2903 ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; |
|
2904 orr(boxReg, boxReg, AT); |
|
2905 |
|
2906 move(AT, R0); |
|
2907 bne(boxReg, R0, DONE_LABEL); |
|
2908 delayed()->nop(); |
|
2909 |
|
2910 sync(); |
|
2911 sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; |
|
2912 move(AT, 0x1); |
|
2913 b(DONE_LABEL); |
|
2914 delayed()->nop(); |
|
2915 |
|
2916 bind (Stacked); |
|
2917 ld(tmpReg, Address(boxReg, 0)) ; |
|
2918 //if (os::is_MP()) { sync(); } |
|
2919 cmpxchg(tmpReg, Address(objReg, 0), boxReg); |
|
2920 |
|
2921 if (EmitSync & 65536) { |
|
2922 bind (CheckSucc); |
|
2923 } |
|
2924 |
|
2925 bind(DONE_LABEL); |
|
2926 |
|
2927 // Avoid branch to branch on AMD processors |
|
2928 if (EmitSync & 32768) { nop() ; } |
|
2929 } |
|
2930 } |
|
2931 |
|
2932 void MacroAssembler::align(int modulus) { |
|
2933 while (offset() % modulus != 0) nop(); |
|
2934 } |
|
2935 |
|
2936 |
|
2937 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { |
|
2938 //Unimplemented(); |
|
2939 } |
|
2940 |
|
2941 #ifdef _LP64 |
|
2942 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; |
|
2943 |
|
2944 /* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */ |
|
2945 FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; |
|
2946 #else |
|
2947 Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; |
|
2948 |
|
2949 Register caller_saved_fpu_registers[] = {}; |
|
2950 #endif |
|
2951 |
|
2952 //We preserve all caller-saved register |
|
2953 void MacroAssembler::pushad(){ |
|
2954 int i; |
|
2955 |
|
2956 /* Fixed-point registers */ |
|
2957 int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); |
|
2958 daddi(SP, SP, -1 * len * wordSize); |
|
2959 for (i = 0; i < len; i++) |
|
2960 { |
|
2961 #ifdef _LP64 |
|
2962 sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); |
|
2963 #else |
|
2964 sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize); |
|
2965 #endif |
|
2966 } |
|
2967 |
|
2968 /* Floating-point registers */ |
|
2969 len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); |
|
2970 daddi(SP, SP, -1 * len * wordSize); |
|
2971 for (i = 0; i < len; i++) |
|
2972 { |
|
2973 #ifdef _LP64 |
|
2974 sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); |
|
2975 #else |
|
2976 swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); |
|
2977 #endif |
|
2978 } |
|
2979 }; |
|
2980 |
|
2981 void MacroAssembler::popad(){ |
|
2982 int i; |
|
2983 |
|
2984 /* Floating-point registers */ |
|
2985 int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); |
|
2986 for (i = 0; i < len; i++) |
|
2987 { |
|
2988 #ifdef _LP64 |
|
2989 ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); |
|
2990 #else |
|
2991 lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); |
|
2992 #endif |
|
2993 } |
|
2994 daddi(SP, SP, len * wordSize); |
|
2995 |
|
2996 /* Fixed-point registers */ |
|
2997 len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); |
|
2998 for (i = 0; i < len; i++) |
|
2999 { |
|
3000 #ifdef _LP64 |
|
3001 ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); |
|
3002 #else |
|
3003 lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize); |
|
3004 #endif |
|
3005 } |
|
3006 daddi(SP, SP, len * wordSize); |
|
3007 }; |
|
3008 |
|
3009 void MacroAssembler::push2(Register reg1, Register reg2) { |
|
3010 #ifdef _LP64 |
|
3011 daddi(SP, SP, -16); |
|
3012 sd(reg2, SP, 0); |
|
3013 sd(reg1, SP, 8); |
|
3014 #else |
|
3015 addi(SP, SP, -8); |
|
3016 sw(reg2, SP, 0); |
|
3017 sw(reg1, SP, 4); |
|
3018 #endif |
|
3019 } |
|
3020 |
|
3021 void MacroAssembler::pop2(Register reg1, Register reg2) { |
|
3022 #ifdef _LP64 |
|
3023 ld(reg1, SP, 0); |
|
3024 ld(reg2, SP, 8); |
|
3025 daddi(SP, SP, 16); |
|
3026 #else |
|
3027 lw(reg1, SP, 0); |
|
3028 lw(reg2, SP, 4); |
|
3029 addi(SP, SP, 8); |
|
3030 #endif |
|
3031 } |
|
3032 |
|
3033 //for UseCompressedOops Option |
|
3034 void MacroAssembler::load_klass(Register dst, Register src) { |
|
3035 #ifdef _LP64 |
|
3036 if(UseCompressedClassPointers){ |
|
3037 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); |
|
3038 decode_klass_not_null(dst); |
|
3039 } else |
|
3040 #endif |
|
3041 ld(dst, src, oopDesc::klass_offset_in_bytes()); |
|
3042 } |
|
3043 |
|
3044 void MacroAssembler::store_klass(Register dst, Register src) { |
|
3045 #ifdef _LP64 |
|
3046 if(UseCompressedClassPointers){ |
|
3047 encode_klass_not_null(src); |
|
3048 sw(src, dst, oopDesc::klass_offset_in_bytes()); |
|
3049 } else { |
|
3050 #endif |
|
3051 sd(src, dst, oopDesc::klass_offset_in_bytes()); |
|
3052 } |
|
3053 } |
|
3054 |
|
3055 void MacroAssembler::load_prototype_header(Register dst, Register src) { |
|
3056 load_klass(dst, src); |
|
3057 ld(dst, Address(dst, Klass::prototype_header_offset())); |
|
3058 } |
|
3059 |
|
3060 #ifdef _LP64 |
|
3061 void MacroAssembler::store_klass_gap(Register dst, Register src) { |
|
3062 if (UseCompressedClassPointers) { |
|
3063 sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); |
|
3064 } |
|
3065 } |
|
3066 |
|
3067 void MacroAssembler::load_heap_oop(Register dst, Address src) { |
|
3068 if(UseCompressedOops){ |
|
3069 lwu(dst, src); |
|
3070 decode_heap_oop(dst); |
|
3071 } else{ |
|
3072 ld(dst, src); |
|
3073 } |
|
3074 } |
|
3075 |
|
3076 void MacroAssembler::store_heap_oop(Address dst, Register src){ |
|
3077 if(UseCompressedOops){ |
|
3078 assert(!dst.uses(src), "not enough registers"); |
|
3079 encode_heap_oop(src); |
|
3080 sw(src, dst); |
|
3081 } else{ |
|
3082 sd(src, dst); |
|
3083 } |
|
3084 } |
|
3085 |
|
#ifdef ASSERT
// Debug-only sanity check performed at code-generation time: some form of
// pointer compression must be enabled and the Java heap must already exist.
// Emits no instructions; msg is currently unused.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert(UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert(Universe::heap() != NULL, "java heap should be initialized");
}
#endif
|
3092 |
|
3093 |
|
3094 // Algorithm must match oop.inline.hpp encode_heap_oop. |
|
3095 void MacroAssembler::encode_heap_oop(Register r) { |
|
3096 #ifdef ASSERT |
|
3097 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); |
|
3098 #endif |
|
3099 verify_oop(r, "broken oop in encode_heap_oop"); |
|
3100 if (Universe::narrow_oop_base() == NULL) { |
|
3101 if (Universe::narrow_oop_shift() != 0) { |
|
3102 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3103 shr(r, LogMinObjAlignmentInBytes); |
|
3104 } |
|
3105 return; |
|
3106 } |
|
3107 |
|
3108 movz(r, S5_heapbase, r); |
|
3109 dsub(r, r, S5_heapbase); |
|
3110 if (Universe::narrow_oop_shift() != 0) { |
|
3111 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3112 shr(r, LogMinObjAlignmentInBytes); |
|
3113 } |
|
3114 } |
|
3115 |
|
3116 void MacroAssembler::encode_heap_oop(Register dst, Register src) { |
|
3117 #ifdef ASSERT |
|
3118 verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); |
|
3119 #endif |
|
3120 verify_oop(src, "broken oop in encode_heap_oop"); |
|
3121 if (Universe::narrow_oop_base() == NULL) { |
|
3122 if (Universe::narrow_oop_shift() != 0) { |
|
3123 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3124 dsrl(dst, src, LogMinObjAlignmentInBytes); |
|
3125 } else { |
|
3126 if (dst != src) move(dst, src); |
|
3127 } |
|
3128 } else { |
|
3129 if (dst == src) { |
|
3130 movz(dst, S5_heapbase, dst); |
|
3131 dsub(dst, dst, S5_heapbase); |
|
3132 if (Universe::narrow_oop_shift() != 0) { |
|
3133 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3134 shr(dst, LogMinObjAlignmentInBytes); |
|
3135 } |
|
3136 } else { |
|
3137 dsub(dst, src, S5_heapbase); |
|
3138 if (Universe::narrow_oop_shift() != 0) { |
|
3139 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3140 shr(dst, LogMinObjAlignmentInBytes); |
|
3141 } |
|
3142 movz(dst, R0, src); |
|
3143 } |
|
3144 } |
|
3145 } |
|
3146 |
|
3147 void MacroAssembler::encode_heap_oop_not_null(Register r) { |
|
3148 assert (UseCompressedOops, "should be compressed"); |
|
3149 #ifdef ASSERT |
|
3150 if (CheckCompressedOops) { |
|
3151 Label ok; |
|
3152 bne(r, R0, ok); |
|
3153 delayed()->nop(); |
|
3154 stop("null oop passed to encode_heap_oop_not_null"); |
|
3155 bind(ok); |
|
3156 } |
|
3157 #endif |
|
3158 verify_oop(r, "broken oop in encode_heap_oop_not_null"); |
|
3159 if (Universe::narrow_oop_base() != NULL) { |
|
3160 dsub(r, r, S5_heapbase); |
|
3161 } |
|
3162 if (Universe::narrow_oop_shift() != 0) { |
|
3163 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3164 shr(r, LogMinObjAlignmentInBytes); |
|
3165 } |
|
3166 |
|
3167 } |
|
3168 |
|
3169 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { |
|
3170 assert (UseCompressedOops, "should be compressed"); |
|
3171 #ifdef ASSERT |
|
3172 if (CheckCompressedOops) { |
|
3173 Label ok; |
|
3174 bne(src, R0, ok); |
|
3175 delayed()->nop(); |
|
3176 stop("null oop passed to encode_heap_oop_not_null2"); |
|
3177 bind(ok); |
|
3178 } |
|
3179 #endif |
|
3180 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); |
|
3181 |
|
3182 if (Universe::narrow_oop_base() != NULL) { |
|
3183 dsub(dst, src, S5_heapbase); |
|
3184 if (Universe::narrow_oop_shift() != 0) { |
|
3185 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3186 shr(dst, LogMinObjAlignmentInBytes); |
|
3187 } |
|
3188 } else { |
|
3189 if (Universe::narrow_oop_shift() != 0) { |
|
3190 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3191 dsrl(dst, src, LogMinObjAlignmentInBytes); |
|
3192 } else { |
|
3193 if (dst != src) move(dst, src); |
|
3194 } |
|
3195 } |
|
3196 } |
|
3197 |
|
3198 void MacroAssembler::decode_heap_oop(Register r) { |
|
3199 #ifdef ASSERT |
|
3200 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); |
|
3201 #endif |
|
3202 if (Universe::narrow_oop_base() == NULL) { |
|
3203 if (Universe::narrow_oop_shift() != 0) { |
|
3204 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3205 shl(r, LogMinObjAlignmentInBytes); |
|
3206 } |
|
3207 } else { |
|
3208 move(AT, r); |
|
3209 if (Universe::narrow_oop_shift() != 0) { |
|
3210 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3211 shl(r, LogMinObjAlignmentInBytes); |
|
3212 } |
|
3213 dadd(r, r, S5_heapbase); |
|
3214 movz(r, R0, AT); |
|
3215 } |
|
3216 verify_oop(r, "broken oop in decode_heap_oop"); |
|
3217 } |
|
3218 |
|
3219 void MacroAssembler::decode_heap_oop(Register dst, Register src) { |
|
3220 #ifdef ASSERT |
|
3221 verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); |
|
3222 #endif |
|
3223 if (Universe::narrow_oop_base() == NULL) { |
|
3224 if (Universe::narrow_oop_shift() != 0) { |
|
3225 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3226 if (dst != src) nop(); // DON'T DELETE THIS GUY. |
|
3227 dsll(dst, src, LogMinObjAlignmentInBytes); |
|
3228 } else { |
|
3229 if (dst != src) move(dst, src); |
|
3230 } |
|
3231 } else { |
|
3232 if (dst == src) { |
|
3233 move(AT, dst); |
|
3234 if (Universe::narrow_oop_shift() != 0) { |
|
3235 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3236 shl(dst, LogMinObjAlignmentInBytes); |
|
3237 } |
|
3238 dadd(dst, dst, S5_heapbase); |
|
3239 movz(dst, R0, AT); |
|
3240 } else { |
|
3241 if (Universe::narrow_oop_shift() != 0) { |
|
3242 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3243 dsll(dst, src, LogMinObjAlignmentInBytes); |
|
3244 daddu(dst, dst, S5_heapbase); |
|
3245 } else { |
|
3246 daddu(dst, src, S5_heapbase); |
|
3247 } |
|
3248 movz(dst, R0, src); |
|
3249 } |
|
3250 } |
|
3251 verify_oop(dst, "broken oop in decode_heap_oop"); |
|
3252 } |
|
3253 |
|
3254 void MacroAssembler::decode_heap_oop_not_null(Register r) { |
|
3255 // Note: it will change flags |
|
3256 assert (UseCompressedOops, "should only be used for compressed headers"); |
|
3257 assert (Universe::heap() != NULL, "java heap should be initialized"); |
|
3258 // Cannot assert, unverified entry point counts instructions (see .ad file) |
|
3259 // vtableStubs also counts instructions in pd_code_size_limit. |
|
3260 // Also do not verify_oop as this is called by verify_oop. |
|
3261 if (Universe::narrow_oop_shift() != 0) { |
|
3262 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3263 shl(r, LogMinObjAlignmentInBytes); |
|
3264 if (Universe::narrow_oop_base() != NULL) { |
|
3265 daddu(r, r, S5_heapbase); |
|
3266 } |
|
3267 } else { |
|
3268 assert (Universe::narrow_oop_base() == NULL, "sanity"); |
|
3269 } |
|
3270 } |
|
3271 |
|
3272 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { |
|
3273 assert (UseCompressedOops, "should only be used for compressed headers"); |
|
3274 assert (Universe::heap() != NULL, "java heap should be initialized"); |
|
3275 |
|
3276 // Cannot assert, unverified entry point counts instructions (see .ad file) |
|
3277 // vtableStubs also counts instructions in pd_code_size_limit. |
|
3278 // Also do not verify_oop as this is called by verify_oop. |
|
3279 //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); |
|
3280 if (Universe::narrow_oop_shift() != 0) { |
|
3281 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); |
|
3282 if (LogMinObjAlignmentInBytes == Address::times_8) { |
|
3283 dsll(dst, src, LogMinObjAlignmentInBytes); |
|
3284 daddu(dst, dst, S5_heapbase); |
|
3285 } else { |
|
3286 dsll(dst, src, LogMinObjAlignmentInBytes); |
|
3287 if (Universe::narrow_oop_base() != NULL) { |
|
3288 daddu(dst, dst, S5_heapbase); |
|
3289 } |
|
3290 } |
|
3291 } else { |
|
3292 assert (Universe::narrow_oop_base() == NULL, "sanity"); |
|
3293 if (dst != src) { |
|
3294 move(dst, src); |
|
3295 } |
|
3296 } |
|
3297 } |
|
3298 |
|
3299 void MacroAssembler::encode_klass_not_null(Register r) { |
|
3300 if (Universe::narrow_klass_base() != NULL) { |
|
3301 assert(r != AT, "Encoding a klass in AT"); |
|
3302 set64(AT, (int64_t)Universe::narrow_klass_base()); |
|
3303 dsub(r, r, AT); |
|
3304 } |
|
3305 if (Universe::narrow_klass_shift() != 0) { |
|
3306 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
|
3307 shr(r, LogKlassAlignmentInBytes); |
|
3308 } |
|
3309 // Not neccessary for MIPS at all. |
|
3310 //if (Universe::narrow_klass_base() != NULL) { |
|
3311 // reinit_heapbase(); |
|
3312 //} |
|
3313 } |
|
3314 |
|
3315 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { |
|
3316 if (dst == src) { |
|
3317 encode_klass_not_null(src); |
|
3318 } else { |
|
3319 if (Universe::narrow_klass_base() != NULL) { |
|
3320 set64(dst, (int64_t)Universe::narrow_klass_base()); |
|
3321 dsub(dst, src, dst); |
|
3322 if (Universe::narrow_klass_shift() != 0) { |
|
3323 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
|
3324 shr(dst, LogKlassAlignmentInBytes); |
|
3325 } |
|
3326 } else { |
|
3327 if (Universe::narrow_klass_shift() != 0) { |
|
3328 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
|
3329 dsrl(dst, src, LogKlassAlignmentInBytes); |
|
3330 } else { |
|
3331 move(dst, src); |
|
3332 } |
|
3333 } |
|
3334 } |
|
3335 } |
|
3336 |
|
3337 // Function instr_size_for_decode_klass_not_null() counts the instructions |
|
3338 // generated by decode_klass_not_null(register r) and reinit_heapbase(), |
|
3339 // when (Universe::heap() != NULL). Hence, if the instructions they |
|
3340 // generate change, then this method needs to be updated. |
|
3341 int MacroAssembler::instr_size_for_decode_klass_not_null() { |
|
3342 assert (UseCompressedClassPointers, "only for compressed klass ptrs"); |
|
3343 if (Universe::narrow_klass_base() != NULL) { |
|
3344 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). |
|
3345 return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); |
|
3346 } else { |
|
3347 // longest load decode klass function, mov64, leaq |
|
3348 return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); |
|
3349 } |
|
3350 } |
|
3351 |
|
3352 void MacroAssembler::decode_klass_not_null(Register r) { |
|
3353 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
|
3354 assert(r != AT, "Decoding a klass in AT"); |
|
3355 // Cannot assert, unverified entry point counts instructions (see .ad file) |
|
3356 // vtableStubs also counts instructions in pd_code_size_limit. |
|
3357 // Also do not verify_oop as this is called by verify_oop. |
|
3358 if (Universe::narrow_klass_shift() != 0) { |
|
3359 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
|
3360 shl(r, LogKlassAlignmentInBytes); |
|
3361 } |
|
3362 if (Universe::narrow_klass_base() != NULL) { |
|
3363 set64(AT, (int64_t)Universe::narrow_klass_base()); |
|
3364 daddu(r, r, AT); |
|
3365 //Not neccessary for MIPS at all. |
|
3366 //reinit_heapbase(); |
|
3367 } |
|
3368 } |
|
3369 |
|
3370 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { |
|
3371 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
|
3372 |
|
3373 if (dst == src) { |
|
3374 decode_klass_not_null(dst); |
|
3375 } else { |
|
3376 // Cannot assert, unverified entry point counts instructions (see .ad file) |
|
3377 // vtableStubs also counts instructions in pd_code_size_limit. |
|
3378 // Also do not verify_oop as this is called by verify_oop. |
|
3379 set64(dst, (int64_t)Universe::narrow_klass_base()); |
|
3380 if (Universe::narrow_klass_shift() != 0) { |
|
3381 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
|
3382 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); |
|
3383 dsll(AT, src, Address::times_8); |
|
3384 daddu(dst, dst, AT); |
|
3385 } else { |
|
3386 daddu(dst, src, dst); |
|
3387 } |
|
3388 } |
|
3389 } |
|
3390 |
|
3391 void MacroAssembler::incrementl(Register reg, int value) { |
|
3392 if (value == min_jint) { |
|
3393 move(AT, value); |
|
3394 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT)); |
|
3395 return; |
|
3396 } |
|
3397 if (value < 0) { decrementl(reg, -value); return; } |
|
3398 if (value == 0) { ; return; } |
|
3399 |
|
3400 if(Assembler::is_simm16(value)) { |
|
3401 NOT_LP64(addiu(reg, reg, value)); |
|
3402 LP64_ONLY(move(AT, value); addu32(reg, reg, AT)); |
|
3403 } else { |
|
3404 move(AT, value); |
|
3405 LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT)); |
|
3406 } |
|
3407 } |
|
3408 |
|
3409 void MacroAssembler::decrementl(Register reg, int value) { |
|
3410 if (value == min_jint) { |
|
3411 move(AT, value); |
|
3412 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT)); |
|
3413 return; |
|
3414 } |
|
3415 if (value < 0) { incrementl(reg, -value); return; } |
|
3416 if (value == 0) { ; return; } |
|
3417 |
|
3418 if(Assembler::is_simm16(value)) { |
|
3419 NOT_LP64(addiu(reg, reg, -value)); |
|
3420 LP64_ONLY(move(AT, value); subu32(reg, reg, AT)); |
|
3421 } else { |
|
3422 move(AT, value); |
|
3423 LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT)); |
|
3424 } |
|
3425 } |
|
3426 |
|
3427 void MacroAssembler::reinit_heapbase() { |
|
3428 if (UseCompressedOops || UseCompressedClassPointers) { |
|
3429 if (Universe::heap() != NULL) { |
|
3430 if (Universe::narrow_oop_base() == NULL) { |
|
3431 move(S5_heapbase, R0); |
|
3432 } else { |
|
3433 set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); |
|
3434 } |
|
3435 } else { |
|
3436 set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); |
|
3437 ld(S5_heapbase, S5_heapbase, 0); |
|
3438 } |
|
3439 } |
|
3440 } |
|
3441 #endif // _LP64 |
|
3442 |
|
3443 void MacroAssembler::check_klass_subtype(Register sub_klass, |
|
3444 Register super_klass, |
|
3445 Register temp_reg, |
|
3446 Label& L_success) { |
|
3447 //implement ind gen_subtype_check |
|
3448 Label L_failure; |
|
3449 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); |
|
3450 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); |
|
3451 bind(L_failure); |
|
3452 } |
|
3453 |
|
3454 SkipIfEqual::SkipIfEqual( |
|
3455 MacroAssembler* masm, const bool* flag_addr, bool value) { |
|
3456 _masm = masm; |
|
3457 _masm->li(AT, (address)flag_addr); |
|
3458 _masm->lb(AT,AT,0); |
|
3459 _masm->addi(AT,AT,-value); |
|
3460 _masm->beq(AT,R0,_label); |
|
3461 _masm->delayed()->nop(); |
|
3462 } |
|
3463 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, |
|
3464 Register super_klass, |
|
3465 Register temp_reg, |
|
3466 Label* L_success, |
|
3467 Label* L_failure, |
|
3468 Label* L_slow_path, |
|
3469 RegisterOrConstant super_check_offset) { |
|
3470 assert_different_registers(sub_klass, super_klass, temp_reg); |
|
3471 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); |
|
3472 if (super_check_offset.is_register()) { |
|
3473 assert_different_registers(sub_klass, super_klass, |
|
3474 super_check_offset.as_register()); |
|
3475 } else if (must_load_sco) { |
|
3476 assert(temp_reg != noreg, "supply either a temp or a register offset"); |
|
3477 } |
|
3478 |
|
3479 Label L_fallthrough; |
|
3480 int label_nulls = 0; |
|
3481 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } |
|
3482 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } |
|
3483 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } |
|
3484 assert(label_nulls <= 1, "at most one NULL in the batch"); |
|
3485 |
|
3486 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
|
3487 int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
|
3488 // If the pointers are equal, we are done (e.g., String[] elements). |
|
3489 // This self-check enables sharing of secondary supertype arrays among |
|
3490 // non-primary types such as array-of-interface. Otherwise, each such |
|
3491 // type would need its own customized SSA. |
|
3492 // We move this check to the front of the fast path because many |
|
3493 // type checks are in fact trivially successful in this manner, |
|
3494 // so we get a nicely predicted branch right at the start of the check. |
|
3495 //cmpptr(sub_klass, super_klass); |
|
3496 //local_jcc(Assembler::equal, *L_success); |
|
3497 beq(sub_klass, super_klass, *L_success); |
|
3498 delayed()->nop(); |
|
3499 // Check the supertype display: |
|
3500 if (must_load_sco) { |
|
3501 // Positive movl does right thing on LP64. |
|
3502 lwu(temp_reg, super_klass, sco_offset); |
|
3503 super_check_offset = RegisterOrConstant(temp_reg); |
|
3504 } |
|
3505 dsll(AT, super_check_offset.register_or_noreg(), Address::times_1); |
|
3506 daddu(AT, sub_klass, AT); |
|
3507 ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1); |
|
3508 |
|
3509 // This check has worked decisively for primary supers. |
|
3510 // Secondary supers are sought in the super_cache ('super_cache_addr'). |
|
3511 // (Secondary supers are interfaces and very deeply nested subtypes.) |
|
3512 // This works in the same check above because of a tricky aliasing |
|
3513 // between the super_cache and the primary super display elements. |
|
3514 // (The 'super_check_addr' can address either, as the case requires.) |
|
3515 // Note that the cache is updated below if it does not help us find |
|
3516 // what we need immediately. |
|
3517 // So if it was a primary super, we can just fail immediately. |
|
3518 // Otherwise, it's the slow path for us (no success at this point). |
|
3519 |
|
3520 if (super_check_offset.is_register()) { |
|
3521 beq(super_klass, AT, *L_success); |
|
3522 delayed()->nop(); |
|
3523 addi(AT, super_check_offset.as_register(), -sc_offset); |
|
3524 if (L_failure == &L_fallthrough) { |
|
3525 beq(AT, R0, *L_slow_path); |
|
3526 delayed()->nop(); |
|
3527 } else { |
|
3528 bne(AT, R0, *L_failure); |
|
3529 delayed()->nop(); |
|
3530 b(*L_slow_path); |
|
3531 delayed()->nop(); |
|
3532 } |
|
3533 } else if (super_check_offset.as_constant() == sc_offset) { |
|
3534 // Need a slow path; fast failure is impossible. |
|
3535 if (L_slow_path == &L_fallthrough) { |
|
3536 beq(super_klass, AT, *L_success); |
|
3537 delayed()->nop(); |
|
3538 } else { |
|
3539 bne(super_klass, AT, *L_slow_path); |
|
3540 delayed()->nop(); |
|
3541 b(*L_success); |
|
3542 delayed()->nop(); |
|
3543 } |
|
3544 } else { |
|
3545 // No slow path; it's a fast decision. |
|
3546 if (L_failure == &L_fallthrough) { |
|
3547 beq(super_klass, AT, *L_success); |
|
3548 delayed()->nop(); |
|
3549 } else { |
|
3550 bne(super_klass, AT, *L_failure); |
|
3551 delayed()->nop(); |
|
3552 b(*L_success); |
|
3553 delayed()->nop(); |
|
3554 } |
|
3555 } |
|
3556 |
|
3557 bind(L_fallthrough); |
|
3558 |
|
3559 } |
|
3560 |
|
3561 |
|
3562 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, |
|
3563 Register super_klass, |
|
3564 Register temp_reg, |
|
3565 Register temp2_reg, |
|
3566 Label* L_success, |
|
3567 Label* L_failure, |
|
3568 bool set_cond_codes) { |
|
3569 assert_different_registers(sub_klass, super_klass, temp_reg); |
|
3570 if (temp2_reg != noreg) |
|
3571 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); |
|
3572 else |
|
3573 temp2_reg = T9; |
|
3574 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) |
|
3575 |
|
3576 Label L_fallthrough; |
|
3577 int label_nulls = 0; |
|
3578 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } |
|
3579 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } |
|
3580 assert(label_nulls <= 1, "at most one NULL in the batch"); |
|
3581 |
|
3582 // a couple of useful fields in sub_klass: |
|
3583 int ss_offset = in_bytes(Klass::secondary_supers_offset()); |
|
3584 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
|
3585 Address secondary_supers_addr(sub_klass, ss_offset); |
|
3586 Address super_cache_addr( sub_klass, sc_offset); |
|
3587 |
|
3588 // Do a linear scan of the secondary super-klass chain. |
|
3589 // This code is rarely used, so simplicity is a virtue here. |
|
3590 // The repne_scan instruction uses fixed registers, which we must spill. |
|
3591 // Don't worry too much about pre-existing connections with the input regs. |
|
3592 |
|
3593 #if 0 |
|
3594 assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super) |
|
3595 assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter) |
|
3596 #endif |
|
3597 |
|
3598 // Get super_klass value into rax (even if it was in rdi or rcx). |
|
3599 #ifndef PRODUCT |
|
3600 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; |
|
3601 ExternalAddress pst_counter_addr((address) pst_counter); |
|
3602 NOT_LP64( incrementl(pst_counter_addr) ); |
|
3603 //LP64_ONLY( lea(rcx, pst_counter_addr) ); |
|
3604 //LP64_ONLY( incrementl(Address(rcx, 0)) ); |
|
3605 #endif //PRODUCT |
|
3606 |
|
3607 // We will consult the secondary-super array. |
|
3608 ld(temp_reg, secondary_supers_addr); |
|
3609 // Load the array length. (Positive movl does right thing on LP64.) |
|
3610 lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes())); |
|
3611 // Skip to start of data. |
|
3612 daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes()); |
|
3613 |
|
3614 // Scan RCX words at [RDI] for an occurrence of RAX. |
|
3615 // Set NZ/Z based on last compare. |
|
3616 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does |
|
3617 // not change flags (only scas instruction which is repeated sets flags). |
|
3618 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. |
|
3619 |
|
3620 /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */ |
|
3621 Label Loop, subtype; |
|
3622 bind(Loop); |
|
3623 beq(temp2_reg, R0, *L_failure); |
|
3624 delayed()->nop(); |
|
3625 ld(AT, temp_reg, 0); |
|
3626 beq(AT, super_klass, subtype); |
|
3627 delayed()->daddi(temp_reg, temp_reg, 1 * wordSize); |
|
3628 b(Loop); |
|
3629 delayed()->daddi(temp2_reg, temp2_reg, -1); |
|
3630 |
|
3631 bind(subtype); |
|
3632 sd(super_klass, super_cache_addr); |
|
3633 if (L_success != &L_fallthrough) { |
|
3634 b(*L_success); |
|
3635 delayed()->nop(); |
|
3636 } |
|
3637 |
|
3638 // Success. Cache the super we found and proceed in triumph. |
|
3639 #undef IS_A_TEMP |
|
3640 |
|
3641 bind(L_fallthrough); |
|
3642 } |
|
3643 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { |
|
3644 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); |
|
3645 sd(R0, Address(java_thread, JavaThread::vm_result_offset())); |
|
3646 verify_oop(oop_result, "broken oop in call_VM_base"); |
|
3647 } |
|
3648 |
|
3649 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { |
|
3650 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); |
|
3651 sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); |
|
3652 } |
|
3653 |
|
3654 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, |
|
3655 int extra_slot_offset) { |
|
3656 // cf. TemplateTable::prepare_invoke(), if (load_receiver). |
|
3657 int stackElementSize = Interpreter::stackElementSize; |
|
3658 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); |
|
3659 #ifdef ASSERT |
|
3660 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); |
|
3661 assert(offset1 - offset == stackElementSize, "correct arithmetic"); |
|
3662 #endif |
|
3663 Register scale_reg = NOREG; |
|
3664 Address::ScaleFactor scale_factor = Address::no_scale; |
|
3665 if (arg_slot.is_constant()) { |
|
3666 offset += arg_slot.as_constant() * stackElementSize; |
|
3667 } else { |
|
3668 scale_reg = arg_slot.as_register(); |
|
3669 scale_factor = Address::times_8; |
|
3670 } |
|
3671 // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke. |
|
3672 // offset += wordSize; // return PC is on stack |
|
3673 if(scale_reg==NOREG) return Address(SP, offset); |
|
3674 else { |
|
3675 dsll(scale_reg, scale_reg, scale_factor); |
|
3676 daddu(scale_reg, SP, scale_reg); |
|
3677 return Address(scale_reg, offset); |
|
3678 } |
|
3679 } |
|
3680 |
|
3681 SkipIfEqual::~SkipIfEqual() { |
|
3682 _masm->bind(_label); |
|
3683 } |
|
3684 |
|
3685 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { |
|
3686 switch (size_in_bytes) { |
|
3687 #ifndef _LP64 |
|
3688 case 8: |
|
3689 assert(dst2 != noreg, "second dest register required"); |
|
3690 lw(dst, src); |
|
3691 lw(dst2, src.plus_disp(BytesPerInt)); |
|
3692 break; |
|
3693 #else |
|
3694 case 8: ld(dst, src); break; |
|
3695 #endif |
|
3696 case 4: lw(dst, src); break; |
|
3697 case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; |
|
3698 case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; |
|
3699 default: ShouldNotReachHere(); |
|
3700 } |
|
3701 } |
|
3702 |
|
3703 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { |
|
3704 switch (size_in_bytes) { |
|
3705 #ifndef _LP64 |
|
3706 case 8: |
|
3707 assert(src2 != noreg, "second source register required"); |
|
3708 sw(src, dst); |
|
3709 sw(src2, dst.plus_disp(BytesPerInt)); |
|
3710 break; |
|
3711 #else |
|
3712 case 8: sd(src, dst); break; |
|
3713 #endif |
|
3714 case 4: sw(src, dst); break; |
|
3715 case 2: sh(src, dst); break; |
|
3716 case 1: sb(src, dst); break; |
|
3717 default: ShouldNotReachHere(); |
|
3718 } |
|
3719 } |
|
3720 |
|
3721 // Look up the method for a megamorphic invokeinterface call. |
|
3722 // The target method is determined by <intf_klass, itable_index>. |
|
3723 // The receiver klass is in recv_klass. |
|
3724 // On success, the result will be in method_result, and execution falls through. |
|
3725 // On failure, execution transfers to the given label. |
|
3726 void MacroAssembler::lookup_interface_method(Register recv_klass, |
|
3727 Register intf_klass, |
|
3728 RegisterOrConstant itable_index, |
|
3729 Register method_result, |
|
3730 Register scan_temp, |
|
3731 Label& L_no_such_interface) { |
|
3732 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); |
|
3733 assert(itable_index.is_constant() || itable_index.as_register() == method_result, |
|
3734 "caller must use same register for non-constant itable index as for method"); |
|
3735 |
|
3736 // Compute start of first itableOffsetEntry (which is at the end of the vtable) |
|
3737 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; |
|
3738 int itentry_off = itableMethodEntry::method_offset_in_bytes(); |
|
3739 int scan_step = itableOffsetEntry::size() * wordSize; |
|
3740 int vte_size = vtableEntry::size() * wordSize; |
|
3741 Address::ScaleFactor times_vte_scale = Address::times_ptr; |
|
3742 assert(vte_size == wordSize, "else adjust times_vte_scale"); |
|
3743 |
|
3744 lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); |
|
3745 |
|
3746 // %%% Could store the aligned, prescaled offset in the klassoop. |
|
3747 dsll(scan_temp, scan_temp, times_vte_scale); |
|
3748 daddu(scan_temp, recv_klass, scan_temp); |
|
3749 daddiu(scan_temp, scan_temp, vtable_base); |
|
3750 if (HeapWordsPerLong > 1) { |
|
3751 // Round up to align_object_offset boundary |
|
3752 // see code for InstanceKlass::start_of_itable! |
|
3753 round_to(scan_temp, BytesPerLong); |
|
3754 } |
|
3755 |
|
3756 // Adjust recv_klass by scaled itable_index, so we can free itable_index. |
|
3757 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); |
|
3758 // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); |
|
3759 if (itable_index.is_constant()) { |
|
3760 set64(AT, (int)itable_index.is_constant()); |
|
3761 dsll(AT, AT, (int)Address::times_ptr); |
|
3762 } else { |
|
3763 dsll(AT, itable_index.as_register(), (int)Address::times_ptr); |
|
3764 } |
|
3765 daddu(AT, AT, recv_klass); |
|
3766 daddiu(recv_klass, AT, itentry_off); |
|
3767 |
|
3768 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { |
|
3769 // if (scan->interface() == intf) { |
|
3770 // result = (klass + scan->offset() + itable_index); |
|
3771 // } |
|
3772 // } |
|
3773 Label search, found_method; |
|
3774 |
|
3775 for (int peel = 1; peel >= 0; peel--) { |
|
3776 ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); |
|
3777 |
|
3778 if (peel) { |
|
3779 beq(intf_klass, method_result, found_method); |
|
3780 nop(); |
|
3781 } else { |
|
3782 bne(intf_klass, method_result, search); |
|
3783 nop(); |
|
3784 // (invert the test to fall through to found_method...) |
|
3785 } |
|
3786 |
|
3787 if (!peel) break; |
|
3788 |
|
3789 bind(search); |
|
3790 |
|
3791 // Check that the previous entry is non-null. A null entry means that |
|
3792 // the receiver class doesn't implement the interface, and wasn't the |
|
3793 // same as when the caller was compiled. |
|
3794 beq(method_result, R0, L_no_such_interface); |
|
3795 nop(); |
|
3796 daddiu(scan_temp, scan_temp, scan_step); |
|
3797 } |
|
3798 |
|
3799 bind(found_method); |
|
3800 |
|
3801 // Got a hit. |
|
3802 lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); |
|
3803 //ld(method_result, Address(recv_klass, scan_temp, Address::times_1)); |
|
3804 if(UseLoongsonISA) { |
|
3805 gsldx(method_result, recv_klass, scan_temp, 0); |
|
3806 } else { |
|
3807 daddu(AT, recv_klass, scan_temp); |
|
3808 ld(method_result, AT); |
|
3809 } |
|
3810 } |
|
3811 |
|
3812 |
|
3813 // virtual method calling |
|
3814 void MacroAssembler::lookup_virtual_method(Register recv_klass, |
|
3815 RegisterOrConstant vtable_index, |
|
3816 Register method_result) { |
|
3817 Register tmp = GP; |
|
3818 push(tmp); |
|
3819 |
|
3820 if (vtable_index.is_constant()) { |
|
3821 assert_different_registers(recv_klass, method_result, tmp); |
|
3822 } else { |
|
3823 assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); |
|
3824 } |
|
3825 const int base = InstanceKlass::vtable_start_offset() * wordSize; |
|
3826 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); |
|
3827 /* |
|
3828 Address vtable_entry_addr(recv_klass, |
|
3829 vtable_index, Address::times_ptr, |
|
3830 base + vtableEntry::method_offset_in_bytes()); |
|
3831 */ |
|
3832 if (vtable_index.is_constant()) { |
|
3833 set64(AT, vtable_index.as_constant()); |
|
3834 dsll(AT, AT, (int)Address::times_ptr); |
|
3835 } else { |
|
3836 dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); |
|
3837 } |
|
3838 set64(tmp, base + vtableEntry::method_offset_in_bytes()); |
|
3839 daddu(tmp, tmp, AT); |
|
3840 daddu(tmp, tmp, recv_klass); |
|
3841 ld(method_result, tmp, 0); |
|
3842 |
|
3843 pop(tmp); |
|
3844 } |