Tue, 26 Jul 2016 17:29:00 +0800
Integer multiplication performance is up by more than 70% on the test program below.
------------------- Test Program -----------------------------------
public class Test {
    public static void main(String[] args) {
        int LEN = 650000000;
        int result = 0;
        for (int j = 1; j < 20; j++) {
            result = 0;
            for (int i = 1; i < LEN; i++) {
                result += i * (i + 1);
            }
        }
        System.out.println("result = " + result);
    }
}
------------------- Test Program End-----------------------------------
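For timing, a minimal wall-clock harness (an illustrative sketch, not part of the original test; class and variable names are hypothetical) could look like:

public class TestTimed {
    public static void main(String[] args) {
        final int LEN = 650000000;
        int result = 0;
        for (int j = 1; j < 20; j++) {
            long start = System.nanoTime();      // time each outer iteration
            result = 0;
            for (int i = 1; i < LEN; i++) {
                result += i * (i + 1);           // the int multiply being measured
            }
            long ms = (System.nanoTime() - start) / 1000000;
            System.out.println("iteration " + j + ": " + ms + " ms");
        }
        System.out.println("result = " + result); // keep the computation live
    }
}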
1 /*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "interpreter/interpreter.hpp"
29 #include "interpreter/interpreterRuntime.hpp"
30 #include "interpreter/templateTable.hpp"
31 #include "memory/universe.inline.hpp"
32 #include "oops/methodData.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/sharedRuntime.hpp"
37 #include "runtime/stubRoutines.hpp"
38 #include "runtime/synchronizer.hpp"
41 #ifndef CC_INTERP
43 #define __ _masm->
45 // Platform-dependent initialization
47 void TemplateTable::pd_initialize() {
48 // No mips specific initialization
49 }
51 // Address computation: local variables
52 // we use t8 as the local variables pointer register, by yjl 6/27/2005
53 static inline Address iaddress(int n) {
54 return Address(LVP, Interpreter::local_offset_in_bytes(n));
55 }
57 static inline Address laddress(int n) {
58 return iaddress(n + 1);
59 }
61 static inline Address faddress(int n) {
62 return iaddress(n);
63 }
65 static inline Address daddress(int n) {
66 return laddress(n);
67 }
69 static inline Address aaddress(int n) {
70 return iaddress(n);
71 }
72 static inline Address haddress(int n) { return iaddress(n + 0); }
74 // FIXME: cannot use dadd and dsll
75 /*
76 static inline Address iaddress(Register r) {
77 return Address(r14, r, Address::times_8, Interpreter::value_offset_in_bytes());
78 }
80 static inline Address laddress(Register r) {
81 return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
82 }
84 static inline Address faddress(Register r) {
85 return iaddress(r);
86 }
88 static inline Address daddress(Register r) {
89 return laddress(r);
90 }
92 static inline Address aaddress(Register r) {
93 return iaddress(r);
94 }
95 */
97 static inline Address at_sp() { return Address(SP, 0); }
98 static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); }
99 static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); }
101 // At the top of the Java expression stack, which may be different from SP. It
102 // isn't for category 1 values.
103 static inline Address at_tos () {
104 Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0));
105 return tos;
106 }
108 static inline Address at_tos_p1() {
109 return Address(SP, Interpreter::expr_offset_in_bytes(1));
110 }
112 static inline Address at_tos_p2() {
113 return Address(SP, Interpreter::expr_offset_in_bytes(2));
114 }
116 static inline Address at_tos_p3() {
117 return Address(SP, Interpreter::expr_offset_in_bytes(3));
118 }
120 // we use S0 as bcp; make sure bcp is in S0 before calling any of the template generators
121 Address TemplateTable::at_bcp(int offset) {
122 assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
123 return Address(BCP, offset);
124 }
126 #define callee_saved_register(R) assert((R>=S0 && R<=S7), "should use callee saved registers!")
128 // bytecode folding
129 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
130 Register tmp_reg,
131 bool load_bc_into_bc_reg,/*=true*/
132 int byte_no) {
133 if (!RewriteBytecodes) {
134 return;
135 }
137 Label L_patch_done;
138 switch (bc) {
139 case Bytecodes::_fast_aputfield:
140 case Bytecodes::_fast_bputfield:
141 case Bytecodes::_fast_cputfield:
142 case Bytecodes::_fast_dputfield:
143 case Bytecodes::_fast_fputfield:
144 case Bytecodes::_fast_iputfield:
145 case Bytecodes::_fast_lputfield:
146 case Bytecodes::_fast_sputfield:
147 {
148 // We skip bytecode quickening for putfield instructions when the put_code written to the constant pool cache
149 // is zero. This is required so that every execution of this instruction calls out to
150 // InterpreterRuntime::resolve_get_put to do additional, required work.
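// Illustrative effect of quickening: once the cache entry is resolved, the
// putfield in the bytecode stream is overwritten with e.g. _fast_iputfield,
// so later executions skip constant-pool resolution entirely.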
151 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
152 assert(load_bc_into_bc_reg, "we use bc_reg as temp");
153 __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1);
154 __ daddi(bc_reg, R0, bc);
155 __ beq(tmp_reg, R0, L_patch_done);
156 __ delayed()->nop();
157 }
158 break;
159 default:
160 assert(byte_no == -1, "sanity");
161 // the pair bytecodes have already done the load.
162 if (load_bc_into_bc_reg) {
163 __ move(bc_reg, bc);
164 }
166 }
167 if (JvmtiExport::can_post_breakpoint()) {
168 Label L_fast_patch;
169 // if a breakpoint is present we can't rewrite the stream directly
170 __ lbu(tmp_reg, at_bcp(0));
171 __ move(AT, Bytecodes::_breakpoint);
172 __ bne(tmp_reg, AT, L_fast_patch);
173 __ delayed()->nop();
175 __ get_method(tmp_reg);
176 // Let breakpoint table handling rewrite to quicker bytecode
177 __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
178 InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg);
180 __ b(L_patch_done);
181 __ delayed()->nop();
182 __ bind(L_fast_patch);
183 }
185 #ifdef ASSERT
186 Label L_okay;
187 __ lbu(tmp_reg, at_bcp(0));
188 __ move(AT, (int)Bytecodes::java_code(bc));
189 __ beq(tmp_reg, AT, L_okay);
190 __ delayed()->nop();
191 __ beq(tmp_reg, bc_reg, L_patch_done);
192 __ delayed()->nop();
193 __ stop("patching the wrong bytecode");
194 __ bind(L_okay);
195 #endif
197 // patch bytecode
198 __ sb(bc_reg, at_bcp(0));
199 __ bind(L_patch_done);
200 }
203 // Individual instructions
205 void TemplateTable::nop() {
206 transition(vtos, vtos);
207 // nothing to do
208 }
210 void TemplateTable::shouldnotreachhere() {
211 transition(vtos, vtos);
212 __ stop("shouldnotreachhere bytecode");
213 }
215 void TemplateTable::aconst_null() {
216 transition(vtos, atos);
217 __ move(FSR, R0);
218 }
220 void TemplateTable::iconst(int value) {
221 transition(vtos, itos);
222 if (value == 0) {
223 __ move(FSR, R0);
224 } else {
225 __ move(FSR, value);
226 }
227 }
229 void TemplateTable::lconst(int value) {
230 transition(vtos, ltos);
231 if (value == 0) {
232 __ move(FSR, R0);
233 } else {
234 __ move(FSR, value);
235 }
236 assert(value >= 0, "check this code");
237 //__ move(SSR, R0);
238 }
240 void TemplateTable::fconst(int value) {
241 static float _f1 = 1.0, _f2 = 2.0;
242 transition(vtos, ftos);
243 float* p;
244 switch( value ) {
245 default: ShouldNotReachHere();
246 case 0: __ dmtc1(R0, FSF); return;
247 case 1: p = &_f1; break;
248 case 2: p = &_f2; break;
249 }
250 __ li(AT, (address)p);
251 __ lwc1(FSF, AT, 0);
252 }
254 void TemplateTable::dconst(int value) {
255 static double _d1 = 1.0;
256 transition(vtos, dtos);
257 double* p;
258 switch( value ) {
259 default: ShouldNotReachHere();
260 case 0: __ dmtc1(R0, FSF); return;
261 case 1: p = &_d1; break;
262 }
263 __ li(AT, (address)p);
264 __ ldc1(FSF, AT, 0);
265 }
267 void TemplateTable::bipush() {
268 transition(vtos, itos);
269 __ lb(FSR, at_bcp(1));
270 }
272 void TemplateTable::sipush() {
273 transition(vtos, itos);
274 __ get_2_byte_integer_at_bcp(FSR, AT, 1);
275 __ hswap(FSR);
276 }
278 // T1 : tags
279 // T2 : index
280 // T3 : cpool
281 // T8 : tag
282 void TemplateTable::ldc(bool wide) {
283 transition(vtos, vtos);
284 Label call_ldc, notFloat, notClass, Done;
285 // get index in cpool
286 if (wide) {
287 __ get_2_byte_integer_at_bcp(T2, AT, 1);
288 __ huswap(T2);
289 } else {
290 __ lbu(T2, at_bcp(1));
291 }
293 __ get_cpool_and_tags(T3, T1);
295 const int base_offset = ConstantPool::header_size() * wordSize;
296 const int tags_offset = Array<u1>::base_offset_in_bytes();
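// Layout note (for reference): entry i's payload is at cpool + base_offset + i * wordSize,
// and its tag byte is at tags + tags_offset + i; the code below indexes both.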
298 // get type
299 __ dadd(AT, T1, T2);
300 __ lb(T1, AT, tags_offset);
301 //now T1 is the tag
303 // unresolved string - get the resolved string
304 /*__ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedString);
305 __ beq(AT, R0, call_ldc);
306 __ delayed()->nop();*/
308 // unresolved class - get the resolved class
309 __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass);
310 __ beq(AT, R0, call_ldc);
311 __ delayed()->nop();
313 // unresolved class in error (resolution failed) - call into runtime
314 // so that the same error from first resolution attempt is thrown.
315 __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError);
316 __ beq(AT, R0, call_ldc);
317 __ delayed()->nop();
319 // resolved class - need to call vm to get java mirror of the class
320 __ daddiu(AT, T1, - JVM_CONSTANT_Class);
321 __ bne(AT, R0, notClass);
322 __ delayed()->dsll(T2, T2, Address::times_8);
324 __ bind(call_ldc);
326 __ move(A1, wide);
327 call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1);
328 // __ sw(FSR, SP, - 1 * wordSize);
329 __ push(atos);
330 __ b(Done);
331 // __ delayed()->daddi(SP, SP, - 1 * wordSize);
332 __ delayed()->nop();
333 __ bind(notClass);
335 __ daddiu(AT, T1, -JVM_CONSTANT_Float);
336 __ bne(AT, R0, notFloat);
337 __ delayed()->nop();
338 // ftos
339 __ dadd(AT, T3, T2);
340 __ lwc1(FSF, AT, base_offset);
341 __ push_f();
342 __ b(Done);
343 __ delayed()->nop();
345 __ bind(notFloat);
346 #ifdef ASSERT
347 {
348 Label L;
349 __ daddiu(AT, T1, -JVM_CONSTANT_Integer);
350 __ beq(AT, R0, L);
351 __ delayed()->nop();
352 __ stop("unexpected tag type in ldc");
353 __ bind(L);
354 }
355 #endif
356 // atos and itos
357 __ dadd(T0, T3, T2);
358 __ lw(FSR, T0, base_offset);
359 __ push(itos);
360 __ b(Done);
361 __ delayed()->nop();
364 if (VerifyOops) {
365 __ verify_oop(FSR);
366 }
368 __ bind(Done);
369 }
371 // Fast path for caching oop constants.
372 void TemplateTable::fast_aldc(bool wide) {
373 transition(vtos, atos);
375 Register result = FSR;
376 Register tmp = SSR;
377 int index_size = wide ? sizeof(u2) : sizeof(u1);
379 Label resolved;
380 // We are resolved if the resolved reference cache entry contains a
381 // non-null object (String, MethodType, etc.)
382 assert_different_registers(result, tmp);
383 __ get_cache_index_at_bcp(tmp, 1, index_size);
384 __ load_resolved_reference_at_index(result, tmp);
385 __ bne(result, R0, resolved);
386 __ delayed()->nop();
388 address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
389 // first time invocation - must resolve first
390 int i = (int)bytecode();
391 __ move(tmp, i);
392 __ call_VM(result, entry, tmp);
394 __ bind(resolved);
396 if (VerifyOops) {
397 __ verify_oop(result);
398 }
399 }
402 // used register: T2, T3, T1
403 // T2 : index
404 // T3 : cpool
405 // T1 : tag
406 void TemplateTable::ldc2_w() {
407 transition(vtos, vtos);
408 Label Long, Done;
410 // get index in cpool
411 __ get_2_byte_integer_at_bcp(T2, AT, 1);
412 __ huswap(T2);
414 __ get_cpool_and_tags(T3, T1);
416 const int base_offset = ConstantPool::header_size() * wordSize;
417 const int tags_offset = Array<u1>::base_offset_in_bytes();
419 // get type in T1
420 __ dadd(AT, T1, T2);
421 __ lb(T1, AT, tags_offset);
423 __ daddiu(AT, T1, - JVM_CONSTANT_Double);
424 __ bne(AT, R0, Long);
425 __ delayed()->dsll(T2, T2, Address::times_8);
426 // dtos
427 __ daddu(AT, T3, T2);
428 __ ldc1(FSF, AT, base_offset + 0 * wordSize);
429 __ sdc1(FSF, SP, - 2 * wordSize);
430 __ b(Done);
431 __ delayed()->daddi(SP, SP, - 2 * wordSize);
433 // ltos
434 __ bind(Long);
435 __ dadd(AT, T3, T2);
436 __ ld(FSR, AT, base_offset + 0 * wordSize);
437 __ push(ltos);
439 __ bind(Done);
440 }
442 // we compute the actual local variable address here;
443 // x86 needn't, since it has scaled-index addressing; we don't, so we form LVP - index*8 explicitly
444 void TemplateTable::locals_index(Register reg, int offset) {
445 __ lbu(reg, at_bcp(offset));
446 __ dsll(reg, reg, Address::times_8);
447 __ dsub(reg, LVP, reg);
448 }
450 // this method will do bytecode folding of the two forms:
451 // iload iload, and iload caload
452 // used register : T2, T3
453 // T2 : bytecode
454 // T3 : folded code
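// Example (illustrative): in the stream 'iload 4; iload 5', the first iload is
// rewritten to _fast_iload2, whose template loads both locals (operands at
// bcp+1 and bcp+3); 'iload 4; caload' is rewritten to _fast_icaload.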
455 void TemplateTable::iload() {
456 transition(vtos, itos);
457 if (RewriteFrequentPairs) {
458 Label rewrite, done;
459 // get the next bytecode in T2
460 __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
461 // if the next bytecode is _iload, wait to rewrite: we only want to rewrite
462 // the last two iloads in a run. If the next bytecode is _fast_iload, this
463 // iload and the next form an iload pair, so the current bytecode is
464 // rewritten to _fast_iload2 below.
465 __ move(AT, Bytecodes::_iload);
466 __ beq(AT, T2, done);
467 __ delayed()->nop();
469 __ move(T3, Bytecodes::_fast_iload2);
470 __ move(AT, Bytecodes::_fast_iload);
471 __ beq(AT, T2, rewrite);
472 __ delayed()->nop();
474 // if _caload, rewrite to fast_icaload
475 __ move(T3, Bytecodes::_fast_icaload);
476 __ move(AT, Bytecodes::_caload);
477 __ beq(AT, T2, rewrite);
478 __ delayed()->nop();
480 // rewrite so iload doesn't check again.
481 __ move(T3, Bytecodes::_fast_iload);
483 // rewrite
484 // T3 : fast bytecode
485 __ bind(rewrite);
486 patch_bytecode(Bytecodes::_iload, T3, T2, false);
487 __ bind(done);
488 }
490 // Get the local value into tos
491 locals_index(T2);
492 __ lw(FSR, T2, 0);
493 }
495 // used register T2
496 // T2 : index
497 void TemplateTable::fast_iload2() {
498 transition(vtos, itos);
499 locals_index(T2);
500 __ lw(FSR, T2, 0);
501 __ push(itos);
502 locals_index(T2, 3);
503 __ lw(FSR, T2, 0);
504 }
506 // used register T2
507 // T2 : index
508 void TemplateTable::fast_iload() {
509 transition(vtos, itos);
510 locals_index(T2);
511 __ lw(FSR, T2, 0);
512 }
514 // used register T2
515 // T2 : index
516 void TemplateTable::lload() {
518 transition(vtos, ltos);
519 locals_index(T2);
520 __ ld(FSR, T2, -wordSize);
521 __ ld(SSR, T2, 0);
522 }
524 // used register T2
525 // T2 : index
526 void TemplateTable::fload() {
527 transition(vtos, ftos);
528 locals_index(T2);
529 // FIXME, aoqi: what should the high 32 bits be when storing a single float into a 64-bit register?
530 //__ mtc1(R0, FSF);
531 __ lwc1(FSF, T2, 0);
532 }
534 // used register T2
535 // T2 : index
536 void TemplateTable::dload() {
538 transition(vtos, dtos);
539 locals_index(T2);
540 /* if (TaggedStackInterpreter) {
541 // Get double out of locals array, onto temp stack and load with
542 // float instruction into ST0
543 __ dsll(AT,T2,Interpreter::stackElementScale());
544 __ dadd(AT, LVP, AT);
545 __ ldc1(FSF, AT, Interpreter::local_offset_in_bytes(1));
546 } else {*/
547 __ ldc1(FSF, T2, -wordSize);
548 __ ldc1(SSF, T2, 0);
549 // }
550 }
552 // used register T2
553 // T2 : index
554 void TemplateTable::aload()
555 {
556 transition(vtos, atos);
557 locals_index(T2);
558 __ ld(FSR, T2, 0);
559 }
561 void TemplateTable::locals_index_wide(Register reg) {
562 __ get_2_byte_integer_at_bcp(reg, AT, 2);
563 __ huswap(reg);
564 __ dsll(reg, reg, Address::times_8);
565 __ dsub(reg, LVP, reg);
566 }
568 // used register T2
569 // T2 : index
570 void TemplateTable::wide_iload() {
571 transition(vtos, itos);
572 locals_index_wide(T2);
573 __ ld(FSR, T2, 0);
574 }
576 // used register T2
577 // T2 : index
578 void TemplateTable::wide_lload() {
579 transition(vtos, ltos);
580 locals_index_wide(T2);
581 __ ld(FSR, T2, -4);
582 }
584 // used register T2
585 // T2 : index
586 void TemplateTable::wide_fload() {
587 transition(vtos, ftos);
588 locals_index_wide(T2);
589 __ lwc1(FSF, T2, 0);
590 }
592 // used register T2
593 // T2 : index
594 void TemplateTable::wide_dload() {
595 transition(vtos, dtos);
596 locals_index_wide(T2);
597 /* if (TaggedStackInterpreter) {
598 // Get double out of locals array, onto temp stack and load with
599 // float instruction into ST0
600 // __ movl(eax, laddress(ebx));
601 // __ movl(edx, haddress(ebx));
602 __ dsll(AT,T2,Interpreter::stackElementScale());
603 __ dadd(AT, LVP, AT);
604 __ ldc1(FSF, AT, Interpreter::local_offset_in_bytes(1));
606 // __ pushl(edx); // push hi first
607 // __ pushl(eax);
608 // __ fld_d(Address(esp));
609 // __ addl(esp, 2*wordSize);
610 } else {*/
611 __ ldc1(FSF, T2, -4);
612 //}
613 }
615 // used register T2
616 // T2 : index
617 void TemplateTable::wide_aload() {
618 transition(vtos, atos);
619 locals_index_wide(T2);
620 __ ld(FSR, T2, 0);
621 }
623 // we use A2 as the register for the index, BE CAREFUL!
624 // we don't use the tgeu trap (code 29) for range checks now; kept for later optimization
625 void TemplateTable::index_check(Register array, Register index) {
626 // Pop ptr into array
627 __ pop_ptr(array);
628 index_check_without_pop(array, index);
629 }
631 void TemplateTable::index_check_without_pop(Register array, Register index) {
632 // clobbers AT; on the failure path the index is moved to A2
633 // check array
634 __ null_check(array, arrayOopDesc::length_offset_in_bytes());
636 // check index
637 Label ok;
638 __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
639 #ifndef OPT_RANGECHECK
640 __ sltu(AT, index, AT);
641 __ bne(AT, R0, ok);
642 __ delayed()->nop();
644 // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
645 if (A2 != index) __ move(A2, index);
646 __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
647 __ delayed()->nop();
648 __ bind(ok);
649 #else
650 __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
651 __ move(A2, index);
652 __ tgeu(A2, AT, 29);
653 #endif
654 }
656 void TemplateTable::iaload() {
657 transition(itos, itos);
658 // __ pop(SSR);
659 index_check(SSR, FSR);
660 __ dsll(FSR, FSR, 2);
661 __ dadd(FSR, SSR, FSR);
662 // FSR now holds the element address
663 __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
664 }
667 void TemplateTable::laload() {
668 transition(itos, ltos);
669 // __ pop(SSR);
670 index_check(SSR, FSR);
671 __ dsll(AT, FSR, Address::times_8);
672 __ dadd(AT, SSR, AT);
673 __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
674 }
676 void TemplateTable::faload() {
677 transition(itos, ftos);
678 // __ pop(SSR);
679 index_check(SSR, FSR);
680 __ shl(FSR, 2);
681 __ dadd(FSR, SSR, FSR);
682 __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
683 }
685 void TemplateTable::daload() {
686 transition(itos, dtos);
687 //__ pop(SSR);
688 index_check(SSR, FSR);
689 __ dsll(AT, FSR, 3);
690 __ dadd(AT, SSR, AT);
691 __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
692 }
694 void TemplateTable::aaload() {
695 transition(itos, atos);
696 //__ pop(SSR);
697 index_check(SSR, FSR);
698 __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8);
699 __ dadd(FSR, SSR, FSR);
700 //add for compressedoops
701 __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
702 }
704 void TemplateTable::baload() {
705 transition(itos, itos);
706 //__ pop(SSR);
707 index_check(SSR, FSR);
708 __ dadd(FSR, SSR, FSR);
709 __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
710 }
712 void TemplateTable::caload() {
713 transition(itos, itos);
714 // __ pop(SSR);
715 index_check(SSR, FSR);
716 __ dsll(FSR, FSR, Address::times_2);
717 __ dadd(FSR, SSR, FSR);
718 __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
719 }
721 // iload followed by caload frequent pair
722 // used register : T2
723 // T2 : index
724 void TemplateTable::fast_icaload() {
725 transition(vtos, itos);
726 // load index out of locals
727 locals_index(T2);
728 __ lw(FSR, T2, 0);
729 // __ pop(SSR);
730 index_check(SSR, FSR);
731 __ dsll(FSR, FSR, 1);
732 __ dadd(FSR, SSR, FSR);
733 __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
734 }
736 void TemplateTable::saload() {
737 transition(itos, itos);
738 // __ pop(SSR);
739 index_check(SSR, FSR);
740 __ dsll(FSR, FSR, Address::times_2);
741 __ dadd(FSR, SSR, FSR);
742 __ lh(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));
743 }
745 void TemplateTable::iload(int n) {
746 transition(vtos, itos);
747 __ lw(FSR, iaddress(n));
748 }
750 void TemplateTable::lload(int n) {
751 transition(vtos, ltos);
752 __ ld(FSR, laddress(n));
753 }
755 void TemplateTable::fload(int n) {
756 transition(vtos, ftos);
757 //__ mtc1(R0, FSF);
758 __ lwc1(FSF, faddress(n));
759 }
760 //FIXME here
761 void TemplateTable::dload(int n) {
762 transition(vtos, dtos);
763 __ ldc1(FSF, laddress(n));
764 }
766 void TemplateTable::aload(int n) {
767 transition(vtos, atos);
768 __ ld(FSR, aaddress(n));
769 }
771 // used register : T2, T3
772 // T2 : bytecode
773 // T3 : folded code
774 void TemplateTable::aload_0() {
775 transition(vtos, atos);
776 // According to bytecode histograms, the pairs:
777 //
778 // _aload_0, _fast_igetfield
779 // _aload_0, _fast_agetfield
780 // _aload_0, _fast_fgetfield
781 //
782 // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
783 // bytecode checks if the next bytecode is either _fast_igetfield,
784 // _fast_agetfield or _fast_fgetfield and then rewrites the
785 // current bytecode into a pair bytecode; otherwise it rewrites the current
786 // bytecode into _fast_aload_0 that doesn't do the pair check anymore.
787 //
788 // Note: If the next bytecode is _getfield, the rewrite must be delayed,
789 // otherwise we may miss an opportunity for a pair.
790 //
791 // Also rewrite frequent pairs
792 // aload_0, aload_1
793 // aload_0, iload_1
794 // These bytecodes with a small amount of code are most profitable to rewrite
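// Example (illustrative): 'aload_0; getfield' on an int field is rewritten to
// _fast_iaccess_0, a single template that loads 'this' and fetches the field.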
795 if (RewriteFrequentPairs) {
796 Label rewrite, done;
797 // get the next bytecode in T2
798 __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
800 // do actual aload_0
801 aload(0);
803 // if _getfield then wait with rewrite
804 __ move(AT, Bytecodes::_getfield);
805 __ beq(AT, T2, done);
806 __ delayed()->nop();
808 // if _igetfield then rewrite to _fast_iaccess_0
809 assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
810 Bytecodes::_aload_0, "fix bytecode definition");
811 __ move(T3, Bytecodes::_fast_iaccess_0);
812 __ move(AT, Bytecodes::_fast_igetfield);
813 __ beq(AT, T2, rewrite);
814 __ delayed()->nop();
816 // if _agetfield then rewrite to _fast_aaccess_0
817 assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
818 Bytecodes::_aload_0, "fix bytecode definition");
819 __ move(T3, Bytecodes::_fast_aaccess_0);
820 __ move(AT, Bytecodes::_fast_agetfield);
821 __ beq(AT, T2, rewrite);
822 __ delayed()->nop();
824 // if _fgetfield then rewrite to _fast_faccess_0
825 assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
826 Bytecodes::_aload_0, "fix bytecode definition");
827 __ move(T3, Bytecodes::_fast_faccess_0);
828 __ move(AT, Bytecodes::_fast_fgetfield);
829 __ beq(AT, T2, rewrite);
830 __ delayed()->nop();
832 // else rewrite to _fast_aload_0
833 assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
834 Bytecodes::_aload_0, "fix bytecode definition");
835 __ move(T3, Bytecodes::_fast_aload_0);
837 // rewrite
838 __ bind(rewrite);
839 patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
841 __ bind(done);
842 } else {
843 aload(0);
844 }
845 }
847 void TemplateTable::istore() {
848 transition(itos, vtos);
849 locals_index(T2);
850 __ sw(FSR, T2, 0);
851 }
853 void TemplateTable::lstore() {
854 transition(ltos, vtos);
855 locals_index(T2);
856 __ sd(FSR, T2, -wordSize);
857 }
859 void TemplateTable::fstore() {
860 transition(ftos, vtos);
861 locals_index(T2);
862 __ swc1(FSF, T2, 0);
863 }
865 void TemplateTable::dstore() {
866 transition(dtos, vtos);
867 locals_index(T2);
868 __ sdc1(FSF, T2, -wordSize);
869 }
871 void TemplateTable::astore() {
872 transition(vtos, vtos);
873 // __ pop(FSR);
874 __ pop_ptr(FSR);
875 locals_index(T2);
876 __ sd(FSR, T2, 0);
877 }
879 void TemplateTable::wide_istore() {
880 transition(vtos, vtos);
881 // __ pop(FSR);
882 __ pop_i(FSR);
883 locals_index_wide(T2);
884 __ sd(FSR, T2, 0);
885 }
887 void TemplateTable::wide_lstore() {
888 transition(vtos, vtos);
889 //__ pop2(FSR, SSR);
890 //__ pop_l(FSR, SSR);
891 __ pop_l(FSR); //aoqi:FIXME Is this right?
892 locals_index_wide(T2);
893 __ sd(FSR, T2, -4);
894 }
896 void TemplateTable::wide_fstore() {
897 wide_istore();
898 }
900 void TemplateTable::wide_dstore() {
901 wide_lstore();
902 }
904 void TemplateTable::wide_astore() {
905 transition(vtos, vtos);
906 __ pop_ptr(FSR);
907 locals_index_wide(T2);
908 __ sd(FSR, T2, 0);
909 }
911 // used register : T2
912 void TemplateTable::iastore() {
913 transition(itos, vtos);
914 __ pop_i(SSR);
915 index_check(T2, SSR); // index in SSR, array popped into T2
916 __ dsll(SSR, SSR, Address::times_4);
917 __ dadd(T2, T2, SSR);
918 __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT));
919 }
923 // used register T2, T3
924 void TemplateTable::lastore() {
925 transition(ltos, vtos);
926 __ pop_i (T2);
927 index_check(T3, T2);
928 __ dsll(T2, T2, Address::times_8);
929 __ dadd(T3, T3, T2);
930 __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
931 }
933 // used register T2
934 void TemplateTable::fastore() {
935 transition(ftos, vtos);
936 __ pop_i(SSR);
937 index_check(T2, SSR);
938 __ dsll(SSR, SSR, Address::times_4);
939 __ dadd(T2, T2, SSR);
940 __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
941 }
943 // used register T2, T3
944 void TemplateTable::dastore() {
945 transition(dtos, vtos);
946 __ pop_i (T2);
947 index_check(T3, T2);
948 __ dsll(T2, T2, Address::times_8);
949 __ daddu(T3, T3, T2);
950 __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
952 }
954 // used register : T2, T3, T8
955 // T2 : array
956 // T3 : subklass
957 // T8 : superklass
958 void TemplateTable::aastore() {
959 Label is_null, ok_is_subtype, done;
960 transition(vtos, vtos);
961 // stack: ..., array, index, value
962 __ ld(FSR, at_tos()); // Value
963 __ lw(SSR, at_tos_p1()); // Index
964 __ ld(T2, at_tos_p2()); // Array
966 // index_check(T2, SSR);
967 index_check_without_pop(T2, SSR);
968 // do array store check - check for NULL value first
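// Java array-store semantics (for reference): storing a String into an Object[]
// is legal; storing an Integer into a String[] must throw ArrayStoreException;
// null may be stored into any reference array, so it bypasses the subtype check.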
969 __ beq(FSR, R0, is_null);
970 __ delayed()->nop();
972 // Move subklass into T3
973 //__ ld(T3, Address(FSR, oopDesc::klass_offset_in_bytes()));
974 //add for compressedoops
975 __ load_klass(T3, FSR);
976 // Move superklass into T8
977 //__ ld(T8, Address(T2, oopDesc::klass_offset_in_bytes()));
978 //add for compressedoops
979 __ load_klass(T8, T2);
980 __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset()));
981 // Form the element address array + index*{4 or 8} + base_offset in a single register, T2
982 __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
983 __ dadd(T2, T2, AT);
984 __ daddi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
986 // Generate subtype check.
987 // Superklass in T8. Subklass in T3.
988 __ gen_subtype_check(T8, T3, ok_is_subtype); // <-- Jin
989 // Come here on failure
990 // object is at FSR
991 __ jmp(Interpreter::_throw_ArrayStoreException_entry); // <-- Jin
992 __ delayed()->nop();
993 // Come here on success
994 __ bind(ok_is_subtype);
995 //replace with do_oop_store->store_heap_oop
996 //__ sd(FSR, T2, 0);
997 __ store_heap_oop(Address(T2, 0), FSR); // <-- Jin
998 __ sync();
999 __ store_check(T2);
1000 __ b(done);
1001 __ delayed()->nop();
1003 // Have a NULL in FSR, array in T2, index in SSR. Store NULL at ary[idx]
1004 __ bind(is_null);
1005 __ profile_null_seen(T9);
1006 __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8);
1007 __ dadd(T2, T2, AT);
1008 //__ sd(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
1009 __ store_heap_oop(Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), FSR); /* FSR is null here */
1010 __ sync();
1012 __ bind(done);
1013 __ daddi(SP, SP, 3 * Interpreter::stackElementSize);
1014 }
1016 void TemplateTable::bastore() {
1017 transition(itos, vtos);
1018 __ pop_i (SSR);
1019 index_check(T2, SSR);
1020 __ dadd(SSR, T2, SSR);
1021 __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
1022 }
1024 void TemplateTable::castore() {
1025 transition(itos, vtos);
1026 __ pop_i(SSR);
1027 index_check(T2, SSR);
1028 __ dsll(SSR, SSR, Address::times_2);
1029 __ dadd(SSR, T2, SSR);
1030 __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
1031 }
1033 void TemplateTable::sastore() {
1034 castore();
1035 }
1037 void TemplateTable::istore(int n) {
1038 transition(itos, vtos);
1039 __ sw(FSR, iaddress(n));
1040 }
1042 void TemplateTable::lstore(int n) {
1043 transition(ltos, vtos);
1044 __ sd(FSR, laddress(n));
1045 }
1047 void TemplateTable::fstore(int n) {
1048 transition(ftos, vtos);
1049 __ swc1(FSF, faddress(n));
1050 }
1052 void TemplateTable::dstore(int n) {
1053 transition(dtos, vtos);
1054 __ sdc1(FSF, laddress(n));
1055 }
1057 void TemplateTable::astore(int n) {
1058 transition(vtos, vtos);
1059 __ pop_ptr(FSR);
1060 __ sd(FSR, aaddress(n));
1061 }
1063 void TemplateTable::pop() {
1064 transition(vtos, vtos);
1065 __ daddi(SP, SP, Interpreter::stackElementSize);
1066 }
1068 void TemplateTable::pop2() {
1069 transition(vtos, vtos);
1070 __ daddi(SP, SP, 2 * Interpreter::stackElementSize);
1071 }
1073 void TemplateTable::dup() {
1074 transition(vtos, vtos);
1075 // stack: ..., a
1076 __ load_ptr(0, FSR);
1077 __ push_ptr(FSR);
1078 // stack: ..., a, a
1079 }
1081 // blows FSR
1082 void TemplateTable::dup_x1() {
1083 transition(vtos, vtos);
1084 // stack: ..., a, b
1085 __ load_ptr(0, FSR); // load b
1086 __ load_ptr(1, A5); // load a
1087 __ store_ptr(1, FSR); // store b
1088 __ store_ptr(0, A5); // store a
1089 __ push_ptr(FSR); // push b
1090 // stack: ..., b, a, b
1091 }
1093 // blows FSR
1094 void TemplateTable::dup_x2() {
1095 transition(vtos, vtos);
1096 // stack: ..., a, b, c
1097 __ load_ptr(0, FSR); // load c
1098 __ load_ptr(2, A5); // load a
1099 __ store_ptr(2, FSR); // store c in a
1100 __ push_ptr(FSR); // push c
1101 // stack: ..., c, b, c, c
1102 __ load_ptr(2, FSR); // load b
1103 __ store_ptr(2, A5); // store a in b
1104 // stack: ..., c, a, c, c
1105 __ store_ptr(1, FSR); // store b in c
1106 // stack: ..., c, a, b, c
1107 }
1109 // blows FSR
1110 void TemplateTable::dup2() {
1111 transition(vtos, vtos);
1112 // stack: ..., a, b
1113 __ load_ptr(1, FSR); // load a
1114 __ push_ptr(FSR); // push a
1115 __ load_ptr(1, FSR); // load b
1116 __ push_ptr(FSR); // push b
1117 // stack: ..., a, b, a, b
1118 }
1120 // blows FSR
1121 void TemplateTable::dup2_x1() {
1122 transition(vtos, vtos);
1123 // stack: ..., a, b, c
1124 __ load_ptr(0, T2); // load c
1125 __ load_ptr(1, FSR); // load b
1126 __ push_ptr(FSR); // push b
1127 __ push_ptr(T2); // push c
1128 // stack: ..., a, b, c, b, c
1129 __ store_ptr(3, T2); // store c in b
1130 // stack: ..., a, c, c, b, c
1131 __ load_ptr(4, T2); // load a
1132 __ store_ptr(2, T2); // store a in 2nd c
1133 // stack: ..., a, c, a, b, c
1134 __ store_ptr(4, FSR); // store b in a
1135 // stack: ..., b, c, a, b, c
1137 // stack: ..., b, c, a, b, c
1138 }
1140 // blows FSR, SSR
1141 void TemplateTable::dup2_x2() {
1142 transition(vtos, vtos);
1143 // stack: ..., a, b, c, d
1144 // stack: ..., a, b, c, d
1145 __ load_ptr(0, T2); // load d
1146 __ load_ptr(1, FSR); // load c
1147 __ push_ptr(FSR); // push c
1148 __ push_ptr(T2); // push d
1149 // stack: ..., a, b, c, d, c, d
1150 __ load_ptr(4, FSR); // load b
1151 __ store_ptr(2, FSR); // store b in d
1152 __ store_ptr(4, T2); // store d in b
1153 // stack: ..., a, d, c, b, c, d
1154 __ load_ptr(5, T2); // load a
1155 __ load_ptr(3, FSR); // load c
1156 __ store_ptr(3, T2); // store a in c
1157 __ store_ptr(5, FSR); // store c in a
1158 // stack: ..., c, d, a, b, c, d
1160 // stack: ..., c, d, a, b, c, d
1161 }
1163 // blows FSR
1164 void TemplateTable::swap() {
1165 transition(vtos, vtos);
1166 // stack: ..., a, b
1168 __ load_ptr(1, A5); // load a
1169 __ load_ptr(0, FSR); // load b
1170 __ store_ptr(0, A5); // store a in b
1171 __ store_ptr(1, FSR); // store b in a
1173 // stack: ..., b, a
1174 }
1176 void TemplateTable::iop2(Operation op) {
1177 transition(itos, itos);
1178 switch (op) {
1179 case add :
1180 __ pop_i(SSR);
1181 __ addu32(FSR, SSR, FSR);
1182 break;
1183 case sub :
1184 __ pop_i(SSR);
1185 __ subu32(FSR, SSR, FSR);
1186 break;
1187 case mul :
1188 __ lw(SSR, SP, 0);
1189 __ daddi(SP, SP, wordSize);
1190 __ mul(FSR, SSR, FSR);
1191 break;
1192 case _and :
1193 __ pop_i(SSR);
1194 __ andr(FSR, SSR, FSR);
1195 break;
1196 case _or :
1197 __ pop_i(SSR);
1198 __ orr(FSR, SSR, FSR);
1199 break;
1200 case _xor :
1201 __ pop_i(SSR);
1202 __ xorr(FSR, SSR, FSR);
1203 break;
1204 case shl :
1205 __ pop_i(SSR);
1206 __ sllv(FSR, SSR, FSR);
1207 break; // like x86, the MIPS shift instruction implicitly masks the count to 5 bits
1208 case shr :
1209 __ pop_i(SSR);
1210 __ srav(FSR, SSR, FSR);
1211 break; // like x86, the MIPS shift instruction implicitly masks the count to 5 bits
1212 case ushr :
1213 __ pop_i(SSR);
1214 __ srlv(FSR, SSR, FSR);
1215 break; // like x86, the MIPS shift instruction implicitly masks the count to 5 bits
1216 default : ShouldNotReachHere();
1217 }
1218 }
1220 // the result stored in FSR, SSR,
1221 // used registers : T2, T3
1222 //FIXME, aoqi
1223 void TemplateTable::lop2(Operation op) {
1224 transition(ltos, ltos);
1225 //__ pop2(T2, T3);
1226 __ pop_l(T2, T3);
1227 #ifdef ASSERT
1228 {
1229 Label L;
1230 __ beq(T3, R0, L);
1231 __ delayed()->nop();
1232 // FIXME: stack verification required
1233 // __ stop("lop2, wrong stack"); // <--- Fu 20130930
1234 __ bind(L);
1235 }
1236 #endif
1237 switch (op) {
1238 case add :
1239 __ daddu(FSR, T2, FSR);
1240 //__ sltu(AT, FSR, T2);
1241 //__ daddu(SSR, T3, SSR);
1242 //__ daddu(SSR, SSR, AT);
1243 break;
1244 case sub :
1245 __ dsubu(FSR, T2, FSR);
1246 //__ sltu(AT, T2, FSR);
1247 //__ dsubu(SSR, T3, SSR);
1248 //__ dsubu(SSR, SSR, AT);
1249 break;
1250 case _and:
1251 __ andr(FSR, T2, FSR);
1252 //__ andr(SSR, T3, SSR);
1253 break;
1254 case _or :
1255 __ orr(FSR, T2, FSR);
1256 //__ orr(SSR, T3, SSR);
1257 break;
1258 case _xor:
1259 __ xorr(FSR, T2, FSR);
1260 //__ xorr(SSR, T3, SSR);
1261 break;
1262 default : ShouldNotReachHere();
1263 }
1264 }
1266 // Java requires that this bytecode handle 0x80000000 / -1 without raising an
1267 // overflow exception; the result must be 0x80000000.
1268 // The Godson-2 CPU already behaves this way, so unlike x86 we need no special handling.
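// For example, Integer.MIN_VALUE / -1 must yield Integer.MIN_VALUE with no trap,
// while x / 0 must throw ArithmeticException (provided by the zero check below).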
1269 void TemplateTable::idiv() {
1270 transition(itos, itos);
1271 Label not_zero;
1272 //__ pop(SSR);
1273 __ pop_i(SSR);
1274 __ div(SSR, FSR);
1276 __ bne(FSR, R0, not_zero);
1277 __ delayed()->nop();
1278 //__ brk(7);
1279 __ jmp(Interpreter::_throw_ArithmeticException_entry);
1280 __ delayed()->nop();
1282 __ bind(not_zero);
1283 __ mflo(FSR);
1284 }
1286 void TemplateTable::irem() {
1287 transition(itos, itos);
1288 Label not_zero;
1289 //__ pop(SSR);
1290 __ pop_i(SSR);
1291 __ div(SSR, FSR);
1293 __ bne(FSR, R0, not_zero);
1294 __ delayed()->nop();
1295 //__ brk(7);
1296 __ jmp(Interpreter::_throw_ArithmeticException_entry);
1297 __ delayed()->nop();
1299 __ bind(not_zero);
1300 __ mfhi(FSR);
1301 }
1303 // the multiplier in SSR||FSR, the multiplicand on the stack
1304 // the result in SSR||FSR
1305 // used registers : T2, T3
1306 void TemplateTable::lmul() {
1307 transition(ltos, ltos);
1308 Label done;
1310 __ pop_l(T2, T3);
1311 #ifdef ASSERT
1312 {
1313 Label L;
1314 __ orr(AT, T3, SSR);
1315 __ beq(AT, R0, L);
1316 __ delayed()->nop();
1317 //FIXME, aoqi
1318 //__ stop("lmul, wrong stack");
1319 __ bind(L);
1320 }
1321 #endif
1322 __ orr(AT, T2, FSR);
1323 __ beq(AT, R0, done);
1324 __ delayed()->nop();
1326 __ dmultu(T2, FSR);
1327 __ daddu(SSR, SSR, T3);
1328 __ nop();
1329 __ mflo(FSR);
1330 __ mfhi(SSR);
1331 __ b(done);
1332 __ delayed()->nop();
1334 __ bind(done);
1335 }
1337 // NOTE: division by zero jumps to Interpreter::_throw_ArithmeticException_entry
1338 void TemplateTable::ldiv() {
1339 transition(ltos, ltos);
1340 Label normal;
1342 __ bne(FSR, R0, normal);
1343 __ delayed()->nop();
1345 //__ brk(7); //generate FPE
1346 __ jmp(Interpreter::_throw_ArithmeticException_entry);
1347 __ delayed()->nop();
1349 __ bind(normal);
1350 __ move(A1, FSR);
1351 __ pop_l(A2, A3);
1352 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), A1, A2);
1353 }
1355 // NOTE: division by zero jumps to Interpreter::_throw_ArithmeticException_entry
1356 void TemplateTable::lrem() {
1357 transition(ltos, ltos);
1358 Label normal;
1360 __ bne(FSR, R0, normal);
1361 __ delayed()->nop();
1363 __ jmp(Interpreter::_throw_ArithmeticException_entry);
1364 __ delayed()->nop();
1366 __ bind(normal);
1367 __ move(A1, FSR);
1368 __ pop_l (A2, A3);
1369 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), A1, A2);
1370 }
1372 // result in FSR
1373 // used registers : T0
1374 void TemplateTable::lshl() {
1375 transition(itos, ltos);
1376 __ pop_l(T0, T1);
1377 #ifdef ASSERT
1378 {
1379 Label L;
1380 __ beq(T1, R0, L);
1381 __ delayed()->nop();
1382 //__ stop("lshl, wrong stack"); // <-- Fu 20130930
1383 __ bind(L);
1384 }
1385 #endif
1386 __ andi(FSR, FSR, 0x3f); // mask the shift count to the low 6 bits
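// Java masks long shift counts to their low 6 bits (e.g. (x << 65) == (x << 1)),
// which is exactly what this andi implements.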
1387 __ dsllv(FSR, T0, FSR);
1388 }
1390 // used registers : T0
1391 void TemplateTable::lshr() {
1392 transition(itos, ltos);
1393 __ pop_l(T0, T1);
1394 #ifdef ASSERT
1395 {
1396 Label L;
1397 __ beq(T1, R0, L);
1398 __ delayed()->nop();
1399 __ stop("lshr, wrong stack");
1400 __ bind(L);
1401 }
1402 #endif
1403 __ andi(FSR, FSR, 0x3f); // mask the shift count to the low 6 bits
1404 __ dsrav(FSR, T0, FSR);
1405 }
1407 // used registers : T0
1408 void TemplateTable::lushr() {
1409 transition(itos, ltos);
1410 __ pop_l(T0, T1);
1411 #ifdef ASSERT
1412 {
1413 Label L;
1414 __ beq(T1, R0, L);
1415 __ delayed()->nop();
1416 __ stop("lushr, wrong stack");
1417 __ bind(L);
1418 }
1419 #endif
1420 __ andi(FSR, FSR, 0x3f); // mask the shift count to the low 6 bits
1421 __ dsrlv(FSR, T0, FSR);
1422 }
1424 // result in FSF
1425 void TemplateTable::fop2(Operation op) {
1426 transition(ftos, ftos);
1427 __ pop_ftos_to_esp(); // pop ftos into esp
1428 switch (op) {
1429 case add:
1430 __ lwc1(FTF, at_sp());
1431 __ add_s(FSF, FTF, FSF);
1432 break;
1433 case sub:
1434 __ lwc1(FTF, at_sp());
1435 __ sub_s(FSF, FTF, FSF);
1436 break;
1437 case mul:
1438 __ lwc1(FTF, at_sp());
1439 __ mul_s(FSF, FTF, FSF);
1440 break;
1441 case div:
1442 __ lwc1(FTF, at_sp());
1443 __ div_s(FSF, FTF, FSF);
1444 break;
1445 case rem:
1446 __ mfc1(FSR, FSF);
1447 __ mtc1(FSR, F12);
1448 __ lwc1(FTF, at_sp());
1449 __ rem_s(FSF, FTF, F12, FSF);
1450 break;
1451 default : ShouldNotReachHere();
1452 }
1454 __ daddi(SP, SP, 1 * wordSize);
1455 }
1457 // result in SSF||FSF
1458 // strictfp is not handled here
1459 void TemplateTable::dop2(Operation op) {
1460 transition(dtos, dtos);
1461 __ pop_dtos_to_esp(); // pop dtos into esp
1462 switch (op) {
1463 case add:
1464 __ ldc1(FTF, at_sp());
1465 __ add_d(FSF, FTF, FSF);
1466 break;
1467 case sub:
1468 __ ldc1(FTF, at_sp());
1469 __ sub_d(FSF, FTF, FSF);
1470 break;
1471 case mul:
1472 __ ldc1(FTF, at_sp());
1473 __ mul_d(FSF, FTF, FSF);
1474 break;
1475 case div:
1476 __ ldc1(FTF, at_sp());
1477 __ div_d(FSF, FTF, FSF);
1478 break;
1479 case rem:
1480 __ dmfc1(FSR, FSF);
1481 __ dmtc1(FSR, F12);
1482 __ ldc1(FTF, at_sp());
1483 __ rem_d(FSF, FTF, F12, FSF);
1484 break;
1485 default : ShouldNotReachHere();
1486 }
1488 __ daddi(SP, SP, 2 * wordSize);
1489 }
1491 void TemplateTable::ineg() {
1492 transition(itos, itos);
1493 __ neg(FSR);
1494 }
1496 void TemplateTable::lneg() {
1497 transition(ltos, ltos);
1498 __ dsubu(FSR, R0, FSR);
1499 }
1500 /*
1501 // Note: 'double' and 'long long' have 32-bits alignment on x86.
1502 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
1503 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
1504 // of 128-bits operands for SSE instructions.
1505 jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
1506 // Store the value to a 128-bits operand.
1507 operand[0] = lo;
1508 operand[1] = hi;
1509 return operand;
1510 }
1512 // Buffer for 128-bits masks used by SSE instructions.
1513 static jlong float_signflip_pool[2*2];
1514 static jlong double_signflip_pool[2*2];
1515 */
1516 void TemplateTable::fneg() {
1517 transition(ftos, ftos);
1518 __ neg_s(FSF, FSF);
1519 }
1521 void TemplateTable::dneg() {
1522 transition(dtos, dtos);
1523 __ neg_d(FSF, FSF);
1524 }
1526 // used registers : T2
1527 void TemplateTable::iinc() {
1528 transition(vtos, vtos);
1529 locals_index(T2);
1530 __ lw(FSR, T2, 0);
1531 __ lb(AT, at_bcp(2)); // get constant
1532 __ daddu(FSR, FSR, AT);
1533 __ sw(FSR, T2, 0);
1534 }
1536 // used register : T2
1537 void TemplateTable::wide_iinc() {
1538 transition(vtos, vtos);
1539 locals_index_wide(T2);
1540 __ get_2_byte_integer_at_bcp(FSR, AT, 4);
1541 __ hswap(FSR);
1542 __ lw(AT, T2, 0);
1543 __ daddu(FSR, AT, FSR);
1544 __ sw(FSR, T2, 0);
1545 }
1547 void TemplateTable::convert() {
1548 // Checking
1549 #ifdef ASSERT
1550 { TosState tos_in = ilgl;
1551 TosState tos_out = ilgl;
1552 switch (bytecode()) {
1553 case Bytecodes::_i2l: // fall through
1554 case Bytecodes::_i2f: // fall through
1555 case Bytecodes::_i2d: // fall through
1556 case Bytecodes::_i2b: // fall through
1557 case Bytecodes::_i2c: // fall through
1558 case Bytecodes::_i2s: tos_in = itos; break;
1559 case Bytecodes::_l2i: // fall through
1560 case Bytecodes::_l2f: // fall through
1561 case Bytecodes::_l2d: tos_in = ltos; break;
1562 case Bytecodes::_f2i: // fall through
1563 case Bytecodes::_f2l: // fall through
1564 case Bytecodes::_f2d: tos_in = ftos; break;
1565 case Bytecodes::_d2i: // fall through
1566 case Bytecodes::_d2l: // fall through
1567 case Bytecodes::_d2f: tos_in = dtos; break;
1568 default : ShouldNotReachHere();
1569 }
1570 switch (bytecode()) {
1571 case Bytecodes::_l2i: // fall through
1572 case Bytecodes::_f2i: // fall through
1573 case Bytecodes::_d2i: // fall through
1574 case Bytecodes::_i2b: // fall through
1575 case Bytecodes::_i2c: // fall through
1576 case Bytecodes::_i2s: tos_out = itos; break;
1577 case Bytecodes::_i2l: // fall through
1578 case Bytecodes::_f2l: // fall through
1579 case Bytecodes::_d2l: tos_out = ltos; break;
1580 case Bytecodes::_i2f: // fall through
1581 case Bytecodes::_l2f: // fall through
1582 case Bytecodes::_d2f: tos_out = ftos; break;
1583 case Bytecodes::_i2d: // fall through
1584 case Bytecodes::_l2d: // fall through
1585 case Bytecodes::_f2d: tos_out = dtos; break;
1586 default : ShouldNotReachHere();
1587 }
1588 transition(tos_in, tos_out);
1589 }
1590 #endif // ASSERT
1592 // Conversion
1593 // (Note: use pushl(ecx)/popl(ecx) for 1/2-word stack-ptr manipulation)
1594 switch (bytecode()) {
1595 case Bytecodes::_i2l:
1596 //__ extend_sign(SSR, FSR);
1597 __ sll(FSR, FSR, 0);
1598 break;
1599 case Bytecodes::_i2f:
1600 __ mtc1(FSR, FSF);
1601 __ cvt_s_w(FSF, FSF);
1602 break;
1603 case Bytecodes::_i2d:
1604 __ mtc1(FSR, FSF);
1605 __ cvt_d_w(FSF, FSF);
1606 break;
1607 case Bytecodes::_i2b:
1608 __ dsll32(FSR, FSR, 24);
1609 __ dsra32(FSR, FSR, 24);
1610 break;
1611 case Bytecodes::_i2c:
1612 __ andi(FSR, FSR, 0xFFFF); // keep the low 16 bits (zero-extend to char)
1613 break;
1614 case Bytecodes::_i2s:
1615 __ dsll32(FSR, FSR, 16);
1616 __ dsra32(FSR, FSR, 16);
1617 break;
1618 case Bytecodes::_l2i:
1619 __ dsll32(FSR, FSR, 0);
1620 __ dsra32(FSR, FSR, 0);
1621 break;
1622 case Bytecodes::_l2f:
1623 __ dmtc1(FSR, FSF);
1624 //__ mtc1(SSR, SSF);
1625 __ cvt_s_l(FSF, FSF);
1626 break;
1627 case Bytecodes::_l2d:
1628 __ dmtc1(FSR, FSF);
1629 //__ mtc1(SSR, SSF);
1630 __ cvt_d_l(FSF, FSF);
1631 break;
1632 case Bytecodes::_f2i:
1633 {
1634 Label L;
1635 /*
1636 __ c_un_s(FSF, FSF); //NaN?
1637 __ bc1t(L);
1638 __ delayed(); __ move(FSR, R0);
1639 */
1640 __ trunc_w_s(F12, FSF);
1641 __ cfc1(AT, 31);
1642 __ li(T0, 0x10000);
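// 0x10000 selects the FCSR Invalid Operation cause bit (bit 16): trunc.w.s sets
// it when the source is NaN or out of int range, in which case we fall back to
// SharedRuntime::f2i, which implements Java semantics (NaN -> 0, saturation).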
1643 __ andr(AT, AT, T0);
1644 __ beq(AT, R0, L);
1645 __ delayed()->mfc1(FSR, F12);
1647 __ mov_s(F12, FSF);
1648 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1649 __ bind(L);
1650 }
1651 break;
1652 case Bytecodes::_f2l:
1653 {
1654 Label L;
1655 /*
1656 __ move(SSR, R0);
1657 __ c_un_s(FSF, FSF); //NaN?
1658 __ bc1t(L);
1659 __ delayed();
1660 __ move(FSR, R0);
1661 */
1662 __ trunc_l_s(F12, FSF);
1663 __ cfc1(AT, 31);
1664 __ li(T0, 0x10000);
1665 __ andr(AT, AT, T0);
1666 __ beq(AT, R0, L);
1667 __ delayed()->dmfc1(FSR, F12);
1669 __ mov_s(F12, FSF);
1670 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1671 __ bind(L);
1672 }
1673 break;
1674 case Bytecodes::_f2d:
1675 __ cvt_d_s(FSF, FSF);
1676 break;
1677 case Bytecodes::_d2i:
1678 {
1679 Label L;
1680 /*
1681 __ c_un_d(FSF, FSF); //NaN?
1682 __ bc1t(L);
1683 __ delayed(); __ move(FSR, R0);
1684 */
1685 __ trunc_w_d(F12, FSF);
1686 __ cfc1(AT, 31);
1687 __ li(T0, 0x10000);
1688 __ andr(AT, AT, T0);
1689 __ beq(AT, R0, L);
1690 __ delayed()->mfc1(FSR, F12);
1692 __ mov_d(F12, FSF);
1693 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
1694 __ bind(L);
1695 }
1696 break;
1697 case Bytecodes::_d2l:
1698 {
1699 Label L;
1700 /*
1701 __ move(SSR, R0);
1702 __ c_un_d(FSF, FSF); //NaN?
1703 __ bc1t(L);
1704 __ delayed(); __ move(FSR, R0);
1705 */
1706 __ trunc_l_d(F12, FSF);
1707 __ cfc1(AT, 31);
1708 __ li(T0, 0x10000);
1709 __ andr(AT, AT, T0);
1710 __ beq(AT, R0, L);
1711 __ delayed()->dmfc1(FSR, F12);
1713 __ mov_d(F12, FSF);
1714 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
1715 __ bind(L);
1716 }
1717 break;
1718 case Bytecodes::_d2f:
1719 __ cvt_s_d(FSF, FSF);
1720 break;
1721 default :
1722 ShouldNotReachHere();
1723 }
1724 }
1726 void TemplateTable::lcmp() {
1727 transition(ltos, itos);
1729 Label low, high, done;
1730 __ pop(T0);
1731 __ pop(R0);
1732 __ slt(AT, T0, FSR);
1733 __ bne(AT, R0, low);
1734 __ delayed()->nop();
1736 __ bne(T0, FSR, high);
1737 __ delayed()->nop();
1739 __ li(FSR, (long)0);
1740 __ b(done);
1741 __ delayed()->nop();
1743 __ bind(low);
1744 __ li(FSR, (long)-1);
1745 __ b(done);
1746 __ delayed()->nop();
1748 __ bind(high);
1749 __ li(FSR, (long)1);
1750 __ b(done);
1751 __ delayed()->nop();
1753 __ bind(done);
1754 }
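// unordered_result < 0 implements fcmpl (an unordered compare, i.e. a NaN
// operand, yields -1); unordered_result > 0 implements fcmpg (NaN yields +1).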
1756 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
1757 Label less, done;
1759 __ move(FSR, R0);
1761 if (is_float) {
1762 __ pop_ftos_to_esp();
1763 __ lwc1(FTF, at_sp());
1764 __ c_eq_s(FTF, FSF);
1765 __ bc1t(done);
1766 __ delayed()->daddi(SP, SP, 1 * wordSize);
1768 if (unordered_result<0)
1769 __ c_ult_s(FTF, FSF);
1770 else
1771 __ c_olt_s(FTF, FSF);
1772 } else {
1773 __ pop_dtos_to_esp();
1774 __ ldc1(FTF, at_sp());
1775 __ c_eq_d(FTF, FSF);
1776 __ bc1t(done);
1777 __ delayed()->daddi(SP, SP, 2 * wordSize);
1779 if (unordered_result<0)
1780 __ c_ult_d(FTF, FSF);
1781 else
1782 __ c_olt_d(FTF, FSF);
1783 }
1784 __ bc1t(less);
1785 __ delayed()->nop();
1786 __ move(FSR, 1);
1787 __ b(done);
1788 __ delayed()->nop();
1789 __ bind(less);
1790 __ move(FSR, -1);
1791 __ bind(done);
1792 }
1795 // used registers : T3, A7, Rnext
1796 // FSR : return bci, this is defined by the vm specification
1797 // T2 : MDO taken count
1798 // T3 : method
1799 // A7 : offset
1800 // Rnext : next bytecode, this is required by dispatch_base
1801 void TemplateTable::branch(bool is_jsr, bool is_wide) {
1802 __ get_method(T3);
1803 __ profile_taken_branch(A7, T2); // only C2 meaningful
1805 #ifndef CORE
1806 const ByteSize be_offset = MethodCounters::backedge_counter_offset()
1807 + InvocationCounter::counter_offset();
1808 const ByteSize inv_offset = MethodCounters::invocation_counter_offset()
1809 + InvocationCounter::counter_offset();
1810 const int method_offset = frame::interpreter_frame_method_offset * wordSize;
1811 #endif // CORE
1813 // Load up A7 with the branch displacement
1814 if (!is_wide) {
1815 __ get_2_byte_integer_at_bcp(A7, AT, 1);
1816 __ hswap(A7);
1817 } else {
1818 __ get_4_byte_integer_at_bcp(A7, AT, 1);
1819 __ swap(A7);
1820 }
1822 // Handle all the JSR stuff here, then exit.
1823 // It's much shorter and cleaner than intermingling with the
1824 // non-JSR normal-branch stuff occurring below.
1825 if (is_jsr) {
1826 // Pre-load the next target bytecode into Rnext
1827 __ dadd(AT, BCP, A7);
1828 __ lbu(Rnext, AT, 0);
1830 // compute return address as bci in FSR
1831 __ daddi(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset()));
1832 __ ld(AT, T3, in_bytes(Method::const_offset()));
1833 __ dsub(FSR, FSR, AT);
1834 // Adjust the bcp in BCP by the displacement in A7
1835 __ dadd(BCP, BCP, A7);
1836 // jsr returns atos that is not an oop
1837 // __ dispatch_only_noverify(atos);
1838 // Push return address
1839 __ push_i(FSR);
1840 // jsr returns vtos
1841 __ dispatch_only_noverify(vtos);
1843 return;
1844 }
1846 // Normal (non-jsr) branch handling
1848 // Adjust the bcp in BCP (S0) by the displacement in A7
1849 __ dadd(BCP, BCP, A7);
1851 #ifdef CORE
1852 // Pre-load the next target bytecode into EBX
1853 __ lbu(Rnext, BCP, 0);
1854 // continue with the bytecode @ target
1855 __ dispatch_only(vtos);
1856 #else
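// Flow of the counter code below (summary): backward branches bump the backedge
// counter; the combined invocation+backedge count (or the MDO taken-count when
// profiling) is checked against InterpreterBackwardBranchLimit, and on overflow
// InterpreterRuntime::frequency_counter_overflow may return an OSR nmethod, in
// which case the interpreter frame is migrated and we jump to its OSR entry.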
1857 assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
1858 Label backedge_counter_overflow;
1859 Label profile_method;
1860 Label dispatch;
1861 if (UseLoopCounter) {
1862 // increment backedge counter for backward branches
1863 // eax: MDO
1864 // ebx: MDO bumped taken-count
1865 // T3: method
1866 // T4: target offset
1867 // BCP: target bcp
1868 // LVP: locals pointer
1869 __ bgtz(A7, dispatch); // check if forward or backward branch
1870 __ delayed()->nop();
1872 // check if MethodCounters exists
1873 Label has_counters;
1874 __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP
1875 __ bne(AT, R0, has_counters);
1876 __ nop();
1877 //__ push(T3);
1878 //__ push(A7);
1879 __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
1880 T3);
1881 //__ pop(A7);
1882 //__ pop(T3);
1883 __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP
1884 __ beq(AT, R0, dispatch);
1885 __ nop();
1886 __ bind(has_counters);
1888 // increment back edge counter
1889 __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
1890 __ lw(T0, T1, in_bytes(be_offset));
1891 __ increment(T0, InvocationCounter::count_increment);
1892 __ sw(T0, T1, in_bytes(be_offset));
1894 // load invocation counter
1895 __ lw(T1, T1, in_bytes(inv_offset));
1896 // buffer bit added, so masking is not needed
1897 // by yjl 10/24/2005
1898 //__ move(AT, InvocationCounter::count_mask_value);
1899 //__ andr(T1, T1, AT);
1901 // add backedge counter and invocation counter
1902 __ dadd(T1, T1, T0);
1904 if (ProfileInterpreter) {
1905 // Test to see if we should create a method data oop
1906 //__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterProfileLimit)));
1907 //__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterProfileLimit)));
1908 // T1 : backedge counter & invocation counter
1909 __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
1910 __ lw(AT, AT, 0);
1911 __ slt(AT, T1, AT);
1912 __ bne(AT, R0, dispatch);
1913 __ delayed()->nop();
1915 // if no method data exists, go to profile method
1916 __ test_method_data_pointer(T1, profile_method);
1918 if (UseOnStackReplacement) {
1919 // check for overflow against ebx which is the MDO taken count
1920 //__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
1921 //__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
1922 __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
1923 __ lw(AT, AT, 0);
1924 // T2 holds the MDO taken count, loaded by profile_taken_branch at function entry
1925 __ slt(AT, T2, AT);
1926 __ bne(AT, R0, dispatch);
1927 __ delayed()->nop();
1929 // When ProfileInterpreter is on, the backedge_count comes
1930 // from the methodDataOop, whose value does not get reset on
1931 // the call to frequency_counter_overflow().
1932 // To avoid excessive calls to the overflow routine while
1933 // the method is being compiled, add a second test to make
1934 // sure the overflow function is called only once every
1935 // overflow_frequency.
1936 const int overflow_frequency = 1024;
1937 __ andi(AT, T2, overflow_frequency-1);
1938 __ beq(AT, R0, backedge_counter_overflow);
1939 __ delayed()->nop();
1940 }
1941 } else {
1942 if (UseOnStackReplacement) {
1943 // check for overflow against eax, which is the sum of the counters
1944 //__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
1945 //__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
1946 __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
1947 __ lw(AT, AT, 0);
1948 __ slt(AT, T1, AT);
1949 __ beq(AT, R0, backedge_counter_overflow);
1950 __ delayed()->nop();
1951 }
1952 }
1953 __ bind(dispatch);
1954 }
1956 // Pre-load the next target bytecode into Rnext
1957 __ lbu(Rnext, BCP, 0);
1959 // continue with the bytecode @ target
1960 // FSR: return bci for jsr's, unused otherwise
1961 // Rnext: target bytecode
1962 // BCP: target bcp
1963 __ dispatch_only(vtos);
1965 if (UseLoopCounter) {
1966 if (ProfileInterpreter) {
1967 // Out-of-line code to allocate method data oop.
1968 __ bind(profile_method);
1969 __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
1970 __ lbu(Rnext, BCP, 0);
1972 __ set_method_data_pointer_for_bcp();
1973 /*
1974 __ ld(T3, FP, method_offset);
1975 __ lw(T3, T3, in_bytes(Method::method_data_offset()));
1976 __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
1977 __ test_method_data_pointer(T3, dispatch);
1978 // offset non-null mdp by MDO::data_offset() + IR::profile_method()
1979 __ daddi(T3, T3, in_bytes(MethodData::data_offset()));
1980 __ dadd(T3, T3, T1);
1981 __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
1982 */
1983 __ b(dispatch);
1984 __ delayed()->nop();
1985 }
1987 if (UseOnStackReplacement) {
1988 // invocation counter overflow
1989 __ bind(backedge_counter_overflow);
1990 __ sub(A7, BCP, A7); // branch bcp
1991 call_VM(NOREG, CAST_FROM_FN_PTR(address,
1992 InterpreterRuntime::frequency_counter_overflow), A7);
1993 __ lbu(Rnext, BCP, 0);
1995 // V0: osr nmethod (osr ok) or NULL (osr not possible)
1996 // V1: osr adapter frame return address
1997 // Rnext: target bytecode
1998 // LVP: locals pointer
1999 // BCP: bcp
2000 __ beq(V0, R0, dispatch);
2001 __ delayed()->nop();
2002 // nmethod may have been invalidated (VM may block upon call_VM return)
2003 __ lw(T3, V0, nmethod::entry_bci_offset());
2004 __ move(AT, InvalidOSREntryBci);
2005 __ beq(AT, T3, dispatch);
2006 __ delayed()->nop();
2007 // We need to prepare to execute the OSR method. First we must
2008 // migrate the locals and monitors off of the stack.
2009 //eax V0: osr nmethod (osr ok) or NULL (osr not possible)
2010 //ebx V1: osr adapter frame return address
2011 //edx Rnext: target bytecode
2012 //edi LVP: locals pointer
2013 //esi BCP: bcp
2014 __ move(BCP, V0);
2015 // const Register thread = ecx;
2016 const Register thread = TREG;
2017 #ifndef OPT_THREAD
2018 __ get_thread(thread);
2019 #endif
2020 call_VM(noreg, CAST_FROM_FN_PTR(address,
2021 SharedRuntime::OSR_migration_begin));
2022 // eax is OSR buffer, move it to expected parameter location
2023 //refer to osrBufferPointer in c1_LIRAssembler_mips.cpp
2024 __ move(T0, V0);
2026 // pop the interpreter frame
2027 // __ movl(edx, Address(ebp, frame::interpreter_frame_sender_sp_offset
2028 // * wordSize)); // get sender sp
2029 __ ld(A7, Address(FP,
2030 frame::interpreter_frame_sender_sp_offset * wordSize));
2031 //FIXME, shall we keep the return address on the stack?
2032 __ leave(); // remove frame anchor
2033 // __ popl(edi); // get return address
2034 //__ daddi(SP, SP, wordSize); // get return address
2035 // __ pop(LVP);
2036 __ move(LVP, RA);
2037 // __ movl(esp, edx); // set sp to sender sp
2038 __ move(SP, A7);
2040 Label skip;
2041 Label chkint;
2043 // The interpreter frame we have removed may be returning to
2044 // either the callstub or the interpreter. Since we will
2045 // now be returning from a compiled (OSR) nmethod we must
2046 // adjust the return address to one where it can handle compiled
2047 // results and clean the fpu stack. This is very similar to
2048 // what an i2c adapter must do.
2050 // Are we returning to the call stub?
2051 #if 0
2052 // __ cmpl(edi, (int)StubRoutines::_call_stub_return_address);
2053 __ daddi(AT, LVP, -(int)StubRoutines::_call_stub_return_address);
2054 // __ jcc(Assembler::notEqual, chkint);
2055 __ bne(AT, R0, chkint);
2056 __ delayed()->nop();
2057 // yes adjust to the specialized call stub return.
2058 // assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL,
2059 // "must be set");
2060 assert(StubRoutines::gs2::get_call_stub_compiled_return() != NULL,
2061 "must be set");
2062 // __ movl(edi, (intptr_t) StubRoutines::i486::get_call_stub_compiled_return());
2063 __ move(LVP, (intptr_t) StubRoutines::gs2::get_call_stub_compiled_return());
2064 // __ jmp(skip);
2065 __ b(skip);
2066 __ delayed()->nop();
2067 __ bind(chkint);
2069 // Are we returning to the interpreter? Look for sentinel
2071 //__ cmpl(Address(edi, -8), Interpreter::return_sentinel);
2072 __ lw(AT, LVP , -8);
2073 __ daddi(AT, AT, -Interpreter::return_sentinel);
2074 //__ jcc(Assembler::notEqual, skip);
2075 __ bne(AT, R0, skip);
2076 __ delayed()->nop();
2077 // Adjust to compiled return back to interpreter
2079 // __ movl(edi, Address(edi, -4));
2080 __ lw(LVP, LVP, -4);
2082 __ bind(skip);
2083 #endif
2084 // Align stack pointer for compiled code (note that caller is
2085 // responsible for undoing this fixup by remembering the old SP
2086 // in an FP-relative location)
2087 // __ andl(esp, -(StackAlignmentInBytes));
2088 __ move(AT, -(StackAlignmentInBytes));
2089 __ andr(SP, SP, AT);
2090 // push the (possibly adjusted) return address
2091 // __ pushl(edi);
2092 //__ push(LVP);
2093 // __ move(RA, LVP);
2094 // and begin the OSR nmethod
2095 // __ jmp(Address(esi, nmethod::osr_entry_point_offset()));
2096 //refer to osr_entry in c1_LIRAssembler_mips.cpp
2097 __ ld(AT, BCP, nmethod::osr_entry_point_offset());
2098 __ jr(AT);
2099 __ delayed()->nop();
2100 }
2101 }
2102 #endif // not CORE
2103 }
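//
// A rough sketch of the OSR hand-off generated above (illustrative only; the
// code above is authoritative): once the backedge counter overflows and
// frequency_counter_overflow returns a valid nmethod in V0, the interpreter
// frame is dismantled and control enters compiled code at its OSR entry:
//
//   buf = SharedRuntime::OSR_migration_begin(thread); // copy locals/monitors out
//   T0  = buf;                    // OSR buffer is the expected parameter
//   leave();                      // pop the interpreter frame
//   SP  = saved sender SP, aligned to StackAlignmentInBytes;
//   jump to nmethod->osr_entry_point();
//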
2105 void TemplateTable::if_0cmp(Condition cc) {
2106 transition(itos, vtos);
2107 // assume branch is more often taken than not (loops use backward branches)
2108 Label not_taken;
2109 switch(cc) {
2110 case not_equal:
2111 __ beq(FSR, R0, not_taken);
2112 break;
2113 case equal:
2114 __ bne(FSR, R0, not_taken);
2115 break;
2116 case less:
2117 __ bgez(FSR, not_taken);
2118 break;
2119 case less_equal:
2120 __ bgtz(FSR, not_taken);
2121 break;
2122 case greater:
2123 __ blez(FSR, not_taken);
2124 break;
2125 case greater_equal:
2126 __ bltz(FSR, not_taken);
2127 break;
2128 }
2129 __ delayed()->nop();
2131 branch(false, false);
2133 __ bind(not_taken);
2134 __ profile_not_taken_branch(FSR);
2135 }
2138 void TemplateTable::if_icmp(Condition cc) {
2139 transition(itos, vtos);
2140 // assume branch is more often taken than not (loops use backward branches)
2141 Label not_taken;
2143 __ pop_i(SSR);
2144 switch(cc) {
2145 case not_equal:
2146 __ beq(SSR, FSR, not_taken);
2147 break;
2148 case equal:
2149 __ bne(SSR, FSR, not_taken);
2150 break;
2151 case less:
2152 __ slt(AT, SSR, FSR);
2153 __ beq(AT, R0, not_taken);
2154 break;
2155 case less_equal:
2156 __ slt(AT, FSR, SSR);
2157 __ bne(AT, R0, not_taken);
2158 break;
2159 case greater:
2160 __ slt(AT, FSR, SSR);
2161 __ beq(AT, R0, not_taken);
2162 break;
2163 case greater_equal:
2164 __ slt(AT, SSR, FSR);
2165 __ bne(AT, R0, not_taken);
2166 break;
2167 }
2168 __ delayed()->nop();
2170 branch(false, false);
2172 __ bind(not_taken);
2173 __ profile_not_taken_branch(FSR);
2174 }
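//
// Note on the compare idiom above: MIPS has no two-register blt/bge, so each
// relational condition is synthesized from slt plus a branch on the *negated*
// condition to not_taken. E.g. for if_icmpge (cc == greater_equal):
//
//   slt(AT, SSR, FSR);       // AT = (value1 < value2)
//   bne(AT, R0, not_taken);  // fall through only when value1 >= value2
//
// so the fall-through path is exactly the bytecode's taken branch.
//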
2177 void TemplateTable::if_nullcmp(Condition cc) {
2178 transition(atos, vtos);
2179 // assume branch is more often taken than not (loops use backward branches)
2180 Label not_taken;
2181 switch(cc) {
2182 case not_equal:
2183 __ beq(FSR, R0, not_taken);
2184 break;
2185 case equal:
2186 __ bne(FSR, R0, not_taken);
2187 break;
2188 default:
2189 ShouldNotReachHere();
2190 }
2191 __ delayed()->nop();
2193 branch(false, false);
2195 __ bind(not_taken);
2196 __ profile_not_taken_branch(FSR);
2197 }
2200 void TemplateTable::if_acmp(Condition cc) {
2201 transition(atos, vtos);
2202 // assume branch is more often taken than not (loops use backward branches)
2203 Label not_taken;
2204 // __ lw(SSR, SP, 0);
2205 __ pop_ptr(SSR);
2206 switch(cc) {
2207 case not_equal:
2208 __ beq(SSR, FSR, not_taken);
2209 break;
2210 case equal:
2211 __ bne(SSR, FSR, not_taken);
2212 break;
2213 default:
2214 ShouldNotReachHere();
2215 }
2216 // __ delayed()->daddi(SP, SP, 4);
2217 __ delayed()->nop();
2219 branch(false, false);
2221 __ bind(not_taken);
2222 __ profile_not_taken_branch(FSR);
2223 }
2225 // used registers : T1, T2, T3
2226 // T1 : method
2227 // T2 : return bci
2228 void TemplateTable::ret() {
2229 transition(vtos, vtos);
2231 locals_index(T2);
2232 __ ld(T2, T2, 0);
2233 __ profile_ret(T2, T3);
2235 __ get_method(T1);
2236 __ ld(BCP, T1, in_bytes(Method::const_offset()));
2237 __ dadd(BCP, BCP, T2);
2238 __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
2240 __ dispatch_next(vtos);
2241 }
2243 // used registers : T1, T2, T3
2244 // T1 : method
2245 // T2 : return bci
2246 void TemplateTable::wide_ret() {
2247 transition(vtos, vtos);
2249 locals_index_wide(T2);
2250 __ ld(T2, T2, 0); // get return bci, compute return bcp
2251 __ profile_ret(T2, T3);
2253 __ get_method(T1);
2254 __ ld(BCP, T1, in_bytes(Method::const_offset()));
2255 __ dadd(BCP, BCP, T2);
2256 __ daddi(BCP, BCP, in_bytes(ConstMethod::codes_offset()));
2258 __ dispatch_next(vtos);
2259 }
2261 // used registers : T2, T3, A7, Rnext
2262 // T2 : bytecode pointer
2263 // T3 : low
2264 // A7 : high
2265 // Rnext : dest bytecode, required by dispatch_base
2266 void TemplateTable::tableswitch() {
2267 Label default_case, continue_execution;
2268 transition(itos, vtos);
2270 // align BCP
2271 __ daddi(T2, BCP, BytesPerInt);
2272 __ li(AT, -BytesPerInt);
2273 __ andr(T2, T2, AT);
2275 // load lo & hi
2276 __ lw(T3, T2, 1 * BytesPerInt);
2277 __ swap(T3);
2278 __ lw(A7, T2, 2 * BytesPerInt);
2279 __ swap(A7);
2281 // check against lo & hi
2282 __ slt(AT, FSR, T3);
2283 __ bne(AT, R0, default_case);
2284 __ delayed()->nop();
2286 __ slt(AT, A7, FSR);
2287 __ bne(AT, R0, default_case);
2288 __ delayed()->nop();
2290 // look up the dispatch offset, loaded into A7 (big endian)
2291 __ dsub(FSR, FSR, T3);
2292 __ dsll(AT, FSR, Address::times_4);
2293 __ dadd(AT, T2, AT);
2294 __ lw(A7, AT, 3 * BytesPerInt);
2295 __ profile_switch_case(FSR, T9, T3);
2297 __ bind(continue_execution);
2298 __ swap(A7);
2299 __ dadd(BCP, BCP, A7);
2300 __ lbu(Rnext, BCP, 0);
2301 __ dispatch_only(vtos);
2303 // handle default
2304 __ bind(default_case);
2305 __ profile_switch_default(FSR);
2306 __ lw(A7, T2, 0);
2307 __ b(continue_execution);
2308 __ delayed()->nop();
2309 }
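//
// For reference (JVM spec layout, restated here as a hedged aid): after the
// opcode and 0-3 padding bytes, the tableswitch operands are big-endian
// 32-bit words, which is why the code above aligns BCP and byte-swaps:
//
//   [pad] default  low  high  offset[0] ... offset[high - low]
//
// so the matched entry sits at aligned_bcp + (3 + index - low) * BytesPerInt,
// exactly the 3 * BytesPerInt displacement used in the lw above.
//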
2311 void TemplateTable::lookupswitch() {
2312 transition(itos, itos);
2313 __ stop("lookupswitch bytecode should have been rewritten");
2314 }
2316 // used registers : T2, T3, A7, Rnext
2317 // T2 : bytecode pointer
2318 // T3 : pair index
2319 // A7 : offset
2320 // Rnext : dest bytecode
2321 // the data after the opcode is the same as lookupswitch
2322 // see Rewriter::rewrite_method for more information
2323 void TemplateTable::fast_linearswitch() {
2324 transition(itos, vtos);
2325 Label loop_entry, loop, found, continue_execution;
2327 // swap FSR so we can avoid swapping the table entries
2328 __ swap(FSR);
2330 // align BCP
2331 __ daddi(T2, BCP, BytesPerInt);
2332 __ li(AT, -BytesPerInt);
2333 __ andr(T2, T2, AT);
2335 // set counter
2336 __ lw(T3, T2, BytesPerInt);
2337 __ swap(T3);
2338 __ b(loop_entry);
2339 __ delayed()->nop();
2341 // table search
2342 __ bind(loop);
2343 // get the entry value
2344 __ dsll(AT, T3, Address::times_8);
2345 __ dadd(AT, T2, AT);
2346 __ lw(AT, AT, 2 * BytesPerInt);
2348 // found?
2349 __ beq(FSR, AT, found);
2350 __ delayed()->nop();
2352 __ bind(loop_entry);
2353 __ bgtz(T3, loop);
2354 __ delayed()->daddiu(T3, T3, -1);
2356 // default case
2357 __ profile_switch_default(FSR);
2358 __ lw(A7, T2, 0);
2359 __ b(continue_execution);
2360 __ delayed()->nop();
2362 // entry found -> get offset
2363 __ bind(found);
2364 __ dsll(AT, T3, Address::times_8);
2365 __ dadd(AT, T2, AT);
2366 __ lw(A7, AT, 3 * BytesPerInt);
2367 __ profile_switch_case(T3, FSR, T2);
2369 // continue execution
2370 __ bind(continue_execution);
2371 __ swap(A7);
2372 __ dadd(BCP, BCP, A7);
2373 __ lbu(Rnext, BCP, 0);
2374 __ dispatch_only(vtos);
2375 }
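//
// For reference (JVM spec layout, restated as a hedged aid): the lookupswitch
// operands scanned above are, after the alignment padding,
//
//   [pad] default  npairs  match[0] offset[0] ... match[npairs-1] offset[npairs-1]
//
// The code byte-swaps the key once instead of swapping every match value, and
// walks the pairs backwards from npairs-1 down to 0 (the decrement sits in the
// delay slot of the loop branch).
//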
2377 // used registers : T0, T1, T2, T3, A7, Rnext
2378 // T2 : pairs address(array)
2379 // Rnext : dest bytecode
2380 // the data after the opcode is the same as lookupswitch
2381 // see Rewriter::rewrite_method for more information
2382 void TemplateTable::fast_binaryswitch() {
2383 transition(itos, vtos);
2384 // Implementation using the following core algorithm:
2385 //
2386 // int binary_search(int key, LookupswitchPair* array, int n) {
2387 // // Binary search according to "Methodik des Programmierens" by
2388 // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
2389 // int i = 0;
2390 // int j = n;
2391 // while (i+1 < j) {
2392 // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
2393 // // with Q: for all i: 0 <= i < n: key < a[i]
2394 // // where a stands for the array and assuming that the (nonexistent)
2395 // // element a[n] is infinitely big.
2396 // int h = (i + j) >> 1;
2397 // // i < h < j
2398 // if (key < array[h].fast_match()) {
2399 // j = h;
2400 // } else {
2401 // i = h;
2402 // }
2403 // }
2404 // // R: a[i] <= key < a[i+1] or Q
2405 // // (i.e., if key is within array, i is the correct index)
2406 // return i;
2407 // }
2409 // register allocation
2410 const Register array = T2;
2411 const Register i = T3, j = A7;
2412 const Register h = T1;
2413 const Register temp = T0;
2414 const Register key = FSR;
2416 // setup array
2417 __ daddi(array, BCP, 3*BytesPerInt);
2418 __ li(AT, -BytesPerInt);
2419 __ andr(array, array, AT);
2421 // initialize i & j
2422 __ move(i, R0);
2423 __ lw(j, array, - 1 * BytesPerInt);
2424 // Convert j into native byte ordering
2425 __ swap(j);
2427 // and start
2428 Label entry;
2429 __ b(entry);
2430 __ delayed()->nop();
2432 // binary search loop
2433 {
2434 Label loop;
2435 __ bind(loop);
2436 // int h = (i + j) >> 1;
2437 __ dadd(h, i, j);
2438 __ dsrl(h, h, 1);
2439 // if (key < array[h].fast_match()) {
2440 // j = h;
2441 // } else {
2442 // i = h;
2443 // }
2444 // Convert array[h].match to native byte-ordering before compare
2445 __ dsll(AT, h, Address::times_8);
2446 __ dadd(AT, array, AT);
2447 __ lw(temp, AT, 0 * BytesPerInt);
2448 __ swap(temp);
2450 {
2451 Label set_i, end_of_if;
2452 __ slt(AT, key, temp);
2453 __ beq(AT, R0, set_i);
2454 __ delayed()->nop();
2456 __ b(end_of_if);
2457 __ delayed(); __ move(j, h);
2459 __ bind(set_i);
2460 __ move(i, h);
2462 __ bind(end_of_if);
2463 }
2464 // while (i+1 < j)
2465 __ bind(entry);
2466 __ daddi(h, i, 1);
2467 __ slt(AT, h, j);
2468 __ bne(AT, R0, loop);
2469 __ delayed()->nop();
2470 }
2472 // end of binary search, result index is i (must check again!)
2473 Label default_case;
2474 // Convert array[i].match to native byte-ordering before compare
2475 __ dsll(AT, i, Address::times_8);
2476 __ dadd(AT, array, AT);
2477 __ lw(temp, AT, 0 * BytesPerInt);
2478 __ swap(temp);
2479 __ bne(key, temp, default_case);
2480 __ delayed()->nop();
2482 // entry found -> j = offset
2483 __ dsll(AT, i, Address::times_8);
2484 __ dadd(AT, array, AT);
2485 __ lw(j, AT, 1 * BytesPerInt);
2486 __ profile_switch_case(i, key, array);
2487 __ swap(j);
2489 __ dadd(BCP, BCP, j);
2490 __ lbu(Rnext, BCP, 0);
2491 __ dispatch_only(vtos);
2493 // default case -> j = default offset
2494 __ bind(default_case);
2495 __ profile_switch_default(i);
2496 __ lw(j, array, - 2 * BytesPerInt);
2497 __ swap(j);
2498 __ dadd(BCP, BCP, j);
2499 __ lbu(Rnext, BCP, 0);
2500 __ dispatch_only(vtos);
2501 }
2503 void TemplateTable::_return(TosState state) {
2504 transition(state, state);
2505 assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
2506 if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2507 assert(state == vtos, "only valid state");
2508 __ ld(T1, aaddress(0));
2509 //__ ld(LVP, T1, oopDesc::klass_offset_in_bytes());
2510 __ load_klass(LVP, T1);
2511 __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset()));
2512 __ move(AT, JVM_ACC_HAS_FINALIZER);
2513 __ andr(AT, AT, LVP);//by_css
2514 Label skip_register_finalizer;
2515 __ beq(AT, R0, skip_register_finalizer);
2516 __ delayed()->nop();
2517 __ call_VM(noreg, CAST_FROM_FN_PTR(address,
2518 InterpreterRuntime::register_finalizer), T1);
2519 __ bind(skip_register_finalizer);
2520 }
2521 __ remove_activation(state, T9);
2522 __ sync();
2524 __ jr(T9);
2525 __ delayed()->nop();
2526 }
2528 // ----------------------------------------------------------------------------
2529 // Volatile variables demand their effects be made known to all CPU's
2530 // in order. Store buffers on most chips allow reads & writes to
2531 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2532 // without some kind of memory barrier (i.e., it's not sufficient that
2533 // the interpreter does not reorder volatile references, the hardware
2534 // also must not reorder them).
2535 //
2536 // According to the new Java Memory Model (JMM):
2537 // (1) All volatiles are serialized with respect to each other. ALSO reads &
2538 // writes act as acquire & release, so:
2539 // (2) A read cannot let unrelated NON-volatile memory refs that
2540 // happen after the read float up to before the read. It's OK for
2541 // non-volatile memory refs that happen before the volatile read to
2542 // float down below it.
2543 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
2544 // memory refs that happen BEFORE the write float down to after the
2545 // write. It's OK for non-volatile memory refs that happen after the
2546 // volatile write to float up before it.
2547 //
2548 // We only put in barriers around volatile refs (they are expensive),
2549 // not _between_ memory refs (that would require us to track the
2550 // flavor of the previous memory refs). Requirements (2) and (3)
2551 // require some barriers before volatile stores and after volatile
2552 // loads. These nearly cover requirement (1) but miss the
2553 // volatile-store-volatile-load case. This final case is placed after
2554 // volatile-stores although it could just as well go before
2555 // volatile-loads.
2556 //void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
2557 // order_constraint) {
2558 void TemplateTable::volatile_barrier( ) {
2559 // Helper function to insert an is-volatile test and memory barrier
2560 //if (os::is_MP()) { // Not needed on single CPU
2561 // __ membar(order_constraint);
2562 //}
2563 if( !os::is_MP() ) return; // Not needed on single CPU
2564 __ sync();
2565 }
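//
// Illustrative Java-level example of why this barrier matters (an assumption
// drawn from standard JMM semantics, not code in this file):
//
//   volatile boolean ready;  int data;
//   writer: data = 42; ready = true;   // needs a barrier before the volatile store
//   reader: if (ready) use(data);      // needs a barrier after the volatile load
//
// Without the sync() above, store buffers could let the reader observe
// ready == true while still seeing a stale value of data.
//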
2567 // we don't shift left 2 bits in get_cache_and_index_at_bcp
2568 // because we always need to shift the index before using it. A ConstantPoolCacheEntry
2569 // is 16 bytes long and index is the index into the
2570 // ConstantPoolCache, so cache + base_offset() + index * 16 is
2571 // the corresponding ConstantPoolCacheEntry
2572 // used registers : T2
2573 // NOTE : the returned index must also be shifted left by 4 to form the address!
2574 void TemplateTable::resolve_cache_and_index(int byte_no,
2575 Register Rcache,
2576 Register index,
2577 size_t index_size) {
2578 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2579 const Register temp = A1;
2580 assert_different_registers(Rcache, index);
2581 const int shift_count = (1 + byte_no)*BitsPerByte;
2582 Label resolved;
2583 __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
2584 // is resolved?
2585 int i = (int)bytecode();
2586 __ addi(temp, temp, -i);
2587 __ beq(temp, R0, resolved);
2588 __ delayed()->nop();
2589 // resolve first time through
2590 address entry;
2591 switch (bytecode()) {
2592 case Bytecodes::_getstatic : // fall through
2593 case Bytecodes::_putstatic : // fall through
2594 case Bytecodes::_getfield : // fall through
2595 case Bytecodes::_putfield :
2596 entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
2597 break;
2598 case Bytecodes::_invokevirtual : // fall through
2599 case Bytecodes::_invokespecial : // fall through
2600 case Bytecodes::_invokestatic : // fall through
2601 case Bytecodes::_invokeinterface:
2602 entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
2603 break;
2604 case Bytecodes::_invokehandle:
2605 entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
2606 break;
2607 case Bytecodes::_invokedynamic:
2608 entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
2609 break;
2610 default :
2611 fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
2612 }
2614 __ move(temp, i);
2615 __ call_VM(NOREG, entry, temp);
2617 // Update registers with resolved info
2618 __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
2619 __ bind(resolved);
2620 }
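//
// A minimal sketch of what resolve_cache_and_index does (illustrative C-like
// pseudocode, not the actual cpCache API):
//
//   if (bytecode_recorded_in_entry(byte_no) == bytecode()) goto resolved;
//   call_VM(resolve_entry, bytecode());   // fills in the cpCache entry
//   reload cache and index from BCP;      // registers were clobbered by the call
//   resolved: ...
//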
2622 // The Rcache and index registers must be set before the call
2623 void TemplateTable::load_field_cp_cache_entry(Register obj,
2624 Register cache,
2625 Register index,
2626 Register off,
2627 Register flags,
2628 bool is_static = false) {
2629 assert_different_registers(cache, index, flags, off);
2630 ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2631 // Field offset
2632 __ dsll(AT, index, Address::times_ptr);
2633 __ dadd(AT, cache, AT);
2634 __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
2635 // Flags
2636 __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
2638 // for statics, obj is overwritten with the klass' java mirror
2639 if (is_static) {
2640 __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
2641 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2642 __ ld(obj, Address(obj, mirror_offset));
2644 __ verify_oop(obj);
2645 }
2646 }
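//
// Layout assumed by the loads above (see cpCache.hpp for the authoritative
// definition): each ConstantPoolCacheEntry carries f1, f2 and flags words at
// fixed offsets from base_offset() + scaled index; for a field entry f2 is the
// field offset, f1 the holder klass (used for statics, via its java mirror),
// and flags encodes the tos state, volatility and access mode.
//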
2648 // get the method, itable_index and flags of the current invoke
2649 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2650 Register method,
2651 Register itable_index,
2652 Register flags,
2653 bool is_invokevirtual,
2654 bool is_invokevfinal, /*unused*/
2655 bool is_invokedynamic) {
2656 // setup registers
2657 const Register cache = T3;
2658 const Register index = T1;
2659 assert_different_registers(method, flags);
2660 assert_different_registers(method, cache, index);
2661 assert_different_registers(itable_index, flags);
2662 assert_different_registers(itable_index, cache, index);
2663 assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant");
2664 // determine constant pool cache field offsets
2665 const int method_offset = in_bytes(
2666 ConstantPoolCache::base_offset() +
2667 ((byte_no == f2_byte)
2668 ? ConstantPoolCacheEntry::f2_offset()
2669 : ConstantPoolCacheEntry::f1_offset()
2670 )
2671 );
2672 const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
2673 ConstantPoolCacheEntry::flags_offset());
2674 // access constant pool cache fields
2675 const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
2676 ConstantPoolCacheEntry::f2_offset());
2677 size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2));
2678 resolve_cache_and_index(byte_no, cache, index, index_size);
2680 //assert(wordSize == 8, "adjust code below");
2681 // note we shift by 4, not 2, because what we get is the true index
2682 // of the ConstantPoolCacheEntry, not the 2-bit pre-shifted index of the x86 version
2683 __ dsll(AT, index, Address::times_ptr);
2684 __ dadd(AT, cache, AT);
2685 __ ld(method, AT, method_offset);
2688 if (itable_index != NOREG) {
2689 __ ld(itable_index, AT, index_offset);
2690 }
2691 __ ld(flags, AT, flags_offset);
2692 }
2695 // The registers cache and index are expected to be set before the call.
2696 // Correct values of the cache and index registers are preserved.
2697 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
2698 bool is_static, bool has_tos) {
2699 // do the JVMTI work here to avoid disturbing the register state below
2700 // We use c_rarg registers here because we want to use the register used in
2701 // the call to the VM
2702 if (JvmtiExport::can_post_field_access()) {
2703 // Check to see if a field access watch has been set before we take
2704 // the time to call into the VM.
2705 Label L1;
2706 assert_different_registers(cache, index, FSR);
2707 __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
2708 __ lw(FSR, AT, 0);
2709 __ beq(FSR, R0, L1);
2710 __ delayed()->nop();
2712 // We rely on the bytecode being resolved and the cpCache entry filled in.
2713 // cache entry pointer
2714 //__ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
2715 __ daddi(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
2716 __ shl(index, 4);
2717 __ dadd(cache, cache, index);
2718 if (is_static) {
2719 __ move(FSR, R0);
2720 } else {
2721 __ lw(FSR, SP, 0);
2722 __ verify_oop(FSR);
2723 }
2724 // FSR: object pointer or NULL
2725 // cache: cache entry pointer
2726 __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
2727 InterpreterRuntime::post_field_access), FSR, cache);
2728 __ get_cache_and_index_at_bcp(cache, index, 1);
2729 __ bind(L1);
2730 }
2731 }
2733 void TemplateTable::pop_and_check_object(Register r) {
2734 __ pop_ptr(r);
2735 __ null_check(r); // for field access must check obj.
2736 __ verify_oop(r);
2737 }
2739 // used registers : T1, T2, T3, T1
2740 // T1 : flags
2741 // T2 : off
2742 // T3 : obj
2743 // T1 : field address
2744 // Flag bits 31, 30, 29 and 28 together form a 4-bit number from 0 to 8 with the
2745 // following mapping to the TosState states:
2746 // btos: 0
2747 // ctos: 1
2748 // stos: 2
2749 // itos: 3
2750 // ltos: 4
2751 // ftos: 5
2752 // dtos: 6
2753 // atos: 7
2754 // vtos: 8
2755 // see ConstantPoolCacheEntry::set_field for more info
2756 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
2757 transition(vtos, vtos);
2759 const Register cache = T3;
2760 const Register index = T0;
2762 const Register obj = T3;
2763 const Register off = T2;
2764 const Register flags = T1;
2765 resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
2766 //jvmti_post_field_access(cache, index, is_static, false);
2768 load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2770 if (!is_static) pop_and_check_object(obj);
2771 __ dadd(index, obj, off);
2774 Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
2776 assert(btos == 0, "change code, btos != 0");
2777 __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
2778 __ andi(flags, flags, 0xf);
2779 __ bne(flags, R0, notByte);
2780 __ delayed()->nop();
2782 // btos
2783 __ sync();
2784 __ lb(FSR, index, 0);
2785 __ sd(FSR, SP, - wordSize);
2787 // Rewrite bytecode to be faster
2788 if (!is_static) {
2789 patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2);
2790 }
2791 __ b(Done);
2792 __ delayed()->daddi(SP, SP, - wordSize);
2794 __ bind(notByte);
2795 __ move(AT, itos);
2796 __ bne(flags, AT, notInt);
2797 __ delayed()->nop();
2799 // itos
2800 __ sync();
2801 __ lw(FSR, index, 0);
2802 __ sd(FSR, SP, - wordSize);
2804 // Rewrite bytecode to be faster
2805 if (!is_static) {
2807 patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
2808 }
2809 __ b(Done);
2810 __ delayed()->daddi(SP, SP, - wordSize);
2812 __ bind(notInt);
2813 __ move(AT, atos);
2814 __ bne(flags, AT, notObj);
2815 __ delayed()->nop();
2817 // atos
2818 //add for compressedoops
2819 __ sync();
2820 __ load_heap_oop(FSR, Address(index, 0));
2821 __ sd(FSR, SP, - wordSize);
2823 if (!is_static) {
2825 patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
2826 }
2827 __ b(Done);
2828 __ delayed()->daddi(SP, SP, - wordSize);
2830 __ bind(notObj);
2831 __ move(AT, ctos);
2832 __ bne(flags, AT, notChar);
2833 __ delayed()->nop();
2835 // ctos
2836 __ sync();
2837 __ lhu(FSR, index, 0);
2838 __ sd(FSR, SP, - wordSize);
2840 if (!is_static) {
2841 patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2);
2842 }
2843 __ b(Done);
2844 __ delayed()->daddi(SP, SP, - wordSize);
2846 __ bind(notChar);
2847 __ move(AT, stos);
2848 __ bne(flags, AT, notShort);
2849 __ delayed()->nop();
2851 // stos
2852 __ sync();
2853 __ lh(FSR, index, 0);
2854 __ sd(FSR, SP, - wordSize);
2856 if (!is_static) {
2858 patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
2859 }
2860 __ b(Done);
2861 __ delayed()->daddi(SP, SP, - wordSize);
2863 __ bind(notShort);
2864 __ move(AT, ltos);
2865 __ bne(flags, AT, notLong);
2866 __ delayed()->nop();
2868 // FIXME : the load/store should be atomic; we have no simple way to do this on mips32
2869 // ltos
2870 __ sync();
2871 __ ld(FSR, index, 0 * wordSize);
2872 __ sd(FSR, SP, -2 * wordSize);
2873 __ sd(R0, SP, -1 * wordSize);
2875 // Don't rewrite to _fast_lgetfield for potential volatile case.
2876 __ b(Done);
2877 __ delayed()->daddi(SP, SP, - 2 * wordSize);
2879 __ bind(notLong);
2880 __ move(AT, ftos);
2881 __ bne(flags, AT, notFloat);
2882 __ delayed()->nop();
2884 // ftos
2885 __ sync();
2886 __ lwc1(FSF, index, 0);
2887 __ sdc1(FSF, SP, - wordSize);
2889 if (!is_static) {
2890 patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2);
2891 }
2892 __ b(Done);
2893 __ delayed()->daddi(SP, SP, - wordSize);
2895 __ bind(notFloat);
2896 __ move(AT, dtos);
2897 __ bne(flags, AT, notDouble);
2898 __ delayed()->nop();
2900 // dtos
2901 __ sync();
2902 __ ldc1(FSF, index, 0 * wordSize);
2903 __ sdc1(FSF, SP, - 2 * wordSize);
2904 __ sd(R0, SP, - 1 * wordSize);
2906 if (!is_static) {
2907 patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2);
2908 }
2909 __ b(Done);
2910 __ delayed()->daddi(SP, SP, - 2 * wordSize);
2912 __ bind(notDouble);
2914 __ stop("Bad state");
2916 __ bind(Done);
2917 }
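//
// Note the delay-slot idiom used by every arm above: the result is stored just
// below SP and the operand-stack push is only committed in the branch delay
// slot, e.g.
//
//   __ sd(FSR, SP, -wordSize);               // value written below SP
//   __ b(Done);
//   __ delayed()->daddi(SP, SP, -wordSize);  // push becomes visible here
//
// so the shared Done path needs no per-type SP fixup.
//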
2919 void TemplateTable::getfield(int byte_no) {
2920 getfield_or_static(byte_no, false);
2921 }
2923 void TemplateTable::getstatic(int byte_no) {
2924 getfield_or_static(byte_no, true);
2925 }
2926 /*
2927 // used registers : T1, T2, T3, T1
2928 // T1 : cache & cp entry
2929 // T2 : obj
2930 // T3 : flags & value pointer
2931 // T1 : index
2932 // see ConstantPoolCacheEntry::set_field for more info
2933 void TemplateTable::jvmti_post_field_mod(int byte_no, bool is_static) {
2934 */
2936 // The registers cache and index are expected to be set before the call.
2937 // The function may destroy various registers, just not the cache and index registers.
2938 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2939 ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2941 if (JvmtiExport::can_post_field_modification()) {
2942 // Check to see if a field modification watch has been set before we take
2943 // the time to call into the VM.
2944 Label L1;
2945 assert_different_registers(cache, index, AT);
2947 //__ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_modification_count_addr()));
2948 //__ lw(FSR, AT, Assembler::split_low((int)JvmtiExport::get_field_modification_count_addr()));
2949 __ li(AT, JvmtiExport::get_field_modification_count_addr());
2950 __ lw(FSR, AT, 0);
2951 __ beq(FSR, R0, L1);
2952 __ delayed()->nop();
2954 /* // We rely on the bytecode being resolved and the cpCache entry filled in.
2955 resolve_cache_and_index(byte_no, T1, T1);
2956 */
2957 // The cache and index registers have been already set.
2958 // This allows to eliminate this call but the cache and index
2959 // registers have to be correspondingly used after this line.
2960 // __ get_cache_and_index_at_bcp(eax, edx, 1);
2961 __ get_cache_and_index_at_bcp(T1, T9, 1);
2963 if (is_static) {
2964 __ move(T2, R0);
2965 } else {
2966 // Life is harder. The stack holds the value on top,
2967 // followed by the object.
2968 // We don't know the size of the value, though;
2969 // it could be one or two words
2970 // depending on its type. As a result, we must find
2971 // the type to determine where the object is.
2972 Label two_word, valsize_known;
2973 __ dsll(AT, T1, 4);
2974 __ dadd(AT, T1, AT);
2975 __ lw(T3, AT, in_bytes(cp_base_offset
2976 + ConstantPoolCacheEntry::flags_offset()));
2977 __ move(T2, SP);
2978 __ shr(T3, ConstantPoolCacheEntry::tos_state_shift);
2980 // Make sure we don't need to mask T3 (ecx on x86) for tos_state_shift
2981 // after the above shift
2982 ConstantPoolCacheEntry::verify_tos_state_shift();
2983 __ move(AT, ltos);
2984 __ beq(T3, AT, two_word);
2985 __ delayed()->nop();
2986 __ move(AT, dtos);
2987 __ beq(T3, AT, two_word);
2988 __ delayed()->nop();
2989 __ b(valsize_known);
2990 //__ delayed()->daddi(T2, T2, wordSize*1);
2991 __ delayed()->daddi(T2, T2, Interpreter::expr_offset_in_bytes(1));
2993 __ bind(two_word);
2994 // __ daddi(T2, T2, wordSize*2);
2995 __ daddi(T2, T2, Interpreter::expr_offset_in_bytes(2));
2997 __ bind(valsize_known);
2998 // setup object pointer
2999 __ lw(T2, T2, 0*wordSize);
3000 }
3001 // cache entry pointer
3002 __ daddi(T1, T1, in_bytes(cp_base_offset));
3003 __ shl(T1, 4);
3004 __ daddu(T1, T1, T1);
3005 // object (tos)
3006 __ move(T3, SP);
3007 // T2: object pointer set up above (NULL if static)
3008 // T1: cache entry pointer
3009 // T3: jvalue object on the stack
3010 __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
3011 InterpreterRuntime::post_field_modification), T2, T1, T3);
3012 __ get_cache_and_index_at_bcp(cache, index, 1);
3013 __ bind(L1);
3014 }
3015 }
3017 // used registers : T0, T1, T2, T3, T8
3018 // T1 : flags
3019 // T2 : off
3020 // T3 : obj
3021 // T8 : volatile bit
3022 // see ConstantPoolCacheEntry::set_field for more info
3023 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
3024 transition(vtos, vtos);
3026 const Register cache = T3;
3027 const Register index = T0;
3028 const Register obj = T3;
3029 const Register off = T2;
3030 const Register flags = T1;
3031 const Register bc = T3;
3033 resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
3034 //TODO: LEE
3035 //jvmti_post_field_mod(cache, index, is_static);
3036 load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
3037 // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
3038 // volatile_barrier( );
3040 Label notVolatile, Done;
3041 __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
3042 __ andr(T8, flags, AT);
3044 Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
3046 assert(btos == 0, "change code, btos != 0");
3047 // btos
3048 __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
3049 __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask);
3050 __ bne(flags, R0, notByte);
3051 __ delayed()->nop();
3053 __ pop(btos);
3054 if (!is_static) {
3055 pop_and_check_object(obj);
3056 }
3057 __ dadd(AT, obj, off);
3058 __ sb(FSR, AT, 0);
3060 if (!is_static) {
3061 patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no);
3062 }
3063 __ b(Done);
3064 __ delayed()->nop();
3066 __ bind(notByte);
3067 // itos
3068 __ move(AT, itos);
3069 __ bne(flags, AT, notInt);
3070 __ delayed()->nop();
3072 __ pop(itos);
3073 if (!is_static) {
3074 pop_and_check_object(obj);
3075 }
3076 __ dadd(AT, obj, off);
3077 __ sw(FSR, AT, 0);
3079 if (!is_static) {
3080 patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no);
3081 }
3082 __ b(Done);
3083 __ delayed()->nop();
3084 __ bind(notInt);
3085 // atos
3086 __ move(AT, atos);
3087 __ bne(flags, AT, notObj);
3088 __ delayed()->nop();
3090 __ pop(atos);
3091 if (!is_static) {
3092 pop_and_check_object(obj);
3093 }
3095 __ dadd(AT, obj, off);
3096 //__ sd(FSR, AT, 0);
3097 __ store_heap_oop(Address(AT, 0), FSR);
3098 __ sync();
3099 __ store_check(obj);
3101 if (!is_static) {
3102 patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no);
3103 }
3104 __ b(Done);
3105 __ delayed()->nop();
3106 __ bind(notObj);
3107 // ctos
3108 __ move(AT, ctos);
3109 __ bne(flags, AT, notChar);
3110 __ delayed()->nop();
3112 __ pop(ctos);
3113 if (!is_static) {
3114 pop_and_check_object(obj);
3115 }
3116 __ dadd(AT, obj, off);
3117 __ sh(FSR, AT, 0);
3118 if (!is_static) {
3119 patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no);
3120 }
3121 __ b(Done);
3122 __ delayed()->nop();
3123 __ bind(notChar);
3124 // stos
3125 __ move(AT, stos);
3126 __ bne(flags, AT, notShort);
3127 __ delayed()->nop();
3129 __ pop(stos);
3130 if (!is_static) {
3131 pop_and_check_object(obj);
3132 }
3133 __ dadd(AT, obj, off);
3134 __ sh(FSR, AT, 0);
3135 if (!is_static) {
3136 patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no);
3137 }
3138 __ b(Done);
3139 __ delayed()->nop();
3140 __ bind(notShort);
3141 // ltos
3142 __ move(AT, ltos);
3143 __ bne(flags, AT, notLong);
3144 __ delayed()->nop();
3146 // FIXME: there is no simple way to load/store 64-bit data in an atomic operation,
3147 // so we just ignore the volatile flag.
3148 //Label notVolatileLong;
3149 //__ beq(T1, R0, notVolatileLong);
3150 //__ delayed()->nop();
3152 //addent = 2 * wordSize;
3153 // no need
3154 //__ lw(FSR, SP, 0);
3155 //__ lw(SSR, SP, 1 * wordSize);
3156 //if (!is_static) {
3157 // __ lw(T3, SP, addent);
3158 // addent += 1 * wordSize;
3159 // __ verify_oop(T3);
3160 //}
3162 //__ daddu(AT, T3, T2);
3164 // Replace with real volatile test
3165 // NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for Godson2 even in 64-bit kernel
3166 // last modified by yjl 7/12/2005
3167 //__ ldc1(FSF, SP, 0);
3168 //__ sdc1(FSF, AT, 0);
3169 //volatile_barrier();
3171 // Don't rewrite volatile version
3172 //__ b(notVolatile);
3173 //__ delayed()->addiu(SP, SP, addent);
3175 //__ bind(notVolatileLong);
3177 //__ pop(ltos); // overwrites edx
3178 // __ lw(FSR, SP, 0 * wordSize);
3179 // __ lw(SSR, SP, 1 * wordSize);
3180 // __ daddi(SP, SP, 2*wordSize);
3181 __ pop(ltos);
3182 if (!is_static) {
3183 pop_and_check_object(obj);
3184 }
3185 __ dadd(AT, obj, off);
3186 __ sd(FSR, AT, 0);
3187 if (!is_static) {
3188 patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no);
3189 }
3190 __ b(notVolatile);
3191 __ delayed()->nop();
3193 __ bind(notLong);
3194 // ftos
3195 __ move(AT, ftos);
3196 __ bne(flags, AT, notFloat);
3197 __ delayed()->nop();
3199 __ pop(ftos);
3200 if (!is_static) {
3201 pop_and_check_object(obj);
3202 }
3203 __ dadd(AT, obj, off);
3204 __ swc1(FSF, AT, 0);
3205 if (!is_static) {
3206 patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no);
3207 }
3208 __ b(Done);
3209 __ delayed()->nop();
3210 __ bind(notFloat);
3211 // dtos
3212 __ move(AT, dtos);
3213 __ bne(flags, AT, notDouble);
3214 __ delayed()->nop();
3216 __ pop(dtos);
3217 if (!is_static) {
3218 pop_and_check_object(obj);
3219 }
3220 __ dadd(AT, obj, off);
3221 __ sdc1(FSF, AT, 0);
3222 if (!is_static) {
3223 patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no);
3224 }
3225 __ b(Done);
3226 __ delayed()->nop();
3227 __ bind(notDouble);
3229 __ stop("Bad state");
3231 __ bind(Done);
3233 // Check for volatile store
3234 __ beq(T8, R0, notVolatile);
3235 __ delayed()->nop();
3236 volatile_barrier( );
3237 __ bind(notVolatile);
3238 }
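//
// A short note on the rewriting above: for the non-static cases,
// patch_bytecode() overwrites the putfield in the method's bytecode stream
// with its _fast_xputfield variant, so later executions skip constant-pool
// resolution; the volatile check after Done still issues the sync() barrier
// when the field was volatile (longs excepted, as noted above).
//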
3240 void TemplateTable::putfield(int byte_no) {
3241 putfield_or_static(byte_no, false);
3242 }
3244 void TemplateTable::putstatic(int byte_no) {
3245 putfield_or_static(byte_no, true);
3246 }
3248 // used registers : T1, T2, T3
3249 // T1 : cp_entry
3250 // T2 : obj
3251 // T3 : value pointer
3252 void TemplateTable::jvmti_post_fast_field_mod() {
3253 if (JvmtiExport::can_post_field_modification()) {
3254 // Check to see if a field modification watch has been set before we take
3255 // the time to call into the VM.
3256 Label L2;
3257 //__ lui(AT, Assembler::split_high((intptr_t)JvmtiExport::get_field_modification_count_addr()));
3258 //__ lw(T3, AT, Assembler::split_low((intptr_t)JvmtiExport::get_field_modification_count_addr()));
3259 __ li(AT, JvmtiExport::get_field_modification_count_addr());
3260 __ lw(T3, AT, 0);
3261 __ beq(T3, R0, L2);
3262 __ delayed()->nop();
3263 //__ pop(T2);
3264 __ pop_ptr(T2);
3265 //__ lw(T2, SP, 0);
3266 __ verify_oop(T2);
3267 __ push_ptr(T2);
3268 __ li(AT, -sizeof(jvalue));
3269 __ daddu(SP, SP, AT);
3270 __ move(T3, SP);
3271 //__ push(T2);
3272 //__ move(T2, R0);
3274 switch (bytecode()) { // load values into the jvalue object
3275 case Bytecodes::_fast_bputfield:
3276 __ sb(FSR, SP, 0);
3277 break;
3278 case Bytecodes::_fast_sputfield:
3279 __ sh(FSR, SP, 0);
3280 break;
3281 case Bytecodes::_fast_cputfield:
3282 __ sh(FSR, SP, 0);
3283 break;
3284 case Bytecodes::_fast_iputfield:
3285 __ sw(FSR, SP, 0);
3286 break;
3287 case Bytecodes::_fast_lputfield:
3288 __ sd(FSR, SP, 0);
3289 break;
3290 case Bytecodes::_fast_fputfield:
3291 __ swc1(FSF, SP, 0);
3292 break;
3293 case Bytecodes::_fast_dputfield:
3294 __ sdc1(FSF, SP, 0);
3295 break;
3296 case Bytecodes::_fast_aputfield:
3297 __ sd(FSR, SP, 0);
3298 break;
3299 default: ShouldNotReachHere();
3300 }
3302 //__ pop(T2); // restore copy of object pointer
3304 // Save FSR (eax) and sometimes SSR (edx) because call_VM() will clobber them,
3305 // then use them for JVMTI purposes
3306 __ push(FSR);
3307 if (bytecode() == Bytecodes::_fast_lputfield) __ push(SSR);
3308 // access constant pool cache entry
3309 __ get_cache_entry_pointer_at_bcp(T1, T2, 1);
3310 // no need, verified ahead
3311 __ verify_oop(T2);
3313 // T2: object pointer copied above
3314 // T1: cache entry pointer
3315 // T3: jvalue object on the stack
3316 __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
3317 InterpreterRuntime::post_field_modification), T2, T1, T3);
3318 if (bytecode() == Bytecodes::_fast_lputfield) __ pop(SSR); // restore high value
3319 //__ pop(FSR); // restore lower value
3320 //__ daddi(SP, SP, sizeof(jvalue)); // release jvalue object space
3321 __ lw(FSR, SP, 0);
3322 __ daddiu(SP, SP, sizeof(jvalue) + 1 * wordSize);
3323 __ bind(L2);
3324 }
3325 }
3327 // used registers : T2, T3, T1
3328 // T2 : index & off & field address
3329 // T3 : cache & obj
3330 // T1 : flags
3331 void TemplateTable::fast_storefield(TosState state) {
3332 transition(state, vtos);
3334 ByteSize base = ConstantPoolCache::base_offset();
3336 jvmti_post_fast_field_mod();
3338 // access constant pool cache
3339 __ get_cache_and_index_at_bcp(T3, T2, 1);
3341 // test for volatile with T1 (edx on x86, where edx is the tos register for lputfield)
3342 __ dsll(AT, T2, Address::times_8);
3343 __ dadd(AT, T3, AT);
3344 __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));
3346 // replace index with field offset from cache entry
3347 __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset()));
3349 // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
3350 // volatile_barrier( );
3352 Label notVolatile, Done;
3353 // Check for volatile store
3354 __ move(AT, 1<<ConstantPoolCacheEntry::is_volatile_shift);
3355 __ andr(AT, T1, AT);
3356 __ beq(AT, R0, notVolatile);
3357 __ delayed()->nop();
3360 // Get object from stack
3361 // NOTE : the value is in FSR/FSF now
3362 // __ pop(T3);
3363 // __ verify_oop(T3);
3364 pop_and_check_object(T3);
3365 // field addresses
3366 __ dadd(T2, T3, T2);
3368 // access field
3369 switch (bytecode()) {
3370 case Bytecodes::_fast_bputfield:
3371 __ sb(FSR, T2, 0);
3372 break;
3373 case Bytecodes::_fast_sputfield: // fall through
3374 case Bytecodes::_fast_cputfield:
3375 __ sh(FSR, T2, 0);
3376 break;
3377 case Bytecodes::_fast_iputfield:
3378 __ sw(FSR, T2, 0);
3379 break;
3380 case Bytecodes::_fast_lputfield:
3381 __ sd(FSR, T2, 0 * wordSize);
3382 break;
3383 case Bytecodes::_fast_fputfield:
3384 __ swc1(FSF, T2, 0);
3385 break;
3386 case Bytecodes::_fast_dputfield:
3387 __ sdc1(FSF, T2, 0 * wordSize);
3388 break;
3389 case Bytecodes::_fast_aputfield:
3390 __ store_heap_oop(Address(T2, 0), FSR);
3391 __ sync();
3392 __ store_check(T3);
3393 break;
3394 default:
3395 ShouldNotReachHere();
3396 }
3398 Label done;
3399 volatile_barrier( );
3400 __ b(done);
3401 __ delayed()->nop();
3403 // Same code as above, but without the volatile test.
3404 __ bind(notVolatile);
3406 // Get object from stack
3407 // __ pop(T3);
3408 // __ verify_oop(T3);
3409 pop_and_check_object(T3);
3410 //get the field address
3411 __ dadd(T2, T3, T2);
3413 // access field
3414 switch (bytecode()) {
3415 case Bytecodes::_fast_bputfield:
3416 __ sb(FSR, T2, 0);
3417 break;
3418 case Bytecodes::_fast_sputfield: // fall through
3419 case Bytecodes::_fast_cputfield:
3420 __ sh(FSR, T2, 0);
3421 break;
3422 case Bytecodes::_fast_iputfield:
3423 __ sw(FSR, T2, 0);
3424 break;
3425 case Bytecodes::_fast_lputfield:
3426 __ sd(FSR, T2, 0 * wordSize);
3427 break;
3428 case Bytecodes::_fast_fputfield:
3429 __ swc1(FSF, T2, 0);
3430 break;
3431 case Bytecodes::_fast_dputfield:
3432 __ sdc1(FSF, T2, 0 * wordSize);
3433 break;
3434 case Bytecodes::_fast_aputfield:
3435 //add for compressedoops
3436 __ store_heap_oop(Address(T2, 0), FSR);
3437 __ sync();
3438 __ store_check(T3);
3439 break;
3440 default:
3441 ShouldNotReachHere();
3442 }
3443 __ bind(done);
3444 }
3446 // used registers : T2, T3, T1
3447 // T3 : cp_entry & cache
3448 // T2 : index & offset
3449 void TemplateTable::fast_accessfield(TosState state) {
3450 transition(atos, state);
3452 // do the JVMTI work here to avoid disturbing the register state below
3453 if (JvmtiExport::can_post_field_access()) {
3454 // Check to see if a field access watch has been set before we take
3455 // the time to call into the VM.
3456 Label L1;
3457 __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr());
3458 __ lw(T3, AT, 0);
3459 __ beq(T3, R0, L1);
3460 __ delayed()->nop();
3461 // access constant pool cache entry
3462 __ get_cache_entry_pointer_at_bcp(T3, T1, 1);
3463 __ move(TSR, FSR);
3464 __ verify_oop(FSR);
3465 // FSR: object pointer copied above
3466 // T3: cache entry pointer
3467 __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
3468 FSR, T3);
3469 __ move(FSR, TSR);
3470 __ bind(L1);
3471 }
3473 // access constant pool cache
3474 __ get_cache_and_index_at_bcp(T3, T2, 1);
3475 // replace index with field offset from cache entry
3476 __ dsll(AT, T2, Address::times_8);
3477 //__ dsll(AT, T2, 4);
3478 __ dadd(AT, T3, AT);
3479 __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset()
3480 + ConstantPoolCacheEntry::f2_offset()));
3482 // FSR: object
3483 __ verify_oop(FSR);
3484 // __ null_check(FSR, 0);
3485 __ null_check(FSR);
3486 // field addresses
3487 __ dadd(FSR, FSR, T2);
3489 // access field
3490 switch (bytecode()) {
3491 case Bytecodes::_fast_bgetfield:
3492 __ lb(FSR, FSR, 0);
3493 break;
3494 case Bytecodes::_fast_sgetfield:
3495 __ lh(FSR, FSR, 0);
3496 break;
3497 case Bytecodes::_fast_cgetfield:
3498 __ lhu(FSR, FSR, 0);
3499 break;
3500 case Bytecodes::_fast_igetfield:
3501 __ lw(FSR, FSR, 0);
3502 break;
3503 case Bytecodes::_fast_lgetfield:
3504 __ stop("should not be rewritten");
3505 break;
3506 case Bytecodes::_fast_fgetfield:
3507 __ lwc1(FSF, FSR, 0);
3508 break;
3509 case Bytecodes::_fast_dgetfield:
3510 __ ldc1(FSF, FSR, 0);
3511 break;
3512 case Bytecodes::_fast_agetfield:
3513 //add for compressedoops
3514 __ load_heap_oop(FSR, Address(FSR, 0));
3515 __ verify_oop(FSR);
3516 break;
3517 default:
3518 ShouldNotReachHere();
3519 }
3521 // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO)
3522 // volatile_barrier( );
3523 }
3525 // generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0
3526 // used registers : T1, T2, T3, T1
3527 // T1 : obj & field address
3528 // T2 : off
3529 // T3 : cache
3530 // T1 : index
3531 void TemplateTable::fast_xaccess(TosState state) {
3532 transition(vtos, state);
3533 // get receiver
3534 __ ld(T1, aaddress(0));
3535 // access constant pool cache
3536 __ get_cache_and_index_at_bcp(T3, T2, 2);
3537 __ dsll(AT, T2, Address::times_8);
3538 __ dadd(AT, T3, AT);
3539 __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset()
3540 + ConstantPoolCacheEntry::f2_offset()));
3542 // make sure exception is reported in correct bcp range (getfield is next instruction)
3543 __ daddi(BCP, BCP, 1);
3544 // __ null_check(T1, 0);
3545 __ null_check(T1);
3546 __ dadd(T1, T1, T2);
3548 if (state == itos) {
3549 __ lw(FSR, T1, 0);
3550 } else if (state == atos) {
3551 //__ ld(FSR, T1, 0);
3552 __ load_heap_oop(FSR, Address(T1, 0));
3553 __ verify_oop(FSR);
3554 } else if (state == ftos) {
3555 __ lwc1(FSF, T1, 0);
3556 } else {
3557 ShouldNotReachHere();
3558 }
3559 __ daddi(BCP, BCP, -1);
3560 }
3562 //---------------------------------------------------
3563 //-------------------------------------------------
3564 // Calls
3566 void TemplateTable::count_calls(Register method, Register temp) {
3567 // implemented elsewhere
3568 ShouldNotReachHere();
3569 }
3571 // method, index, recv, flags: T1, T2, T3, T1
3572 // byte_no = 2 for _invokevirtual, 1 otherwise
3573 // T0 : return address
3574 // get the method & index of the invoke, and push the return address of
3575 // the invoke (first word in the frame);
3576 // this address is where the return code jumps to.
3577 // NOTE : this method will set T3 & T1 as recv & flags
3578 void TemplateTable::prepare_invoke(int byte_no,
3579 Register method, //linked method (or i-klass)
3580 Register index, //itable index, MethodType ,etc.
3581 Register recv, // if caller wants to see it
3582 Register flags // if caller wants to test it
3583 ) {
3584 // determine flags
3585 const Bytecodes::Code code = bytecode();
3586 const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
3587 const bool is_invokedynamic = code == Bytecodes::_invokedynamic;
3588 const bool is_invokehandle = code == Bytecodes::_invokehandle;
3589 const bool is_invokevirtual = code == Bytecodes::_invokevirtual;
3590 const bool is_invokespecial = code == Bytecodes::_invokespecial;
3591 const bool load_receiver = (recv != noreg);
3592 const bool save_flags = (flags != noreg);
3593 assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),"");
3594 assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
3595 assert(flags == noreg || flags == T1, "error flags reg.");
3596 assert(recv == noreg || recv == T3, "error recv reg.");
3597 // setup registers & access constant pool cache
3598 if(recv == noreg) recv = T3;
3599 if(flags == noreg) flags = T1;
3601 assert_different_registers(method, index, recv, flags);
3603 // save 'interpreter return address'
3604 __ save_bcp();
3606 load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
3607 if (is_invokedynamic || is_invokehandle) {
3608 Label L_no_push;
3609 __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift));
3610 __ andr(AT, AT, flags);
3611 __ beq(AT, R0, L_no_push);
3612 __ delayed()->nop();
3613 // Push the appendix as a trailing parameter.
3614 // This must be done before we get the receiver,
3615 // since the parameter_size includes it.
3616 Register tmp = SSR;
3617 __ push(tmp);
3618 __ move(tmp, index);
3619 assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
3620 __ load_resolved_reference_at_index(index, tmp);
3621 __ pop(tmp);
3622 __ push(index); // push appendix (MethodType, CallSite, etc.)
3623 __ bind(L_no_push);
3625 }
3627 // load receiver if needed (after appendix is pushed so parameter size is correct)
3628 // Note: no return address pushed yet
3629 if (load_receiver) {
3630 __ move(AT, ConstantPoolCacheEntry::parameter_size_mask);
3631 __ andr(recv, flags, AT);
3632 // 2014/07/31 Fu: Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
3633 const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address
3634 const int receiver_is_at_end = -1; // back off one slot to get receiver
3635 Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
3637 __ ld(recv, recv_addr);
3638 __ verify_oop(recv);
3639 }
3640 if(save_flags) {
3641 //__ movl(r13, flags);
3642 __ move(BCP, flags);
3643 }
3644 // compute return type
3645 __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
3646 __ andi(flags, flags, 0xf);
3648 // Make sure we don't need to mask flags for tos_state_shift after the above shift
3649 ConstantPoolCacheEntry::verify_tos_state_shift();
3650 // load return address
3651 {
3652 const address table = (address) Interpreter::invoke_return_entry_table_for(code);
3653 __ li(AT, (long)table);
3654 __ dsll(flags, flags, LogBytesPerWord);
3655 __ dadd(AT, AT, flags);
3656 __ ld(RA, AT, 0);
3657 }
3659 if (save_flags) {
3660 __ move(flags, BCP);
3661 __ restore_bcp();
3662 }
3663 }
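//
// Sketch of the return-address selection above (illustrative): the result tos
// state extracted from the flags word indexes a per-bytecode table of
// interpreter return entries,
//
//   RA = Interpreter::invoke_return_entry_table_for(code)[tos_state];
//
// so when the callee returns, the interpreter resumes at an entry that knows
// how the result is carried.
//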
3665 // used registers : T0, T3, T1, T2
3666 // T3 : recv, this two register using convention is by prepare_invoke
3667 // T1 : flags, klass
3668 // Rmethod : method, index must be Rmethod
3669 void TemplateTable::invokevirtual_helper(Register index, Register recv,
3670 Register flags) {
3672 assert_different_registers(index, recv, flags, T2);
3674 // Test for an invoke of a final method
3675 Label notFinal;
3676 __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
3677 __ andr(AT, flags, AT);
3678 __ beq(AT, R0, notFinal);
3679 __ delayed()->nop();
3681 Register method = index; // method must be Rmethod
3682 assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
3684 // do the call - the index is actually the method to call
3685 // the index is indeed a methodOop, because this is vfinal;
3686 // see ConstantPoolCacheEntry::set_method for more info
3688 __ verify_oop(method);
3690 // It's final, need a null check here!
3691 __ null_check(recv);
3693 // profile this call
3694 __ profile_final_call(T2);
3696 // 2014/11/24 Fu
3697 // T2: tmp, used for mdp
3698 // method: callee
3699 // T9: tmp
3700 // is_virtual: true
3701 __ profile_arguments_type(T2, method, T9, true);
3703 // __ move(T0, recv);
3704 __ jump_from_interpreted(method, T2);
3706 __ bind(notFinal);
3708 // get receiver klass
3709 __ null_check(recv, oopDesc::klass_offset_in_bytes());
3710 // Keep recv in T3 (ecx on x86); the callee expects it there
3711 __ load_klass(T2, recv);
3712 __ verify_oop(T2);
3713 // profile this call
3714 __ profile_virtual_call(T2, T0, T1);
3716 // get target methodOop & entry point
3717 const int base = InstanceKlass::vtable_start_offset() * wordSize;
3718 assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below");
3719 __ dsll(AT, index, Address::times_8);
3720 __ dadd(AT, T2, AT);
3721 // this is an unaligned read
3722 __ ld(method, AT, base + vtableEntry::method_offset_in_bytes());
3723 __ jump_from_interpreted(method, T2);
3725 }
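//
// The notFinal path above is plain vtable dispatch; conceptually (illustrative
// pseudocode, not the real Klass API):
//
//   klass  = recv->klass();
//   method = *(Method**)((address)klass + vtable_start_offset
//                        + index * vtableEntry_size + method_offset);
//   jump_from_interpreted(method);
//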
3727 void TemplateTable::invokevirtual(int byte_no) {
3728 transition(vtos, vtos);
3729 assert(byte_no == f2_byte, "use this argument");
3730 prepare_invoke(byte_no, Rmethod, NOREG, T3, T1);
3731 // now recv & flags in T3, T1
3732 invokevirtual_helper(Rmethod, T3, T1);
3733 }
3735 // T9 : entry
3736 // Rmethod : method
3737 void TemplateTable::invokespecial(int byte_no) {
3738 transition(vtos, vtos);
3739 assert(byte_no == f1_byte, "use this argument");
3740 prepare_invoke(byte_no, Rmethod, NOREG, T3);
3741 // now recv & flags in T3, T1
3742 __ verify_oop(T3);
3743 __ null_check(T3);
3744 __ profile_call(T9);
3746 // 2014/11/24 Fu
3747 // T8: tmp, used for mdp
3748 // Rmethod: callee
3749 // T9: tmp
3750 // is_virtual: false
3751 __ profile_arguments_type(T8, Rmethod, T9, false);
3753 __ jump_from_interpreted(Rmethod, T9);
3754 __ move(T0, T3);//aoqi ?
3755 }
3757 void TemplateTable::invokestatic(int byte_no) {
3758 transition(vtos, vtos);
3759 assert(byte_no == f1_byte, "use this argument");
3760 prepare_invoke(byte_no, Rmethod, NOREG);
3761 __ verify_oop(Rmethod);
3763 __ profile_call(T9);
3765 // 2014/11/24 Fu
3766 // T8: tmp, used for mdp
3767 // Rmethod: callee
3768 // T9: tmp
3769 // is_virtual: false
3770 __ profile_arguments_type(T8, Rmethod, T9, false);
3772 __ jump_from_interpreted(Rmethod, T9);
3773 }
3775 // Not implemented for this platform yet; left for a future change. FIXME.
3776 void TemplateTable::fast_invokevfinal(int byte_no) {
3777 transition(vtos, vtos);
3778 assert(byte_no == f2_byte, "use this argument");
3779 __ stop("fast_invokevfinal not used on mips");
3780 }
3782 // used registers : T0, T1, T2, T3, T1, A7
3783 // T0 : itable, vtable, entry
3784 // T1 : interface
3785 // T3 : receiver
3786 // T1 : flags, klass
3787 // Rmethod : index, method, this is required by interpreter_entry
3788 void TemplateTable::invokeinterface(int byte_no) {
3789 transition(vtos, vtos);
3790 //this method will use T1-T4 and T0
3791 assert(byte_no == f1_byte, "use this argument");
3792 prepare_invoke(byte_no, T2, Rmethod, T3, T1);
3793 // T2: Interface
3794 // Rmethod: index
3795 // T3: receiver
3796 // T1: flags
3797 Label notMethod;
3798 __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
3799 __ andr(AT, T1, AT);
3800 __ beq(AT, R0, notMethod);
3801 __ delayed()->nop();
3803 // Special case of invokeinterface called for virtual method of
3804 // java.lang.Object. See cpCacheOop.cpp for details.
3805 // This code isn't produced by javac, but could be produced by
3806 // another compliant java compiler.
3807 invokevirtual_helper(Rmethod, T3, T1);
3809 __ bind(notMethod);
3810 // Get receiver klass into T1 - also a null check
3811 //__ ld(T1, T3, oopDesc::klass_offset_in_bytes());
3812 //add for compressedoops
3813 //__ restore_locals();
3814 //__ null_check(T3, oopDesc::klass_offset_in_bytes());
3815 __ load_klass(T1, T3);
3816 __ verify_oop(T1);
3818 // profile this call
3819 __ profile_virtual_call(T1, T0, FSR);
3821 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3822 // TODO: x86 add a new method lookup_interface_method // LEE
3823 const int base = InstanceKlass::vtable_start_offset() * wordSize;
3824 assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below");
3825 __ lw(AT, T1, InstanceKlass::vtable_length_offset() * wordSize);
3826 __ dsll(AT, AT, Address::times_8);
3827 __ dadd(T0, T1, AT);
3828 __ daddi(T0, T0, base);
3829 if (HeapWordsPerLong > 1) {
3830 // Round up to align_object_offset boundary
3831 __ round_to(T0, BytesPerLong);
3832 }
3833 // now T0 is the begin of the itable
3835 Label entry, search, interface_ok;
3837 ///__ jmp(entry);
3838 __ b(entry);
3839 __ delayed()->nop();
3841 __ bind(search);
3842 __ increment(T0, itableOffsetEntry::size() * wordSize);
3844 __ bind(entry);
3846 // Check that the entry is non-null. A null entry means that the receiver
3847 // class doesn't implement the interface, and wasn't the same as the
3848 // receiver class checked when the interface was resolved.
3849 __ ld(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
3850 __ bne(AT, R0, interface_ok);
3851 __ delayed()->nop();
3852 // throw exception
3853 // the call_VM checks for exception, so we should never return here.
3855 //__ pop();//FIXME here,
3856 // pop return address (pushed by prepare_invoke).
3857 // no need now; we keep the value in RA instead
3859 __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
3860 __ should_not_reach_here();
3862 __ bind(interface_ok);
3863 // NOTE: no pop here, unlike the x86 version
3864 //__ lw(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
3865 __ bne(AT, T2, search);
3866 __ delayed()->nop();
3868 // now we get the itable of the interface
3869 __ ld(T0, T0, itableOffsetEntry::offset_offset_in_bytes());
3870 __ daddu(T0, T1, T0);
3871 assert(itableMethodEntry::size() * wordSize == 8, "adjust the scaling in the code below");
3872 __ dsll(AT, Rmethod, Address::times_8);
3873 __ daddu(AT, T0, AT);
3874 // now we get the method
3875 __ ld(Rmethod, AT, 0);
3876 // Rmethod: methodOop to call
3877 // T3: receiver
3878 // Check for abstract method error
3879 // Note: This should be done more efficiently via a throw_abstract_method_error
3880 // interpreter entry point and a conditional jump to it in case of a null
3881 // method.
3882 {
3883 Label L;
3884 ///__ testl(ebx, ebx);
3885 ///__ jcc(Assembler::notZero, L);
3886 __ bne(Rmethod, R0, L);
3887 __ delayed()->nop();
3889 // throw exception
3890 // note: must restore interpreter registers to canonical
3891 // state for exception handling to work correctly!
3892 ///__ popl(ebx); // pop return address (pushed by prepare_invoke)
3893 //__ restore_bcp(); // esi must be correct for exception handler
3894 //(was destroyed)
3895 //__ restore_locals(); // make sure locals pointer
3896 //is correct as well (was destroyed)
3897 ///__ call_VM(noreg, CAST_FROM_FN_PTR(address,
3898 //InterpreterRuntime::throw_AbstractMethodError));
3899 __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
3900 // the call_VM checks for exception, so we should never return here.
3901 __ should_not_reach_here();
3902 __ bind(L);
3903 }
3905 // 2014/11/24 Fu
3906 // T8: tmp, used for mdp
3907 // Rmethod: callee
3908 // T9: tmp
3909 // is_virtual: true
3910 __ profile_arguments_type(T8, Rmethod, T9, true);
3912 __ jump_from_interpreted(Rmethod, T9);
3913 }
3915 void TemplateTable::invokehandle(int byte_no) {
3916 transition(vtos, vtos);
3917 assert(byte_no == f1_byte, "use this argument");
3918 const Register T2_method = Rmethod;
3919 const Register FSR_mtype = FSR;
3920 const Register T3_recv = T3;
3922 if (!EnableInvokeDynamic) {
3923 // rewriter does not generate this bytecode
3924 __ should_not_reach_here();
3925 return;
3926 }
3928 prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv);
3929 //??__ verify_method_ptr(T2_method);
3930 __ verify_oop(T3_recv);
3931 __ null_check(T3_recv);
3933 // FSR_mtype: MethodType object (from cpool->resolved_references[f1], if necessary)
3934 // T2_method: MH.invokeExact_MT method (from f2)
3936 // Note: FSR_mtype is already pushed (if necessary) by prepare_invoke
3938 // FIXME: profile the LambdaForm also
3939 __ profile_final_call(T9);
3941 // 2014/11/24 Fu
3942 // T8: tmp, used for mdp
3943 // T2_method: callee
3944 // T9: tmp
3945 // is_virtual: true
3946 __ profile_arguments_type(T8, T2_method, T9, true);
3948 __ jump_from_interpreted(T2_method, T9);
3949 }
3951 void TemplateTable::invokedynamic(int byte_no) {
3952 transition(vtos, vtos);
3953 assert(byte_no == f1_byte, "use this argument");
3955 if (!EnableInvokeDynamic) {
3956 // We should not encounter this bytecode if !EnableInvokeDynamic.
3957 // The verifier will stop it. However, if we get past the verifier,
3958 // this will stop the thread in a reasonable way, without crashing the JVM.
3959 __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3960 InterpreterRuntime::throw_IncompatibleClassChangeError));
3961 // the call_VM checks for exception, so we should never return here.
3962 __ should_not_reach_here();
3963 return;
3964 }
3966 //const Register Rmethod = T2;
3967 const Register T2_callsite = T2;
3969 prepare_invoke(byte_no, Rmethod, T2_callsite);
3971 // T2_callsite: CallSite object (from cpool->resolved_references[f1])
3972 // Rmethod: MH.linkToCallSite method (from f2)
3974 // Note: T2_callsite is already pushed by prepare_invoke
3975 // %%% should make a type profile for any invokedynamic that takes a ref argument
3976 // profile this call
3977 __ profile_call(T9);
3979 // 2014/11/24 Fu
3980 // T8: tmp, used for mdp
3981 // Rmethod: callee
3982 // T9: tmp
3983 // is_virtual: false
3984 __ profile_arguments_type(T8, Rmethod, T9, false);
3986 __ verify_oop(T2_callsite);
3988 __ jump_from_interpreted(Rmethod, T9);
3989 }
3991 //----------------------------------------------------------------------------------------------------
3992 // Allocation
3993 // T1 : tags & buffer end & thread
3994 // T2 : object end
3995 // T3 : klass
3996 // T0 : object size
3997 // A1 : cpool
3998 // A2 : cp index
3999 // return object in FSR
4000 void TemplateTable::_new() {
4001 transition(vtos, atos);
4002 __ get_2_byte_integer_at_bcp(A2, AT, 1);
4003 __ huswap(A2);
4005 Label slow_case;
4006 Label done;
4007 Label initialize_header;
4008 Label initialize_object; // including clearing the fields
4009 Label allocate_shared;
4011 // get InstanceKlass in T3
4012 __ get_cpool_and_tags(A1, T1);
4013 __ dsll(AT, A2, Address::times_8);
4014 __ dadd(AT, A1, AT);
4015 __ ld(T3, AT, sizeof(ConstantPool));
4017 // make sure the class we're about to instantiate has been resolved.
4018 // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
4019 const int tags_offset = Array<u1>::base_offset_in_bytes();
4020 __ dadd(T1, T1, A2);
4021 __ lb(AT, T1, tags_offset);
4022 //__ addiu(AT, AT, - (int)JVM_CONSTANT_UnresolvedClass);
4023 __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
4024 //__ beq(AT, R0, slow_case);
4025 __ bne(AT, R0, slow_case);
4026 __ delayed()->nop();
4028 /* make sure klass is initialized & doesn't have a finalizer */
4030 // make sure klass is fully initialized
4031 __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset()));
4032 __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized);
4033 __ bne(AT, R0, slow_case);
4034 __ delayed()->nop();
4036 // has_finalizer
4037 //__ lw(T1, T3, Klass::access_flags_offset() + sizeof(oopDesc));
4038 //__ move(AT, JVM_ACC_CAN_BE_FASTPATH_ALLOCATED);
4039 //__ andr(AT, T1, AT);
4040 __ lw(T1, T3, in_bytes(Klass::layout_helper_offset()) );
4041 __ andi(AT, T1, Klass::_lh_instance_slow_path_bit);
4042 __ bne(AT, R0, slow_case);
4043 __ delayed()->nop();
4045 // get instance_size in InstanceKlass (already aligned) in T0,
4046 // be sure to preserve this value
4047 //__ lw(T0, T3, Klass::size_helper_offset_in_bytes() + sizeof(oopDesc));
4048 // Klass::_size_helper was renamed to Klass::_layout_helper. aoqi
4049 __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) );
4051 //
4052 // Allocate the instance
4053 // 1) Try to allocate in the TLAB
4054 // 2) if fail and the object is large allocate in the shared Eden
4055 // 3) if the above fails (or is not applicable), go to a slow case
4056 // (creates a new TLAB, etc.)
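// Illustrative sketch of the three-step policy (tlab_top, tlab_end,
// size and cas_allocate_in_eden are assumed names, not HotSpot API):
//
//   if (UseTLAB && tlab_top + size <= tlab_end) {
//     obj = tlab_top;  tlab_top += size;            // bump-pointer, no CAS
//   } else if (allow_shared_alloc) {
//     obj = cas_allocate_in_eden(size);             // CAS retry loop below
//   } else {
//     obj = InterpreterRuntime::_new(cpool, index); // slow path in the VM
//   }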
4058 const bool allow_shared_alloc =
4059 Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
4061 if (UseTLAB) {
4062 #ifndef OPT_THREAD
4063 const Register thread = T8;
4064 __ get_thread(thread);
4065 #else
4066 const Register thread = TREG;
4067 #endif
4068 // get tlab_top
4069 __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
4070 __ dadd(T2, FSR, T0);
4071 // get tlab_end
4072 __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
4073 __ slt(AT, AT, T2);
4074 // __ bne(AT, R0, allocate_shared);
4075 __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case);
4076 __ delayed()->nop();
4077 __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset()));
4079 if (ZeroTLAB) {
4080 // the fields have been already cleared
4081 __ b_far(initialize_header);
4082 } else {
4083 // initialize both the header and fields
4084 __ b_far(initialize_object);
4085 }
4086 __ delayed()->nop();
4087 /*
4089 if (CMSIncrementalMode) {
4090 // No allocation in shared eden.
4091 ///__ jmp(slow_case);
4092 __ b(slow_case);
4093 __ delayed()->nop();
4094 }
4095 */
4096 }
4098 // Allocation in the shared Eden, if allowed
4099 // T0 : instance size in bytes
4100 if (allow_shared_alloc) {
4101 __ bind(allocate_shared);
4102 Label retry;
4103 //Address heap_top(T1, (int)Universe::heap()->top_addr());
4104 Address heap_top(T1);
4105 //__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
4106 __ li(T1, (long)Universe::heap()->top_addr());
4108 __ ld(FSR, heap_top);
4109 __ bind(retry);
4110 __ dadd(T2, FSR, T0);
4111 //__ lui(AT, Assembler::split_high((int)Universe::heap()->end_addr()));
4112 //__ lw(AT, AT, Assembler::split_low((int)Universe::heap()->end_addr()));
4113 __ li(AT, (long)Universe::heap()->end_addr());
4114 __ ld(AT, AT, 0);
4115 __ slt(AT, AT, T2);
4116 __ bne(AT, R0, slow_case);
4117 __ delayed()->nop();
4119 // Compare FSR with the top addr, and if still equal, store the new
4120 // top addr (T2) at the address of the top addr pointer. On MIPS there is
4121 // no ZF/lock prefix: the cmpxchg macro leaves its success flag in AT.
4122 //
4123 // FSR: object begin
4124 // T2: object end
4125 // T0: instance size in bytes
4127 // if someone beat us on the allocation, try again, otherwise continue
4128 //__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
4129 __ cmpxchg(T2, heap_top, FSR);
4130 __ beq(AT, R0, retry);
4131 __ delayed()->nop();
4132 }
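// Illustrative sketch of the retry loop above (eden_top and eden_end
// are assumed names for the words at Universe::heap()->top_addr()
// and end_addr()):
//
//   HeapWord *old_top, *new_top;
//   do {
//     old_top = *eden_top;
//     new_top = old_top + size;
//     if (new_top > *eden_end) goto slow_case;    // shared eden exhausted
//   } while (!cas(eden_top, old_top, new_top));   // lost the race: retry
//   obj = old_top;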
4134 if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
4135 // The object is initialized before the header. If the object size is
4136 // zero, go directly to the header initialization.
4137 __ bind(initialize_object);
4138 __ li(AT, - sizeof(oopDesc));
4139 __ daddu(T0, T0, AT);
4140 __ beq_far(T0, R0, initialize_header);
4141 __ delayed()->nop();
4144 // T0 must be a multiple of 2
4145 #ifdef ASSERT
4146 // make sure T0 is a multiple of 2
4147 Label L;
4148 __ andi(AT, T0, 1);
4149 __ beq(AT, R0, L);
4150 __ delayed()->nop();
4151 __ stop("object size is not multiple of 2 - adjust this code");
4152 __ bind(L);
4153 // T0 must be > 0, no extra check needed here
4154 #endif
4156 // initialize remaining object fields: T0 is a multiple of 2
4157 {
4158 Label loop;
4159 __ dadd(T1, FSR, T0);
4160 __ daddi(T1, T1, -oopSize);
4162 __ bind(loop);
4163 __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize);
4164 // __ sd(R0, T1, sizeof(oopDesc) + 1 * oopSize);
4165 __ bne(T1, FSR, loop); // don't clear the header
4166 __ delayed()->daddi(T1, T1, -oopSize);
4167 // since sizeof(oopDesc) == 8, the decrement (daddi ..., -oopSize) can sit
4168 // in the delay slot while FSR is compared against T1
4169 }
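// Illustrative sketch of the clearing loop above in plain C
// (field_bytes is an assumed name for the value left in T0:
// instance size minus the header, a multiple of the word size):
//
//   char* fields = (char*)obj + sizeof(oopDesc);      // skip the header
//   for (int off = field_bytes - oopSize; off >= 0; off -= oopSize)
//     *(intptr_t*)(fields + off) = 0;                 // one word at a time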
4170 // klass in T3
4171 // initialize object header only.
4172 __ bind(initialize_header);
4173 if (UseBiasedLocking) {
4174 // __ popl(ecx); // get saved klass back in the register.
4175 // __ movl(ebx, Address(ecx, Klass::prototype_header_offset_in_bytes()
4176 // + klassOopDesc::klass_part_offset_in_bytes()));
4177 __ ld(AT, T3, in_bytes(Klass::prototype_header_offset()));
4178 // __ movl(Address(eax, oopDesc::mark_offset_in_bytes ()), ebx);
4179 __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ());
4180 } else {
4181 __ li(AT, (long)markOopDesc::prototype());
4182 __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
4183 }
4185 //__ sd(T3, FSR, oopDesc::klass_offset_in_bytes());
4186 __ store_klass_gap(FSR, R0);
4187 __ store_klass(FSR, T3);
4189 {
4190 SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
4191 // Trigger dtrace event for fastpath
4192 __ push(atos);
4193 __ call_VM_leaf(
4194 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
4195 __ pop(atos);
4196 }
4197 __ b(done);
4198 __ delayed()->nop();
4199 }
4200 // slow case
4201 __ bind(slow_case);
4202 call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
4204 // continue
4205 __ bind(done);
4206 __ sync();
4207 }
4209 void TemplateTable::newarray() {
4210 transition(itos, atos);
4211 __ lbu(A1, at_bcp(1));
4212 //type, count
4213 call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
4214 __ sync();
4215 }
4217 void TemplateTable::anewarray() {
4218 transition(itos, atos);
4219 __ get_2_byte_integer_at_bcp(A2, AT, 1);
4220 __ huswap(A2);
4221 __ get_constant_pool(A1);
4222 // cp, index, count
4223 call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
4224 __ sync();
4225 }
4227 void TemplateTable::arraylength() {
4228 transition(atos, itos);
4229 __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
4230 __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
4231 }
4233 // We use T2 as ebx, T3 as ecx, T1 as edx
4234 // When invoking gen_subtype_check: super klass in T3, sub klass in T2, object always in FSR
4235 // T2 : sub klass
4236 // T3 : cpool, later super klass
4238 void TemplateTable::checkcast() {
4239 transition(atos, atos);
4240 Label done, is_null, ok_is_subtype, quicked, resolved;
4241 __ beq(FSR, R0, is_null);
4242 __ delayed()->nop();
4244 // Get cpool & tags index
4245 __ get_cpool_and_tags(T3, T1);
4246 __ get_2_byte_integer_at_bcp(T2, AT, 1);
4247 __ huswap(T2);
4249 // See if bytecode has already been quicked
4250 __ dadd(AT, T1, T2);
4251 __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
4252 __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
4253 __ beq(AT, R0, quicked);
4254 __ delayed()->nop();
4256 /* 2012/6/2 Jin: In InterpreterRuntime::quicken_io_cc, many new classes may be loaded,
4257 * and GC may then move the object in V0 to another place in the heap.
4258 * Therefore, we must never keep such an object in a register;
4259 * instead, we save it on the stack, where the GC thread can update it automatically.
4260 * After GC, the object address in FSR refers to the new location.
4261 */
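// Hence the pattern used below: stack slots are visited and fixed up
// by GC while registers are not, so the oop round-trips through the
// expression stack across the VM call:
//
//   __ push(atos);    // spill the object where GC can update it
//   call_VM(...);     // may load classes and trigger GC
//   __ pop_ptr(FSR);  // reload the (possibly moved) object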
4262 __ push(atos);
4263 const Register thread = TREG;
4264 #ifndef OPT_THREAD
4265 __ get_thread(thread);
4266 #endif
4267 call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4268 __ get_vm_result_2(T3, thread);
4269 __ pop_ptr(FSR);
4270 __ b(resolved);
4271 __ delayed()->nop();
4273 // klass already in cp, get superklass in T3
4274 __ bind(quicked);
4275 __ dsll(AT, T2, Address::times_8);
4276 __ dadd(AT, T3, AT);
4277 __ ld(T3, AT, sizeof(ConstantPool));
4279 __ bind(resolved);
4281 // get subklass in T2
4282 //__ ld(T2, FSR, oopDesc::klass_offset_in_bytes());
4283 //add for compressedoops
4284 __ load_klass(T2, FSR);
4285 // Superklass in T3. Subklass in T2.
4286 __ gen_subtype_check(T3, T2, ok_is_subtype);
4288 // Come here on failure
4289 // object is at FSR
4290 __ jmp(Interpreter::_throw_ClassCastException_entry);
4291 __ delayed()->nop();
4293 // Come here on success
4294 __ bind(ok_is_subtype);
4296 // Collect counts on whether this check-cast sees NULLs a lot or not.
4297 if (ProfileInterpreter) {
4298 __ b(done);
4299 __ delayed()->nop();
4300 __ bind(is_null);
4301 __ profile_null_seen(T3);
4302 } else {
4303 __ bind(is_null);
4304 }
4305 __ bind(done);
4306 }
4308 // We use T3 as cpool, T1 as tags, T2 as index
4309 // object always in FSR, superklass in T3, subklass in T2
4310 void TemplateTable::instanceof() {
4311 transition(atos, itos);
4312 Label done, is_null, ok_is_subtype, quicked, resolved;
4314 __ beq(FSR, R0, is_null);
4315 __ delayed()->nop();
4317 // Get cpool & tags index
4318 __ get_cpool_and_tags(T3, T1);
4319 // get index
4320 __ get_2_byte_integer_at_bcp(T2, AT, 1);
4321 __ hswap(T2);
4323 // See if bytecode has already been quicked
4324 // quicked
4325 __ daddu(AT, T1, T2);
4326 __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
4327 __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
4328 __ beq(AT, R0, quicked);
4329 __ delayed()->nop();
4331 // get superklass in T3
4332 //__ move(TSR, FSR);
4333 // sometimes S2 may be changed during the call,
4334 // be careful if u use TSR as a saving place
4335 //__ push(FSR);
4336 __ push(atos);
4337 const Register thread = TREG;
4338 #ifndef OPT_THREAD
4339 __ get_thread(thread);
4340 #endif
4341 call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4342 __ get_vm_result_2(T3, thread);
4343 //__ lw(FSR, SP, 0);
4344 __ pop_ptr(FSR);
4345 __ b(resolved);
4346 __ delayed()->nop();
4347 //__ move(FSR, TSR);
4349 // get superklass in T3, subklass in T2
4350 __ bind(quicked);
4351 __ dsll(AT, T2, Address::times_8);
4352 __ daddu(AT, T3, AT);
4353 __ ld(T3, AT, sizeof(ConstantPool));
4355 __ bind(resolved);
4356 // get subklass in T2
4357 //__ ld(T2, FSR, oopDesc::klass_offset_in_bytes());
4358 //add for compressedoops
4359 __ load_klass(T2, FSR);
4361 // Superklass in T3. Subklass in T2.
4362 __ gen_subtype_check(T3, T2, ok_is_subtype);
4363 // Come here on failure
4364 __ b(done);
4365 __ delayed(); __ move(FSR, R0);
4367 // Come here on success
4368 __ bind(ok_is_subtype);
4369 __ move(FSR, 1);
4371 // Collect counts on whether this test sees NULLs a lot or not.
4372 if (ProfileInterpreter) {
4373 __ beq(R0, R0, done);
4374 __ nop();
4375 __ bind(is_null);
4376 __ profile_null_seen(T3);
4377 } else {
4378 __ bind(is_null); // same as 'done'
4379 }
4380 __ bind(done);
4381 // FSR = 0: obj == NULL or obj is not an instanceof the specified klass
4382 // FSR = 1: obj != NULL and obj is an instanceof the specified klass
4383 }
4385 //--------------------------------------------------------
4386 //--------------------------------------------
4387 // Breakpoints
4388 void TemplateTable::_breakpoint() {
4390 // Note: We get here even if we are single stepping.
4391 // jbug insists on setting breakpoints at every bytecode
4392 // even if we are in single step mode.
4394 transition(vtos, vtos);
4396 // get the unpatched byte code
4397 ///__ get_method(ecx);
4398 ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at)
4399 //, ecx, esi);
4400 ///__ movl(ebx, eax);
4401 __ get_method(A1);
4402 __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at),
4403 A1, BCP);
4404 __ move(Rnext, V0); // Jin: Rnext will be used in dispatch_only_normal
4406 // post the breakpoint event
4407 ///__ get_method(ecx);
4408 ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ecx, esi);
4409 __ get_method(A1);
4410 __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
4412 // complete the execution of original bytecode
4413 __ dispatch_only_normal(vtos);
4414 }
4416 //----------------------------------------------------------------------------------------------------
4417 // Exceptions
4419 void TemplateTable::athrow() {
4420 transition(atos, vtos);
4421 __ null_check(FSR);
4422 __ jmp(Interpreter::throw_exception_entry());
4423 __ delayed()->nop();
4424 }
4426 //----------------------------------------------------------------------------------------------------
4427 // Synchronization
4428 //
4429 // Note: monitorenter & exit are symmetric routines, which is reflected
4430 // in the assembly code structure as well
4431 //
4432 // Stack layout:
4433 //
4434 // [expressions ] <--- SP = expression stack top
4435 // ..
4436 // [expressions ]
4437 // [monitor entry] <--- monitor block top = expression stack bot
4438 // ..
4439 // [monitor entry]
4440 // [frame data ] <--- monitor block bot
4441 // ...
4442 // [return addr ] <--- FP
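// Illustrative sketch of the free-slot search below (block_top and
// block_bot are assumed names for the monitor block bounds in T2/T3):
//
//   BasicObjectLock* free_slot = NULL;
//   for (BasicObjectLock* e = block_top; e != block_bot; e++) {
//     if (e->obj() == NULL) free_slot = e;  // remember a free slot
//     if (e->obj() == obj)  break;          // object already has an entry
//   }
//   // if free_slot is still NULL, a new entry is allocated below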
4444 // we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer
4445 // object always in FSR
4446 void TemplateTable::monitorenter() {
4447 transition(atos, vtos);
4448 // check for NULL object
4449 __ null_check(FSR);
4451 const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset
4452 * wordSize);
4453 const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
4454 Label allocated;
4456 // initialize entry pointer
4457 __ move(c_rarg0, R0);
4459 // find a free slot in the monitor block (result in c_rarg0)
4460 {
4461 Label entry, loop, exit, next;
4462 __ ld(T2, monitor_block_top);
4463 __ b(entry);
4464 __ delayed()->daddi(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
4466 // free slot?
4467 __ bind(loop);
4468 __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes());
4469 __ bne(AT, R0, next);
4470 __ delayed()->nop();
4471 __ move(c_rarg0, T2);
4473 __ bind(next);
4474 __ beq(FSR, AT, exit);
4475 __ delayed()->nop();
4476 __ daddi(T2, T2, entry_size);
4478 __ bind(entry);
4479 __ bne(T3, T2, loop);
4480 __ delayed()->nop();
4481 __ bind(exit);
4482 }
4484 __ bne(c_rarg0, R0, allocated);
4485 __ delayed()->nop();
4487 // allocate one if there's no free slot
4488 {
4489 Label entry, loop;
4490 // 1. compute new pointers // SP: old expression stack top
4491 __ ld(c_rarg0, monitor_block_top);
4492 __ daddi(SP, SP, - entry_size);
4493 __ daddi(c_rarg0, c_rarg0, - entry_size);
4494 __ sd(c_rarg0, monitor_block_top);
4495 __ b(entry);
4496 __ delayed(); __ move(T3, SP);
4498 // 2. move expression stack contents
4499 __ bind(loop);
4500 __ ld(AT, T3, entry_size);
4501 __ sd(AT, T3, 0);
4502 __ daddi(T3, T3, wordSize);
4503 __ bind(entry);
4504 __ bne(T3, c_rarg0, loop);
4505 __ delayed()->nop();
4506 }
4508 __ bind(allocated);
4509 // Increment bcp to point to the next bytecode,
4510 // so exception handling for async. exceptions works correctly.
4511 // The object has already been popped from the stack, so the
4512 // expression stack looks correct.
4513 __ daddi(BCP, BCP, 1);
4514 __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
4515 __ lock_object(c_rarg0);
4516 // check to make sure this monitor doesn't cause stack overflow after locking
4517 __ save_bcp(); // in case of exception
4518 __ generate_stack_overflow_check(0);
4519 // The bcp has already been incremented. Just need to dispatch to next instruction.
4521 __ dispatch_next(vtos);
4522 }
4524 // T2 : top
4525 // c_rarg0 : entry
4526 void TemplateTable::monitorexit() {
4527 transition(atos, vtos);
4529 __ null_check(FSR);
4531 const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
4532 Label found;
4534 // find matching slot
4535 {
4536 Label entry, loop;
4537 __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
4538 __ b(entry);
4539 __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
4541 __ bind(loop);
4542 __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
4543 __ beq(FSR, AT, found);
4544 __ delayed()->nop();
4545 __ daddiu(c_rarg0, c_rarg0, entry_size);
4546 __ bind(entry);
4547 __ bne(T2, c_rarg0, loop);
4548 __ delayed()->nop();
4549 }
4551 // error handling. Unlocking was not block-structured
4552 Label end;
4553 __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
4554 InterpreterRuntime::throw_illegal_monitor_state_exception));
4555 __ should_not_reach_here();
4557 // call run-time routine
4558 // c_rarg0: points to monitor entry
4559 __ bind(found);
4560 __ move(TSR, FSR);
4561 __ unlock_object(c_rarg0);
4562 __ move(FSR, TSR);
4563 __ bind(end);
4564 }
4566 //--------------------------------------------------------------------------------------------------
// Wide instructions
4568 void TemplateTable::wide() {
4569 transition(vtos, vtos);
4570 // Note: the bcp increment step is part of the individual wide bytecode implementations
4571 __ lbu(Rnext, at_bcp(1));
4572 __ dsll(T9, Rnext, Address::times_8);
4573 __ li(AT, (long)Interpreter::_wentry_point);
4574 __ dadd(AT, T9, AT);
4575 __ ld(T9, AT, 0);
4576 __ jr(T9);
4577 __ delayed()->nop();
4578 }
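// Illustrative sketch of the dispatch above (conceptual only; the
// generated code jumps via jr T9 instead of calling):
//
//   unsigned bc = *(bcp + 1);                        // widened bytecode
//   address entry = Interpreter::_wentry_point[bc];  // 8-byte entries
//   goto *entry;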
4580 //--------------------------------------------------------------------------------------------------
// Multi arrays
4582 void TemplateTable::multianewarray() {
4583 transition(vtos, atos);
4584 // last dim is on top of stack; we want address of first one:
4585 // first_addr = last_addr + (ndims - 1) * wordSize
4586 __ lbu(A1, at_bcp(3)); // dimension
4587 __ daddi(A1, A1, -1);
4588 __ dsll(A1, A1, Address::times_8);
4589 __ dadd(A1, SP, A1); // now A1 pointer to the count array on the stack
4590 call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
4591 __ lbu(AT, at_bcp(3));
4592 __ dsll(AT, AT, Address::times_8);
4593 __ dadd(SP, SP, AT);
4594 __ sync();
4595 }
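// Worked example for the operand addressing above: with ndims == 3,
// the three dimension counts sit on the expression stack and the
// first one is (ndims - 1) * wordSize above SP, so A1 = SP + 2 * wordSize.
// After the call, SP is popped by ndims * wordSize (3 words) to drop
// all three counts.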
4597 #endif // !CC_INTERP