Mon, 26 Sep 2011 10:24:05 -0700
7081933: Use zeroing elimination optimization for large array
Summary: Don't zero new typeArray during runtime call if the allocation is followed by arraycopy into it.
Reviewed-by: twisti
1 /*
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "assembler_sparc.inline.hpp"
28 #include "code/vtableStubs.hpp"
29 #include "interp_masm_sparc.hpp"
30 #include "memory/resourceArea.hpp"
31 #include "oops/instanceKlass.hpp"
32 #include "oops/klassVtable.hpp"
33 #include "runtime/sharedRuntime.hpp"
34 #include "vmreg_sparc.inline.hpp"
35 #ifdef COMPILER2
36 #include "opto/runtime.hpp"
37 #endif
39 // machine-dependent part of VtableStubs: create vtableStub of correct size and
40 // initialize its code
42 #define __ masm->
45 #ifndef PRODUCT
46 extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
47 #endif
50 // Used by compiler only; may use only caller saved, non-argument registers
51 // NOTE: %%%% if any change is made to this stub make sure that the function
52 // pd_code_size_limit is changed to ensure the correct size for VtableStub
53 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
54 const int sparc_code_length = VtableStub::pd_code_size_limit(true);
55 VtableStub* s = new(sparc_code_length) VtableStub(true, vtable_index);
56 ResourceMark rm;
57 CodeBuffer cb(s->entry_point(), sparc_code_length);
58 MacroAssembler* masm = new MacroAssembler(&cb);
60 #ifndef PRODUCT
61 if (CountCompiledCalls) {
62 __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G5, G3_scratch);
63 }
64 #endif /* PRODUCT */
66 assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0");
68 // get receiver klass
69 address npe_addr = __ pc();
70 __ load_klass(O0, G3_scratch);
72 // set methodOop (in case of interpreted method), and destination address
73 int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
74 #ifndef PRODUCT
75 if (DebugVtables) {
76 Label L;
77 // check offset vs vtable length
78 __ ld(G3_scratch, instanceKlass::vtable_length_offset()*wordSize, G5);
79 __ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L);
80 __ set(vtable_index, O2);
81 __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2);
82 __ bind(L);
83 }
84 #endif
85 int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
86 if( __ is_simm13(v_off) ) {
87 __ ld_ptr(G3, v_off, G5_method);
88 } else {
89 __ set(v_off,G5);
90 __ ld_ptr(G3, G5, G5_method);
91 }
93 #ifndef PRODUCT
94 if (DebugVtables) {
95 Label L;
96 __ br_notnull_short(G5_method, Assembler::pt, L);
97 __ stop("Vtable entry is ZERO");
98 __ bind(L);
99 }
100 #endif
102 address ame_addr = __ pc(); // if the vtable entry is null, the method is abstract
103 // NOTE: for vtable dispatches, the vtable entry will never be null.
105 __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3_scratch);
107 // jump to target (either compiled code or c2iadapter)
108 __ JMP(G3_scratch, 0);
109 // load methodOop (in case we call c2iadapter)
110 __ delayed()->nop();
112 masm->flush();
114 if (PrintMiscellaneous && (WizardMode || Verbose)) {
115 tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
116 vtable_index, s->entry_point(),
117 (int)(s->code_end() - s->entry_point()),
118 (int)(s->code_end() - __ pc()));
119 }
120 guarantee(__ pc() <= s->code_end(), "overflowed buffer");
121 // shut the door on sizing bugs
122 int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one
123 assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
125 s->set_exception_points(npe_addr, ame_addr);
126 return s;
127 }
130 // NOTE: %%%% if any change is made to this stub make sure that the function
131 // pd_code_size_limit is changed to ensure the correct size for VtableStub
132 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
133 const int sparc_code_length = VtableStub::pd_code_size_limit(false);
134 VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index);
135 ResourceMark rm;
136 CodeBuffer cb(s->entry_point(), sparc_code_length);
137 MacroAssembler* masm = new MacroAssembler(&cb);
139 Register G3_klassOop = G3_scratch;
140 Register G5_interface = G5; // Passed in as an argument
141 Label search;
143 // Entry arguments:
144 // G5_interface: Interface
145 // O0: Receiver
146 assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0");
148 // get receiver klass (also an implicit null-check)
149 address npe_addr = __ pc();
150 __ load_klass(O0, G3_klassOop);
151 __ verify_oop(G3_klassOop);
153 // Push a new window to get some temp registers. This chops the head of all
154 // my 64-bit %o registers in the LION build, but this is OK because no longs
155 // are passed in the %o registers. Instead, longs are passed in G1 and G4
156 // and so those registers are not available here.
157 __ save(SP,-frame::register_save_words*wordSize,SP);
159 #ifndef PRODUCT
160 if (CountCompiledCalls) {
161 __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), L0, L1);
162 }
163 #endif /* PRODUCT */
165 Label throw_icce;
167 Register L5_method = L5;
168 __ lookup_interface_method(// inputs: rec. class, interface, itable index
169 G3_klassOop, G5_interface, itable_index,
170 // outputs: method, scan temp. reg
171 L5_method, L2, L3,
172 throw_icce);
174 #ifndef PRODUCT
175 if (DebugVtables) {
176 Label L01;
177 __ br_notnull_short(L5_method, Assembler::pt, L01);
178 __ stop("methodOop is null");
179 __ bind(L01);
180 __ verify_oop(L5_method);
181 }
182 #endif
184 // If the following load is through a NULL pointer, we'll take an OS
185 // exception that should translate into an AbstractMethodError. We need the
186 // window count to be correct at that time.
187 __ restore(L5_method, 0, G5_method);
188 // Restore registers *before* the AME point.
190 address ame_addr = __ pc(); // if the vtable entry is null, the method is abstract
191 __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3_scratch);
193 // G5_method: methodOop
194 // O0: Receiver
195 // G3_scratch: entry point
196 __ JMP(G3_scratch, 0);
197 __ delayed()->nop();
199 __ bind(throw_icce);
200 AddressLiteral icce(StubRoutines::throw_IncompatibleClassChangeError_entry());
201 __ jump_to(icce, G3_scratch);
202 __ delayed()->restore();
204 masm->flush();
206 if (PrintMiscellaneous && (WizardMode || Verbose)) {
207 tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
208 itable_index, s->entry_point(),
209 (int)(s->code_end() - s->entry_point()),
210 (int)(s->code_end() - __ pc()));
211 }
212 guarantee(__ pc() <= s->code_end(), "overflowed buffer");
213 // shut the door on sizing bugs
214 int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one
215 assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
217 s->set_exception_points(npe_addr, ame_addr);
218 return s;
219 }
222 int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
223 if (TraceJumps || DebugVtables || CountCompiledCalls || VerifyOops) return 1000;
224 else {
225 const int slop = 2*BytesPerInstWord; // sethi;add (needed for long offsets)
226 if (is_vtable_stub) {
227 // ld;ld;ld,jmp,nop
228 const int basic = 5*BytesPerInstWord +
229 // shift;add for load_klass (only shift with zero heap based)
230 (UseCompressedOops ?
231 ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
232 return basic + slop;
233 } else {
234 const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
235 // shift;add for load_klass (only shift with zero heap based)
236 (UseCompressedOops ?
237 ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
238 return (basic + slop);
239 }
240 }
242 // In order to tune these parameters, run the JVM with VM options
243 // +PrintMiscellaneous and +WizardMode to see information about
244 // actual itable stubs. Look for lines like this:
245 // itable #1 at 0x5551212[116] left over: 8
246 // Reduce the constants so that the "left over" number is 8
247 // Do not aim at a left-over number of zero, because a very
248 // large vtable or itable offset (> 4K) will require an extra
249 // sethi/or pair of instructions.
250 //
251 // The JVM98 app. _202_jess has a megamorphic interface call.
252 // The itable code looks like this:
253 // Decoding VtableStub itbl[1]@16
254 // ld [ %o0 + 4 ], %g3
255 // save %sp, -64, %sp
256 // ld [ %g3 + 0xe8 ], %l2
257 // sll %l2, 2, %l2
258 // add %l2, 0x134, %l2
259 // and %l2, -8, %l2 ! NOT_LP64 only
260 // add %g3, %l2, %l2
261 // add %g3, 4, %g3
262 // ld [ %l2 ], %l5
263 // brz,pn %l5, throw_icce
264 // cmp %l5, %g5
265 // be %icc, success
266 // add %l2, 8, %l2
267 // loop:
268 // ld [ %l2 ], %l5
269 // brz,pn %l5, throw_icce
270 // cmp %l5, %g5
271 // bne,pn %icc, loop
272 // add %l2, 8, %l2
273 // success:
274 // ld [ %l2 + -4 ], %l2
275 // ld [ %g3 + %l2 ], %l5
276 // restore %l5, 0, %g5
277 // ld [ %g5 + 0x44 ], %g3
278 // jmp %g3
279 // nop
280 // throw_icce:
281 // sethi %hi(throw_ICCE_entry), %g3
282 // ! 5 more instructions here, LP64_ONLY
283 // jmp %g3 + %lo(throw_ICCE_entry)
284 // restore
285 }
288 int VtableStub::pd_code_alignment() {
289 // UltraSPARC cache line size is 8 instructions:
290 const unsigned int icache_line_size = 32;
291 return icache_line_size;
292 }