--- a/src/cpu/sparc/vm/sparc.ad	Thu Aug 25 20:29:30 2011 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Aug 26 08:52:22 2011 -0700
@@ -460,6 +460,8 @@
 // Must be visible to the DFA in dfa_sparc.cpp
 extern bool can_branch_register( Node *bol, Node *cmp );
 
+extern bool use_block_zeroing(Node* count);
+
 // Macros to extract hi & lo halves from a long pair.
 // G0 is not part of any long pair, so assert on that.
 // Prevents accidentally using G1 instead of G0.
@@ -521,6 +523,12 @@
   return false;
 }
 
+bool use_block_zeroing(Node* count) {
+  // Use BIS for zeroing if count is not constant
+  // or it is >= BlockZeroingLowLimit.
+  return UseBlockZeroing && (count->find_intptr_t_con(BlockZeroingLowLimit) >= BlockZeroingLowLimit);
+}
+
 // ****************************************************************************
 
 // REQUIRED FUNCTIONALITY
@@ -2810,25 +2818,6 @@
     __ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst);
   %}
 
-  // Compiler ensures base is doubleword aligned and cnt is count of doublewords
-  enc_class enc_Clear_Array(iRegX cnt, iRegP base, iRegX temp) %{
-    MacroAssembler _masm(&cbuf);
-    Register nof_bytes_arg = reg_to_register_object($cnt$$reg);
-    Register nof_bytes_tmp = reg_to_register_object($temp$$reg);
-    Register base_pointer_arg = reg_to_register_object($base$$reg);
-
-    Label loop;
-    __ mov(nof_bytes_arg, nof_bytes_tmp);
-
-    // Loop and clear, walking backwards through the array.
-    // nof_bytes_tmp (if >0) is always the number of bytes to zero
-    __ bind(loop);
-    __ deccc(nof_bytes_tmp, 8);
-    __ br(Assembler::greaterEqual, true, Assembler::pt, loop);
-    __ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
-    // %%%% this mini-loop must not cross a cache boundary!
-  %}
-
 
   enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
     Label Ldone, Lloop;
@@ -10257,9 +10246,9 @@
   ins_pipe(long_memory_op);
 %}
 
-// Count and Base registers are fixed because the allocator cannot
-// kill unknown registers. The encodings are generic.
+// The encodings are generic.
 instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
+  predicate(!use_block_zeroing(n->in(2)) );
   match(Set dummy (ClearArray cnt base));
   effect(TEMP temp, KILL ccr);
   ins_cost(300);
@@ -10267,7 +10256,77 @@
             "loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n"
             " BRge loop\t\t! Clearing loop\n"
             " STX G0,[$base+$temp]\t! delay slot" %}
-  ins_encode( enc_Clear_Array(cnt, base, temp) );
+
+  ins_encode %{
+    // Compiler ensures base is doubleword aligned and cnt is count of doublewords
+    Register nof_bytes_arg = $cnt$$Register;
+    Register nof_bytes_tmp = $temp$$Register;
+    Register base_pointer_arg = $base$$Register;
+
+    Label loop;
+    __ mov(nof_bytes_arg, nof_bytes_tmp);
+
+    // Loop and clear, walking backwards through the array.
+    // nof_bytes_tmp (if >0) is always the number of bytes to zero
+    __ bind(loop);
+    __ deccc(nof_bytes_tmp, 8);
+    __ br(Assembler::greaterEqual, true, Assembler::pt, loop);
+    __ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
+    // %%%% this mini-loop must not cross a cache boundary!
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// Block-zeroing variant for the case where BlockZeroingLowLimit fits a simm13
+// immediate, so no temp register is handed to bis_zeroing (G0 is passed).
+// The predicate is the exact complement of clear_array_bis_2's, so the two
+// equal-cost rules never both apply to the same ClearArray node.
+instruct clear_array_bis(g1RegX cnt, o0RegP base, Universe dummy, flagsReg ccr) %{
+  predicate(use_block_zeroing(n->in(2)) && Assembler::is_simm13((int)BlockZeroingLowLimit));
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base, KILL ccr);
+  ins_cost(300);
+  format %{ "CLEAR [$base, $cnt]\t! ClearArray" %}
+
+  ins_encode %{
+
+    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
+    Register to = $base$$Register;
+    Register count = $cnt$$Register;
+
+    Label Ldone;
+    __ nop(); // Separate short branches
+    // Use BIS for zeroing (temp is not used).
+    __ bis_zeroing(to, count, G0, Ldone);
+    __ bind(Ldone);
+
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// Block-zeroing variant for a BlockZeroingLowLimit that does not fit a simm13
+// immediate; a TEMP register is reserved and passed to bis_zeroing.
+instruct clear_array_bis_2(g1RegX cnt, o0RegP base, iRegX tmp, Universe dummy, flagsReg ccr) %{
+  predicate(use_block_zeroing(n->in(2)) && !Assembler::is_simm13((int)BlockZeroingLowLimit));
+  match(Set dummy (ClearArray cnt base));
+  effect(TEMP tmp, USE_KILL cnt, USE_KILL base, KILL ccr);
+  ins_cost(300);
+  format %{ "CLEAR [$base, $cnt]\t! ClearArray" %}
+
+  ins_encode %{
+
+    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
+    Register to = $base$$Register;
+    Register count = $cnt$$Register;
+    Register temp = $tmp$$Register;
+
+    Label Ldone;
+    __ nop(); // Separate short branches
+    // Use BIS for zeroing
+    __ bis_zeroing(to, count, temp, Ldone);
+    __ bind(Ldone);
+
+  %}
   ins_pipe(long_memory_op);
 %}
 