src/cpu/x86/vm/assembler_x86.cpp

changeset 2118
d6f45b55c972
parent 2045
36519c19beeb
child 2148
d257356e35f0
     1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Fri Aug 20 09:55:50 2010 -0700
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Aug 27 17:33:49 2010 -0700
     1.3 @@ -8767,6 +8767,186 @@
     1.4    bind(DONE);
     1.5  }
     1.6  
     1.7 +#ifdef PRODUCT
     1.8 +#define BLOCK_COMMENT(str) /* nothing: the comment text is dropped when PRODUCT is defined */
     1.9 +#else
    1.10 +#define BLOCK_COMMENT(str) block_comment(str)
    1.11 +#endif
    1.12 +// BIND(label) binds the label and then passes the stringized label name followed by ":" to BLOCK_COMMENT. It expands to two statements, so it must not be used as the unbraced body of an if/else/loop.
    1.13 +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
    1.14 +void MacroAssembler::generate_fill(BasicType t, bool aligned,  // Emits code that stores 'value' into 'count' elements of type t starting at address 'to'; 'aligned' == true skips the destination-alignment prologue.
    1.15 +                                   Register to, Register value, Register count,  // 'to' and 'count' are updated by the emitted code; 'value' is rewritten for T_BYTE/T_SHORT.
    1.16 +                                   Register rtmp, XMMRegister xtmp) {  // rtmp: scratch used to widen 'value'; xtmp: only used on the UseSSE >= 2 path.
    1.17 +  assert_different_registers(to, value, count, rtmp);
    1.18 +  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
    1.19 +  Label L_fill_2_bytes, L_fill_4_bytes;
    1.20 +  // shift = log2(elements per 4 bytes): count is in elements, so (1 << shift) elements == 4 bytes.
    1.21 +  int shift = -1;
    1.22 +  switch (t) {
    1.23 +    case T_BYTE:
    1.24 +      shift = 2;
    1.25 +      break;
    1.26 +    case T_SHORT:
    1.27 +      shift = 1;
    1.28 +      break;
    1.29 +    case T_INT:
    1.30 +      shift = 0;
    1.31 +      break;
    1.32 +    default: ShouldNotReachHere();  // only T_BYTE, T_SHORT and T_INT are handled
    1.33 +  }
    1.34 +  // Replicate the element across all 32 bits of 'value' so that wider stores write the same pattern.
    1.35 +  if (t == T_BYTE) {
    1.36 +    andl(value, 0xff);
    1.37 +    movl(rtmp, value);
    1.38 +    shll(rtmp, 8);
    1.39 +    orl(value, rtmp);  // low 16 bits now hold the byte twice
    1.40 +  }
    1.41 +  if (t == T_SHORT) {
    1.42 +    andl(value, 0xffff);
    1.43 +  }
    1.44 +  if (t == T_BYTE || t == T_SHORT) {
    1.45 +    movl(rtmp, value);
    1.46 +    shll(rtmp, 16);
    1.47 +    orl(value, rtmp);  // copy the low 16 bits into the high 16 bits
    1.48 +  }
    1.49 +
    1.50 +  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
    1.51 +  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
    1.52 +  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    1.53 +    // align destination address 'to' at a 4-byte boundary
    1.54 +    if (t == T_BYTE) {
    1.55 +      // One byte misalignment happens only for byte arrays
    1.56 +      testptr(to, 1);
    1.57 +      jccb(Assembler::zero, L_skip_align1);
    1.58 +      movb(Address(to, 0), value);
    1.59 +      increment(to);
    1.60 +      decrement(count);
    1.61 +      BIND(L_skip_align1);
    1.62 +    }
    1.63 +    // Two bytes misalignment happens only for byte and short (char) arrays
    1.64 +    testptr(to, 2);
    1.65 +    jccb(Assembler::zero, L_skip_align2);
    1.66 +    movw(Address(to, 0), value);
    1.67 +    addptr(to, 2);
    1.68 +    subl(count, 1<<(shift-1));  // 2 bytes were stored: 2 byte elements or 1 short element
    1.69 +    BIND(L_skip_align2);
    1.70 +  }
    1.71 +  if (UseSSE < 2) {  // decided while generating the code: this path stores only from the 32-bit 'value' register
    1.72 +    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    1.73 +    // Fill 32-byte chunks
    1.74 +    subl(count, 8 << shift);  // (8 << shift) elements == 32 bytes; count stays biased by that amount inside the loop
    1.75 +    jcc(Assembler::less, L_check_fill_8_bytes);
    1.76 +    align(16);
    1.77 +
    1.78 +    BIND(L_fill_32_bytes_loop);
    1.79 +
    1.80 +    for (int i = 0; i < 32; i += 4) {  // eight 4-byte stores cover one 32-byte chunk
    1.81 +      movl(Address(to, i), value);
    1.82 +    }
    1.83 +
    1.84 +    addptr(to, 32);
    1.85 +    subl(count, 8 << shift);
    1.86 +    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    1.87 +    BIND(L_check_fill_8_bytes);
    1.88 +    addl(count, 8 << shift);  // undo the bias: count is again the number of elements left to fill
    1.89 +    jccb(Assembler::zero, L_exit);
    1.90 +    jmpb(L_fill_8_bytes);
    1.91 +
    1.92 +    //
    1.93 +    // fewer than 32 bytes remain: fill 8 bytes (one qword) per iteration
    1.94 +    //
    1.95 +    BIND(L_fill_8_bytes_loop);
    1.96 +    movl(Address(to, 0), value);
    1.97 +    movl(Address(to, 4), value);
    1.98 +    addptr(to, 8);
    1.99 +    BIND(L_fill_8_bytes);
   1.100 +    subl(count, 1 << (shift + 1));  // (1 << (shift + 1)) elements == 8 bytes
   1.101 +    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
   1.102 +    // fall through to fill 4 bytes
   1.103 +  } else {
   1.104 +    Label L_fill_32_bytes;
   1.105 +    if (!UseUnalignedLoadStores) {
   1.106 +      // align to 8 bytes, we know we are 4 byte aligned to start
   1.107 +      testptr(to, 4);
   1.108 +      jccb(Assembler::zero, L_fill_32_bytes);
   1.109 +      movl(Address(to, 0), value);
   1.110 +      addptr(to, 4);
   1.111 +      subl(count, 1<<shift);  // 4 bytes == (1 << shift) elements
   1.112 +    }
   1.113 +    BIND(L_fill_32_bytes);
   1.114 +    {
   1.115 +      assert( UseSSE >= 2, "supported cpu only" );
   1.116 +      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
   1.117 +      // Fill 32-byte chunks
   1.118 +      movdl(xtmp, value);
   1.119 +      pshufd(xtmp, xtmp, 0);  // shuffle order 0 selects dword 0 for every lane, so xtmp holds the 32-bit pattern four times
   1.120 +
   1.121 +      subl(count, 8 << shift);  // (8 << shift) elements == 32 bytes; count stays biased by that amount inside the loop
   1.122 +      jcc(Assembler::less, L_check_fill_8_bytes);
   1.123 +      align(16);
   1.124 +
   1.125 +      BIND(L_fill_32_bytes_loop);
   1.126 +
   1.127 +      if (UseUnalignedLoadStores) {  // two stores at offsets 0 and 16
   1.128 +        movdqu(Address(to, 0), xtmp);
   1.129 +        movdqu(Address(to, 16), xtmp);
   1.130 +      } else {  // four stores at offsets 0, 8, 16 and 24
   1.131 +        movq(Address(to, 0), xtmp);
   1.132 +        movq(Address(to, 8), xtmp);
   1.133 +        movq(Address(to, 16), xtmp);
   1.134 +        movq(Address(to, 24), xtmp);
   1.135 +      }
   1.136 +
   1.137 +      addptr(to, 32);
   1.138 +      subl(count, 8 << shift);
   1.139 +      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
   1.140 +      BIND(L_check_fill_8_bytes);
   1.141 +      addl(count, 8 << shift);  // undo the bias: count is again the number of elements left to fill
   1.142 +      jccb(Assembler::zero, L_exit);
   1.143 +      jmpb(L_fill_8_bytes);
   1.144 +
   1.145 +      //
   1.146 +      // fewer than 32 bytes remain: fill 8 bytes (one qword) per iteration
   1.147 +      //
   1.148 +      BIND(L_fill_8_bytes_loop);
   1.149 +      movq(Address(to, 0), xtmp);
   1.150 +      addptr(to, 8);
   1.151 +      BIND(L_fill_8_bytes);
   1.152 +      subl(count, 1 << (shift + 1));  // (1 << (shift + 1)) elements == 8 bytes
   1.153 +      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
   1.154 +    }
   1.155 +  }
   1.156 +  // fill trailing 4 bytes; count may have gone negative in the 8-byte loop, but subtracting 1 << (shift + 1) leaves the lower bits intact, so the bit tests below still see the remainder
   1.157 +  BIND(L_fill_4_bytes);
   1.158 +  testl(count, 1<<shift);
   1.159 +  jccb(Assembler::zero, L_fill_2_bytes);
   1.160 +  movl(Address(to, 0), value);
   1.161 +  if (t == T_BYTE || t == T_SHORT) {
   1.162 +    addptr(to, 4);
   1.163 +    BIND(L_fill_2_bytes);
   1.164 +    // fill trailing 2 bytes
   1.165 +    testl(count, 1<<(shift-1));
   1.166 +    jccb(Assembler::zero, L_fill_byte);
   1.167 +    movw(Address(to, 0), value);
   1.168 +    if (t == T_BYTE) {
   1.169 +      addptr(to, 2);
   1.170 +      BIND(L_fill_byte);
   1.171 +      // fill trailing byte
   1.172 +      testl(count, 1);
   1.173 +      jccb(Assembler::zero, L_exit);
   1.174 +      movb(Address(to, 0), value);
   1.175 +    } else {
   1.176 +      BIND(L_fill_byte);  // T_SHORT has no single-byte tail; the label is still bound because the jccb above targets it
   1.177 +    }
   1.178 +  } else {
   1.179 +    BIND(L_fill_2_bytes);  // T_INT has no sub-word tail; the label is still bound because the jccb above targets it
   1.180 +  }
   1.181 +  BIND(L_exit);
   1.182 +}
   1.183 +#undef BIND  // defined just before generate_fill; removed here so it does not reach the code that follows
   1.184 +#undef BLOCK_COMMENT  // likewise removed after generate_fill
   1.185 +
   1.186 +
   1.187  Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   1.188    switch (cond) {
   1.189      // Note some conditions are synonyms for others

mercurial