src/cpu/ppc/vm/assembler_ppc.hpp

changeset 6495
67fa91961822
parent 6458
ec28f9c041ff
child 6511
31e80afe3fed
     1.1 --- a/src/cpu/ppc/vm/assembler_ppc.hpp	Tue Dec 10 14:29:43 2013 +0100
     1.2 +++ b/src/cpu/ppc/vm/assembler_ppc.hpp	Wed Dec 11 00:06:11 2013 +0100
     1.3 @@ -98,7 +98,17 @@
     1.4      // Only 8 registers may contain integer parameters.
     1.5      n_register_parameters = 8,
     1.6      // Can have up to 8 floating registers.
     1.7 -    n_float_register_parameters = 8
     1.8 +    n_float_register_parameters = 8,
     1.9 +
    1.10 +    // PPC C calling conventions.
    1.11 +    // The first eight arguments are passed in int regs if they are int.
    1.12 +    n_int_register_parameters_c = 8,
    1.13 +    // The first thirteen float arguments are passed in float regs.
    1.14 +    n_float_register_parameters_c = 13,
    1.15 +    // Only the first 8 parameters are not placed on the stack. AIX disassembly
    1.16 +    // shows that xlC places all float args after argument 8 on the stack AND
    1.17 +    // in a register. This is not documented, but we follow this convention, too.
    1.18 +    n_regs_not_on_stack_c = 8,
    1.19    };
    1.20    // creation
    1.21    Argument(int number) : _number(number) {}
    1.22 @@ -662,6 +672,14 @@
    1.23      bcondCRbiIs1_bhintIsTaken    = bcondCRbiIs1 | bhintatIsTaken,
    1.24    };
    1.25  
    1.26 +  // Elemental Memory Barriers (>=Power 8)
    1.27 +  enum Elemental_Membar_mask_bits {
    1.28 +    StoreStore = 1 << 0,
    1.29 +    StoreLoad  = 1 << 1,
    1.30 +    LoadStore  = 1 << 2,
    1.31 +    LoadLoad   = 1 << 3
    1.32 +  };
    1.33 +
    1.34    // Branch prediction hints.
    1.35    inline static int add_bhint_to_boint(const int bhint, const int boint) {
    1.36      switch (boint) {
    1.37 @@ -753,17 +771,6 @@
    1.38  
    1.39    enum Predict { pt = 1, pn = 0 }; // pt = predict taken
    1.40  
    1.41 -  enum Membar_mask_bits { // page 184, v9
    1.42 -    StoreStore = 1 << 3,
    1.43 -    LoadStore  = 1 << 2,
    1.44 -    StoreLoad  = 1 << 1,
    1.45 -    LoadLoad   = 1 << 0,
    1.46 -
    1.47 -    Sync       = 1 << 6,
    1.48 -    MemIssue   = 1 << 5,
    1.49 -    Lookaside  = 1 << 4
    1.50 -  };
    1.51 -
    1.52    // instruction must start at passed address
    1.53    static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
    1.54  
    1.55 @@ -875,19 +882,20 @@
    1.56    #define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit))
    1.57    // Extract instruction fields from instruction words.
    1.58   public:
    1.59 -  static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
    1.60 -  static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
    1.61 -  static int inv_rt_field(int x) { return inv_opp_u_field(x, 10,  6); }
    1.62 -  static int inv_rs_field(int x) { return inv_opp_u_field(x, 10,  6); }
    1.63 +  static int inv_ra_field(int x)  { return inv_opp_u_field(x, 15, 11); }
    1.64 +  static int inv_rb_field(int x)  { return inv_opp_u_field(x, 20, 16); }
    1.65 +  static int inv_rt_field(int x)  { return inv_opp_u_field(x, 10,  6); }
    1.66 +  static int inv_rta_field(int x) { return inv_opp_u_field(x, 15, 11); }
    1.67 +  static int inv_rs_field(int x)  { return inv_opp_u_field(x, 10,  6); }
    1.68    // Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0.
    1.69    // Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0.
    1.70 -  static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
    1.71 -  static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
    1.72 -  static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
    1.73 -  static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6);  }
    1.74 -  static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
    1.75 -  static int inv_bo_field(int x) { return inv_opp_u_field(x, 10,  6); }
    1.76 -  static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
    1.77 +  static int inv_ds_field(int x)  { return inv_opp_s_field(x, 29, 16) << 2; }
    1.78 +  static int inv_d1_field(int x)  { return inv_opp_s_field(x, 31, 16); }
    1.79 +  static int inv_si_field(int x)  { return inv_opp_s_field(x, 31, 16); }
    1.80 +  static int inv_to_field(int x)  { return inv_opp_u_field(x, 10, 6);  }
    1.81 +  static int inv_lk_field(int x)  { return inv_opp_u_field(x, 31, 31); }
    1.82 +  static int inv_bo_field(int x)  { return inv_opp_u_field(x, 10,  6); }
    1.83 +  static int inv_bi_field(int x)  { return inv_opp_u_field(x, 15, 11); }
    1.84  
    1.85    #define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
    1.86    #define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))
    1.87 @@ -925,6 +933,7 @@
    1.88    static int l10(      int         x)  { return  opp_u_field(x,             10, 10); }
    1.89    static int l15(      int         x)  { return  opp_u_field(x,             15, 15); }
    1.90    static int l910(     int         x)  { return  opp_u_field(x,             10,  9); }
    1.91 +  static int e1215(    int         x)  { return  opp_u_field(x,             15, 12); }
    1.92    static int lev(      int         x)  { return  opp_u_field(x,             26, 20); }
    1.93    static int li(       int         x)  { return  opp_s_field(x,             29,  6); }
    1.94    static int lk(       int         x)  { return  opp_u_field(x,             31, 31); }
    1.95 @@ -960,13 +969,13 @@
    1.96    static int sr(       int         x)  { return  opp_u_field(x,             15, 12); }
    1.97    static int tbr(      int         x)  { return  opp_u_field(x,             20, 11); }
    1.98    static int th(       int         x)  { return  opp_u_field(x,             10,  7); }
    1.99 -  static int thct(     int         x)  { assert((x&8)==0, "must be valid cache specification");  return th(x); }
   1.100 -  static int thds(     int         x)  { assert((x&8)==8, "must be valid stream specification"); return th(x); }
   1.101 +  static int thct(     int         x)  { assert((x&8) == 0, "must be valid cache specification");  return th(x); }
   1.102 +  static int thds(     int         x)  { assert((x&8) == 8, "must be valid stream specification"); return th(x); }
   1.103    static int to(       int         x)  { return  opp_u_field(x,             10,  6); }
   1.104    static int u(        int         x)  { return  opp_u_field(x,             19, 16); }
   1.105    static int ui(       int         x)  { return  opp_u_field(x,             31, 16); }
   1.106  
   1.107 -  // support vector instructions for >= Power6
   1.108 +  // Support vector instructions for >= Power6.
   1.109    static int vra(      int         x)  { return  opp_u_field(x,             15, 11); }
   1.110    static int vrb(      int         x)  { return  opp_u_field(x,             20, 16); }
   1.111    static int vrc(      int         x)  { return  opp_u_field(x,             25, 21); }
   1.112 @@ -1090,8 +1099,8 @@
   1.113    inline void subfic( Register d, Register a, int si16);
   1.114    inline void add(    Register d, Register a, Register b);
   1.115    inline void add_(   Register d, Register a, Register b);
   1.116 -  inline void subf(   Register d, Register a, Register b);
   1.117 -  inline void sub(    Register d, Register a, Register b);
   1.118 +  inline void subf(   Register d, Register a, Register b);  // d = b - a    "Sub_from", as in ppc spec.
   1.119 +  inline void sub(    Register d, Register a, Register b);  // d = a - b    Swap operands of subf for readability.
   1.120    inline void subf_(  Register d, Register a, Register b);
   1.121    inline void addc(   Register d, Register a, Register b);
   1.122    inline void addc_(  Register d, Register a, Register b);
   1.123 @@ -1204,7 +1213,7 @@
   1.124    }
   1.125    // endgroup opcode for Power6
   1.126    static bool is_endgroup(int x) {
   1.127 -    return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0;
   1.128 +    return is_ori(x) && inv_ra_field(x) == 1 && inv_rs_field(x) == 1 && inv_d1_field(x) == 0;
   1.129    }
   1.130  
   1.131  
   1.132 @@ -1227,9 +1236,13 @@
   1.133    inline void cmpld( ConditionRegister crx, Register a, Register b);
   1.134  
   1.135    inline void isel(   Register d, Register a, Register b, int bc);
   1.136 +  // Convenience overload taking a condition register, a condition code and an invert flag. Omit b to keep old value.
   1.137 +  inline void isel(   Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b = noreg);
   1.138 +  // Set d = 0 if (cr.cc) equals 1, otherwise b.
   1.139 +  inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
   1.140  
   1.141    // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
   1.142 -         void andi(   Register a, Register s, int ui16);    // optimized version
   1.143 +         void andi(   Register a, Register s, int ui16);   // optimized version
   1.144    inline void andi_(  Register a, Register s, int ui16);
   1.145    inline void andis_( Register a, Register s, int ui16);
   1.146    inline void ori(    Register a, Register s, int ui16);
   1.147 @@ -1553,10 +1566,7 @@
   1.148    inline void ptesync();
   1.149    inline void eieio();
   1.150    inline void isync();
   1.151 -
   1.152 -  inline void release();
   1.153 -  inline void acquire();
   1.154 -  inline void fence();
   1.155 +  inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
   1.156  
   1.157    // atomics
   1.158    inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   1.159 @@ -1938,7 +1948,7 @@
   1.160    inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
   1.161  
   1.162    // Load a 64 bit constant, optimized, not identifyable.
   1.163 -  // Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
   1.164 +  // Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
   1.165    // 16 bit immediate offset. This is useful if the offset can be encoded in
   1.166    // a succeeding instruction.
   1.167           int load_const_optimized(Register d, long a,  Register tmp = noreg, bool return_simm16_rest = false);

mercurial