1.1 --- a/src/cpu/ppc/vm/assembler_ppc.hpp Tue Dec 10 14:29:43 2013 +0100 1.2 +++ b/src/cpu/ppc/vm/assembler_ppc.hpp Wed Dec 11 00:06:11 2013 +0100 1.3 @@ -98,7 +98,17 @@ 1.4 // Only 8 registers may contain integer parameters. 1.5 n_register_parameters = 8, 1.6 // Can have up to 8 floating registers. 1.7 - n_float_register_parameters = 8 1.8 + n_float_register_parameters = 8, 1.9 + 1.10 + // PPC C calling conventions. 1.11 + // The first eight arguments are passed in int regs if they are int. 1.12 + n_int_register_parameters_c = 8, 1.13 + // The first thirteen float arguments are passed in float regs. 1.14 + n_float_register_parameters_c = 13, 1.15 + // Only the first 8 parameters are not placed on the stack. Aix disassembly 1.16 + // shows that xlC places all float args after argument 8 on the stack AND 1.17 + // in a register. This is not documented, but we follow this convention, too. 1.18 + n_regs_not_on_stack_c = 8, 1.19 }; 1.20 // creation 1.21 Argument(int number) : _number(number) {} 1.22 @@ -662,6 +672,14 @@ 1.23 bcondCRbiIs1_bhintIsTaken = bcondCRbiIs1 | bhintatIsTaken, 1.24 }; 1.25 1.26 + // Elemental Memory Barriers (>=Power 8) 1.27 + enum Elemental_Membar_mask_bits { 1.28 + StoreStore = 1 << 0, 1.29 + StoreLoad = 1 << 1, 1.30 + LoadStore = 1 << 2, 1.31 + LoadLoad = 1 << 3 1.32 + }; 1.33 + 1.34 // Branch prediction hints. 1.35 inline static int add_bhint_to_boint(const int bhint, const int boint) { 1.36 switch (boint) { 1.37 @@ -753,17 +771,6 @@ 1.38 1.39 enum Predict { pt = 1, pn = 0 }; // pt = predict taken 1.40 1.41 - enum Membar_mask_bits { // page 184, v9 1.42 - StoreStore = 1 << 3, 1.43 - LoadStore = 1 << 2, 1.44 - StoreLoad = 1 << 1, 1.45 - LoadLoad = 1 << 0, 1.46 - 1.47 - Sync = 1 << 6, 1.48 - MemIssue = 1 << 5, 1.49 - Lookaside = 1 << 4 1.50 - }; 1.51 - 1.52 // instruction must start at passed address 1.53 static int instr_len(unsigned char *instr) { return BytesPerInstWord; } 1.54 1.55 @@ -875,19 +882,20 @@ 1.56 #define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit)) 1.57 // Extract instruction fields from instruction words. 1.58 public: 1.59 - static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); } 1.60 - static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); } 1.61 - static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.62 - static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.63 + static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); } 1.64 + static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); } 1.65 + static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.66 + static int inv_rta_field(int x) { return inv_opp_u_field(x, 15, 11); } 1.67 + static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.68 // Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0. 1.69 // Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0. 1.70 - static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; } 1.71 - static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); } 1.72 - static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); } 1.73 - static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.74 - static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); } 1.75 - static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.76 - static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); } 1.77 + static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; } 1.78 + static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); } 1.79 + static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); } 1.80 + static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.81 + static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); } 1.82 + static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); } 1.83 + static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); } 1.84 1.85 #define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit)) 1.86 #define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit)) 1.87 @@ -925,6 +933,7 @@ 1.88 static int l10( int x) { return opp_u_field(x, 10, 10); } 1.89 static int l15( int x) { return opp_u_field(x, 15, 15); } 1.90 static int l910( int x) { return opp_u_field(x, 10, 9); } 1.91 + static int e1215( int x) { return opp_u_field(x, 15, 12); } 1.92 static int lev( int x) { return opp_u_field(x, 26, 20); } 1.93 static int li( int x) { return opp_s_field(x, 29, 6); } 1.94 static int lk( int x) { return opp_u_field(x, 31, 31); } 1.95 @@ -960,13 +969,13 @@ 1.96 static int sr( int x) { return opp_u_field(x, 15, 12); } 1.97 static int tbr( int x) { return opp_u_field(x, 20, 11); } 1.98 static int th( int x) { return opp_u_field(x, 10, 7); } 1.99 - static int thct( int x) { assert((x&8)==0, "must be valid cache specification"); return th(x); } 1.100 - static int thds( int x) { assert((x&8)==8, "must be valid stream specification"); return th(x); } 1.101 + static int thct( int x) { assert((x&8) == 0, "must be valid cache specification"); return th(x); } 1.102 + static int thds( int x) { assert((x&8) == 8, "must be valid stream specification"); return th(x); } 1.103 static int to( int x) { return opp_u_field(x, 10, 6); } 1.104 static int u( int x) { return opp_u_field(x, 19, 16); } 1.105 static int ui( int x) { return opp_u_field(x, 31, 16); } 1.106 1.107 - // support vector instructions for >= Power6 1.108 + // Support vector instructions for >= Power6. 1.109 static int vra( int x) { return opp_u_field(x, 15, 11); } 1.110 static int vrb( int x) { return opp_u_field(x, 20, 16); } 1.111 static int vrc( int x) { return opp_u_field(x, 25, 21); } 1.112 @@ -1090,8 +1099,8 @@ 1.113 inline void subfic( Register d, Register a, int si16); 1.114 inline void add( Register d, Register a, Register b); 1.115 inline void add_( Register d, Register a, Register b); 1.116 - inline void subf( Register d, Register a, Register b); 1.117 - inline void sub( Register d, Register a, Register b); 1.118 + inline void subf( Register d, Register a, Register b); // d = b - a "Sub_from", as in ppc spec. 1.119 + inline void sub( Register d, Register a, Register b); // d = a - b Swap operands of subf for readability. 1.120 inline void subf_( Register d, Register a, Register b); 1.121 inline void addc( Register d, Register a, Register b); 1.122 inline void addc_( Register d, Register a, Register b); 1.123 @@ -1204,7 +1213,7 @@ 1.124 } 1.125 // endgroup opcode for Power6 1.126 static bool is_endgroup(int x) { 1.127 - return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0; 1.128 + return is_ori(x) && inv_ra_field(x) == 1 && inv_rs_field(x) == 1 && inv_d1_field(x) == 0; 1.129 } 1.130 1.131 1.132 @@ -1227,9 +1236,13 @@ 1.133 inline void cmpld( ConditionRegister crx, Register a, Register b); 1.134 1.135 inline void isel( Register d, Register a, Register b, int bc); 1.136 + // Convenient version which takes: Condition register, Condition code and invert flag. Omit b to keep old value. 1.137 + inline void isel( Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b = noreg); 1.138 + // Set d = 0 if (cr.cc) equals 1, otherwise b. 1.139 + inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg); 1.140 1.141 // PPC 1, section 3.3.11, Fixed-Point Logical Instructions 1.142 - void andi( Register a, Register s, int ui16); // optimized version 1.143 + void andi( Register a, Register s, int ui16); // optimized version 1.144 inline void andi_( Register a, Register s, int ui16); 1.145 inline void andis_( Register a, Register s, int ui16); 1.146 inline void ori( Register a, Register s, int ui16); 1.147 @@ -1553,10 +1566,7 @@ 1.148 inline void ptesync(); 1.149 inline void eieio(); 1.150 inline void isync(); 1.151 - 1.152 - inline void release(); 1.153 - inline void acquire(); 1.154 - inline void fence(); 1.155 + inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8) 1.156 1.157 // atomics 1.158 inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); 1.159 @@ -1938,7 +1948,7 @@ 1.160 inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg); 1.161 1.162 // Load a 64 bit constant, optimized, not identifyable. 1.163 - // Tmp can be used to increase ILP. Set return_simm16_rest=true to get a 1.164 + // Tmp can be used to increase ILP. Set return_simm16_rest = true to get a 1.165 // 16 bit immediate offset. This is useful if the offset can be encoded in 1.166 // a succeeding instruction. 1.167 int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false);