[C2] Performance of scimark.monte_carlo is about 5% up.

author      fujie
date        Thu, 30 Mar 2017 08:45:59 -0400
changeset   390:d3aefa77da6c
parent      389:76857a2c3534
child       391:910b77f150c4

src/cpu/mips/vm/icache_mips.cpp
src/cpu/mips/vm/mips_64.ad
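
The two file diffs below share one motivation. In icache_mips.cpp, every Icache flush becomes conditional on the CACHE_OPT macro, because Loongson 3A keeps the Icache and Dcache coherent in hardware. In mips_64.ad, the old load/store encodings either asserted index == 0 or unconditionally emitted the Loongson-only gs*x indexed instructions; each rewritten encoding now expands the full base + (index << scale) + disp addressing mode, dispatching on whether an index register is present and whether disp fits a signed 16-bit immediate (8-bit for the gs*x forms). Below is a compilable sketch of that shared dispatch; asmout(), the register numbers, and use_loongson are illustrative stand-ins, not the real MacroAssembler API (__ lb, __ gslbx, ...).

    #include <cstdarg>
    #include <cstdint>
    #include <cstdio>

    static const bool use_loongson = true;  // stands in for UseLoongsonISA
    static const int AT = 1, T9 = 25;       // the MIPS scratch registers used below

    static void asmout(const char* fmt, ...) {  // "emit" one instruction by printing it
      va_list ap; va_start(ap, fmt);
      std::vprintf(fmt, ap); va_end(ap); std::putchar('\n');
    }

    static bool is_simm(int64_t v, int bits) {  // does v fit a signed 'bits'-bit immediate?
      const int64_t lim = int64_t(1) << (bits - 1);
      return -lim <= v && v < lim;
    }

    // Load the byte at base + (index << scale) + disp into dst (index 0 = no index),
    // mirroring the decision tree of the rewritten load_B_enc.
    static void load_byte(int dst, int base, int index, int scale, int64_t disp) {
      if (index != 0) {
        if (use_loongson && scale == 0 && is_simm(disp, 8)) {
          asmout("gslbx r%d, r%d(r%d), %d", dst, base, index, (int)disp);
          return;                             // one indexed load, 8-bit displacement
        }
        if (scale == 0) {
          asmout("daddu r%d, r%d, r%d", AT, base, index);
        } else {
          asmout("dsll  r%d, r%d, %d", AT, index, scale);
          asmout("daddu r%d, r%d, r%d", AT, base, AT);
        }
        base = AT;                            // AT now holds base + (index << scale)
      }
      if (is_simm(disp, 16)) {
        asmout("lb    r%d, %d(r%d)", dst, (int)disp, base);
      } else {                                // disp too large for a load immediate:
        asmout("li    r%d, %lld", T9, (long long)disp);
        if (use_loongson) {
          asmout("gslbx r%d, r%d(r%d), 0", dst, base, T9);
        } else {
          asmout("daddu r%d, r%d, r%d", AT, base, T9);
          asmout("lb    r%d, 0(r%d)", dst, AT);
        }
      }
    }

    int main() {
      load_byte(/*dst=*/8, /*base=*/4, /*index=*/5, /*scale=*/3, /*disp=*/0x12345);
      return 0;
    }

Every load/store encoding in the diff (lb, lbu, lh, lhu, lw, lwu, ld, sb, sh, sw, sd and the immediate-store variants) instantiates this same decision tree, plus a value == 0 test in the immediate stores so that zero can be stored directly from R0.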
     1.1 --- a/src/cpu/mips/vm/icache_mips.cpp	Tue Mar 28 16:09:10 2017 -0400
     1.2 +++ b/src/cpu/mips/vm/icache_mips.cpp	Thu Mar 30 08:45:59 2017 -0400
     1.3 @@ -40,18 +40,34 @@
     1.4  void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {};
     1.5  
     1.6  void ICache::call_flush_stub(address start, int lines) {
     1.7 -  cacheflush(start, lines * line_size , ICACHE);
      1.8 +	// In fact, the current OS implementation simply flushes all of the ICACHE and DCACHE.
      1.9 +#ifndef CACHE_OPT
     1.10 +	/* Loongson 3A keeps the Icache and Dcache coherent in hardware,
     1.11 +	 * so no manual synchronization is needed. */
     1.12 +	cacheflush(start, lines * line_size, ICACHE);
     1.13 +#endif
     1.14 +//	sysmips(3, 0, 0, 0);
    1.15  }
    1.16  
    1.17  void ICache::invalidate_word(address addr) {
    1.18 -  cacheflush(addr,4, ICACHE);
    1.19 +	//cacheflush(addr, 4, ICACHE);
    1.20 +
    1.21 +#ifndef CACHE_OPT
     1.22 +	cacheflush(addr, 4, ICACHE);
    1.23 +#endif
    1.24 +//	sysmips(3, 0, 0, 0);
    1.25  }
    1.26  
    1.27  void ICache::invalidate_range(address start, int nbytes) {
    1.28 -  cacheflush(start, nbytes, ICACHE);
    1.29 +#ifndef CACHE_OPT
    1.30 +	cacheflush(start, nbytes, ICACHE);
    1.31 +#endif
    1.32 +//	sysmips(3, 0, 0, 0);
    1.33  }
    1.34  
    1.35  void ICache::invalidate_all() {
    1.36 -  sysmips(3, 0, 0, 0);
    1.37 +#ifndef CACHE_OPT
    1.38 +	sysmips(3, 0, 0, 0);
    1.39 +#endif
    1.40  }
    1.41  
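Note on the icache_mips.cpp diff above: when CACHE_OPT is defined (e.g. -DCACHE_OPT for Loongson 3A builds, where hardware keeps the Icache coherent with the Dcache), every flush compiles away and code patching no longer pays for a cacheflush/sysmips syscall on each patched site. A standalone sketch of the same guard, with a stubbed cacheflush standing in for the Linux/MIPS syscall wrapper from <sys/cachectl.h>:

    #include <cstdio>

    typedef unsigned char* address;

    static const int line_size = 32;   // assumed cache-line size, for the sketch only
    static const int ICACHE = 1;       // stand-in for the <sys/cachectl.h> constant

    static int cacheflush(address start, int nbytes, int cache) {
      // Stub: the real call is the MIPS cacheflush(2) syscall.
      std::printf("cacheflush(%p, %d, %d)\n", (void*)start, nbytes, cache);
      return 0;
    }

    // Mirrors the shape of the patched ICache::call_flush_stub().
    static void flush_stub(address start, int lines) {
    #ifndef CACHE_OPT
      // Generic MIPS: synchronize the Icache after patching code.
      cacheflush(start, lines * line_size, ICACHE);
    #endif
      // Built with -DCACHE_OPT this is a no-op: the hardware snoops the Dcache.
    }

    int main() {
      unsigned char code[64] = {0};
      flush_stub(code, 2);             // two lines, or nothing under CACHE_OPT
      return 0;
    }
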
     2.1 --- a/src/cpu/mips/vm/mips_64.ad	Tue Mar 28 16:09:10 2017 -0400
     2.2 +++ b/src/cpu/mips/vm/mips_64.ad	Thu Mar 30 08:45:59 2017 -0400
     2.3 @@ -1704,15 +1704,55 @@
     2.4       int  disp = $mem$$disp;
     2.5  
     2.6       if( index != 0 ) {
     2.7 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
     2.8 -        __ gslbx(as_Register(dst), as_Register(base), as_Register(index), disp);
     2.9 +        if( Assembler::is_simm16(disp) ) { 
    2.10 +           if( UseLoongsonISA ) {
    2.11 +              if (scale == 0) {
    2.12 +                 __ gslbx(as_Register(dst), as_Register(base), as_Register(index), disp);
    2.13 +              } else {
    2.14 +                 __ dsll(AT, as_Register(index), scale);
    2.15 +                 __ gslbx(as_Register(dst), as_Register(base), AT, disp);
    2.16 +              }
    2.17 +           } else {
    2.18 +              if (scale == 0) {
    2.19 +                 __ addu(AT, as_Register(base), as_Register(index));
    2.20 +              } else {
    2.21 +                 __ dsll(AT, as_Register(index), scale);
    2.22 +                 __ addu(AT, as_Register(base), AT);
    2.23 +              }
    2.24 +              __ lb(as_Register(dst), AT, disp);
    2.25 +           }
    2.26 +        } else {
    2.27 +           if (scale == 0) {
    2.28 +              __ addu(AT, as_Register(base), as_Register(index));
    2.29 +           } else {
    2.30 +              __ dsll(AT, as_Register(index), scale);
    2.31 +              __ addu(AT, as_Register(base), AT);
    2.32 +           }
    2.33 +           __ move(T9, disp);
    2.34 +           if( UseLoongsonISA ) {
    2.35 +              __ gslbx(as_Register(dst), AT, T9, 0);
    2.36 +           } else {
    2.37 +              __ addu(AT, AT, T9); 
    2.38 +              __ lb(as_Register(dst), AT, 0);
    2.39 +           }
    2.40 +        }    
    2.41       } else {
    2.42 -        __ lb(as_Register(dst), as_Register(base), disp);
    2.43 +        if( Assembler::is_simm16(disp) ) { 
    2.44 +           __ lb(as_Register(dst), as_Register(base), disp);
    2.45 +        } else {
    2.46 +           __ move(T9, disp);   
    2.47 +           if( UseLoongsonISA ) {
    2.48 +              __ gslbx(as_Register(dst), as_Register(base), T9, 0);
    2.49 +           } else {
    2.50 +              __ addu(AT, as_Register(base), T9); 
    2.51 +              __ lb(as_Register(dst), AT, 0);
    2.52 +           }
    2.53 +        }    
    2.54       }
    2.55    %}
    2.56  
    2.57    //Load byte unsigned
    2.58 -  enc_class load_UB_enc (mRegI dst, umemory mem) %{
    2.59 +  enc_class load_UB_enc (mRegI dst, memory mem) %{
    2.60       MacroAssembler _masm(&cbuf);
    2.61       int  dst = $dst$$reg;
    2.62       int  base = $mem$$base;
    2.63 @@ -1720,8 +1760,29 @@
    2.64       int  scale = $mem$$scale;
    2.65       int  disp = $mem$$disp;
    2.66  
    2.67 -     assert(index == 0, "no index");
    2.68 -     __ lbu(as_Register(dst), as_Register(base), disp);
    2.69 +     if( index != 0 ) {
    2.70 +        if (scale == 0) {
    2.71 +           __ daddu(AT, as_Register(base), as_Register(index));
    2.72 +        } else {
    2.73 +           __ dsll(AT, as_Register(index), scale);
    2.74 +           __ daddu(AT, as_Register(base), AT);
    2.75 +        }
    2.76 +        if( Assembler::is_simm16(disp) ) { 
    2.77 +           __ lbu(as_Register(dst), AT, disp);
    2.78 +        } else {
    2.79 +           __ move(T9, disp);
    2.80 +           __ daddu(AT, AT, T9); 
    2.81 +           __ lbu(as_Register(dst), AT, 0);
    2.82 +        }    
    2.83 +     } else {
    2.84 +        if( Assembler::is_simm16(disp) ) { 
    2.85 +           __ lbu(as_Register(dst), as_Register(base), disp);
    2.86 +        } else {
    2.87 +           __ move(T9, disp);   
    2.88 +           __ daddu(AT, as_Register(base), T9); 
    2.89 +           __ lbu(as_Register(dst), AT, 0);
    2.90 +        }    
    2.91 +     }
    2.92    %}
    2.93  
    2.94    enc_class store_B_reg_enc (memory mem, mRegI src) %{
    2.95 @@ -1733,58 +1794,334 @@
    2.96       int  disp = $mem$$disp;
    2.97  
    2.98       if( index != 0 ) {
    2.99 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.100 -        __ gssbx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.101 +        if (scale == 0) {
   2.102 +           if( Assembler::is_simm(disp, 8) ) { 
   2.103 +              if (UseLoongsonISA) {
   2.104 +                 __ gssbx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.105 +              } else {
   2.106 +                 __ addu(AT, as_Register(base), as_Register(index));
   2.107 +                 __ sb(as_Register(src), AT, disp);
   2.108 +              }
   2.109 +           } else if( Assembler::is_simm16(disp) ) { 
   2.110 +              __ addu(AT, as_Register(base), as_Register(index));
   2.111 +              __ sb(as_Register(src), AT, disp);
   2.112 +           } else {
   2.113 +              __ addu(AT, as_Register(base), as_Register(index));
   2.114 +              __ move(T9, disp);
   2.115 +              if (UseLoongsonISA) {
   2.116 +                 __ gssbx(as_Register(src), AT, T9, 0);
   2.117 +              } else {
   2.118 +                 __ addu(AT, AT, T9); 
   2.119 +                 __ sb(as_Register(src), AT, 0);
   2.120 +              }
   2.121 +           } 
   2.122 +        } else {
   2.123 +           __ dsll(AT, as_Register(index), scale);
   2.124 +           if( Assembler::is_simm(disp, 8) ) { 
   2.125 +              if (UseLoongsonISA) {
   2.126 +                 __ gssbx(as_Register(src), AT, as_Register(base), disp);
   2.127 +              } else {
   2.128 +                 __ addu(AT, as_Register(base), AT);
   2.129 +                 __ sb(as_Register(src), AT, disp);
   2.130 +              }
   2.131 +           } else if( Assembler::is_simm16(disp) ) { 
   2.132 +              __ addu(AT, as_Register(base), AT);
   2.133 +              __ sb(as_Register(src), AT, disp);
   2.134 +           } else {
   2.135 +              __ addu(AT, as_Register(base), AT);
   2.136 +              __ move(T9, disp);
   2.137 +              if (UseLoongsonISA) {
   2.138 +                 __ gssbx(as_Register(src), AT, T9, 0);
   2.139 +              } else {
   2.140 +                 __ addu(AT, AT, T9); 
   2.141 +                 __ sb(as_Register(src), AT, 0);
   2.142 +              }
   2.143 +           }    
   2.144 +        }
   2.145       } else {
   2.146 -        __ sb(as_Register(src), as_Register(base), disp);
   2.147 +        if( Assembler::is_simm16(disp) ) { 
   2.148 +           __ sb(as_Register(src), as_Register(base), disp);
   2.149 +        } else {
   2.150 +           __ move(T9, disp);   
   2.151 +           if (UseLoongsonISA) {
   2.152 +              __ gssbx(as_Register(src), as_Register(base), T9, 0);
   2.153 +           } else {
   2.154 +              __ addu(AT, as_Register(base), T9); 
   2.155 +              __ sb(as_Register(src), AT, 0);
   2.156 +           }
   2.157 +        }    
   2.158       }
   2.159    %}
   2.160  
   2.161 -  enc_class store_B0_enc (memory mem) %{
   2.162 +  enc_class store_B_immI_enc (memory mem, immI8 src) %{
   2.163       MacroAssembler _masm(&cbuf);
   2.164       int  base = $mem$$base;
   2.165       int  index = $mem$$index;
   2.166       int  scale = $mem$$scale;
   2.167       int  disp = $mem$$disp;
   2.168 +     int value = $src$$constant;
   2.169  
   2.170       if( index != 0 ) {
   2.171 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.172 -        __ gssbx(R0, as_Register(base), as_Register(index), disp);
   2.173 +        if (!UseLoongsonISA) {
   2.174 +           if (scale == 0) {
   2.175 +              __ daddu(AT, as_Register(base), as_Register(index));
   2.176 +           } else {
   2.177 +              __ dsll(AT, as_Register(index), scale);
   2.178 +              __ daddu(AT, as_Register(base), AT);
   2.179 +           }
   2.180 +           if( Assembler::is_simm16(disp) ) { 
   2.181 +              if (value == 0) {
   2.182 +                 __ sb(R0, AT, disp);
   2.183 +              } else {
   2.184 +                 __ move(T9, value);
   2.185 +                 __ sb(T9, AT, disp);
   2.186 +              }
   2.187 +           } else {
   2.188 +              if (value == 0) {
   2.189 +                 __ move(T9, disp);
   2.190 +                 __ daddu(AT, AT, T9); 
   2.191 +                 __ sb(R0, AT, 0);
   2.192 +              } else {
   2.193 +                 __ move(T9, disp);
   2.194 +                 __ daddu(AT, AT, T9); 
   2.195 +                 __ move(T9, value);
   2.196 +                 __ sb(T9, AT, 0);
   2.197 +              }
   2.198 +           }    
   2.199 +        } else {
   2.200 +
   2.201 +           if (scale == 0) {
   2.202 +              if( Assembler::is_simm(disp, 8) ) { 
   2.203 +                 if (value == 0) {
   2.204 +                    __ gssbx(R0, as_Register(base), as_Register(index), disp);
   2.205 +                 } else {
   2.206 +                    __ move(T9, value);
   2.207 +                    __ gssbx(T9, as_Register(base), as_Register(index), disp);
   2.208 +                 }
   2.209 +              } else if( Assembler::is_simm16(disp) ) { 
   2.210 +                 __ daddu(AT, as_Register(base), as_Register(index));
   2.211 +                 if (value == 0) {
   2.212 +                    __ sb(R0, AT, disp);
   2.213 +                 } else {
   2.214 +                    __ move(T9, value);
   2.215 +                    __ sb(T9, AT, disp);
   2.216 +                 }
   2.217 +              } else {
   2.218 +                 if (value == 0) {
   2.219 +                    __ daddu(AT, as_Register(base), as_Register(index));
   2.220 +                    __ move(T9, disp);
   2.221 +                    __ gssbx(R0, AT, T9, 0);
   2.222 +                 } else {
   2.223 +                    __ move(AT, disp);
   2.224 +                    __ move(T9, value);
   2.225 +                    __ daddu(AT, as_Register(base), AT);
   2.226 +                    __ gssbx(T9, AT, as_Register(index), 0);
   2.227 +                 }
   2.228 +              }    
   2.229 +
   2.230 +           } else {
   2.231 +
   2.232 +              if( Assembler::is_simm(disp, 8) ) { 
   2.233 +                 __ dsll(AT, as_Register(index), scale);
   2.234 +                 if (value == 0) {
   2.235 +                    __ gssbx(R0, as_Register(base), AT, disp);
   2.236 +                 } else {
   2.237 +                    __ move(T9, value);
   2.238 +                    __ gssbx(T9, as_Register(base), AT, disp);
   2.239 +                 }
   2.240 +              } else if( Assembler::is_simm16(disp) ) { 
   2.241 +                 __ dsll(AT, as_Register(index), scale);
   2.242 +                 __ daddu(AT, as_Register(base), AT);
   2.243 +                 if (value == 0) {
   2.244 +                    __ sb(R0, AT, disp);
   2.245 +                 } else {
   2.246 +                    __ move(T9, value);
   2.247 +                    __ sb(T9, AT, disp);
   2.248 +                 }
   2.249 +              } else {
   2.250 +                 __ dsll(AT, as_Register(index), scale);
   2.251 +                 if (value == 0) {
   2.252 +                    __ daddu(AT, as_Register(base), AT);
   2.253 +                    __ move(T9, disp);
   2.254 +                    __ gssbx(R0, AT, T9, 0);
   2.255 +                 } else {
   2.256 +                    __ move(T9, disp);
   2.257 +                    __ daddu(AT, AT, T9); 
   2.258 +                    __ move(T9, value);
   2.259 +                    __ gssbx(T9, as_Register(base), AT, 0);
   2.260 +                 }
   2.261 +              }    
   2.262 +           }
   2.263 +        }
   2.264       } else {
   2.265 -        __ sb(R0, as_Register(base), disp);
   2.266 +        if( Assembler::is_simm16(disp) ) { 
   2.267 +           if (value == 0) {
   2.268 +              __ sb(R0, as_Register(base), disp);
   2.269 +           } else {
   2.270 +              __ move(AT, value);
   2.271 +              __ sb(AT, as_Register(base), disp);
   2.272 +           }
   2.273 +        } else {
   2.274 +           if (value == 0) {
   2.275 +              __ move(T9, disp);   
   2.276 +              if (UseLoongsonISA) {
   2.277 +                __ gssbx(R0, as_Register(base), T9, 0);
   2.278 +              } else {
   2.279 +                __ daddu(AT, as_Register(base), T9); 
   2.280 +                __ sb(R0, AT, 0);
   2.281 +              }
   2.282 +           } else {
   2.283 +              __ move(T9, disp);   
   2.284 +              if (UseLoongsonISA) {
   2.285 +                __ move(AT, value);
   2.286 +                __ gssbx(AT, as_Register(base), T9, 0);
   2.287 +              } else {
   2.288 +                __ daddu(AT, as_Register(base), T9); 
   2.289 +                __ move(T9, value);
   2.290 +                __ sb(T9, AT, 0);
   2.291 +              }
   2.292 +           }
   2.293 +        }    
   2.294       }
   2.295    %}
   2.296  
   2.297 -  enc_class store_B_reg_sync_enc (memory mem, mRegI src) %{
   2.298 -     MacroAssembler _masm(&cbuf);
   2.299 -     int  src = $src$$reg;
   2.300 -     int  base = $mem$$base;
   2.301 -     int  index = $mem$$index;
   2.302 -     int  scale = $mem$$scale;
   2.303 -     int  disp = $mem$$disp;
   2.304 -
   2.305 -     if( index != 0 ) {
   2.306 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.307 -        __ gssbx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.308 -     } else {
   2.309 -        __ sb(as_Register(src), as_Register(base), disp);
   2.310 -     }
   2.311 -     __ sync();
   2.312 -  %}
   2.313 -
   2.314 -  enc_class store_B0_sync_enc (memory mem) %{
   2.315 +
   2.316 +  enc_class store_B_immI_enc_sync (memory mem, immI8 src) %{
   2.317       MacroAssembler _masm(&cbuf);
   2.318       int  base = $mem$$base;
   2.319       int  index = $mem$$index;
   2.320       int  scale = $mem$$scale;
   2.321       int  disp = $mem$$disp;
   2.322 +     int value = $src$$constant;
   2.323  
   2.324       if( index != 0 ) {
   2.325 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.326 -        __ gssbx(R0, as_Register(base), as_Register(index), disp);
    2.327 +        if ( UseLoongsonISA ) {
    2.328 +           if ( Assembler::is_simm(disp, 8) ) {
    2.329 +              if ( scale == 0 ) {
    2.330 +                 if ( value == 0 ) {
    2.331 +                    __ gssbx(R0, as_Register(base), as_Register(index), disp);
    2.332 +                 } else {
    2.333 +                    __ move(AT, value);
    2.334 +                    __ gssbx(AT, as_Register(base), as_Register(index), disp);
    2.335 +                 }
    2.336 +              } else {
    2.337 +                 __ dsll(AT, as_Register(index), scale);
    2.338 +                 if ( value == 0 ) {
    2.339 +                    __ gssbx(R0, as_Register(base), AT, disp);
    2.340 +                 } else {
    2.341 +                    __ move(T9, value);
    2.342 +                    __ gssbx(T9, as_Register(base), AT, disp);
    2.343 +                 }
    2.344 +              }
    2.345 +           } else if ( Assembler::is_simm16(disp) ) {
    2.346 +              if ( scale == 0 ) {
    2.347 +                 __ daddu(AT, as_Register(base), as_Register(index));
    2.348 +                 if ( value == 0 ) {
    2.349 +                    __ sb(R0, AT, disp);
    2.350 +                 } else {
    2.351 +                    __ move(T9, value);
    2.352 +                    __ sb(T9, AT, disp);
    2.353 +                 }
    2.354 +              } else {
    2.355 +                 __ dsll(AT, as_Register(index), scale);
    2.356 +                 __ daddu(AT, as_Register(base), AT);
    2.357 +                 if ( value == 0 ) {
    2.358 +                    __ sb(R0, AT, disp);
    2.359 +                 } else {
    2.360 +                    __ move(T9, value);
    2.361 +                    __ sb(T9, AT, disp);
    2.362 +                 }
    2.363 +              }
    2.364 +           } else {
    2.365 +              if ( scale == 0 ) {
    2.366 +                 __ move(AT, disp);
    2.367 +                 __ daddu(AT, as_Register(index), AT);
    2.368 +                 if ( value == 0 ) {
    2.369 +                    __ gssbx(R0, as_Register(base), AT, 0);
    2.370 +                 } else {
    2.371 +                    __ move(T9, value);
    2.372 +                    __ gssbx(T9, as_Register(base), AT, 0);
    2.373 +                 }
    2.374 +              } else {
    2.375 +                 __ dsll(AT, as_Register(index), scale);
    2.376 +                 __ move(T9, disp);
    2.377 +                 __ daddu(AT, AT, T9);
    2.378 +                 if ( value == 0 ) {
    2.379 +                    __ gssbx(R0, as_Register(base), AT, 0);
    2.380 +                 } else {
    2.381 +                    __ move(T9, value);
    2.382 +                    __ gssbx(T9, as_Register(base), AT, 0);
    2.383 +                 }
    2.384 +              }
    2.385 +           }
    2.386 +        } else { // not using the Loongson ISA
    2.387 +           if (scale == 0) {
    2.388 +              __ daddu(AT, as_Register(base), as_Register(index));
    2.389 +           } else {
    2.390 +              __ dsll(AT, as_Register(index), scale);
    2.391 +              __ daddu(AT, as_Register(base), AT);
    2.392 +           }
    2.393 +           if( Assembler::is_simm16(disp) ) {
    2.394 +              if (value == 0) {
    2.395 +                 __ sb(R0, AT, disp);
    2.396 +              } else {
    2.397 +                 __ move(T9, value);
    2.398 +                 __ sb(T9, AT, disp);
    2.399 +              }
    2.400 +           } else {
    2.401 +              if (value == 0) {
    2.402 +                 __ move(T9, disp);
    2.403 +                 __ daddu(AT, AT, T9);
    2.404 +                 __ sb(R0, AT, 0);
    2.405 +              } else {
    2.406 +                 __ move(T9, disp);
    2.407 +                 __ daddu(AT, AT, T9);
    2.408 +                 __ move(T9, value);
    2.409 +                 __ sb(T9, AT, 0);
    2.410 +              }
    2.411 +           }
    2.412 +        }
   2.413       } else {
   2.414 -        __ sb(R0, as_Register(base), disp);
    2.415 +        if ( UseLoongsonISA ) {
    2.416 +           if ( Assembler::is_simm16(disp) ) {
    2.417 +              if ( value == 0 ) {
    2.418 +                 __ sb(R0, as_Register(base), disp);
    2.419 +              } else {
    2.420 +                 __ move(AT, value);
    2.421 +                 __ sb(AT, as_Register(base), disp);
    2.422 +              }
    2.423 +           } else {
    2.424 +              __ move(AT, disp);
    2.425 +              if ( value == 0 ) {
    2.426 +                 __ gssbx(R0, as_Register(base), AT, 0);
    2.427 +              } else {
    2.428 +                 __ move(T9, value);
    2.429 +                 __ gssbx(T9, as_Register(base), AT, 0);
    2.430 +              }
    2.431 +           }
    2.432 +        } else {
    2.433 +           if( Assembler::is_simm16(disp) ) {
    2.434 +              if (value == 0) {
    2.435 +                 __ sb(R0, as_Register(base), disp);
    2.436 +              } else {
    2.437 +                 __ move(AT, value);
    2.438 +                 __ sb(AT, as_Register(base), disp);
    2.439 +              }
    2.440 +           } else {
    2.441 +              if (value == 0) {
    2.442 +                 __ move(T9, disp);
    2.443 +                 __ daddu(AT, as_Register(base), T9);
    2.444 +                 __ sb(R0, AT, 0);
    2.445 +              } else {
    2.446 +                 __ move(T9, disp);
    2.447 +                 __ daddu(AT, as_Register(base), T9);
    2.448 +                 __ move(T9, value);
    2.449 +                 __ sb(T9, AT, 0);
    2.450 +              }
    2.451 +           }
    2.452 +        }
   2.453       }
   2.454 +
   2.455       __ sync();
   2.456    %}
   2.457  
   2.458 @@ -1798,15 +2135,72 @@
   2.459       int  disp = $mem$$disp;
   2.460  
   2.461       if( index != 0 ) {
   2.462 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.463 -	__ gslhx(as_Register(dst), as_Register(base), as_Register(index), disp);
   2.464 -     } else {
   2.465 -        __ lh(as_Register(dst), as_Register(base), disp);
    2.466 +        if ( UseLoongsonISA ) {
    2.467 +           if ( Assembler::is_simm(disp, 8) ) {
    2.468 +              if (scale == 0) {
    2.469 +                 __ gslhx(as_Register(dst), as_Register(base), as_Register(index), disp);
    2.470 +              } else {
    2.471 +                 __ dsll(AT, as_Register(index), scale);
    2.472 +                 __ gslhx(as_Register(dst), as_Register(base), AT, disp);
    2.473 +              }
    2.474 +           } else if ( Assembler::is_simm16(disp) ) {
    2.475 +              if (scale == 0) {
    2.476 +                 __ daddu(AT, as_Register(base), as_Register(index));
    2.477 +                 __ lh(as_Register(dst), AT, disp);
    2.478 +              } else {
    2.479 +                 __ dsll(AT, as_Register(index), scale);
    2.480 +                 __ daddu(AT, as_Register(base), AT);
    2.481 +                 __ lh(as_Register(dst), AT, disp);
    2.482 +              }
    2.483 +           } else {
    2.484 +              if (scale == 0) {
    2.485 +                 __ move(AT, disp);
    2.486 +                 __ daddu(AT, as_Register(index), AT);
    2.487 +                 __ gslhx(as_Register(dst), as_Register(base), AT, 0);
    2.488 +              } else {
    2.489 +                 __ dsll(AT, as_Register(index), scale);
    2.490 +                 __ move(T9, disp);
    2.491 +                 __ daddu(AT, AT, T9);
    2.492 +                 __ gslhx(as_Register(dst), as_Register(base), AT, 0);
    2.493 +              }
    2.494 +           }
    2.495 +        } else { // not using the Loongson ISA
    2.496 +           if (scale == 0) {
    2.497 +              __ daddu(AT, as_Register(base), as_Register(index));
    2.498 +           } else {
    2.499 +              __ dsll(AT, as_Register(index), scale);
    2.500 +              __ daddu(AT, as_Register(base), AT);
    2.501 +           }
    2.502 +           if( Assembler::is_simm16(disp) ) {
    2.503 +              __ lh(as_Register(dst), AT, disp);
    2.504 +           } else {
    2.505 +              __ move(T9, disp);
    2.506 +              __ daddu(AT, AT, T9);
    2.507 +              __ lh(as_Register(dst), AT, 0);
    2.508 +           }
    2.509 +        }
    2.510 +     } else { // index is 0
    2.511 +        if ( UseLoongsonISA ) {
    2.512 +           if ( Assembler::is_simm16(disp) ) {
    2.513 +              __ lh(as_Register(dst), as_Register(base), disp);
    2.514 +           } else {
    2.515 +              __ move(T9, disp);
    2.516 +              __ gslhx(as_Register(dst), as_Register(base), T9, 0);
    2.517 +           }
    2.518 +        } else { // not using the Loongson ISA
    2.519 +           if( Assembler::is_simm16(disp) ) {
    2.520 +              __ lh(as_Register(dst), as_Register(base), disp);
    2.521 +           } else {
    2.522 +              __ move(T9, disp);
    2.523 +              __ daddu(AT, as_Register(base), T9);
    2.524 +              __ lh(as_Register(dst), AT, 0);
    2.525 +           }
    2.526 +        }
   2.527       }
   2.528    %}
   2.529  
   2.530    // Load Char (16bit unsigned)
   2.531 -  enc_class load_C_enc (mRegI dst, umemory mem) %{
   2.532 +  enc_class load_C_enc (mRegI dst, memory mem) %{
   2.533       MacroAssembler _masm(&cbuf);
   2.534       int  dst = $dst$$reg;
   2.535       int  base = $mem$$base;
   2.536 @@ -1814,8 +2208,29 @@
   2.537       int  scale = $mem$$scale;
   2.538       int  disp = $mem$$disp;
   2.539  
   2.540 -     assert(index == 0, "no index");
   2.541 -     __ lhu(as_Register(dst), as_Register(base), disp);
   2.542 +     if( index != 0 ) {
   2.543 +        if (scale == 0) {
   2.544 +           __ daddu(AT, as_Register(base), as_Register(index));
   2.545 +        } else {
   2.546 +           __ dsll(AT, as_Register(index), scale);
   2.547 +           __ daddu(AT, as_Register(base), AT);
   2.548 +        }
   2.549 +        if( Assembler::is_simm16(disp) ) { 
   2.550 +           __ lhu(as_Register(dst), AT, disp);
   2.551 +        } else {
   2.552 +           __ move(T9, disp);
   2.553 +           __ addu(AT, AT, T9); 
   2.554 +           __ lhu(as_Register(dst), AT, 0);
   2.555 +        }    
   2.556 +     } else {
   2.557 +        if( Assembler::is_simm16(disp) ) { 
   2.558 +           __ lhu(as_Register(dst), as_Register(base), disp);
   2.559 +        } else {
   2.560 +           __ move(T9, disp);   
   2.561 +           __ daddu(AT, as_Register(base), T9); 
   2.562 +           __ lhu(as_Register(dst), AT, 0);
   2.563 +        }    
   2.564 +     }
   2.565    %}
   2.566  
   2.567    // Store Char (16bit unsigned)
   2.568 @@ -1828,10 +2243,50 @@
   2.569       int  disp = $mem$$disp;
   2.570  
   2.571       if( index != 0 ) {
   2.572 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.573 -        __ gsshx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.574 +        if( Assembler::is_simm16(disp) ) { 
   2.575 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
   2.576 +              if (scale == 0) {
   2.577 +                 __ gsshx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.578 +              } else {
   2.579 +                 __ dsll(AT, as_Register(index), scale);
   2.580 +                 __ gsshx(as_Register(src), as_Register(base), AT, disp);
   2.581 +              }
   2.582 +           } else {
   2.583 +              if (scale == 0) {
   2.584 +                 __ addu(AT, as_Register(base), as_Register(index));
   2.585 +              } else {
   2.586 +                 __ dsll(AT, as_Register(index), scale);
   2.587 +                 __ addu(AT, as_Register(base), AT);
   2.588 +              }
   2.589 +              __ sh(as_Register(src), AT, disp);
   2.590 +           }
   2.591 +        } else {
   2.592 +           if (scale == 0) {
   2.593 +              __ addu(AT, as_Register(base), as_Register(index));
   2.594 +           } else {
   2.595 +              __ dsll(AT, as_Register(index), scale);
   2.596 +              __ addu(AT, as_Register(base), AT);
   2.597 +           }
   2.598 +           __ move(T9, disp);
   2.599 +           if( UseLoongsonISA ) {
   2.600 +              __ gsshx(as_Register(src), AT, T9, 0);
   2.601 +           } else {
   2.602 +              __ addu(AT, AT, T9); 
   2.603 +              __ sh(as_Register(src), AT, 0);
   2.604 +           }
   2.605 +        }    
   2.606       } else {
   2.607 -        __ sh(as_Register(src), as_Register(base), disp);
   2.608 +        if( Assembler::is_simm16(disp) ) { 
   2.609 +           __ sh(as_Register(src), as_Register(base), disp);
   2.610 +        } else {
   2.611 +           __ move(T9, disp);   
   2.612 +           if( UseLoongsonISA ) {
   2.613 +              __ gsshx(as_Register(src), as_Register(base), T9, 0);
   2.614 +           } else {
   2.615 +              __ addu(AT, as_Register(base), T9); 
   2.616 +              __ sh(as_Register(src), AT, 0);
   2.617 +           }
   2.618 +        }    
   2.619       }
   2.620    %}
   2.621  
   2.622 @@ -1843,10 +2298,50 @@
   2.623       int  disp = $mem$$disp;
   2.624  
   2.625       if( index != 0 ) {
   2.626 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.627 -        __ gsshx(R0, as_Register(base), as_Register(index), disp);
   2.628 +        if( Assembler::is_simm16(disp) ) { 
   2.629 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
   2.630 +              if (scale == 0) {
   2.631 +                 __ gsshx(R0, as_Register(base), as_Register(index), disp);
   2.632 +              } else {
   2.633 +                 __ dsll(AT, as_Register(index), scale);
   2.634 +                 __ gsshx(R0, as_Register(base), AT, disp);
   2.635 +              }
   2.636 +           } else {
   2.637 +              if (scale == 0) {
   2.638 +                 __ addu(AT, as_Register(base), as_Register(index));
   2.639 +              } else {
   2.640 +                 __ dsll(AT, as_Register(index), scale);
   2.641 +                 __ addu(AT, as_Register(base), AT);
   2.642 +              }
   2.643 +              __ sh(R0, AT, disp);
   2.644 +           }
   2.645 +        } else {
   2.646 +           if (scale == 0) {
   2.647 +              __ addu(AT, as_Register(base), as_Register(index));
   2.648 +           } else {
   2.649 +              __ dsll(AT, as_Register(index), scale);
   2.650 +              __ addu(AT, as_Register(base), AT);
   2.651 +           }
   2.652 +           __ move(T9, disp);
   2.653 +           if( UseLoongsonISA ) {
   2.654 +              __ gsshx(R0, AT, T9, 0);
   2.655 +           } else {
   2.656 +              __ addu(AT, AT, T9); 
   2.657 +              __ sh(R0, AT, 0);
   2.658 +           }
   2.659 +        }    
   2.660       } else {
   2.661 -        __ sh(R0, as_Register(base), disp);
   2.662 +        if( Assembler::is_simm16(disp) ) { 
   2.663 +           __ sh(R0, as_Register(base), disp);
   2.664 +        } else {
   2.665 +           __ move(T9, disp);   
   2.666 +           if( UseLoongsonISA ) {
   2.667 +              __ gsshx(R0, as_Register(base), T9, 0);
   2.668 +           } else {
   2.669 +              __ addu(AT, as_Register(base), T9); 
   2.670 +              __ sh(R0, AT, 0);
   2.671 +           }
   2.672 +        }    
   2.673       }
   2.674    %}
   2.675  
   2.676 @@ -1859,10 +2354,50 @@
   2.677       int  disp = $mem$$disp;
   2.678  
   2.679       if( index != 0 ) {
   2.680 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.681 -        __ gslwx(as_Register(dst), as_Register(base), as_Register(index), disp);
   2.682 +        if( Assembler::is_simm16(disp) ) { 
   2.683 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
   2.684 +              if (scale == 0) {
   2.685 +                 __ gslwx(as_Register(dst), as_Register(base), as_Register(index), disp);
   2.686 +              } else {
   2.687 +                 __ dsll(AT, as_Register(index), scale);
   2.688 +                 __ gslwx(as_Register(dst), as_Register(base), AT, disp);
   2.689 +              }
   2.690 +           } else {
   2.691 +              if (scale == 0) {
   2.692 +                 __ addu(AT, as_Register(base), as_Register(index));
   2.693 +              } else {
   2.694 +                 __ dsll(AT, as_Register(index), scale);
   2.695 +                 __ addu(AT, as_Register(base), AT);
   2.696 +              }
   2.697 +              __ lw(as_Register(dst), AT, disp);
   2.698 +           }
   2.699 +        } else {
   2.700 +           if (scale == 0) {
   2.701 +              __ addu(AT, as_Register(base), as_Register(index));
   2.702 +           } else {
   2.703 +              __ dsll(AT, as_Register(index), scale);
   2.704 +              __ addu(AT, as_Register(base), AT);
   2.705 +           }
   2.706 +           __ move(T9, disp);
   2.707 +           if( UseLoongsonISA ) {
   2.708 +              __ gslwx(as_Register(dst), AT, T9, 0);
   2.709 +           } else {
   2.710 +              __ addu(AT, AT, T9); 
   2.711 +              __ lw(as_Register(dst), AT, 0);
   2.712 +           }
   2.713 +        }    
   2.714       } else {
   2.715 -        __ lw(as_Register(dst), as_Register(base), disp);
   2.716 +        if( Assembler::is_simm16(disp) ) { 
   2.717 +           __ lw(as_Register(dst), as_Register(base), disp);
   2.718 +        } else {
   2.719 +           __ move(T9, disp);   
   2.720 +           if( UseLoongsonISA ) {
   2.721 +              __ gslwx(as_Register(dst), as_Register(base), T9, 0);
   2.722 +           } else {
   2.723 +              __ addu(AT, as_Register(base), T9); 
   2.724 +              __ lw(as_Register(dst), AT, 0);
   2.725 +           }
   2.726 +        }    
   2.727       }
   2.728    %}
   2.729  
   2.730 @@ -1875,41 +2410,224 @@
   2.731       int  disp = $mem$$disp;
   2.732  
   2.733       if( index != 0 ) {
   2.734 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.735 -        __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.736 +        if( Assembler::is_simm16(disp) ) { 
   2.737 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
   2.738 +              if (scale == 0) {
   2.739 +                 __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.740 +              } else {
   2.741 +                 __ dsll(AT, as_Register(index), scale);
   2.742 +                 __ gsswx(as_Register(src), as_Register(base), AT, disp);
   2.743 +              }
   2.744 +           } else {
   2.745 +              if (scale == 0) {
   2.746 +                 __ addu(AT, as_Register(base), as_Register(index));
   2.747 +              } else {
   2.748 +                 __ dsll(AT, as_Register(index), scale);
   2.749 +                 __ addu(AT, as_Register(base), AT);
   2.750 +              }
   2.751 +              __ sw(as_Register(src), AT, disp);
   2.752 +           }
   2.753 +        } else {
   2.754 +           if (scale == 0) {
   2.755 +              __ addu(AT, as_Register(base), as_Register(index));
   2.756 +           } else {
   2.757 +              __ dsll(AT, as_Register(index), scale);
   2.758 +              __ addu(AT, as_Register(base), AT);
   2.759 +           }
   2.760 +           __ move(T9, disp);
   2.761 +           if( UseLoongsonISA ) {
   2.762 +              __ gsswx(as_Register(src), AT, T9, 0);
   2.763 +           } else {
   2.764 +              __ addu(AT, AT, T9); 
   2.765 +              __ sw(as_Register(src), AT, 0);
   2.766 +           }
   2.767 +        }    
   2.768       } else {
   2.769 -        __ sw(as_Register(src), as_Register(base), disp);
   2.770 +        if( Assembler::is_simm16(disp) ) { 
   2.771 +           __ sw(as_Register(src), as_Register(base), disp);
   2.772 +        } else {
   2.773 +           __ move(T9, disp);   
   2.774 +           if( UseLoongsonISA ) {
   2.775 +              __ gsswx(as_Register(src), as_Register(base), T9, 0);
   2.776 +           } else {
   2.777 +              __ addu(AT, as_Register(base), T9); 
   2.778 +              __ sw(as_Register(src), AT, 0);
   2.779 +           }
   2.780 +        }    
   2.781       }
   2.782    %}
   2.783  
   2.784 -  enc_class store_I_immI0_enc (memory mem) %{
   2.785 +  enc_class store_I_immI_enc (memory mem, immI src) %{
   2.786       MacroAssembler _masm(&cbuf);
   2.787 -     int  base  = $mem$$base;
   2.788 +     int  base = $mem$$base;
   2.789       int  index = $mem$$index;
   2.790       int  scale = $mem$$scale;
   2.791 -     int  disp  = $mem$$disp;
   2.792 +     int  disp = $mem$$disp;
   2.793 +     int value = $src$$constant;
   2.794  
   2.795       if( index != 0 ) {
   2.796 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.797 -        __ gsswx(R0, as_Register(base), as_Register(index), disp);
   2.798 +        if ( UseLoongsonISA ) {
   2.799 +           if ( Assembler::is_simm(disp, 8) ) {
   2.800 +              if ( scale == 0 ) {
   2.801 +                 if ( value == 0 ) {
   2.802 +                    __ gsswx(R0, as_Register(base), as_Register(index), disp);
   2.803 +                 } else {
   2.804 +                    __ move(T9, value);
   2.805 +                    __ gsswx(T9, as_Register(base), as_Register(index), disp);
   2.806 +                 }
   2.807 +              } else {
   2.808 +                 __ dsll(AT, as_Register(index), scale);
   2.809 +                 if ( value == 0 ) {
   2.810 +                    __ gsswx(R0, as_Register(base), AT, disp);
   2.811 +                 } else {
   2.812 +                    __ move(T9, value);
   2.813 +                    __ gsswx(T9, as_Register(base), AT, disp);
   2.814 +                 }
   2.815 +              }
    2.816 +           } else if ( Assembler::is_simm16(disp) ) {
    2.817 +              if ( scale == 0 ) {
    2.818 +                 __ daddu(AT, as_Register(base), as_Register(index));
    2.819 +                 if ( value == 0 ) {
    2.820 +                    __ sw(R0, AT, disp);
    2.821 +                 } else {
    2.822 +                    __ move(T9, value);
    2.823 +                    __ sw(T9, AT, disp);
    2.824 +                 }
    2.825 +              } else {
    2.826 +                 __ dsll(AT, as_Register(index), scale);
    2.827 +                 __ daddu(AT, as_Register(base), AT);
    2.828 +                 if ( value == 0 ) {
    2.829 +                    __ sw(R0, AT, disp);
    2.830 +                 } else {
    2.831 +                    __ move(T9, value);
    2.832 +                    __ sw(T9, AT, disp);
    2.833 +                 }
    2.834 +              }
    2.835 +           } else {
    2.836 +              if ( scale == 0 ) {
    2.837 +                 __ move(T9, disp);
    2.838 +                 __ daddu(AT, as_Register(index), T9);
    2.839 +                 if ( value == 0 ) {
    2.840 +                    __ gsswx(R0, as_Register(base), AT, 0);
    2.841 +                 } else {
    2.842 +                    __ move(T9, value);
    2.843 +                    __ gsswx(T9, as_Register(base), AT, 0);
    2.844 +                 }
    2.845 +              } else {
    2.846 +                 __ dsll(AT, as_Register(index), scale);
    2.847 +                 __ move(T9, disp);
    2.848 +                 __ daddu(AT, AT, T9);
    2.849 +                 if ( value == 0 ) {
    2.850 +                    __ gsswx(R0, as_Register(base), AT, 0);
    2.851 +                 } else {
    2.852 +                    __ move(T9, value);
    2.853 +                    __ gsswx(T9, as_Register(base), AT, 0);
    2.854 +                 }
    2.855 +              }
    2.856 +           }
    2.857 +        } else { // not using the Loongson ISA
    2.858 +           if (scale == 0) {
    2.859 +              __ daddu(AT, as_Register(base), as_Register(index));
    2.860 +           } else {
    2.861 +              __ dsll(AT, as_Register(index), scale);
    2.862 +              __ daddu(AT, as_Register(base), AT);
    2.863 +           }
    2.864 +           if( Assembler::is_simm16(disp) ) {
    2.865 +              if (value == 0) {
    2.866 +                 __ sw(R0, AT, disp);
    2.867 +              } else {
    2.868 +                 __ move(T9, value);
    2.869 +                 __ sw(T9, AT, disp);
    2.870 +              }
    2.871 +           } else {
    2.872 +              if (value == 0) {
    2.873 +                 __ move(T9, disp);
    2.874 +                 __ daddu(AT, AT, T9);
    2.875 +                 __ sw(R0, AT, 0);
    2.876 +              } else {
    2.877 +                 __ move(T9, disp);
    2.878 +                 __ daddu(AT, AT, T9);
    2.879 +                 __ move(T9, value);
    2.880 +                 __ sw(T9, AT, 0);
    2.881 +              }
    2.882 +           }
    2.883 +        }
   2.884       } else {
   2.885 -        __ sw(R0, as_Register(base), disp);
    2.886 +        if ( UseLoongsonISA ) {
    2.887 +           if ( Assembler::is_simm16(disp) ) {
    2.888 +              if ( value == 0 ) {
    2.889 +                 __ sw(R0, as_Register(base), disp);
    2.890 +              } else {
    2.891 +                 __ move(AT, value);
    2.892 +                 __ sw(AT, as_Register(base), disp);
    2.893 +              }
    2.894 +           } else {
    2.895 +              __ move(T9, disp);
    2.896 +              if ( value == 0 ) {
    2.897 +                 __ gsswx(R0, as_Register(base), T9, 0);
    2.898 +              } else {
    2.899 +                 __ move(AT, value);
    2.900 +                 __ gsswx(AT, as_Register(base), T9, 0);
    2.901 +              }
    2.902 +           }
    2.903 +        } else { // not using the Loongson ISA
    2.904 +           if( Assembler::is_simm16(disp) ) {
    2.905 +              if (value == 0) {
    2.906 +                 __ sw(R0, as_Register(base), disp);
    2.907 +              } else {
    2.908 +                 __ move(AT, value);
    2.909 +                 __ sw(AT, as_Register(base), disp);
    2.910 +              }
    2.911 +           } else {
    2.912 +              if (value == 0) {
    2.913 +                 __ move(T9, disp);
    2.914 +                 __ daddu(AT, as_Register(base), T9);
    2.915 +                 __ sw(R0, AT, 0);
    2.916 +              } else {
    2.917 +                 __ move(T9, disp);
    2.918 +                 __ daddu(AT, as_Register(base), T9);
    2.919 +                 __ move(T9, value);
    2.920 +                 __ sw(T9, AT, 0);
    2.921 +              }
    2.922 +           }
    2.923 +        }
   2.924       }
   2.925    %}
   2.926  
   2.927 -  enc_class load_N_enc (mRegN dst, umemory mem) %{
   2.928 +  enc_class load_N_enc (mRegN dst, memory mem) %{
   2.929       MacroAssembler _masm(&cbuf);
   2.930       int  dst = $dst$$reg;
   2.931       int  base = $mem$$base;
   2.932       int  index = $mem$$index;
   2.933       int  scale = $mem$$scale;
   2.934       int  disp = $mem$$disp;
   2.935 -
   2.936 -     relocInfo::relocType disp_reloc = $mem->disp_reloc();
   2.937 -     assert(disp_reloc == relocInfo::none, "cannot have disp");
   2.938 -
   2.939 -     assert(index == 0, "no index");
   2.940 -     __ lwu(as_Register(dst), as_Register(base), disp);
    2.941 +     relocInfo::relocType disp_reloc = $mem->disp_reloc();
    2.942 +     assert(disp_reloc == relocInfo::none, "cannot have disp");
   2.943 +
   2.944 +     if( index != 0 ) {
   2.945 +        if (scale == 0) {
   2.946 +           __ daddu(AT, as_Register(base), as_Register(index));
   2.947 +        } else {
   2.948 +           __ dsll(AT, as_Register(index), scale);
   2.949 +           __ daddu(AT, as_Register(base), AT);
   2.950 +        }
   2.951 +        if( Assembler::is_simm16(disp) ) { 
   2.952 +           __ lwu(as_Register(dst), AT, disp);
   2.953 +        } else {
   2.954 +           __ set64(T9, disp);
   2.955 +           __ daddu(AT, AT, T9);
   2.956 +           __ lwu(as_Register(dst), AT, 0);
   2.957 +        }    
   2.958 +     } else {
   2.959 +        if( Assembler::is_simm16(disp) ) { 
   2.960 +           __ lwu(as_Register(dst), as_Register(base), disp);
   2.961 +        } else {
   2.962 +           __ set64(T9, disp);   
   2.963 +           __ daddu(AT, as_Register(base), T9);
   2.964 +           __ lwu(as_Register(dst), AT, 0);
   2.965 +        }    
   2.966 +     }
   2.967 +
   2.968    %}
   2.969  
   2.970  
   2.971 @@ -1920,16 +2638,71 @@
   2.972       int  index = $mem$$index;
   2.973       int  scale = $mem$$scale;
   2.974       int  disp = $mem$$disp;
   2.975 -
   2.976 -     relocInfo::relocType disp_reloc = $mem->disp_reloc();
   2.977 -     assert(disp_reloc == relocInfo::none, "cannot have disp");
    2.978 +     relocInfo::relocType disp_reloc = $mem->disp_reloc();
    2.979 +     assert(disp_reloc == relocInfo::none, "cannot have disp");
   2.980  
   2.981       if( index != 0 ) {
   2.982 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
   2.983 -        __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp);
   2.984 +        if ( UseLoongsonISA ) {
   2.985 +           if ( Assembler::is_simm(disp, 8) ) {
   2.986 +              if ( scale != 0 ) {
   2.987 +                 __ dsll(AT, as_Register(index), scale);
   2.988 +                 __ gsldx(as_Register(dst), as_Register(base), AT, disp);
   2.989 +              } else {
   2.990 +                 __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp);
   2.991 +              }
   2.992 +           } else if ( Assembler::is_simm16(disp) ){
   2.993 +              if ( scale != 0 ) {
   2.994 +                 __ dsll(AT, as_Register(index), scale);
   2.995 +                 __ daddu(AT, AT, as_Register(base));
   2.996 +              } else {
   2.997 +                 __ daddu(AT, as_Register(index), as_Register(base));
   2.998 +              }
   2.999 +              __ ld(as_Register(dst), AT, disp);
   2.1000 +           } else {
   2.1001 +              if ( scale != 0 ) {
   2.1002 +                 __ dsll(AT, as_Register(index), scale);
   2.1003 +                 __ move(T9, disp);
   2.1004 +                 __ daddu(AT, AT, T9);
   2.1005 +              } else {
   2.1006 +                 __ move(T9, disp);
   2.1007 +                 __ daddu(AT, as_Register(index), T9);
   2.1008 +              }
   2.1009 +              __ gsldx(as_Register(dst), as_Register(base), AT, 0);
   2.1010 +           }
   2.1011 +        } else { // not using the Loongson ISA
   2.1012 +           if (scale == 0) {
   2.1013 +              __ daddu(AT, as_Register(base), as_Register(index));
   2.1014 +           } else {
   2.1015 +              __ dsll(AT, as_Register(index), scale);
   2.1016 +              __ daddu(AT, as_Register(base), AT);
   2.1017 +           }
   2.1018 +           if( Assembler::is_simm16(disp) ) {
   2.1019 +              __ ld(as_Register(dst), AT, disp);
   2.1020 +           } else {
   2.1021 +              __ set64(T9, disp);
   2.1022 +              __ daddu(AT, AT, T9);
   2.1023 +              __ ld(as_Register(dst), AT, 0);
   2.1024 +           }
   2.1025 +        }
  2.1026       } else {
  2.1027 -        __ ld(as_Register(dst), as_Register(base), disp);
   2.1028 +        if ( UseLoongsonISA ) {
   2.1029 +           if ( Assembler::is_simm16(disp) ) {
   2.1030 +              __ ld(as_Register(dst), as_Register(base), disp);
   2.1031 +           } else {
   2.1032 +              __ set64(T9, disp);
   2.1033 +              __ gsldx(as_Register(dst), as_Register(base), T9, 0);
   2.1034 +           }
   2.1035 +        } else { // not using the Loongson ISA
   2.1036 +           if( Assembler::is_simm16(disp) ) {
   2.1037 +              __ ld(as_Register(dst), as_Register(base), disp);
   2.1038 +           } else {
   2.1039 +              __ set64(T9, disp);
   2.1040 +              __ daddu(AT, as_Register(base), T9);
   2.1041 +              __ ld(as_Register(dst), AT, 0);
   2.1042 +           }
   2.1043 +        }
  2.1044       }
  2.1045 +//     if( disp_reloc != relocInfo::none) __ ld(as_Register(dst), as_Register(dst), 0);
  2.1046    %}
  2.1047  
  2.1048    enc_class store_P_reg_enc (memory mem, mRegP src) %{
  2.1049 @@ -1941,10 +2714,65 @@
  2.1050       int  disp = $mem$$disp;
  2.1051  
  2.1052       if( index != 0 ) {
  2.1053 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1054 -        __ gssdx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.1055 +        if ( UseLoongsonISA ) {
   2.1056 +           if ( Assembler::is_simm(disp, 8) ) {
   2.1057 +              if ( scale == 0 ) {
   2.1058 +                 __ gssdx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.1059 +              } else {
   2.1060 +                 __ dsll(AT, as_Register(index), scale);
   2.1061 +                 __ gssdx(as_Register(src), as_Register(base), AT, disp);
   2.1062 +              }
   2.1063 +           } else if ( Assembler::is_simm16(disp) ) {
   2.1064 +              if ( scale == 0 ) {
   2.1065 +                 __ daddu(AT, as_Register(base), as_Register(index));
   2.1066 +              } else {
   2.1067 +                 __ dsll(AT, as_Register(index), scale);
   2.1068 +                 __ daddu(AT, as_Register(base), AT);
   2.1069 +              }
   2.1070 +              __ sd(as_Register(src), AT, disp);
   2.1071 +           } else {
   2.1072 +              if ( scale == 0 ) {
   2.1073 +                 __ move(T9, disp);
   2.1074 +                 __ daddu(AT, as_Register(index), T9);
   2.1075 +              } else {
   2.1076 +                 __ dsll(AT, as_Register(index), scale);
   2.1077 +                 __ move(T9, disp);
   2.1078 +                 __ daddu(AT, AT, T9);
   2.1079 +              }
   2.1080 +              __ gssdx(as_Register(src), as_Register(base), AT, 0);
   2.1081 +           }
   2.1082 +        } else { // not using the Loongson ISA
   2.1083 +           if (scale == 0) {
   2.1084 +              __ daddu(AT, as_Register(base), as_Register(index));
   2.1085 +           } else {
   2.1086 +              __ dsll(AT, as_Register(index), scale);
   2.1087 +              __ daddu(AT, as_Register(base), AT);
   2.1088 +           }
   2.1089 +           if( Assembler::is_simm16(disp) ) {
   2.1090 +              __ sd(as_Register(src), AT, disp);
   2.1091 +           } else {
   2.1092 +              __ move(T9, disp);
   2.1093 +              __ daddu(AT, AT, T9);
   2.1094 +              __ sd(as_Register(src), AT, 0);
   2.1095 +           }
   2.1096 +        }
  2.1097       } else {
  2.1098 -        __ sd(as_Register(src), as_Register(base), disp);
   2.1099 +        if ( UseLoongsonISA ) {
   2.1100 +           if ( Assembler::is_simm16(disp) ) {
   2.1101 +              __ sd(as_Register(src), as_Register(base), disp);
   2.1102 +           } else {
   2.1103 +              __ move(T9, disp);
   2.1104 +              __ gssdx(as_Register(src), as_Register(base), T9, 0);
   2.1105 +           }
   2.1106 +        } else {
   2.1107 +           if( Assembler::is_simm16(disp) ) {
   2.1108 +              __ sd(as_Register(src), as_Register(base), disp);
   2.1109 +           } else {
   2.1110 +              __ move(T9, disp);
   2.1111 +              __ daddu(AT, as_Register(base), T9);
   2.1112 +              __ sd(as_Register(src), AT, 0);
   2.1113 +           }
   2.1114 +        }
  2.1115       }
  2.1116    %}
  2.1117  
  2.1118 @@ -1957,10 +2785,65 @@
  2.1119       int  disp = $mem$$disp;
  2.1120  
  2.1121       if( index != 0 ) {
  2.1122 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1123 -        __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.1124 +        if ( UseLoongsonISA ) {
   2.1125 +           if ( Assembler::is_simm(disp, 8) ) {
   2.1126 +              if ( scale == 0 ) {
   2.1127 +                 __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp);
   2.1128 +              } else {
   2.1129 +                 __ dsll(AT, as_Register(index), scale);
   2.1130 +                 __ gsswx(as_Register(src), as_Register(base), AT, disp);
   2.1131 +              }
   2.1132 +           } else if ( Assembler::is_simm16(disp) ) {
   2.1133 +              if ( scale == 0 ) {
   2.1134 +                 __ daddu(AT, as_Register(base), as_Register(index));
   2.1135 +              } else {
   2.1136 +                 __ dsll(AT, as_Register(index), scale);
   2.1137 +                 __ daddu(AT, as_Register(base), AT);
   2.1138 +              }
   2.1139 +              __ sw(as_Register(src), AT, disp);
   2.1140 +           } else {
   2.1141 +              if ( scale == 0 ) {
   2.1142 +                 __ move(T9, disp);
   2.1143 +                 __ daddu(AT, as_Register(index), T9);
   2.1144 +              } else {
   2.1145 +                 __ dsll(AT, as_Register(index), scale);
   2.1146 +                 __ move(T9, disp);
   2.1147 +                 __ daddu(AT, AT, T9);
   2.1148 +              }
   2.1149 +              __ gsswx(as_Register(src), as_Register(base), AT, 0);
   2.1150 +           }
   2.1151 +        } else { // not using the Loongson ISA
   2.1152 +           if (scale == 0) {
   2.1153 +              __ daddu(AT, as_Register(base), as_Register(index));
   2.1154 +           } else {
   2.1155 +              __ dsll(AT, as_Register(index), scale);
   2.1156 +              __ daddu(AT, as_Register(base), AT);
   2.1157 +           }
   2.1158 +           if( Assembler::is_simm16(disp) ) {
   2.1159 +              __ sw(as_Register(src), AT, disp);
   2.1160 +           } else {
   2.1161 +              __ move(T9, disp);
   2.1162 +              __ daddu(AT, AT, T9);
   2.1163 +              __ sw(as_Register(src), AT, 0);
   2.1164 +           }
   2.1165 +        }
  2.1166       } else {
  2.1167 -	__ sw(as_Register(src), as_Register(base), disp);
   2.1168 +        if ( UseLoongsonISA ) {
   2.1169 +           if ( Assembler::is_simm16(disp) ) {
   2.1170 +              __ sw(as_Register(src), as_Register(base), disp);
   2.1171 +           } else {
   2.1172 +              __ move(T9, disp);
   2.1173 +              __ gsswx(as_Register(src), as_Register(base), T9, 0);
   2.1174 +           }
   2.1175 +        } else {
   2.1176 +           if( Assembler::is_simm16(disp) ) {
   2.1177 +              __ sw(as_Register(src), as_Register(base), disp);
   2.1178 +           } else {
   2.1179 +              __ move(T9, disp);
   2.1180 +              __ daddu(AT, as_Register(base), T9);
   2.1181 +              __ sw(as_Register(src), AT, 0);
   2.1182 +           }
   2.1183 +        }
  2.1184       }
  2.1185    %}
  2.1186  
  2.1187 @@ -1972,29 +2855,303 @@
  2.1188       int  disp = $mem$$disp;
  2.1189  
  2.1190       if( index != 0 ) {
  2.1191 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1192 -        __ gssdx(R0, as_Register(base), as_Register(index), disp);
  2.1193 +        if (scale == 0) {
  2.1194 +           if( Assembler::is_simm16(disp) ) { 
  2.1195 +              if (UseLoongsonISA && Assembler::is_simm(disp, 8)) {
  2.1196 +                __ gssdx(R0, as_Register(base), as_Register(index), disp);
  2.1197 +              } else {
  2.1198 +                __ daddu(AT, as_Register(base), as_Register(index));
  2.1199 +                __ sd(R0, AT, disp);
  2.1200 +              }
  2.1201 +           } else {
  2.1202 +              __ daddu(AT, as_Register(base), as_Register(index));
  2.1203 +              __ move(T9, disp);
  2.1204 +              if(UseLoongsonISA) {
  2.1205 +                __ gssdx(R0, AT, T9, 0);
  2.1206 +              } else {
  2.1207 +                __ daddu(AT, AT, T9); 
  2.1208 +                __ sd(R0, AT, 0);
  2.1209 +              }
  2.1210 +           }    
  2.1211 +        } else {
  2.1212 +           __ dsll(AT, as_Register(index), scale);
  2.1213 +           if( Assembler::is_simm16(disp) ) { 
  2.1214 +              if (UseLoongsonISA && Assembler::is_simm(disp, 8)) {
  2.1215 +                __ gssdx(R0, as_Register(base), AT, disp);
  2.1216 +              } else {
  2.1217 +                __ daddu(AT, as_Register(base), AT);
  2.1218 +                __ sd(R0, AT, disp);
  2.1219 +              }
  2.1220 +           } else {
  2.1221 +              __ daddu(AT, as_Register(base), AT);
  2.1222 +              __ move(T9, disp);
  2.1223 +              if (UseLoongsonISA) {
  2.1224 +                __ gssdx(R0, AT, T9, 0);
  2.1225 +              } else {
  2.1226 +                __ daddu(AT, AT, T9); 
  2.1227 +                __ sd(R0, AT, 0);
  2.1228 +              }
  2.1229 +           }    
  2.1230 +        }
  2.1231       } else {
  2.1232 -        __ sd(R0, as_Register(base), disp);
  2.1233 +        if( Assembler::is_simm16(disp) ) { 
  2.1234 +           __ sd(R0, as_Register(base), disp);
  2.1235 +        } else {
  2.1236 +           __ move(T9, disp);   
  2.1237 +           if (UseLoongsonISA) {
  2.1238 +             __ gssdx(R0, as_Register(base), T9, 0);
  2.1239 +           } else {
  2.1240 +             __ daddu(AT, as_Register(base), T9); 
  2.1241 +             __ sd(R0, AT, 0);
  2.1242 +           }
  2.1243 +        }    
  2.1244       }
  2.1245    %}
  2.1246  
  2.1247 -
  2.1248 -  enc_class storeImmN0_enc(memory mem) %{
  2.1249 +  enc_class store_P_immP_enc (memory mem, immP31 src) %{
  2.1250       MacroAssembler _masm(&cbuf);
  2.1251       int  base = $mem$$base;
  2.1252       int  index = $mem$$index;
  2.1253       int  scale = $mem$$scale;
  2.1254       int  disp = $mem$$disp;
  2.1255 -
  2.1256 -     if(index != 0){
  2.1257 -       assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1258 -       __ gsswx(R0, as_Register(base), as_Register(index), disp);
  2.1259 +     long value = $src$$constant;
  2.1260 +
  2.1261 +     if( index != 0 ) {
  2.1262 +        if (scale == 0) {
  2.1263 +           __ daddu(AT, as_Register(base), as_Register(index));
  2.1264 +        } else {
  2.1265 +           __ dsll(AT, as_Register(index), scale);
  2.1266 +           __ daddu(AT, as_Register(base), AT);
  2.1267 +        }
  2.1268 +        if( Assembler::is_simm16(disp) ) { 
  2.1269 +           if (value == 0) {
  2.1270 +              __ sd(R0, AT, disp);
  2.1271 +           } else {
  2.1272 +              __ move(T9, value);
  2.1273 +              __ sd(T9, AT, disp);
  2.1274 +           }
  2.1275 +        } else {
  2.1276 +           if (value == 0) {
  2.1277 +              __ move(T9, disp);
  2.1278 +              __ daddu(AT, AT, T9);
  2.1279 +              __ sd(R0, AT, 0);
  2.1280 +           } else {
  2.1281 +              __ move(T9, disp);
  2.1282 +              __ daddu(AT, AT, T9);
  2.1283 +              __ move(T9, value);
  2.1284 +              __ sd(T9, AT, 0);
  2.1285 +           }
  2.1286 +        }
  2.1287       } else {
  2.1288 -       __ sw(R0, as_Register(base), disp);
  2.1289 +        if( Assembler::is_simm16(disp) ) { 
  2.1290 +           if (value == 0) {
  2.1291 +              __ sd(R0, as_Register(base), disp);
  2.1292 +           } else {
  2.1293 +              __ move(AT, value);
  2.1294 +              __ sd(AT, as_Register(base), disp);
  2.1295 +           }
  2.1296 +        } else {
  2.1297 +           if (value == 0) {
  2.1298 +              __ move(T9, disp);
  2.1299 +              __ daddu(AT, as_Register(base), T9);
  2.1300 +              __ sd(R0, AT, 0);
  2.1301 +           } else {
  2.1302 +              __ move(T9, disp);
  2.1303 +              __ daddu(AT, as_Register(base), T9);
  2.1304 +              __ move(T9, value);
  2.1305 +              __ sd(T9, AT, 0);
  2.1306 +           }
  2.1307 +        }
  2.1308       }
  2.1309 +  %}
  2.1310 +
   2.1311 +  enc_class storeImmN0_enc(memory mem, immN0 src) %{
  2.1312 +     MacroAssembler _masm(&cbuf);
  2.1313 +     int  base = $mem$$base;
  2.1314 +     int  index = $mem$$index;
  2.1315 +     int  scale = $mem$$scale;
  2.1316 +     int  disp = $mem$$disp;
  2.1317 +
   2.1318 +     if( index != 0 ) {
   2.1319 +        if (scale == 0) {
   2.1320 +           __ daddu(AT, as_Register(base), as_Register(index));
   2.1321 +        } else {
   2.1322 +           __ dsll(AT, as_Register(index), scale);
   2.1323 +           __ daddu(AT, as_Register(base), AT);
   2.1324 +        }
   2.1325 +
   2.1326 +        if( Assembler::is_simm16(disp) ) {
   2.1327 +           __ sw(R0, AT, disp);
   2.1328 +        } else {
   2.1329 +           __ move(T9, disp);
   2.1330 +           __ daddu(AT, AT, T9);
   2.1331 +           __ sw(R0, AT, 0);
   2.1332 +        }
   2.1333 +     } else {
   2.1334 +        if( Assembler::is_simm16(disp) ) {
   2.1335 +           __ sw(R0, as_Register(base), disp);
   2.1336 +        } else {
   2.1337 +           __ move(T9, disp);
   2.1338 +           __ daddu(AT, as_Register(base), T9);
   2.1339 +           __ sw(R0, AT, 0);
   2.1340 +        }
   2.1341 +     }
  2.1343    %} 
  2.1344  
  2.1345 +  enc_class storeImmN_enc (memory mem, immN src) %{
  2.1346 +     MacroAssembler _masm(&cbuf);
  2.1347 +     int  base = $mem$$base;
  2.1348 +     int  index = $mem$$index;
  2.1349 +     int  scale = $mem$$scale;
  2.1350 +     int  disp = $mem$$disp;
  2.1351 +     long * value = (long *)$src$$constant;
  2.1352 +
  2.1353 +     if (value == NULL) {
  2.1354 +         guarantee(Assembler::is_simm16(disp), "FIXME: disp is not simm16!");
  2.1355 +         if (index == 0) {
  2.1356 +             __ sw(R0, as_Register(base), disp);
  2.1357 +         } else {
  2.1358 +             if (scale == 0) {
  2.1359 +                __ daddu(AT, as_Register(base), as_Register(index));
  2.1360 +             } else {
  2.1361 +                __ dsll(AT, as_Register(index), scale);
  2.1362 +                __ daddu(AT, as_Register(base), AT);
  2.1363 +             }
  2.1364 +             __ sw(R0, AT, disp);
  2.1365 +         }
  2.1366 +
  2.1367 +         return;
  2.1368 +     }
  2.1369 +
  2.1370 +     int oop_index = __ oop_recorder()->find_index((jobject)value);
  2.1371 +     RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2.1372 +
  2.1373 +     guarantee(scale == 0, "FIXME: scale is not zero !");
  2.1374 +     guarantee(value != 0, "FIXME: value is zero !");
  2.1375 +
  2.1376 +    if (index != 0) {
  2.1377 +         if (scale == 0) {
  2.1378 +            __ daddu(AT, as_Register(base), as_Register(index));
  2.1379 +         } else {
  2.1380 +            __ dsll(AT, as_Register(index), scale);
  2.1381 +            __ daddu(AT, as_Register(base), AT);
  2.1382 +         }
  2.1383 +         if( Assembler::is_simm16(disp) ) { 
  2.1384 +                 if(rspec.type() != relocInfo::none) {
  2.1385 +                         __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1386 +                         __ patchable_set48(T9, oop_index);
  2.1387 +                 } else {
  2.1388 +                         __ set64(T9, oop_index);
  2.1389 +                 }
  2.1390 +                 __ sw(T9, AT, disp);
  2.1391 +         } else {
  2.1392 +                 __ move(T9, disp);
   2.1393 +                 __ daddu(AT, AT, T9);
  2.1394 +
  2.1395 +                 if(rspec.type() != relocInfo::none) {
  2.1396 +                         __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1397 +                         __ patchable_set48(T9, oop_index);
  2.1398 +                 } else {
  2.1399 +                         __ set64(T9, oop_index);
  2.1400 +                 }
  2.1401 +                 __ sw(T9, AT, 0);
  2.1402 +         }
   2.1403 +     } else {
  2.1405 +         if( Assembler::is_simm16(disp) ) { 
  2.1406 +                 if($src->constant_reloc() != relocInfo::none) {
  2.1407 +                         __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1408 +                         __ patchable_set48(T9, oop_index);
  2.1409 +                 } else {
  2.1410 +                         __ set64(T9, oop_index);
  2.1411 +                 }
  2.1412 +                 __ sw(T9, as_Register(base), disp);
  2.1413 +         } else {
  2.1414 +                 __ move(T9, disp);
  2.1415 +                 __ daddu(AT, as_Register(base), T9);
  2.1416 +
  2.1417 +                 if($src->constant_reloc() != relocInfo::none){
  2.1418 +                         __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1419 +                         __ patchable_set48(T9, oop_index);
  2.1420 +                 } else {
  2.1421 +                         __ set64(T9, oop_index);
  2.1422 +                 }
  2.1423 +                 __ sw(T9, AT, 0);
  2.1424 +         }
  2.1425 +     }
  2.1426 +  %}
  2.1427 +
  2.1428 +  enc_class storeImmNKlass_enc (memory mem, immNKlass src) %{
  2.1429 +     MacroAssembler _masm(&cbuf);
  2.1430 +
  2.1431 +     assert (UseCompressedOops, "should only be used for compressed headers");
  2.1432 +     assert (__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
  2.1433 +
  2.1434 +     int  base = $mem$$base;
  2.1435 +     int  index = $mem$$index;
  2.1436 +     int  scale = $mem$$scale;
  2.1437 +     int  disp = $mem$$disp;
  2.1438 +     long value = $src$$constant;
  2.1439 +
  2.1440 +         int klass_index = __ oop_recorder()->find_index((Klass*)value);
  2.1441 +         RelocationHolder rspec = metadata_Relocation::spec(klass_index);
  2.1442 +         long narrowp = Klass::encode_klass((Klass*)value);
  2.1443 +
  2.1444 +         if(index!=0){
  2.1445 +                 if (scale == 0) {
  2.1446 +                    __ daddu(AT, as_Register(base), as_Register(index));
  2.1447 +                 } else {
  2.1448 +                    __ dsll(AT, as_Register(index), scale);
  2.1449 +                    __ daddu(AT, as_Register(base), AT);
  2.1450 +                 }
  2.1451 +
  2.1452 +                 if( Assembler::is_simm16(disp) ) { 
  2.1453 +                         if(rspec.type() != relocInfo::none){
  2.1454 +                                 __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1455 +                                 __ patchable_set48(T9, narrowp);
  2.1456 +                         } else {
  2.1457 +                                 __ set64(T9, narrowp);
  2.1458 +                         }
  2.1459 +                         __ sw(T9, AT, disp);
  2.1460 +                 } else {
  2.1461 +                         __ move(T9, disp);
  2.1462 +                         __ daddu(AT, AT, T9);
  2.1463 +
  2.1464 +                         if(rspec.type() != relocInfo::none){
  2.1465 +                                 __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1466 +                                 __ patchable_set48(T9, narrowp);
  2.1467 +                         } else {
  2.1468 +                                 __ set64(T9, narrowp);
  2.1469 +                         }
  2.1470 +
  2.1471 +                         __ sw(T9, AT, 0);
  2.1472 +                 }
  2.1473 +         } else {
  2.1474 +                 if( Assembler::is_simm16(disp) ) { 
  2.1475 +                         if(rspec.type() != relocInfo::none){
  2.1476 +                                 __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1477 +                                 __ patchable_set48(T9, narrowp);
   2.1478 +                         } else {
  2.1480 +                                 __ set64(T9, narrowp);
  2.1481 +                         }
  2.1482 +                         __ sw(T9, as_Register(base), disp);
  2.1483 +                 } else {
  2.1484 +                         __ move(T9, disp);
  2.1485 +                         __ daddu(AT, as_Register(base), T9);
  2.1486 +
  2.1487 +                         if(rspec.type() != relocInfo::none){
  2.1488 +                                 __ relocate(rspec, Assembler::narrow_oop_operand);
  2.1489 +                                 __ patchable_set48(T9, narrowp);
  2.1490 +                         } else {
  2.1491 +                                 __ set64(T9, narrowp);
  2.1492 +                         }
  2.1493 +                         __ sw(T9, AT, 0);
  2.1494 +                 }
  2.1495 +         }
  2.1496 +  %}
  2.1497 +
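Both immN and immNKlass stores ultimately write a 32-bit compressed value with sw; the difference is only how that value is obtained (oop_Relocation and an oop index for oops, Klass::encode_klass plus metadata_Relocation for klasses). A sketch of the compression arithmetic these constants are assumed to follow:

    #include <cstdint>

    // Assumed narrow-pointer math: compressed = (raw - base) >> shift.
    static inline uint32_t encode_narrow(uint64_t raw, uint64_t base, int shift) {
      return (uint32_t)((raw - base) >> shift);
    }
    static inline uint64_t decode_narrow(uint32_t narrow, uint64_t base, int shift) {
      return base + ((uint64_t)narrow << shift);
    }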
  2.1498    enc_class load_L_enc (mRegL dst, memory mem) %{
  2.1499       MacroAssembler _masm(&cbuf);
  2.1500       int  base = $mem$$base;
  2.1501 @@ -2003,11 +3160,31 @@
  2.1502       int  disp = $mem$$disp;
  2.1503       Register  dst_reg = as_Register($dst$$reg);
  2.1504  
  2.1505 +     // For implicit null check
  2.1506 +     __ lb(AT, as_Register(base), 0);
  2.1507 +
  2.1508       if( index != 0 ) {
  2.1509 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1510 -        __ gsldx(dst_reg, as_Register(base), as_Register(index), disp);
  2.1511 +        if (scale == 0) {
  2.1512 +           __ daddu(AT, as_Register(base), as_Register(index));
  2.1513 +        } else {
  2.1514 +           __ dsll(AT, as_Register(index), scale);
  2.1515 +           __ daddu(AT, as_Register(base), AT);
  2.1516 +        }
  2.1517 +        if( Assembler::is_simm16(disp) ) { 
  2.1518 +           __ ld(dst_reg, AT, disp);
  2.1519 +        } else {
  2.1520 +           __ move(T9, disp);
  2.1521 +           __ daddu(AT, AT, T9); 
  2.1522 +           __ ld(dst_reg, AT, 0);
  2.1523 +        }    
  2.1524       } else {
  2.1525 -        __ ld(dst_reg, as_Register(base), disp);
  2.1526 +        if( Assembler::is_simm16(disp) ) { 
  2.1527 +           __ ld(dst_reg, as_Register(base), disp);
  2.1528 +        } else {
  2.1529 +           __ move(T9, disp);   
  2.1530 +           __ daddu(AT, as_Register(base), T9); 
  2.1531 +           __ ld(dst_reg, AT, 0);
  2.1532 +        }    
  2.1533       }
  2.1534    %}
  2.1535  
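The extra lb from offset 0 in load_L_enc exists for implicit null checks: once the address arithmetic folds base into AT, a fault on the real ld could no longer be attributed to this memory node, so the base register is touched first at a PC the signal handler can map back. The idea in portable form (sketch only):

    // Touch *base before computing the derived address, so a null base
    // faults here rather than at an unrecognizable folded address.
    static inline void implicit_null_probe(const volatile char* base) {
      (void)*base;  // compiles to a single byte load; faults on nullptr
    }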
  2.1536 @@ -2020,14 +3197,31 @@
  2.1537       Register  src_reg = as_Register($src$$reg);
  2.1538  
  2.1539       if( index != 0 ) {
  2.1540 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1541 -        __ gssdx(src_reg, as_Register(base), as_Register(index), disp);
  2.1542 +        if (scale == 0) {
  2.1543 +           __ daddu(AT, as_Register(base), as_Register(index));
  2.1544 +        } else {
  2.1545 +           __ dsll(AT, as_Register(index), scale);
  2.1546 +           __ daddu(AT, as_Register(base), AT);
  2.1547 +        }
  2.1548 +        if( Assembler::is_simm16(disp) ) { 
  2.1549 +           __ sd(src_reg, AT, disp);
  2.1550 +        } else {
  2.1551 +           __ move(T9, disp);
  2.1552 +           __ daddu(AT, AT, T9); 
  2.1553 +           __ sd(src_reg, AT, 0);
  2.1554 +        }    
  2.1555       } else {
  2.1556 -        __ sd(src_reg, as_Register(base), disp);
  2.1557 +        if( Assembler::is_simm16(disp) ) { 
  2.1558 +           __ sd(src_reg, as_Register(base), disp);
  2.1559 +        } else {
  2.1560 +           __ move(T9, disp);   
  2.1561 +           __ daddu(AT, as_Register(base), T9); 
  2.1562 +           __ sd(src_reg, AT, 0);
  2.1563 +        }    
  2.1564       }
  2.1565    %}
  2.1566  
  2.1567 -  enc_class store_L_immL0_enc (memory mem) %{
  2.1568 +  enc_class store_L_immL0_enc (memory mem, immL0 src) %{
  2.1569       MacroAssembler _masm(&cbuf);
  2.1570       int  base = $mem$$base;
  2.1571       int  index = $mem$$index;
  2.1572 @@ -2035,10 +3229,68 @@
  2.1573       int  disp = $mem$$disp;
  2.1574  
  2.1575       if( index != 0 ) {
  2.1576 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1577 -        __ gssdx(R0, as_Register(base), as_Register(index), disp);
  2.1578 +        // For implicit null check
  2.1579 +        __ lb(AT, as_Register(base), 0);
  2.1580 +
  2.1581 +        if (scale == 0) {
  2.1582 +           __ daddu(AT, as_Register(base), as_Register(index));
  2.1583 +        } else {
  2.1584 +           __ dsll(AT, as_Register(index), scale);
  2.1585 +           __ daddu(AT, as_Register(base), AT);
  2.1586 +        }
  2.1587 +        if( Assembler::is_simm16(disp) ) { 
  2.1588 +           __ sd(R0, AT, disp);
  2.1589 +        } else {
  2.1590 +           __ move(T9, disp);
   2.1591 +           __ daddu(AT, AT, T9);
  2.1592 +           __ sd(R0, AT, 0);
  2.1593 +        }    
  2.1594       } else {
  2.1595 -        __ sd(R0, as_Register(base), disp);
  2.1596 +        if( Assembler::is_simm16(disp) ) { 
  2.1597 +           __ sd(R0, as_Register(base), disp);
  2.1598 +        } else {
  2.1599 +           __ move(T9, disp);   
   2.1600 +           __ daddu(AT, as_Register(base), T9);
  2.1601 +           __ sd(R0, AT, 0);
  2.1602 +        }    
  2.1603 +     }
  2.1604 +  %}
  2.1605 +
  2.1606 +  enc_class store_L_immL_enc (memory mem, immL src) %{
  2.1607 +     MacroAssembler _masm(&cbuf);
  2.1608 +     int  base = $mem$$base;
  2.1609 +     int  index = $mem$$index;
  2.1610 +     int  scale = $mem$$scale;
  2.1611 +     int  disp = $mem$$disp;
  2.1612 +     long  imm = $src$$constant;
  2.1613 +
  2.1614 +     if( index != 0 ) {
  2.1615 +        if (scale == 0) {
  2.1616 +           __ daddu(AT, as_Register(base), as_Register(index));
  2.1617 +        } else {
  2.1618 +           __ dsll(AT, as_Register(index), scale);
  2.1619 +           __ daddu(AT, as_Register(base), AT);
  2.1620 +        }
  2.1621 +        if( Assembler::is_simm16(disp) ) { 
  2.1622 +           __ set64(T9, imm);
  2.1623 +           __ sd(T9, AT, disp);
  2.1624 +        } else {
  2.1625 +           __ move(T9, disp);
   2.1626 +           __ daddu(AT, AT, T9);
  2.1627 +           __ set64(T9, imm);
  2.1628 +           __ sd(T9, AT, 0);
  2.1629 +        }
  2.1630 +     } else {
  2.1631 +        if( Assembler::is_simm16(disp) ) { 
  2.1632 +           __ move(AT, as_Register(base));
  2.1633 +           __ set64(T9, imm);
  2.1634 +           __ sd(T9, AT, disp);
  2.1635 +        } else {
  2.1636 +           __ move(T9, disp);
   2.1637 +           __ daddu(AT, as_Register(base), T9);
  2.1638 +           __ set64(T9, imm);
  2.1639 +           __ sd(T9, AT, 0);
  2.1640 +        }
  2.1641       }
  2.1642    %}
  2.1643  
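store_L_immL_enc leans on set64 to materialize an arbitrary 64-bit constant in T9 before the sd. On MIPS that is assumed to mean stitching the value together from 16-bit pieces with lui/ori/dsll; a hypothetical view of the split, not the port's actual implementation:

    #include <cstdint>

    // Hypothetical decomposition behind a set64-style sequence.
    struct Imm64Pieces { uint16_t p48, p32, p16, p0; };
    static inline Imm64Pieces split64(uint64_t v) {
      return { uint16_t(v >> 48), uint16_t(v >> 32),
               uint16_t(v >> 16), uint16_t(v) };
    }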
  2.1644 @@ -2051,10 +3303,50 @@
  2.1645       FloatRegister dst = $dst$$FloatRegister;
  2.1646  
  2.1647       if( index != 0 ) {
  2.1648 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1649 -        __ gslwxc1(dst, as_Register(base), as_Register(index), disp);
  2.1650 +        if( Assembler::is_simm16(disp) ) { 
  2.1651 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
  2.1652 +              if (scale == 0) {
  2.1653 +                 __ gslwxc1(dst, as_Register(base), as_Register(index), disp);
  2.1654 +              } else {
  2.1655 +                 __ dsll(AT, as_Register(index), scale);
  2.1656 +                 __ gslwxc1(dst, as_Register(base), AT, disp);
  2.1657 +              }
  2.1658 +           } else {
  2.1659 +              if (scale == 0) {
  2.1660 +                 __ daddu(AT, as_Register(base), as_Register(index));
  2.1661 +              } else {
  2.1662 +                 __ dsll(AT, as_Register(index), scale);
  2.1663 +                 __ daddu(AT, as_Register(base), AT);
  2.1664 +              }
  2.1665 +              __ lwc1(dst, AT, disp);
  2.1666 +           }
  2.1667 +        } else {
  2.1668 +           if (scale == 0) {
  2.1669 +              __ daddu(AT, as_Register(base), as_Register(index));
  2.1670 +           } else {
  2.1671 +              __ dsll(AT, as_Register(index), scale);
  2.1672 +              __ daddu(AT, as_Register(base), AT);
  2.1673 +           }
  2.1674 +           __ move(T9, disp);
  2.1675 +           if( UseLoongsonISA ) {
  2.1676 +              __ gslwxc1(dst, AT, T9, 0);
  2.1677 +           } else {
  2.1678 +              __ daddu(AT, AT, T9); 
  2.1679 +              __ lwc1(dst, AT, 0);
  2.1680 +           }
  2.1681 +        }    
  2.1682       } else {
  2.1683 -        __ lwc1(dst, as_Register(base), disp);
  2.1684 +        if( Assembler::is_simm16(disp) ) { 
  2.1685 +           __ lwc1(dst, as_Register(base), disp);
  2.1686 +        } else {
  2.1687 +           __ move(T9, disp);   
  2.1688 +           if( UseLoongsonISA ) {
  2.1689 +              __ gslwxc1(dst, as_Register(base), T9, 0);
  2.1690 +           } else {
  2.1691 +              __ daddu(AT, as_Register(base), T9); 
  2.1692 +              __ lwc1(dst, AT, 0);
  2.1693 +           }
  2.1694 +        }    
  2.1695       }
  2.1696    %}
  2.1697  
  2.1698 @@ -2067,10 +3359,50 @@
  2.1699       FloatRegister src = $src$$FloatRegister;
  2.1700  
  2.1701       if( index != 0 ) {
  2.1702 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1703 -        __ gsswxc1(src, as_Register(base), as_Register(index), disp);
  2.1704 +        if( Assembler::is_simm16(disp) ) { 
  2.1705 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
  2.1706 +              if (scale == 0) {
  2.1707 +                 __ gsswxc1(src, as_Register(base), as_Register(index), disp);
  2.1708 +              } else {
  2.1709 +                 __ dsll(AT, as_Register(index), scale);
  2.1710 +                 __ gsswxc1(src, as_Register(base), AT, disp);
  2.1711 +              }
  2.1712 +           } else {
  2.1713 +              if (scale == 0) {
  2.1714 +                 __ daddu(AT, as_Register(base), as_Register(index));
  2.1715 +              } else {
  2.1716 +                 __ dsll(AT, as_Register(index), scale);
  2.1717 +                 __ daddu(AT, as_Register(base), AT);
  2.1718 +              }
  2.1719 +              __ swc1(src, AT, disp);
  2.1720 +           }
  2.1721 +        } else {
  2.1722 +           if (scale == 0) {
  2.1723 +              __ daddu(AT, as_Register(base), as_Register(index));
  2.1724 +           } else {
  2.1725 +              __ dsll(AT, as_Register(index), scale);
  2.1726 +              __ daddu(AT, as_Register(base), AT);
  2.1727 +           }
  2.1728 +           __ move(T9, disp);
  2.1729 +           if( UseLoongsonISA ) {
  2.1730 +              __ gsswxc1(src, AT, T9, 0);
  2.1731 +           } else {
  2.1732 +              __ daddu(AT, AT, T9); 
  2.1733 +              __ swc1(src, AT, 0);
  2.1734 +           }
  2.1735 +        }    
  2.1736       } else {
  2.1737 -        __ swc1(src, as_Register(base), disp);
  2.1738 +        if( Assembler::is_simm16(disp) ) { 
  2.1739 +           __ swc1(src, as_Register(base), disp);
  2.1740 +        } else {
  2.1741 +           __ move(T9, disp);   
  2.1742 +           if( UseLoongsonISA ) {
   2.1743 +              __ gsswxc1(src, as_Register(base), T9, 0);
  2.1744 +           } else {
  2.1745 +              __ daddu(AT, as_Register(base), T9); 
  2.1746 +              __ swc1(src, AT, 0);
  2.1747 +           }
  2.1748 +        }    
  2.1749       }
  2.1750    %}
  2.1751  
  2.1752 @@ -2083,26 +3415,106 @@
  2.1753       FloatRegister dst_reg = as_FloatRegister($dst$$reg);
  2.1754  
  2.1755       if( index != 0 ) {
  2.1756 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1757 -        __ gsldxc1(dst_reg, as_Register(base), as_Register(index), disp);
  2.1758 +        if( Assembler::is_simm16(disp) ) { 
  2.1759 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
  2.1760 +              if (scale == 0) {
  2.1761 +                 __ gsldxc1(dst_reg, as_Register(base), as_Register(index), disp);
  2.1762 +              } else {
  2.1763 +                 __ dsll(AT, as_Register(index), scale);
  2.1764 +                 __ gsldxc1(dst_reg, as_Register(base), AT, disp);
  2.1765 +              }
  2.1766 +           } else {
  2.1767 +              if (scale == 0) {
  2.1768 +                 __ daddu(AT, as_Register(base), as_Register(index));
  2.1769 +              } else {
  2.1770 +                 __ dsll(AT, as_Register(index), scale);
  2.1771 +                 __ daddu(AT, as_Register(base), AT);
  2.1772 +              }
  2.1773 +              __ ldc1(dst_reg, AT, disp);
  2.1774 +           }
  2.1775 +        } else {
  2.1776 +           if (scale == 0) {
  2.1777 +              __ daddu(AT, as_Register(base), as_Register(index));
  2.1778 +           } else {
  2.1779 +              __ dsll(AT, as_Register(index), scale);
  2.1780 +              __ daddu(AT, as_Register(base), AT);
  2.1781 +           }
  2.1782 +           __ move(T9, disp);
  2.1783 +           if( UseLoongsonISA ) {
  2.1784 +              __ gsldxc1(dst_reg, AT, T9, 0);
  2.1785 +           } else {
   2.1786 +              __ daddu(AT, AT, T9);
  2.1787 +              __ ldc1(dst_reg, AT, 0);
  2.1788 +           }
  2.1789 +        }    
  2.1790       } else {
  2.1791 -        __ ldc1(dst_reg, as_Register(base), disp);
  2.1792 +        if( Assembler::is_simm16(disp) ) { 
  2.1793 +           __ ldc1(dst_reg, as_Register(base), disp);
  2.1794 +        } else {
  2.1795 +           __ move(T9, disp);   
  2.1796 +           if( UseLoongsonISA ) {
  2.1797 +              __ gsldxc1(dst_reg, as_Register(base), T9, 0);
  2.1798 +           } else {
   2.1799 +              __ daddu(AT, as_Register(base), T9);
  2.1800 +              __ ldc1(dst_reg, AT, 0);
  2.1801 +           }
  2.1802 +        }    
  2.1803       }
  2.1804    %}
  2.1805  
  2.1806    enc_class store_D_reg_enc (memory mem, regD src) %{
  2.1807       MacroAssembler _masm(&cbuf);
  2.1808 -     int  base  = $mem$$base;
  2.1809 +     int  base = $mem$$base;
  2.1810       int  index = $mem$$index;
  2.1811       int  scale = $mem$$scale;
  2.1812 -     int  disp  = $mem$$disp;
  2.1813 +     int  disp = $mem$$disp;
  2.1814       FloatRegister src_reg = as_FloatRegister($src$$reg);
  2.1815  
  2.1816       if( index != 0 ) {
  2.1817 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.1818 -        __ gssdxc1(src_reg, as_Register(base), as_Register(index), disp);
  2.1819 +        if( Assembler::is_simm16(disp) ) { 
  2.1820 +           if( UseLoongsonISA && Assembler::is_simm(disp, 8) ) {
  2.1821 +              if (scale == 0) {
  2.1822 +                 __ gssdxc1(src_reg, as_Register(base), as_Register(index), disp);
  2.1823 +              } else {
  2.1824 +                 __ dsll(AT, as_Register(index), scale);
  2.1825 +                 __ gssdxc1(src_reg, as_Register(base), AT, disp);
  2.1826 +              }
  2.1827 +           } else {
  2.1828 +              if (scale == 0) {
  2.1829 +                 __ daddu(AT, as_Register(base), as_Register(index));
  2.1830 +              } else {
  2.1831 +                 __ dsll(AT, as_Register(index), scale);
  2.1832 +                 __ daddu(AT, as_Register(base), AT);
  2.1833 +              }
  2.1834 +              __ sdc1(src_reg, AT, disp);
  2.1835 +           }
  2.1836 +        } else {
  2.1837 +           if (scale == 0) {
  2.1838 +              __ daddu(AT, as_Register(base), as_Register(index));
  2.1839 +           } else {
  2.1840 +              __ dsll(AT, as_Register(index), scale);
  2.1841 +              __ daddu(AT, as_Register(base), AT);
  2.1842 +           }
  2.1843 +           __ move(T9, disp);
  2.1844 +           if( UseLoongsonISA ) {
  2.1845 +              __ gssdxc1(src_reg, AT, T9, 0);
  2.1846 +           } else {
   2.1847 +              __ daddu(AT, AT, T9);
  2.1848 +              __ sdc1(src_reg, AT, 0);
  2.1849 +           }
  2.1850 +        }    
  2.1851       } else {
  2.1852 -        __ sdc1(src_reg, as_Register(base), disp);
  2.1853 +        if( Assembler::is_simm16(disp) ) { 
  2.1854 +           __ sdc1(src_reg, as_Register(base), disp);
  2.1855 +        } else {
  2.1856 +           __ move(T9, disp);   
  2.1857 +           if( UseLoongsonISA ) {
  2.1858 +              __ gssdxc1(src_reg, as_Register(base), T9, 0);
  2.1859 +           } else {
   2.1860 +              __ daddu(AT, as_Register(base), T9);
  2.1861 +              __ sdc1(src_reg, AT, 0);
  2.1862 +           }
  2.1863 +        }    
  2.1864       }
  2.1865    %}
  2.1866  
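load_F/store_F/load_D/store_D are four instances of one template: the addressing logic is identical and only the access instruction differs, either the plain coprocessor-1 form or its gs*xc1 indexed form on Loongson. A compact way to see the pairing (illustrative only):

    // Plain and Loongson-indexed opcode chosen by each FP encoding above.
    enum class FpAccess { LoadF, StoreF, LoadD, StoreD };
    struct FpInsns { const char* plain; const char* indexed; };
    static inline FpInsns fp_insns(FpAccess a) {
      switch (a) {
        case FpAccess::LoadF:  return { "lwc1", "gslwxc1" };
        case FpAccess::StoreF: return { "swc1", "gsswxc1" };
        case FpAccess::LoadD:  return { "ldc1", "gsldxc1" };
        case FpAccess::StoreD: return { "sdc1", "gssdxc1" };
      }
      return { "", "" };
    }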
  2.1867 @@ -2663,6 +4075,17 @@
  2.1868    interface(CONST_INTER);
  2.1869  %}
  2.1870  
  2.1871 +operand immP31()
  2.1872 +%{
  2.1873 +  predicate(n->as_Type()->type()->reloc() == relocInfo::none
  2.1874 +            && (n->get_ptr() >> 31) == 0);
  2.1875 +  match(ConP);
  2.1876 +
  2.1877 +  op_cost(5);
  2.1878 +  format %{ %}
  2.1879 +  interface(CONST_INTER);
  2.1880 +%}
  2.1881 +
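immP31 accepts pointer constants that need no relocation and whose raw bits fit in 31 bits, so they can be stored without first round-tripping through a register load of a full 64-bit constant. The predicate, restated as plain C++ (assumption: get_ptr() yields the raw constant bits):

    #include <cstdint>

    // Mirrors the operand predicate above: no relocation, and the value
    // survives a 31-bit zero test.
    static inline bool fits_immP31(uint64_t ptr_bits, bool needs_reloc) {
      return !needs_reloc && (ptr_bits >> 31) == 0;
    }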
  2.1882  // NULL Pointer Immediate
  2.1883  operand immP0() %{
  2.1884    predicate( n->get_ptr() == 0 );
  2.1885 @@ -3933,13 +5356,223 @@
  2.1886  %}
  2.1887  
  2.1888  //----------Memory Operands----------------------------------------------------
  2.1889 -operand baseOffset16(mRegP reg, immL16 off)
  2.1890 +// Indirect Memory Operand
  2.1891 +operand indirect(mRegP reg) %{
  2.1892 +  constraint(ALLOC_IN_RC(p_reg));
  2.1893 +  match(reg);
  2.1894 +
  2.1895 +  format %{ "[$reg] @ indirect" %}
  2.1896 +  interface(MEMORY_INTER) %{
  2.1897 +    base($reg);
  2.1898 +    index(0x0);	/* NO_INDEX */
  2.1899 +    scale(0x0);
  2.1900 +    disp(0x0);
  2.1901 +  %}
  2.1902 +%}
  2.1903 +
  2.1904 +// Indirect Memory Plus Short Offset Operand
  2.1905 +operand indOffset8(mRegP reg, immL8 off)
  2.1906  %{
  2.1907    constraint(ALLOC_IN_RC(p_reg));
  2.1908    match(AddP reg off);
  2.1909  
  2.1910 +  op_cost(10);
  2.1911 +  format %{ "[$reg + $off (8-bit)] @ indOffset8" %}
  2.1912 +  interface(MEMORY_INTER) %{
  2.1913 +    base($reg);
  2.1914 +    index(0x0); /* NO_INDEX */
  2.1915 +    scale(0x0);
  2.1916 +    disp($off);
  2.1917 +  %}
  2.1918 +%}
  2.1919 +
  2.1920 +// Indirect Memory Times Scale Plus Index Register
  2.1921 +operand indIndexScale(mRegP reg, mRegL lreg, immI2 scale)
  2.1922 +%{
  2.1923 +  constraint(ALLOC_IN_RC(p_reg));
  2.1924 +  match(AddP reg (LShiftL lreg scale));
  2.1925 +
  2.1926 +  op_cost(10);
  2.1927 +  format %{"[$reg + $lreg << $scale] @ indIndexScale" %}
  2.1928 +  interface(MEMORY_INTER) %{
  2.1929 +    base($reg);
  2.1930 +    index($lreg);
  2.1931 +    scale($scale);
  2.1932 +    disp(0x0);
  2.1933 +  %}
  2.1934 +%}
  2.1935 +
  2.1936 +
  2.1937 +// [base + index + offset] 
  2.1938 +operand baseIndexOffset8(mRegP base, mRegL index, immL8 off)
  2.1939 +%{
  2.1940 +  constraint(ALLOC_IN_RC(p_reg));
  2.1941    op_cost(5);
  2.1942 -  format %{ "[$reg + $off (16-bit)] @ baseOffset16" %}
  2.1943 +  match(AddP (AddP base index) off);
  2.1944 +
  2.1945 +  format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %}
  2.1946 +  interface(MEMORY_INTER) %{
  2.1947 +    base($base);
  2.1948 +    index($index);
  2.1949 +    scale(0x0);
  2.1950 +    disp($off);
  2.1951 +  %}
  2.1952 +%}
  2.1953 +
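Each memory operand names the ideal-graph shape it matches; baseIndexOffset8, directly below, covers AddP (AddP base index) off. The same address written as C++, to make the tree concrete (sketch):

    #include <cstdint>

    // C-level picture of the tree baseIndexOffset8 matches:
    static inline char* base_index_offset8(char* base, int64_t index, int64_t off) {
      return (base + index) + off;   // AddP (AddP base index) off
    }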
  2.1954 +// [base + index + offset] 
  2.1955 +operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off)
  2.1956 +%{
  2.1957 +  constraint(ALLOC_IN_RC(p_reg));
  2.1958 +  op_cost(5);
  2.1959 +  match(AddP (AddP base (ConvI2L index)) off);
  2.1960 +
  2.1961 +  format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %}
  2.1962 +  interface(MEMORY_INTER) %{
  2.1963 +    base($base);
  2.1964 +    index($index);
  2.1965 +    scale(0x0);
  2.1966 +    disp($off);
  2.1967 +  %}
  2.1968 +%}
  2.1969 +
  2.1970 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
  2.1971 +operand indIndexScaleOffset8(mRegP reg, immL8 off, mRegL lreg, immI2 scale)
  2.1972 +%{
  2.1973 +  constraint(ALLOC_IN_RC(p_reg));
  2.1974 +  match(AddP (AddP reg (LShiftL lreg scale)) off);
  2.1975 +
  2.1976 +  op_cost(10);
  2.1977 +  format %{"[$reg + $off + $lreg << $scale] @ indIndexScaleOffset8" %}
  2.1978 +  interface(MEMORY_INTER) %{
  2.1979 +    base($reg);
  2.1980 +    index($lreg);
  2.1981 +    scale($scale);
  2.1982 +    disp($off);
  2.1983 +  %}
  2.1984 +%}
  2.1985 +
  2.1986 +operand indIndexScaleOffset8_convI2L(mRegP reg, immL8 off, mRegI ireg, immI2 scale)
  2.1987 +%{
  2.1988 +  constraint(ALLOC_IN_RC(p_reg));
  2.1989 +  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
  2.1990 +
  2.1991 +  op_cost(10);
  2.1992 +  format %{"[$reg + $off + $ireg << $scale] @ indIndexScaleOffset8_convI2L" %}
  2.1993 +  interface(MEMORY_INTER) %{
  2.1994 +    base($reg);
  2.1995 +    index($ireg);
  2.1996 +    scale($scale);
  2.1997 +    disp($off);
  2.1998 +  %}
  2.1999 +%}
  2.2000 +
  2.2001 +// [base + index<<scale + offset] 
  2.2002 +operand basePosIndexScaleOffset8(mRegP base, mRegI index, immL8 off, immI_0_31 scale)
  2.2003 +%{
  2.2004 +  constraint(ALLOC_IN_RC(p_reg));
  2.2005 +  //predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  2.2006 +  op_cost(10);
  2.2007 +  match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off);
  2.2008 +
  2.2009 +  format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %}
  2.2010 +  interface(MEMORY_INTER) %{
  2.2011 +    base($base);
  2.2012 +    index($index);
  2.2013 +    scale($scale);
  2.2014 +    disp($off);
  2.2015 +  %}
  2.2016 +%}
  2.2017 +
  2.2018 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
  2.2019 +operand indIndexScaleOffsetNarrow(mRegN reg, immL8 off, mRegL lreg, immI2 scale)
  2.2020 +%{
  2.2021 +  predicate(Universe::narrow_oop_shift() == 0);
  2.2022 +  constraint(ALLOC_IN_RC(p_reg));
  2.2023 +  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  2.2024 +
  2.2025 +  op_cost(10);
  2.2026 +  format %{"[$reg + $off + $lreg << $scale] @ indIndexScaleOffsetNarrow" %}
  2.2027 +  interface(MEMORY_INTER) %{
  2.2028 +    base($reg);
  2.2029 +    index($lreg);
  2.2030 +    scale($scale);
  2.2031 +    disp($off);
  2.2032 +  %}
  2.2033 +%}
  2.2034 +
   2.2035 +// [base + index<<scale + offset] for compressed Oops
  2.2036 +operand indPosIndexI2LScaleOffset8Narrow(mRegN base, mRegI index, immL8 off, immI_0_31 scale)
  2.2037 +%{
  2.2038 +  constraint(ALLOC_IN_RC(p_reg));
  2.2039 +  //predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  2.2040 +  predicate(Universe::narrow_oop_shift() == 0);
  2.2041 +  op_cost(10);
  2.2042 +  match(AddP (AddP (DecodeN base) (LShiftL (ConvI2L index) scale)) off);
  2.2043 +
  2.2044 +  format %{ "[$base + $index << $scale + $off (8-bit)] @ indPosIndexI2LScaleOffset8Narrow" %}
  2.2045 +  interface(MEMORY_INTER) %{
  2.2046 +    base($base);
  2.2047 +    index($index);
  2.2048 +    scale($scale);
  2.2049 +    disp($off);
  2.2050 +  %}
  2.2051 +%}
  2.2052 +
  2.2053 +//FIXME: I think it's better to limit the immI to be 16-bit at most!
  2.2054 +// Indirect Memory Plus Long Offset Operand
  2.2055 +operand indOffset32(mRegP reg, immL32 off) %{
  2.2056 +  constraint(ALLOC_IN_RC(p_reg));
  2.2057 +  op_cost(20);
  2.2058 +  match(AddP reg off);
  2.2059 +
  2.2060 +  format %{ "[$reg + $off (32-bit)] @ indOffset32" %}
  2.2061 +  interface(MEMORY_INTER) %{
  2.2062 +    base($reg);
  2.2063 +    index(0x0); 	/* NO_INDEX */
  2.2064 +    scale(0x0);
  2.2065 +    disp($off);
  2.2066 +  %}
  2.2067 +%}
  2.2068 +
  2.2069 +// Indirect Memory Plus Index Register 
  2.2070 +operand indIndex(mRegP addr, mRegL index) %{
  2.2071 +  constraint(ALLOC_IN_RC(p_reg));
  2.2072 +  match(AddP addr index);
  2.2073 +
  2.2074 +  op_cost(20);
  2.2075 +  format %{"[$addr + $index] @ indIndex" %}
  2.2076 +  interface(MEMORY_INTER) %{
  2.2077 +    base($addr);
  2.2078 +    index($index);
  2.2079 +    scale(0x0);
  2.2080 +    disp(0x0);
  2.2081 +  %}
  2.2082 +%}
  2.2083 +
  2.2084 +operand indirectNarrowKlass(mRegN reg)
  2.2085 +%{
  2.2086 +  predicate(Universe::narrow_klass_shift() == 0);
  2.2087 +  constraint(ALLOC_IN_RC(p_reg));
  2.2088 +  op_cost(10);
  2.2089 +  match(DecodeNKlass reg);
  2.2090 +
  2.2091 +  format %{ "[$reg] @ indirectNarrowKlass" %}
  2.2092 +  interface(MEMORY_INTER) %{
  2.2093 +    base($reg);
  2.2094 +    index(0x0);
  2.2095 +    scale(0x0);
  2.2096 +    disp(0x0);
  2.2097 +  %}
  2.2098 +%}
  2.2099 +
  2.2100 +operand indOffset8NarrowKlass(mRegN reg, immL8 off)
  2.2101 +%{
  2.2102 +  predicate(Universe::narrow_klass_shift() == 0);
  2.2103 +  constraint(ALLOC_IN_RC(p_reg));
  2.2104 +  op_cost(10);
  2.2105 +  match(AddP (DecodeNKlass reg) off);
  2.2106 +
  2.2107 +  format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %}
  2.2108    interface(MEMORY_INTER) %{
  2.2109      base($reg);
  2.2110      index(0x0);
  2.2111 @@ -3948,59 +5581,63 @@
  2.2112    %}
  2.2113  %}
  2.2114  
  2.2115 -operand gsBaseIndexOffset8(mRegP base, mRegL index, immL8 off)
  2.2116 -%{
  2.2117 -  predicate(UseLoongsonISA);
  2.2118 +operand indOffset32NarrowKlass(mRegN reg, immL32 off)
  2.2119 +%{
  2.2120 +  predicate(Universe::narrow_klass_shift() == 0);
  2.2121    constraint(ALLOC_IN_RC(p_reg));
  2.2122 -  match(AddP (AddP base index) off);
  2.2123 -
  2.2124 -  op_cost(5);
  2.2125 -  format %{ "[$base + $index + $off (8-bit)] @ gsBaseIndexOffset8" %}
  2.2126 +  op_cost(10);
  2.2127 +  match(AddP (DecodeNKlass reg) off);
  2.2128 +
  2.2129 +  format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %}
  2.2130    interface(MEMORY_INTER) %{
  2.2131 -    base($base);
  2.2132 -    index($index);
  2.2133 +    base($reg);
  2.2134 +    index(0x0);
  2.2135      scale(0x0);
  2.2136      disp($off);
  2.2137    %}
  2.2138  %}
  2.2139  
  2.2140 -operand gsBaseIndexI2LOffset8(mRegP base, mRegI index, immL8 off)
  2.2141 -%{
  2.2142 -  predicate(UseLoongsonISA);
  2.2143 +operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off)
  2.2144 +%{
  2.2145 +  predicate(Universe::narrow_klass_shift() == 0);
  2.2146    constraint(ALLOC_IN_RC(p_reg));
  2.2147 -  match(AddP (AddP base (ConvI2L index)) off);
  2.2148 -
  2.2149 -  op_cost(5);
  2.2150 -  format %{ "[$base + $index + $off (8-bit)] @ gsBaseIndexI2LOffset8" %}
  2.2151 +  match(AddP (AddP (DecodeNKlass reg) lreg) off);
  2.2152 +
  2.2153 +  op_cost(10);
  2.2154 +  format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %}
  2.2155    interface(MEMORY_INTER) %{
  2.2156 -    base($base);
  2.2157 -    index($index);
  2.2158 +    base($reg);
  2.2159 +    index($lreg);
  2.2160      scale(0x0);
  2.2161      disp($off);
  2.2162    %}
  2.2163  %}
  2.2164  
  2.2165 -operand gsBaseIndexOffset0(mRegP addr, mRegL index) %{
  2.2166 -  predicate(UseLoongsonISA);
  2.2167 +operand indIndexNarrowKlass(mRegN reg, mRegL lreg)
  2.2168 +%{
  2.2169 +  predicate(Universe::narrow_klass_shift() == 0);
  2.2170    constraint(ALLOC_IN_RC(p_reg));
  2.2171 -  match(AddP addr index);
  2.2172 +  match(AddP (DecodeNKlass reg) lreg);
  2.2173  
  2.2174    op_cost(10);
  2.2175 -  format %{"[$addr + $index] @ gsBaseIndexOffset0" %}
  2.2176 +  format %{"[$reg + $lreg] @ indIndexNarrowKlass" %}
  2.2177    interface(MEMORY_INTER) %{
  2.2178 -    base($addr);
  2.2179 -    index($index);
  2.2180 +    base($reg);
  2.2181 +    index($lreg);
  2.2182      scale(0x0);
  2.2183      disp(0x0);
  2.2184    %}
  2.2185  %}
  2.2186  
  2.2187 -operand baseOffset0(mRegP reg) %{
  2.2188 +// Indirect Memory Operand
  2.2189 +operand indirectNarrow(mRegN reg)
  2.2190 +%{
  2.2191 +  predicate(Universe::narrow_oop_shift() == 0);
  2.2192    constraint(ALLOC_IN_RC(p_reg));
  2.2193    op_cost(10);
  2.2194 -  match(reg);
  2.2195 -
  2.2196 -  format %{ "[$reg] @ baseOffset0" %}
  2.2197 +  match(DecodeN reg);
  2.2198 +
  2.2199 +  format %{ "[$reg] @ indirectNarrow" %}
  2.2200    interface(MEMORY_INTER) %{
  2.2201      base($reg);
  2.2202      index(0x0);
  2.2203 @@ -4009,14 +5646,15 @@
  2.2204    %}
  2.2205  %}
  2.2206  
  2.2207 -operand baseOffset16Narrow(mRegN reg, immL16 off)
  2.2208 -%{
  2.2209 -  predicate(Universe::narrow_oop_base() == 0 && Universe::narrow_oop_shift() == 0);
  2.2210 +// Indirect Memory Plus Short Offset Operand
  2.2211 +operand indOffset8Narrow(mRegN reg, immL8 off)
  2.2212 +%{
  2.2213 +  predicate(Universe::narrow_oop_shift() == 0);
  2.2214    constraint(ALLOC_IN_RC(p_reg));
  2.2215 +  op_cost(10);
  2.2216    match(AddP (DecodeN reg) off);
  2.2217  
  2.2218 -  op_cost(5);
  2.2219 -  format %{ "[$reg + $off (16-bit)] @ baseOffset16Narrow" %}
  2.2220 +  format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %}
  2.2221    interface(MEMORY_INTER) %{
  2.2222      base($reg);
  2.2223      index(0x0);
  2.2224 @@ -4025,14 +5663,15 @@
  2.2225    %}
  2.2226  %}
  2.2227  
  2.2228 -operand gsBaseIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off)
  2.2229 -%{
  2.2230 -  predicate(UseLoongsonISA && Universe::narrow_oop_base() == 0 && Universe::narrow_oop_shift() == 0);
  2.2231 +// Indirect Memory Plus Index Register Plus Offset Operand
  2.2232 +operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off)
  2.2233 +%{
  2.2234 +  predicate(Universe::narrow_oop_shift() == 0);
  2.2235    constraint(ALLOC_IN_RC(p_reg));
  2.2236    match(AddP (AddP (DecodeN reg) lreg) off);
  2.2237  
  2.2238 -  op_cost(5);
  2.2239 -  format %{"[$reg + $off + $lreg] @ gsBaseIndexOffset8Narrow" %}
  2.2240 +  op_cost(10);
  2.2241 +  format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %}
  2.2242    interface(MEMORY_INTER) %{
  2.2243      base($reg);
  2.2244      index($lreg);
  2.2245 @@ -4041,14 +5680,29 @@
  2.2246    %}
  2.2247  %}
  2.2248  
  2.2249 -operand baseOffset0Narrow(mRegN reg)
  2.2250 -%{
  2.2251 -  predicate(Universe::narrow_oop_base() == 0 && Universe::narrow_oop_shift() == 0);
  2.2252 +//----------Load Long Memory Operands------------------------------------------
   2.2253 +// The load-long idiom will use its address expression again after loading
   2.2254 +// the first word of the long.  If the load-long destination overlaps with
   2.2255 +// registers used in the addressing expression, the second half will be loaded
  2.2256 +// from a clobbered address.  Fix this by requiring that load-long use
  2.2257 +// address registers that do not overlap with the load-long target.
  2.2258 +
  2.2259 +// load-long support
  2.2260 +operand load_long_RegP() %{
  2.2261    constraint(ALLOC_IN_RC(p_reg));
  2.2262 -  match(DecodeN reg);
  2.2263 -
  2.2264 -  op_cost(10);
  2.2265 -  format %{ "[$reg] @ baseOffset0Narrow" %}
  2.2266 +  match(RegP);
  2.2267 +  match(mRegP);
  2.2268 +  op_cost(100);
  2.2269 +  format %{  %}
  2.2270 +  interface(REG_INTER);
  2.2271 +%}
  2.2272 +
  2.2273 +// Indirect Memory Operand Long
  2.2274 +operand load_long_indirect(load_long_RegP reg) %{
  2.2275 +  constraint(ALLOC_IN_RC(p_reg));
  2.2276 +  match(reg);
  2.2277 +
  2.2278 +  format %{ "[$reg]" %}
  2.2279    interface(MEMORY_INTER) %{
  2.2280      base($reg);
  2.2281      index(0x0);
  2.2282 @@ -4057,14 +5711,11 @@
  2.2283    %}
  2.2284  %}
  2.2285  
  2.2286 -operand baseOffset16NarrowKlass(mRegN reg, immL16 off)
  2.2287 -%{
  2.2288 -  predicate(Universe::narrow_klass_base() == 0 && Universe::narrow_klass_shift() == 0);
  2.2289 -  constraint(ALLOC_IN_RC(p_reg));
  2.2290 -  match(AddP (DecodeNKlass reg) off);
  2.2291 -
  2.2292 -  op_cost(5);
  2.2293 -  format %{ "[$reg + $off (16-bit)] @ baseOffset16NarrowKlass" %}
  2.2294 +// Indirect Memory Plus Long Offset Operand
  2.2295 +operand load_long_indOffset32(load_long_RegP reg, immL32 off) %{
  2.2296 +  match(AddP reg off);
  2.2297 +
  2.2298 +  format %{ "[$reg + $off]" %}
  2.2299    interface(MEMORY_INTER) %{
  2.2300      base($reg);
  2.2301      index(0x0);
  2.2302 @@ -4073,87 +5724,6 @@
  2.2303    %}
  2.2304  %}
  2.2305  
  2.2306 -operand baseOffset0NarrowKlass(mRegN reg)
  2.2307 -%{
  2.2308 -  predicate(Universe::narrow_klass_base() == 0 && Universe::narrow_klass_shift() == 0);
  2.2309 -  constraint(ALLOC_IN_RC(p_reg));
  2.2310 -  match(DecodeNKlass reg);
  2.2311 -
  2.2312 -  op_cost(10);
  2.2313 -  format %{ "[$reg] @ baseOffset0NarrowKlass" %}
  2.2314 -  interface(MEMORY_INTER) %{
  2.2315 -    base($reg);
  2.2316 -    index(0x0);
  2.2317 -    scale(0x0);
  2.2318 -    disp(0x0);
  2.2319 -  %}
  2.2320 -%}
  2.2321 -
  2.2322 -operand gsBaseIndexOffset8NarrowKlass(mRegN reg, mRegL lreg, immL8 off)
  2.2323 -%{
  2.2324 -  predicate(UseLoongsonISA && Universe::narrow_klass_base() == 0 && Universe::narrow_klass_shift() == 0);
  2.2325 -  constraint(ALLOC_IN_RC(p_reg));
  2.2326 -  match(AddP (AddP (DecodeNKlass reg) lreg) off);
  2.2327 -
  2.2328 -  op_cost(5);
  2.2329 -  format %{"[$reg + $off + $lreg] @ gsBaseIndexOffset8NarrowKlass" %}
  2.2330 -  interface(MEMORY_INTER) %{
  2.2331 -    base($reg);
  2.2332 -    index($lreg);
  2.2333 -    scale(0x0);
  2.2334 -    disp($off);
  2.2335 -  %}
  2.2336 -%}
  2.2337 -
  2.2338 -operand gsBaseIndexOffset0NarrowKlass(mRegN reg, mRegL lreg)
  2.2339 -%{
  2.2340 -  predicate(UseLoongsonISA && Universe::narrow_klass_base() == 0 && Universe::narrow_klass_shift() == 0);
  2.2341 -  constraint(ALLOC_IN_RC(p_reg));
  2.2342 -  match(AddP (DecodeNKlass reg) lreg);
  2.2343 -
  2.2344 -  op_cost(10);
  2.2345 -  format %{"[$reg + $lreg] @ gsBaseIndexOffset0NarrowKlass" %}
  2.2346 -  interface(MEMORY_INTER) %{
  2.2347 -    base($reg);
  2.2348 -    index($lreg);
  2.2349 -    scale(0x0);
  2.2350 -    disp(0x0);
  2.2351 -  %}
  2.2352 -%}
  2.2353 -
  2.2354 -
  2.2355 -//------------------------OPERAND CLASSES--------------------------------------
  2.2356 -opclass memory(
  2.2357 -  baseOffset16,
  2.2358 -  gsBaseIndexOffset8,
  2.2359 -  gsBaseIndexI2LOffset8,
  2.2360 -  gsBaseIndexOffset0,
  2.2361 -  baseOffset0,
  2.2362 -
  2.2363 -  baseOffset16Narrow,
  2.2364 -  gsBaseIndexOffset8Narrow,
  2.2365 -  baseOffset0Narrow,
  2.2366 -
  2.2367 -  baseOffset16NarrowKlass,
  2.2368 -  baseOffset0NarrowKlass,
  2.2369 -  gsBaseIndexOffset8NarrowKlass,
  2.2370 -  gsBaseIndexOffset0NarrowKlass
  2.2371 -);
  2.2372 -
  2.2373 -// For loading unsigned values
  2.2374 -// umemory --> unsigned memory
  2.2375 -opclass umemory(
  2.2376 -  baseOffset16,
  2.2377 -  baseOffset0,
  2.2378 -
  2.2379 -  baseOffset16Narrow,
  2.2380 -  baseOffset0Narrow,
  2.2381 -
  2.2382 -  baseOffset16NarrowKlass,
  2.2383 -  baseOffset0NarrowKlass
  2.2384 -);
  2.2385 -
  2.2386 -
  2.2387  //----------Conditional Branch Operands----------------------------------------
  2.2388  // Comparison Op  - This is the operation of the comparison, and is limited to
  2.2389  //                  the following set of codes:
  2.2390 @@ -4206,6 +5776,55 @@
  2.2391    %}
  2.2392  %}
  2.2393  
  2.2394 +/*
  2.2395 +// Comparison Code, unsigned compare.  Used by FP also, with
  2.2396 +// C2 (unordered) turned into GT or LT already.  The other bits
  2.2397 +// C0 and C3 are turned into Carry & Zero flags.
  2.2398 +operand cmpOpU() %{
  2.2399 +  match(Bool);
  2.2400 +
  2.2401 +  format %{ "" %}
  2.2402 +  interface(COND_INTER) %{
  2.2403 +    equal(0x4);
  2.2404 +    not_equal(0x5);
  2.2405 +    less(0x2);
  2.2406 +    greater_equal(0x3);
  2.2407 +    less_equal(0x6);
  2.2408 +    greater(0x7);
  2.2409 +  %}
  2.2410 +%}
  2.2411 +*/
  2.2412 +/*
  2.2413 +// Comparison Code for FP conditional move
  2.2414 +operand cmpOp_fcmov() %{
  2.2415 +  match(Bool);
  2.2416 +
  2.2417 +  format %{ "" %}
  2.2418 +  interface(COND_INTER) %{
  2.2419 +    equal        (0x01);
  2.2420 +    not_equal    (0x02);
  2.2421 +    greater      (0x03);
  2.2422 +    greater_equal(0x04);
  2.2423 +    less         (0x05);
  2.2424 +    less_equal   (0x06);
  2.2425 +  %}
  2.2426 +%}
  2.2427 +
   2.2428 +// Comparison Code used in long compares
  2.2429 +operand cmpOp_commute() %{
  2.2430 +  match(Bool);
  2.2431 +
  2.2432 +  format %{ "" %}
  2.2433 +  interface(COND_INTER) %{
  2.2434 +    equal(0x4);
  2.2435 +    not_equal(0x5);
  2.2436 +    less(0xF);
  2.2437 +    greater_equal(0xE);
  2.2438 +    less_equal(0xD);
  2.2439 +    greater(0xC);
  2.2440 +  %}
  2.2441 +%}
  2.2442 +*/
  2.2443  
  2.2444  //----------Special Memory Operands--------------------------------------------
  2.2445  // Stack Slot Operand - This operand is used for loading and storing temporary
  2.2446 @@ -4275,6 +5894,12 @@
  2.2447      disp($reg);  // Stack Offset
  2.2448    %}
  2.2449  %}
  2.2450 + 
  2.2451 +
  2.2452 +//------------------------OPERAND CLASSES--------------------------------------
  2.2453 +//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset );
  2.2454 +opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, load_long_indirect, load_long_indOffset32, baseIndexOffset8, baseIndexOffset8_convI2L, indIndexScaleOffset8, indIndexScaleOffset8_convI2L, basePosIndexScaleOffset8, indIndexScaleOffsetNarrow, indPosIndexI2LScaleOffset8Narrow, indOffset8Narrow, indIndexOffset8Narrow); 
  2.2455 +
  2.2456  
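Collapsing everything into one memory opclass (and dropping umemory) means each load/store instruct can now match any of these address shapes; the expanded enc_class bodies above are what make that legal, since every operand reduces to the same four fields. Sketch of that common denominator:

    // The tuple every enc_class above extracts from a memory operand.
    struct MemOperand {
      int  base;   // base register number
      int  index;  // index register number, 0 when absent
      int  scale;  // left shift applied to index (0..3)
      long disp;   // byte displacement
    };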
  2.2457  //----------PIPELINE-----------------------------------------------------------
  2.2458  // Rules which define the behavior of the target architectures pipeline.
  2.2459 @@ -4628,7 +6253,7 @@
  2.2460  %}
  2.2461  
  2.2462  // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
  2.2463 -instruct loadI2UB(mRegI dst, umemory mem, immI_255 mask) %{
  2.2464 +instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{
  2.2465    match(Set dst (AndI (LoadI mem) mask));
  2.2466  
  2.2467    ins_cost(125);
  2.2468 @@ -4648,7 +6273,7 @@
  2.2469  %}
  2.2470  
  2.2471  // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
  2.2472 -instruct loadI2US(mRegI dst, umemory mem, immI_65535 mask) %{
  2.2473 +instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{
  2.2474    match(Set dst (AndI (LoadI mem) mask));
  2.2475  
  2.2476    ins_cost(125);
  2.2477 @@ -4689,18 +6314,26 @@
  2.2478    ins_pipe( ialu_storeL );
  2.2479  %}
  2.2480  
  2.2481 -
  2.2482  instruct storeL_immL0(memory mem, immL0 zero) %{
  2.2483    match(Set mem (StoreL mem zero));
  2.2484  
  2.2485    ins_cost(180);
  2.2486 -  format %{ "sd    $mem,   zero #@storeL_immL0" %}
  2.2487 -  ins_encode(store_L_immL0_enc(mem));
  2.2488 +  format %{ "sd    zero, $mem #@storeL_immL0" %}
  2.2489 +  ins_encode(store_L_immL0_enc(mem, zero));
  2.2490    ins_pipe( ialu_storeL );
  2.2491  %}
  2.2492  
  2.2493 +instruct storeL_imm(memory mem, immL src) %{
  2.2494 +  match(Set mem (StoreL mem src));
  2.2495 +
  2.2496 +  ins_cost(200);
  2.2497 +  format %{ "sd    $src, $mem #@storeL_imm" %}
  2.2498 +  ins_encode(store_L_immL_enc(mem, src));
  2.2499 +  ins_pipe( ialu_storeL );
  2.2500 +%}
  2.2501 +
  2.2502  // Load Compressed Pointer
  2.2503 -instruct loadN(mRegN dst, umemory mem)
  2.2504 +instruct loadN(mRegN dst, memory mem)
  2.2505  %{
  2.2506     match(Set dst (LoadN mem));
  2.2507  
  2.2508 @@ -4710,7 +6343,7 @@
  2.2509     ins_pipe( ialu_loadI ); // XXX
  2.2510  %}
  2.2511  
  2.2512 -instruct loadN2P(mRegP dst, umemory mem)
  2.2513 +instruct loadN2P(mRegP dst, memory mem)
  2.2514  %{
  2.2515     match(Set dst (DecodeN (LoadN mem)));
  2.2516     predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);
  2.2517 @@ -4742,7 +6375,7 @@
  2.2518  %}
  2.2519  
  2.2520  // Load narrow Klass Pointer
  2.2521 -instruct loadNKlass(mRegN dst, umemory mem)
  2.2522 +instruct loadNKlass(mRegN dst, memory mem)
  2.2523  %{
  2.2524    match(Set dst (LoadNKlass mem));
  2.2525  
  2.2526 @@ -4752,7 +6385,7 @@
  2.2527    ins_pipe( ialu_loadI ); // XXX
  2.2528  %}
  2.2529  
  2.2530 -instruct loadN2PKlass(mRegP dst, umemory mem)
  2.2531 +instruct loadN2PKlass(mRegP dst, memory mem)
  2.2532  %{
  2.2533    match(Set dst (DecodeNKlass (LoadNKlass mem)));
  2.2534    predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
  2.2535 @@ -4867,6 +6500,26 @@
  2.2536    ins_pipe( ialu_storeI );
  2.2537  %}
  2.2538  
  2.2539 +// Store NULL Pointer, mark word, or other simple pointer constant.
  2.2540 +instruct storeImmP(memory mem, immP31 src) %{
  2.2541 +  match(Set mem (StoreP mem src));
  2.2542 +
  2.2543 +  ins_cost(150);
   2.2544 +  format %{ "sd     $src, $mem #@storeImmP" %}
  2.2545 +  ins_encode(store_P_immP_enc(mem, src));
  2.2546 +  ins_pipe( ialu_storeI );
  2.2547 +%}
  2.2548 +
  2.2549 +// Store Byte Immediate
  2.2550 +instruct storeImmB(memory mem, immI8 src) %{
  2.2551 +  match(Set mem (StoreB mem src));
  2.2552 +
  2.2553 +  ins_cost(150);
   2.2554 +  format %{ "sb     $src, $mem #@storeImmB" %}
  2.2555 +  ins_encode(store_B_immI_enc(mem, src));
  2.2556 +  ins_pipe( ialu_storeI );
  2.2557 +%}
  2.2558 +
  2.2559  // Store Compressed Pointer
  2.2560  instruct storeN(memory mem, mRegN src)
  2.2561  %{
  2.2562 @@ -4915,8 +6568,28 @@
  2.2563    match(Set mem (StoreN mem zero));
  2.2564  
  2.2565    ins_cost(125); // XXX
  2.2566 -  format %{ "storeN0    $mem, R12\t# compressed ptr" %}
  2.2567 -  ins_encode(storeImmN0_enc(mem));
  2.2568 +  format %{ "storeN0    zero, $mem\t# compressed ptr" %}
  2.2569 +  ins_encode(storeImmN0_enc(mem, zero));
  2.2570 +  ins_pipe( ialu_storeI );
  2.2571 +%}
  2.2572 +
  2.2573 +instruct storeImmN(memory mem, immN src)
  2.2574 +%{
  2.2575 +  match(Set mem (StoreN mem src));
  2.2576 +
  2.2577 +  ins_cost(150);
  2.2578 +  format %{ "storeImmN    $mem, $src\t# compressed ptr @ storeImmN" %}
  2.2579 +  ins_encode(storeImmN_enc(mem, src));
  2.2580 +  ins_pipe( ialu_storeI );
  2.2581 +%}
  2.2582 +
  2.2583 +instruct storeImmNKlass(memory mem, immNKlass src)
  2.2584 +%{
  2.2585 +  match(Set mem (StoreNKlass mem src));
  2.2586 +
  2.2587 +  ins_cost(150); // XXX
   2.2588 +  format %{ "sw    $src, $mem\t# compressed klass ptr @ storeImmNKlass" %}
  2.2589 +  ins_encode(storeImmNKlass_enc(mem, src));
  2.2590    ins_pipe( ialu_storeI );
  2.2591  %}
  2.2592  
  2.2593 @@ -4930,15 +6603,6 @@
  2.2594    ins_pipe( ialu_storeI );
  2.2595  %}
  2.2596  
  2.2597 -instruct storeB0(memory mem, immI0 zero) %{
  2.2598 -  match(Set mem (StoreB mem zero));
  2.2599 -
  2.2600 -  ins_cost(100);
  2.2601 -  format %{ "sb    $zero, $mem #@storeB0" %}
  2.2602 -  ins_encode(store_B0_enc(mem));
  2.2603 -  ins_pipe( ialu_storeI );
  2.2604 -%}
  2.2605 -
  2.2606  instruct storeB_convL2I(memory mem, mRegL src) %{
  2.2607    match(Set mem (StoreB mem (ConvL2I src)));
  2.2608  
  2.2609 @@ -4968,7 +6632,7 @@
  2.2610  %}
  2.2611  
  2.2612  // Load Byte (8bit UNsigned)
  2.2613 -instruct loadUB(mRegI dst, umemory mem) %{
  2.2614 +instruct loadUB(mRegI dst, memory mem) %{
  2.2615    match(Set dst (LoadUB mem));
  2.2616  
  2.2617    ins_cost(125);
  2.2618 @@ -4977,7 +6641,7 @@
  2.2619    ins_pipe( ialu_loadI );
  2.2620  %}
  2.2621  
  2.2622 -instruct loadUB_convI2L(mRegL dst, umemory mem) %{
  2.2623 +instruct loadUB_convI2L(mRegL dst, memory mem) %{
  2.2624    match(Set dst (ConvI2L (LoadUB mem)));
  2.2625  
  2.2626    ins_cost(125);
  2.2627 @@ -5016,12 +6680,12 @@
  2.2628  %}
  2.2629  
  2.2630  // Store Integer Immediate
  2.2631 -instruct storeI0(memory mem, immI0 zero) %{
  2.2632 -  match(Set mem (StoreI mem zero));
  2.2633 -
  2.2634 -  ins_cost(100);
  2.2635 -  format %{ "sw    $mem, $zero #@storeI0" %}
  2.2636 -  ins_encode(store_I_immI0_enc(mem));
  2.2637 +instruct storeImmI(memory mem, immI src) %{
  2.2638 +  match(Set mem (StoreI mem src));
  2.2639 +
  2.2640 +  ins_cost(150);
   2.2641 +  format %{ "sw    $src, $mem #@storeImmI" %}
  2.2642 +  ins_encode(store_I_immI_enc(mem, src));
  2.2643    ins_pipe( ialu_storeI );
  2.2644  %}
  2.2645  
  2.2646 @@ -9047,20 +10711,20 @@
  2.2647  %}
  2.2648  */
  2.2649  
  2.2650 -instruct lbu_and_lmask(mRegI dst, umemory mem,  immI_255 mask) %{
  2.2651 +instruct lbu_and_lmask(mRegI dst, memory mem,  immI_255 mask) %{
  2.2652    match(Set dst (AndI mask (LoadB mem)));
  2.2653    ins_cost(60);
  2.2654  
  2.2655 -  format %{ "lbu  $dst, $mem #@lbu_and_lmask" %}
   2.2656 +  format %{ "lbu  $dst, $mem #@lbu_and_lmask" %}
  2.2657    ins_encode(load_UB_enc(dst, mem));
  2.2658    ins_pipe( ialu_loadI );
  2.2659  %}
  2.2660  
  2.2661 -instruct lbu_and_rmask(mRegI dst, umemory mem,  immI_255 mask) %{
  2.2662 +instruct lbu_and_rmask(mRegI dst, memory mem,  immI_255 mask) %{
  2.2663    match(Set dst (AndI (LoadB mem) mask));
  2.2664    ins_cost(60);
  2.2665  
  2.2666 -  format %{ "lbu  $dst, $mem #@lbu_and_rmask" %}
   2.2667 +  format %{ "lbu  $dst, $mem #@lbu_and_rmask" %}
  2.2668    ins_encode(load_UB_enc(dst, mem));
  2.2669    ins_pipe( ialu_loadI );
  2.2670  %}
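These two rules fuse an explicit AndI with 0xFF into a single lbu: masking a sign-extended byte with 255 produces exactly the zero-extended byte, so the separate and is dead. The identity, checkable in isolation:

    #include <cstdint>

    // (sign-extended byte) & 0xFF == zero-extended byte, for all inputs.
    static inline uint32_t lbu_equiv(int8_t loaded) {
      return ((uint32_t)(int32_t)loaded) & 0xFFu;  // equals (uint8_t)loaded
    }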
  2.2671 @@ -10704,7 +12368,7 @@
  2.2672  
  2.2673  // Prefetch instructions.
  2.2674  
  2.2675 -instruct prefetchrNTA( umemory mem ) %{
  2.2676 +instruct prefetchrNTA( memory mem ) %{
  2.2677    match(PrefetchRead mem);
  2.2678    ins_cost(125);
  2.2679  
  2.2680 @@ -10715,14 +12379,29 @@
  2.2681      int  scale = $mem$$scale;
  2.2682      int  disp = $mem$$disp;
  2.2683  
  2.2684 -    assert(index == 0, "no index");
  2.2685 -    __ daddiu(AT, as_Register(base), disp);
  2.2686 +    if( index != 0 ) {
  2.2687 +      if (scale == 0) {
  2.2688 +        __ daddu(AT, as_Register(base), as_Register(index));
  2.2689 +      } else {
  2.2690 +        __ dsll(AT, as_Register(index), scale);
  2.2691 +        __ daddu(AT, as_Register(base), AT);
  2.2692 +      }
  2.2693 +    } else {
  2.2694 +      __ move(AT, as_Register(base));
  2.2695 +    }
   2.2696 +    if( Assembler::is_simm16(disp) ) {
   2.2697 +      __ daddiu(AT, AT, disp);
   2.2698 +    } else {
   2.2699 +      __ move(T9, disp);
   2.2700 +      __ daddu(AT, AT, T9);
   2.2701 +    }
  2.2703      __ pref(0, AT, 0); //hint: 0:load
  2.2704    %}
  2.2705    ins_pipe(pipe_slow);
  2.2706  %}
  2.2707  
  2.2708 -instruct prefetchwNTA( umemory mem ) %{
  2.2709 +instruct prefetchwNTA( memory mem ) %{
  2.2710    match(PrefetchWrite mem);
  2.2711    ins_cost(125);
  2.2712    format %{ "pref $mem\t# Prefetch to non-temporal cache for write @ prefetchwNTA" %}
  2.2713 @@ -10732,9 +12411,24 @@
  2.2714      int  scale = $mem$$scale;
  2.2715      int  disp = $mem$$disp;
  2.2716  
  2.2717 -    assert(index == 0, "no index");
  2.2718 -    __ daddiu(AT, as_Register(base), disp);
  2.2719 -    __ pref(1, AT, 0); //hint: 1:store
  2.2720 +    if( index != 0 ) {
  2.2721 +      if (scale == 0) {
  2.2722 +        __ daddu(AT, as_Register(base), as_Register(index));
  2.2723 +      } else {
  2.2724 +        __ dsll(AT, as_Register(index), scale);
  2.2725 +        __ daddu(AT, as_Register(base), AT);
  2.2726 +      }
  2.2727 +    } else {
  2.2728 +      __ move(AT, as_Register(base));
  2.2729 +    }
  2.2730 +    if( Assembler::is_simm16(disp) ) {
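   2.2731 +      // AT already holds base (plus the scaled index, if any); just add the displacement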
   2.2732 +      __ daddiu(AT, AT, disp);
  2.2733 +    } else {
  2.2734 +      __ move(T9, disp);
   2.2735 +      __ daddu(AT, AT, T9);
  2.2736 +    }
   2.2737 +    __ pref(1, AT, 0); //hint: 1:store
  2.2738    %}
  2.2739    ins_pipe(pipe_slow);
  2.2740  %}
  2.2741 @@ -10754,10 +12448,50 @@
  2.2742       Register dst = R0;
  2.2743  
  2.2744       if( index != 0 ) {
  2.2745 -        assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.2746 -        __ gslbx(dst, as_Register(base), as_Register(index), disp);
   2.2747 +        if( Assembler::is_simm16(disp) ) {
  2.2748 +           if( UseLoongsonISA ) {
  2.2749 +              if (scale == 0) {
  2.2750 +                 __ gslbx(dst, as_Register(base), as_Register(index), disp);
  2.2751 +              } else {
  2.2752 +                 __ dsll(AT, as_Register(index), scale);
  2.2753 +                 __ gslbx(dst, as_Register(base), AT, disp);
  2.2754 +              }
  2.2755 +           } else {
  2.2756 +              if (scale == 0) {
  2.2757 +                 __ addu(AT, as_Register(base), as_Register(index));
  2.2758 +              } else {
  2.2759 +                 __ dsll(AT, as_Register(index), scale);
  2.2760 +                 __ addu(AT, as_Register(base), AT);
  2.2761 +              }
  2.2762 +              __ lb(dst, AT, disp);
  2.2763 +           }
  2.2764 +        } else {
  2.2765 +           if (scale == 0) {
  2.2766 +              __ addu(AT, as_Register(base), as_Register(index));
  2.2767 +           } else {
  2.2768 +              __ dsll(AT, as_Register(index), scale);
  2.2769 +              __ addu(AT, as_Register(base), AT);
  2.2770 +           }
  2.2771 +           __ move(T9, disp);
  2.2772 +           if( UseLoongsonISA ) {
  2.2773 +              __ gslbx(dst, AT, T9, 0);
  2.2774 +           } else {
   2.2775 +              __ addu(AT, AT, T9);
   2.2776 +              __ lb(dst, AT, 0);
   2.2777 +           }
   2.2778 +        }
  2.2779       } else {
  2.2780 -        __ lb(dst, as_Register(base), disp);
  2.2781 +        if( Assembler::is_simm16(disp) ) { 
  2.2782 +           __ lb(dst, as_Register(base), disp);
  2.2783 +        } else {
   2.2784 +           __ move(T9, disp);
  2.2785 +           if( UseLoongsonISA ) {
  2.2786 +              __ gslbx(dst, as_Register(base), T9, 0);
  2.2787 +           } else {
  2.2788 +              __ addu(AT, as_Register(base), T9); 
  2.2789 +              __ lb(dst, AT, 0);
  2.2790 +           }
   2.2791 +        }
  2.2792       }
  2.2793    %}
  2.2794    ins_pipe(pipe_slow);
  2.2795 @@ -10778,7 +12512,7 @@
  2.2796  %}
  2.2797  
  2.2798  // Load Char (16bit unsigned)
  2.2799 -instruct loadUS(mRegI dst, umemory mem) %{
  2.2800 +instruct loadUS(mRegI dst, memory mem) %{
  2.2801    match(Set dst (LoadUS mem));
  2.2802  
  2.2803    ins_cost(125);
  2.2804 @@ -10787,7 +12521,7 @@
  2.2805    ins_pipe( ialu_loadI );
  2.2806  %}
  2.2807  
  2.2808 -instruct loadUS_convI2L(mRegL dst, umemory mem) %{
  2.2809 +instruct loadUS_convI2L(mRegL dst, memory mem) %{
  2.2810    match(Set dst (ConvI2L (LoadUS mem)));
  2.2811  
  2.2812    ins_cost(125);
  2.2813 @@ -10862,7 +12596,7 @@
  2.2814    ins_encode %{
  2.2815      FloatRegister dst = as_FloatRegister($dst$$reg);
  2.2816  
  2.2817 -    __ dmtc1(R0, dst);
   2.2818 +    __ dmtc1(R0, dst);
  2.2819    %}
  2.2820    ins_pipe( fpu_loadF );
  2.2821  %}
  2.2822 @@ -10912,10 +12646,66 @@
  2.2823      int      disp = $mem$$disp;
  2.2824  
  2.2825      if( index != 0 ) {
  2.2826 -       assert(UseLoongsonISA, "Only supported for Loongson CPUs");
  2.2827 -       __ gsswx(R0, as_Register(base), as_Register(index), disp);
  2.2828 -    } else {
  2.2829 -       __ sw(R0, as_Register(base), disp);
   2.2830 +       if ( UseLoongsonISA ) {
   2.2831 +          if ( Assembler::is_simm(disp, 8) ) {
   2.2832 +             if ( scale == 0 ) {
   2.2833 +                __ gsswx(R0, as_Register(base), as_Register(index), disp);
   2.2834 +             } else {
   2.2835 +                __ dsll(T9, as_Register(index), scale);
   2.2836 +                __ gsswx(R0, as_Register(base), T9, disp);
   2.2837 +             }
   2.2838 +          } else if ( Assembler::is_simm16(disp) ) {
   2.2839 +             if ( scale == 0 ) {
   2.2840 +                __ daddu(AT, as_Register(base), as_Register(index));
   2.2841 +             } else {
   2.2842 +                __ dsll(T9, as_Register(index), scale);
   2.2843 +                __ daddu(AT, as_Register(base), T9);
   2.2844 +             }
   2.2845 +             __ sw(R0, AT, disp);
   2.2846 +          } else {
   2.2847 +             if ( scale == 0 ) {
   2.2848 +                __ move(T9, disp);
   2.2849 +                __ daddu(AT, as_Register(index), T9);
   2.2850 +                __ gsswx(R0, as_Register(base), AT, 0);
   2.2851 +             } else {
   2.2852 +                __ dsll(T9, as_Register(index), scale);
   2.2853 +                __ move(AT, disp);
   2.2854 +                __ daddu(AT, AT, T9);
   2.2855 +                __ gsswx(R0, as_Register(base), AT, 0);
   2.2856 +             }
   2.2857 +          }
   2.2858 +       } else { // not Loongson ISA
   2.2859 +          if ( scale != 0 ) {
   2.2860 +             __ dsll(T9, as_Register(index), scale);
   2.2861 +             __ daddu(AT, as_Register(base), T9);
   2.2862 +          } else {
   2.2863 +             __ daddu(AT, as_Register(base), as_Register(index));
   2.2864 +          }
   2.2865 +          if ( Assembler::is_simm16(disp) ) {
   2.2866 +             __ sw(R0, AT, disp);
   2.2867 +          } else {
   2.2868 +             __ move(T9, disp);
   2.2869 +             __ daddu(AT, AT, T9);
   2.2870 +             __ sw(R0, AT, 0);
   2.2871 +          }
   2.2872 +       }
   2.2873 +    } else { // index == 0
   2.2874 +       if ( UseLoongsonISA ) {
   2.2875 +          if ( Assembler::is_simm16(disp) ) {
   2.2876 +             __ sw(R0, as_Register(base), disp);
   2.2877 +          } else {
   2.2878 +             __ move(T9, disp);
   2.2879 +             __ gsswx(R0, as_Register(base), T9, 0);
   2.2880 +          }
   2.2881 +       } else {
   2.2882 +          if ( Assembler::is_simm16(disp) ) {
   2.2883 +             __ sw(R0, as_Register(base), disp);
   2.2884 +          } else {
   2.2885 +             __ move(T9, disp);
   2.2886 +             __ daddu(AT, as_Register(base), T9);
   2.2887 +             __ sw(R0, AT, 0);
   2.2888 +          }
   2.2889 +       }
  2.2890      }
  2.2891    %}
  2.2892    ins_pipe( ialu_storeI );
  2.2893 @@ -10950,6 +12740,87 @@
  2.2894    ins_pipe( fpu_storeF );
  2.2895  %}
  2.2896  
  2.2897 +instruct storeD_imm0( memory mem, immD0 zero) %{
  2.2898 +  match(Set mem (StoreD mem zero));
  2.2899 +
  2.2900 +  ins_cost(40);
  2.2901 +  format %{ "store   $mem, zero\t# store float @ storeD_imm0" %}
  2.2902 +  ins_encode %{
  2.2903 +    int      base = $mem$$base;
  2.2904 +    int     index = $mem$$index;
  2.2905 +    int     scale = $mem$$scale;
  2.2906 +    int      disp = $mem$$disp;
  2.2907 +
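   2.2908 +    // synthesize +0.0 in scratch register F30: mtc1 moves integer zero into the FPU, cvt_d_w widens it to a double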
  2.2908 +    __ mtc1(R0, F30);
  2.2909 +    __ cvt_d_w(F30, F30);
  2.2910 +
  2.2911 +    if( index != 0 ) {
   2.2912 +       if ( UseLoongsonISA ) {
   2.2913 +          if ( Assembler::is_simm(disp, 8) ) {
   2.2914 +             if ( scale == 0 ) {
   2.2915 +                __ gssdxc1(F30, as_Register(base), as_Register(index), disp);
   2.2916 +             } else {
   2.2917 +                __ dsll(T9, as_Register(index), scale);
   2.2918 +                __ gssdxc1(F30, as_Register(base), T9, disp);
   2.2919 +             }
   2.2920 +          } else if ( Assembler::is_simm16(disp) ) {
   2.2921 +             if ( scale == 0 ) {
   2.2922 +                __ daddu(AT, as_Register(base), as_Register(index));
   2.2923 +                __ sdc1(F30, AT, disp);
   2.2924 +             } else {
   2.2925 +                __ dsll(T9, as_Register(index), scale);
   2.2926 +                __ daddu(AT, as_Register(base), T9);
   2.2927 +                __ sdc1(F30, AT, disp);
   2.2928 +             }
   2.2929 +          } else {
   2.2930 +             if ( scale == 0 ) {
   2.2931 +                __ move(T9, disp);
   2.2932 +                __ daddu(AT, as_Register(index), T9);
   2.2933 +                __ gssdxc1(F30, as_Register(base), AT, 0);
   2.2934 +             } else {
   2.2935 +                __ move(T9, disp);
   2.2936 +                __ dsll(AT, as_Register(index), scale);
   2.2937 +                __ daddu(AT, AT, T9);
   2.2938 +                __ gssdxc1(F30, as_Register(base), AT, 0);
   2.2939 +             }
   2.2940 +          }
   2.2941 +       } else { // not Loongson ISA
   2.2942 +          if ( scale != 0 ) {
   2.2943 +             __ dsll(T9, as_Register(index), scale);
   2.2944 +             __ daddu(AT, as_Register(base), T9);
   2.2945 +          } else {
   2.2946 +             __ daddu(AT, as_Register(base), as_Register(index));
   2.2947 +          }
   2.2948 +          if ( Assembler::is_simm16(disp) ) {
   2.2949 +             __ sdc1(F30, AT, disp);
   2.2950 +          } else {
   2.2951 +             __ move(T9, disp);
   2.2952 +             __ daddu(AT, AT, T9);
   2.2953 +             __ sdc1(F30, AT, 0);
   2.2954 +          }
   2.2955 +       }
   2.2956 +    } else { // index == 0
   2.2957 +       if ( UseLoongsonISA ) {
   2.2958 +          if ( Assembler::is_simm16(disp) ) {
   2.2959 +             __ sdc1(F30, as_Register(base), disp);
   2.2960 +          } else {
   2.2961 +             __ move(T9, disp);
   2.2962 +             __ gssdxc1(F30, as_Register(base), T9, 0);
   2.2963 +          }
   2.2964 +       } else {
   2.2965 +          if ( Assembler::is_simm16(disp) ) {
   2.2966 +             __ sdc1(F30, as_Register(base), disp);
   2.2967 +          } else {
   2.2968 +             __ move(T9, disp);
   2.2969 +             __ daddu(AT, as_Register(base), T9);
   2.2970 +             __ sdc1(F30, AT, 0);
   2.2971 +          }
   2.2972 +       }
  2.2973 +    }
  2.2974 +  %}
  2.2975 +  ins_pipe( ialu_storeI );
  2.2976 +%}
  2.2977 +
  2.2978  instruct loadSSI(mRegI dst, stackSlotI src)
  2.2979  %{
  2.2980    match(Set dst src);
  2.2981 @@ -11108,21 +12979,13 @@
  2.2982  %}
  2.2983  
  2.2984  // Store CMS card-mark Immediate
  2.2985 -instruct storeImmCM(memory mem, mRegI src) %{
  2.2986 +instruct storeImmCM(memory mem, immI8 src) %{
  2.2987    match(Set mem (StoreCM mem src));
  2.2988  
  2.2989 -  ins_cost(500);
  2.2990 -  format %{ "sb   $src, $mem  (CMS card-mark) @ storeImmCM" %}
  2.2991 -  ins_encode(store_B_reg_sync_enc(mem, src));
  2.2992 -  ins_pipe( ialu_storeI );
  2.2993 -%}
  2.2994 -
  2.2995 -instruct storeI0CM(memory mem, immI0 zero) %{
  2.2996 -  match(Set mem (StoreCM mem zero));
  2.2997 -
  2.2998 -  ins_cost(450);
  2.2999 -  format %{ "sb   $zero, $mem  (CMS card-mark) @ storeI0CM" %}
  2.3000 -  ins_encode(store_B0_sync_enc(mem));
  2.3001 +  ins_cost(150);
  2.3002 +  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  2.3003 +//  opcode(0xC6);
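   2.3003 +  // the _sync byte-store encoding orders the card-mark store, as the concurrent collector requires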
  2.3004 +  ins_encode(store_B_immI_enc_sync(mem, src));
  2.3005    ins_pipe( ialu_storeI );
  2.3006  %}
  2.3007  
  2.3008 @@ -11143,6 +13006,72 @@
  2.3009    ins_pipe( pipe_jump );
  2.3010  %}
  2.3011  
  2.3012 +instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem)
  2.3013 +%{
  2.3014 +  predicate(Universe::narrow_oop_shift() == 0);
  2.3015 +  match(Set dst mem);
  2.3016 +
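   2.3016 +  // narrow_oop_shift() == 0 means the narrow oop needs no scaling, so the address is simply base + disp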
   2.3017 +  ins_cost(110);
  2.3018 +  format %{ "leaq    $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %}
  2.3019 +  ins_encode %{
  2.3020 +    Register  dst  = $dst$$Register;
  2.3021 +    Register  base = as_Register($mem$$base);
  2.3022 +    int       disp = $mem$$disp;
  2.3023 +
  2.3024 +    __ daddiu(dst, base, disp);
  2.3025 +  %}
  2.3026 +  ins_pipe( ialu_regI_imm16 );
  2.3027 +%}
  2.3028 +
  2.3029 +instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem)
  2.3030 +%{
  2.3031 +  match(Set dst mem);
  2.3032 +
  2.3033 +  ins_cost(110);
  2.3034 +  format %{ "leaq    $dst, $mem\t# @ PosIdxScaleOff8" %}
  2.3035 +  ins_encode %{
  2.3036 +    Register  dst   = $dst$$Register;
  2.3037 +    Register  base  = as_Register($mem$$base);
  2.3038 +    Register  index = as_Register($mem$$index);
  2.3039 +    int       scale = $mem$$scale;
  2.3040 +    int       disp  = $mem$$disp;
  2.3041 +
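   2.3041 +    // dst = base + (index << scale) + disp, computed via the AT scratch register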
  2.3042 +    if (scale == 0) {
  2.3043 +       __ daddu(AT, base, index);
  2.3044 +       __ daddiu(dst, AT, disp);
  2.3045 +    } else {
  2.3046 +       __ dsll(AT, index, scale);
  2.3047 +       __ daddu(AT, base, AT);
  2.3048 +       __ daddiu(dst, AT, disp);
  2.3049 +    }
   2.3050 +  %}
  2.3051 +
  2.3052 +  ins_pipe( ialu_regI_imm16 );
  2.3053 +%}
  2.3054 +
  2.3055 +instruct leaPIdxScale(mRegP dst, indIndexScale mem)
  2.3056 +%{
  2.3057 +  match(Set dst mem);
  2.3058 +
  2.3059 +  ins_cost(110);
  2.3060 +  format %{ "leaq    $dst, $mem\t# @ leaPIdxScale" %}
  2.3061 +  ins_encode %{
  2.3062 +    Register  dst   = $dst$$Register;
  2.3063 +    Register  base  = as_Register($mem$$base);
  2.3064 +    Register  index = as_Register($mem$$index);
  2.3065 +    int       scale = $mem$$scale;
  2.3066 +
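   2.3066 +    // dst = base + (index << scale); this addressing form carries no displacement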
  2.3067 +    if (scale == 0) {
  2.3068 +       __ daddu(dst, base, index);
  2.3069 +    } else {
  2.3070 +       __ dsll(AT, index, scale);
  2.3071 +       __ daddu(dst, base, AT);
  2.3072 +    }
   2.3073 +  %}
  2.3074 +
  2.3075 +  ins_pipe( ialu_regI_imm16 );
  2.3076 +%}
  2.3077 +
  2.3078  // Jump Direct Conditional - Label defines a relative address from Jcc+1
  2.3079  instruct  jmpLoopEnd(cmpOp cop, mRegI src1, mRegI src2, label labl) %{
  2.3080    match(CountedLoopEnd cop (CmpI src1 src2));
  2.3081 @@ -11658,7 +13587,7 @@
  2.3082  
  2.3083  // Match loading integer and casting it to unsigned int in long register.
  2.3084  // LoadI + ConvI2L + AndL 0xffffffff.
  2.3085 -instruct loadUI2L_rmask(mRegL dst, umemory mem, immL_32bits mask) %{
  2.3086 +instruct loadUI2L_rmask(mRegL dst, memory mem, immL_32bits mask) %{
  2.3087    match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  2.3088  
  2.3089    format %{ "lwu     $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %}
  2.3090 @@ -11666,7 +13595,7 @@
  2.3091    ins_pipe(ialu_loadI);
  2.3092  %}
  2.3093  
  2.3094 -instruct loadUI2L_lmask(mRegL dst, umemory mem, immL_32bits mask) %{
  2.3095 +instruct loadUI2L_lmask(mRegL dst, memory mem, immL_32bits mask) %{
  2.3096    match(Set dst (AndL mask (ConvI2L (LoadI mem))));
  2.3097  
  2.3098    format %{ "lwu     $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %}
