1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/ppc/vm/ppc.ad Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,12117 @@ 1.4 +// 1.5 +// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. 1.6 +// Copyright 2012, 2014 SAP AG. All rights reserved. 1.7 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 +// 1.9 +// This code is free software; you can redistribute it and/or modify it 1.10 +// under the terms of the GNU General Public License version 2 only, as 1.11 +// published by the Free Software Foundation. 1.12 +// 1.13 +// This code is distributed in the hope that it will be useful, but WITHOUT 1.14 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.15 +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.16 +// version 2 for more details (a copy is included in the LICENSE file that 1.17 +// accompanied this code). 1.18 +// 1.19 +// You should have received a copy of the GNU General Public License version 1.20 +// 2 along with this work; if not, write to the Free Software Foundation, 1.21 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.22 +// 1.23 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.24 +// or visit www.oracle.com if you need additional information or have any 1.25 +// questions. 1.26 +// 1.27 +// 1.28 + 1.29 +// 1.30 +// PPC64 Architecture Description File 1.31 +// 1.32 + 1.33 +//----------REGISTER DEFINITION BLOCK------------------------------------------ 1.34 +// This information is used by the matcher and the register allocator to 1.35 +// describe individual registers and classes of registers within the target 1.36 +// architecture. 
1.37 +register %{ 1.38 +//----------Architecture Description Register Definitions---------------------- 1.39 +// General Registers 1.40 +// "reg_def" name (register save type, C convention save type, 1.41 +// ideal register type, encoding); 1.42 +// 1.43 +// Register Save Types: 1.44 +// 1.45 +// NS = No-Save: The register allocator assumes that these registers 1.46 +// can be used without saving upon entry to the method, & 1.47 +// that they do not need to be saved at call sites. 1.48 +// 1.49 +// SOC = Save-On-Call: The register allocator assumes that these registers 1.50 +// can be used without saving upon entry to the method, 1.51 +// but that they must be saved at call sites. 1.52 +// These are called "volatiles" on ppc. 1.53 +// 1.54 +// SOE = Save-On-Entry: The register allocator assumes that these registers 1.55 +// must be saved before using them upon entry to the 1.56 +// method, but they do not need to be saved at call 1.57 +// sites. 1.58 +// These are called "nonvolatiles" on ppc. 1.59 +// 1.60 +// AS = Always-Save: The register allocator assumes that these registers 1.61 +// must be saved before using them upon entry to the 1.62 +// method, & that they must be saved at call sites. 1.63 +// 1.64 +// Ideal Register Type is used to determine how to save & restore a 1.65 +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 1.66 +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 1.67 +// 1.68 +// The encoding number is the actual bit-pattern placed into the opcodes. 1.69 +// 1.70 +// PPC64 register definitions, based on the 64-bit PowerPC ELF ABI 1.71 +// Supplement Version 1.7 as of 2003-10-29. 1.72 +// 1.73 +// For each 64-bit register we must define two registers: the register 1.74 +// itself, e.g. R3, and a corresponding virtual other (32-bit-)'half', 1.75 +// e.g. R3_H, which is needed by the allocator, but is not used 1.76 +// for stores, loads, etc. 
// ----------------------------
// Integer/Long Registers
// ----------------------------

  // PPC64 has 32 64-bit integer registers.

  // Each reg_def below gives, in order: register save type, C convention
  // save type, ideal register type, encoding, and a VMReg expression.
  // Every register R<n> is immediately followed by its virtual half R<n>_H,
  // which is declared with encoding 99 and the ->next() VMReg of R<n>.
  // R1 and R13 are NS in both save-type columns. R14-R31 are SOC in the
  // register save type column but SOE in the C convention column.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
  reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
  reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
  reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
  reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
  reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
  reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
  reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
  reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  // v   iarg2
  reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
  reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
  reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
  reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
  reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
  reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
  reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
  reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
  reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv
  reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv
  reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());


// ----------------------------
// Float/Double Registers
// ----------------------------

  // Double Registers
  // The rules of ADL require that double registers be defined in pairs.
  // Each pair must be two 32-bit values, but not necessarily a pair of
  // single float registers. In each pair, ADLC-assigned register numbers
  // must be adjacent, with the lower number even. Finally, when the
  // CPU stores such a register pair to memory, the word associated with
  // the lower ADLC-assigned number must be stored to the lower address.

  // PPC64 has 32 64-bit floating-point registers. Each can store a single
  // or double precision floating-point value.
// types: v = volatile, nv = non-volatile, s = system
  // As for the integer registers, each F<n> is followed by a virtual half
  // F<n>_H declared with encoding 99 and the ->next() VMReg of F<n>.
  // F14-F31 are SOC in the register save type column but SOE in the
  // C convention column.
  reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());

// ----------------------------
// Special Registers
// ----------------------------

// Condition Codes Flag Registers

  // PPC64 has 8 condition code "registers" which are all contained
  // in the CR register.

  // NOTE(review): CCR2-CCR4 are tagged 'nv' below but are declared SOC/SOC
  // exactly like the fields tagged 'v' - confirm this is intentional.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg());  // v
  reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg());  // v
  reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg());  // nv
  reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg());  // nv
  reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg());  // nv
  reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg());  // v
  reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg());  // v
  reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg());  // v

  // Special registers of PPC64

  reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v


// ----------------------------
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// It's worth about 1% on SPEC geomean to get this right.

// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
// in adGlobals_ppc64.hpp which defines the <register>_num values, e.g.
// R3_num. Therefore, R3_num may not be (and in reality is not)
// the same as R3->encoding()! Furthermore, we cannot make any
// assumptions on ordering, e.g. R3_num may be less than R2_num.
// Additionally, the function
//   static enum RC rc_class(OptoReg::Name reg )
// maps a given <register>_num value to its chunk type (except for flags)
// and its current implementation relies on chunk0 and chunk1 having a
// size of 64 each.

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk0 (
  // Chunk0 contains *all* 64 integer register halves.

  // "non-volatile" registers (R16 is listed with the special registers below)
  R14, R14_H,
  R15, R15_H,
  R17, R17_H,
  R18, R18_H,
  R19, R19_H,
  R20, R20_H,
  R21, R21_H,
  R22, R22_H,
  R23, R23_H,
  R24, R24_H,
  R25, R25_H,
  R26, R26_H,
  R27, R27_H,
  R28, R28_H,
  R29, R29_H,
  R30, R30_H,
  R31, R31_H,

  // scratch/special registers
  R11, R11_H,
  R12, R12_H,

  // argument registers
  R10, R10_H,
  R9,  R9_H,
  R8,  R8_H,
  R7,  R7_H,
  R6,  R6_H,
  R5,  R5_H,
  R4,  R4_H,
  R3,  R3_H,

  // special registers, not available for allocation
  R16, R16_H,     // R16_thread
  R13, R13_H,     // system thread id
  R2,  R2_H,      // may be used for TOC
  R1,  R1_H,      // SP
  R0,  R0_H       // R0 (scratch)
);

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk1 (
  // Chunk1 contains *all* 64 floating-point register halves.

  // scratch register
  F0,  F0_H,

  // argument registers
  F13, F13_H,
  F12, F12_H,
  F11, F11_H,
  F10, F10_H,
  F9,  F9_H,
  F8,  F8_H,
  F7,  F7_H,
  F6,  F6_H,
  F5,  F5_H,
  F4,  F4_H,
  F3,  F3_H,
  F2,  F2_H,
  F1,  F1_H,

  // non-volatile registers
  F14, F14_H,
  F15, F15_H,
  F16, F16_H,
  F17, F17_H,
  F18, F18_H,
  F19, F19_H,
  F20, F20_H,
  F21, F21_H,
  F22, F22_H,
  F23, F23_H,
  F24, F24_H,
  F25, F25_H,
  F26, F26_H,
  F27, F27_H,
  F28, F28_H,
  F29, F29_H,
  F30, F30_H,
  F31, F31_H
);

alloc_class chunk2 (
  // Chunk2 contains *all* 8 condition code registers.

  CCR0,
  CCR1,
  CCR2,
  CCR3,
  CCR4,
  CCR5,
  CCR6,
  CCR7
);

alloc_class chunk3 (
  // special registers
  // These registers are not allocated, but used for nodes generated by postalloc expand.
  SR_XER,
  SR_LR,
  SR_CTR,
  SR_VRSAVE,
  SR_SPEFSCR,
  SR_PPR
);

//-------Architecture Description Register Classes-----------------------

// Several register classes are automatically defined based upon
// information in this architecture description.

// 1) reg_class inline_cache_reg           ( as defined in frame section )
// 2) reg_class compiler_method_oop_reg    ( as defined in frame section )
// 3) reg_class interpreter_method_oop_reg ( as defined in frame section )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// ----------------------------
// 32 Bit Register Classes
// ----------------------------

// We specify registers twice, once as read/write, and once read-only.
// We use the read-only registers for source operands. With this, we
// can include preset read only registers in this class, as a hard-coded
// '0'-register. (We used to simulate this on ppc.)

// 32 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
// Registers that appear only inside /* ... */ are deliberately left out
// of the class: R0, R1, R13, R16, R29 and R30.
reg_class bits32_reg_rw(
/*R0*/              // R0
/*R1*/              // SP
  R2,               // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13*/             // system thread id
  R14,
  R15,
/*R16*/             // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
  R23,
  R24,
  R25,
  R26,
  R27,
  R28,
/*R29*/             // global TOC
/*R30*/             // Narrow Oop Base
  R31
);

// 32 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
1.460 +reg_class bits32_reg_ro( 1.461 +/*R0*/ // R0 1.462 +/*R1*/ // SP 1.463 + R2 // TOC 1.464 + R3, 1.465 + R4, 1.466 + R5, 1.467 + R6, 1.468 + R7, 1.469 + R8, 1.470 + R9, 1.471 + R10, 1.472 + R11, 1.473 + R12, 1.474 +/*R13*/ // system thread id 1.475 + R14, 1.476 + R15, 1.477 +/*R16*/ // R16_thread 1.478 + R17, 1.479 + R18, 1.480 + R19, 1.481 + R20, 1.482 + R21, 1.483 + R22, 1.484 + R23, 1.485 + R24, 1.486 + R25, 1.487 + R26, 1.488 + R27, 1.489 + R28, 1.490 +/*R29*/ 1.491 +/*R30*/ // Narrow Oop Base 1.492 + R31 1.493 +); 1.494 + 1.495 +// Complement-required-in-pipeline operands for narrow oops. 1.496 +reg_class bits32_reg_ro_not_complement ( 1.497 +/*R0*/ // R0 1.498 + R1, // SP 1.499 + R2, // TOC 1.500 + R3, 1.501 + R4, 1.502 + R5, 1.503 + R6, 1.504 + R7, 1.505 + R8, 1.506 + R9, 1.507 + R10, 1.508 + R11, 1.509 + R12, 1.510 +/*R13,*/ // system thread id 1.511 + R14, 1.512 + R15, 1.513 + R16, // R16_thread 1.514 + R17, 1.515 + R18, 1.516 + R19, 1.517 + R20, 1.518 + R21, 1.519 + R22, 1.520 +/*R23, 1.521 + R24, 1.522 + R25, 1.523 + R26, 1.524 + R27, 1.525 + R28,*/ 1.526 +/*R29,*/ // TODO: let allocator handle TOC!! 1.527 +/*R30,*/ 1.528 + R31 1.529 +); 1.530 + 1.531 +// Complement-required-in-pipeline operands for narrow oops. 1.532 +// See 64-bit declaration. 
reg_class bits32_reg_ro_complement (
  // R23-R28 only: the registers that bits32_reg_ro_not_complement leaves out.
  R23,
  R24,
  R25,
  R26,
  R27,
  R28
);

// Single-register classes (32-bit view): each contains exactly one register.
reg_class rscratch1_bits32_reg(R11);
reg_class rscratch2_bits32_reg(R12);
reg_class rarg1_bits32_reg(R3);
reg_class rarg2_bits32_reg(R4);
reg_class rarg3_bits32_reg(R5);
reg_class rarg4_bits32_reg(R6);

// ----------------------------
// 64 Bit Register Classes
// ----------------------------
// 64-bit build means 64-bit pointers means hi/lo pairs

// Single-register classes (64-bit view): each lists the virtual half
// R<n>_H together with R<n>.
reg_class rscratch1_bits64_reg(R11_H, R11);
reg_class rscratch2_bits64_reg(R12_H, R12);
reg_class rarg1_bits64_reg(R3_H, R3);
reg_class rarg2_bits64_reg(R4_H, R4);
reg_class rarg3_bits64_reg(R5_H, R5);
reg_class rarg4_bits64_reg(R6_H, R6);
// Thread register, 'written' by tlsLoadP, see there.
reg_class thread_bits64_reg(R16_H, R16);

reg_class r19_bits64_reg(R19_H, R19);

// 64 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
reg_class bits64_reg_rw(
// Pairs that appear only inside /* ... */ are left out of the class:
// R0, R1, R13, R16, R29 and R30.
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// 64 bit registers used excluding r2, r11 and r12
// Used to hold the TOC to avoid collisions with expanded LeafCall which uses
// r2, r11 and r12 internally.
// Left out (commented below): R0, R1, R2, R11, R12, R13, R16, R29 and R30.
reg_class bits64_reg_leaf_call(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// Used to hold the TOC to avoid collisions with expanded DynamicCall
// which uses r19 as inline cache internally and expanded LeafCall which uses
// r2, r11 and r12 internally.
reg_class bits64_constant_table_base(
// Left out (commented below): R0, R1, R2, R11, R12, R13, R16, R19, R29
// and R30.
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
/*R19_H, R19*/
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// 64 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
// Left out (commented below): R0, R13, R29 and R30. R1 and R16 are listed
// here.
reg_class bits64_reg_ro(
/*R0_H,  R0*/     // R0
  R1_H,  R1,
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
  R16_H, R16,    // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/   // TODO: let allocator handle TOC!!
/*R30_H, R30,*/
  R31_H, R31
);

// Complement-required-in-pipeline operands.
reg_class bits64_reg_ro_not_complement (
// Left out (commented below): R0, R13, R23-R28, R29 and R30. R23-R28 are
// the members of bits64_reg_ro_complement.
/*R0_H,  R0*/     // R0
  R1_H,  R1,      // SP
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
  R16_H, R16,    // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
/*R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,*/
/*R29_H, R29*/   // TODO: let allocator handle TOC!!
/*R30_H, R30,*/
  R31_H, R31
);

// Complement-required-in-pipeline operands.
// This register mask is used for the trap instructions that implement
// the null checks on AIX. The trap instruction first computes the
// complement of the value it shall trap on. Because of this, the
// instruction can not be scheduled in the same cycle as another
// instruction reading the normal value of the same register. So we
// force the value to check into 'bits64_reg_ro_not_complement'
// and then copy it to 'bits64_reg_ro_complement' for the trap.
reg_class bits64_reg_ro_complement (
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28
);


// ----------------------------
// Special Class for Condition Code Flags Register

// Only CCR5-CCR7 are members; CCR0-CCR4 are commented out.
reg_class int_flags(
/*CCR0*/             // scratch
/*CCR1*/             // scratch
/*CCR2*/             // nv!
/*CCR3*/             // nv!
/*CCR4*/             // nv!
  CCR5,
  CCR6,
  CCR7
);

// Single-register classes for individual condition fields and the
// count register.
reg_class int_flags_CR0(CCR0);
reg_class int_flags_CR1(CCR1);
reg_class int_flags_CR6(CCR6);
reg_class ctr_reg(SR_CTR);

// ----------------------------
// Float Register Classes
// ----------------------------

// F0 is left out (commented below) and marked as scratch.
reg_class flt_reg(
/*F0*/              // scratch
  F1,
  F2,
  F3,
  F4,
  F5,
  F6,
  F7,
  F8,
  F9,
  F10,
  F11,
  F12,
  F13,
  F14,              // nv!
  F15,              // nv!
  F16,              // nv!
  F17,              // nv!
  F18,              // nv!
  F19,              // nv!
  F20,              // nv!
  F21,              // nv!
  F22,              // nv!
  F23,              // nv!
  F24,              // nv!
  F25,              // nv!
  F26,              // nv!
  F27,              // nv!
  F28,              // nv!
  F29,              // nv!
  F30,              // nv!
  F31               // nv!
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
reg_class dbl_reg(
/*F0,  F0_H*/     // scratch
  F1,  F1_H,
  F2,  F2_H,
  F3,  F3_H,
  F4,  F4_H,
  F5,  F5_H,
  F6,  F6_H,
  F7,  F7_H,
  F8,  F8_H,
  F9,  F9_H,
  F10, F10_H,
  F11, F11_H,
  F12, F12_H,
  F13, F13_H,
  F14, F14_H,    // nv!
  F15, F15_H,    // nv!
  F16, F16_H,    // nv!
  F17, F17_H,    // nv!
  F18, F18_H,    // nv!
  F19, F19_H,    // nv!
  F20, F20_H,    // nv!
  F21, F21_H,    // nv!
  F22, F22_H,    // nv!
  F23, F23_H,    // nv!
  F24, F24_H,    // nv!
  F25, F25_H,    // nv!
  F26, F26_H,    // nv!
  F27, F27_H,    // nv!
  F28, F28_H,    // nv!
  F29, F29_H,    // nv!
  F30, F30_H,    // nv!
  F31, F31_H     // nv!
);

 %}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//
definitions %{
  // The default cost (of an ALU instruction).
  int_def DEFAULT_COST_LOW        (     30,      30);
  int_def DEFAULT_COST            (    100,     100);
  int_def HUGE_COST               (1000000, 1000000);

  // Memory refs
  // The literal first value must equal the DEFAULT_COST-based expression
  // (see the generated assert described above).
  int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);

  // Branches are even more expensive.
  int_def BRANCH_COST             (    900, DEFAULT_COST * 9);
  int_def CALL_COST               (   1300, DEFAULT_COST * 13);
%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
source_hpp %{
  // Header information of the source block.
  // Method declarations/definitions which are used outside
  // the ad-scope can conveniently be defined here.
  //
  // To keep related declarations/definitions/uses close together,
  // we switch between source %{ %} and source_hpp %{ %} freely as needed.

  // Returns true if Node n is followed by a MemBar node that
  // will do an acquire. If so, this node must not do the acquire
  // operation.
1.907 + bool followed_by_acquire(const Node *n); 1.908 +%} 1.909 + 1.910 +source %{ 1.911 + 1.912 +// Optimize load-acquire. 1.913 +// 1.914 +// Check if acquire is unnecessary due to following operation that does 1.915 +// acquire anyways. 1.916 +// Walk the pattern: 1.917 +// 1.918 +// n: Load.acq 1.919 +// | 1.920 +// MemBarAcquire 1.921 +// | | 1.922 +// Proj(ctrl) Proj(mem) 1.923 +// | | 1.924 +// MemBarRelease/Volatile 1.925 +// 1.926 +bool followed_by_acquire(const Node *load) { 1.927 + assert(load->is_Load(), "So far implemented only for loads."); 1.928 + 1.929 + // Find MemBarAcquire. 1.930 + const Node *mba = NULL; 1.931 + for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) { 1.932 + const Node *out = load->fast_out(i); 1.933 + if (out->Opcode() == Op_MemBarAcquire) { 1.934 + if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge. 1.935 + mba = out; 1.936 + break; 1.937 + } 1.938 + } 1.939 + if (!mba) return false; 1.940 + 1.941 + // Find following MemBar node. 1.942 + // 1.943 + // The following node must be reachable by control AND memory 1.944 + // edge to assure no other operations are in between the two nodes. 1.945 + // 1.946 + // So first get the Proj node, mem_proj, to use it to iterate forward. 1.947 + Node *mem_proj = NULL; 1.948 + for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) { 1.949 + mem_proj = mba->fast_out(i); // Throw out-of-bounds if proj not found 1.950 + assert(mem_proj->is_Proj(), "only projections here"); 1.951 + ProjNode *proj = mem_proj->as_Proj(); 1.952 + if (proj->_con == TypeFunc::Memory && 1.953 + !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only 1.954 + break; 1.955 + } 1.956 + assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken"); 1.957 + 1.958 + // Search MemBar behind Proj. If there are other memory operations 1.959 + // behind the Proj we lost. 
1.960 + for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) { 1.961 + Node *x = mem_proj->fast_out(j); 1.962 + // Proj might have an edge to a store or load node which precedes the membar. 1.963 + if (x->is_Mem()) return false; 1.964 + 1.965 + // On PPC64 release and volatile are implemented by an instruction 1.966 + // that also has acquire semantics. I.e. there is no need for an 1.967 + // acquire before these. 1.968 + int xop = x->Opcode(); 1.969 + if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) { 1.970 + // Make sure we're not missing Call/Phi/MergeMem by checking 1.971 + // control edges. The control edge must directly lead back 1.972 + // to the MemBarAcquire 1.973 + Node *ctrl_proj = x->in(0); 1.974 + if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) { 1.975 + return true; 1.976 + } 1.977 + } 1.978 + } 1.979 + 1.980 + return false; 1.981 +} 1.982 + 1.983 +#define __ _masm. 1.984 + 1.985 +// Tertiary op of a LoadP or StoreP encoding. 1.986 +#define REGP_OP true 1.987 + 1.988 +// **************************************************************************** 1.989 + 1.990 +// REQUIRED FUNCTIONALITY 1.991 + 1.992 +// !!!!! Special hack to get all type of calls to specify the byte offset 1.993 +// from the start of the call to the point where the return address 1.994 +// will point. 1.995 + 1.996 +// PPC port: Removed use of lazy constant construct. 1.997 + 1.998 +int MachCallStaticJavaNode::ret_addr_offset() { 1.999 + // It's only a single branch-and-link instruction. 1.1000 + return 4; 1.1001 +} 1.1002 + 1.1003 +int MachCallDynamicJavaNode::ret_addr_offset() { 1.1004 + // Offset is 4 with postalloc expanded calls (bl is one instruction). We use 1.1005 + // postalloc expanded calls if we use inline caches and do not update method data. 
1.1006 + if (UseInlineCaches) 1.1007 + return 4; 1.1008 + 1.1009 + int vtable_index = this->_vtable_index; 1.1010 + if (vtable_index < 0) { 1.1011 + // Must be invalid_vtable_index, not nonvirtual_vtable_index. 1.1012 + assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value"); 1.1013 + return 12; 1.1014 + } else { 1.1015 + assert(!UseInlineCaches, "expect vtable calls only if not using ICs"); 1.1016 + return 24; 1.1017 + } 1.1018 +} 1.1019 + 1.1020 +int MachCallRuntimeNode::ret_addr_offset() { 1.1021 +#if defined(ABI_ELFv2) 1.1022 + return 28; 1.1023 +#else 1.1024 + return 40; 1.1025 +#endif 1.1026 +} 1.1027 + 1.1028 +//============================================================================= 1.1029 + 1.1030 +// condition code conversions 1.1031 + 1.1032 +static int cc_to_boint(int cc) { 1.1033 + return Assembler::bcondCRbiIs0 | (cc & 8); 1.1034 +} 1.1035 + 1.1036 +static int cc_to_inverse_boint(int cc) { 1.1037 + return Assembler::bcondCRbiIs0 | (8-(cc & 8)); 1.1038 +} 1.1039 + 1.1040 +static int cc_to_biint(int cc, int flags_reg) { 1.1041 + return (flags_reg << 2) | (cc & 3); 1.1042 +} 1.1043 + 1.1044 +//============================================================================= 1.1045 + 1.1046 +// Compute padding required for nodes which need alignment. The padding 1.1047 +// is the number of bytes (not instructions) which will be inserted before 1.1048 +// the instruction. The padding must match the size of a NOP instruction. 
1.1049 + 1.1050 +int string_indexOf_imm1_charNode::compute_padding(int current_offset) const { 1.1051 + return (3*4-current_offset)&31; 1.1052 +} 1.1053 + 1.1054 +int string_indexOf_imm1Node::compute_padding(int current_offset) const { 1.1055 + return (2*4-current_offset)&31; 1.1056 +} 1.1057 + 1.1058 +int string_indexOf_immNode::compute_padding(int current_offset) const { 1.1059 + return (3*4-current_offset)&31; 1.1060 +} 1.1061 + 1.1062 +int string_indexOfNode::compute_padding(int current_offset) const { 1.1063 + return (1*4-current_offset)&31; 1.1064 +} 1.1065 + 1.1066 +int string_compareNode::compute_padding(int current_offset) const { 1.1067 + return (4*4-current_offset)&31; 1.1068 +} 1.1069 + 1.1070 +int string_equals_immNode::compute_padding(int current_offset) const { 1.1071 + if (opnd_array(3)->constant() < 16) return 0; // Don't insert nops for short version (loop completely unrolled). 1.1072 + return (2*4-current_offset)&31; 1.1073 +} 1.1074 + 1.1075 +int string_equalsNode::compute_padding(int current_offset) const { 1.1076 + return (7*4-current_offset)&31; 1.1077 +} 1.1078 + 1.1079 +int inlineCallClearArrayNode::compute_padding(int current_offset) const { 1.1080 + return (2*4-current_offset)&31; 1.1081 +} 1.1082 + 1.1083 +//============================================================================= 1.1084 + 1.1085 +// Indicate if the safepoint node needs the polling page as an input. 1.1086 +bool SafePointNode::needs_polling_address_input() { 1.1087 + // The address is loaded from thread by a seperate node. 1.1088 + return true; 1.1089 +} 1.1090 + 1.1091 +//============================================================================= 1.1092 + 1.1093 +// Emit an interrupt that is caught by the debugger (for debugging compiler). 
// Appends a single illtrap instruction to cbuf.
void emit_break(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);
  __ illtrap();
}

#ifndef PRODUCT
// Prints the node as the fixed text "BREAKPOINT".
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// Emits the breakpoint by delegating to emit_break().
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  emit_break(cbuf);
}

// Size is not hard-coded; it is delegated to MachNode::size().
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================

// Appends a single nop instruction to cbuf.
void emit_nop(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);
  __ nop();
}

// Stores 'value' as an int at the current end of cbuf's instruction section
// and then advances that end by BytesPerInstWord.
// NOTE(review): no capacity check is visible here; presumably the caller
// guarantees enough room in the buffer - confirm.
static inline void emit_long(CodeBuffer &cbuf, int value) {
  *((int*)(cbuf.insts_end())) = value;
  cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
}

//=============================================================================

%} // interrupt source

source_hpp %{ // Header information of the source block.

//--------------------------------------------------------------
//---< Used for optimization in Compile::Shorten_branches >---
//--------------------------------------------------------------

// Upper bound for the trampoline stub: six instruction words.
// emit_trampoline_stub() asserts that the emitted code does not exceed it.
const uint trampoline_stub_size = 6 * BytesPerInstWord;

class CallStubImpl {

 public:

  // Emit the trampoline stub for the call instruction located at
  // insts_call_instruction_offset in the instructions section. The stub
  // loads the call target from the constant pool at destination_toc_offset.
  // (Original note: "Emit call stub, compiled java to interpreter.")
  static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);

  // Size of call trampoline stub.
  // This doesn't need to be accurate to the byte, but it
  // must be larger than or equal to the real size of the stub.
  static uint size_call_trampoline() {
    return trampoline_stub_size;
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 5;
  }

};

%} // end source_hpp

source %{

// Emit a trampoline stub for a call to a target which is too far away.
//
// code sequences:
//
// call-site:
//   branch-and-link to <destination> or <trampoline stub>
//
// Related trampoline stub for this call-site in the stub section:
//   load the call target from the constant pool
//   branch via CTR (LR/link still points to the call-site above)
//
// Parameters:
//   _masm                         - assembler the stub is emitted with.
//   destination_toc_offset        - offset of the call target's entry,
//                                   used as displacement of the load below.
//   insts_call_instruction_offset - offset of the related call instruction
//                                   from the start of the instructions section.
// If no stub space can be started, an out-of-memory failure is recorded on
// the current compilation and nothing is emitted.

void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
  // Start the stub.
  address stub = __ start_a_stub(Compile::MAX_stubs_size/2);
  if (stub == NULL) {
    Compile::current()->env()->record_out_of_memory_failure();
    return;
  }

  // For java_to_interp stubs we use R11_scratch1 as scratch register
  // and in call trampoline stubs we use R12_scratch2. This way we
  // can distinguish them (see is_NativeCallTrampolineStub_at()).
  Register reg_scratch = R12_scratch2;

  // Create a trampoline stub relocation which relates this trampoline stub
  // with the call instruction at insts_call_instruction_offset in the
  // instructions code-section.
  __ relocate(trampoline_stub_Relocation::spec(__ code()->insts()->start() + insts_call_instruction_offset));
  // Taken after the relocation and before the first stub instruction, so it
  // marks the start of the stub's code for the checks below.
  const int stub_start_offset = __ offset();

  // Now, create the trampoline stub's code:
  // - load the TOC
  // - load the call target from the constant pool
  // - call
  __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
  __ ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, reg_scratch, false);
  __ mtctr(reg_scratch);
  __ bctr();

  const address stub_start_addr = __ addr_at(stub_start_offset);

  // FIXME: Assert that the trampoline stub can be identified and patched.

  // Assert that the encoded destination_toc_offset can be identified and that it is correct.
  assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
         "encoded offset into the constant pool must match");
  // Trampoline_stub_size should be good.
  assert((uint)(__ offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");

  // End the stub.
  __ end_a_stub();
}

//=============================================================================

// Emit an inline branch-and-link call and a related trampoline stub.
1.1219 +// 1.1220 +// code sequences: 1.1221 +// 1.1222 +// call-site: 1.1223 +// branch-and-link to <destination> or <trampoline stub> 1.1224 +// 1.1225 +// Related trampoline stub for this call-site in the stub section: 1.1226 +// load the call target from the constant pool 1.1227 +// branch via CTR (LR/link still points to the call-site above) 1.1228 +// 1.1229 + 1.1230 +typedef struct { 1.1231 + int insts_call_instruction_offset; 1.1232 + int ret_addr_offset; 1.1233 +} EmitCallOffsets; 1.1234 + 1.1235 +// Emit a branch-and-link instruction that branches to a trampoline. 1.1236 +// - Remember the offset of the branch-and-link instruction. 1.1237 +// - Add a relocation at the branch-and-link instruction. 1.1238 +// - Emit a branch-and-link. 1.1239 +// - Remember the return pc offset. 1.1240 +EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) { 1.1241 + EmitCallOffsets offsets = { -1, -1 }; 1.1242 + const int start_offset = __ offset(); 1.1243 + offsets.insts_call_instruction_offset = __ offset(); 1.1244 + 1.1245 + // No entry point given, use the current pc. 1.1246 + if (entry_point == NULL) entry_point = __ pc(); 1.1247 + 1.1248 + if (!Compile::current()->in_scratch_emit_size()) { 1.1249 + // Put the entry point as a constant into the constant pool. 1.1250 + const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none); 1.1251 + const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr); 1.1252 + 1.1253 + // Emit the trampoline stub which will be related to the branch-and-link below. 
1.1254 + CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset); 1.1255 + __ relocate(rtype); 1.1256 + } 1.1257 + 1.1258 + // Note: At this point we do not have the address of the trampoline 1.1259 + // stub, and the entry point might be too far away for bl, so __ pc() 1.1260 + // serves as dummy and the bl will be patched later. 1.1261 + __ bl((address) __ pc()); 1.1262 + 1.1263 + offsets.ret_addr_offset = __ offset() - start_offset; 1.1264 + 1.1265 + return offsets; 1.1266 +} 1.1267 + 1.1268 +//============================================================================= 1.1269 + 1.1270 +// Factory for creating loadConL* nodes for large/small constant pool. 1.1271 + 1.1272 +static inline jlong replicate_immF(float con) { 1.1273 + // Replicate float con 2 times and pack into vector. 1.1274 + int val = *((int*)&con); 1.1275 + jlong lval = val; 1.1276 + lval = (lval << 32) | (lval & 0xFFFFFFFFl); 1.1277 + return lval; 1.1278 +} 1.1279 + 1.1280 +//============================================================================= 1.1281 + 1.1282 +const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask(); 1.1283 +int Compile::ConstantTable::calculate_table_base_offset() const { 1.1284 + return 0; // absolute addressing, no offset 1.1285 +} 1.1286 + 1.1287 +bool MachConstantBaseNode::requires_postalloc_expand() const { return true; } 1.1288 +void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { 1.1289 + Compile *C = ra_->C; 1.1290 + 1.1291 + iRegPdstOper *op_dst = new (C) iRegPdstOper(); 1.1292 + MachNode *m1 = new (C) loadToc_hiNode(); 1.1293 + MachNode *m2 = new (C) loadToc_loNode(); 1.1294 + 1.1295 + m1->add_req(NULL); 1.1296 + m2->add_req(NULL, m1); 1.1297 + m1->_opnds[0] = op_dst; 1.1298 + m2->_opnds[0] = op_dst; 1.1299 + m2->_opnds[1] = op_dst; 1.1300 + ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.1301 + 
ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.1302 + nodes->push(m1); 1.1303 + nodes->push(m2); 1.1304 +} 1.1305 + 1.1306 +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 1.1307 + // Is postalloc expanded. 1.1308 + ShouldNotReachHere(); 1.1309 +} 1.1310 + 1.1311 +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 1.1312 + return 0; 1.1313 +} 1.1314 + 1.1315 +#ifndef PRODUCT 1.1316 +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 1.1317 + st->print("-- \t// MachConstantBaseNode (empty encoding)"); 1.1318 +} 1.1319 +#endif 1.1320 + 1.1321 +//============================================================================= 1.1322 + 1.1323 +#ifndef PRODUCT 1.1324 +void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 1.1325 + Compile* C = ra_->C; 1.1326 + const long framesize = C->frame_slots() << LogBytesPerInt; 1.1327 + 1.1328 + st->print("PROLOG\n\t"); 1.1329 + if (C->need_stack_bang(framesize)) { 1.1330 + st->print("stack_overflow_check\n\t"); 1.1331 + } 1.1332 + 1.1333 + if (!false /* TODO: PPC port C->is_frameless_method()*/) { 1.1334 + st->print("save return pc\n\t"); 1.1335 + st->print("push frame %d\n\t", -framesize); 1.1336 + } 1.1337 +} 1.1338 +#endif 1.1339 + 1.1340 +// Macro used instead of the common __ to emulate the pipes of PPC. 1.1341 +// Instead of e.g. __ ld(...) one hase to write ___(ld) ld(...) This enables the 1.1342 +// micro scheduler to cope with "hand written" assembler like in the prolog. Though 1.1343 +// still no scheduling of this code is possible, the micro scheduler is aware of the 1.1344 +// code and can update its internal data. The following mechanism is used to achieve this: 1.1345 +// The micro scheduler calls size() of each compound node during scheduling. size() does a 1.1346 +// dummy emit and only during this dummy emit C->hb_scheduling() is not NULL. 
// NOTE: ___(op) expands to an unbraced "if (...) stmt;" followed by "_masm.",
// so "___(ld) ld(...)" becomes two statements. Use it only where a statement
// sequence is allowed (inside braces), never as the sole body of an
// unbraced if/else.
#if 0 // TODO: PPC port
#define ___(op) if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \
                  C->hb_scheduling()->_pdScheduling->PdEmulatePipe(ppc64Opcode_##op); \
                _masm.
#define ___stop if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \
                  C->hb_scheduling()->_pdScheduling->PdEmulatePipe(archOpcode_none)
#define ___advance if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \
                  C->hb_scheduling()->_pdScheduling->advance_offset
#else
#define ___(op) if (UsePower6SchedulerPPC64) \
                  Unimplemented(); \
                _masm.
#define ___stop if (UsePower6SchedulerPPC64) \
                  Unimplemented()
#define ___advance if (UsePower6SchedulerPPC64) \
                  Unimplemented()
#endif

// Emits the method prolog:
//   1. fetch the return pc from LR into R20 (frameless handling is disabled:
//      method_is_frameless is the constant false);
//   2. if required, bang the stack page by page below SP;
//   3. push the frame (stdu for a displacement that fits 16 signed bits,
//      otherwise materialize the displacement in R22 and use stdux);
//   4. store the return pc into the caller's frame.
// Uses R20, R21 and R22; R11 is additionally used when a bang offset does
// not fit in 16 signed bits.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  const long framesize = C->frame_size_in_bytes();
  assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");

  const bool method_is_frameless = false /* TODO: PPC port C->is_frameless_method()*/;

  const Register return_pc = R20; // Must match return_addr() in frame section.
  const Register callers_sp = R21;
  const Register push_frame_temp = R22;
  // NOTE(review): toc_temp is only referenced by the assert below.
  const Register toc_temp = R23;
  assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);

  if (method_is_frameless) {
    // Add nop at beginning of all frameless methods to prevent any
    // oop instructions from getting overwritten by make_not_entrant
    // (patching attempt would fail).
    ___(nop) nop();
  } else {
    // Get return pc.
    ___(mflr) mflr(return_pc);
  }

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be
  // careful, because some VM calls (such as call site linkage) can
  // use several kilobytes of stack. But the stack safety zone should
  // account for that. See bugs 4446381, 4468289, 4497237.

  int bangsize = C->bang_size_in_bytes();
  assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
  if (C->need_stack_bang(bangsize) && UseStackBanging) {
    // Unfortunately we cannot use the function provided in
    // assembler.cpp as we have to emulate the pipes. So I had to
    // insert the code of generate_stack_overflow_check(), see
    // assembler.cpp for some illuminative comments.
    const int page_size = os::vm_page_size();
    int bang_end = StackShadowPages * page_size;

    // This is how far the previous frame's stack banging extended.
    const int bang_end_safe = bang_end;

    if (bangsize > page_size) {
      bang_end += bangsize;
    }

    int bang_offset = bang_end_safe;

    // Touches one location per page from bang_end_safe up to and including
    // bang_end; the first iteration always runs since bang_offset starts at
    // bang_end_safe <= bang_end.
    while (bang_offset <= bang_end) {
      // Need at least one stack bang at end of shadow zone.

      // Again I had to copy code, this time from assembler_ppc64.cpp,
      // bang_stack_with_offset - see there for comments.

      // Stack grows down, caller passes positive offset.
      assert(bang_offset > 0, "must bang with positive offset");

      long stdoffset = -bang_offset;

      if (Assembler::is_simm(stdoffset, 16)) {
        // Signed 16 bit offset, a simple std is ok.
        if (UseLoadInstructionsForStackBangingPPC64) {
          ___(ld) ld(R0, (int)(signed short)stdoffset, R1_SP);
        } else {
          ___(std) std(R0, (int)(signed short)stdoffset, R1_SP);
        }
      } else if (Assembler::is_simm(stdoffset, 31)) {
        // Use largeoffset calculations for addis & ld/std.
        const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
        const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);

        Register tmp = R11;
        ___(addis) addis(tmp, R1_SP, hi);
        if (UseLoadInstructionsForStackBangingPPC64) {
          ___(ld) ld(R0, lo, tmp);
        } else {
          ___(std) std(R0, lo, tmp);
        }
      } else {
        ShouldNotReachHere();
      }

      bang_offset += page_size;
    }
    // R11 trashed
  } // C->need_stack_bang(bangsize) && UseStackBanging

  unsigned int bytes = (unsigned int)framesize;
  long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
  // NOTE(review): currMethod is not used anywhere in this function.
  ciMethod *currMethod = C->method();

  // Optimized version for most common case.
  // Only taken with UsePower6SchedulerPPC64 set and a frame displacement that
  // fits in 16 signed bits; emits the complete frame push and returns early.
  if (UsePower6SchedulerPPC64 &&
      !method_is_frameless && Assembler::is_simm((int)(-offset), 16) &&
      !(false /* ConstantsALot TODO: PPC port*/)) {
    ___(or) mr(callers_sp, R1_SP);
    ___(std) std(return_pc, _abi(lr), R1_SP);
    ___(stdu) stdu(R1_SP, -offset, R1_SP);
    return;
  }

  if (!method_is_frameless) {
    // Get callers sp.
    ___(or) mr(callers_sp, R1_SP);

    // Push method's frame, modifies SP.
    assert(Assembler::is_uimm(framesize, 32U), "wrong type");
    // The ABI is already accounted for in 'framesize' via the
    // 'out_preserve' area.
    Register tmp = push_frame_temp;
    // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp).
    if (Assembler::is_simm(-offset, 16)) {
      ___(stdu) stdu(R1_SP, -offset, R1_SP);
    } else {
      long x = -offset;
      // Had to insert load_const(tmp, -offset).
      // Builds the 64-bit value in tmp 16 bits at a time: upper word first,
      // shift it up by 32, then OR in the two halves of the lower word.
      ___(addis) lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
      ___(ori) ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
      ___(rldicr) sldi(tmp, tmp, 32);
      ___(oris) oris(tmp, tmp, (x & 0xffff0000) >> 16);
      ___(ori) ori( tmp, tmp, (x & 0x0000ffff));

      ___(stdux) stdux(R1_SP, R1_SP, tmp);
    }
  }
#if 0 // TODO: PPC port
  // For testing large constant pools, emit a lot of constants to constant pool.
  // "Randomize" const_size.
  if (ConstantsALot) {
    const int num_consts = const_size();
    for (int i = 0; i < num_consts; i++) {
      __ long_constant(0xB0B5B00BBABE);
    }
  }
#endif
  if (!method_is_frameless) {
    // Save return pc.
    // Addressed via callers_sp because R1_SP already points to the new frame.
    ___(std) std(return_pc, _abi(lr), callers_sp);
  }
}
#undef ___
#undef ___stop
#undef ___advance

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. determine dynamically.
  return MachNode::size(ra_);
}

int MachPrologNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  // NOTE(review): emit() above contains no load_const(toc); confirm the
  // count of 1 is still intended.
  return 1; // 1 reloc entry for load_const(toc).
}

//=============================================================================

#ifndef PRODUCT
// Prints a textual outline of the epilog; the polling line is printed only
// for method compilations that do polling.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  st->print("EPILOG\n\t");
  st->print("restore return pc\n\t");
  st->print("pop frame\n\t");

  if (do_polling() && C->is_method_compilation()) {
    st->print("touch polling page\n\t");
  }
}
#endif

// Emits the method epilog: reload the return pc from the caller's frame into
// R11, optionally load the polling page address into R12, move the return pc
// to LR, pop the frame, and finally emit the (relocated) load from the
// polling page.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  const long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
  assert(framesize >= 0, "negative frame-size?");

  const bool method_needs_polling = do_polling() && C->is_method_compilation();
  const bool method_is_frameless = false /* TODO: PPC port C->is_frameless_method()*/;
  const Register return_pc = R11;
  const Register polling_page = R12;

  if (!method_is_frameless) {
    // Restore return pc relative to callers' sp.
    __ ld(return_pc, ((int)framesize) + _abi(lr), R1_SP);
  }

  if (method_needs_polling) {
    if (LoadPollAddressFromThread) {
      // TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread);
      Unimplemented();
    } else {
      __ load_const_optimized(polling_page, (long)(address) os::get_polling_page()); // TODO: PPC port: get_standard_polling_page()
    }
  }

  if (!method_is_frameless) {
    // Move return pc to LR.
    __ mtlr(return_pc);
    // Pop frame (fixed frame-size).
    __ addi(R1_SP, R1_SP, (int)framesize);
  }

  if (method_needs_polling) {
    // We need to mark the code position where the load from the safepoint
    // polling page was emitted as relocInfo::poll_return_type here.
    __ relocate(relocInfo::poll_return_type);
    __ load_from_polling_page(polling_page);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for load_from_polling_page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
// Always returns 0; asserts that this epilog does polling.
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 0;
}

// The MachLoadPollAddrLateNode code below is compiled out.
#if 0 // TODO: PPC port
void MachLoadPollAddrLateNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  if (LoadPollAddressFromThread) {
    _masm.ld(R11, in_bytes(JavaThread::poll_address_offset()), R16_thread);
  } else {
    _masm.nop();
  }
}

// Both branches return 4: emit() produces exactly one instruction (ld or
// nop) either way.
uint MachLoadPollAddrLateNode::size(PhaseRegAlloc* ra_) const {
  if (LoadPollAddressFromThread) {
    return 4;
  } else {
    return 4;
  }
}

#ifndef PRODUCT
void MachLoadPollAddrLateNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print_cr(" LD R11, PollAddressOffset, R16_thread \t// LoadPollAddressFromThread");
}
#endif

const RegMask &MachLoadPollAddrLateNode::out_RegMask() const {
  return RSCRATCH1_BITS64_REG_mask();
}
#endif // PPC port

// =============================================================================

// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Classifies an allocator register number:
//   OptoReg::Bad -> rc_bad, [0, 64) -> rc_int, [64, 128) -> rc_float,
//   anything else -> rc_stack (asserted to be a stack slot).
static enum RC rc_class(OptoReg::Name reg) {
  // Return the register class for the given register. The given register
  // reg is a <register>_num value, which is an index into the MachRegisterNumbers
  // enumeration in adGlobals_ppc64.hpp.

  if (reg == OptoReg::Bad) return rc_bad;

  // We have 64 integer register halves, starting at index 0.
  if (reg < 64) return rc_int;

  // We have 64 floating-point register halves, starting at index 64.
  if (reg < 64+64) return rc_float;

  // Between float regs & stack are the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}

// Emits or prints one SP-relative load/store used for spill copies.
//
// Parameters:
//   cbuf     - code buffer to emit into; when NULL nothing is emitted.
//   op_str   - mnemonic used for printing only.
//   opcode   - one of the LD/STD/LWZ/STW/LFD/STFD/LFS/STFS opcodes (asserted).
//   reg      - allocator register number of the register operand.
//   offset   - displacement from R1_SP.
//   do_print - when cbuf is NULL, print the instruction to st (non-PRODUCT
//              builds only).
//   C        - current compilation; only mentioned in the TODO remarks.
//   st       - stream used for printing.
// Returns the instruction size, always 4.
static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
                        bool do_print, Compile* C, outputStream *st) {

  assert(opcode == Assembler::LD_OPCODE ||
         opcode == Assembler::STD_OPCODE ||
         opcode == Assembler::LWZ_OPCODE ||
         opcode == Assembler::STW_OPCODE ||
         opcode == Assembler::LFD_OPCODE ||
         opcode == Assembler::STFD_OPCODE ||
         opcode == Assembler::LFS_OPCODE ||
         opcode == Assembler::STFS_OPCODE,
         "opcode not supported");

  if (cbuf) {
    // LD and STD encode their displacement with Assembler::ds(), all other
    // supported opcodes with Assembler::d1().
    int d =
      (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
        Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
      : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
    emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
  }
#ifndef PRODUCT
  else if (do_print) {
    st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
              op_str,
              Matcher::regName[reg],
              offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
  }
#endif
  return 4; // size
}

// Emits, sizes or prints the code for a spill copy, depending on the
// arguments: with a non-NULL cbuf code is emitted; otherwise, unless do_size
// is set, the copy is printed to st (non-PRODUCT builds). The return value
// is the code size in bytes.
uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
  Compile* C = ra_->C;

  // Get registers to move.
  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
  OptoReg::Name dst_hi = ra_->get_reg_second(this);
  OptoReg::Name dst_lo = ra_->get_reg_first(this);

  enum RC src_hi_rc = rc_class(src_hi);
  enum RC src_lo_rc = rc_class(src_lo);
  enum RC dst_hi_rc = rc_class(dst_hi);
  enum RC dst_lo_rc = rc_class(dst_lo);

  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
  if (src_hi != OptoReg::Bad)
    assert((src_lo&1)==0 && src_lo+1==src_hi &&
           (dst_lo&1)==0 && dst_lo+1==dst_hi,
           "expected aligned-adjacent pairs");
  // Generate spill code!
  int size = 0;

  if (src_lo == dst_lo && src_hi == dst_hi)
    return size; // Self copy, no move.

  // --------------------------------------
  // Memory->Memory Spill. Use R0 to hold the value.
  // A valid high half selects the 64-bit LD/STD pair, otherwise the 32-bit
  // LWZ/STW pair is used.
  if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "LD ", Assembler::LD_OPCODE, R0_num, src_offset, !do_size, C, st);
      if (!cbuf && !do_size) st->print("\n\t");
      size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
      if (!cbuf && !do_size) st->print("\n\t");
      size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for float->int copy; requires a trip through memory.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
    Unimplemented();
  }

  // --------------------------------------
  // Check for integer reg-reg copy.
  // Costs one mr (4 bytes), or nothing when both allocator registers map to
  // the same machine register.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
    Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
    Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
    size = (Rsrc != Rdst) ? 4 : 0;

    if (cbuf) {
      MacroAssembler _masm(cbuf);
      if (size) {
        __ mr(Rdst, Rsrc);
      }
    }
#ifndef PRODUCT
    else if (!do_size) {
      if (size) {
        st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
      } else {
        st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
      }
    }
#endif
    return size;
  }

  // Check for integer store.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for integer load.
  if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    if (src_hi != OptoReg::Bad) {
      assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "LD ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for float reg-reg copy.
1.1787 + if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { 1.1788 + if (cbuf) { 1.1789 + MacroAssembler _masm(cbuf); 1.1790 + FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]); 1.1791 + FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]); 1.1792 + __ fmr(Rdst, Rsrc); 1.1793 + } 1.1794 +#ifndef PRODUCT 1.1795 + else if (!do_size) { 1.1796 + st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 1.1797 + } 1.1798 +#endif 1.1799 + return 4; 1.1800 + } 1.1801 + 1.1802 + // Check for float store. 1.1803 + if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { 1.1804 + int dst_offset = ra_->reg2offset(dst_lo); 1.1805 + if (src_hi != OptoReg::Bad) { 1.1806 + assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack, 1.1807 + "expected same type of move for high parts"); 1.1808 + size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st); 1.1809 + } else { 1.1810 + size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st); 1.1811 + } 1.1812 + return size; 1.1813 + } 1.1814 + 1.1815 + // Check for float load. 1.1816 + if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) { 1.1817 + int src_offset = ra_->reg2offset(src_lo); 1.1818 + if (src_hi != OptoReg::Bad) { 1.1819 + assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack, 1.1820 + "expected same type of move for high parts"); 1.1821 + size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st); 1.1822 + } else { 1.1823 + size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st); 1.1824 + } 1.1825 + return size; 1.1826 + } 1.1827 + 1.1828 + // -------------------------------------------------------------------- 1.1829 + // Check for hi bits still needing moving. Only happens for misaligned 1.1830 + // arguments to native calls. 1.1831 + if (src_hi == dst_hi) 1.1832 + return size; // Self copy; no move. 
1.1833 + 1.1834 + assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad"); 1.1835 + ShouldNotReachHere(); // Unimplemented 1.1836 + return 0; 1.1837 +} 1.1838 + 1.1839 +#ifndef PRODUCT 1.1840 +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 1.1841 + if (!ra_) 1.1842 + st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); 1.1843 + else 1.1844 + implementation(NULL, ra_, false, st); 1.1845 +} 1.1846 +#endif 1.1847 + 1.1848 +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1.1849 + implementation(&cbuf, ra_, false, NULL); 1.1850 +} 1.1851 + 1.1852 +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1.1853 + return implementation(NULL, ra_, true, NULL); 1.1854 +} 1.1855 + 1.1856 +#if 0 // TODO: PPC port 1.1857 +ArchOpcode MachSpillCopyNode_archOpcode(MachSpillCopyNode *n, PhaseRegAlloc *ra_) { 1.1858 +#ifndef PRODUCT 1.1859 + if (ra_->node_regs_max_index() == 0) return archOpcode_undefined; 1.1860 +#endif 1.1861 + assert(ra_->node_regs_max_index() != 0, ""); 1.1862 + 1.1863 + // Get registers to move. 1.1864 + OptoReg::Name src_hi = ra_->get_reg_second(n->in(1)); 1.1865 + OptoReg::Name src_lo = ra_->get_reg_first(n->in(1)); 1.1866 + OptoReg::Name dst_hi = ra_->get_reg_second(n); 1.1867 + OptoReg::Name dst_lo = ra_->get_reg_first(n); 1.1868 + 1.1869 + enum RC src_lo_rc = rc_class(src_lo); 1.1870 + enum RC dst_lo_rc = rc_class(dst_lo); 1.1871 + 1.1872 + if (src_lo == dst_lo && src_hi == dst_hi) 1.1873 + return ppc64Opcode_none; // Self copy, no move. 1.1874 + 1.1875 + // -------------------------------------- 1.1876 + // Memory->Memory Spill. Use R0 to hold the value. 1.1877 + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { 1.1878 + return ppc64Opcode_compound; 1.1879 + } 1.1880 + 1.1881 + // -------------------------------------- 1.1882 + // Check for float->int copy; requires a trip through memory. 
1.1883 + if (src_lo_rc == rc_float && dst_lo_rc == rc_int) { 1.1884 + Unimplemented(); 1.1885 + } 1.1886 + 1.1887 + // -------------------------------------- 1.1888 + // Check for integer reg-reg copy. 1.1889 + if (src_lo_rc == rc_int && dst_lo_rc == rc_int) { 1.1890 + Register Rsrc = as_Register(Matcher::_regEncode[src_lo]); 1.1891 + Register Rdst = as_Register(Matcher::_regEncode[dst_lo]); 1.1892 + if (Rsrc == Rdst) { 1.1893 + return ppc64Opcode_none; 1.1894 + } else { 1.1895 + return ppc64Opcode_or; 1.1896 + } 1.1897 + } 1.1898 + 1.1899 + // Check for integer store. 1.1900 + if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) { 1.1901 + if (src_hi != OptoReg::Bad) { 1.1902 + return ppc64Opcode_std; 1.1903 + } else { 1.1904 + return ppc64Opcode_stw; 1.1905 + } 1.1906 + } 1.1907 + 1.1908 + // Check for integer load. 1.1909 + if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) { 1.1910 + if (src_hi != OptoReg::Bad) { 1.1911 + return ppc64Opcode_ld; 1.1912 + } else { 1.1913 + return ppc64Opcode_lwz; 1.1914 + } 1.1915 + } 1.1916 + 1.1917 + // Check for float reg-reg copy. 1.1918 + if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { 1.1919 + return ppc64Opcode_fmr; 1.1920 + } 1.1921 + 1.1922 + // Check for float store. 1.1923 + if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { 1.1924 + if (src_hi != OptoReg::Bad) { 1.1925 + return ppc64Opcode_stfd; 1.1926 + } else { 1.1927 + return ppc64Opcode_stfs; 1.1928 + } 1.1929 + } 1.1930 + 1.1931 + // Check for float load. 1.1932 + if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) { 1.1933 + if (src_hi != OptoReg::Bad) { 1.1934 + return ppc64Opcode_lfd; 1.1935 + } else { 1.1936 + return ppc64Opcode_lfs; 1.1937 + } 1.1938 + } 1.1939 + 1.1940 + // -------------------------------------------------------------------- 1.1941 + // Check for hi bits still needing moving. Only happens for misaligned 1.1942 + // arguments to native calls. 1.1943 + if (src_hi == dst_hi) 1.1944 + return ppc64Opcode_none; // Self copy; no move. 
1.1945 + 1.1946 + ShouldNotReachHere(); 1.1947 + return ppc64Opcode_undefined; 1.1948 +} 1.1949 +#endif // PPC port 1.1950 + 1.1951 +#ifndef PRODUCT 1.1952 +void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 1.1953 + st->print("NOP \t// %d nops to pad for loops.", _count); 1.1954 +} 1.1955 +#endif 1.1956 + 1.1957 +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const { 1.1958 + MacroAssembler _masm(&cbuf); 1.1959 + // _count contains the number of nops needed for padding. 1.1960 + for (int i = 0; i < _count; i++) { 1.1961 + __ nop(); 1.1962 + } 1.1963 +} 1.1964 + 1.1965 +uint MachNopNode::size(PhaseRegAlloc *ra_) const { 1.1966 + return _count * 4; 1.1967 +} 1.1968 + 1.1969 +#ifndef PRODUCT 1.1970 +void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 1.1971 + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1.1972 + int reg = ra_->get_reg_first(this); 1.1973 + st->print("ADDI %s, SP, %d \t// box node", Matcher::regName[reg], offset); 1.1974 +} 1.1975 +#endif 1.1976 + 1.1977 +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1.1978 + MacroAssembler _masm(&cbuf); 1.1979 + 1.1980 + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1.1981 + int reg = ra_->get_encode(this); 1.1982 + 1.1983 + if (Assembler::is_simm(offset, 16)) { 1.1984 + __ addi(as_Register(reg), R1, offset); 1.1985 + } else { 1.1986 + ShouldNotReachHere(); 1.1987 + } 1.1988 +} 1.1989 + 1.1990 +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1.1991 + // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). 1.1992 + return 4; 1.1993 +} 1.1994 + 1.1995 +#ifndef PRODUCT 1.1996 +void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 1.1997 + st->print_cr("---- MachUEPNode ----"); 1.1998 + st->print_cr("..."); 1.1999 +} 1.2000 +#endif 1.2001 + 1.2002 +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1.2003 + // This is the unverified entry point. 
1.2004 + MacroAssembler _masm(&cbuf); 1.2005 + 1.2006 + // Inline_cache contains a klass. 1.2007 + Register ic_klass = as_Register(Matcher::inline_cache_reg_encode()); 1.2008 + Register receiver_klass = R0; // tmp 1.2009 + 1.2010 + assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1); 1.2011 + assert(R11_scratch1 == R11, "need prologue scratch register"); 1.2012 + 1.2013 + // Check for NULL argument if we don't have implicit null checks. 1.2014 + if (!ImplicitNullChecks || !os::zero_page_read_protected()) { 1.2015 + if (TrapBasedNullChecks) { 1.2016 + __ trap_null_check(R3_ARG1); 1.2017 + } else { 1.2018 + Label valid; 1.2019 + __ cmpdi(CCR0, R3_ARG1, 0); 1.2020 + __ bne_predict_taken(CCR0, valid); 1.2021 + // We have a null argument, branch to ic_miss_stub. 1.2022 + __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), 1.2023 + relocInfo::runtime_call_type); 1.2024 + __ bind(valid); 1.2025 + } 1.2026 + } 1.2027 + // Assume argument is not NULL, load klass from receiver. 1.2028 + __ load_klass(receiver_klass, R3_ARG1); 1.2029 + 1.2030 + if (TrapBasedICMissChecks) { 1.2031 + __ trap_ic_miss_check(receiver_klass, ic_klass); 1.2032 + } else { 1.2033 + Label valid; 1.2034 + __ cmpd(CCR0, receiver_klass, ic_klass); 1.2035 + __ beq_predict_taken(CCR0, valid); 1.2036 + // We have an unexpected klass, branch to ic_miss_stub. 1.2037 + __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), 1.2038 + relocInfo::runtime_call_type); 1.2039 + __ bind(valid); 1.2040 + } 1.2041 + 1.2042 + // Argument is valid and klass is as expected, continue. 1.2043 +} 1.2044 + 1.2045 +#if 0 // TODO: PPC port 1.2046 +// Optimize UEP code on z (save a load_const() call in main path). 1.2047 +int MachUEPNode::ep_offset() { 1.2048 + return 0; 1.2049 +} 1.2050 +#endif 1.2051 + 1.2052 +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1.2053 + // Variable size. Determine dynamically. 
1.2054 + return MachNode::size(ra_); 1.2055 +} 1.2056 + 1.2057 +//============================================================================= 1.2058 + 1.2059 +%} // interrupt source 1.2060 + 1.2061 +source_hpp %{ // Header information of the source block. 1.2062 + 1.2063 +class HandlerImpl { 1.2064 + 1.2065 + public: 1.2066 + 1.2067 + static int emit_exception_handler(CodeBuffer &cbuf); 1.2068 + static int emit_deopt_handler(CodeBuffer& cbuf); 1.2069 + 1.2070 + static uint size_exception_handler() { 1.2071 + // The exception_handler is a b64_patchable. 1.2072 + return MacroAssembler::b64_patchable_size; 1.2073 + } 1.2074 + 1.2075 + static uint size_deopt_handler() { 1.2076 + // The deopt_handler is a bl64_patchable. 1.2077 + return MacroAssembler::bl64_patchable_size; 1.2078 + } 1.2079 + 1.2080 +}; 1.2081 + 1.2082 +%} // end source_hpp 1.2083 + 1.2084 +source %{ 1.2085 + 1.2086 +int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) { 1.2087 + MacroAssembler _masm(&cbuf); 1.2088 + 1.2089 + address base = __ start_a_stub(size_exception_handler()); 1.2090 + if (base == NULL) return 0; // CodeBuffer::expand failed 1.2091 + 1.2092 + int offset = __ offset(); 1.2093 + __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(), 1.2094 + relocInfo::runtime_call_type); 1.2095 + assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size"); 1.2096 + __ end_a_stub(); 1.2097 + 1.2098 + return offset; 1.2099 +} 1.2100 + 1.2101 +// The deopt_handler is like the exception handler, but it calls to 1.2102 +// the deoptimization blob instead of jumping to the exception blob. 
1.2103 +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1.2104 + MacroAssembler _masm(&cbuf); 1.2105 + 1.2106 + address base = __ start_a_stub(size_deopt_handler()); 1.2107 + if (base == NULL) return 0; // CodeBuffer::expand failed 1.2108 + 1.2109 + int offset = __ offset(); 1.2110 + __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(), 1.2111 + relocInfo::runtime_call_type); 1.2112 + assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size"); 1.2113 + __ end_a_stub(); 1.2114 + 1.2115 + return offset; 1.2116 +} 1.2117 + 1.2118 +//============================================================================= 1.2119 + 1.2120 +// Use a frame slots bias for frameless methods if accessing the stack. 1.2121 +static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) { 1.2122 + if (as_Register(reg_enc) == R1_SP) { 1.2123 + return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes(); 1.2124 + } 1.2125 + return 0; 1.2126 +} 1.2127 + 1.2128 +const bool Matcher::match_rule_supported(int opcode) { 1.2129 + if (!has_match_rule(opcode)) 1.2130 + return false; 1.2131 + 1.2132 + switch (opcode) { 1.2133 + case Op_SqrtD: 1.2134 + return VM_Version::has_fsqrt(); 1.2135 + case Op_CountLeadingZerosI: 1.2136 + case Op_CountLeadingZerosL: 1.2137 + case Op_CountTrailingZerosI: 1.2138 + case Op_CountTrailingZerosL: 1.2139 + if (!UseCountLeadingZerosInstructionsPPC64) 1.2140 + return false; 1.2141 + break; 1.2142 + 1.2143 + case Op_PopCountI: 1.2144 + case Op_PopCountL: 1.2145 + return (UsePopCountInstruction && VM_Version::has_popcntw()); 1.2146 + 1.2147 + case Op_StrComp: 1.2148 + return SpecialStringCompareTo; 1.2149 + case Op_StrEquals: 1.2150 + return SpecialStringEquals; 1.2151 + case Op_StrIndexOf: 1.2152 + return SpecialStringIndexOf; 1.2153 + } 1.2154 + 1.2155 + return true; // Per default match rules are supported. 1.2156 +} 1.2157 + 1.2158 +int Matcher::regnum_to_fpu_offset(int regnum) { 1.2159 + // No user for this method? 
1.2160 + Unimplemented(); 1.2161 + return 999; 1.2162 +} 1.2163 + 1.2164 +const bool Matcher::convL2FSupported(void) { 1.2165 + // fcfids can do the conversion (>= Power7). 1.2166 + // fcfid + frsp showed rounding problem when result should be 0x3f800001. 1.2167 + return VM_Version::has_fcfids(); // False means that conversion is done by runtime call. 1.2168 +} 1.2169 + 1.2170 +// Vector width in bytes. 1.2171 +const int Matcher::vector_width_in_bytes(BasicType bt) { 1.2172 + assert(MaxVectorSize == 8, ""); 1.2173 + return 8; 1.2174 +} 1.2175 + 1.2176 +// Vector ideal reg. 1.2177 +const int Matcher::vector_ideal_reg(int size) { 1.2178 + assert(MaxVectorSize == 8 && size == 8, ""); 1.2179 + return Op_RegL; 1.2180 +} 1.2181 + 1.2182 +const int Matcher::vector_shift_count_ideal_reg(int size) { 1.2183 + fatal("vector shift is not supported"); 1.2184 + return Node::NotAMachineReg; 1.2185 +} 1.2186 + 1.2187 +// Limits on vector size (number of elements) loaded into vector. 1.2188 +const int Matcher::max_vector_size(const BasicType bt) { 1.2189 + assert(is_java_primitive(bt), "only primitive type vectors"); 1.2190 + return vector_width_in_bytes(bt)/type2aelembytes(bt); 1.2191 +} 1.2192 + 1.2193 +const int Matcher::min_vector_size(const BasicType bt) { 1.2194 + return max_vector_size(bt); // Same as max. 1.2195 +} 1.2196 + 1.2197 +// PPC doesn't support misaligned vectors store/load. 1.2198 +const bool Matcher::misaligned_vectors_ok() { 1.2199 + return false; 1.2200 +} 1.2201 + 1.2202 +// PPC AES support not yet implemented 1.2203 +const bool Matcher::pass_original_key_for_aes() { 1.2204 + return false; 1.2205 +} 1.2206 + 1.2207 +// RETURNS: whether this branch offset is short enough that a short 1.2208 +// branch can be used. 1.2209 +// 1.2210 +// If the platform does not provide any short branch variants, then 1.2211 +// this method should return `false' for offset 0. 
1.2212 +// 1.2213 +// `Compile::Fill_buffer' will decide on basis of this information 1.2214 +// whether to do the pass `Compile::Shorten_branches' at all. 1.2215 +// 1.2216 +// And `Compile::Shorten_branches' will decide on basis of this 1.2217 +// information whether to replace particular branch sites by short 1.2218 +// ones. 1.2219 +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1.2220 + // Is the offset within the range of a ppc64 pc relative branch? 1.2221 + bool b; 1.2222 + 1.2223 + const int safety_zone = 3 * BytesPerInstWord; 1.2224 + b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone), 1.2225 + 29 - 16 + 1 + 2); 1.2226 + return b; 1.2227 +} 1.2228 + 1.2229 +const bool Matcher::isSimpleConstant64(jlong value) { 1.2230 + // Probably always true, even if a temp register is required. 1.2231 + return true; 1.2232 +} 1.2233 +/* TODO: PPC port 1.2234 +// Make a new machine dependent decode node (with its operands). 1.2235 +MachTypeNode *Matcher::make_decode_node(Compile *C) { 1.2236 + assert(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0, 1.2237 + "This method is only implemented for unscaled cOops mode so far"); 1.2238 + MachTypeNode *decode = new (C) decodeN_unscaledNode(); 1.2239 + decode->set_opnd_array(0, new (C) iRegPdstOper()); 1.2240 + decode->set_opnd_array(1, new (C) iRegNsrcOper()); 1.2241 + return decode; 1.2242 +} 1.2243 +*/ 1.2244 +// Threshold size for cleararray. 1.2245 +const int Matcher::init_array_short_size = 8 * BytesPerLong; 1.2246 + 1.2247 +// false => size gets scaled to BytesPerLong, ok. 1.2248 +const bool Matcher::init_array_count_is_in_bytes = false; 1.2249 + 1.2250 +// Use conditional move (CMOVL) on Power7. 1.2251 +const int Matcher::long_cmove_cost() { return 0; } // this only makes long cmoves more expensive than int cmoves 1.2252 + 1.2253 +// Suppress CMOVF. Conditional move available (sort of) on PPC64 only from P7 onwards. Not exploited yet. 
1.2254 +// fsel doesn't accept a condition register as input, so this would be slightly different. 1.2255 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } 1.2256 + 1.2257 +// Power6 requires postalloc expand (see block.cpp for description of postalloc expand). 1.2258 +const bool Matcher::require_postalloc_expand = true; 1.2259 + 1.2260 +// Should the Matcher clone shifts on addressing modes, expecting them to 1.2261 +// be subsumed into complex addressing expressions or compute them into 1.2262 +// registers? True for Intel but false for most RISCs. 1.2263 +const bool Matcher::clone_shift_expressions = false; 1.2264 + 1.2265 +// Do we need to mask the count passed to shift instructions or does 1.2266 +// the cpu only look at the lower 5/6 bits anyway? 1.2267 +// Off, as masks are generated in expand rules where required. 1.2268 +// Constant shift counts are handled in Ideal phase. 1.2269 +const bool Matcher::need_masked_shift_count = false; 1.2270 + 1.2271 +// This affects two different things: 1.2272 +// - how Decode nodes are matched 1.2273 +// - how ImplicitNullCheck opportunities are recognized 1.2274 +// If true, the matcher will try to remove all Decodes and match them 1.2275 +// (as operands) into nodes. NullChecks are not prepared to deal with 1.2276 +// Decodes by final_graph_reshaping(). 1.2277 +// If false, final_graph_reshaping() forces the decode behind the Cmp 1.2278 +// for a NullCheck. The matcher matches the Decode node into a register. 1.2279 +// Implicit_null_check optimization moves the Decode along with the 1.2280 +// memory operation back up before the NullCheck. 
1.2281 +bool Matcher::narrow_oop_use_complex_address() { 1.2282 + // TODO: PPC port if (MatchDecodeNodes) return true; 1.2283 + return false; 1.2284 +} 1.2285 + 1.2286 +bool Matcher::narrow_klass_use_complex_address() { 1.2287 + NOT_LP64(ShouldNotCallThis()); 1.2288 + assert(UseCompressedClassPointers, "only for compressed klass code"); 1.2289 + // TODO: PPC port if (MatchDecodeNodes) return true; 1.2290 + return false; 1.2291 +} 1.2292 + 1.2293 +// Is it better to copy float constants, or load them directly from memory? 1.2294 +// Intel can load a float constant from a direct address, requiring no 1.2295 +// extra registers. Most RISCs will have to materialize an address into a 1.2296 +// register first, so they would do better to copy the constant from stack. 1.2297 +const bool Matcher::rematerialize_float_constants = false; 1.2298 + 1.2299 +// If CPU can load and store mis-aligned doubles directly then no fixup is 1.2300 +// needed. Else we split the double into 2 integer pieces and move it 1.2301 +// piece-by-piece. Only happens when passing doubles into C code as the 1.2302 +// Java calling convention forces doubles to be aligned. 1.2303 +const bool Matcher::misaligned_doubles_ok = true; 1.2304 + 1.2305 +void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1.2306 + Unimplemented(); 1.2307 +} 1.2308 + 1.2309 +// Advertise here if the CPU requires explicit rounding operations 1.2310 +// to implement the UseStrictFP mode. 1.2311 +const bool Matcher::strict_fp_requires_explicit_rounding = false; 1.2312 + 1.2313 +// Do floats take an entire double register or just half? 1.2314 +// 1.2315 +// A float occupies a ppc64 double register. For the allocator, a 1.2316 +// ppc64 double register appears as a pair of float registers. 1.2317 +bool Matcher::float_in_double() { return true; } 1.2318 + 1.2319 +// Do ints take an entire long register or just half? 
1.2320 +// The relevant question is how the int is callee-saved: 1.2321 +// the whole long is written but de-opt'ing will have to extract 1.2322 +// the relevant 32 bits. 1.2323 +const bool Matcher::int_in_long = true; 1.2324 + 1.2325 +// Constants for c2c and c calling conventions. 1.2326 + 1.2327 +const MachRegisterNumbers iarg_reg[8] = { 1.2328 + R3_num, R4_num, R5_num, R6_num, 1.2329 + R7_num, R8_num, R9_num, R10_num 1.2330 +}; 1.2331 + 1.2332 +const MachRegisterNumbers farg_reg[13] = { 1.2333 + F1_num, F2_num, F3_num, F4_num, 1.2334 + F5_num, F6_num, F7_num, F8_num, 1.2335 + F9_num, F10_num, F11_num, F12_num, 1.2336 + F13_num 1.2337 +}; 1.2338 + 1.2339 +const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]); 1.2340 + 1.2341 +const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]); 1.2342 + 1.2343 +// Return whether or not this register is ever used as an argument. This 1.2344 +// function is used on startup to build the trampoline stubs in generateOptoStub. 1.2345 +// Registers not mentioned will be killed by the VM call in the trampoline, and 1.2346 +// arguments in those registers not be available to the callee. 1.2347 +bool Matcher::can_be_java_arg(int reg) { 1.2348 + // We return true for all registers contained in iarg_reg[] and 1.2349 + // farg_reg[] and their virtual halves. 1.2350 + // We must include the virtual halves in order to get STDs and LDs 1.2351 + // instead of STWs and LWs in the trampoline stubs. 
1.2352 + 1.2353 + if ( reg == R3_num || reg == R3_H_num 1.2354 + || reg == R4_num || reg == R4_H_num 1.2355 + || reg == R5_num || reg == R5_H_num 1.2356 + || reg == R6_num || reg == R6_H_num 1.2357 + || reg == R7_num || reg == R7_H_num 1.2358 + || reg == R8_num || reg == R8_H_num 1.2359 + || reg == R9_num || reg == R9_H_num 1.2360 + || reg == R10_num || reg == R10_H_num) 1.2361 + return true; 1.2362 + 1.2363 + if ( reg == F1_num || reg == F1_H_num 1.2364 + || reg == F2_num || reg == F2_H_num 1.2365 + || reg == F3_num || reg == F3_H_num 1.2366 + || reg == F4_num || reg == F4_H_num 1.2367 + || reg == F5_num || reg == F5_H_num 1.2368 + || reg == F6_num || reg == F6_H_num 1.2369 + || reg == F7_num || reg == F7_H_num 1.2370 + || reg == F8_num || reg == F8_H_num 1.2371 + || reg == F9_num || reg == F9_H_num 1.2372 + || reg == F10_num || reg == F10_H_num 1.2373 + || reg == F11_num || reg == F11_H_num 1.2374 + || reg == F12_num || reg == F12_H_num 1.2375 + || reg == F13_num || reg == F13_H_num) 1.2376 + return true; 1.2377 + 1.2378 + return false; 1.2379 +} 1.2380 + 1.2381 +bool Matcher::is_spillable_arg(int reg) { 1.2382 + return can_be_java_arg(reg); 1.2383 +} 1.2384 + 1.2385 +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { 1.2386 + return false; 1.2387 +} 1.2388 + 1.2389 +// Register for DIVI projection of divmodI. 1.2390 +RegMask Matcher::divI_proj_mask() { 1.2391 + ShouldNotReachHere(); 1.2392 + return RegMask(); 1.2393 +} 1.2394 + 1.2395 +// Register for MODI projection of divmodI. 1.2396 +RegMask Matcher::modI_proj_mask() { 1.2397 + ShouldNotReachHere(); 1.2398 + return RegMask(); 1.2399 +} 1.2400 + 1.2401 +// Register for DIVL projection of divmodL. 1.2402 +RegMask Matcher::divL_proj_mask() { 1.2403 + ShouldNotReachHere(); 1.2404 + return RegMask(); 1.2405 +} 1.2406 + 1.2407 +// Register for MODL projection of divmodL. 
1.2408 +RegMask Matcher::modL_proj_mask() { 1.2409 + ShouldNotReachHere(); 1.2410 + return RegMask(); 1.2411 +} 1.2412 + 1.2413 +const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1.2414 + return RegMask(); 1.2415 +} 1.2416 + 1.2417 +%} 1.2418 + 1.2419 +//----------ENCODING BLOCK----------------------------------------------------- 1.2420 +// This block specifies the encoding classes used by the compiler to output 1.2421 +// byte streams. Encoding classes are parameterized macros used by 1.2422 +// Machine Instruction Nodes in order to generate the bit encoding of the 1.2423 +// instruction. Operands specify their base encoding interface with the 1.2424 +// interface keyword. There are currently supported four interfaces, 1.2425 +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1.2426 +// operand to generate a function which returns its register number when 1.2427 +// queried. CONST_INTER causes an operand to generate a function which 1.2428 +// returns the value of the constant when queried. MEMORY_INTER causes an 1.2429 +// operand to generate four functions which return the Base Register, the 1.2430 +// Index Register, the Scale Value, and the Offset Value of the operand when 1.2431 +// queried. COND_INTER causes an operand to generate six functions which 1.2432 +// return the encoding code (ie - encoding bits for the instruction) 1.2433 +// associated with each basic boolean condition for a conditional instruction. 1.2434 +// 1.2435 +// Instructions specify two basic values for encoding. Again, a function 1.2436 +// is available to check if the constant displacement is an oop. They use the 1.2437 +// ins_encode keyword to specify their encoding classes (which must be 1.2438 +// a sequence of enc_class names, and their parameters, specified in 1.2439 +// the encoding block), and they use the 1.2440 +// opcode keyword to specify, in order, their primary, secondary, and 1.2441 +// tertiary opcode. 
Only the opcode sections which a particular instruction 1.2442 +// needs for encoding need to be specified. 1.2443 +encode %{ 1.2444 + enc_class enc_unimplemented %{ 1.2445 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.2446 + MacroAssembler _masm(&cbuf); 1.2447 + __ unimplemented("Unimplemented mach node encoding in AD file.", 13); 1.2448 + %} 1.2449 + 1.2450 + enc_class enc_untested %{ 1.2451 +#ifdef ASSERT 1.2452 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.2453 + MacroAssembler _masm(&cbuf); 1.2454 + __ untested("Untested mach node encoding in AD file."); 1.2455 +#else 1.2456 + // TODO: PPC port $archOpcode(ppc64Opcode_none); 1.2457 +#endif 1.2458 + %} 1.2459 + 1.2460 + enc_class enc_lbz(iRegIdst dst, memory mem) %{ 1.2461 + // TODO: PPC port $archOpcode(ppc64Opcode_lbz); 1.2462 + MacroAssembler _masm(&cbuf); 1.2463 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2464 + __ lbz($dst$$Register, Idisp, $mem$$base$$Register); 1.2465 + %} 1.2466 + 1.2467 + // Load acquire. 1.2468 + enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{ 1.2469 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.2470 + MacroAssembler _masm(&cbuf); 1.2471 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2472 + __ lbz($dst$$Register, Idisp, $mem$$base$$Register); 1.2473 + __ twi_0($dst$$Register); 1.2474 + __ isync(); 1.2475 + %} 1.2476 + 1.2477 + enc_class enc_lhz(iRegIdst dst, memory mem) %{ 1.2478 + // TODO: PPC port $archOpcode(ppc64Opcode_lhz); 1.2479 + 1.2480 + MacroAssembler _masm(&cbuf); 1.2481 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2482 + __ lhz($dst$$Register, Idisp, $mem$$base$$Register); 1.2483 + %} 1.2484 + 1.2485 + // Load acquire. 
1.2486 + enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{ 1.2487 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.2488 + 1.2489 + MacroAssembler _masm(&cbuf); 1.2490 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2491 + __ lhz($dst$$Register, Idisp, $mem$$base$$Register); 1.2492 + __ twi_0($dst$$Register); 1.2493 + __ isync(); 1.2494 + %} 1.2495 + 1.2496 + enc_class enc_lwz(iRegIdst dst, memory mem) %{ 1.2497 + // TODO: PPC port $archOpcode(ppc64Opcode_lwz); 1.2498 + 1.2499 + MacroAssembler _masm(&cbuf); 1.2500 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2501 + __ lwz($dst$$Register, Idisp, $mem$$base$$Register); 1.2502 + %} 1.2503 + 1.2504 + // Load acquire. 1.2505 + enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{ 1.2506 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.2507 + 1.2508 + MacroAssembler _masm(&cbuf); 1.2509 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2510 + __ lwz($dst$$Register, Idisp, $mem$$base$$Register); 1.2511 + __ twi_0($dst$$Register); 1.2512 + __ isync(); 1.2513 + %} 1.2514 + 1.2515 + enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{ 1.2516 + // TODO: PPC port $archOpcode(ppc64Opcode_ld); 1.2517 + MacroAssembler _masm(&cbuf); 1.2518 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2519 + // Operand 'ds' requires 4-alignment. 1.2520 + assert((Idisp & 0x3) == 0, "unaligned offset"); 1.2521 + __ ld($dst$$Register, Idisp, $mem$$base$$Register); 1.2522 + %} 1.2523 + 1.2524 + // Load acquire. 1.2525 + enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{ 1.2526 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.2527 + MacroAssembler _masm(&cbuf); 1.2528 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2529 + // Operand 'ds' requires 4-alignment.
1.2530 + assert((Idisp & 0x3) == 0, "unaligned offset"); 1.2531 + __ ld($dst$$Register, Idisp, $mem$$base$$Register); 1.2532 + __ twi_0($dst$$Register); 1.2533 + __ isync(); 1.2534 + %} 1.2535 + 1.2536 + enc_class enc_lfd(RegF dst, memory mem) %{ 1.2537 + // TODO: PPC port $archOpcode(ppc64Opcode_lfd); 1.2538 + MacroAssembler _masm(&cbuf); 1.2539 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2540 + __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register); 1.2541 + %} 1.2542 + 1.2543 + enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{ 1.2544 + // TODO: PPC port $archOpcode(ppc64Opcode_ld); 1.2545 + 1.2546 + MacroAssembler _masm(&cbuf); 1.2547 + int toc_offset = 0; 1.2548 + 1.2549 + if (!ra_->C->in_scratch_emit_size()) { 1.2550 + address const_toc_addr; 1.2551 + // Create a non-oop constant, no relocation needed. 1.2552 + // If it is an IC, it has a virtual_call_Relocation. 1.2553 + const_toc_addr = __ long_constant((jlong)$src$$constant); 1.2554 + 1.2555 + // Get the constant's TOC offset. 1.2556 + toc_offset = __ offset_to_method_toc(const_toc_addr); 1.2557 + 1.2558 + // Keep the current instruction offset in mind. 1.2559 + ((loadConLNode*)this)->_cbuf_insts_offset = __ offset(); 1.2560 + } 1.2561 + 1.2562 + __ ld($dst$$Register, toc_offset, $toc$$Register); 1.2563 + %} 1.2564 + 1.2565 + enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{ 1.2566 + // TODO: PPC port $archOpcode(ppc64Opcode_addis); 1.2567 + 1.2568 + MacroAssembler _masm(&cbuf); 1.2569 + 1.2570 + if (!ra_->C->in_scratch_emit_size()) { 1.2571 + address const_toc_addr; 1.2572 + // Create a non-oop constant, no relocation needed. 1.2573 + // If it is an IC, it has a virtual_call_Relocation. 1.2574 + const_toc_addr = __ long_constant((jlong)$src$$constant); 1.2575 + 1.2576 + // Get the constant's TOC offset. 1.2577 + const int toc_offset = __ offset_to_method_toc(const_toc_addr); 1.2578 + // Store the toc offset of the constant.
1.2579 + ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset; 1.2580 + 1.2581 + // Also keep the current instruction offset in mind. 1.2582 + ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset(); 1.2583 + } 1.2584 + 1.2585 + __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset)); 1.2586 + %} 1.2587 + 1.2588 +%} // encode 1.2589 + 1.2590 +source %{ 1.2591 + 1.2592 +typedef struct { 1.2593 + loadConL_hiNode *_large_hi; 1.2594 + loadConL_loNode *_large_lo; 1.2595 + loadConLNode *_small; 1.2596 + MachNode *_last; 1.2597 +} loadConLNodesTuple; 1.2598 + 1.2599 +loadConLNodesTuple loadConLNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc, 1.2600 + OptoReg::Name reg_second, OptoReg::Name reg_first) { 1.2601 + loadConLNodesTuple nodes; 1.2602 + 1.2603 + const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000; 1.2604 + if (large_constant_pool) { 1.2605 + // Create new nodes. 1.2606 + loadConL_hiNode *m1 = new (C) loadConL_hiNode(); 1.2607 + loadConL_loNode *m2 = new (C) loadConL_loNode(); 1.2608 + 1.2609 + // inputs for new nodes 1.2610 + m1->add_req(NULL, toc); 1.2611 + m2->add_req(NULL, m1); 1.2612 + 1.2613 + // operands for new nodes 1.2614 + m1->_opnds[0] = new (C) iRegLdstOper(); // dst 1.2615 + m1->_opnds[1] = immSrc; // src 1.2616 + m1->_opnds[2] = new (C) iRegPdstOper(); // toc 1.2617 + m2->_opnds[0] = new (C) iRegLdstOper(); // dst 1.2618 + m2->_opnds[1] = immSrc; // src 1.2619 + m2->_opnds[2] = new (C) iRegLdstOper(); // base 1.2620 + 1.2621 + // Initialize ins_attrib TOC fields. 1.2622 + m1->_const_toc_offset = -1; 1.2623 + m2->_const_toc_offset_hi_node = m1; 1.2624 + 1.2625 + // Initialize ins_attrib instruction offset.
1.2626 + m1->_cbuf_insts_offset = -1; 1.2627 + 1.2628 + // register allocation for new nodes 1.2629 + ra_->set_pair(m1->_idx, reg_second, reg_first); 1.2630 + ra_->set_pair(m2->_idx, reg_second, reg_first); 1.2631 + 1.2632 + // Create result. 1.2633 + nodes._large_hi = m1; 1.2634 + nodes._large_lo = m2; 1.2635 + nodes._small = NULL; 1.2636 + nodes._last = nodes._large_lo; 1.2637 + assert(m2->bottom_type()->isa_long(), "must be long"); 1.2638 + } else { 1.2639 + loadConLNode *m2 = new (C) loadConLNode(); 1.2640 + 1.2641 + // inputs for new nodes 1.2642 + m2->add_req(NULL, toc); 1.2643 + 1.2644 + // operands for new nodes 1.2645 + m2->_opnds[0] = new (C) iRegLdstOper(); // dst 1.2646 + m2->_opnds[1] = immSrc; // src 1.2647 + m2->_opnds[2] = new (C) iRegPdstOper(); // toc 1.2648 + 1.2649 + // Initialize ins_attrib instruction offset. 1.2650 + m2->_cbuf_insts_offset = -1; 1.2651 + 1.2652 + // register allocation for new nodes 1.2653 + ra_->set_pair(m2->_idx, reg_second, reg_first); 1.2654 + 1.2655 + // Create result. 1.2656 + nodes._large_hi = NULL; 1.2657 + nodes._large_lo = NULL; 1.2658 + nodes._small = m2; 1.2659 + nodes._last = nodes._small; 1.2660 + assert(m2->bottom_type()->isa_long(), "must be long"); 1.2661 + } 1.2662 + 1.2663 + return nodes; 1.2664 +} 1.2665 + 1.2666 +%} // source 1.2667 + 1.2668 +encode %{ 1.2669 + // Postalloc expand emitter for loading a long constant from the method's TOC. 1.2670 + // Enc_class needed as constanttablebase is not supported by postalloc 1.2671 + // expand. 1.2672 + enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{ 1.2673 + // Create new nodes. 1.2674 + loadConLNodesTuple loadConLNodes = 1.2675 + loadConLNodesTuple_create(C, ra_, n_toc, op_src, 1.2676 + ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2677 + 1.2678 + // Push new nodes.
1.2679 + if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi); 1.2680 + if (loadConLNodes._last) nodes->push(loadConLNodes._last); 1.2681 + 1.2682 + // some asserts 1.2683 + assert(nodes->length() >= 1, "must have created at least 1 node"); 1.2684 + assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long"); 1.2685 + %} 1.2686 + 1.2687 + enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{ 1.2688 + // TODO: PPC port $archOpcode(ppc64Opcode_ld); 1.2689 + 1.2690 + MacroAssembler _masm(&cbuf); 1.2691 + int toc_offset = 0; 1.2692 + 1.2693 + if (!ra_->C->in_scratch_emit_size()) { 1.2694 + intptr_t val = $src$$constant; 1.2695 + relocInfo::relocType constant_reloc = $src->constant_reloc(); // src 1.2696 + address const_toc_addr; 1.2697 + if (constant_reloc == relocInfo::oop_type) { 1.2698 + // Create an oop constant and a corresponding relocation. 1.2699 + AddressLiteral a = __ allocate_oop_address((jobject)val); 1.2700 + const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none); 1.2701 + __ relocate(a.rspec()); 1.2702 + } else if (constant_reloc == relocInfo::metadata_type) { 1.2703 + AddressLiteral a = __ allocate_metadata_address((Metadata *)val); 1.2704 + const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none); 1.2705 + __ relocate(a.rspec()); 1.2706 + } else { 1.2707 + // Create a non-oop constant, no relocation needed. 1.2708 + const_toc_addr = __ long_constant((jlong)$src$$constant); 1.2709 + } 1.2710 + 1.2711 + // Get the constant's TOC offset.
1.2712 + toc_offset = __ offset_to_method_toc(const_toc_addr); 1.2713 + } 1.2714 + 1.2715 + __ ld($dst$$Register, toc_offset, $toc$$Register); 1.2716 + %} 1.2717 + 1.2718 + enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{ 1.2719 + // TODO: PPC port $archOpcode(ppc64Opcode_addis); 1.2720 + 1.2721 + MacroAssembler _masm(&cbuf); 1.2722 + if (!ra_->C->in_scratch_emit_size()) { 1.2723 + intptr_t val = $src$$constant; 1.2724 + relocInfo::relocType constant_reloc = $src->constant_reloc(); // src 1.2725 + address const_toc_addr; 1.2726 + if (constant_reloc == relocInfo::oop_type) { 1.2727 + // Create an oop constant and a corresponding relocation. 1.2728 + AddressLiteral a = __ allocate_oop_address((jobject)val); 1.2729 + const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none); 1.2730 + __ relocate(a.rspec()); 1.2731 + } else if (constant_reloc == relocInfo::metadata_type) { 1.2732 + AddressLiteral a = __ allocate_metadata_address((Metadata *)val); 1.2733 + const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none); 1.2734 + __ relocate(a.rspec()); 1.2735 + } else { // non-oop pointers, e.g. card mark base, heap top 1.2736 + // Create a non-oop constant, no relocation needed. 1.2737 + const_toc_addr = __ long_constant((jlong)$src$$constant); 1.2738 + } 1.2739 + 1.2740 + // Get the constant's TOC offset. 1.2741 + const int toc_offset = __ offset_to_method_toc(const_toc_addr); 1.2742 + // Store the toc offset of the constant. 1.2743 + ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset; 1.2744 + } 1.2745 + 1.2746 + __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset)); 1.2747 + %} 1.2748 + 1.2749 + // Postalloc expand emitter for loading a ptr constant from the method's TOC. 1.2750 + // Enc_class needed as constanttablebase is not supported by postalloc 1.2751 + // expand.
1.2752 + enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{ 1.2753 + const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000; 1.2754 + if (large_constant_pool) { 1.2755 + // Create new nodes. 1.2756 + loadConP_hiNode *m1 = new (C) loadConP_hiNode(); 1.2757 + loadConP_loNode *m2 = new (C) loadConP_loNode(); 1.2758 + 1.2759 + // inputs for new nodes 1.2760 + m1->add_req(NULL, n_toc); 1.2761 + m2->add_req(NULL, m1); 1.2762 + 1.2763 + // operands for new nodes 1.2764 + m1->_opnds[0] = new (C) iRegPdstOper(); // dst 1.2765 + m1->_opnds[1] = op_src; // src 1.2766 + m1->_opnds[2] = new (C) iRegPdstOper(); // toc 1.2767 + m2->_opnds[0] = new (C) iRegPdstOper(); // dst 1.2768 + m2->_opnds[1] = op_src; // src 1.2769 + m2->_opnds[2] = new (C) iRegLdstOper(); // base 1.2770 + 1.2771 + // Initialize ins_attrib TOC fields. 1.2772 + m1->_const_toc_offset = -1; 1.2773 + m2->_const_toc_offset_hi_node = m1; 1.2774 + 1.2775 + // Register allocation for new nodes. 1.2776 + ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2777 + ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2778 + 1.2779 + nodes->push(m1); 1.2780 + nodes->push(m2); 1.2781 + assert(m2->bottom_type()->isa_ptr(), "must be ptr"); 1.2782 + } else { 1.2783 + loadConPNode *m2 = new (C) loadConPNode(); 1.2784 + 1.2785 + // inputs for new nodes 1.2786 + m2->add_req(NULL, n_toc); 1.2787 + 1.2788 + // operands for new nodes 1.2789 + m2->_opnds[0] = new (C) iRegPdstOper(); // dst 1.2790 + m2->_opnds[1] = op_src; // src 1.2791 + m2->_opnds[2] = new (C) iRegPdstOper(); // toc 1.2792 + 1.2793 + // Register allocation for new nodes.
1.2794 + ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2795 + 1.2796 + nodes->push(m2); 1.2797 + assert(m2->bottom_type()->isa_ptr(), "must be ptr"); 1.2798 + } 1.2799 + %} 1.2800 + 1.2801 + // Enc_class needed as constanttablebase is not supported by postalloc 1.2802 + // expand. 1.2803 + enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{ 1.2804 + bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000; 1.2805 + 1.2806 + MachNode *m2; 1.2807 + if (large_constant_pool) { 1.2808 + m2 = new (C) loadConFCompNode(); 1.2809 + } else { 1.2810 + m2 = new (C) loadConFNode(); 1.2811 + } 1.2812 + // inputs for new nodes 1.2813 + m2->add_req(NULL, n_toc); 1.2814 + 1.2815 + // operands for new nodes 1.2816 + m2->_opnds[0] = op_dst; 1.2817 + m2->_opnds[1] = op_src; 1.2818 + m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase 1.2819 + 1.2820 + // register allocation for new nodes 1.2821 + ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2822 + nodes->push(m2); 1.2823 + %} 1.2824 + 1.2825 + // Enc_class needed as constanttablebase is not supported by postalloc 1.2826 + // expand.
1.2827 + enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{ 1.2828 + bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000; 1.2829 + 1.2830 + MachNode *m2; 1.2831 + if (large_constant_pool) { 1.2832 + m2 = new (C) loadConDCompNode(); 1.2833 + } else { 1.2834 + m2 = new (C) loadConDNode(); 1.2835 + } 1.2836 + // inputs for new nodes 1.2837 + m2->add_req(NULL, n_toc); 1.2838 + 1.2839 + // operands for new nodes 1.2840 + m2->_opnds[0] = op_dst; 1.2841 + m2->_opnds[1] = op_src; 1.2842 + m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase 1.2843 + 1.2844 + // register allocation for new nodes 1.2845 + ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2846 + nodes->push(m2); 1.2847 + %} 1.2848 + 1.2849 + enc_class enc_stw(iRegIsrc src, memory mem) %{ 1.2850 + // TODO: PPC port $archOpcode(ppc64Opcode_stw); 1.2851 + MacroAssembler _masm(&cbuf); 1.2852 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2853 + __ stw($src$$Register, Idisp, $mem$$base$$Register); 1.2854 + %} 1.2855 + 1.2856 + enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{ 1.2857 + // TODO: PPC port $archOpcode(ppc64Opcode_std); 1.2858 + MacroAssembler _masm(&cbuf); 1.2859 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2860 + // Operand 'ds' requires 4-alignment. 
1.2861 + assert((Idisp & 0x3) == 0, "unaligned offset"); 1.2862 + __ std($src$$Register, Idisp, $mem$$base$$Register); 1.2863 + %} 1.2864 + 1.2865 + enc_class enc_stfs(RegF src, memory mem) %{ 1.2866 + // TODO: PPC port $archOpcode(ppc64Opcode_stfs); 1.2867 + MacroAssembler _masm(&cbuf); 1.2868 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2869 + __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register); 1.2870 + %} 1.2871 + 1.2872 + enc_class enc_stfd(RegF src, memory mem) %{ 1.2873 + // TODO: PPC port $archOpcode(ppc64Opcode_stfd); 1.2874 + MacroAssembler _masm(&cbuf); 1.2875 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.2876 + __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register); 1.2877 + %} 1.2878 + 1.2879 + // Use release_store for card-marking to ensure that previous 1.2880 + // oop-stores are visible before the card-mark change. 1.2881 + enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{ 1.2882 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.2883 + // FIXME: Implement this as a cmove and use a fixed condition code 1.2884 + // register which is written on every transition to compiled code, 1.2885 + // e.g. in call-stub and when returning from runtime stubs. 1.2886 + // 1.2887 + // Proposed code sequence for the cmove implementation: 1.2888 + // 1.2889 + // Label skip_release; 1.2890 + // __ beq(CCRfixed, skip_release); 1.2891 + // __ release(); 1.2892 + // __ bind(skip_release); 1.2893 + // __ stb(card mark); 1.2894 + 1.2895 + MacroAssembler _masm(&cbuf); 1.2896 + Label skip_storestore; 1.2897 + 1.2898 +#if 0 // TODO: PPC port 1.2899 + // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the 1.2900 + // StoreStore barrier conditionally. 
1.2901 + __ lwz(R0, 0, $releaseFieldAddr$$Register); 1.2902 + __ cmpwi(CCR0, R0, 0); 1.2903 + __ beq_predict_taken(CCR0, skip_storestore); 1.2904 +#endif 1.2905 + __ li(R0, 0); 1.2906 + __ membar(Assembler::StoreStore); 1.2907 +#if 0 // TODO: PPC port 1.2908 + __ bind(skip_storestore); 1.2909 +#endif 1.2910 + 1.2911 + // Do the store. 1.2912 + if ($mem$$index == 0) { 1.2913 + __ stb(R0, $mem$$disp, $mem$$base$$Register); 1.2914 + } else { 1.2915 + assert(0 == $mem$$disp, "no displacement possible with indexed load/stores on ppc"); 1.2916 + __ stbx(R0, $mem$$base$$Register, $mem$$index$$Register); 1.2917 + } 1.2918 + %} 1.2919 + 1.2920 + enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{ 1.2921 + 1.2922 + if (VM_Version::has_isel()) { 1.2923 + // use isel instruction with Power 7 1.2924 + cmpP_reg_imm16Node *n_compare = new (C) cmpP_reg_imm16Node(); 1.2925 + encodeP_subNode *n_sub_base = new (C) encodeP_subNode(); 1.2926 + encodeP_shiftNode *n_shift = new (C) encodeP_shiftNode(); 1.2927 + cond_set_0_oopNode *n_cond_set = new (C) cond_set_0_oopNode(); 1.2928 + 1.2929 + n_compare->add_req(n_region, n_src); 1.2930 + n_compare->_opnds[0] = op_crx; 1.2931 + n_compare->_opnds[1] = op_src; 1.2932 + n_compare->_opnds[2] = new (C) immL16Oper(0); 1.2933 + 1.2934 + n_sub_base->add_req(n_region, n_src); 1.2935 + n_sub_base->_opnds[0] = op_dst; 1.2936 + n_sub_base->_opnds[1] = op_src; 1.2937 + n_sub_base->_bottom_type = _bottom_type; 1.2938 + 1.2939 + n_shift->add_req(n_region, n_sub_base); 1.2940 + n_shift->_opnds[0] = op_dst; 1.2941 + n_shift->_opnds[1] = op_dst; 1.2942 + n_shift->_bottom_type = _bottom_type; 1.2943 + 1.2944 + n_cond_set->add_req(n_region, n_compare, n_shift); 1.2945 + n_cond_set->_opnds[0] = op_dst; 1.2946 + n_cond_set->_opnds[1] = op_crx; 1.2947 + n_cond_set->_opnds[2] = op_dst; 1.2948 + n_cond_set->_bottom_type = _bottom_type; 1.2949 + 1.2950 + ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), 
ra_->get_reg_first(n_crx)); 1.2951 + ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2952 + ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2953 + ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2954 + 1.2955 + nodes->push(n_compare); 1.2956 + nodes->push(n_sub_base); 1.2957 + nodes->push(n_shift); 1.2958 + nodes->push(n_cond_set); 1.2959 + 1.2960 + } else { 1.2961 + // before Power 7 1.2962 + moveRegNode *n_move = new (C) moveRegNode(); 1.2963 + cmpP_reg_imm16Node *n_compare = new (C) cmpP_reg_imm16Node(); 1.2964 + encodeP_shiftNode *n_shift = new (C) encodeP_shiftNode(); 1.2965 + cond_sub_baseNode *n_sub_base = new (C) cond_sub_baseNode(); 1.2966 + 1.2967 + n_move->add_req(n_region, n_src); 1.2968 + n_move->_opnds[0] = op_dst; 1.2969 + n_move->_opnds[1] = op_src; 1.2970 + ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop. 1.2971 + 1.2972 + n_compare->add_req(n_region, n_src); 1.2973 + n_compare->add_prec(n_move); 1.2974 + 1.2975 + n_compare->_opnds[0] = op_crx; 1.2976 + n_compare->_opnds[1] = op_src; 1.2977 + n_compare->_opnds[2] = new (C) immL16Oper(0); 1.2978 + 1.2979 + n_sub_base->add_req(n_region, n_compare, n_src); 1.2980 + n_sub_base->_opnds[0] = op_dst; 1.2981 + n_sub_base->_opnds[1] = op_crx; 1.2982 + n_sub_base->_opnds[2] = op_src; 1.2983 + n_sub_base->_bottom_type = _bottom_type; 1.2984 + 1.2985 + n_shift->add_req(n_region, n_sub_base); 1.2986 + n_shift->_opnds[0] = op_dst; 1.2987 + n_shift->_opnds[1] = op_dst; 1.2988 + n_shift->_bottom_type = _bottom_type; 1.2989 + 1.2990 + ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2991 + ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); 1.2992 + ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.2993 + ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), 
ra_->get_reg_first(this)); 1.2994 + 1.2995 + nodes->push(n_move); 1.2996 + nodes->push(n_compare); 1.2997 + nodes->push(n_sub_base); 1.2998 + nodes->push(n_shift); 1.2999 + } 1.3000 + 1.3001 + assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed. 1.3002 + %} 1.3003 + 1.3004 + enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{ 1.3005 + 1.3006 + encodeP_subNode *n1 = new (C) encodeP_subNode(); 1.3007 + n1->add_req(n_region, n_src); 1.3008 + n1->_opnds[0] = op_dst; 1.3009 + n1->_opnds[1] = op_src; 1.3010 + n1->_bottom_type = _bottom_type; 1.3011 + 1.3012 + encodeP_shiftNode *n2 = new (C) encodeP_shiftNode(); 1.3013 + n2->add_req(n_region, n1); 1.3014 + n2->_opnds[0] = op_dst; 1.3015 + n2->_opnds[1] = op_dst; 1.3016 + n2->_bottom_type = _bottom_type; 1.3017 + ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3018 + ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3019 + 1.3020 + nodes->push(n1); 1.3021 + nodes->push(n2); 1.3022 + assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed. 
1.3023 + %} 1.3024 + 1.3025 + enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{ 1.3026 + decodeN_shiftNode *n_shift = new (C) decodeN_shiftNode(); 1.3027 + cmpN_reg_imm0Node *n_compare = new (C) cmpN_reg_imm0Node(); 1.3028 + 1.3029 + n_compare->add_req(n_region, n_src); 1.3030 + n_compare->_opnds[0] = op_crx; 1.3031 + n_compare->_opnds[1] = op_src; 1.3032 + n_compare->_opnds[2] = new (C) immN_0Oper(TypeNarrowOop::NULL_PTR); 1.3033 + 1.3034 + n_shift->add_req(n_region, n_src); 1.3035 + n_shift->_opnds[0] = op_dst; 1.3036 + n_shift->_opnds[1] = op_src; 1.3037 + n_shift->_bottom_type = _bottom_type; 1.3038 + 1.3039 + if (VM_Version::has_isel()) { 1.3040 + // use isel instruction with Power 7 1.3041 + 1.3042 + decodeN_addNode *n_add_base = new (C) decodeN_addNode(); 1.3043 + n_add_base->add_req(n_region, n_shift); 1.3044 + n_add_base->_opnds[0] = op_dst; 1.3045 + n_add_base->_opnds[1] = op_dst; 1.3046 + n_add_base->_bottom_type = _bottom_type; 1.3047 + 1.3048 + cond_set_0_ptrNode *n_cond_set = new (C) cond_set_0_ptrNode(); 1.3049 + n_cond_set->add_req(n_region, n_compare, n_add_base); 1.3050 + n_cond_set->_opnds[0] = op_dst; 1.3051 + n_cond_set->_opnds[1] = op_crx; 1.3052 + n_cond_set->_opnds[2] = op_dst; 1.3053 + n_cond_set->_bottom_type = _bottom_type; 1.3054 + 1.3055 + assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); 1.3056 + ra_->set_oop(n_cond_set, true); 1.3057 + 1.3058 + ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3059 + ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); 1.3060 + ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3061 + ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3062 + 1.3063 + nodes->push(n_compare); 1.3064 + nodes->push(n_shift); 1.3065 + nodes->push(n_add_base); 1.3066 + nodes->push(n_cond_set); 1.3067 + 1.3068 + } else { 1.3069 
+ // before Power 7 1.3070 + cond_add_baseNode *n_add_base = new (C) cond_add_baseNode(); 1.3071 + 1.3072 + n_add_base->add_req(n_region, n_compare, n_shift); 1.3073 + n_add_base->_opnds[0] = op_dst; 1.3074 + n_add_base->_opnds[1] = op_crx; 1.3075 + n_add_base->_opnds[2] = op_dst; 1.3076 + n_add_base->_bottom_type = _bottom_type; 1.3077 + 1.3078 + assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); 1.3079 + ra_->set_oop(n_add_base, true); 1.3080 + 1.3081 + ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3082 + ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); 1.3083 + ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3084 + 1.3085 + nodes->push(n_compare); 1.3086 + nodes->push(n_shift); 1.3087 + nodes->push(n_add_base); 1.3088 + } 1.3089 + %} 1.3090 + 1.3091 + enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{ 1.3092 + decodeN_shiftNode *n1 = new (C) decodeN_shiftNode(); 1.3093 + n1->add_req(n_region, n_src); 1.3094 + n1->_opnds[0] = op_dst; 1.3095 + n1->_opnds[1] = op_src; 1.3096 + n1->_bottom_type = _bottom_type; 1.3097 + 1.3098 + decodeN_addNode *n2 = new (C) decodeN_addNode(); 1.3099 + n2->add_req(n_region, n1); 1.3100 + n2->_opnds[0] = op_dst; 1.3101 + n2->_opnds[1] = op_dst; 1.3102 + n2->_bottom_type = _bottom_type; 1.3103 + ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3104 + ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3105 + 1.3106 + assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); 1.3107 + ra_->set_oop(n2, true); 1.3108 + 1.3109 + nodes->push(n1); 1.3110 + nodes->push(n2); 1.3111 + %} 1.3112 + 1.3113 + enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{ 1.3114 + // TODO: PPC port $archOpcode(ppc64Opcode_cmove); 1.3115 + 1.3116 + MacroAssembler _masm(&cbuf); 1.3117 + int cc = 
$cmp$$cmpcode; 1.3118 + int flags_reg = $crx$$reg; 1.3119 + Label done; 1.3120 + assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding"); 1.3121 + // Branch if not (cmp crx). 1.3122 + __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done); 1.3123 + __ mr($dst$$Register, $src$$Register); 1.3124 + // TODO PPC port __ endgroup_if_needed(_size == 12); 1.3125 + __ bind(done); 1.3126 + %} 1.3127 + 1.3128 + enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{ 1.3129 + // TODO: PPC port $archOpcode(ppc64Opcode_cmove); 1.3130 + 1.3131 + MacroAssembler _masm(&cbuf); 1.3132 + Label done; 1.3133 + assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding"); 1.3134 + // Branch if not (cmp crx). 1.3135 + __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done); 1.3136 + __ li($dst$$Register, $src$$constant); 1.3137 + // TODO PPC port __ endgroup_if_needed(_size == 12); 1.3138 + __ bind(done); 1.3139 + %} 1.3140 + 1.3141 + // New atomics. 1.3142 + enc_class enc_GetAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ 1.3143 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3144 + 1.3145 + MacroAssembler _masm(&cbuf); 1.3146 + Register Rtmp = R0; 1.3147 + Register Rres = $res$$Register; 1.3148 + Register Rsrc = $src$$Register; 1.3149 + Register Rptr = $mem_ptr$$Register; 1.3150 + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); 1.3151 + Register Rold = RegCollision ? 
Rtmp : Rres; 1.3152 + 1.3153 + Label Lretry; 1.3154 + __ bind(Lretry); 1.3155 + __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); 1.3156 + __ add(Rtmp, Rsrc, Rold); 1.3157 + __ stwcx_(Rtmp, Rptr); 1.3158 + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { 1.3159 + __ bne_predict_not_taken(CCR0, Lretry); 1.3160 + } else { 1.3161 + __ bne( CCR0, Lretry); 1.3162 + } 1.3163 + if (RegCollision) __ subf(Rres, Rsrc, Rtmp); 1.3164 + __ fence(); 1.3165 + %} 1.3166 + 1.3167 + enc_class enc_GetAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ 1.3168 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3169 + 1.3170 + MacroAssembler _masm(&cbuf); 1.3171 + Register Rtmp = R0; 1.3172 + Register Rres = $res$$Register; 1.3173 + Register Rsrc = $src$$Register; 1.3174 + Register Rptr = $mem_ptr$$Register; 1.3175 + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); 1.3176 + Register Rold = RegCollision ? Rtmp : Rres; 1.3177 + 1.3178 + Label Lretry; 1.3179 + __ bind(Lretry); 1.3180 + __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); 1.3181 + __ add(Rtmp, Rsrc, Rold); 1.3182 + __ stdcx_(Rtmp, Rptr); 1.3183 + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { 1.3184 + __ bne_predict_not_taken(CCR0, Lretry); 1.3185 + } else { 1.3186 + __ bne( CCR0, Lretry); 1.3187 + } 1.3188 + if (RegCollision) __ subf(Rres, Rsrc, Rtmp); 1.3189 + __ fence(); 1.3190 + %} 1.3191 + 1.3192 + enc_class enc_GetAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ 1.3193 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3194 + 1.3195 + MacroAssembler _masm(&cbuf); 1.3196 + Register Rtmp = R0; 1.3197 + Register Rres = $res$$Register; 1.3198 + Register Rsrc = $src$$Register; 1.3199 + Register Rptr = $mem_ptr$$Register; 1.3200 + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); 1.3201 + Register Rold = RegCollision ? 
Rtmp : Rres; 1.3202 + 1.3203 + Label Lretry; 1.3204 + __ bind(Lretry); 1.3205 + __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); 1.3206 + __ stwcx_(Rsrc, Rptr); 1.3207 + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { 1.3208 + __ bne_predict_not_taken(CCR0, Lretry); 1.3209 + } else { 1.3210 + __ bne( CCR0, Lretry); 1.3211 + } 1.3212 + if (RegCollision) __ mr(Rres, Rtmp); 1.3213 + __ fence(); 1.3214 + %} 1.3215 + 1.3216 + enc_class enc_GetAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ 1.3217 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3218 + 1.3219 + MacroAssembler _masm(&cbuf); 1.3220 + Register Rtmp = R0; 1.3221 + Register Rres = $res$$Register; 1.3222 + Register Rsrc = $src$$Register; 1.3223 + Register Rptr = $mem_ptr$$Register; 1.3224 + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); 1.3225 + Register Rold = RegCollision ? Rtmp : Rres; 1.3226 + 1.3227 + Label Lretry; 1.3228 + __ bind(Lretry); 1.3229 + __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); 1.3230 + __ stdcx_(Rsrc, Rptr); 1.3231 + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { 1.3232 + __ bne_predict_not_taken(CCR0, Lretry); 1.3233 + } else { 1.3234 + __ bne( CCR0, Lretry); 1.3235 + } 1.3236 + if (RegCollision) __ mr(Rres, Rtmp); 1.3237 + __ fence(); 1.3238 + %} 1.3239 + 1.3240 + // This enc_class is needed so that scheduler gets proper 1.3241 + // input mapping for latency computation. 
1.3242 + enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.3243 + // TODO: PPC port $archOpcode(ppc64Opcode_andc); 1.3244 + MacroAssembler _masm(&cbuf); 1.3245 + __ andc($dst$$Register, $src1$$Register, $src2$$Register); 1.3246 + %} 1.3247 + 1.3248 + enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{ 1.3249 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3250 + 1.3251 + MacroAssembler _masm(&cbuf); 1.3252 + 1.3253 + Label done; 1.3254 + __ cmpwi($crx$$CondRegister, $src$$Register, 0); 1.3255 + __ li($dst$$Register, $zero$$constant); 1.3256 + __ beq($crx$$CondRegister, done); 1.3257 + __ li($dst$$Register, $notzero$$constant); 1.3258 + __ bind(done); 1.3259 + %} 1.3260 + 1.3261 + enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{ 1.3262 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3263 + 1.3264 + MacroAssembler _masm(&cbuf); 1.3265 + 1.3266 + Label done; 1.3267 + __ cmpdi($crx$$CondRegister, $src$$Register, 0); 1.3268 + __ li($dst$$Register, $zero$$constant); 1.3269 + __ beq($crx$$CondRegister, done); 1.3270 + __ li($dst$$Register, $notzero$$constant); 1.3271 + __ bind(done); 1.3272 + %} 1.3273 + 1.3274 + enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{ 1.3275 + // TODO: PPC port $archOpcode(ppc64Opcode_cmove); 1.3276 + 1.3277 + MacroAssembler _masm(&cbuf); 1.3278 + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); 1.3279 + Label done; 1.3280 + __ bso($crx$$CondRegister, done); 1.3281 + __ ld($dst$$Register, Idisp, $mem$$base$$Register); 1.3282 + // TODO PPC port __ endgroup_if_needed(_size == 12); 1.3283 + __ bind(done); 1.3284 + %} 1.3285 + 1.3286 + enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{ 1.3287 + // TODO: PPC port $archOpcode(ppc64Opcode_bc); 1.3288 + 1.3289 + MacroAssembler _masm(&cbuf); 1.3290 + Label d; // dummy 1.3291 + __ bind(d); 1.3292 + Label* p 
= ($lbl$$label); 1.3293 + // `p' is `NULL' when this encoding class is used only to 1.3294 + // determine the size of the encoded instruction. 1.3295 + Label& l = (NULL == p)? d : *(p); 1.3296 + int cc = $cmp$$cmpcode; 1.3297 + int flags_reg = $crx$$reg; 1.3298 + assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding"); 1.3299 + int bhint = Assembler::bhintNoHint; 1.3300 + 1.3301 + if (UseStaticBranchPredictionForUncommonPathsPPC64) { 1.3302 + if (_prob <= PROB_NEVER) { 1.3303 + bhint = Assembler::bhintIsNotTaken; 1.3304 + } else if (_prob >= PROB_ALWAYS) { 1.3305 + bhint = Assembler::bhintIsTaken; 1.3306 + } 1.3307 + } 1.3308 + 1.3309 + __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)), 1.3310 + cc_to_biint(cc, flags_reg), 1.3311 + l); 1.3312 + %} 1.3313 + 1.3314 + enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{ 1.3315 + // The scheduler doesn't know about branch shortening, so we set the opcode 1.3316 + // to ppc64Opcode_bc in order to hide this detail from the scheduler. 1.3317 + // TODO: PPC port $archOpcode(ppc64Opcode_bc); 1.3318 + 1.3319 + MacroAssembler _masm(&cbuf); 1.3320 + Label d; // dummy 1.3321 + __ bind(d); 1.3322 + Label* p = ($lbl$$label); 1.3323 + // `p' is `NULL' when this encoding class is used only to 1.3324 + // determine the size of the encoded instruction. 1.3325 + Label& l = (NULL == p)? d : *(p); 1.3326 + int cc = $cmp$$cmpcode; 1.3327 + int flags_reg = $crx$$reg; 1.3328 + int bhint = Assembler::bhintNoHint; 1.3329 + 1.3330 + if (UseStaticBranchPredictionForUncommonPathsPPC64) { 1.3331 + if (_prob <= PROB_NEVER) { 1.3332 + bhint = Assembler::bhintIsNotTaken; 1.3333 + } else if (_prob >= PROB_ALWAYS) { 1.3334 + bhint = Assembler::bhintIsTaken; 1.3335 + } 1.3336 + } 1.3337 + 1.3338 + // Tell the conditional far branch to optimize itself when being relocated. 
1.3339 + __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)), 1.3340 + cc_to_biint(cc, flags_reg), 1.3341 + l, 1.3342 + MacroAssembler::bc_far_optimize_on_relocate); 1.3343 + %} 1.3344 + 1.3345 + // Branch used with Power6 scheduling (can be shortened without changing the node). 1.3346 + enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{ 1.3347 + // The scheduler doesn't know about branch shortening, so we set the opcode 1.3348 + // to ppc64Opcode_bc in order to hide this detail from the scheduler. 1.3349 + // TODO: PPC port $archOpcode(ppc64Opcode_bc); 1.3350 + 1.3351 + MacroAssembler _masm(&cbuf); 1.3352 + Label d; // dummy 1.3353 + __ bind(d); 1.3354 + Label* p = ($lbl$$label); 1.3355 + // `p' is `NULL' when this encoding class is used only to 1.3356 + // determine the size of the encoded instruction. 1.3357 + Label& l = (NULL == p)? d : *(p); 1.3358 + int cc = $cmp$$cmpcode; 1.3359 + int flags_reg = $crx$$reg; 1.3360 + int bhint = Assembler::bhintNoHint; 1.3361 + 1.3362 + if (UseStaticBranchPredictionForUncommonPathsPPC64) { 1.3363 + if (_prob <= PROB_NEVER) { 1.3364 + bhint = Assembler::bhintIsNotTaken; 1.3365 + } else if (_prob >= PROB_ALWAYS) { 1.3366 + bhint = Assembler::bhintIsTaken; 1.3367 + } 1.3368 + } 1.3369 + 1.3370 +#if 0 // TODO: PPC port 1.3371 + if (_size == 8) { 1.3372 + // Tell the conditional far branch to optimize itself when being relocated. 1.3373 + __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)), 1.3374 + cc_to_biint(cc, flags_reg), 1.3375 + l, 1.3376 + MacroAssembler::bc_far_optimize_on_relocate); 1.3377 + } else { 1.3378 + __ bc (Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)), 1.3379 + cc_to_biint(cc, flags_reg), 1.3380 + l); 1.3381 + } 1.3382 +#endif 1.3383 + Unimplemented(); 1.3384 + %} 1.3385 + 1.3386 + // Postalloc expand emitter for loading a replicatef float constant from 1.3387 + // the method's TOC. 
1.3388 + // Enc_class needed as consttanttablebase is not supported by postalloc 1.3389 + // expand. 1.3390 + enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{ 1.3391 + // Create new nodes. 1.3392 + 1.3393 + // Make an operand with the bit pattern to load as float. 1.3394 + immLOper *op_repl = new (C) immLOper((jlong)replicate_immF(op_src->constantF())); 1.3395 + 1.3396 + loadConLNodesTuple loadConLNodes = 1.3397 + loadConLNodesTuple_create(C, ra_, n_toc, op_repl, 1.3398 + ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.3399 + 1.3400 + // Push new nodes. 1.3401 + if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi); 1.3402 + if (loadConLNodes._last) nodes->push(loadConLNodes._last); 1.3403 + 1.3404 + assert(nodes->length() >= 1, "must have created at least 1 node"); 1.3405 + assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long"); 1.3406 + %} 1.3407 + 1.3408 + // This enc_class is needed so that scheduler gets proper 1.3409 + // input mapping for latency computation. 1.3410 + enc_class enc_poll(immI dst, iRegLdst poll) %{ 1.3411 + // TODO: PPC port $archOpcode(ppc64Opcode_ld); 1.3412 + // Fake operand dst needed for PPC scheduler. 1.3413 + assert($dst$$constant == 0x0, "dst must be 0x0"); 1.3414 + 1.3415 + MacroAssembler _masm(&cbuf); 1.3416 + // Mark the code position where the load from the safepoint 1.3417 + // polling page was emitted as relocInfo::poll_type. 1.3418 + __ relocate(relocInfo::poll_type); 1.3419 + __ load_from_polling_page($poll$$Register); 1.3420 + %} 1.3421 + 1.3422 + // A Java static call or a runtime call. 1.3423 + // 1.3424 + // Branch-and-link relative to a trampoline. 1.3425 + // The trampoline loads the target address and does a long branch to there. 1.3426 + // In case we call java, the trampoline branches to a interpreter_stub 1.3427 + // which loads the inline cache and the real call target from the constant pool. 
1.3428 + // 1.3429 + // This basically looks like this: 1.3430 + // 1.3431 + // >>>> consts -+ -+ 1.3432 + // | |- offset1 1.3433 + // [call target1] | <-+ 1.3434 + // [IC cache] |- offset2 1.3435 + // [call target2] <--+ 1.3436 + // 1.3437 + // <<<< consts 1.3438 + // >>>> insts 1.3439 + // 1.3440 + // bl offset16 -+ -+ ??? // How many bits available? 1.3441 + // | | 1.3442 + // <<<< insts | | 1.3443 + // >>>> stubs | | 1.3444 + // | |- trampoline_stub_Reloc 1.3445 + // trampoline stub: | <-+ 1.3446 + // r2 = toc | 1.3447 + // r2 = [r2 + offset1] | // Load call target1 from const section 1.3448 + // mtctr r2 | 1.3449 + // bctr |- static_stub_Reloc 1.3450 + // comp_to_interp_stub: <---+ 1.3451 + // r1 = toc 1.3452 + // ICreg = [r1 + IC_offset] // Load IC from const section 1.3453 + // r1 = [r1 + offset2] // Load call target2 from const section 1.3454 + // mtctr r1 1.3455 + // bctr 1.3456 + // 1.3457 + // <<<< stubs 1.3458 + // 1.3459 + // The call instruction in the code either 1.3460 + // - Branches directly to a compiled method if the offset is encodable in instruction. 1.3461 + // - Branches to the trampoline stub if the offset to the compiled method is not encodable. 1.3462 + // - Branches to the compiled_to_interp stub if the target is interpreted. 1.3463 + // 1.3464 + // Further there are three relocations from the loads to the constants in 1.3465 + // the constant section. 1.3466 + // 1.3467 + // Usage of r1 and r2 in the stubs allows to distinguish them. 1.3468 + enc_class enc_java_static_call(method meth) %{ 1.3469 + // TODO: PPC port $archOpcode(ppc64Opcode_bl); 1.3470 + 1.3471 + MacroAssembler _masm(&cbuf); 1.3472 + address entry_point = (address)$meth$$method; 1.3473 + 1.3474 + if (!_method) { 1.3475 + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 1.3476 + emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type); 1.3477 + } else { 1.3478 + // Remember the offset not the address. 
1.3479 + const int start_offset = __ offset(); 1.3480 + // The trampoline stub. 1.3481 + if (!Compile::current()->in_scratch_emit_size()) { 1.3482 + // No entry point given, use the current pc. 1.3483 + // Make sure branch fits into 1.3484 + if (entry_point == 0) entry_point = __ pc(); 1.3485 + 1.3486 + // Put the entry point as a constant into the constant pool. 1.3487 + const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none); 1.3488 + const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr); 1.3489 + 1.3490 + // Emit the trampoline stub which will be related to the branch-and-link below. 1.3491 + CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset); 1.3492 + __ relocate(_optimized_virtual ? 1.3493 + relocInfo::opt_virtual_call_type : relocInfo::static_call_type); 1.3494 + } 1.3495 + 1.3496 + // The real call. 1.3497 + // Note: At this point we do not have the address of the trampoline 1.3498 + // stub, and the entry point might be too far away for bl, so __ pc() 1.3499 + // serves as dummy and the bl will be patched later. 1.3500 + cbuf.set_insts_mark(); 1.3501 + __ bl(__ pc()); // Emits a relocation. 1.3502 + 1.3503 + // The stub for call to interpreter. 1.3504 + CompiledStaticCall::emit_to_interp_stub(cbuf); 1.3505 + } 1.3506 + %} 1.3507 + 1.3508 + // Emit a method handle call. 1.3509 + // 1.3510 + // Method handle calls from compiled to compiled are going thru a 1.3511 + // c2i -> i2c adapter, extending the frame for their arguments. The 1.3512 + // caller however, returns directly to the compiled callee, that has 1.3513 + // to cope with the extended frame. We restore the original frame by 1.3514 + // loading the callers sp and adding the calculated framesize. 
1.3515 + enc_class enc_java_handle_call(method meth) %{ 1.3516 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3517 + 1.3518 + MacroAssembler _masm(&cbuf); 1.3519 + address entry_point = (address)$meth$$method; 1.3520 + 1.3521 + // Remember the offset not the address. 1.3522 + const int start_offset = __ offset(); 1.3523 + // The trampoline stub. 1.3524 + if (!ra_->C->in_scratch_emit_size()) { 1.3525 + // No entry point given, use the current pc. 1.3526 + // Make sure branch fits into 1.3527 + if (entry_point == 0) entry_point = __ pc(); 1.3528 + 1.3529 + // Put the entry point as a constant into the constant pool. 1.3530 + const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none); 1.3531 + const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr); 1.3532 + 1.3533 + // Emit the trampoline stub which will be related to the branch-and-link below. 1.3534 + CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset); 1.3535 + assert(_optimized_virtual, "methodHandle call should be a virtual call"); 1.3536 + __ relocate(relocInfo::opt_virtual_call_type); 1.3537 + } 1.3538 + 1.3539 + // The real call. 1.3540 + // Note: At this point we do not have the address of the trampoline 1.3541 + // stub, and the entry point might be too far away for bl, so __ pc() 1.3542 + // serves as dummy and the bl will be patched later. 1.3543 + cbuf.set_insts_mark(); 1.3544 + __ bl(__ pc()); // Emits a relocation. 1.3545 + 1.3546 + assert(_method, "execute next statement conditionally"); 1.3547 + // The stub for call to interpreter. 1.3548 + CompiledStaticCall::emit_to_interp_stub(cbuf); 1.3549 + 1.3550 + // Restore original sp. 1.3551 + __ ld(R11_scratch1, 0, R1_SP); // Load caller sp. 
1.3552 + const long framesize = ra_->C->frame_slots() << LogBytesPerInt; 1.3553 + unsigned int bytes = (unsigned int)framesize; 1.3554 + long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes); 1.3555 + if (Assembler::is_simm(-offset, 16)) { 1.3556 + __ addi(R1_SP, R11_scratch1, -offset); 1.3557 + } else { 1.3558 + __ load_const_optimized(R12_scratch2, -offset); 1.3559 + __ add(R1_SP, R11_scratch1, R12_scratch2); 1.3560 + } 1.3561 +#ifdef ASSERT 1.3562 + __ ld(R12_scratch2, 0, R1_SP); // Load from unextended_sp. 1.3563 + __ cmpd(CCR0, R11_scratch1, R12_scratch2); 1.3564 + __ asm_assert_eq("backlink changed", 0x8000); 1.3565 +#endif 1.3566 + // If fails should store backlink before unextending. 1.3567 + 1.3568 + if (ra_->C->env()->failing()) { 1.3569 + return; 1.3570 + } 1.3571 + %} 1.3572 + 1.3573 + // Second node of expanded dynamic call - the call. 1.3574 + enc_class enc_java_dynamic_call_sched(method meth) %{ 1.3575 + // TODO: PPC port $archOpcode(ppc64Opcode_bl); 1.3576 + 1.3577 + MacroAssembler _masm(&cbuf); 1.3578 + 1.3579 + if (!ra_->C->in_scratch_emit_size()) { 1.3580 + // Create a call trampoline stub for the given method. 1.3581 + const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method; 1.3582 + const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none); 1.3583 + const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const); 1.3584 + CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset()); 1.3585 + 1.3586 + if (ra_->C->env()->failing()) 1.3587 + return; 1.3588 + 1.3589 + // Build relocation at call site with ic position as data. 
1.3590 + assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) || 1.3591 + (_load_ic_hi_node == NULL && _load_ic_node != NULL), 1.3592 + "must have one, but can't have both"); 1.3593 + assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) || 1.3594 + (_load_ic_node != NULL && _load_ic_node->_cbuf_insts_offset != -1), 1.3595 + "must contain instruction offset"); 1.3596 + const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL 1.3597 + ? _load_ic_hi_node->_cbuf_insts_offset 1.3598 + : _load_ic_node->_cbuf_insts_offset; 1.3599 + const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset); 1.3600 + assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr), 1.3601 + "should be load from TOC"); 1.3602 + 1.3603 + __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr)); 1.3604 + } 1.3605 + 1.3606 + // At this point I do not have the address of the trampoline stub, 1.3607 + // and the entry point might be too far away for bl. Pc() serves 1.3608 + // as dummy and bl will be patched later. 1.3609 + __ bl((address) __ pc()); 1.3610 + %} 1.3611 + 1.3612 + // postalloc expand emitter for virtual calls. 1.3613 + enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{ 1.3614 + 1.3615 + // Create the nodes for loading the IC from the TOC. 1.3616 + loadConLNodesTuple loadConLNodes_IC = 1.3617 + loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong)Universe::non_oop_word()), 1.3618 + OptoReg::Name(R19_H_num), OptoReg::Name(R19_num)); 1.3619 + 1.3620 + // Create the call node. 
1.3621 + CallDynamicJavaDirectSchedNode *call = new (C) CallDynamicJavaDirectSchedNode(); 1.3622 + call->_method_handle_invoke = _method_handle_invoke; 1.3623 + call->_vtable_index = _vtable_index; 1.3624 + call->_method = _method; 1.3625 + call->_bci = _bci; 1.3626 + call->_optimized_virtual = _optimized_virtual; 1.3627 + call->_tf = _tf; 1.3628 + call->_entry_point = _entry_point; 1.3629 + call->_cnt = _cnt; 1.3630 + call->_argsize = _argsize; 1.3631 + call->_oop_map = _oop_map; 1.3632 + call->_jvms = _jvms; 1.3633 + call->_jvmadj = _jvmadj; 1.3634 + call->_in_rms = _in_rms; 1.3635 + call->_nesting = _nesting; 1.3636 + 1.3637 + // New call needs all inputs of old call. 1.3638 + // Req... 1.3639 + for (uint i = 0; i < req(); ++i) { 1.3640 + // The expanded node does not need toc any more. 1.3641 + // Add the inline cache constant here instead. This expresses the 1.3642 + // register of the inline cache must be live at the call. 1.3643 + // Else we would have to adapt JVMState by -1. 1.3644 + if (i == mach_constant_base_node_input()) { 1.3645 + call->add_req(loadConLNodes_IC._last); 1.3646 + } else { 1.3647 + call->add_req(in(i)); 1.3648 + } 1.3649 + } 1.3650 + // ...as well as prec 1.3651 + for (uint i = req(); i < len(); ++i) { 1.3652 + call->add_prec(in(i)); 1.3653 + } 1.3654 + 1.3655 + // Remember nodes loading the inline cache into r19. 1.3656 + call->_load_ic_hi_node = loadConLNodes_IC._large_hi; 1.3657 + call->_load_ic_node = loadConLNodes_IC._small; 1.3658 + 1.3659 + // Operands for new nodes. 1.3660 + call->_opnds[0] = _opnds[0]; 1.3661 + call->_opnds[1] = _opnds[1]; 1.3662 + 1.3663 + // Only the inline cache is associated with a register. 1.3664 + assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19"); 1.3665 + 1.3666 + // Push new nodes. 
1.3667 + if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi); 1.3668 + if (loadConLNodes_IC._last) nodes->push(loadConLNodes_IC._last); 1.3669 + nodes->push(call); 1.3670 + %} 1.3671 + 1.3672 + // Compound version of call dynamic 1.3673 + // Toc is only passed so that it can be used in ins_encode statement. 1.3674 + // In the code we have to use $constanttablebase. 1.3675 + enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{ 1.3676 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3677 + MacroAssembler _masm(&cbuf); 1.3678 + int start_offset = __ offset(); 1.3679 + 1.3680 + Register Rtoc = (ra_) ? $constanttablebase : R2_TOC; 1.3681 +#if 0 1.3682 + int vtable_index = this->_vtable_index; 1.3683 + if (_vtable_index < 0) { 1.3684 + // Must be invalid_vtable_index, not nonvirtual_vtable_index. 1.3685 + assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value"); 1.3686 + Register ic_reg = as_Register(Matcher::inline_cache_reg_encode()); 1.3687 + 1.3688 + // Virtual call relocation will point to ic load. 1.3689 + address virtual_call_meta_addr = __ pc(); 1.3690 + // Load a clear inline cache. 1.3691 + AddressLiteral empty_ic((address) Universe::non_oop_word()); 1.3692 + __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc); 1.3693 + // CALL to fixup routine. Fixup routine uses ScopeDesc info 1.3694 + // to determine who we intended to call. 1.3695 + __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr)); 1.3696 + emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none); 1.3697 + assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset, 1.3698 + "Fix constant in ret_addr_offset()"); 1.3699 + } else { 1.3700 + assert(!UseInlineCaches, "expect vtable calls only if not using ICs"); 1.3701 + // Go thru the vtable. Get receiver klass. Receiver already 1.3702 + // checked for non-null. 
If we'll go thru a C2I adapter, the 1.3703 + // interpreter expects method in R19_method. 1.3704 + 1.3705 + __ load_klass(R11_scratch1, R3); 1.3706 + 1.3707 + int entry_offset = InstanceKlass::vtable_start_offset() + _vtable_index * vtableEntry::size(); 1.3708 + int v_off = entry_offset * wordSize + vtableEntry::method_offset_in_bytes(); 1.3709 + __ li(R19_method, v_off); 1.3710 + __ ldx(R19_method/*method oop*/, R19_method/*method offset*/, R11_scratch1/*class*/); 1.3711 + // NOTE: for vtable dispatches, the vtable entry will never be 1.3712 + // null. However it may very well end up in handle_wrong_method 1.3713 + // if the method is abstract for the particular class. 1.3714 + __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method); 1.3715 + // Call target. Either compiled code or C2I adapter. 1.3716 + __ mtctr(R11_scratch1); 1.3717 + __ bctrl(); 1.3718 + if (((MachCallDynamicJavaNode*)this)->ret_addr_offset() != __ offset() - start_offset) { 1.3719 + tty->print(" %d, %d\n", ((MachCallDynamicJavaNode*)this)->ret_addr_offset(),__ offset() - start_offset); 1.3720 + } 1.3721 + assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset, 1.3722 + "Fix constant in ret_addr_offset()"); 1.3723 + } 1.3724 +#endif 1.3725 + Unimplemented(); // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!). 1.3726 + %} 1.3727 + 1.3728 + // a runtime call 1.3729 + enc_class enc_java_to_runtime_call (method meth) %{ 1.3730 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.3731 + 1.3732 + MacroAssembler _masm(&cbuf); 1.3733 + const address start_pc = __ pc(); 1.3734 + 1.3735 +#if defined(ABI_ELFv2) 1.3736 + address entry= !($meth$$method) ? NULL : (address)$meth$$method; 1.3737 + __ call_c(entry, relocInfo::runtime_call_type); 1.3738 +#else 1.3739 + // The function we're going to call. 1.3740 + FunctionDescriptor fdtemp; 1.3741 + const FunctionDescriptor* fd = !($meth$$method) ? 
&fdtemp : (FunctionDescriptor*)$meth$$method; 1.3742 + 1.3743 + Register Rtoc = R12_scratch2; 1.3744 + // Calculate the method's TOC. 1.3745 + __ calculate_address_from_global_toc(Rtoc, __ method_toc()); 1.3746 + // Put entry, env, toc into the constant pool, this needs up to 3 constant 1.3747 + // pool entries; call_c_using_toc will optimize the call. 1.3748 + __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc); 1.3749 +#endif 1.3750 + 1.3751 + // Check the ret_addr_offset. 1.3752 + assert(((MachCallRuntimeNode*)this)->ret_addr_offset() == __ last_calls_return_pc() - start_pc, 1.3753 + "Fix constant in ret_addr_offset()"); 1.3754 + %} 1.3755 + 1.3756 + // Move to ctr for leaf call. 1.3757 + // This enc_class is needed so that scheduler gets proper 1.3758 + // input mapping for latency computation. 1.3759 + enc_class enc_leaf_call_mtctr(iRegLsrc src) %{ 1.3760 + // TODO: PPC port $archOpcode(ppc64Opcode_mtctr); 1.3761 + MacroAssembler _masm(&cbuf); 1.3762 + __ mtctr($src$$Register); 1.3763 + %} 1.3764 + 1.3765 + // Postalloc expand emitter for runtime leaf calls. 1.3766 + enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{ 1.3767 + loadConLNodesTuple loadConLNodes_Entry; 1.3768 +#if defined(ABI_ELFv2) 1.3769 + jlong entry_address = (jlong) this->entry_point(); 1.3770 + assert(entry_address, "need address here"); 1.3771 + loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper(entry_address), 1.3772 + OptoReg::Name(R12_H_num), OptoReg::Name(R12_num)); 1.3773 +#else 1.3774 + // Get the struct that describes the function we are about to call. 1.3775 + FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point(); 1.3776 + assert(fd, "need fd here"); 1.3777 + jlong entry_address = (jlong) fd->entry(); 1.3778 + // new nodes 1.3779 + loadConLNodesTuple loadConLNodes_Env; 1.3780 + loadConLNodesTuple loadConLNodes_Toc; 1.3781 + 1.3782 + // Create nodes and operands for loading the entry point. 
1.3783 + loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper(entry_address), 1.3784 + OptoReg::Name(R12_H_num), OptoReg::Name(R12_num)); 1.3785 + 1.3786 + 1.3787 + // Create nodes and operands for loading the env pointer. 1.3788 + if (fd->env() != NULL) { 1.3789 + loadConLNodes_Env = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->env()), 1.3790 + OptoReg::Name(R11_H_num), OptoReg::Name(R11_num)); 1.3791 + } else { 1.3792 + loadConLNodes_Env._large_hi = NULL; 1.3793 + loadConLNodes_Env._large_lo = NULL; 1.3794 + loadConLNodes_Env._small = NULL; 1.3795 + loadConLNodes_Env._last = new (C) loadConL16Node(); 1.3796 + loadConLNodes_Env._last->_opnds[0] = new (C) iRegLdstOper(); 1.3797 + loadConLNodes_Env._last->_opnds[1] = new (C) immL16Oper(0); 1.3798 + ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num)); 1.3799 + } 1.3800 + 1.3801 + // Create nodes and operands for loading the Toc point. 1.3802 + loadConLNodes_Toc = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->toc()), 1.3803 + OptoReg::Name(R2_H_num), OptoReg::Name(R2_num)); 1.3804 +#endif // ABI_ELFv2 1.3805 + // mtctr node 1.3806 + MachNode *mtctr = new (C) CallLeafDirect_mtctrNode(); 1.3807 + 1.3808 + assert(loadConLNodes_Entry._last != NULL, "entry must exist"); 1.3809 + mtctr->add_req(0, loadConLNodes_Entry._last); 1.3810 + 1.3811 + mtctr->_opnds[0] = new (C) iRegLdstOper(); 1.3812 + mtctr->_opnds[1] = new (C) iRegLdstOper(); 1.3813 + 1.3814 + // call node 1.3815 + MachCallLeafNode *call = new (C) CallLeafDirectNode(); 1.3816 + 1.3817 + call->_opnds[0] = _opnds[0]; 1.3818 + call->_opnds[1] = new (C) methodOper((intptr_t) entry_address); // May get set later. 1.3819 + 1.3820 + // Make the new call node look like the old one. 
1.3821 + call->_name = _name; 1.3822 + call->_tf = _tf; 1.3823 + call->_entry_point = _entry_point; 1.3824 + call->_cnt = _cnt; 1.3825 + call->_argsize = _argsize; 1.3826 + call->_oop_map = _oop_map; 1.3827 + guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms()."); 1.3828 + call->_jvms = NULL; 1.3829 + call->_jvmadj = _jvmadj; 1.3830 + call->_in_rms = _in_rms; 1.3831 + call->_nesting = _nesting; 1.3832 + 1.3833 + 1.3834 + // New call needs all inputs of old call. 1.3835 + // Req... 1.3836 + for (uint i = 0; i < req(); ++i) { 1.3837 + if (i != mach_constant_base_node_input()) { 1.3838 + call->add_req(in(i)); 1.3839 + } 1.3840 + } 1.3841 + 1.3842 + // These must be reqired edges, as the registers are live up to 1.3843 + // the call. Else the constants are handled as kills. 1.3844 + call->add_req(mtctr); 1.3845 +#if !defined(ABI_ELFv2) 1.3846 + call->add_req(loadConLNodes_Env._last); 1.3847 + call->add_req(loadConLNodes_Toc._last); 1.3848 +#endif 1.3849 + 1.3850 + // ...as well as prec 1.3851 + for (uint i = req(); i < len(); ++i) { 1.3852 + call->add_prec(in(i)); 1.3853 + } 1.3854 + 1.3855 + // registers 1.3856 + ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num)); 1.3857 + 1.3858 + // Insert the new nodes. 
1.3859 + if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi); 1.3860 + if (loadConLNodes_Entry._last) nodes->push(loadConLNodes_Entry._last); 1.3861 +#if !defined(ABI_ELFv2) 1.3862 + if (loadConLNodes_Env._large_hi) nodes->push(loadConLNodes_Env._large_hi); 1.3863 + if (loadConLNodes_Env._last) nodes->push(loadConLNodes_Env._last); 1.3864 + if (loadConLNodes_Toc._large_hi) nodes->push(loadConLNodes_Toc._large_hi); 1.3865 + if (loadConLNodes_Toc._last) nodes->push(loadConLNodes_Toc._last); 1.3866 +#endif 1.3867 + nodes->push(mtctr); 1.3868 + nodes->push(call); 1.3869 + %} 1.3870 +%} 1.3871 + 1.3872 +//----------FRAME-------------------------------------------------------------- 1.3873 +// Definition of frame structure and management information. 1.3874 + 1.3875 +frame %{ 1.3876 + // What direction does stack grow in (assumed to be same for native & Java). 1.3877 + stack_direction(TOWARDS_LOW); 1.3878 + 1.3879 + // These two registers define part of the calling convention between 1.3880 + // compiled code and the interpreter. 1.3881 + 1.3882 + // Inline Cache Register or method for I2C. 1.3883 + inline_cache_reg(R19); // R19_method 1.3884 + 1.3885 + // Method Oop Register when calling interpreter. 1.3886 + interpreter_method_oop_reg(R19); // R19_method 1.3887 + 1.3888 + // Optional: name the operand used by cisc-spilling to access 1.3889 + // [stack_pointer + offset]. 1.3890 + cisc_spilling_operand_name(indOffset); 1.3891 + 1.3892 + // Number of stack slots consumed by a Monitor enter. 1.3893 + sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size)); 1.3894 + 1.3895 + // Compiled code's Frame Pointer. 1.3896 + frame_pointer(R1); // R1_SP 1.3897 + 1.3898 + // Interpreter stores its frame pointer in a register which is 1.3899 + // stored to the stack by I2CAdaptors. I2CAdaptors convert from 1.3900 + // interpreted java to compiled java. 1.3901 + // 1.3902 + // R14_state holds pointer to caller's cInterpreter. 
1.3903 + interpreter_frame_pointer(R14); // R14_state 1.3904 + 1.3905 + stack_alignment(frame::alignment_in_bytes); 1.3906 + 1.3907 + in_preserve_stack_slots((frame::jit_in_preserve_size / VMRegImpl::stack_slot_size)); 1.3908 + 1.3909 + // Number of outgoing stack slots killed above the 1.3910 + // out_preserve_stack_slots for calls to C. Supports the var-args 1.3911 + // backing area for register parms. 1.3912 + // 1.3913 + varargs_C_out_slots_killed(((frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size)); 1.3914 + 1.3915 + // The after-PROLOG location of the return address. Location of 1.3916 + // return address specifies a type (REG or STACK) and a number 1.3917 + // representing the register number (i.e. - use a register name) or 1.3918 + // stack slot. 1.3919 + // 1.3920 + // A: Link register is stored in stack slot ... 1.3921 + // M: ... but it's in the caller's frame according to PPC-64 ABI. 1.3922 + // J: Therefore, we make sure that the link register is also in R11_scratch1 1.3923 + // at the end of the prolog. 1.3924 + // B: We use R20, now. 1.3925 + //return_addr(REG R20); 1.3926 + 1.3927 + // G: After reading the comments made by all the luminaries on their 1.3928 + // failure to tell the compiler where the return address really is, 1.3929 + // I hardly dare to try myself. However, I'm convinced it's in slot 1.3930 + // 4 what apparently works and saves us some spills. 1.3931 + return_addr(STACK 4); 1.3932 + 1.3933 + // This is the body of the function 1.3934 + // 1.3935 + // void Matcher::calling_convention(OptoRegPair* sig, // array of ideal regs 1.3936 + // uint length, // length of array 1.3937 + // bool is_outgoing) 1.3938 + // 1.3939 + // The `sig' array is to be updated. sig[j] represents the location 1.3940 + // of the j-th argument, either a register or a stack slot. 
1.3941 + 1.3942 + // Comment taken from i486.ad: 1.3943 + // Body of function which returns an integer array locating 1.3944 + // arguments either in registers or in stack slots. Passed an array 1.3945 + // of ideal registers called "sig" and a "length" count. Stack-slot 1.3946 + // offsets are based on outgoing arguments, i.e. a CALLER setting up 1.3947 + // arguments for a CALLEE. Incoming stack arguments are 1.3948 + // automatically biased by the preserve_stack_slots field above. 1.3949 + calling_convention %{ 1.3950 + // No difference between ingoing/outgoing. Just pass false. 1.3951 + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); 1.3952 + %} 1.3953 + 1.3954 + // Comment taken from i486.ad: 1.3955 + // Body of function which returns an integer array locating 1.3956 + // arguments either in registers or in stack slots. Passed an array 1.3957 + // of ideal registers called "sig" and a "length" count. Stack-slot 1.3958 + // offsets are based on outgoing arguments, i.e. a CALLER setting up 1.3959 + // arguments for a CALLEE. Incoming stack arguments are 1.3960 + // automatically biased by the preserve_stack_slots field above. 1.3961 + c_calling_convention %{ 1.3962 + // This is obviously always outgoing. 1.3963 + // C argument in register AND stack slot. 1.3964 + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); 1.3965 + %} 1.3966 + 1.3967 + // Location of native (C/C++) and interpreter return values. This 1.3968 + // is specified to be the same as Java. In the 32-bit VM, long 1.3969 + // values are actually returned from native calls in O0:O1 and 1.3970 + // returned to the interpreter in I0:I1. The copying to and from 1.3971 + // the register pairs is done by the appropriate call and epilog 1.3972 + // opcodes. This simplifies the register allocator. 
1.3973 + c_return_value %{ 1.3974 + assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) || 1.3975 + (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0), 1.3976 + "only return normal values"); 1.3977 + // enum names from opcodes.hpp: Op_Node Op_Set Op_RegN Op_RegI Op_RegP Op_RegF Op_RegD Op_RegL 1.3978 + static int typeToRegLo[Op_RegL+1] = { 0, 0, R3_num, R3_num, R3_num, F1_num, F1_num, R3_num }; 1.3979 + static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num }; 1.3980 + return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]); 1.3981 + %} 1.3982 + 1.3983 + // Location of compiled Java return values. Same as C 1.3984 + return_value %{ 1.3985 + assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) || 1.3986 + (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0), 1.3987 + "only return normal values"); 1.3988 + // enum names from opcodes.hpp: Op_Node Op_Set Op_RegN Op_RegI Op_RegP Op_RegF Op_RegD Op_RegL 1.3989 + static int typeToRegLo[Op_RegL+1] = { 0, 0, R3_num, R3_num, R3_num, F1_num, F1_num, R3_num }; 1.3990 + static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num }; 1.3991 + return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]); 1.3992 + %} 1.3993 +%} 1.3994 + 1.3995 + 1.3996 +//----------ATTRIBUTES--------------------------------------------------------- 1.3997 + 1.3998 +//----------Operand Attributes------------------------------------------------- 1.3999 +op_attrib op_cost(1); // Required cost attribute. 1.4000 + 1.4001 +//----------Instruction Attributes--------------------------------------------- 1.4002 + 1.4003 +// Cost attribute. required. 1.4004 +ins_attrib ins_cost(DEFAULT_COST); 1.4005 + 1.4006 +// Is this instruction a non-matching short branch variant of some 1.4007 +// long branch? Not required. 
1.4008 +ins_attrib ins_short_branch(0); 1.4009 + 1.4010 +ins_attrib ins_is_TrapBasedCheckNode(true); 1.4011 + 1.4012 +// Number of constants. 1.4013 +// This instruction uses the given number of constants 1.4014 +// (optional attribute). 1.4015 +// This is needed to determine in time whether the constant pool will 1.4016 +// exceed 4000 entries. Before postalloc_expand the overall number of constants 1.4017 +// is determined. It's also used to compute the constant pool size 1.4018 +// in Output(). 1.4019 +ins_attrib ins_num_consts(0); 1.4020 + 1.4021 +// Required alignment attribute (must be a power of 2) specifies the 1.4022 +// alignment that some part of the instruction (not necessarily the 1.4023 +// start) requires. If > 1, a compute_padding() function must be 1.4024 +// provided for the instruction. 1.4025 +ins_attrib ins_alignment(1); 1.4026 + 1.4027 +// Enforce/prohibit rematerializations. 1.4028 +// - If an instruction is attributed with 'ins_cannot_rematerialize(true)' 1.4029 +// then rematerialization of that instruction is prohibited and the 1.4030 +// instruction's value will be spilled if necessary. 1.4031 +// Causes that MachNode::rematerialize() returns false. 1.4032 +// - If an instruction is attributed with 'ins_should_rematerialize(true)' 1.4033 +// then rematerialization should be enforced and a copy of the instruction 1.4034 +// should be inserted if possible; rematerialization is not guaranteed. 1.4035 +// Note: this may result in rematerializations in front of every use. 1.4036 +// Causes that MachNode::rematerialize() can return true. 1.4037 +// (optional attribute) 1.4038 +ins_attrib ins_cannot_rematerialize(false); 1.4039 +ins_attrib ins_should_rematerialize(false); 1.4040 + 1.4041 +// Instruction has variable size depending on alignment. 1.4042 +ins_attrib ins_variable_size_depending_on_alignment(false); 1.4043 + 1.4044 +// Instruction is a nop. 
ins_attrib ins_is_nop(false);

// Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
ins_attrib ins_use_mach_if_fast_lock_node(false);

// Field for the toc offset of a constant.
//
// This is needed if the toc offset is not encodable as an immediate in
// the PPC load instruction. If so, the upper (hi) bits of the offset are
// added to the toc, and from this a load with immediate is performed.
// With postalloc expand, we get two nodes that require the same offset
// but which don't know about each other. The offset is only known
// when the constant is added to the constant pool during emitting.
// It is generated in the 'hi'-node adding the upper bits, and saved
// in this node.  The 'lo'-node has a link to the 'hi'-node and reads
// the offset from there when it gets encoded.
ins_attrib ins_field_const_toc_offset(0);
ins_attrib ins_field_const_toc_offset_hi_node(0);

// A field that can hold the instruction's offset in the code buffer.
// Set in the node's emitter. Defaults to -1.
ins_attrib ins_field_cbuf_insts_offset(-1);

// Fields for referencing a call's load-IC-node.
// If the toc offset can not be encoded as an immediate in a load, we
// use two nodes.
ins_attrib ins_field_load_ic_hi_node(0);
ins_attrib ins_field_load_ic_node(0);

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct
// parsing in the ADLC because operands constitute user defined types
// which are used in instruction definitions.
//
// Formats are generated automatically for constants and base registers.
//----------Simple Operands----------------------------------------------------
// Immediate Operands
//
// Every operand in this group matches a ConI node and uses the constant
// interface. Where present, the predicate restricts the accepted values.

// Integer Immediate: 32-bit
operand immI() %{
  match(ConI);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 8-bit (accepted when Assembler::is_simm(value, 8) holds)
operand immI8() %{
  predicate(Assembler::is_simm(n->get_int(), 8));
  op_cost(0);
  match(ConI);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 16-bit
operand immI16() %{
  predicate(Assembler::is_simm(n->get_int(), 16));
  op_cost(0);
  match(ConI);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 32-bit, where lowest 16 bits are 0x0000
// and the upper 16 bits are not all zero.
operand immIhi16() %{
  predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate whose negation is a power of 2.
// The negation is done in unsigned (juint) arithmetic: negating the signed
// value directly overflows for min_jint, which is undefined behavior in C++.
// The unsigned result is identical for every other input.
operand immInegpow2() %{
  predicate(is_power_of_2_long((jlong) (julong) (-(juint) (n->get_int()))));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate where (value + 1) is a power of 2.
// The addition is done on the value widened to jlong, so it cannot overflow.
operand immIpow2minus1() %{
  predicate(is_power_of_2_long((((jlong) (n->get_int()))+1)));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate that is a power of 2 when read as an unsigned 32-bit value.
operand immIpowerOf2() %{
  predicate(is_power_of_2_long((((jlong) (julong) (juint) (n->get_int())))));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: the values 0-31
operand uimmI5() %{
  predicate(Assembler::is_uimm(n->get_int(), 5));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 6-bit
operand uimmI6() %{
  predicate(Assembler::is_uimm(n->get_int(), 6));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
operand uimmI6_ge32() %{
  predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 15-bit
operand uimmI15() %{
  predicate(Assembler::is_uimm(n->get_int(), 15));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 16-bit
operand uimmI16() %{
  predicate(Assembler::is_uimm(n->get_int(), 16));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int 0'.
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int 1'.
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int -1'.
operand immI_minus1() %{
  predicate(n->get_int() == -1);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int value 16.
operand immI_16() %{
  predicate(n->get_int() == 16);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int value 24.
operand immI_24() %{
  predicate(n->get_int() == 24);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed oops constants
// Pointer Immediate (matches any ConN)
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate (ConN whose narrow constant is 0)
operand immN_0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed klass constants
operand immNKlass() %{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// This operand can be used to avoid matching of an instruct
// with chain rule. Its predicate is constantly false.
operand immNKlass_NM() %{
  match(ConNKlass);
  predicate(false);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 64-bit
operand immP() %{
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Operand to avoid match of loadConP.
// This operand can be used to avoid matching of an instruct
// with chain rule. Its predicate is constantly false.
operand immP_NM() %{
  match(ConP);
  predicate(false);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'pointer 0'.
operand immP_0() %{
  predicate(n->get_ptr() == 0);
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// pointer 0x0 or 0x1
operand immP_0or1() %{
  predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: matches any ConL
operand immL() %{
  match(ConL);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit
operand immL16() %{
  predicate(Assembler::is_simm(n->get_long(), 16));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit, 4-aligned (low two bits are zero)
operand immL16Alg4() %{
  predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
operand immL32hi16() %{
  predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 32-bit
operand immL32() %{
  predicate(Assembler::is_simm(n->get_long(), 32));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 64-bit, where highest 16 bits are not 0x0000
// and the lower 48 bits are all zero.
operand immLhighest16() %{
  predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate whose negation is a power of 2.
// The negation is done in unsigned (julong) arithmetic: negating the signed
// value directly overflows for min_jlong, which is undefined behavior in C++.
// The result is identical for every other input.
operand immLnegpow2() %{
  predicate(is_power_of_2_long((jlong) (-(julong) (n->get_long()))));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate where (value + 1) is a power of 2; the value -1 is excluded.
// The increment is done in unsigned (julong) arithmetic so that max_jlong + 1
// wraps in a defined way instead of overflowing a signed type.
operand immLpow2minus1() %{
  predicate(is_power_of_2_long((jlong) ((julong) (n->get_long()) + 1)) &&
            (n->get_long() != (jlong)0xffffffffffffffffL));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'long 0'.
operand immL_0() %{
  predicate(n->get_long() == 0L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'long -1'.
operand immL_minus1() %{
  predicate(n->get_long() == -1L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Long Immediate: 16-bit
operand uimmL16() %{
  predicate(Assembler::is_uimm(n->get_long(), 16));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  match(ConF);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'float +0.0'.
// The predicate requires the value to compare equal to 0, classify as
// FP_ZERO and have a clear sign bit, so -0.0 is rejected.
operand immF_0() %{
  predicate((n->getf() == 0) &&
            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD() %{
  match(ConD);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Register Operands
// Integer Destination Register
// See definition of reg_class bits32_reg_rw.
// Besides RegI it also matches the fixed-register int operands defined below.
operand iRegIdst() %{
  constraint(ALLOC_IN_RC(bits32_reg_rw));
  match(RegI);
  match(rscratch1RegI);
  match(rscratch2RegI);
  match(rarg1RegI);
  match(rarg2RegI);
  match(rarg3RegI);
  match(rarg4RegI);
  format %{ %}
  interface(REG_INTER);
%}

// Integer Source Register
// See definition of reg_class bits32_reg_ro.
operand iRegIsrc() %{
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(RegI);
  match(rscratch1RegI);
  match(rscratch2RegI);
  match(rarg1RegI);
  match(rarg2RegI);
  match(rarg3RegI);
  match(rarg4RegI);
  format %{ %}
  interface(REG_INTER);
%}

// Fixed-register int operands. Each one is constrained to its own
// register class and matches iRegIdst.
operand rscratch1RegI() %{
  constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rscratch2RegI() %{
  constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg1RegI() %{
  constraint(ALLOC_IN_RC(rarg1_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegI() %{
  constraint(ALLOC_IN_RC(rarg2_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegI() %{
  constraint(ALLOC_IN_RC(rarg3_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegI() %{
  constraint(ALLOC_IN_RC(rarg4_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

// Fixed-register long operands for the argument registers. Each one is
// constrained to its own 64-bit register class and matches iRegLdst.
operand rarg1RegL() %{
  constraint(ALLOC_IN_RC(rarg1_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegL() %{
  constraint(ALLOC_IN_RC(rarg2_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegL() %{
  constraint(ALLOC_IN_RC(rarg3_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegL() %{
  constraint(ALLOC_IN_RC(rarg4_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Destination Register
// See definition of reg_class bits64_reg_rw.
operand iRegPdst() %{
  constraint(ALLOC_IN_RC(bits64_reg_rw));
  match(RegP);
  match(rscratch1RegP);
  match(rscratch2RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Destination Register
// Operand not using r11 and r12 (killed in epilog).
// Allocated in reg_class bits64_reg_leaf_call; unlike iRegPdst it does not
// match the rscratch1RegP/rscratch2RegP operands.
operand iRegPdstNoScratch() %{
  constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
  match(RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Source Register
// See definition of reg_class bits64_reg_ro.
// Matches everything iRegPdst matches, plus iRegPdst itself and threadRegP.
operand iRegPsrc() %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(RegP);
  match(iRegPdst);
  match(rscratch1RegP);
  match(rscratch2RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  match(threadRegP);
  format %{ %}
  interface(REG_INTER);
%}

// Thread operand.
operand threadRegP() %{
  constraint(ALLOC_IN_RC(thread_bits64_reg));
  match(iRegPdst);
  format %{ "R16" %}
  interface(REG_INTER);
%}

// Fixed-register pointer operands. Each one is constrained to its own
// 64-bit register class and matches iRegPdst.
operand rscratch1RegP() %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
  match(iRegPdst);
  format %{ "R11" %}
  interface(REG_INTER);
%}

operand rscratch2RegP() %{
  constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg1RegP() %{
  constraint(ALLOC_IN_RC(rarg1_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegP() %{
  constraint(ALLOC_IN_RC(rarg2_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegP() %{
  constraint(ALLOC_IN_RC(rarg3_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegP() %{
  constraint(ALLOC_IN_RC(rarg4_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow (compressed) oop source register, allocated in bits32_reg_ro.
operand iRegNsrc() %{
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(RegN);
  match(iRegNdst);

  format %{ %}
  interface(REG_INTER);
%}

// Narrow (compressed) oop destination register, allocated in bits32_reg_rw.
operand iRegNdst() %{
  constraint(ALLOC_IN_RC(bits32_reg_rw));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}

// Long Destination Register
// See definition of reg_class bits64_reg_rw.
operand iRegLdst() %{
  constraint(ALLOC_IN_RC(bits64_reg_rw));
  match(RegL);
  match(rscratch1RegL);
  match(rscratch2RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Long Source Register
// See definition of reg_class bits64_reg_ro.
operand iRegLsrc() %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(RegL);
  match(iRegLdst);
  match(rscratch1RegL);
  match(rscratch2RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Special operand for ConvL2I.
// Matches a ConvL2I applied to a long source register.
// NOTE(review): the interface statement below has no trailing ';', unlike
// most other operands in this file. Left as found - confirm whether ADLC
// treats the two forms differently before normalizing.
operand iRegL2Isrc(iRegLsrc reg) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(ConvL2I reg);
  format %{ "ConvL2I($reg)" %}
  interface(REG_INTER)
%}

// Fixed-register long operands for the scratch registers. Note that these
// match RegL directly.
operand rscratch1RegL() %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}

operand rscratch2RegL() %{
  constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Condition Code Flag Registers
operand flagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ %}
  interface(REG_INTER);
%}

// Condition Code Flag Register CR0
operand flagsRegCR0() %{
  constraint(ALLOC_IN_RC(int_flags_CR0));
  match(RegFlags);
  format %{ "CR0" %}
  interface(REG_INTER);
%}

// Condition Code Flag Register CR1
operand flagsRegCR1() %{
  constraint(ALLOC_IN_RC(int_flags_CR1));
  match(RegFlags);
  format %{ "CR1" %}
  interface(REG_INTER);
%}

// Condition Code Flag Register CR6
operand flagsRegCR6() %{
  constraint(ALLOC_IN_RC(int_flags_CR6));
  match(RegFlags);
  format %{ "CR6" %}
  interface(REG_INTER);
%}

// Count register operand, allocated in reg_class ctr_reg.
operand regCTR() %{
  constraint(ALLOC_IN_RC(ctr_reg));
  // RegFlags should work. Introducing a RegSpecial type would cause a
  // lot of changes.
  match(RegFlags);
  format %{"SR_CTR" %}
  interface(REG_INTER);
%}

// Double register operand.
operand regD() %{
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operand.
operand regF() %{
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
operand inline_cache_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

operand compiler_method_oop_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg)); // compiler_method_oop_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(r19_bits64_reg)); // interpreter_method_oop_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Operands to remove register moves in unscaled mode.
// Match read/write registers with an EncodeP node if neither shift nor add are required.
// Currently disabled: the predicate starts with a constant 'false'
// (see the TODO), so the narrow_oop_shift test is never reached.
// NOTE(review): the interface statement has no trailing ';' - left as found.
operand iRegP2N(iRegPsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/&& Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(EncodeP reg);
  format %{ "$reg" %}
  interface(REG_INTER)
%}

// Counterpart for DecodeN / DecodeNKlass; also disabled by a constant
// 'false' predicate.
// NOTE(review): the interface statement has no trailing ';' - left as found.
operand iRegN2P(iRegNsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(DecodeN reg);
  match(DecodeNKlass reg);
  format %{ "$reg" %}
  interface(REG_INTER)
%}

//----------Complex Operands---------------------------------------------------
// Indirect Memory Reference
// Base register only; index, scale and displacement are all zero.
operand indirect(iRegPsrc reg) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(reg);
  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with Offset
// Matches AddP of a base register and a 16-bit long immediate (immL16),
// which becomes the displacement.
operand indOffset16(iRegPsrc reg, immL16 offset) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP reg offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with 4-aligned Offset
// Same as indOffset16, but the offset operand is immL16Alg4.
operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP reg offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

//----------Complex Operands for Compressed OOPs-------------------------------
// Compressed OOPs with narrow_oop_shift == 0.
// All three operands in this group are currently disabled by a constant
// 'false' predicate (see the TODO in each predicate).

// Indirect Memory Reference, compressed OOP
operand indirectNarrow(iRegNsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(DecodeN reg);
  match(DecodeNKlass reg);
  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with Offset, compressed OOP
operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP (DecodeN reg) offset);
  match(AddP (DecodeNKlass reg) offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with 4-aligned Offset, compressed OOP
operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP (DecodeN reg) offset);
  match(AddP (DecodeNKlass reg) offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand
//
// This operand is used for loading and storing temporary values on
// the stack where a match requires a value to flow through memory.
// The five stack-slot operands below have the same shape, one per sReg
// type: allocated in reg_class stack_slots, addressed as base 0x1 (R1_SP)
// plus the slot's stack offset as displacement. Their match rules are
// commented out.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegI);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegL);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegP);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegF);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegD);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.
//----------Conditional Branch Operands----------------------------------------
// Comparison Op
//
// This is the operation of the comparison, and is limited to the
// following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
// (!=).
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below.

// When used for floating point comparisons: unordered same as less.
// Each condition is encoded in 4 bits: the upper two carry the BO part,
// the lower two the BI part (see the per-line comments).
operand cmpOp() %{
  match(Bool);
  format %{ "" %}
  interface(COND_INTER) %{
                           // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
                           //           BO          &  BI
    equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
    not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
    less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
    greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
    less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
    greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
    overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
    no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format.  The classic case of this is memory operands.
// Indirect is not included since its use is limited to Compare & Swap.
// NOTE(review): the sentence above looks stale - the memory classes below
// do list 'indirect'. Confirm and update.

opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
// Memory operand where offsets are 4-aligned. Required for ld, std.
opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
opclass indirectMemory(indirect, indirectNarrow);

// Special opclass for I and ConvL2I.
opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);

// Operand classes to match encode and decode. iRegN_P2N is only used
// for storeN. I have never seen an encode node elsewhere.
opclass iRegN_P2N(iRegNsrc, iRegP2N);
opclass iRegP_N2P(iRegPsrc, iRegN2P);

//----------PIPELINE-----------------------------------------------------------

pipeline %{

// See J.M.Tendler et al. "Power4 system microarchitecture", IBM
// J. Res. & Dev., No. 1, Jan. 2002.

//----------ATTRIBUTES---------------------------------------------------------
attributes %{

  // Power4 instructions are of fixed length.
  fixed_size_instructions;

  // TODO: if `bundle' means number of instructions fetched
  // per cycle, this is 8. If `bundle' means Power4 `group', that is
  // max instructions issued per cycle, this is 5.
  max_instructions_per_bundle = 8;

  // A Power4 instruction is 4 bytes long.
  instruction_unit_size = 4;

  // The Power4 processor fetches 64 bytes...
  instruction_fetch_unit_size = 64;

  // ...in one line
  // NOTE(review): no trailing ';' here, unlike the neighbouring attributes.
  // Left as found - confirm ADLC accepts both forms before normalizing.
  instruction_fetch_units = 1

  // Unused, list one so that array generated by adlc is not empty.
  // Aix compiler chokes if _nop_count = 0.
  nops(fxNop);
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine.
// The last three entries are unions of the paired units declared above them.
resources(
   PPC_BR,         // branch unit
   PPC_CR,         // condition unit
   PPC_FX1,        // integer arithmetic unit 1
   PPC_FX2,        // integer arithmetic unit 2
   PPC_LDST1,      // load/store unit 1
   PPC_LDST2,      // load/store unit 2
   PPC_FP1,        // float arithmetic unit 1
   PPC_FP2,        // float arithmetic unit 2
   PPC_LDST = PPC_LDST1 | PPC_LDST2,
   PPC_FX = PPC_FX1 | PPC_FX2,
   PPC_FP = PPC_FP1 | PPC_FP2
 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline
pipe_desc(
   // Power4 longest pipeline path
   PPC_IF,   // instruction fetch
   PPC_IC,
   //PPC_BP, // branch prediction
   PPC_D0,   // decode
   PPC_D1,   // decode
   PPC_D2,   // decode
   PPC_D3,   // decode
   PPC_Xfer1,
   PPC_GD,   // group definition
   PPC_MP,   // map
   PPC_ISS,  // issue
   PPC_RF,   // resource fetch
   PPC_EX1,  // execute (all units)
   PPC_EX2,  // execute (FP, LDST)
   PPC_EX3,  // execute (FP, LDST)
   PPC_EX4,  // execute (FP)
   PPC_EX5,  // execute (FP)
   PPC_EX6,  // execute (FP)
   PPC_WB,   // write back
   PPC_Xfer2,
   PPC_CP
 );

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Simple pipeline classes.
// Each class below is a single instruction with a fixed latency; none of
// them references the resources or stages declared above.

// Default pipeline class.
pipe_class pipe_class_default() %{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for empty instructions.
pipe_class pipe_class_empty() %{
  single_instruction;
  fixed_latency(0);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare() %{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for traps.
pipe_class pipe_class_trap() %{
  single_instruction;
  fixed_latency(100);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory() %{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call() %{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_default;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------

// Naming of instructions:
//   opA_operB / opA_operB_operC:
//     Operation 'op' with one or two source operands 'oper'. Result
//     type is A, source operand types are B and C.
//     Iff A == B == C, B and C are left out.
//
// The instructions are ordered according to the following scheme:
//  - loads
//  - load constants
//  - prefetch
//  - store
//  - encode/decode
//  - membar
//  - conditional moves
//  - compare & swap
//  - arithmetic and logic operations
//    * int: Add, Sub, Mul, Div, Mod
//    * int: lShift, arShift, urShift, rot
//    * float: Add, Sub, Mul, Div
//    * and, or, xor ...
1.5162 +// - register moves: float <-> int, reg <-> stack, repl 1.5163 +// - cast (high level type cast, XtoP, castPP, castII, not_null etc.) 1.5164 +// - conv (low level type cast requiring bit changes (sign extend etc.)) 1.5165 +// - compares, range & zero checks. 1.5166 +// - branches 1.5167 +// - complex operations, intrinsics, min, max, replicate 1.5168 +// - lock 1.5169 +// - Calls 1.5170 +// 1.5171 +// If there are similar instructions with different types they are sorted: 1.5172 +// int before float 1.5173 +// small before big 1.5174 +// signed before unsigned 1.5175 +// e.g., loadS before loadUS before loadI before loadF. 1.5176 + 1.5177 + 1.5178 +//----------Load/Store Instructions-------------------------------------------- 1.5179 + 1.5180 +//----------Load Instructions-------------------------------------------------- 1.5181 + 1.5182 +// Converts byte to int. 1.5183 +// As convB2I_reg, but without match rule. The match rule of convB2I_reg 1.5184 +// reuses the 'amount' operand, but adlc expects that operand specification 1.5185 +// and operands in match rule are equivalent.
// Sign-extend the low byte of $src into $dst (EXTSB).
// Has no match rule; it is only instantiated from expand rules
// (e.g. the loadB_*_Ex instructions).
instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
  effect(DEF dst, USE src);

  format %{ "EXTSB $dst, $src \t// byte->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
    __ extsb($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extending byte load from an indirect address (LBZ).
// Building block for loadB_indirect_Ex: it carries a match rule, but
// predicate(false) keeps the matcher from ever selecting it directly.
instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Acquire flavour of loadUB_indirect: LBZ followed by TWI and ISYNC
// (three 4-byte instructions, hence size(12)).
// Building block for loadB_indirect_ac_Ex; never matched directly.
instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
// Matches only when the load is unordered or an acquire follows it anyway.
// Expands into an LBZ (loadUB_indirect) plus a sign extension.
instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
  match(Set dst (LoadB mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indirect(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Load Byte (8bit signed) acquire, indirect address.
// No predicate and a higher cost than loadB_indirect_Ex; expands into the
// acquiring load (loadUB_indirect_ac) plus a sign extension.
instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
  match(Set dst (LoadB mem));
  ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indirect_ac(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Zero-extending byte load from base + 16-bit offset (LBZ).
// Building block for loadB_indOffset16_Ex; predicate(false) keeps the
// matcher from selecting it directly.
instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Acquire flavour of loadUB_indOffset16: LBZ, TWI, ISYNC (12 bytes).
// Building block for loadB_indOffset16_ac_Ex; never matched directly.
instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
// Base + 16-bit offset addressing; matches only when the load is unordered
// or an acquire follows it anyway. Expands into LBZ + sign extension.
instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
  match(Set dst (LoadB mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indOffset16(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Load Byte (8bit signed) acquire, base + 16-bit offset.
// No predicate and a higher cost than loadB_indOffset16_Ex.
instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
  match(Set dst (LoadB mem));
  ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indOffset16_ac(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Load Unsigned Byte (8bit UNsigned) into an int reg.
// Selected when the load is unordered or is followed by an acquire.
instruct loadUB(iRegIdst dst, memory mem) %{
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  match(Set dst (LoadUB mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to int" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Byte (8bit UNsigned) acquire.
// Same match rule as loadUB without a predicate, at three times the cost:
// LBZ followed by TWI and ISYNC.
instruct loadUB_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUB mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Byte (8bit UNsigned) into a Long Register.
// Folds ConvI2L(LoadUB) into a single LBZ. The predicate inspects the
// LoadUB node, which is the first kid of the matched ConvI2L.
instruct loadUB2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
  ins_cost(MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to long" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Byte (8bit UNsigned) into a Long Register acquire.
// LBZ followed by TWI and ISYNC (12 bytes).
instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Short (16bit signed)
// Selected when the load is unordered or is followed by an acquire.
instruct loadS(iRegIdst dst, memory mem) %{
  match(Set dst (LoadS mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHA $dst, $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lha);
    // Displacement of the operand plus the frame-slot bias of its base.
    int biased_disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lha($dst$$Register, biased_disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Short (16bit signed) acquire.
// Emits LHA, then TWI on the loaded value and ISYNC (12 bytes in total).
instruct loadS_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadS mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHA $dst, $mem\t acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // Displacement of the operand plus the frame-slot bias of its base.
    int biased_disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lha($dst$$Register, biased_disp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Char (16bit unsigned)
// Selected when the load is unordered or is followed by an acquire.
instruct loadUS(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUS mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lhz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Char (16bit unsigned) acquire.
// LHZ followed by TWI and ISYNC (12 bytes).
instruct loadUS_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUS mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem \t// acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lhz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
// Folds ConvI2L(LoadUS) into a single LHZ. The predicate inspects the
// LoadUS node, which is the first kid of the matched ConvI2L.
instruct loadUS2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem \t// short, zero-extend to long" %}
  size(4);
  ins_encode( enc_lhz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
// LHZ followed by TWI and ISYNC (12 bytes).
instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem \t// short, zero-extend to long, acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lhz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Integer.
// Selected when the load is unordered or is followed by an acquire.
instruct loadI(iRegIdst dst, memory mem) %{
  match(Set dst (LoadI mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Integer acquire.
// LWZ followed by TWI and ISYNC (12 bytes).
instruct loadI_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadI mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// load acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lwz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to unsigned int in
// long register.
// LoadI + ConvI2L + AndL 0xffffffff.
// The whole AndL(ConvI2L(LoadI), mask) tree collapses into one LWZ.
// Only unordered loads qualify: the predicate walks two levels down the
// match tree to reach the LoadI node. No acquire variant is defined here.
instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// zero-extend to long" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to long.
// Only unordered loads qualify; ordered ones go to loadI2L_ac.
// NOTE(review): LWA, like LD, is a DS-form instruction; the LD-based loads
// in this file take a memoryAlg4 operand while this one takes memory.
// Confirm that an unaligned displacement cannot reach this encoding.
instruct loadI2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LWA $dst, $mem \t// loadI2L" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwa);
    // Displacement of the operand plus the frame-slot bias of its base.
    int biased_disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwa($dst$$Register, biased_disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to long - acquire.
// Emits LWA, then TWI on the loaded value and ISYNC (12 bytes in total).
// NOTE(review): LWA, like LD, is a DS-form instruction; the LD-based loads
// below take a memoryAlg4 operand while this one takes memory. Confirm that
// an unaligned displacement cannot reach this encoding.
instruct loadI2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  ins_cost(3*MEMORY_REF_COST);

  // The first line needs its own "\n\t"; without it the adjacent string
  // literals concatenate and the listing prints "...acquireTWI $dst".
  format %{ "LWA $dst, $mem \t// loadI2L acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwa);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Long - aligned
// Selected when the load is unordered or is followed by an acquire.
instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// long" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Long - aligned acquire.
// LD followed by TWI and ISYNC (12 bytes).
instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// long acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Long - UNaligned
instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL_unaligned mem));
  // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// unaligned long" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load nodes for superwords

// Load Aligned Packed Byte
// Only matches vector loads whose memory size is exactly 8 bytes, so one
// LD into a long register covers the whole vector.
instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// load 8-byte Vector" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
  match(Set dst (LoadRange mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// range" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer
// Selected when the load is unordered or is followed by an acquire.
instruct loadN(iRegNdst dst, memory mem) %{
  match(Set dst (LoadN mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// load compressed ptr" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer acquire.
// LWZ followed by TWI and ISYNC (12 bytes).
instruct loadN_ac(iRegNdst dst, memory mem) %{
  match(Set dst (LoadN mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// load acquire compressed ptr\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lwz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer and decode it if narrow_oop_shift == 0.
// Folds DecodeN(LoadN) into a single LWZ. Requires an unordered load and a
// narrow oop shift of zero (both checked by the predicate).
instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
  match(Set dst (DecodeN (LoadN mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() && Universe::narrow_oop_shift() == 0);
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Pointer
// Selected when the load is unordered or is followed by an acquire.
instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// ptr" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Pointer acquire.
// LD followed by TWI and ISYNC (12 bytes).
instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadP mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// ptr acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// LoadP + CastP2L
// Folds CastP2X(LoadP) into one LD; only unordered loads qualify.
instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (CastP2X (LoadP mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// ptr + p2x" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load compressed klass pointer.
// A plain LWZ; no ordering predicate and no acquire variant.
instruct loadNKlass(iRegNdst dst, memory mem) %{
  match(Set dst (LoadNKlass mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// compressed klass ptr" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//// Load compressed klass and decode it if narrow_klass_shift == 0.
//// TODO: will narrow_klass_shift ever be 0?
//instruct decodeNKlass2Klass(iRegPdst dst, memory mem) %{
//  match(Set dst (DecodeNKlass (LoadNKlass mem)));
//  predicate(false /* TODO: PPC port Universe::narrow_klass_shift() == 0*);
//  ins_cost(MEMORY_REF_COST);
//
//  format %{ "LWZ $dst, $mem \t// DecodeNKlass (unscaled)" %}
//  size(4);
//  ins_encode( enc_lwz(dst, mem) );
//  ins_pipe(pipe_class_memory);
//%}

// Load Klass Pointer
// A plain LD; no ordering predicate and no acquire variant.
instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadKlass mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// klass ptr" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Float
// Selected when the load is unordered or is followed by an acquire.
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFS $dst, $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    // Displacement of the operand plus the frame-slot bias of its base.
    int biased_disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lfs($dst$$FloatRegister, biased_disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Float acquire.
// Four instructions (16 bytes): LFS, FCMPU of the loaded value against
// itself, a BNE to the immediately following label, then ISYNC.
instruct loadF_ac(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LFS $dst, $mem \t// acquire\n\t"
            "FCMPU cr0, $dst, $dst\n\t"
            "BNE cr0, next\n"
            "next:\n\t"
            "ISYNC" %}
  size(16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // Displacement of the operand plus the frame-slot bias of its base.
    int biased_disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    Label fall_through;
    __ lfs($dst$$FloatRegister, biased_disp, $mem$$base$$Register);
    __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
    // The branch target is the very next instruction.
    __ bne(CCR0, fall_through);
    __ bind(fall_through);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Double - aligned
// Selected when the load is unordered or is followed by an acquire.
instruct loadD(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD $dst, $mem" %}
  size(4);
  ins_encode( enc_lfd(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Double - aligned acquire.
// Four instructions (16 bytes): LFD, FCMPU of the loaded value against
// itself, a BNE to the immediately following label, then ISYNC.
instruct loadD_ac(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LFD $dst, $mem \t// acquire\n\t"
            "FCMPU cr0, $dst, $dst\n\t"
            "BNE cr0, next\n"
            "next:\n\t"
            "ISYNC" %}
  size(16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // Displacement of the operand plus the frame-slot bias of its base.
    int biased_disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    Label fall_through;
    __ lfd($dst$$FloatRegister, biased_disp, $mem$$base$$Register);
    __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
    // The branch target is the very next instruction.
    __ bne(CCR0, fall_through);
    __ bind(fall_through);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Double - UNaligned
instruct loadD_unaligned(regD dst, memory mem) %{
  match(Set dst (LoadD_unaligned mem));
  // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD $dst, $mem" %}
  size(4);
  ins_encode( enc_lfd(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//----------Constants--------------------------------------------------------

// Load MachConstantTableBase: add hi offset to global toc.
// TODO: Handle hidden register r29 in bundler!
// No match rule: the node only defines $dst.
instruct loadToc_hi(iRegLdst dst) %{
  effect(DEF dst);
  ins_cost(DEFAULT_COST);

  format %{ "ADDIS $dst, R29, DISP.hi \t// load TOC hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
  %}
  ins_pipe(pipe_class_default);
%}

// Load MachConstantTableBase: add lo offset to global toc.
// No match rule: the node defines $dst and uses $src. The encoding passes
// only $dst to the assembler helper.
instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "ADDI $dst, $src, DISP.lo \t// load TOC lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
  %}
  ins_pipe(pipe_class_default);
%}

// Load 16-bit integer constant 0xssss????
instruct loadConI16(iRegIdst dst, immI16 src) %{
  match(Set dst src);

  format %{ "LI $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    // Take the low halfword and sign-extend it back to int.
    short lo16 = (short)($src$$constant & 0xFFFF);
    __ li($dst$$Register, (int)lo16);
  %}
  ins_pipe(pipe_class_default);
%}

// Load integer constant 0x????0000
instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS $dst, $src.hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // Lis sign extends 16-bit src then shifts it 16 bit to the left.
    short hi16 = (short)(($src$$constant & 0xFFFF0000) >> 16);
    __ lis($dst$$Register, (int)hi16);
  %}
  ins_pipe(pipe_class_default);
%}

// Part 2 of loading 32 bit constant: hi16 is src1 (properly shifted
// and sign extended), this adds the low 16 bits.
// ORs the low 16 bits of $src2 into $src1. Used as the second step of
// loadConI_Ex; predicate(false) and the missing match rule keep the
// matcher from selecting it on its own.
instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ORI $dst, $src1.hi, $src2.lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Load an arbitrary 32-bit integer constant in two steps:
// LIS for the high halfword (loadConIhi16), then ORI for the low halfword
// (loadConI32_lo16). Both helper operands are built from the same constant.
instruct loadConI_Ex(iRegIdst dst, immI src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  expand %{
    // Would like to use $src$$constant.
    immI16 srcLo %{ _opnds[1]->constant() %}
    // srcHi can be 0000 if srcLo sign-extends to a negative number.
    immIhi16 srcHi %{ _opnds[1]->constant() %}
    iRegIdst tmpI;
    loadConIhi16(tmpI, srcHi);
    loadConI32_lo16(dst, tmpI, srcLo);
  %}
%}

// No constant pool entries required.
// Load a long constant that fits a signed 16-bit immediate with one LI.
instruct loadConL16(iRegLdst dst, immL16 src) %{
  match(Set dst src);

  format %{ "LI $dst, $src \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    // Take the low halfword and sign-extend it back to int.
    short lo16 = (short)($src$$constant & 0xFFFF);
    __ li($dst$$Register, (int)lo16);
  %}
  ins_pipe(pipe_class_default);
%}

// Load long constant 0xssssssss????0000
instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS $dst, $src.hi \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // Bits 16..31 of the constant, reinterpreted as a signed halfword.
    short hi16 = (short)(($src$$constant & 0xFFFF0000) >> 16);
    __ lis($dst$$Register, (int)hi16);
  %}
  ins_pipe(pipe_class_default);
%}

// To load a 32 bit constant: merge lower 16 bits into already loaded
// high 16 bits.
// Second step of loadConL32_Ex; never selected by the matcher on its own.
instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ORI $dst, $src1, $src2.lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Load 32-bit long constant
// Two steps: LIS for bits 16..31 (loadConL32hi16), then ORI for the low
// halfword (loadConL32_lo16).
instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  expand %{
    // Would like to use $src$$constant.
    immL16 srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
    // srcHi can be 0000 if srcLo sign-extends to a negative number.
    immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
    iRegLdst tmpL;
    loadConL32hi16(tmpL, srcHi);
    loadConL32_lo16(dst, tmpL, srcLo);
  %}
%}

// Load long constant 0x????000000000000.
// The constant is shifted right by 32 so that LIS can materialize it, then
// the result is shifted left by 32 again.
instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  expand %{
    immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
    immI shift32 %{ 32 %}
    iRegLdst tmpL;
    loadConL32hi16(tmpL, srcHi);
    lshiftL_regL_immI(dst, tmpL, shift32);
  %}
%}

// Expand node for constant pool load: small offset.
// A single LD relative to $toc; contributes one constant to the table.
instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);
  // Needed so that CallDynamicJavaDirect can compute the address of this
  // instruction for relocation.
  ins_field_cbuf_insts_offset(int);

  format %{ "LD $dst, offset, $toc \t// load long $src from TOC" %}
  size(4);
  ins_encode( enc_load_long_constL(dst, src, toc) );
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
// High part: ADDIS relative to $toc. Declares the _const_toc_offset field
// that loadConL_lo reads through its hi-node pointer.
instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  predicate(false);

  ins_num_consts(1);
  ins_field_const_toc_offset(int);
  // Needed so that CallDynamicJavaDirect can compute the address of this
  // instruction for relocation.
  ins_field_cbuf_insts_offset(int);

  format %{ "ADDIS $dst, $toc, offset \t// load long $src from TOC (hi)" %}
  size(4);
  ins_encode( enc_load_long_constL_hi(dst, toc, src) );
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: large offset.
// No constant pool entries required.
// Low part: LD relative to $base, using the TOC offset recorded in the
// matching loadConL_hi node.
instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
  effect(DEF dst, USE src, USE base);
  predicate(false);

  ins_field_const_toc_offset_hi_node(loadConL_hiNode*);

  format %{ "LD $dst, offset, $base \t// load long $src from TOC (lo)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    // During scratch emission the offset is taken as 0 instead of being
    // read from the hi node.
    int toc_offset = 0;
    if (!ra_->C->in_scratch_emit_size()) {
      toc_offset = _const_toc_offset_hi_node->_const_toc_offset;
    }
    __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(toc_offset), $base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load long constant from constant table. Expand in case of
// offset > 16 bit is needed.
// Adlc adds toc node MachConstantTableBase.
instruct loadConL_Ex(iRegLdst dst, immL src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
  // We can not inline the enc_class for the expand as that does not support constanttablebase.
  postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
%}

// Load NULL as compressed oop.
// A single LI of the literal 0.
instruct loadConN0(iRegNdst dst, immN_0 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LI $dst, $src \t// compressed ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// Load hi part of compressed oop constant.
// No match rule; instantiated by the postalloc expansion of loadConN_Ex.
instruct loadConN_hi(iRegNdst dst, immN src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS $dst, $src \t// narrow oop hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // Bits 16..31 of the constant, reinterpreted as a signed halfword.
    short hi16 = (short)(($src$$constant >> 16) & 0xffff);
    __ lis($dst$$Register, (int)hi16);
  %}
  ins_pipe(pipe_class_default);
%}

// Add lo part of compressed oop constant to already loaded hi part.
// Records an oop relocation for the constant before emitting the ORI.
instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
  effect(DEF dst, USE src1, USE src2);
  ins_cost(DEFAULT_COST);

  format %{ "ORI $dst, $src1, $src2 \t// narrow oop lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
    // Look up the constant's index in the oop recorder and attach the
    // relocation to the instruction emitted next.
    int recorded_index = __ oop_recorder()->find_index((jobject)$src2$$constant);
    RelocationHolder oop_reloc = oop_Relocation::spec(recorded_index);
    __ relocate(oop_reloc, 1);
    __ ori($dst$$Register, $src1$$Register, $src2$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}

// Needed to postalloc expand loadConN: ConN is loaded as ConI
// leaving the upper 32 bits with sign-extension bits.
// This clears these bits: dst = src & 0xFFFFFFFF.
// TODO: Eventually call this maskN_regN_FFFFFFFF.
instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "MASK $dst, $src, 0xFFFFFFFF" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    // Clear the 0x20 most significant bits of the source.
    __ clrldi($dst$$Register, $src$$Register, 0x20);
  %}
  ins_pipe(pipe_class_default);
%}

// Loading ConN must be postalloc expanded so that edges between
// the nodes are safe. They may not interfere with a safepoint.
// GL TODO: This needs three instructions: better put this into the constant pool.
instruct loadConN_Ex(iRegNdst dst, immN src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  format %{ "LoadN $dst, $src \t// postalloc expanded" %}
  postalloc_expand %{
    // Expands to the chain: loadConN_hi -> loadConN_lo -> clearMs32b.
    MachNode *n_hi   = new (C) loadConN_hiNode();
    MachNode *n_lo   = new (C) loadConN_loNode();
    MachNode *n_mask = new (C) clearMs32bNode();

    // All three nodes are assigned the register pair of the node being expanded.
    const OptoReg::Name reg_second = ra_->get_reg_second(this);
    const OptoReg::Name reg_first  = ra_->get_reg_first(this);

    // hi: dst <- upper part of src (no data input).
    n_hi->add_req(NULL);
    n_hi->_opnds[0] = op_dst;
    n_hi->_opnds[1] = op_src;
    ra_->set_pair(n_hi->_idx, reg_second, reg_first);

    // lo: dst <- dst | lower part of src, consuming the hi node.
    n_lo->add_req(NULL, n_hi);
    n_lo->_opnds[0] = op_dst;
    n_lo->_opnds[1] = op_dst;
    n_lo->_opnds[2] = op_src;
    ra_->set_pair(n_lo->_idx, reg_second, reg_first);

    // mask: dst <- dst & 0xFFFFFFFF, consuming the lo node.
    n_mask->add_req(NULL, n_lo);
    n_mask->_opnds[0] = op_dst;
    n_mask->_opnds[1] = op_dst;
    ra_->set_pair(n_mask->_idx, reg_second, reg_first);

    nodes->push(n_hi);
    nodes->push(n_lo);
    nodes->push(n_mask);
  %}
%}

// Load hi part of compressed klass constant.
// No match rule: this node only declares its effect on the operands.
instruct loadConNKlass_hi(iRegNdst dst, immNKlass src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  // Disassembly text fixed: the constant is a narrow klass, not a narrow oop.
  format %{ "LIS $dst, $src \t// narrow klass hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // Bits 16..31 of the encoded klass, reinterpreted as a signed 16 bit immediate.
    intptr_t Csrc = Klass::encode_klass((Klass *)$src$$constant);
    const int hi16 = (int)(short)((Csrc >> 16) & 0xffff);
    __ lis($dst$$Register, hi16);
  %}
  ins_pipe(pipe_class_default);
%}

// This needs a match rule so that build_oop_map knows this is
// not a narrow oop.
instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
  match(Set dst src1);
  effect(TEMP src2);
  ins_cost(DEFAULT_COST);

  // Disassembly text fixed: the encoding emits ORI (not ADDI) for a narrow klass.
  format %{ "ORI $dst, $src1, $src2 \t// narrow klass lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    intptr_t Csrc = Klass::encode_klass((Klass *)$src1$$constant);
    assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
    // Attach a metadata relocation for the klass to the instruction emitted next.
    int klass_index = __ oop_recorder()->find_index((Klass *)$src1$$constant);
    __ relocate(metadata_Relocation::spec(klass_index), 1);
    // OR the low 16 bits of the encoded klass into the previously loaded value.
    __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}

// Loading ConNKlass must be postalloc expanded so that edges between
// the nodes are safe. They may not interfere with a safepoint.
instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  format %{ "LoadN $dst, $src \t// postalloc expanded" %}
  postalloc_expand %{
    // All created nodes are assigned the register pair of the node being expanded.
    const OptoReg::Name reg_second = ra_->get_reg_second(this);
    const OptoReg::Name reg_first  = ra_->get_reg_first(this);

    // Load high bits into register. Sign extended.
    MachNode *n_hi = new (C) loadConNKlass_hiNode();
    n_hi->add_req(NULL);
    n_hi->_opnds[0] = op_dst;
    n_hi->_opnds[1] = op_src;
    ra_->set_pair(n_hi->_idx, reg_second, reg_first);
    nodes->push(n_hi);

    // Input of the final lo node: the hi node, or the mask node if one is needed.
    MachNode *n_prev = n_hi;
    if (!Assembler::is_uimm((jlong)Klass::encode_klass((Klass *)op_src->constant()), 31)) {
      // Value might be 1-extended. Mask out these bits.
      n_prev = new (C) clearMs32bNode();
      n_prev->add_req(NULL, n_hi);
      n_prev->_opnds[0] = op_dst;
      n_prev->_opnds[1] = op_dst;
      ra_->set_pair(n_prev->_idx, reg_second, reg_first);
      nodes->push(n_prev);
    }

    // Merge in the low bits.
    MachNode *n_lo = new (C) loadConNKlass_loNode();
    n_lo->add_req(NULL, n_prev);
    n_lo->_opnds[0] = op_dst;
    n_lo->_opnds[1] = op_src;
    n_lo->_opnds[2] = op_dst;
    ra_->set_pair(n_lo->_idx, reg_second, reg_first);
    nodes->push(n_lo);
  %}
%}

// 0x1 is used in object initialization (initial object header).
// No constant pool entries required.
instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
  match(Set dst src);

  format %{ "LI $dst, $src \t// ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    // Low 16 bits of the constant as a signed immediate.
    const int imm16 = (int)((short)($src$$constant & 0xFFFF));
    __ li($dst$$Register, imm16);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: small offset.
// The match rule is needed to generate the correct bottom_type(),
// however this node should never match. The use of predicate is not
// possible since ADLC forbids predicates for chain rules. The higher
// costs do not prevent matching in this case. For that reason the
// operand immP_NM with predicate(false) is used.
instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
  match(Set dst src);
  effect(TEMP toc);

  // One constant is requested for this node.
  ins_num_consts(1);

  format %{ "LD $dst, offset, $toc \t// load ptr $src from TOC" %}
  size(4);
  ins_encode( enc_load_long_constP(dst, src, toc) );
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
// Emits the ADDIS (hi) half; the matching lo node is loadConP_lo.
instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  predicate(false);

  ins_num_consts(1);
  // Extra int field on the node; loadConP_lo reads it as _const_toc_offset.
  ins_field_const_toc_offset(int);

  format %{ "ADDIS $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
  size(4);
  ins_encode( enc_load_long_constP_hi(dst, src, toc) );
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: large offset.
// Emits the LD (lo) half relative to the base produced by loadConP_hi.
instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
  match(Set dst src);
  effect(TEMP base);

  // Extra field pointing to the hi node that carries the TOC offset.
  ins_field_const_toc_offset_hi_node(loadConP_hiNode*);

  format %{ "LD $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    // During scratch emission for size computation a zero offset is used.
    int toc_offset = 0;
    if (!ra_->C->in_scratch_emit_size()) {
      toc_offset = _const_toc_offset_hi_node->_const_toc_offset;
    }
    __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(toc_offset), $base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load pointer constant from constant table. Expand in case an
// offset > 16 bit is needed.
// Adlc adds toc node MachConstantTableBase.
instruct loadConP_Ex(iRegPdst dst, immP src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  // This rule does not use "expand" because then
  // the result type is not known to be an Oop. An ADLC
  // enhancement will be needed to make that work - not worth it!

  // If this instruction rematerializes, it prolongs the live range
  // of the toc node, causing illegal graphs.
  // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
  ins_cannot_rematerialize(true);

  format %{ "LD $dst, offset, $constanttablebase \t// load ptr $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
%}

// Expand node for constant pool load: small offset.
// No match rule: this node only declares its effect on the operands.
instruct loadConF(regF dst, immF src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "LFS $dst, offset, $toc \t// load float $src from TOC" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    // Obtain the address of the float constant, then load it relative to the TOC.
    address const_addr = __ float_constant($src$$constant);
    int toc_offset = __ offset_to_method_toc(const_addr);
    __ lfs($dst$$FloatRegister, toc_offset, $toc$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "ADDIS $toc, $toc, offset_hi\n\t"
            "LFS $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
            "ADDIS $toc, $toc, -offset_hi"%}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    FloatRegister Rresult = $dst$$FloatRegister;
    Register      Rbase   = $toc$$Register;

    // Split the TOC offset into a rounded upper part and a signed remainder.
    address const_addr = __ float_constant($src$$constant);
    int toc_offset = __ offset_to_method_toc(const_addr);
    int offset_hi  = (toc_offset + (1<<15))>>16;
    int offset_lo  = toc_offset - offset_hi * (1<<16);

    // Temporarily bias the TOC register, load, then undo the bias.
    __ addis(Rbase, Rbase, offset_hi);
    __ lfs(Rresult, offset_lo, Rbase);
    __ addis(Rbase, Rbase, -offset_hi);
  %}
  ins_pipe(pipe_class_memory);
%}

// Adlc adds toc node MachConstantTableBase.
instruct loadConF_Ex(regF dst, immF src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  // See loadConP.
  ins_cannot_rematerialize(true);

  format %{ "LFS $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
%}

// Expand node for constant pool load: small offset.
instruct loadConD(regD dst, immD src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "LFD $dst, offset, $toc \t// load double $src from TOC" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
    // Obtain the address of the double constant, then load it relative to the TOC.
    address const_addr = __ double_constant($src$$constant);
    int toc_offset = __ offset_to_method_toc(const_addr);
    __ lfd($dst$$FloatRegister, toc_offset, $toc$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "ADDIS $toc, $toc, offset_hi\n\t"
            "LFD $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
            "ADDIS $toc, $toc, -offset_hi" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    FloatRegister Rresult = $dst$$FloatRegister;
    Register      Rbase   = $toc$$Register;

    // Split the TOC offset into a rounded upper part and a signed remainder.
    address const_addr = __ double_constant($src$$constant);
    int toc_offset = __ offset_to_method_toc(const_addr);
    int offset_hi  = (toc_offset + (1<<15))>>16;
    int offset_lo  = toc_offset - offset_hi * (1<<16);

    // Temporarily bias the TOC register, load, then undo the bias.
    __ addis(Rbase, Rbase, offset_hi);
    __ lfd(Rresult, offset_lo, Rbase);
    __ addis(Rbase, Rbase, -offset_hi);
  %}
  ins_pipe(pipe_class_memory);
%}

// Adlc adds toc node MachConstantTableBase.
instruct loadConD_Ex(regD dst, immD src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  // See loadConP.
  ins_cannot_rematerialize(true);

  format %{ "ConD $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
%}

// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

// PrefetchRead of (base + index): emitted as dcbt.
instruct prefetchr(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchRead (AddP mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 0, $src \t// Prefetch read-many" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
    Register Rindex = $src$$Register;
    Register Rbase  = $mem$$base$$Register;
    __ dcbt(Rindex, Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

// PrefetchRead of a plain base address: emitted as dcbt.
instruct prefetchr_no_offset(indirectMemory mem) %{
  match(PrefetchRead mem);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
    Register Rbase = $mem$$base$$Register;
    __ dcbt(Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

// PrefetchWrite of (base + index): emitted as dcbtst.
instruct prefetchw(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchWrite (AddP mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many (and read)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    Register Rindex = $src$$Register;
    Register Rbase  = $mem$$base$$Register;
    __ dcbtst(Rindex, Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

// PrefetchWrite of a plain base address: emitted as dcbtst.
instruct prefetchw_no_offset(indirectMemory mem) %{
  match(PrefetchWrite mem);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    Register Rbase = $mem$$base$$Register;
    __ dcbtst(Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

// Special prefetch versions which use the dcbz instruction.
// Selected only when AllocatePrefetchStyle == 3.
instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchAllocation (AddP mem src));
  predicate(AllocatePrefetchStyle == 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    Register Rindex = $src$$Register;
    Register Rbase  = $mem$$base$$Register;
    __ dcbz(Rindex, Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

// dcbz variant for a plain base address (AllocatePrefetchStyle == 3).
instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
  match(PrefetchAllocation mem);
  predicate(AllocatePrefetchStyle == 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    Register Rbase = $mem$$base$$Register;
    __ dcbz(Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

// Allocation prefetch of (base + index) for every style other than 3: dcbtst.
instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchAllocation (AddP mem src));
  predicate(AllocatePrefetchStyle != 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    Register Rindex = $src$$Register;
    Register Rbase  = $mem$$base$$Register;
    __ dcbtst(Rindex, Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

// Allocation prefetch of a plain base address for every style other than 3: dcbtst.
instruct prefetch_alloc_no_offset(indirectMemory mem) %{
  match(PrefetchAllocation mem);
  predicate(AllocatePrefetchStyle != 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    Register Rbase = $mem$$base$$Register;
    __ dcbtst(Rbase);
  %}
  ins_pipe(pipe_class_memory);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, iRegIsrc src) %{
  match(Set mem (StoreB mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STB $src, $mem \t// byte" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stb);
    // The displacement is adjusted by frame_slots_bias for the base in use.
    int disp_biased = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stb($src$$Register, disp_biased, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC(memory mem, iRegIsrc src) %{
  match(Set mem (StoreC mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STH $src, $mem \t// short" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sth);
    // The displacement is adjusted by frame_slots_bias for the base in use.
    int disp_biased = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ sth($src$$Register, disp_biased, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Store Integer
instruct storeI(memory mem, iRegIsrc src) %{
  match(Set mem (StoreI mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW $src, $mem" %}
  size(4);
  ins_encode( enc_stw(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// ConvL2I + StoreI.
instruct storeI_convL2I(memory mem, iRegLsrc src) %{
  // Folds the long-to-int conversion into the word store.
  match(Set mem (StoreI mem (ConvL2I src)));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW l2i($src), $mem" %}
  size(4);
  ins_encode( enc_stw(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Long
instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD $src, $mem \t// long" %}
  size(4);
  ins_encode( enc_std(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store super word nodes.

// Store Aligned Packed Byte long register to memory
// Only applies to vector stores with a memory size of 8 bytes.
instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD $mem, $src \t// packed8B" %}
  size(4);
  ins_encode( enc_std(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
  match(Set dst (StoreN dst src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW $src, $dst \t// compressed oop" %}
  size(4);
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Store Compressed KLass
instruct storeNKlass(memory dst, iRegN_P2N src) %{
  match(Set dst (StoreNKlass dst src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW $src, $dst \t// compressed klass" %}
  size(4);
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Store Pointer
instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
  match(Set dst (StoreP dst src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD $src, $dst \t// ptr" %}
  size(4);
  ins_encode( enc_std(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF(memory mem, regF src) %{
  match(Set mem (StoreF mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STFS $src, $mem" %}
  size(4);
  ins_encode( enc_stfs(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Double
instruct storeD(memory mem, regD src) %{
  match(Set mem (StoreD mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STFD $src, $mem" %}
  size(4);
  ins_encode( enc_stfd(src, mem) );
  ins_pipe(pipe_class_memory);
%}

//----------Store Instructions With Zeros--------------------------------------

// Card-mark for CMS garbage collection.
// This cardmark does an optimization so that it must not always
// do a releasing store. For this, it gets the address of
// CMSCollectorCardTableModRefBSExt::_requires_release as input.
// (Using releaseFieldAddr in the match rule is a hack.)
// Never matched directly (predicate is false); storeCM_CMS_ExEx expands to it.
instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
  match(Set mem (StoreCM mem releaseFieldAddr));
  predicate(false);
  ins_cost(MEMORY_REF_COST);

  // See loadConP.
  ins_cannot_rematerialize(true);

  format %{ "STB #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
  ins_pipe(pipe_class_memory);
%}

// Card-mark for CMS garbage collection.
// This cardmark does an optimization so that it must not always
// do a releasing store. For this, it needs the constant address of
// CMSCollectorCardTableModRefBSExt::_requires_release.
// This constant address is split off here by expand so we can use
// adlc / matcher functionality to load it from the constant section.
// NOTE(review): the address immediate is still a literal 0 pending the TODO below.
instruct storeCM_CMS_ExEx(memory mem, immI_0 zero) %{
  match(Set mem (StoreCM mem zero));
  predicate(UseConcMarkSweepGC);

  expand %{
    immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
    iRegLdst releaseFieldAddress;
    loadConL_Ex(releaseFieldAddress, baseImm);
    storeCM_CMS(mem, releaseFieldAddress);
  %}
%}

// Card-mark store used when G1 is selected: a plain byte store of zero.
instruct storeCM_G1(memory mem, immI_0 zero) %{
  match(Set mem (StoreCM mem zero));
  predicate(UseG1GC);
  ins_cost(MEMORY_REF_COST);

  ins_cannot_rematerialize(true);

  format %{ "STB #0, $mem \t// CMS card-mark byte store (G1)" %}
  size(8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // R0 carries the zero byte that is written to the card.
    __ li(R0, 0);
    //__ release(); // G1: oops are allowed to get visible after dirty marking
    // The displacement is used unbiased, so a stack pointer base is rejected.
    guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
    __ stb(R0, $mem$$disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Convert oop pointer into compressed form.

// Nodes for postalloc expand.

// Shift node for expand.
instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP src));
  predicate(false);

  format %{ "SRDI $dst, $src, 3 \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    // Shift right by the narrow oop shift, limited to 6 bits.
    const int shift_amount = Universe::narrow_oop_shift() & 0x3f;
    __ srdi($dst$$Register, $src$$Register, shift_amount);
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.
instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP src));
  predicate(false);

  format %{ "SUB $dst, $src, oop_base \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    // R30 is the register shown as oop_base in the format string.
    Register Rresult = $dst$$Register;
    Register Rptr    = $src$$Register;
    __ subf(Rresult, R30, Rptr);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional sub base.
instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP (Binary crx src1)));
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "BEQ $crx, done\n\t"
            "SUB $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
            "done:" %}
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    // Skip the subtraction when the condition register signals equal.
    Label skip_sub;
    __ beq($crx$$CondRegister, skip_sub);
    __ subf($dst$$Register, R30, $src1$$Register);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(skip_sub);
  %}
  ins_pipe(pipe_class_default);
%}

// Power 7 can use isel instruction
instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP (Binary crx src1)));
  predicate(false);

  format %{ "CMOVE $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description exists.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    Register          Rresult = $dst$$Register;
    ConditionRegister Rcond   = $crx$$CondRegister;
    __ isel_0(Rresult, Rcond, Assembler::equal, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// base != 0
// 32G aligned narrow oop base.
instruct encodeP_32GAligned(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  // Currently disabled: the predicate is a literal false pending the TODO.
  predicate(false /* TODO: PPC port Universe::narrow_oop_base_disjoint()*/);

  format %{ "EXTRDI $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    // Rotate left by (64 - shift) and clear the upper 32 bits.
    const int rotate_amount = 64-Universe::narrow_oop_shift();
    __ rldicl($dst$$Register, $src$$Register, rotate_amount, 32);
  %}
  ins_pipe(pipe_class_default);
%}

// shift != 0, base != 0
// Applies when the node type is not known to be NotNull; needs a temp condition register.
instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  effect(TEMP crx);
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
            Universe::narrow_oop_shift() != 0 &&
            true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);

  format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
%}

// shift != 0, base != 0
// Applies when the node type is known to be NotNull.
instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
            Universe::narrow_oop_shift() != 0 &&
            true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);

  format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
  postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
%}

// shift != 0, base == 0
// TODO: This is the same as encodeP_shift. Merge!
instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  predicate(Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() ==0);

  format %{ "SRDI $dst, $src, #3 \t// encodeP, $src != NULL" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    // Shift right by the narrow oop shift, limited to 6 bits.
    const int shift_amount = Universe::narrow_oop_shift() & 0x3f;
    __ srdi($dst$$Register, $src$$Register, shift_amount);
  %}
  ins_pipe(pipe_class_default);
%}

// Compressed OOPs with narrow_oop_shift == 0.
// shift == 0, base == 0
instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  predicate(Universe::narrow_oop_shift() == 0);

  format %{ "MR $dst, $src \t// Ptr->Narrow" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    Register Rresult = $dst$$Register;
    Register Rptr    = $src$$Register;
    __ mr_if_needed(Rresult, Rptr);
  %}
  ins_pipe(pipe_class_default);
%}

// Decode nodes.

// Shift node for expand.
instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeN src));
  predicate(false);

  format %{ "SLDI $dst, $src, #3 \t// DecodeN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    // Shift left by the narrow oop shift.
    const int shift_amount = Universe::narrow_oop_shift();
    __ sldi($dst$$Register, $src$$Register, shift_amount);
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.
instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeN src));
  predicate(false);

  format %{ "ADD $dst, $src, R30 \t// DecodeN, add oop base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    Register Rresult = $dst$$Register;
    Register Rnarrow = $src$$Register;
    __ add(Rresult, Rnarrow, R30);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional add base for expand.
instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  // NOTICE that the rule is nonsense - we just have to make sure that:
  //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
  //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
  match(Set dst (DecodeN (Binary crx src1)));
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "BEQ $crx, done\n\t"
            "ADD $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
            "done:" %}
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    // Skip the addition when the condition register signals equal.
    Label skip_add;
    __ beq($crx$$CondRegister, skip_add);
    __ add($dst$$Register, $src1$$Register, R30);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_class_default);
%}

// Decode counterpart of cond_set_0_oop: uses isel_0 on the equal condition.
instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  // NOTICE that the rule is nonsense - we just have to make sure that:
  //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
  //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
  match(Set dst (DecodeN (Binary crx src1)));
  predicate(false);

  format %{ "CMOVE $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description exists.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    Register          Rresult = $dst$$Register;
    ConditionRegister Rcond   = $crx$$CondRegister;
    __ isel_0(Rresult, Rcond, Assembler::equal, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// shift != 0, base != 0
// Applies when the result type is neither NotNull nor Constant.
instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
  match(Set dst (DecodeN src));
  predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
            Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() != 0);
  effect(TEMP crx);

  format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
  postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
%}

// shift != 0, base == 0
instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeN src));
  predicate(Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() == 0);

  format %{ "SLDI $dst, $src, #3 \t// DecodeN (zerobased)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    // Shift left by the narrow oop shift.
    const int shift_amount = Universe::narrow_oop_shift();
    __ sldi($dst$$Register, $src$$Register, shift_amount);
  %}
  ins_pipe(pipe_class_default);
%}

// src != 0, shift != 0, base != 0
// Applies when the result type is NotNull or Constant.
instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeN src));
  predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
            Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() != 0);

  format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
  postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
%}

// Compressed OOPs with narrow_oop_shift == 0.
instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeN src));
  predicate(Universe::narrow_oop_shift() == 0);
  ins_cost(DEFAULT_COST);

  format %{ "MR $dst, $src \t// DecodeN (unscaled)" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    Register Rresult = $dst$$Register;
    Register Rnarrow = $src$$Register;
    __ mr_if_needed(Rresult, Rnarrow);
  %}
  ins_pipe(pipe_class_default);
%}

// Convert compressed oop into int for vectors alignment masking.
instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
  predicate(Universe::narrow_oop_shift() == 0);
  ins_cost(DEFAULT_COST);

  format %{ "MR $dst, $src \t// (int)DecodeN (unscaled)" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    Register Rresult = $dst$$Register;
    Register Rnarrow = $src$$Register;
    __ mr_if_needed(Rresult, Rnarrow);
  %}
  ins_pipe(pipe_class_default);
%}

// Convert klass pointer into compressed form.

// Nodes for postalloc expand.

// Shift node for expand.
instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  // predicate(false) keeps the matcher from ever selecting this rule; the
  // node is created directly by the postalloc expand of
  // encodePKlass_not_null_Ex.
  match(Set dst (EncodePKlass src));
  predicate(false);

  format %{ "SRDI $dst, $src, 3 \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.
// NOTE(review): the encoding is subf(dst, base, src). On Power, subf RT,RA,RB
// yields RB - RA, i.e. src - base here, although the format string lists the
// operands in subf order under the name SUB.
instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodePKlass (Binary base src)));
  predicate(false);

  format %{ "SUB $dst, $base, $src \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    __ subf($dst$$Register, $base$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// base != 0
// 32G aligned narrow klass base.
// EncodePKlass for a klass base whose low bits are all zero, so the base can
// be stripped by a single rotate-and-clear instead of a subtract.
// Currently disabled (predicate(false)) until the disjoint-base query is ported.
instruct encodePKlass_32GAligned(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodePKlass src));
  predicate(false /* TODO: PPC port Universe::narrow_klass_base_disjoint()*/);

  format %{ "EXTRDI $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    // Klass pointers are scaled by the klass shift, not by the oop shift;
    // all other klass encode/decode nodes use narrow_klass_shift() as well.
    __ rldicl($dst$$Register, $src$$Register, 64-Universe::narrow_klass_shift(), 32);
  %}
  ins_pipe(pipe_class_default);
%}

// shift != 0, base != 0
// Expanded after register allocation into encodePKlass_sub_base followed by
// encodePKlass_shift; both new nodes are assigned this node's register pair.
instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
  match(Set dst (EncodePKlass (Binary base src)));
  predicate(false);

  format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
  postalloc_expand %{
    // Step 1: remove the klass base.
    encodePKlass_sub_baseNode *n1 = new (C) encodePKlass_sub_baseNode();
    n1->add_req(n_region, n_base, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_base;
    n1->_opnds[2] = op_src;
    n1->_bottom_type = _bottom_type;

    // Step 2: shift the result of step 1 in place (dst is input and output).
    encodePKlass_shiftNode *n2 = new (C) encodePKlass_shiftNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;
    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    nodes->push(n1);
    nodes->push(n2);
  %}
%}

// shift != 0, base != 0
// Matcher entry point for EncodePKlass: loads the klass base as a long
// constant and hands over to encodePKlass_not_null_Ex.
instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodePKlass src));
  //predicate(Universe::narrow_klass_shift() != 0 &&
  //          true /* TODO: PPC port Universe::narrow_klass_base_overlaps()*/);

  //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
  ins_cost(DEFAULT_COST*2);  // Don't count constant.
  expand %{
    immL baseImm %{ (jlong)(intptr_t)Universe::narrow_klass_base() %}
    iRegLdst base;
    loadConL_Ex(base, baseImm);
    encodePKlass_not_null_Ex(dst, base, src);
  %}
%}

// Decode nodes.

// Shift node for expand.
instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeNKlass src));
  predicate(false);

  format %{ "SLDI $dst, $src, #3 \t// DecodeNKlass" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.

instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeNKlass (Binary base src)));
  predicate(false);

  format %{ "ADD $dst, $base, $src \t// DecodeNKlass, add klass base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $base$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// src != 0, shift != 0, base != 0
// Expanded after register allocation into decodeNKlass_add_base followed by
// decodeNKlass_shift; the base operand is expected to arrive pre-shifted
// (see decodeNKlass_notNull_addBase_ExEx).
instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
  match(Set dst (DecodeNKlass (Binary base src)));
  //effect(kill src); // We need a register for the intermediate result after shifting.
  predicate(false);

  format %{ "DecodeNKlass $dst = $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
  postalloc_expand %{
    // Step 1: add the (pre-shifted) base to the narrow klass.
    decodeNKlass_add_baseNode *n1 = new (C) decodeNKlass_add_baseNode();
    n1->add_req(n_region, n_base, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_base;
    n1->_opnds[2] = op_src;
    n1->_bottom_type = _bottom_type;

    // Step 2: shift the sum in place (dst is input and output).
    decodeNKlass_shiftNode *n2 = new (C) decodeNKlass_shiftNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;

    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    nodes->push(n1);
    nodes->push(n2);
  %}
%}

// src != 0, shift != 0, base != 0
// Matcher entry point for DecodeNKlass.
instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeNKlass src));
  // predicate(Universe::narrow_klass_shift() != 0 &&
  //           Universe::narrow_klass_base() != 0);

  //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}

  ins_cost(DEFAULT_COST*2);  // Don't count constant.
  expand %{
    // We add first, then we shift. Like this, we can get along with one register less.
    // But we have to load the base pre-shifted.
    immL baseImm %{ (jlong)((intptr_t)Universe::narrow_klass_base() >> Universe::narrow_klass_shift()) %}
    iRegLdst base;
    loadConL_Ex(base, baseImm);
    decodeNKlass_notNull_addBase_Ex(dst, base, src);
  %}
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// LoadFence: emits the assembler's acquire barrier.
instruct membar_acquire() %{
  match(LoadFence);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-acquire" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
    __ acquire();
  %}
  ins_pipe(pipe_class_default);
%}

// MemBarAcquire: matched, but emits no code.
instruct unnecessary_membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(0);

  format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// MemBarAcquireLock: matched, but emits no code.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// MemBarRelease and StoreFence: emit the assembler's release barrier.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-release" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
    __ release();
  %}
  ins_pipe(pipe_class_default);
%}

// MemBarStoreStore: emits a StoreStore barrier.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-store-store" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_class_default);
%}

// MemBarReleaseLock: matched, but emits no code.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// MemBarVolatile: emits a full fence.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-volatile" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sync);
    __ fence();
  %}
  ins_pipe(pipe_class_default);
%}

// This optimization is wrong on PPC. The following pattern is not supported:
//  MemBarVolatile
//   ^        ^
//   |        |
//  CtrlProj MemProj
//   ^        ^
//   |        |
//   |       Load
//   |
//  MemBarVolatile
//
//  The first MemBarVolatile could get optimized out! According to
//  Vladimir, this pattern can not occur on Oracle platforms.
//  However, it does occur on PPC64 (because of membars in
//  inline_unsafe_load_store).
//
// Add this node again if we found a good solution for inline_unsafe_load_store().
// Don't forget to look at the implementation of post_store_load_barrier again,
// we did other fixes in that method.
//instruct unnecessary_membar_volatile() %{
//  match(MemBarVolatile);
//  predicate(Matcher::post_store_load_barrier(n));
//  ins_cost(0);
//
//  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
//  size(0);
//  ins_encode( /*empty*/ );
//  ins_pipe(pipe_class_default);
//%}

// MemBarCPUOrder: matched, but emits no code.
instruct membar_CPUOrder() %{
  match(MemBarCPUOrder);
  ins_cost(0);

  format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Conditional Move---------------------------------------------------

// Cmove using isel.
// Chosen when VM_Version::has_isel() holds; one instruction, no branch.
instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // The low two bits of the compare code pick the condition; bit 3 being
    // clear requests the inverted form.
    int cc = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register-to-register int cmove for CPUs without isel; goes through the
// shared enc_cmove_reg encoding.
instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Int cmove with a 16-bit immediate source; goes through the shared
// enc_cmove_imm encoding. Carries no isel predicate.
instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Cmove using isel.
instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int cc = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register-to-register long cmove for CPUs without isel.
instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Long cmove with a 16-bit immediate source.
instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Cmove using isel.
instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int cc = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional move for RegN. Only cmov(reg, reg).
instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Narrow-oop cmove whose source is the immN_0 constant.
instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Cmove using isel.
instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
  match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int cc = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register-to-register pointer cmove for CPUs without isel.
instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{
  match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Pointer cmove whose source is the immP_0 constant.
instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{
  match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Float cmove: a conditional branch around a register move.
instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmovef);
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
    __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
    __ fmr($dst$$FloatRegister, $src$$FloatRegister);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

// Double cmove: same branch-around-fmr sequence as cmovF_reg.
instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmovef);
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
    __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
    __ fmr($dst$$FloatRegister, $src$$FloatRegister);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Conditional_store--------------------------------------------------
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented here with the cmpxchgd macro.

// As compareAndSwapL, but return flag register instead of boolean value in
// int register.
// Used by sun/misc/AtomicLongCSImpl.java.
// Mem_ptr must be a memory operand, else this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this is not set this node
// can be rematerialized which leads to errors.
instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
  match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
  // Both uses of the address are printed as the operand; the second one used
  // to be the literal text "mem_ptr".
  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *$mem_ptr = $newVal; as bool" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // No boolean result register is requested (noreg); the outcome is
    // delivered in the condition register only.
    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                noreg, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

// As compareAndSwapP, but return flag register instead of boolean value in
// int register.
// This instruction is matched if UseTLAB is off.
// Mem_ptr must be a memory operand, else this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this is not set this node
// can be rematerialized which leads to errors.
instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
  match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
  // Both uses of the address are printed as the operand; the second one used
  // to be the literal text "mem_ptr".
  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *$mem_ptr = $newVal; as bool" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                noreg, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

// Implement LoadPLocked. Must be ordered against changes of the memory location
// by storePConditional.
// Don't know whether this is ever used.
instruct loadPLocked(iRegPdst dst, memory mem) %{
  match(Set dst (LoadPLocked mem));
  ins_cost(MEMORY_REF_COST);

  // Three instructions are listed, matching the fixed size of 12 bytes.
  format %{ "LD $dst, $mem \t// loadPLocked\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//----------Compare-And-Swap---------------------------------------------------

// CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
// (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))" cannot be
// matched.

// 32-bit compare-and-swap on an int; the boolean outcome lands in res.
// The encoding uses CCR0 and R0 directly rather than through operands.
instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, true);
  %}
  ins_pipe(pipe_class_default);
%}

// 32-bit compare-and-swap on a narrow oop; same word-sized encoding as the
// int variant.
instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
  match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, true);
  %}
  ins_pipe(pipe_class_default);
%}

// 64-bit compare-and-swap on a long; the boolean outcome lands in res.
instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

// 64-bit compare-and-swap on a pointer; same doubleword-sized encoding as
// the long variant.
instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

// GetAndAddI via the shared enc_GetAndAddI encoding.
instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
  match(Set res (GetAndAddI mem_ptr src));
  format %{ "GetAndAddI $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

// GetAndAddL via the shared enc_GetAndAddL encoding.
instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
  match(Set res (GetAndAddL mem_ptr src));
  format %{ "GetAndAddL $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

// GetAndSetI via the shared enc_GetAndSetI encoding.
instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
  match(Set res (GetAndSetI mem_ptr src));
  format %{ "GetAndSetI $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

// GetAndSetL via the shared enc_GetAndSetL encoding.
instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
  match(Set res (GetAndSetL mem_ptr src));
  format %{ "GetAndSetL $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

// GetAndSetP reuses the long encoding (enc_GetAndSetL).
instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
  match(Set res (GetAndSetP mem_ptr src));
  format %{ "GetAndSetP $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

// GetAndSetN reuses the int encoding (enc_GetAndSetI).
instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
  match(Set res (GetAndSetN mem_ptr src));
  format %{ "GetAndSetN $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

//----------Arithmetic Instructions--------------------------------------------
// Addition Instructions

// Register Addition
instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
  match(Set dst (AddI src1 src2));
  format %{ "ADD $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
1.7649 +instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.7650 + // no match-rule 1.7651 + effect(DEF dst, USE src1, USE src2); 1.7652 + format %{ "ADD $dst, $src1, $src2" %} 1.7653 + size(4); 1.7654 + ins_encode %{ 1.7655 + // TODO: PPC port $archOpcode(ppc64Opcode_add); 1.7656 + __ add($dst$$Register, $src1$$Register, $src2$$Register); 1.7657 + %} 1.7658 + ins_pipe(pipe_class_default); 1.7659 +%} 1.7660 + 1.7661 +instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{ 1.7662 + match(Set dst (AddI (AddI (AddI src1 src2) src3) src4)); 1.7663 + ins_cost(DEFAULT_COST*3); 1.7664 + 1.7665 + expand %{ 1.7666 + // FIXME: we should do this in the ideal world. Rebalances the add chain into (src1 + src2) + (src3 + src4). 1.7667 + iRegIdst tmp1; 1.7668 + iRegIdst tmp2; 1.7669 + addI_reg_reg(tmp1, src1, src2); 1.7670 + addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg. 1.7671 + addI_reg_reg(dst, tmp1, tmp2); 1.7672 + %} 1.7673 +%} 1.7674 + 1.7675 +// Immediate Addition 1.7676 +instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{ 1.7677 + match(Set dst (AddI src1 src2)); 1.7678 + format %{ "ADDI $dst, $src1, $src2" %} 1.7679 + size(4); 1.7680 + ins_encode %{ 1.7681 + // TODO: PPC port $archOpcode(ppc64Opcode_addi); 1.7682 + __ addi($dst$$Register, $src1$$Register, $src2$$constant); 1.7683 + %} 1.7684 + ins_pipe(pipe_class_default); 1.7685 +%} 1.7686 + 1.7687 +// Immediate Addition with 16-bit shifted operand 1.7688 +instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{ 1.7689 + match(Set dst (AddI src1 src2)); 1.7690 + format %{ "ADDIS $dst, $src1, $src2" %} 1.7691 + size(4); 1.7692 + ins_encode %{ 1.7693 + // TODO: PPC port $archOpcode(ppc64Opcode_addis); 1.7694 + __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16); 1.7695 + %} 1.7696 + ins_pipe(pipe_class_default); 1.7697 +%} 1.7698 + 1.7699 +// Long Addition 1.7700 +instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
1.7701 + match(Set dst (AddL src1 src2)); 1.7702 + format %{ "ADD $dst, $src1, $src2 \t// long" %} 1.7703 + size(4); 1.7704 + ins_encode %{ 1.7705 + // TODO: PPC port $archOpcode(ppc64Opcode_add); 1.7706 + __ add($dst$$Register, $src1$$Register, $src2$$Register); 1.7707 + %} 1.7708 + ins_pipe(pipe_class_default); 1.7709 +%} 1.7710 + 1.7711 +// Expand does not work with above instruct. (??) 1.7712 +instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.7713 + // no match-rule 1.7714 + effect(DEF dst, USE src1, USE src2); 1.7715 + format %{ "ADD $dst, $src1, $src2 \t// long" %} 1.7716 + size(4); 1.7717 + ins_encode %{ 1.7718 + // TODO: PPC port $archOpcode(ppc64Opcode_add); 1.7719 + __ add($dst$$Register, $src1$$Register, $src2$$Register); 1.7720 + %} 1.7721 + ins_pipe(pipe_class_default); 1.7722 +%} 1.7723 + 1.7724 +instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{ 1.7725 + match(Set dst (AddL (AddL (AddL src1 src2) src3) src4)); 1.7726 + ins_cost(DEFAULT_COST*3); 1.7727 + 1.7728 + expand %{ 1.7729 + // FIXME: we should do this in the ideal world. Rebalances the add chain into (src1 + src2) + (src3 + src4). 1.7730 + iRegLdst tmp1; 1.7731 + iRegLdst tmp2; 1.7732 + addL_reg_reg(tmp1, src1, src2); 1.7733 + addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg. 1.7734 + addL_reg_reg(dst, tmp1, tmp2); 1.7735 + %} 1.7736 +%} 1.7737 + 1.7738 +// AddL + ConvL2I. 1.7739 +instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.7740 + match(Set dst (ConvL2I (AddL src1 src2))); 1.7741 + 1.7742 + format %{ "ADD $dst, $src1, $src2 \t// long + l2i" %} 1.7743 + size(4); 1.7744 + ins_encode %{ 1.7745 + // TODO: PPC port $archOpcode(ppc64Opcode_add); 1.7746 + __ add($dst$$Register, $src1$$Register, $src2$$Register); 1.7747 + %} 1.7748 + ins_pipe(pipe_class_default); 1.7749 +%} 1.7750 + 1.7751 +// No constant pool entries required.
1.7752 +instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{ 1.7753 + match(Set dst (AddL src1 src2)); 1.7754 + 1.7755 + format %{ "ADDI $dst, $src1, $src2" %} 1.7756 + size(4); 1.7757 + ins_encode %{ 1.7758 + // TODO: PPC port $archOpcode(ppc64Opcode_addi); 1.7759 + __ addi($dst$$Register, $src1$$Register, $src2$$constant); 1.7760 + %} 1.7761 + ins_pipe(pipe_class_default); 1.7762 +%} 1.7763 + 1.7764 +// Long Immediate Addition with 16-bit shifted operand (ADDIS is passed the constant shifted right by 16). 1.7765 +// No constant pool entries required. 1.7766 +instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{ 1.7767 + match(Set dst (AddL src1 src2)); 1.7768 + 1.7769 + format %{ "ADDIS $dst, $src1, $src2" %} 1.7770 + size(4); 1.7771 + ins_encode %{ 1.7772 + // TODO: PPC port $archOpcode(ppc64Opcode_addis); 1.7773 + __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16); 1.7774 + %} 1.7775 + ins_pipe(pipe_class_default); 1.7776 +%} 1.7777 + 1.7778 +// Pointer Register Addition (pointer base in src1, long offset in src2) 1.7779 +instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{ 1.7780 + match(Set dst (AddP src1 src2)); 1.7781 + format %{ "ADD $dst, $src1, $src2" %} 1.7782 + size(4); 1.7783 + ins_encode %{ 1.7784 + // TODO: PPC port $archOpcode(ppc64Opcode_add); 1.7785 + __ add($dst$$Register, $src1$$Register, $src2$$Register); 1.7786 + %} 1.7787 + ins_pipe(pipe_class_default); 1.7788 +%} 1.7789 + 1.7790 +// Pointer Immediate Addition 1.7791 +// No constant pool entries required. 1.7792 +instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{ 1.7793 + match(Set dst (AddP src1 src2)); 1.7794 + 1.7795 + format %{ "ADDI $dst, $src1, $src2" %} 1.7796 + size(4); 1.7797 + ins_encode %{ 1.7798 + // TODO: PPC port $archOpcode(ppc64Opcode_addi); 1.7799 + __ addi($dst$$Register, $src1$$Register, $src2$$constant); 1.7800 + %} 1.7801 + ins_pipe(pipe_class_default); 1.7802 +%} 1.7803 + 1.7804 +// Pointer Immediate Addition with 16-bit shifted operand.
1.7805 +// No constant pool entries required. 1.7806 +instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{ 1.7807 + match(Set dst (AddP src1 src2)); 1.7808 + 1.7809 + format %{ "ADDIS $dst, $src1, $src2" %} 1.7810 + size(4); 1.7811 + ins_encode %{ 1.7812 + // TODO: PPC port $archOpcode(ppc64Opcode_addis); 1.7813 + __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16); 1.7814 + %} 1.7815 + ins_pipe(pipe_class_default); 1.7816 +%} 1.7817 + 1.7818 +//--------------------- 1.7819 +// Subtraction Instructions 1.7820 + 1.7821 +// Register Subtraction: SubI(src1, src2) is emitted as subf(dst, src2, src1), i.e. with the sources swapped. 1.7822 +instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.7823 + match(Set dst (SubI src1 src2)); 1.7824 + format %{ "SUBF $dst, $src2, $src1" %} 1.7825 + size(4); 1.7826 + ins_encode %{ 1.7827 + // TODO: PPC port $archOpcode(ppc64Opcode_subf); 1.7828 + __ subf($dst$$Register, $src2$$Register, $src1$$Register); 1.7829 + %} 1.7830 + ins_pipe(pipe_class_default); 1.7831 +%} 1.7832 + 1.7833 +// Immediate Subtraction 1.7834 +// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal), 1.7835 +// so this rule seems to be unused. It is encoded as ADDI with the negated constant. 1.7836 +instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{ 1.7837 + match(Set dst (SubI src1 src2)); 1.7838 + format %{ "SUBI $dst, $src1, $src2" %} 1.7839 + size(4); 1.7840 + ins_encode %{ 1.7841 + // TODO: PPC port $archOpcode(ppc64Opcode_addi); 1.7842 + __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1)); 1.7843 + %} 1.7844 + ins_pipe(pipe_class_default); 1.7845 +%} 1.7846 + 1.7847 +// SubI from constant (using subfic).
1.7848 +instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{ 1.7849 + match(Set dst (SubI src1 src2)); 1.7850 + format %{ "SUBI $dst, $src1, $src2" %} 1.7851 + 1.7852 + size(4); 1.7853 + ins_encode %{ 1.7854 + // TODO: PPC port $archOpcode(ppc64Opcode_subfic); 1.7855 + __ subfic($dst$$Register, $src2$$Register, $src1$$constant); 1.7856 + %} 1.7857 + ins_pipe(pipe_class_default); 1.7858 +%} 1.7859 + 1.7860 +// Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for 1.7861 +// positive integers and 0xF...F for negative ones. Used as a helper by the absI_reg_Ex expand. 1.7862 +instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{ 1.7863 + // no match-rule, false predicate 1.7864 + effect(DEF dst, USE src); 1.7865 + predicate(false); 1.7866 + 1.7867 + format %{ "SRAWI $dst, $src, #31" %} 1.7868 + size(4); 1.7869 + ins_encode %{ 1.7870 + // TODO: PPC port $archOpcode(ppc64Opcode_srawi); 1.7871 + __ srawi($dst$$Register, $src$$Register, 0x1f); 1.7872 + %} 1.7873 + ins_pipe(pipe_class_default); 1.7874 +%} 1.7875 + 1.7876 +instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{ 1.7877 + match(Set dst (AbsI src)); 1.7878 + ins_cost(DEFAULT_COST*3); 1.7879 + 1.7880 + expand %{ 1.7881 + iRegIdst tmp1; 1.7882 + iRegIdst tmp2; 1.7883 + signmask32I_regI(tmp1, src); 1.7884 + xorI_reg_reg(tmp2, tmp1, src); 1.7885 + subI_reg_reg(dst, tmp2, tmp1); 1.7886 + %} 1.7887 +%} 1.7888 + 1.7889 +instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{ 1.7890 + match(Set dst (SubI zero src2)); 1.7891 + format %{ "NEG $dst, $src2" %} 1.7892 + size(4); 1.7893 + ins_encode %{ 1.7894 + // TODO: PPC port $archOpcode(ppc64Opcode_neg); 1.7895 + __ neg($dst$$Register, $src2$$Register); 1.7896 + %} 1.7897 + ins_pipe(pipe_class_default); 1.7898 +%} 1.7899 + 1.7900 +// Long subtraction (the sources are passed to subf swapped: src2, then src1) 1.7901 +instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.7902 + match(Set dst (SubL src1 src2)); 1.7903 + format %{ "SUBF $dst, $src2, $src1 \t// long" %} 1.7904 + size(4); 1.7905 + ins_encode %{ 1.7906 + // TODO: 
PPC port $archOpcode(ppc64Opcode_subf); 1.7907 + __ subf($dst$$Register, $src2$$Register, $src1$$Register); 1.7908 + %} 1.7909 + ins_pipe(pipe_class_default); 1.7910 +%} 1.7911 + 1.7912 +// SubL + convL2I. 1.7913 +instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.7914 + match(Set dst (ConvL2I (SubL src1 src2))); 1.7915 + 1.7916 + format %{ "SUBF $dst, $src2, $src1 \t// long + l2i" %} 1.7917 + size(4); 1.7918 + ins_encode %{ 1.7919 + // TODO: PPC port $archOpcode(ppc64Opcode_subf); 1.7920 + __ subf($dst$$Register, $src2$$Register, $src1$$Register); 1.7921 + %} 1.7922 + ins_pipe(pipe_class_default); 1.7923 +%} 1.7924 + 1.7925 +// Immediate Subtraction 1.7926 +// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal), 1.7927 +// so this rule seems to be unused. 1.7928 +// No constant pool entries required. 1.7929 +instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{ 1.7930 + match(Set dst (SubL src1 src2)); 1.7931 + 1.7932 + format %{ "SUBI $dst, $src1, $src2 \t// long" %} 1.7933 + size(4); 1.7934 + ins_encode %{ 1.7935 + // TODO: PPC port $archOpcode(ppc64Opcode_addi); 1.7936 + __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1)); 1.7937 + %} 1.7938 + ins_pipe(pipe_class_default); 1.7939 +%} 1.7940 + 1.7941 +// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for 1.7942 +// positive longs and 0xF...F for negative ones. This variant defines an int register (iRegIdst). 1.7943 +instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{ 1.7944 + // no match-rule, false predicate 1.7945 + effect(DEF dst, USE src); 1.7946 + predicate(false); 1.7947 + 1.7948 + format %{ "SRADI $dst, $src, #63" %} 1.7949 + size(4); 1.7950 + ins_encode %{ 1.7951 + // TODO: PPC port $archOpcode(ppc64Opcode_sradi); 1.7952 + __ sradi($dst$$Register, $src$$Register, 0x3f); 1.7953 + %} 1.7954 + ins_pipe(pipe_class_default); 1.7955 +%} 1.7956 + 1.7957 +// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for 1.7958 +// positive longs and 0xF...F for negative ones.
1.7959 +instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{ 1.7960 + // no match-rule, false predicate 1.7961 + effect(DEF dst, USE src); 1.7962 + predicate(false); 1.7963 + 1.7964 + format %{ "SRADI $dst, $src, #63" %} 1.7965 + size(4); 1.7966 + ins_encode %{ 1.7967 + // TODO: PPC port $archOpcode(ppc64Opcode_sradi); 1.7968 + __ sradi($dst$$Register, $src$$Register, 0x3f); 1.7969 + %} 1.7970 + ins_pipe(pipe_class_default); 1.7971 +%} 1.7972 + 1.7973 +// Long negation 1.7974 +instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{ 1.7975 + match(Set dst (SubL zero src2)); 1.7976 + format %{ "NEG $dst, $src2 \t// long" %} 1.7977 + size(4); 1.7978 + ins_encode %{ 1.7979 + // TODO: PPC port $archOpcode(ppc64Opcode_neg); 1.7980 + __ neg($dst$$Register, $src2$$Register); 1.7981 + %} 1.7982 + ins_pipe(pipe_class_default); 1.7983 +%} 1.7984 + 1.7985 +// NegL + ConvL2I. 1.7986 +instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{ 1.7987 + match(Set dst (ConvL2I (SubL zero src2))); 1.7988 + 1.7989 + format %{ "NEG $dst, $src2 \t// long + l2i" %} 1.7990 + size(4); 1.7991 + ins_encode %{ 1.7992 + // TODO: PPC port $archOpcode(ppc64Opcode_neg); 1.7993 + __ neg($dst$$Register, $src2$$Register); 1.7994 + %} 1.7995 + ins_pipe(pipe_class_default); 1.7996 +%} 1.7997 + 1.7998 +// Multiplication Instructions 1.7999 +// Integer Multiplication 1.8000 + 1.8001 +// Register Multiplication 1.8002 +instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8003 + match(Set dst (MulI src1 src2)); 1.8004 + ins_cost(DEFAULT_COST); 1.8005 + 1.8006 + format %{ "MULLW $dst, $src1, $src2" %} 1.8007 + size(4); 1.8008 + ins_encode %{ 1.8009 + // TODO: PPC port $archOpcode(ppc64Opcode_mullw); 1.8010 + __ mullw($dst$$Register, $src1$$Register, $src2$$Register); 1.8011 + %} 1.8012 + ins_pipe(pipe_class_default); 1.8013 +%} 1.8014 + 1.8015 +// Immediate Multiplication 1.8016 +instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{ 1.8017 +
match(Set dst (MulI src1 src2)); 1.8018 + ins_cost(DEFAULT_COST); 1.8019 + 1.8020 + format %{ "MULLI $dst, $src1, $src2" %} 1.8021 + size(4); 1.8022 + ins_encode %{ 1.8023 + // TODO: PPC port $archOpcode(ppc64Opcode_mulli); 1.8024 + __ mulli($dst$$Register, $src1$$Register, $src2$$constant); 1.8025 + %} 1.8026 + ins_pipe(pipe_class_default); 1.8027 +%} 1.8028 + 1.8029 +instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.8030 + match(Set dst (MulL src1 src2)); 1.8031 + ins_cost(DEFAULT_COST); 1.8032 + 1.8033 + format %{ "MULLD $dst, $src1, $src2 \t// long" %} 1.8034 + size(4); 1.8035 + ins_encode %{ 1.8036 + // TODO: PPC port $archOpcode(ppc64Opcode_mulld); 1.8037 + __ mulld($dst$$Register, $src1$$Register, $src2$$Register); 1.8038 + %} 1.8039 + ins_pipe(pipe_class_default); 1.8040 +%} 1.8041 + 1.8042 +// Multiply high for optimized long division by constant. 1.8043 +instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.8044 + match(Set dst (MulHiL src1 src2)); 1.8045 + ins_cost(DEFAULT_COST); 1.8046 + 1.8047 + format %{ "MULHD $dst, $src1, $src2 \t// long" %} 1.8048 + size(4); 1.8049 + ins_encode %{ 1.8050 + // TODO: PPC port $archOpcode(ppc64Opcode_mulhd); 1.8051 + __ mulhd($dst$$Register, $src1$$Register, $src2$$Register); 1.8052 + %} 1.8053 + ins_pipe(pipe_class_default); 1.8054 +%} 1.8055 + 1.8056 +// Long Immediate Multiplication 1.8057 +instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{ 1.8058 + match(Set dst (MulL src1 src2)); 1.8059 + ins_cost(DEFAULT_COST); 1.8060 + 1.8061 + format %{ "MULLI $dst, $src1, $src2" %} 1.8062 + size(4); 1.8063 + ins_encode %{ 1.8064 + // TODO: PPC port $archOpcode(ppc64Opcode_mulli); 1.8065 + __ mulli($dst$$Register, $src1$$Register, $src2$$constant); 1.8066 + %} 1.8067 + ins_pipe(pipe_class_default); 1.8068 +%} 1.8069 + 1.8070 +// Integer Division with Immediate -1: Negate.
1.8071 +instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{ 1.8072 + match(Set dst (DivI src1 src2)); 1.8073 + ins_cost(DEFAULT_COST); 1.8074 + 1.8075 + format %{ "NEG $dst, $src1 \t// /-1" %} 1.8076 + size(4); 1.8077 + ins_encode %{ 1.8078 + // TODO: PPC port $archOpcode(ppc64Opcode_neg); 1.8079 + __ neg($dst$$Register, $src1$$Register); 1.8080 + %} 1.8081 + ins_pipe(pipe_class_default); 1.8082 +%} 1.8083 + 1.8084 +// Integer Division with constant, but not -1. 1.8085 +// We should be able to improve this by checking the type of src2. 1.8086 +// It might well be that src2 is known to be positive. 1.8087 +instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8088 + match(Set dst (DivI src1 src2)); 1.8089 + predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1 1.8090 + ins_cost(2*DEFAULT_COST); 1.8091 + 1.8092 + format %{ "DIVW $dst, $src1, $src2 \t// /not-1" %} 1.8093 + size(4); 1.8094 + ins_encode %{ 1.8095 + // TODO: PPC port $archOpcode(ppc64Opcode_divw); 1.8096 + __ divw($dst$$Register, $src1$$Register, $src2$$Register); 1.8097 + %} 1.8098 + ins_pipe(pipe_class_default); 1.8099 +%} 1.8100 + 1.8101 +instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{ 1.8102 + effect(USE_DEF dst, USE src1, USE crx); 1.8103 + predicate(false); 1.8104 + 1.8105 + ins_variable_size_depending_on_alignment(true); 1.8106 + 1.8107 + format %{ "CMOVE $dst, neg($src1), $crx" %} 1.8108 + // Worst case is branch + move + stop, no stop without scheduler. 1.8109 + size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ?
12 : 8); 1.8110 + ins_encode %{ 1.8111 + // TODO: PPC port $archOpcode(ppc64Opcode_cmove); 1.8112 + Label done; 1.8113 + __ bne($crx$$CondRegister, done); 1.8114 + __ neg($dst$$Register, $src1$$Register); 1.8115 + // TODO PPC port __ endgroup_if_needed(_size == 12); 1.8116 + __ bind(done); 1.8117 + %} 1.8118 + ins_pipe(pipe_class_default); 1.8119 +%} 1.8120 + 1.8121 +// Integer Division with Registers not containing constants. Divides, then replaces the quotient with neg(src1) if src2 == -1. 1.8122 +instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8123 + match(Set dst (DivI src1 src2)); 1.8124 + ins_cost(10*DEFAULT_COST); 1.8125 + 1.8126 + expand %{ 1.8127 + immI16 imm %{ (int)-1 %} 1.8128 + flagsReg tmp1; 1.8129 + cmpI_reg_imm16(tmp1, src2, imm); // check src2 == -1 1.8130 + divI_reg_regnotMinus1(dst, src1, src2); // dst = src1 / src2 1.8131 + cmovI_bne_negI_reg(dst, tmp1, src1); // cmove dst = neg(src1) if src2 == -1 1.8132 + %} 1.8133 +%} 1.8134 + 1.8135 +// Long Division with Immediate -1: Negate. 1.8136 +instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{ 1.8137 + match(Set dst (DivL src1 src2)); 1.8138 + ins_cost(DEFAULT_COST); 1.8139 + 1.8140 + format %{ "NEG $dst, $src1 \t// /-1, long" %} 1.8141 + size(4); 1.8142 + ins_encode %{ 1.8143 + // TODO: PPC port $archOpcode(ppc64Opcode_neg); 1.8144 + __ neg($dst$$Register, $src1$$Register); 1.8145 + %} 1.8146 + ins_pipe(pipe_class_default); 1.8147 +%} 1.8148 + 1.8149 +// Long Division with constant, but not -1. 1.8150 +instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.8151 + match(Set dst (DivL src1 src2)); 1.8152 + predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
1.8153 + ins_cost(2*DEFAULT_COST); 1.8154 + 1.8155 + format %{ "DIVD $dst, $src1, $src2 \t// /not-1, long" %} 1.8156 + size(4); 1.8157 + ins_encode %{ 1.8158 + // TODO: PPC port $archOpcode(ppc64Opcode_divd); 1.8159 + __ divd($dst$$Register, $src1$$Register, $src2$$Register); 1.8160 + %} 1.8161 + ins_pipe(pipe_class_default); 1.8162 +%} 1.8163 + 1.8164 +instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{ 1.8165 + effect(USE_DEF dst, USE src1, USE crx); 1.8166 + predicate(false); 1.8167 + 1.8168 + ins_variable_size_depending_on_alignment(true); 1.8169 + 1.8170 + format %{ "CMOVE $dst, neg($src1), $crx" %} 1.8171 + // Worst case is branch + move + stop, no stop without scheduler. 1.8172 + size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8); 1.8173 + ins_encode %{ 1.8174 + // TODO: PPC port $archOpcode(ppc64Opcode_cmove); 1.8175 + Label done; 1.8176 + __ bne($crx$$CondRegister, done); 1.8177 + __ neg($dst$$Register, $src1$$Register); 1.8178 + // TODO PPC port __ endgroup_if_needed(_size == 12); 1.8179 + __ bind(done); 1.8180 + %} 1.8181 + ins_pipe(pipe_class_default); 1.8182 +%} 1.8183 + 1.8184 +// Long Division with Registers not containing constants. Divides, then replaces the quotient with neg(src1) if src2 == -1. 1.8185 +instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ 1.8186 + match(Set dst (DivL src1 src2)); 1.8187 + ins_cost(10*DEFAULT_COST); 1.8188 + 1.8189 + expand %{ 1.8190 + immL16 imm %{ (int)-1 %} 1.8191 + flagsReg tmp1; 1.8192 + cmpL_reg_imm16(tmp1, src2, imm); // check src2 == -1 1.8193 + divL_reg_regnotMinus1(dst, src1, src2); // dst = src1 / src2 1.8194 + cmovL_bne_negL_reg(dst, tmp1, src1); // cmove dst = neg(src1) if src2 == -1 1.8195 + %} 1.8196 +%} 1.8197 + 1.8198 +// Integer Remainder with registers: dst = src1 - src2 * quotient, with the quotient computed as in divI_reg_reg_Ex.
1.8199 +instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8200 + match(Set dst (ModI src1 src2)); 1.8201 + ins_cost(10*DEFAULT_COST); 1.8202 + 1.8203 + expand %{ 1.8204 + immI16 imm %{ (int)-1 %} 1.8205 + flagsReg tmp1; 1.8206 + iRegIdst tmp2; 1.8207 + iRegIdst tmp3; 1.8208 + cmpI_reg_imm16(tmp1, src2, imm); // check src2 == -1 1.8209 + divI_reg_regnotMinus1(tmp2, src1, src2); // tmp2 = src1 / src2 1.8210 + cmovI_bne_negI_reg(tmp2, tmp1, src1); // cmove tmp2 = neg(src1) if src2 == -1 1.8211 + mulI_reg_reg(tmp3, src2, tmp2); // tmp3 = src2 * tmp2 1.8212 + subI_reg_reg(dst, src1, tmp3); // dst = src1 - tmp3 1.8213 + %} 1.8214 +%} 1.8215 + 1.8216 +// Long Remainder with registers: dst = src1 - src2 * quotient, where the quotient is replaced by neg(src1) if src2 == -1. NOTE(review): the cr0 operand is not referenced in the expand below -- confirm it is needed. 1.8217 +instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{ 1.8218 + match(Set dst (ModL src1 src2)); 1.8219 + ins_cost(10*DEFAULT_COST); 1.8220 + 1.8221 + expand %{ 1.8222 + immL16 imm %{ (int)-1 %} 1.8223 + flagsReg tmp1; 1.8224 + iRegLdst tmp2; 1.8225 + iRegLdst tmp3; 1.8226 + cmpL_reg_imm16(tmp1, src2, imm); // check src2 == -1 1.8227 + divL_reg_regnotMinus1(tmp2, src1, src2); // tmp2 = src1 / src2 1.8228 + cmovL_bne_negL_reg(tmp2, tmp1, src1); // cmove tmp2 = neg(src1) if src2 == -1 1.8229 + mulL_reg_reg(tmp3, src2, tmp2); // tmp3 = src2 * tmp2 1.8230 + subL_reg_reg(dst, src1, tmp3); // dst = src1 - tmp3 1.8231 + %} 1.8232 +%} 1.8233 + 1.8234 +// Integer Shift Instructions 1.8235 + 1.8236 +// Register Shift Left 1.8237 + 1.8238 +// Clear the upper #mask bits, i.e. keep only the lowest (64 - #mask) bits. 1.8239 +// Used to normalize shift amounts in registers.
1.8240 +instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{ 1.8241 + // no match-rule, false predicate 1.8242 + effect(DEF dst, USE src, USE mask); 1.8243 + predicate(false); 1.8244 + 1.8245 + format %{ "MASK $dst, $src, $mask \t// clear $mask upper bits" %} 1.8246 + size(4); 1.8247 + ins_encode %{ 1.8248 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicl); 1.8249 + __ clrldi($dst$$Register, $src$$Register, $mask$$constant); 1.8250 + %} 1.8251 + ins_pipe(pipe_class_default); 1.8252 +%} 1.8253 + 1.8254 +instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8255 + // no match-rule, false predicate 1.8256 + effect(DEF dst, USE src1, USE src2); 1.8257 + predicate(false); 1.8258 + 1.8259 + format %{ "SLW $dst, $src1, $src2" %} 1.8260 + size(4); 1.8261 + ins_encode %{ 1.8262 + // TODO: PPC port $archOpcode(ppc64Opcode_slw); 1.8263 + __ slw($dst$$Register, $src1$$Register, $src2$$Register); 1.8264 + %} 1.8265 + ins_pipe(pipe_class_default); 1.8266 +%} 1.8267 + 1.8268 +instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8269 + match(Set dst (LShiftI src1 src2)); 1.8270 + ins_cost(DEFAULT_COST*2); 1.8271 + expand %{ 1.8272 + uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %} 1.8273 + iRegIdst tmpI; 1.8274 + maskI_reg_imm(tmpI, src2, mask); 1.8275 + lShiftI_reg_reg(dst, src1, tmpI); 1.8276 + %} 1.8277 +%} 1.8278 + 1.8279 +// Register Shift Left Immediate 1.8280 +instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{ 1.8281 + match(Set dst (LShiftI src1 src2)); 1.8282 + 1.8283 + format %{ "SLWI $dst, $src1, ($src2 & 0x1f)" %} 1.8284 + size(4); 1.8285 + ins_encode %{ 1.8286 + // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); 1.8287 + __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f); 1.8288 + %} 1.8289 + ins_pipe(pipe_class_default); 1.8290 +%} 1.8291 + 1.8292 +// AndI with negpow2-constant + LShiftI 1.8293 +instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2
src2, uimmI5 src3) %{ 1.8294 + match(Set dst (LShiftI (AndI src1 src2) src3)); 1.8295 + predicate(UseRotateAndMaskInstructionsPPC64); 1.8296 + 1.8297 + format %{ "RLWINM $dst, lShiftI(AndI($src1, $src2), $src3)" %} 1.8298 + size(4); 1.8299 + ins_encode %{ 1.8300 + // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi 1.8301 + long src2 = $src2$$constant; 1.8302 + long src3 = $src3$$constant; 1.8303 + long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2); 1.8304 + if (maskbits >= 32) { 1.8305 + __ li($dst$$Register, 0); // addi 1.8306 + } else { 1.8307 + __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f); 1.8308 + } 1.8309 + %} 1.8310 + ins_pipe(pipe_class_default); 1.8311 +%} 1.8312 + 1.8313 +// RShiftI + AndI with negpow2-constant + LShiftI 1.8314 +instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{ 1.8315 + match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3)); 1.8316 + predicate(UseRotateAndMaskInstructionsPPC64); 1.8317 + 1.8318 + format %{ "RLWINM $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %} 1.8319 + size(4); 1.8320 + ins_encode %{ 1.8321 + // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi 1.8322 + long src2 = $src2$$constant; 1.8323 + long src3 = $src3$$constant; 1.8324 + long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2); 1.8325 + if (maskbits >= 32) { 1.8326 + __ li($dst$$Register, 0); // addi 1.8327 + } else { 1.8328 + __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f); 1.8329 + } 1.8330 + %} 1.8331 + ins_pipe(pipe_class_default); 1.8332 +%} 1.8333 + 1.8334 +instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{ 1.8335 + // no match-rule, false predicate 1.8336 + effect(DEF dst, USE src1, USE src2); 1.8337 + predicate(false); 1.8338 + 1.8339 + format %{ "SLD $dst, $src1, $src2" %} 1.8340 +
size(4); 1.8341 + ins_encode %{ 1.8342 + // TODO: PPC port $archOpcode(ppc64Opcode_sld); 1.8343 + __ sld($dst$$Register, $src1$$Register, $src2$$Register); 1.8344 + %} 1.8345 + ins_pipe(pipe_class_default); 1.8346 +%} 1.8347 + 1.8348 +// Register Shift Left Long: the shift amount is masked to its low 6 bits before SLD. 1.8349 +instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{ 1.8350 + match(Set dst (LShiftL src1 src2)); 1.8351 + ins_cost(DEFAULT_COST*2); 1.8352 + expand %{ 1.8353 + uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %} 1.8354 + iRegIdst tmpI; 1.8355 + maskI_reg_imm(tmpI, src2, mask); 1.8356 + lShiftL_regL_regI(dst, src1, tmpI); 1.8357 + %} 1.8358 +%} 1.8359 + 1.8360 +// Register Shift Left Immediate 1.8361 +instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{ 1.8362 + match(Set dst (LShiftL src1 src2)); 1.8363 + format %{ "SLDI $dst, $src1, ($src2 & 0x3f)" %} 1.8364 + size(4); 1.8365 + ins_encode %{ 1.8366 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicr); 1.8367 + __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f); 1.8368 + %} 1.8369 + ins_pipe(pipe_class_default); 1.8370 +%} 1.8371 + 1.8372 +// If we shift more than 32 bits, we need not convert I2L. 1.8373 +instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{ 1.8374 + match(Set dst (LShiftL (ConvI2L src1) src2)); 1.8375 + ins_cost(DEFAULT_COST); 1.8376 + 1.8377 + size(4); 1.8378 + format %{ "SLDI $dst, i2l($src1), $src2" %} 1.8379 + ins_encode %{ 1.8380 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicr); 1.8381 + __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f); 1.8382 + %} 1.8383 + ins_pipe(pipe_class_default); 1.8384 +%} 1.8385 + 1.8386 +// Shift a positive int to the left. 1.8387 +// Clrlsldi clears the upper 32 bits and shifts.
1.8388 +instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{ 1.8389 + match(Set dst (LShiftL (ConvI2L src1) src2)); 1.8390 + predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int()); 1.8391 + 1.8392 + format %{ "SLDI $dst, i2l(positive_int($src1)), $src2" %} 1.8393 + size(4); 1.8394 + ins_encode %{ 1.8395 + // TODO: PPC port $archOpcode(ppc64Opcode_rldic); 1.8396 + __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant); 1.8397 + %} 1.8398 + ins_pipe(pipe_class_default); 1.8399 +%} 1.8400 + 1.8401 +instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8402 + // no match-rule, false predicate 1.8403 + effect(DEF dst, USE src1, USE src2); 1.8404 + predicate(false); 1.8405 + 1.8406 + format %{ "SRAW $dst, $src1, $src2" %} 1.8407 + size(4); 1.8408 + ins_encode %{ 1.8409 + // TODO: PPC port $archOpcode(ppc64Opcode_sraw); 1.8410 + __ sraw($dst$$Register, $src1$$Register, $src2$$Register); 1.8411 + %} 1.8412 + ins_pipe(pipe_class_default); 1.8413 +%} 1.8414 + 1.8415 +// Register Arithmetic Shift Right: the shift amount is masked to its low 5 bits before SRAW. 1.8416 +instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8417 + match(Set dst (RShiftI src1 src2)); 1.8418 + ins_cost(DEFAULT_COST*2); 1.8419 + expand %{ 1.8420 + uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %} 1.8421 + iRegIdst tmpI; 1.8422 + maskI_reg_imm(tmpI, src2, mask); 1.8423 + arShiftI_reg_reg(dst, src1, tmpI); 1.8424 + %} 1.8425 +%} 1.8426 + 1.8427 +// Register Arithmetic Shift Right Immediate 1.8428 +instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{ 1.8429 + match(Set dst (RShiftI src1 src2)); 1.8430 + 1.8431 + format %{ "SRAWI $dst, $src1, ($src2 & 0x1f)" %} 1.8432 + size(4); 1.8433 + ins_encode %{ 1.8434 + // TODO: PPC port $archOpcode(ppc64Opcode_srawi); 1.8435 + __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f); 1.8436 + %} 1.8437 + ins_pipe(pipe_class_default); 1.8438 +%} 1.8439 + 1.8440
+instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{ 1.8441 + // no match-rule, false predicate 1.8442 + effect(DEF dst, USE src1, USE src2); 1.8443 + predicate(false); 1.8444 + 1.8445 + format %{ "SRAD $dst, $src1, $src2" %} 1.8446 + size(4); 1.8447 + ins_encode %{ 1.8448 + // TODO: PPC port $archOpcode(ppc64Opcode_srad); 1.8449 + __ srad($dst$$Register, $src1$$Register, $src2$$Register); 1.8450 + %} 1.8451 + ins_pipe(pipe_class_default); 1.8452 +%} 1.8453 + 1.8454 +// Register Shift Right Arithmetic Long 1.8455 +instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{ 1.8456 + match(Set dst (RShiftL src1 src2)); 1.8457 + ins_cost(DEFAULT_COST*2); 1.8458 + 1.8459 + expand %{ 1.8460 + uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %} 1.8461 + iRegIdst tmpI; 1.8462 + maskI_reg_imm(tmpI, src2, mask); 1.8463 + arShiftL_regL_regI(dst, src1, tmpI); 1.8464 + %} 1.8465 +%} 1.8466 + 1.8467 +// Register Shift Right Immediate 1.8468 +instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{ 1.8469 + match(Set dst (RShiftL src1 src2)); 1.8470 + 1.8471 + format %{ "SRADI $dst, $src1, ($src2 & 0x3f)" %} 1.8472 + size(4); 1.8473 + ins_encode %{ 1.8474 + // TODO: PPC port $archOpcode(ppc64Opcode_sradi); 1.8475 + __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f); 1.8476 + %} 1.8477 + ins_pipe(pipe_class_default); 1.8478 +%} 1.8479 + 1.8480 +// RShiftL + ConvL2I 1.8481 +instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{ 1.8482 + match(Set dst (ConvL2I (RShiftL src1 src2))); 1.8483 + 1.8484 + format %{ "SRADI $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %} 1.8485 + size(4); 1.8486 + ins_encode %{ 1.8487 + // TODO: PPC port $archOpcode(ppc64Opcode_sradi); 1.8488 + __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f); 1.8489 + %} 1.8490 + ins_pipe(pipe_class_default); 1.8491 +%} 1.8492 + 1.8493 +instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
1.8494 + // no match-rule, false predicate 1.8495 + effect(DEF dst, USE src1, USE src2); 1.8496 + predicate(false); 1.8497 + 1.8498 + format %{ "SRW $dst, $src1, $src2" %} 1.8499 + size(4); 1.8500 + ins_encode %{ 1.8501 + // TODO: PPC port $archOpcode(ppc64Opcode_srw); 1.8502 + __ srw($dst$$Register, $src1$$Register, $src2$$Register); 1.8503 + %} 1.8504 + ins_pipe(pipe_class_default); 1.8505 +%} 1.8506 + 1.8507 +// Register Shift Right 1.8508 +instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ 1.8509 + match(Set dst (URShiftI src1 src2)); 1.8510 + ins_cost(DEFAULT_COST*2); 1.8511 + 1.8512 + expand %{ 1.8513 + uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %} 1.8514 + iRegIdst tmpI; 1.8515 + maskI_reg_imm(tmpI, src2, mask); 1.8516 + urShiftI_reg_reg(dst, src1, tmpI); 1.8517 + %} 1.8518 +%} 1.8519 + 1.8520 +// Register Shift Right Immediate 1.8521 +instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{ 1.8522 + match(Set dst (URShiftI src1 src2)); 1.8523 + 1.8524 + format %{ "SRWI $dst, $src1, ($src2 & 0x1f)" %} 1.8525 + size(4); 1.8526 + ins_encode %{ 1.8527 + // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); 1.8528 + __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f); 1.8529 + %} 1.8530 + ins_pipe(pipe_class_default); 1.8531 +%} 1.8532 + 1.8533 +instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{ 1.8534 + // no match-rule, false predicate 1.8535 + effect(DEF dst, USE src1, USE src2); 1.8536 + predicate(false); 1.8537 + 1.8538 + format %{ "SRD $dst, $src1, $src2" %} 1.8539 + size(4); 1.8540 + ins_encode %{ 1.8541 + // TODO: PPC port $archOpcode(ppc64Opcode_srd); 1.8542 + __ srd($dst$$Register, $src1$$Register, $src2$$Register); 1.8543 + %} 1.8544 + ins_pipe(pipe_class_default); 1.8545 +%} 1.8546 + 1.8547 +// Register Shift Right Long 1.8548 +instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{ 1.8549 + match(Set dst (URShiftL src1 src2)); 1.8550 +
ins_cost(DEFAULT_COST*2); 1.8551 + 1.8552 + expand %{ 1.8553 + uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %} 1.8554 + iRegIdst tmpI; 1.8555 + maskI_reg_imm(tmpI, src2, mask); 1.8556 + urShiftL_regL_regI(dst, src1, tmpI); 1.8557 + %} 1.8558 +%} 1.8559 + 1.8560 +// Register Shift Right Immediate 1.8561 +instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{ 1.8562 + match(Set dst (URShiftL src1 src2)); 1.8563 + 1.8564 + format %{ "SRDI $dst, $src1, ($src2 & 0x3f)" %} 1.8565 + size(4); 1.8566 + ins_encode %{ 1.8567 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicl); 1.8568 + __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f); 1.8569 + %} 1.8570 + ins_pipe(pipe_class_default); 1.8571 +%} 1.8572 + 1.8573 +// URShiftL + ConvL2I. 1.8574 +instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{ 1.8575 + match(Set dst (ConvL2I (URShiftL src1 src2))); 1.8576 + 1.8577 + format %{ "SRDI $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %} 1.8578 + size(4); 1.8579 + ins_encode %{ 1.8580 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicl); 1.8581 + __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f); 1.8582 + %} 1.8583 + ins_pipe(pipe_class_default); 1.8584 +%} 1.8585 + 1.8586 +// Register Shift Right Immediate with a CastP2X 1.8587 +instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{ 1.8588 + match(Set dst (URShiftL (CastP2X src1) src2)); 1.8589 + 1.8590 + format %{ "SRDI $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %} 1.8591 + size(4); 1.8592 + ins_encode %{ 1.8593 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicl); 1.8594 + __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f); 1.8595 + %} 1.8596 + ins_pipe(pipe_class_default); 1.8597 +%} 1.8598 + 1.8599 +instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{ 1.8600 + match(Set dst (ConvL2I (ConvI2L src))); 1.8601 + 1.8602 + format %{ "EXTSW $dst, $src \t// int->int" %} 1.8603 + size(4); 1.8604 + ins_encode %{
1.8605 + // TODO: PPC port $archOpcode(ppc64Opcode_extsw); 1.8606 + __ extsw($dst$$Register, $src$$Register); 1.8607 + %} 1.8608 + ins_pipe(pipe_class_default); 1.8609 +%} 1.8610 + 1.8611 +//----------Rotate Instructions------------------------------------------------ 1.8612 + 1.8613 +// Rotate Left by 8-bit immediate; matches only when lshift + rshift is a multiple of 32. 1.8614 +instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{ 1.8615 + match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); 1.8616 + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 1.8617 + 1.8618 + format %{ "ROTLWI $dst, $src, $lshift" %} 1.8619 + size(4); 1.8620 + ins_encode %{ 1.8621 + // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); 1.8622 + __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant); 1.8623 + %} 1.8624 + ins_pipe(pipe_class_default); 1.8625 +%} 1.8626 + 1.8627 +// Rotate Right by 8-bit immediate; matches only when rshift + lshift is a multiple of 32. 1.8628 +instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{ 1.8629 + match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); 1.8630 + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 1.8631 + 1.8632 + format %{ "ROTRWI $dst, $src, $rshift" %} 1.8633 + size(4); 1.8634 + ins_encode %{ 1.8635 + // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); 1.8636 + __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant); 1.8637 + %} 1.8638 + ins_pipe(pipe_class_default); 1.8639 +%} 1.8640 + 1.8641 +//----------Floating Point Arithmetic Instructions----------------------------- 1.8642 + 1.8643 +// Add float single precision 1.8644 +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1.8645 + match(Set dst (AddF src1 src2)); 1.8646 + 1.8647 + format %{ "FADDS $dst, $src1, $src2" %} 1.8648 + size(4); 1.8649 + ins_encode %{ 1.8650 + // TODO: PPC port $archOpcode(ppc64Opcode_fadds); 1.8651 + __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8652 + %} 1.8653 +
ins_pipe(pipe_class_default); 1.8654 +%} 1.8655 + 1.8656 +// Add float double precision 1.8657 +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1.8658 + match(Set dst (AddD src1 src2)); 1.8659 + 1.8660 + format %{ "FADD $dst, $src1, $src2" %} 1.8661 + size(4); 1.8662 + ins_encode %{ 1.8663 + // TODO: PPC port $archOpcode(ppc64Opcode_fadd); 1.8664 + __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8665 + %} 1.8666 + ins_pipe(pipe_class_default); 1.8667 +%} 1.8668 + 1.8669 +// Sub float single precision 1.8670 +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1.8671 + match(Set dst (SubF src1 src2)); 1.8672 + 1.8673 + format %{ "FSUBS $dst, $src1, $src2" %} 1.8674 + size(4); 1.8675 + ins_encode %{ 1.8676 + // TODO: PPC port $archOpcode(ppc64Opcode_fsubs); 1.8677 + __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8678 + %} 1.8679 + ins_pipe(pipe_class_default); 1.8680 +%} 1.8681 + 1.8682 +// Sub float double precision 1.8683 +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1.8684 + match(Set dst (SubD src1 src2)); 1.8685 + format %{ "FSUB $dst, $src1, $src2" %} 1.8686 + size(4); 1.8687 + ins_encode %{ 1.8688 + // TODO: PPC port $archOpcode(ppc64Opcode_fsub); 1.8689 + __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8690 + %} 1.8691 + ins_pipe(pipe_class_default); 1.8692 +%} 1.8693 + 1.8694 +// Mul float single precision 1.8695 +instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1.8696 + match(Set dst (MulF src1 src2)); 1.8697 + format %{ "FMULS $dst, $src1, $src2" %} 1.8698 + size(4); 1.8699 + ins_encode %{ 1.8700 + // TODO: PPC port $archOpcode(ppc64Opcode_fmuls); 1.8701 + __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8702 + %} 1.8703 + ins_pipe(pipe_class_default); 1.8704 +%} 1.8705 + 1.8706 +// Mul float double precision 1.8707 +instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1.8708 + match(Set dst (MulD src1 
src2)); 1.8709 + format %{ "FMUL $dst, $src1, $src2" %} 1.8710 + size(4); 1.8711 + ins_encode %{ 1.8712 + // TODO: PPC port $archOpcode(ppc64Opcode_fmul); 1.8713 + __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8714 + %} 1.8715 + ins_pipe(pipe_class_default); 1.8716 +%} 1.8717 + 1.8718 +// Div float single precision 1.8719 +instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1.8720 + match(Set dst (DivF src1 src2)); 1.8721 + format %{ "FDIVS $dst, $src1, $src2" %} 1.8722 + size(4); 1.8723 + ins_encode %{ 1.8724 + // TODO: PPC port $archOpcode(ppc64Opcode_fdivs); 1.8725 + __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8726 + %} 1.8727 + ins_pipe(pipe_class_default); 1.8728 +%} 1.8729 + 1.8730 +// Div float double precision 1.8731 +instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1.8732 + match(Set dst (DivD src1 src2)); 1.8733 + format %{ "FDIV $dst, $src1, $src2" %} 1.8734 + size(4); 1.8735 + ins_encode %{ 1.8736 + // TODO: PPC port $archOpcode(ppc64Opcode_fdiv); 1.8737 + __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 1.8738 + %} 1.8739 + ins_pipe(pipe_class_default); 1.8740 +%} 1.8741 + 1.8742 +// Absolute float single precision 1.8743 +instruct absF_reg(regF dst, regF src) %{ 1.8744 + match(Set dst (AbsF src)); 1.8745 + format %{ "FABS $dst, $src \t// float" %} 1.8746 + size(4); 1.8747 + ins_encode %{ 1.8748 + // TODO: PPC port $archOpcode(ppc64Opcode_fabs); 1.8749 + __ fabs($dst$$FloatRegister, $src$$FloatRegister); 1.8750 + %} 1.8751 + ins_pipe(pipe_class_default); 1.8752 +%} 1.8753 + 1.8754 +// Absolute float double precision 1.8755 +instruct absD_reg(regD dst, regD src) %{ 1.8756 + match(Set dst (AbsD src)); 1.8757 + format %{ "FABS $dst, $src \t// double" %} 1.8758 + size(4); 1.8759 + ins_encode %{ 1.8760 + // TODO: PPC port $archOpcode(ppc64Opcode_fabs); 1.8761 + __ fabs($dst$$FloatRegister, $src$$FloatRegister); 1.8762 + %} 1.8763 + 
ins_pipe(pipe_class_default); 1.8764 +%} 1.8765 + 1.8766 +instruct negF_reg(regF dst, regF src) %{ 1.8767 + match(Set dst (NegF src)); 1.8768 + format %{ "FNEG $dst, $src \t// float" %} 1.8769 + size(4); 1.8770 + ins_encode %{ 1.8771 + // TODO: PPC port $archOpcode(ppc64Opcode_fneg); 1.8772 + __ fneg($dst$$FloatRegister, $src$$FloatRegister); 1.8773 + %} 1.8774 + ins_pipe(pipe_class_default); 1.8775 +%} 1.8776 + 1.8777 +instruct negD_reg(regD dst, regD src) %{ 1.8778 + match(Set dst (NegD src)); 1.8779 + format %{ "FNEG $dst, $src \t// double" %} 1.8780 + size(4); 1.8781 + ins_encode %{ 1.8782 + // TODO: PPC port $archOpcode(ppc64Opcode_fneg); 1.8783 + __ fneg($dst$$FloatRegister, $src$$FloatRegister); 1.8784 + %} 1.8785 + ins_pipe(pipe_class_default); 1.8786 +%} 1.8787 + 1.8788 +// AbsF + NegF. 1.8789 +instruct negF_absF_reg(regF dst, regF src) %{ 1.8790 + match(Set dst (NegF (AbsF src))); 1.8791 + format %{ "FNABS $dst, $src \t// float" %} 1.8792 + size(4); 1.8793 + ins_encode %{ 1.8794 + // TODO: PPC port $archOpcode(ppc64Opcode_fnabs); 1.8795 + __ fnabs($dst$$FloatRegister, $src$$FloatRegister); 1.8796 + %} 1.8797 + ins_pipe(pipe_class_default); 1.8798 +%} 1.8799 + 1.8800 +// AbsD + NegD. 1.8801 +instruct negD_absD_reg(regD dst, regD src) %{ 1.8802 + match(Set dst (NegD (AbsD src))); 1.8803 + format %{ "FNABS $dst, $src \t// double" %} 1.8804 + size(4); 1.8805 + ins_encode %{ 1.8806 + // TODO: PPC port $archOpcode(ppc64Opcode_fnabs); 1.8807 + __ fnabs($dst$$FloatRegister, $src$$FloatRegister); 1.8808 + %} 1.8809 + ins_pipe(pipe_class_default); 1.8810 +%} 1.8811 + 1.8812 +// VM_Version::has_fsqrt() decides if this node will be used. 
// Sqrt float double precision
instruct sqrtD_reg(regD dst, regD src) %{
  match(Set dst (SqrtD src));
  format %{ "FSQRT $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsqrt);
    __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Single-precision sqrt.
// Matches the float -> double -> sqrt -> float chain and emits one FSQRTS;
// only selectable when VM_Version::has_fsqrts() holds.
instruct sqrtF_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  predicate(VM_Version::has_fsqrts());
  ins_cost(DEFAULT_COST);

  format %{ "FSQRTS $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsqrts);
    __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// RoundDouble emits no code (size 0, empty encoding).
instruct roundDouble_nop(regD dst) %{
  match(Set dst (RoundDouble dst));
  ins_cost(0);

  format %{ " -- \t// RoundDouble not needed - empty" %}
  size(0);
  // PPC results are already "rounded" (i.e., normal-format IEEE).
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// RoundFloat emits no code (size 0, empty encoding).
instruct roundFloat_nop(regF dst) %{
  match(Set dst (RoundFloat dst));
  ins_cost(0);

  format %{ " -- \t// RoundFloat not needed - empty" %}
  size(0);
  // PPC results are already "rounded" (i.e., normal-format IEEE).
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Logical Instructions-----------------------------------------------

// And Instructions

// Register And
instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (AndI src1 src2));
  format %{ "AND $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and);
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And
// Encoded with andi_; the rule declares CR0 as killed.
instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
  match(Set dst (AndI src1 src2));
  effect(KILL cr0);

  format %{ "ANDI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And where the immediate is a negative power of 2.
// Encoded as clrrdi; the bit count is log2 of the negated constant after it
// has been widened through juint/julong to jlong.
instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
  match(Set dst (AndI src1 src2));
  format %{ "ANDWI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)(julong)(juint)-($src2$$constant)));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And where the immediate has the form 2^n - 1.
// Encoded as clrldi with 64 - log2(constant + 1) as the bit count.
instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
  match(Set dst (AndI src1 src2));
  format %{ "ANDWI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And where the immediate is a single power of 2.
// Encoded as rlwinm with rotate 0 and identical begin/end mask positions;
// only selectable when UseRotateAndMaskInstructionsPPC64 is set.
instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
  match(Set dst (AndI src1 src2));
  predicate(UseRotateAndMaskInstructionsPPC64);
  format %{ "ANDWI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    // Mask begin and end are the same position.
    int bit_index = (31-log2_long((jlong) $src2$$constant)) & 0x1f;
    __ rlwinm($dst$$Register, $src1$$Register, 0, bit_index, bit_index);
  %}
  ins_pipe(pipe_class_default);
%}

// Register And Long
instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (AndL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "AND $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and);
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And long
// Encoded with andi_; the rule declares CR0 as killed.
instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
  match(Set dst (AndL src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST);

  format %{ "ANDI $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And Long where the immediate is a negative power of 2.
// Encoded as clrrdi with log2 of the negated constant as the bit count.
instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
  match(Set dst (AndL src1 src2));
  format %{ "ANDDI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)-$src2$$constant));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And Long where the immediate has the form 2^n - 1.
// Encoded as clrldi with 64 - log2(constant + 1) as the bit count.
instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
  match(Set dst (AndL src1 src2));
  format %{ "ANDDI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// AndL + ConvL2I.
// Same clrldi encoding as andL_reg_immLpow2minus1, with the ConvL2I folded
// into the match and an int destination register.
instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
  match(Set dst (ConvL2I (AndL src1 src2)));
  ins_cost(DEFAULT_COST);

  format %{ "ANDDI $dst, $src1, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// Or Instructions

// Register Or
instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (OrI src1 src2));
  format %{ "OR $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
// Match-less twin of orI_reg_reg with the same encoding; it is referenced
// from the expand rule of tree_orI_orI_orI_reg_reg_Ex.
instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);
  format %{ "OR $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Re-associates ((src1 | src2) | src3) | src4 into
// (src1 | src2) | (src3 | src4) using two temporaries.
instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
  match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegIdst tmp1;
    iRegIdst tmp2;
    orI_reg_reg(tmp1, src1, src2);
    orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
    orI_reg_reg(dst, tmp1, tmp2);
  %}
%}

// Immediate Or
// The constant is reduced with & 0xFFFF before encoding.
instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
  match(Set dst (OrI src1 src2));
  format %{ "ORI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Or Long
instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (OrL src1 src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "OR $dst, $src1, $src2 \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// OrL + ConvL2I.
// Same OR encoding as orL_reg_reg, with the ConvL2I folded into the match
// and an int destination register.
instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (OrL src1 src2)));
  ins_cost(DEFAULT_COST);

  format %{ "OR $dst, $src1, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Or long
// The constant is reduced with & 0xFFFF before encoding.
instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
  match(Set dst (OrL src1 con));
  ins_cost(DEFAULT_COST);

  format %{ "ORI $dst, $src1, $con \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Xor Instructions

// Register Xor
instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (XorI src1 src2));
  format %{ "XOR $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
// Match-less XOR node; it is referenced from the expand rule of
// tree_xorI_xorI_xorI_reg_reg_Ex.
instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);
  format %{ "XOR $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Re-associates ((src1 ^ src2) ^ src3) ^ src4 into
// (src1 ^ src2) ^ (src3 ^ src4) using two temporaries.
instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
  match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegIdst tmp1;
    iRegIdst tmp2;
    xorI_reg_reg(tmp1, src1, src2);
    xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
    xorI_reg_reg(dst, tmp1, tmp2);
  %}
%}

// Immediate Xor
instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
  match(Set dst (XorI src1 src2));
  format %{ "XORI $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xori);
    __ xori($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Xor Long
instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "XOR $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// XorL + ConvL2I.
// Same XOR encoding as xorL_reg_reg, with the ConvL2I folded into the match
// and an int destination register.
instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (XorL src1 src2)));
  ins_cost(DEFAULT_COST);

  format %{ "XOR $dst, $src1, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Xor Long
instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "XORI $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xori);
    __ xori($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise NOT of an int: XorI with the minus-one immediate operand, encoded
// as NOR of src1 with itself. src2 only takes part in the match.
instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
  match(Set dst (XorI src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "NOT $dst, $src1 ($src2)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_nor);
    __ nor($dst$$Register, $src1$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise NOT of a long: XorL with the minus-one immediate operand, encoded
// as NOR of src1 with itself. src2 only takes part in the match.
instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "NOT $dst, $src1 ($src2) \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_nor);
    __ nor($dst$$Register, $src1$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// And-complement
// Matches AndI(XorI(src1, src2), src3) with src2 the minus-one immediate
// operand and hands (dst, src3, src1) to enc_andc.
instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
  match(Set dst (AndI (XorI src1 src2) src3));
  ins_cost(DEFAULT_COST);

  format %{ "ANDW $dst, xori($src1, $src2), $src3" %}
  size(4);
  ins_encode( enc_andc(dst, src3, src1) );
  ins_pipe(pipe_class_default);
%}

// And-complement
// Long variant declared as a match-less node with a false predicate.
instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ANDC $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andc);
    __ andc($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Moves between int/long and float/double----------------------------
//
// The following rules move values from int/long registers/stack-locations
// to float/double registers/stack-locations and vice versa, without doing any
// conversions. These rules are used to implement the bit-conversion methods
// of java.lang.Float etc., e.g.
//   int floatToIntBits(float value)
//   float intBitsToFloat(int bits)
//
// Notes on the implementation on ppc64:
// We only provide rules which move between a register and a stack-location,
// because we always have to go through memory when moving between a float
// register and an integer register.

//---------- Chain stack slots between similar types --------

// These are needed so that the rules below can match.

// Load integer from stack slot
instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $src" %}
  size(4);
  ins_encode( enc_lwz(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Store integer to stack slot
instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "STW $src, $dst \t// stk" %}
  size(4);
  ins_encode( enc_stw(src, dst) ); // rs=rt
  ins_pipe(pipe_class_memory);
%}

// Load long from stack slot
instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $src \t// long" %}
  size(4);
  ins_encode( enc_ld(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Store long to stack slot
instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "STD $src, $dst \t// long" %}
  size(4);
  ins_encode( enc_std(src, dst) ); // rs=rt
  ins_pipe(pipe_class_memory);
%}

//----------Moves between int and float

// Move float value from float stack-location to integer register.
// Uses the integer word load encoding (enc_lwz) on the float stack slot.
instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $src \t// MoveF2I" %}
  size(4);
  ins_encode( enc_lwz(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Move float value from float register to integer stack-location.
// Uses the float store encoding (enc_stfs) into the integer stack slot.
instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STFS $src, $dst \t// MoveF2I" %}
  size(4);
  ins_encode( enc_stfs(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Move integer value from integer stack-location to float register.
// The load displacement is the slot displacement plus frame_slots_bias()
// for the slot's base register.
instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFS $dst, $src \t// MoveI2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_);
    __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Move integer value from integer register to float stack-location.
instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
  match(Set dst (MoveI2F src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW $src, $dst \t// MoveI2F" %}
  size(4);
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

//----------Moves between long and float

// Stores a float register into a long stack slot using the double store
// encoding (enc_stfd). Match-less node with a false predicate.
instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "storeD $src, $dst \t// STACK" %}
  size(4);
  ins_encode( enc_stfd(src, dst) );
  ins_pipe(pipe_class_default);
%}

//----------Moves between long and double

// Move double value from double stack-location to long register.
// Uses the integer doubleword load encoding (enc_ld) on the double slot.
instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LD $dst, $src \t// MoveD2L" %}
  ins_encode( enc_ld(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Move double value from double register to long stack-location.
instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  format %{ "STFD $src, $dst \t// MoveD2L" %}
  size(4);
  ins_encode( enc_stfd(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Move long value from long stack-location to double register.
instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
  match(Set dst (MoveL2D src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD $dst, $src \t// MoveL2D" %}
  size(4);
  ins_encode( enc_lfd(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Move long value from long register to double stack-location.
instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
  match(Set dst (MoveL2D src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD $src, $dst \t// MoveL2D" %}
  size(4);
  ins_encode( enc_std(src, dst) );
  ins_pipe(pipe_class_memory);
%}

//----------Register Move Instructions-----------------------------------------

// Replicate for Superword

// Register-to-register move emitted through mr_if_needed.
// Match-less node with a false predicate; no size() is declared.
instruct moveReg(iRegLdst dst, iRegIsrc src) %{
  predicate(false);
  effect(DEF dst, USE src);

  format %{ "MR $dst, $src \t// replicate " %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Cast instructions (Java-level type cast)---------------------------

// Cast Long to Pointer for unsafe natives.
instruct castX2P(iRegPdst dst, iRegLsrc src) %{
  match(Set dst (CastX2P src));

  format %{ "MR $dst, $src \t// Long->Ptr" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Cast Pointer to Long for unsafe natives.
instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
  match(Set dst (CastP2X src));

  format %{ "MR $dst, $src \t// Ptr->Long" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// CastPP emits no code (size 0, empty encoding).
instruct castPP(iRegPdst dst) %{
  match(Set dst (CastPP dst));
  format %{ " -- \t// castPP of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// CastII emits no code (size 0, empty encoding).
instruct castII(iRegIdst dst) %{
  match(Set dst (CastII dst));
  format %{ " -- \t// castII of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// CheckCastPP emits no code (size 0, empty encoding).
instruct checkCastPP(iRegPdst dst) %{
  match(Set dst (CheckCastPP dst));
  format %{ " -- \t// checkcastPP of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Convert instructions-----------------------------------------------

// Convert to boolean.

// int_to_bool(src) : { 1 if src != 0
//                    { 0 else
//
// strategy:
// 1) Count leading zeros of 32 bit-value src,
//    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
// 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.

// convI2Bool
// Count-leading-zeros variant: expands into cntlz, shift right by 5 and
// xor with 1, as laid out in the int_to_bool strategy.
instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (Conv2B src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x5 %}
    uimmI16 mask %{ 0x1 %}
    iRegIdst tmp1;
    iRegIdst tmp2;
    countLeadingZerosI(tmp1, src);
    urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
    xorI_reg_uimm16(dst, tmp2, mask);
  %}
%}

// Compare-and-branch variant, selected when the cntlz variant is disabled.
// crx is a scratch condition register. The format lists four instructions
// (size 16); each one is now terminated with "\n\t" so the printed listing
// no longer glues the CMPWI comment onto the following LI.
instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
  match(Set dst (Conv2B src));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPWI $crx, $src, #0 \t// convI2B\n\t"
            "LI $dst, #0\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #1\n"
            "done:" %}
  size(16);
  ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
  ins_pipe(pipe_class_compare);
%}

// ConvI2B + XorI
// Xor with 1 negates the boolean, so the final xori of the plain
// conversion is simply dropped: cntlz followed by the shift is enough.
instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x5 %}
    iRegIdst tmp1;
    countLeadingZerosI(tmp1, src);
    urShiftI_reg_imm(dst, tmp1, shiftAmount);
  %}
%}

// Compare-and-branch variant of the negated conversion: same encoding
// class as convI2Bool_reg__cmove with the two constants swapped.
// The format gets the same "\n\t" fix after its first line.
instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPWI $crx, $src, #0 \t// Xor(convI2B($src), $mask)\n\t"
            "LI $dst, #1\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #0\n"
            "done:" %}
  size(16);
  ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
  ins_pipe(pipe_class_compare);
%}

// AndI 0b0..010..0 + ConvI2B
// Testing a single bit: one RLWINM rotates the selected bit into the
// least significant position and masks everything else away (mask 31..31).
instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
  match(Set dst (Conv2B (AndI src mask)));
  predicate(UseRotateAndMaskInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "RLWINM $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rlwinm($dst$$Register, $src$$Register, (32-log2_long((jlong)$mask$$constant)) & 0x1f, 31, 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Convert pointer to boolean.
//
// ptr_to_bool(src) : { 1   if src != 0
//                    { 0   else
//
// strategy:
// 1) Count leading zeros of 64 bit-value src,
//    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
// 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.

// ConvP2B
// Count-leading-zeros variant: 64-bit cntlz, shift right by 6 and xor
// with 1, as laid out in the ptr_to_bool strategy.
instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
  match(Set dst (Conv2B src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x6 %}
    uimmI16 mask %{ 0x1 %}
    iRegIdst tmp1;
    iRegIdst tmp2;
    countLeadingZerosP(tmp1, src);
    urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
    xorI_reg_uimm16(dst, tmp2, mask);
  %}
%}

// Compare-and-branch variant, selected when the cntlz variant is disabled.
// crx is a scratch condition register. Every format line is now terminated
// with "\n\t" so the CMPDI comment is no longer fused with the next LI.
instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
  match(Set dst (Conv2B src));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPDI $crx, $src, #0 \t// convP2B\n\t"
            "LI $dst, #0\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #1\n"
            "done:" %}
  size(16);
  ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
  ins_pipe(pipe_class_compare);
%}

// ConvP2B + XorI
// Xor with 1 negates the boolean, so the final xori is dropped.
instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x6 %}
    iRegIdst tmp1;
    countLeadingZerosP(tmp1, src);
    urShiftI_reg_imm(dst, tmp1, shiftAmount);
  %}
%}

// Compare-and-branch variant of the negated conversion: same encoding
// class as convP2Bool_reg__cmove with the two constants swapped.
// The format gets the same "\n\t" fix after its first line.
instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPDI $crx, $src, #0 \t// XorI(convP2B($src), $mask)\n\t"
            "LI $dst, #1\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #0\n"
            "done:" %}
  size(16);
  ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
  ins_pipe(pipe_class_compare);
%}

// if src1 < src2, return -1 else return 0
// Both operands are sign-extended to 64 bits first so that the
// subtraction cannot overflow; the sign of the 64-bit difference is then
// turned into the mask.
instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (CmpLTMask src1 src2));
  ins_cost(DEFAULT_COST*4);

  expand %{
    iRegLdst src1s;
    iRegLdst src2s;
    iRegLdst diff;
    convI2L_reg(src1s, src1); // Ensure proper sign extension.
    convI2L_reg(src2s, src2); // Ensure proper sign extension.
    subL_reg_reg(diff, src1s, src2s);
    // Need to consider >=33 bit result, therefore we need the 64-bit sign mask.
    signmask64I_regL(dst, diff);
  %}
%}

// Comparison against the constant 0: the mask is just the sign bit of
// src1 smeared over the word, i.e. an arithmetic shift right by 31.
// The format prints the real shift amount (#31); it used to print $src2,
// which is always 0 and never the value that is encoded.
instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
  match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
  format %{ "SRAWI $dst, $src1, #31 \t// CmpLTMask" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
    __ srawi($dst$$Register, $src1$$Register, 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Arithmetic Conversion Instructions---------------------------------

// Convert to Byte  -- nop
// Convert to Short -- nop

// Convert to Int

// LShiftI 24 + RShiftI 24 converts byte to int.
instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
  match(Set dst (RShiftI (LShiftI src amount) amount));
  format %{ "EXTSB $dst, $src \t// byte->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
    __ extsb($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// LShiftI 16 + RShiftI 16 converts short to int.
instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
  match(Set dst (RShiftI (LShiftI src amount) amount));
  format %{ "EXTSH $dst, $src \t// short->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsh);
    __ extsh($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// ConvL2I + ConvI2L: Sign extend int in long register.
instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
  match(Set dst (ConvI2L (ConvL2I src)));

  format %{ "EXTSW $dst, $src \t// long->long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Long to int is a plain (possibly elided) register move.
instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (ConvL2I src));
  format %{ "MR $dst, $src \t// long->int" %}
  // variable size, 0 or 4
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw FCTIWZ on a double register. The result stays in a floating point
// register; NaN inputs are dealt with by the expanding rule.
instruct convD2IRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);;
    __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional load of dst from a long stack slot, controlled by crx.
// Building block of the postalloc expansion below.
instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovI $crx, $dst, $src" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}

// Pseudo instruction that is split after register allocation into
// "dst = 0" followed by the conditional load above. Both new nodes are
// assigned the register pair that was allocated for this node.
instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE mem);
  predicate(false);

  format %{ "CmovI $dst, $crx, $mem \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  dst  crx  mem
    //    \       |    |   /
    //     dst=cmovI_bso_stackSlotL_conLvalue0
    //
    // with
    //
    //   region  dst
    //    \       /
    //     dst=loadConI16(0)
    //      |
    //      ^  region  dst  crx  mem
    //      |   \       |    |    /
    //      dst=cmovI_bso_stackSlotL
    //

    // Create new nodes.
    MachNode *load_zero = new (C) loadConI16Node();
    MachNode *cmove     = new (C) cmovI_bso_stackSlotLNode();

    // Constant load: control input, operands, register.
    load_zero->add_req(n_region);
    load_zero->_opnds[0] = op_dst;
    load_zero->_opnds[1] = new (C) immI16Oper(0);
    ra_->set_pair(load_zero->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Conditional move: inputs, ordering edge after the constant load,
    // operands, register.
    cmove->add_req(n_region, n_crx, n_mem);
    cmove->add_prec(load_zero);
    cmove->_opnds[0] = op_dst;
    cmove->_opnds[1] = op_crx;
    cmove->_opnds[2] = op_mem;
    ra_->set_pair(cmove->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(load_zero);
    nodes->push(cmove);
  %}
%}

// Double to Int conversion, NaN is mapped to 0.
instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
  match(Set dst (ConvD2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    stackSlotL tmpS;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store converted value to stack (speculated).
    cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Raw FCTIWZ on a float register; see convD2IRaw_regD.
instruct convF2IRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Float to Int conversion, NaN is mapped to 0.
instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
  match(Set dst (ConvF2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    stackSlotL tmpS;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
    moveF2L_reg_stack(tmpS, tmpF);                      // Store converted value to stack (speculated).
    cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Convert to Long

// Int to long is a sign extension of the low word.
instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ConvI2L src));
  format %{ "EXTSW $dst, $src \t// int->long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extend: convert unsigned int to long (convUI2L).
instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extend: convert unsigned int to long in long register.
instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
  match(Set dst (AndL src mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw FCTIDZ on a float register. The result stays in a floating point
// register; NaN inputs are dealt with by the expanding rule.
// NOTE(review): the TODO inside the encoding names fctiwz although FCTIDZ
// is what gets emitted.
instruct convF2LRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional load of a long dst from a long stack slot, controlled by
// crx. Building block of the postalloc expansion below.
instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovL $crx, $dst, $src" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}

// Pseudo instruction that is split after register allocation into
// "dst = 0" followed by the conditional load above. Both new nodes are
// assigned the register pair that was allocated for this node.
instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE mem);
  predicate(false);

  format %{ "CmovL $dst, $crx, $mem \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  dst  crx  mem
    //    \       |    |   /
    //     dst=cmovL_bso_stackSlotL_conLvalue0
    //
    // with
    //
    //   region  dst
    //    \       /
    //     dst=loadConL16(0)
    //      |
    //      ^  region  dst  crx  mem
    //      |   \       |    |    /
    //      dst=cmovL_bso_stackSlotL
    //

    // Create new nodes.
    MachNode *load_zero = new (C) loadConL16Node();
    MachNode *cmove     = new (C) cmovL_bso_stackSlotLNode();

    // Constant load: control input, operands, register.
    load_zero->add_req(n_region);
    load_zero->_opnds[0] = op_dst;
    load_zero->_opnds[1] = new (C) immL16Oper(0);
    ra_->set_pair(load_zero->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Conditional move: inputs, ordering edge after the constant load,
    // operands, register.
    cmove->add_req(n_region, n_crx, n_mem);
    cmove->add_prec(load_zero);
    cmove->_opnds[0] = op_dst;
    cmove->_opnds[1] = op_crx;
    cmove->_opnds[2] = op_mem;
    ra_->set_pair(cmove->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(load_zero);
    nodes->push(cmove);
  %}
%}

// Float to Long conversion, NaN is mapped to 0.
instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
  match(Set dst (ConvF2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    stackSlotL tmpS;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
    moveF2L_reg_stack(tmpS, tmpF);                      // Store converted value to stack (speculated).
    cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Raw FCTIDZ on a double register. The result stays in a floating point
// register; NaN inputs are dealt with by the expanding rule.
// NOTE(review): the TODO inside the encoding names fctiwz although FCTIDZ
// is what gets emitted.
instruct convD2LRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Double to Long conversion, NaN is mapped to 0.
instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
  match(Set dst (ConvD2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    stackSlotL tmpS;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store converted value to stack (speculated).
    cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Convert to Float

// Placed here as needed in expand.
// Raw FCFID: src is a double register that already holds the long bit
// pattern (see the expands that load it via moveL2D_stack_reg).
instruct convL2DRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFID $dst, $src \t// convL2D" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
    __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Placed here as needed in expand.
// Double to float is a single FRSP.
instruct convD2F_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));
  format %{ "FRSP $dst, $src \t// convD2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_frsp);
    __ frsp($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer to Float conversion.
// Variant for CPUs without FCFIDS: goes through long, the stack and
// double, then narrows the double to float.
instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
  match(Set dst (ConvI2F src));
  predicate(!VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    regD tmpD2;
    convI2L_reg(tmpL, src);        // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);      // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2DRaw_regD(tmpD2, tmpD);  // Convert to double.
    convD2F_reg(dst, tmpD2);       // Convert double to float.
  %}
%}

// Raw FCFIDS: src is a double register that already holds the long bit
// pattern; the result is a float.
// NOTE(review): the TODO inside the encoding names fcfid although FCFIDS
// is what gets emitted.
instruct convL2FRaw_regF(regF dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFIDS $dst, $src \t// convL2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
    __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer to Float conversion. Special version for Power7.
instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
  match(Set dst (ConvI2F src));
  predicate(VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    convI2L_reg(tmpL, src);        // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);      // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2FRaw_regF(dst, tmpD);    // Convert to float.
  %}
%}

// L2F to avoid runtime call.
instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
  match(Set dst (ConvL2F src));
  predicate(VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    stackSlotL tmpS;
    regD tmpD;
    regL_to_stkL(tmpS, src);       // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2FRaw_regF(dst, tmpD);    // Convert to float.
  %}
%}

// Moved up as used in expand.
//instruct convD2F_reg(regF dst, regD src) %{%}

// Convert to Double

// Integer to Double conversion.
instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
  match(Set dst (ConvI2D src));
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    convI2L_reg(tmpL, src);        // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);      // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2DRaw_regD(dst, tmpD);    // Convert to double.
  %}
%}

// Long to Double conversion
// The source is matched as a stack slot, so the value is loaded directly
// into a double register and then converted.
instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
  match(Set dst (ConvL2D src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);

  expand %{
    regD tmpD;
    moveL2D_stack_reg(tmpD, src);
    convL2DRaw_regD(dst, tmpD);
  %}
%}

// Float to double is a plain (possibly elided) floating point register move.
instruct convF2D_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));
  format %{ "FMR $dst, $src \t// float->double" %}
  // variable size, 0 or 4
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Control Flow Instructions------------------------------------------
// Compare Instructions

// Compare Integers
instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpI src1 src2));
  format %{ "CMPW $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
    __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Compare integer register against a 16-bit immediate.
instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
  match(Set crx (CmpI src1 src2));
  format %{ "CMPWI $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// (src1 & src2) == 0?
// The and result itself is thrown away into R0; only the condition
// register operand cr0 is defined by this rule.
instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
  match(Set cr0 (CmpI (AndI src1 src2) zero));
  // r0 is killed
  format %{ "ANDI R0, $src1, $src2 \t// BTST int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Compare Longs
instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPD $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
    __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Compare long register against a 16-bit immediate.
instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPDI $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// (src1 & src2) == 0? for longs, register operand.
instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "AND R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and_);
    __ and_(R0, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// (src1 & src2) == 0? for longs, 16-bit unsigned immediate operand.
instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "ANDI R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Second half of the three-way result: dst is left untouched when crx
// says "equal", set to +1 for "greater" and to -1 otherwise.
// The value for "equal" must already be in dst.
instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovI $crx, $dst, -1, 0, +1" %}
  // Worst case is branch + move + branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 20 : 16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    Label done;
    // li(Rdst, 0);              // equal -> 0
    __ beq($crx$$CondRegister, done);
    __ li($dst$$Register, 1);    // greater -> +1
    __ bgt($crx$$CondRegister, done);
    __ li($dst$$Register, -1);   // unordered or less -> -1
    // TODO: PPC port__ endgroup_if_needed(_size == 20);
    __ bind(done);
  %}
  ins_pipe(pipe_class_compare);
%}

// Pseudo instruction that is split after register allocation into
// "dst = 0" followed by cmovI_conIvalueMinus1_conIvalue1. Both new nodes
// are assigned the register pair that was allocated for this node.
instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx);
  predicate(false);

  format %{ "CmovI $crx, $dst, -1, 0, +1 \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  crx
    //    \       |
    //     dst=cmovI_conIvalueMinus1_conIvalue0_conIvalue1
    //
    // with
    //
    //   region
    //    \
    //     dst=loadConI16(0)
    //      |
    //      ^  region  crx
    //      |   \       |
    //      dst=cmovI_conIvalueMinus1_conIvalue1
    //

    // Create new nodes.
    MachNode *load_zero = new (C) loadConI16Node();
    MachNode *cmove     = new (C) cmovI_conIvalueMinus1_conIvalue1Node();

    // Constant load: control input, operands, register.
    load_zero->add_req(n_region);
    load_zero->_opnds[0] = op_dst;
    load_zero->_opnds[1] = new (C) immI16Oper(0);
    ra_->set_pair(load_zero->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Conditional move: inputs, ordering edge after the constant load,
    // operands, register.
    cmove->add_req(n_region, n_crx);
    cmove->add_prec(load_zero);
    cmove->_opnds[0] = op_dst;
    cmove->_opnds[1] = op_crx;
    ra_->set_pair(cmove->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(load_zero);
    nodes->push(cmove);
  %}
%}

// Manifest a CmpL3 result in an integer register. Very painful.
// This is the test to avoid.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg_ExEx(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (CmpL3 src1 src2));
  ins_cost(DEFAULT_COST*5+BRANCH_COST);

  expand %{
    flagsReg tmp1;
    cmpL_reg_reg(tmp1, src1, src2);
    cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
  %}
%}

// Implicit range checks.
// A range check in the ideal world has one of the following shapes:
//  - (If le (CmpU length index)), (IfTrue  throw exception)
//  - (If lt (CmpU index length)), (IfFalse throw exception)
//
// Match range check 'If le (CmpU length index)'.
instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
  // Range check of a length register against an unsigned 15-bit immediate
  // index, emitted as a single 4-byte conditional trap instead of a
  // compare-and-branch pair.
  match(If cmp (CmpU src_length index));
  // Selected only for 'le' tests that lead to an uncommon trap and that
  // satisfy the probability bound below.
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
            PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));
  effect(USE labl);

  ins_is_TrapBasedCheckNode(true);

  size(4);
  format %{ "TWI $index $cmp $src_length \t// RangeCheck => trap $labl" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_twi);
    const bool traps_on_le = ($cmp$$cmpcode == 0x1 /* less_equal */);
    if (!traps_on_le) {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x9, "must be greater");
      __ trap_range_check_g($src_length$$Register, $index$$constant);
    } else {
      __ trap_range_check_le($src_length$$Register, $index$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Match range check 'If lt (CmpU index length)'.
instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
  // Range check with index and length both in registers, emitted as a
  // single 4-byte conditional trap.
  match(If cmp (CmpU src_index src_length));
  // Selected only for 'lt' tests that lead to an uncommon trap and whose
  // branch probability is at least PROB_ALWAYS.
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));
  effect(USE labl);

  ins_is_TrapBasedCheckNode(true);

  size(4);
  format %{ "TW $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tw);
    const bool traps_on_ge = ($cmp$$cmpcode == 0x0 /* greater_equal */);
    if (!traps_on_ge) {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $src_length$$Register);
    } else {
      __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Match range check 'If lt (CmpU index length)'.
instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
  // Range check of an index register against an unsigned 15-bit immediate
  // length, emitted as a single 4-byte conditional trap.
  match(If cmp (CmpU src_index length));
  // Selected only for 'lt' tests that lead to an uncommon trap and whose
  // branch probability is at least PROB_ALWAYS.
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));
  effect(USE labl);

  ins_is_TrapBasedCheckNode(true);

  size(4);
  format %{ "TWI $src_index $cmp $length \t// RangeCheck => trap $labl" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_twi);
    const bool traps_on_ge = ($cmp$$cmpcode == 0x0 /* greater_equal */);
    if (!traps_on_ge) {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $length$$constant);
    } else {
      __ trap_range_check_ge($src_index$$Register, $length$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Unsigned 32-bit compare of two registers; the result goes to condition
// register field crx.
instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpU src1 src2));

  size(4);
  format %{ "CMPLW $crx, $src1, $src2 \t// unsigned" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Unsigned 32-bit compare of a register with an unsigned 16-bit immediate.
instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
  match(Set crx (CmpU src1 src2));

  format %{ "CMPLWI $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpli);
    __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Implicit zero checks (more implicit null checks).
// No constant pool entries required.
//
// Null check of a narrow oop, emitted as a single 4-byte conditional trap.
instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
  match(If cmp (CmpN value zero));
  // Selected only for 'ne' tests that lead to an uncommon trap and whose
  // branch probability is at least PROB_LIKELY_MAG(4).
  predicate(TrapBasedNullChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
            _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
            Matcher::branches_to_uncommon_trap(_leaf));
  effect(USE labl);
  ins_cost(1);

  ins_is_TrapBasedCheckNode(true);

  size(4);
  format %{ "TDI $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
    const bool is_code_0xA = ($cmp$$cmpcode == 0xA);
    if (!is_code_0xA) {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
      __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
    } else {
      __ trap_null_check($value$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Compare narrow oops.
instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
  match(Set crx (CmpN src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "CMPLW $crx, $src1, $src2 \t// compressed ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Compare a narrow oop with the narrow null constant.
instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
  match(Set crx (CmpN src1 src2));
  // Make this more expensive than zeroCheckN_iReg_imm0.
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "CMPLWI $crx, $src1, $src2 \t// compressed ptr" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpli);
    __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Implicit zero checks (more implicit null checks).
// No constant pool entries required.
//
// Null check of a pointer, emitted as a single 4-byte conditional trap.
instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
  match(If cmp (CmpP value zero));
  // Same selection criteria as zeroCheckN_iReg_imm0.
  predicate(TrapBasedNullChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
            _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
            Matcher::branches_to_uncommon_trap(_leaf));
  effect(USE labl);

  ins_is_TrapBasedCheckNode(true);

  size(4);
  format %{ "TDI $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
    const bool is_code_0xA = ($cmp$$cmpcode == 0xA);
    if (!is_code_0xA) {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
      __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
    } else {
      __ trap_null_check($value$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Compare Pointers
instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
  match(Set crx (CmpP src1 src2));

  size(4);
  format %{ "CMPLD $crx, $src1, $src2 \t// ptr" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Used in postalloc expand.
instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
  // This match rule prevents reordering of the node before a safepoint.
  // This only makes sense if this instruction is used exclusively
  // for the expansion of EncodeP!
  match(Set crx (CmpP src1 src2));
  // Never selected by the matcher.
  predicate(false);

  size(4);
  format %{ "CMPDI $crx, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

//----------Float Compares----------------------------------------------------

// Raw float compare (fcmpu); building block for the expanded compares.
instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
  // no match-rule, false predicate
  predicate(false);
  effect(DEF crx, USE src1, USE src2);

  size(4);
  format %{ "cmpFUrd $crx, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu);
    __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Rewrites an 'unordered' compare result in crx to 'less'; any other result
// is left untouched. Uses R0 as scratch.
instruct cmov_bns_less(flagsReg crx) %{
  // no match-rule, false predicate
  predicate(false);
  effect(DEF crx);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmov $crx" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 16 : 12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmovecr);
    Label keep_crx;
    __ bns($crx$$CondRegister, keep_crx);    // not unordered -> keep crx
    __ li(R0, 0);
    __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
    // TODO PPC port __ endgroup_if_needed(_size == 16);
    __ bind(keep_crx);
  %}
  ins_pipe(pipe_class_default);
%}

// Compare floating, generate condition code.
instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
  // FIXME: should we match 'If cmp (CmpF src1 src2))' ??
  //
  // The following code sequence occurs a lot in mpegaudio:
  //
  // block BXX:
  // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
  //      cmpFUrd CCR6, F11, F9
  // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
  //      cmov CCR6
  // 8: instruct branchConSched:
  //      B_FARle CCR6, B56 P=0.500000 C=-1.000000
  match(Set crx (CmpF src1 src2));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  format %{ "CmpF $crx, $src1, $src2 \t// postalloc expanded" %}
  postalloc_expand %{
    // Replaces the single node
    //   crx = cmpF_reg_reg(region, src1, src2)
    // by the two-node sequence
    //   crx = cmpFUnordered_reg_reg(region, src1, src2)
    //   crx = cmov_bns_less(region), with a precedence edge to the compare.

    // Create new nodes.
    MachNode *m1 = new (C) cmpFUnordered_reg_regNode();
    MachNode *m2 = new (C) cmov_bns_lessNode();

    // inputs for new nodes
    m1->add_req(n_region, n_src1, n_src2);
    m2->add_req(n_region);
    m2->add_prec(m1);

    // operands for new nodes
    m1->_opnds[0] = op_crx;
    m1->_opnds[1] = op_src1;
    m1->_opnds[2] = op_src2;
    m2->_opnds[0] = op_crx;

    // registers for new nodes: both define the register assigned to this node
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx

    // Insert new nodes.
    nodes->push(m1);
    nodes->push(m2);
  %}
%}

// Compare float, generate -1,0,1
instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{
  match(Set dst (CmpF3 src1 src2));
  ins_cost(DEFAULT_COST*5+BRANCH_COST);

  expand %{
    flagsReg tmp1;
    cmpFUnordered_reg_reg(tmp1, src1, src2);
    cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
  %}
%}

// Raw double compare (fcmpu); building block for the expanded compares.
instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
  // no match-rule, false predicate
  effect(DEF crx, USE src1, USE src2);
  predicate(false);

  // Distinct mnemonic so listings do not confuse this with the float variant.
  format %{ "cmpDUrd $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu);
    __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Compare double, generate condition code.
instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
  match(Set crx (CmpD src1 src2));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  format %{ "CmpD $crx, $src1, $src2 \t// postalloc expanded" %}
  postalloc_expand %{
    // Replaces the single node
    //   crx = cmpD_reg_reg(region, src1, src2)
    // by the two-node sequence
    //   crx = cmpDUnordered_reg_reg(region, src1, src2)
    //   crx = cmov_bns_less(region), with a precedence edge to the compare.

    // create new nodes
    MachNode *m1 = new (C) cmpDUnordered_reg_regNode();
    MachNode *m2 = new (C) cmov_bns_lessNode();

    // inputs for new nodes
    m1->add_req(n_region, n_src1, n_src2);
    m2->add_req(n_region);
    m2->add_prec(m1);

    // operands for new nodes
    m1->_opnds[0] = op_crx;
    m1->_opnds[1] = op_src1;
    m1->_opnds[2] = op_src2;
    m2->_opnds[0] = op_crx;

    // registers for new nodes: both define the register assigned to this node
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx

    // Insert new nodes.
    nodes->push(m1);
    nodes->push(m2);
  %}
%}

// Compare double, generate -1,0,1
instruct cmpD3_reg_reg_ExEx(iRegIdst dst, regD src1, regD src2) %{
  match(Set dst (CmpD3 src1 src2));
  ins_cost(DEFAULT_COST*5+BRANCH_COST);

  expand %{
    flagsReg tmp1;
    cmpDUnordered_reg_reg(tmp1, src1, src2);
    cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
  %}
%}

//----------Branches---------------------------------------------------------
// Jump

// Direct Branch.
instruct branch(label labl) %{
  match(Goto);
  effect(USE labl);
  ins_cost(BRANCH_COST);

  format %{ "B $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_b);
    Label d;    // dummy
    __ bind(d);
    Label* p = $labl$$label;
    // `p' is `NULL' when this encoding class is used only to
    // determine the size of the encoded instruction.
    Label& l = (NULL == p)? d : *(p);
    __ b(l);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{
  // Same match rule as `branchConFar'.
  match(If cmp crx);
  effect(USE lbl);
  ins_cost(BRANCH_COST);

  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  ins_short_branch(1);

  format %{ "B$cmp $crx, $lbl" %}
  size(4);
  ins_encode( enc_bc(crx, cmp, lbl) );
  ins_pipe(pipe_class_default);
%}

// This is for cases when the ppc64 `bc' instruction does not
// reach far enough. So we emit a far branch here, which is more
// expensive.
//
// Conditional Far Branch
instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{
  // Same match rule as `branchCon'.
  match(If cmp crx);
  effect(USE crx, USE lbl);
  predicate(!false /* TODO: PPC port HB_Schedule*/);
  // Higher cost than `branchCon'.
  ins_cost(5*BRANCH_COST);

  // This is not a short variant of a branch, but the long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $lbl" %}
  size(8);
  ins_encode( enc_bc_far(crx, cmp, lbl) );
  ins_pipe(pipe_class_default);
%}

// Conditional Branch used with Power6 scheduler (can be far or short).
// Currently disabled: the predicate is the constant false.
instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{
  // Same match rule as `branchCon'.
  match(If cmp crx);
  effect(USE crx, USE lbl);
  predicate(false /* TODO: PPC port HB_Schedule*/);
  // Higher cost than `branchCon'.
  ins_cost(5*BRANCH_COST);

  // Actually size doesn't depend on alignment but on shortening.
  ins_variable_size_depending_on_alignment(true);
  // long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $lbl" %}
  size(8); // worst case
  ins_encode( enc_bc_short_far(crx, cmp, lbl) );
  ins_pipe(pipe_class_default);
%}

// Counted-loop back branch, near variant.
instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{
  match(CountedLoopEnd cmp crx);
  effect(USE labl);
  ins_cost(BRANCH_COST);

  // short variant.
  ins_short_branch(1);

  format %{ "B$cmp $crx, $labl \t// counted loop end" %}
  size(4);
  ins_encode( enc_bc(crx, cmp, labl) );
  ins_pipe(pipe_class_default);
%}

// Counted-loop back branch, far variant.
instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{
  match(CountedLoopEnd cmp crx);
  effect(USE labl);
  predicate(!false /* TODO: PPC port HB_Schedule */);
  ins_cost(BRANCH_COST);

  // Long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
  size(8);
  ins_encode( enc_bc_far(crx, cmp, labl) );
  ins_pipe(pipe_class_default);
%}

// Conditional Branch used with Power6 scheduler (can be far or short).
// Currently disabled: the predicate is the constant false.
instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{
  match(CountedLoopEnd cmp crx);
  effect(USE labl);
  predicate(false /* TODO: PPC port HB_Schedule */);
  // Higher cost than `branchCon'.
  ins_cost(5*BRANCH_COST);

  // Actually size doesn't depend on alignment but on shortening.
  ins_variable_size_depending_on_alignment(true);
  // Long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
  size(8); // worst case
  ins_encode( enc_bc_short_far(crx, cmp, labl) );
  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Java runtime operations, intrinsics and other complex operations.

// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// not zero for a miss or zero for a hit. The encoding ALSO sets flags.
//
// GL TODO: Improve this.
// - result should not be a TEMP
// - Add match rule as on sparc avoiding additional Cmp.
instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
                             iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
  match(Set result (PartialSubtypeCheck subklass superklass));
  effect(TEMP result, TEMP tmp_klass, TEMP tmp_arrayptr);
  ins_cost(DEFAULT_COST*10);

  format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
                                     $tmp_klass$$Register, NULL, $result$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// inlined locking and unlocking

instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
  match(Set crx (FastLock oop box));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
  // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking);

  format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // Note the temp order: tmp3 is passed first.
    __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
    // If locking was successful, crx should indicate 'EQ'.
    // The compiler generates a branch to the runtime call to
    // _complete_monitor_locking_Java for the case where crx is 'NE'.
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
  match(Set crx (FastUnlock oop box));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);

  // Lists all three temps, matching the effect list and cmpFastLock.
  format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // Note the temp order: tmp3 is passed first.
    __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
    // If unlocking was successful, crx should indicate 'EQ'.
    // The compiler generates a branch to the runtime call to
    // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
  %}
  ins_pipe(pipe_class_compare);
%}

// Align address.
instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
  // Masks a pointer with a negative power-of-two constant by clearing its
  // low-order bits with a single clrrdi.
  match(Set dst (CastX2P (AndL (CastP2X src) mask)));

  size(4);
  format %{ "ANDDI $dst, $src, $mask \t// next aligned address" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    // Number of low bits to clear = log2 of the negated mask.
    const int low_bits = log2_long((jlong)-$mask$$constant);
    __ clrrdi($dst$$Register, $src$$Register, low_bits);
  %}
  ins_pipe(pipe_class_default);
%}

// Array size computation.
instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
  match(Set dst (SubL (CastP2X end) (CastP2X start)));

  size(4);
  format %{ "SUB $dst, $end, $start \t// array size in bytes" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    // subf takes the subtrahend first: dst = end - start.
    __ subf($dst$$Register, $start$$Register, $end$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Clear-array with dynamic array-size.
instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
  match(Set dummy (ClearArray cnt base));
  ins_cost(MEMORY_REF_COST);
  effect(USE_KILL cnt, USE_KILL base, KILL ctr);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "ClearArray $cnt, $base" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ clear_memory_doubleword($base$$Register, $cnt$$Register); // kills cnt, base, R0
  %}
  ins_pipe(pipe_class_default);
%}

// String_IndexOf for needle of length 1.
//
// Match needle into immediate operands: no loadConP node needed. Saves one
// register and two instructions over string_indexOf_imm1Node.
//
// Assumes register result differs from all input registers.
//
// Preserves registers haystack, haycnt
// Kills registers tmp1, tmp2
// Defines registers result
//
// Use dst register classes if register gets killed, as it is the case for tmp registers!
//
// Unfortunately this does not match too often. In many situations the AddP is used
// by several nodes, even several StrIndexOf nodes, breaking the match tree.
instruct string_indexOf_imm1_char(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                                  immP needleImm, immL offsetImm, immI_1 needlecntImm,
                                  iRegIdst tmp1, iRegIdst tmp2,
                                  flagsRegCR0 cr0, flagsRegCR1 cr1) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
  predicate(SpecialStringIndexOf);  // type check implicit by parameter type, See Matcher::match_rule_supported
  effect(TEMP result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1);
  ins_cost(150);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted

  format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
            "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // The needle is a compile-time constant array; fetch it from the operand's type.
    immPOper *needle_opnd = (immPOper *)$needleImm;
    ciTypeArray* needle_values =
      needle_opnd->type()->isa_oopptr()->const_oop()->as_type_array();  // Pointer to live char *

    // Search for the single needle character, passed as an immediate.
    __ string_indexof_1($result$$Register,
                        $haystack$$Register, $haycnt$$Register,
                        R0, needle_values->char_at(0),
                        $tmp1$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String_IndexOf for needle of length 1.
//
// Special case requires less registers and emits less instructions.
//
// Assumes register result differs from all input registers.
//
// Preserves registers haystack, haycnt
// Kills registers tmp1, tmp2, needle, cr0, cr1
// Defines registers result
//
// Use dst register classes if register gets killed, as it is the case for tmp registers!
instruct string_indexOf_imm1(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                             rscratch2RegP needle, immI_1 needlecntImm,
                             iRegIdst tmp1, iRegIdst tmp2,
                             flagsRegCR0 cr0, flagsRegCR1 cr1) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  // cr0 and cr1 are declared killed, as in string_indexOf_imm1_char, which
  // emits the same string_indexof_1 sequence.
  effect(USE_KILL needle, /* TDEF needle, */ TEMP result,
         TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1);
  // Required for EA: check if it is still a type_array.
  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(180);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
    // The predicate admits only constant type arrays, so the array must be
    // available here; guarantee() stops the VM otherwise.
    guarantee(needle_values, "sanity");
    // Search for the single needle character, passed as an immediate.
    __ string_indexof_1($result$$Register,
                        $haystack$$Register, $haycnt$$Register,
                        R0, needle_values->char_at(0),
                        $tmp1$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String_IndexOf.
//
// Length of needle as immediate. This saves instruction loading constant needle
// length.
// @@@ TODO Specify rules for length < 8 or so, and roll out comparison of needle
// completely or do it in vector instruction. This should save registers for
// needlecnt and needle.
//
// Assumes register result differs from all input registers.
// Overwrites haycnt (the needle length is an immediate here).
// Use dst register classes if register gets killed, as it is the case for tmp registers!
instruct string_indexOf_imm(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
                            iRegPsrc needle, uimmI15 needlecntImm,
                            iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
                            flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6);
  // Required for EA: check if it is still a type_array.
  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(250);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  // The KILL list includes cr6, matching the effect list.
  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1, $cr6" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();

    // tmp5 is passed in the position where string_indexOf passes its needlecnt register.
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// StrIndexOf node.
//
// Assumes register result differs from all input registers.
// Overwrites haycnt, needlecnt.
// Use dst register classes if register gets killed, as it is the case for tmp registers!
instruct string_indexOf(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
                        iRegPsrc needle, rscratch2RegI needlecnt,
                        iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
  // General StrIndexOf rule: the needle length arrives in a register, so
  // the encoding passes NULL/0 for the constant-needle arguments.
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
  predicate(SpecialStringIndexOf); // See Matcher::match_rule_supported.
  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
         TEMP result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6);
  ins_cost(300);

  ins_alignment(8); // compute_padding() gets called; up to (this number - 1) nops will get inserted.

  // The KILL list printed here mirrors the effect() above, including cr6.
  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
            " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1, $cr6" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String equals with immediate.
// The character count is the immediate operand cntImm and is handed to the
// macro assembler as a constant.
instruct string_equals_imm(iRegPsrc str1, iRegPsrc str2, uimmI15 cntImm, iRegIdst result,
                           iRegPdst tmp1, iRegPdst tmp2,
                           flagsRegCR0 cr0, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrEquals (Binary str1 str2) cntImm));
  predicate(SpecialStringEquals); // See Matcher::match_rule_supported.
  effect(TEMP result, TEMP tmp1, TEMP tmp2,
         KILL cr0, KILL cr6, KILL ctr);
  ins_cost(250);

  ins_alignment(8); // compute_padding() gets called; up to (this number - 1) nops will get inserted.

  format %{ "String Equals SCL [0..$cntImm]($str1),[0..$cntImm]($str2)"
            " -> $result \t// KILL $cr0, $cr6, $ctr, TEMP $result, $tmp1, $tmp2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ char_arrays_equalsImm($str1$$Register, $str2$$Register, $cntImm$$constant,
                             $result$$Register, $tmp1$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String equals.
// Variant with the character count in a register.
// Use dst register classes if register gets killed, as it is the case for TEMP operands!
instruct string_equals(iRegPsrc str1, iRegPsrc str2, iRegIsrc cnt, iRegIdst result,
                       iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3, iRegPdst tmp4, iRegPdst tmp5,
                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  predicate(SpecialStringEquals); // See Matcher::match_rule_supported.
  effect(TEMP result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
         KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  ins_cost(300);

  ins_alignment(8); // compute_padding() gets called; up to (this number - 1) nops will get inserted.

  format %{ "String Equals [0..$cnt]($str1),[0..$cnt]($str2) -> $result"
            " \t// KILL $cr0, $cr1, $cr6, $ctr, TEMP $result, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ char_arrays_equals($str1$$Register, $str2$$Register, $cnt$$Register, $result$$Register,
                          $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
                          $tmp5$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String compare.
// Char[] pointers are passed in.
// All four inputs are pinned to the rarg1..rarg4 operand classes and are
// declared USE_KILL, i.e. they are overwritten by the emitted code.
// Use dst register classes if register gets killed, as it is the case for TEMP operands!
instruct string_compare(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2,
                        iRegIdst result,
                        iRegPdst tmp, flagsRegCR0 cr0, regCTR ctr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL cnt1, USE_KILL cnt2, USE_KILL str1, USE_KILL str2, TEMP result, TEMP tmp, KILL cr0, KILL ctr);
  ins_alignment(8); // compute_padding() gets called; up to (this number - 1) nops will get inserted.
  ins_cost(300);

  format %{ "String Compare $str1[0..$cnt1], $str2[0..$cnt2] -> $result"
            " \t// TEMP $tmp, $result KILLs $str1, $cnt1, $str2, $cnt2, $cr0, $ctr" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

//---------- Min/Max Instructions ---------------------------------------------

// Branch-free MinI, expanded into 64-bit arithmetic:
// dst = src1 + (diff & signmask(diff)).
// NOTE(review): presumably diff = src2 - src1 given the operand order of
// subL_reg_reg; confirm against that rule.
instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (MinI src1 src2));
  ins_cost(DEFAULT_COST*6);

  expand %{
    iRegLdst src1s;
    iRegLdst src2s;
    iRegLdst diff;
    iRegLdst sm;
    iRegLdst doz; // difference or zero
    convI2L_reg(src1s, src1); // Ensure proper sign extension.
    convI2L_reg(src2s, src2); // Ensure proper sign extension.
    subL_reg_reg(diff, src2s, src1s);
    // Need to consider >=33 bit result, therefore we need signmaskL.
    signmask64L_regL(sm, diff);
    andL_reg_reg(doz, diff, sm); // <=0
    addI_regL_regL(dst, doz, src1s);
  %}
%}

// Branch-free MaxI. Same scheme as minI_reg_reg_Ex, except that the sign
// mask is applied with andc, so doz keeps only a non-negative difference.
instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (MaxI src1 src2));
  ins_cost(DEFAULT_COST*6);

  expand %{
    iRegLdst src1s;
    iRegLdst src2s;
    iRegLdst diff;
    iRegLdst sm;
    iRegLdst doz; // difference or zero
    convI2L_reg(src1s, src1); // Ensure proper sign extension.
    convI2L_reg(src2s, src2); // Ensure proper sign extension.
    subL_reg_reg(diff, src2s, src1s);
    // Need to consider >=33 bit result, therefore we need signmaskL.
    signmask64L_regL(sm, diff);
    andcL_reg_reg(doz, diff, sm); // >=0
    addI_regL_regL(dst, doz, src1s);
  %}
%}

//---------- Population Count Instructions ------------------------------------

// Popcnt for Power7.
// 32-bit population count; emits a single popcntw.
instruct popCountI(iRegIdst dst, iRegIsrc src) %{
  predicate(UsePopCountInstruction && VM_Version::has_popcntw());
  match(Set dst (PopCountI src));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "POPCNTW $dst, $src" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_popcntb);
    __ popcntw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Popcnt for Power7.
// 64-bit population count; emits a single popcntd. It is guarded by the
// same has_popcntw() check as the 32-bit rule.
instruct popCountL(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (PopCountL src));
  predicate(UsePopCountInstruction && VM_Version::has_popcntw());
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "POPCNTD $dst, $src" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_popcntb);
    __ popcntd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Count leading zeros of an int; emits a single cntlzw.
instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
  predicate(UseCountLeadingZerosInstructionsPPC64); // See Matcher::match_rule_supported.
  match(Set dst (CountLeadingZerosI src));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "CNTLZW $dst, $src" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cntlzw);
    __ cntlzw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Count leading zeros of a long; emits a single cntlzd.
instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
  predicate(UseCountLeadingZerosInstructionsPPC64); // See Matcher::match_rule_supported.
  match(Set dst (CountLeadingZerosL src));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "CNTLZD $dst, $src" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd);
    __ cntlzd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// cntlzd on a pointer-class source. Helper node only: it has no match
// rule and its predicate is false, so the matcher never selects it.
instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
  // no match-rule, false predicate
  predicate(false);
  effect(DEF dst, USE src);

  size(4);
  format %{ "CNTLZD $dst, $src" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd);
    __ cntlzd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// CountTrailingZerosI, expanded into add / andc / cntlzw / subtract:
// dst = 32 - clz((src - 1) & ~src).
// NOTE(review): the operand roles of andcI_reg_reg and subI_imm16_reg are
// defined elsewhere in this file; confirm the formula against them.
instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CountTrailingZerosI src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI16 imm1 %{ (int)-1 %}
    immI16 imm2 %{ (int)32 %}
    immI_minus1 m1 %{ -1 %}
    iRegIdst tmpI1;
    iRegIdst tmpI2;
    iRegIdst tmpI3;
    addI_reg_imm16(tmpI1, src, imm1);       // src + (-1)
    andcI_reg_reg(tmpI2, src, m1, tmpI1);
    countLeadingZerosI(tmpI3, tmpI2);
    subI_imm16_reg(dst, imm2, tmpI3);       // 32 minus leading-zero count
  %}
%}

// CountTrailingZerosL: 64-bit counterpart of countTrailingZerosI_Ex,
// using the constant 64 and the long add/andc/cntlzd rules.
instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (CountTrailingZerosL src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immL16 imm1 %{ (long)-1 %}
    immI16 imm2 %{ (int)64 %}
    iRegLdst tmpL1;
    iRegLdst tmpL2;
    iRegIdst tmpL3;
    addL_reg_imm16(tmpL1, src, imm1);       // src + (-1)
    andcL_reg_reg(tmpL2, tmpL1, src);
    countLeadingZerosL(tmpL3, tmpL2);
    subI_imm16_reg(dst, imm2, tmpL3);       // 64 minus leading-zero count
  %}
%}

// Expand nodes for byte_reverse_int.
// insrwi_a declares dst as a pure DEF. Note that the encoding passes the
// shift operand before the pos operand to the assembler.
instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
  predicate(false);
  effect(DEF dst, USE src, USE pos, USE shift);

  size(4);
  format %{ "INSRWI $dst, $src, $pos, $shift" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
    __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// As insrwi_a, but with USE_DEF.
// dst is both read and written, so bits already inserted into it survive.
instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
  predicate(false);
  effect(USE_DEF dst, USE src, USE pos, USE shift);

  size(4);
  format %{ "INSRWI $dst, $src, $pos, $shift" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
    __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Just slightly faster than java implementation.
instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
  // ReverseBytesI, expanded into three logical right shifts of src, each
  // followed by an insrwi into dst, plus a final insrwi straight from src.
  // The first insert uses insrwi_a (pure DEF of dst); the later ones use
  // insrwi (USE_DEF), so they accumulate into the same register.
  // NOTE(review): imm4 is declared below but never referenced.
  predicate(UseCountLeadingZerosInstructionsPPC64);
  match(Set dst (ReverseBytesI src));
  ins_cost(DEFAULT_COST);

  expand %{
    immI16 imm24 %{ (int) 24 %}
    immI16 imm16 %{ (int) 16 %}
    immI16 imm8 %{ (int) 8 %}
    immI16 imm4 %{ (int) 4 %}
    immI16 imm0 %{ (int) 0 %}
    iRegLdst tmpI1;
    iRegLdst tmpI2;
    iRegLdst tmpI3;

    urShiftI_reg_imm(tmpI1, src, imm24);
    insrwi_a(dst, tmpI1, imm24, imm8);
    urShiftI_reg_imm(tmpI2, src, imm16);
    insrwi(dst, tmpI2, imm8, imm16);
    urShiftI_reg_imm(tmpI3, src, imm8);
    insrwi(dst, tmpI3, imm8, imm8);
    insrwi(dst, src, imm0, imm8);
  %}
%}

//---------- Replicate Vector Instructions ------------------------------------

// Insrdi does replicate if src == dst.
// Helper for the replicate expansions: emits insrdi dst, dst, 32, 0.
// Never matched directly (false predicate).
instruct repl32(iRegLdst dst) %{
  effect(USE_DEF dst);
  predicate(false);

  size(4);
  format %{ "INSRDI $dst, #0, $dst, #32 \t// replicate" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
    __ insrdi($dst$$Register, $dst$$Register, 32, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// Insrdi does replicate if src == dst.
// Helper for the replicate expansions: emits insrdi dst, dst, 48, 0.
// Never matched directly (false predicate).
instruct repl48(iRegLdst dst) %{
  effect(USE_DEF dst);
  predicate(false);

  size(4);
  format %{ "INSRDI $dst, #0, $dst, #48 \t// replicate" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
    __ insrdi($dst$$Register, $dst$$Register, 48, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// Insrdi does replicate if src == dst.
instruct repl56(iRegLdst dst) %{
  // Helper for the replicate expansions: emits insrdi dst, dst, 56, 0.
  // Never matched directly (false predicate).
  effect(USE_DEF dst);
  predicate(false);

  size(4);
  format %{ "INSRDI $dst, #0, $dst, #56 \t// replicate" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
    __ insrdi($dst$$Register, $dst$$Register, 56, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// ReplicateB for vectors of length 8: copy src into the long register,
// then apply the repl56, repl48 and repl32 insert steps in turn.
instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  expand %{
    moveReg(dst, src);
    repl56(dst);
    repl48(dst);
    repl32(dst);
  %}
%}

// ReplicateB of constant zero: a single LI of the immediate.
instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  size(4);
  format %{ "LI $dst, #0 \t// replicate8B" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// ReplicateB of constant -1: a single LI of the immediate.
instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  size(4);
  format %{ "LI $dst, #-1 \t// replicate8B" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// ReplicateS for vectors of length 4: copy src, then repl48 and repl32.
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  expand %{
    moveReg(dst, src);
    repl48(dst);
    repl32(dst);
  %}
%}

// ReplicateS of constant zero: a single LI of the immediate.
// The format label follows the rule name (4S).
instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  size(4);
  format %{ "LI $dst, #0 \t// replicate4S" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// ReplicateS of constant -1: a single LI of the immediate.
// The format label follows the rule name (4S).
instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  size(4);
  format %{ "LI $dst, -1 \t// replicate4S" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// ReplicateI for vectors of length 2: copy src, then repl32.
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(2 * DEFAULT_COST);
  expand %{
    moveReg(dst, src);
    repl32(dst);
  %}
%}

// ReplicateI of constant zero: a single LI of the immediate.
// The format label follows the rule name (2I).
instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  size(4);
  format %{ "LI $dst, #0 \t// replicate2I" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// ReplicateI of constant -1: a single LI of the immediate.
// The format label follows the rule name (2I).
instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  size(4);
  format %{ "LI $dst, -1 \t// replicate2I" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    stackSlotL tmpS;
    iRegIdst tmpI;
    moveF2I_reg_stack(tmpS, src);  // Move float to stack.
    moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
    moveReg(dst, tmpI);            // Move int to long reg.
    repl32(dst);                   // Replicate bitpattern.
  %}
%}

// Replicate scalar constant to packed float values in Double register.
// Implemented by a postalloc expand that uses the constant table base.
instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(5 * DEFAULT_COST);

  format %{ "LD $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
%}

// Replicate scalar zero constant to packed float values in Double register.
instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));

  format %{ "LI $dst, #0 \t// replicate2F" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, 0x0);
  %}
  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Safepoint Instruction

// Safepoint poll used when LoadPollAddressFromThread is set; the poll
// address arrives in an ordinary pointer register.
instruct safePoint_poll(iRegPdst poll) %{
  predicate(LoadPollAddressFromThread);
  match(SafePoint poll);

  // It caused problems to add the effect that r0 is killed, but this
  // effect no longer needs to be mentioned, since r0 is not contained
  // in a reg_class.

  size(4);
  format %{ "LD R0, #0, $poll \t// Safepoint poll for GC" %}
  ins_encode( enc_poll(0x0, poll) );
  ins_pipe(pipe_class_default);
%}

// Safepoint without per-thread support. Load address of page to poll
// as constant.
// Rscratch2RegP is R12.
// LoadConPollAddr node is added in pd_post_matching_hook(). It must be
// a separate node so that the oop map is at the right location.
instruct safePoint_poll_conPollAddr(rscratch2RegP poll) %{
  predicate(!LoadPollAddressFromThread);
  match(SafePoint poll);

  // It caused problems to add the effect that r0 is killed, but this
  // effect no longer needs to be mentioned, since r0 is not contained
  // in a reg_class.

  format %{ "LD R0, #0, R12 \t// Safepoint poll for GC" %}
  ins_encode( enc_poll(0x0, poll) );
  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Call Instructions

// Call Java Static Instruction

// Schedulable version of call static node.
// Selected when the call is not a method handle invoke.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  ins_cost(CALL_COST);

  ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);

  size(4);
  format %{ "CALL,static $meth \t// ==> " %}
  ins_encode( enc_java_static_call(meth) );
  ins_pipe(pipe_class_call);
%}

// Schedulable version of call static node.
// Selected when the call is a method handle invoke.
instruct CallStaticJavaDirectHandle(method meth) %{
  match(CallStaticJava);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  ins_cost(CALL_COST);

  ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);

  format %{ "CALL,static $meth \t// ==> " %}
  ins_encode( enc_java_handle_call(meth) );
  ins_pipe(pipe_class_call);
%}

// Call Java Dynamic Instruction

// Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
// Loading of IC was postalloc expanded. The nodes loading the IC are reachable
// via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
// The call destination must still be placed in the constant pool.
instruct CallDynamicJavaDirectSched(method meth) %{
  match(CallDynamicJava); // To get all the data fields we need ...
  predicate(false);       // ... but never match.
  effect(USE meth);

  ins_field_load_ic_hi_node(loadConL_hiNode*);
  ins_field_load_ic_node(loadConLNode*);
  ins_num_consts(1 /* 1 patchable constant: call destination */);

  size(4);
  format %{ "BL \t// dynamic $meth ==> " %}
  ins_encode( enc_java_dynamic_call_sched(meth) );
  ins_pipe(pipe_class_call);
%}

// Schedulable (i.e. postalloc expanded) version of call dynamic java.
// We use postalloc expanded calls if we use inline caches
// and do not update method data.
//
// This instruction has two constants: inline cache (IC) and call destination.
// Loading the inline cache will be postalloc expanded, thus leaving a call with
// one constant.
instruct CallDynamicJavaDirectSched_Ex(method meth) %{
  match(CallDynamicJava);
  predicate(UseInlineCaches);
  effect(USE meth);
  ins_cost(CALL_COST);

  ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);

  format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
%}

// Compound version of call dynamic java
// We use postalloc expanded calls if we use inline caches
// and do not update method data.
// This rule is the one selected when UseInlineCaches is off.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  predicate(!UseInlineCaches);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
  ins_num_consts(4);

  format %{ "CALL,dynamic $meth \t// ==> " %}
  ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Enc_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime" %}
  ins_encode( enc_java_to_runtime_call(meth) );
  ins_pipe(pipe_class_call);
%}

// Call Leaf

// Used by postalloc expand of CallLeafDirect_Ex (mtctr).
instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
  // No match rule: this node is only created by the postalloc expand of
  // the leaf call. It moves src to CTR via enc_leaf_call_mtctr.
  effect(DEF dst, USE src);

  ins_num_consts(1);

  size(4);
  format %{ "MTCTR $src" %}
  ins_encode( enc_leaf_call_mtctr(src) );
  ins_pipe(pipe_class_default);
%}

// Used by postalloc expand of CallLeafDirect_Ex (actual call).
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);   // To get all the data fields we need ...
  predicate(false);  // but never match.
  effect(USE meth);

  size(4);
  format %{ "BCTRL \t// leaf call $meth ==> " %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bctrl);
    __ bctrl();
  %}
  ins_pipe(pipe_class_call);
%}

// postalloc expand of CallLeafDirect.
// Load address to call from TOC, then bl to it.
instruct CallLeafDirect_Ex(method meth) %{
  match(CallLeaf);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
%}

// Call runtime without safepoint - same as CallLeaf.
// postalloc expand of CallLeafNoFPDirect.
// Load address to call from TOC, then bl to it.
instruct CallLeafNoFPDirect_Ex(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Enc_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_oop) %{
  match(TailCall jump_target method_oop);
  ins_cost(CALL_COST);

  size(8);
  format %{ "MTCTR $jump_target \t// $method_oop holds method oop\n\t"
            "BCTR \t// tail call" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ mtctr($jump_target$$Register);
    __ bctr();
  %}
  ins_pipe(pipe_class_call);
%}

// Return Instruction
instruct Ret() %{
  match(Return);
  size(4);
  format %{ "BLR \t// branch to link register" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_blr);
    // LR is restored in MachEpilogNode. Just do the RET here.
    __ blr();
  %}
  ins_pipe(pipe_class_default);
%}

// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
// "restore" before this instruction (in Epilogue), we need to materialize it
// in %i0.
// NOTE(review): %o0/%i0 are SPARC register names, so the two sentences
// above look inherited from another port -- confirm and reword for PPC.
instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
  // Three instructions (size 12): load the saved LR slot of the ABI frame
  // at R1_SP into R4_ARG2 as the issuing pc, move the target to CTR, and
  // branch. ex_oop is pinned to the rarg1RegP operand class.
  match(TailJump jump_target ex_oop);
  ins_cost(CALL_COST);

  size(12);
  format %{ "LD R4_ARG2 = LR\n\t"
            "MTCTR $jump_target\n\t"
            "BCTR \t// TailJump, exception oop: $ex_oop" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ ld(R4_ARG2/* issuing pc */, _abi(lr), R1_SP);
    __ mtctr($jump_target$$Register);
    __ bctr();
  %}
  ins_pipe(pipe_class_call);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException(rarg1RegP ex_oop) %{
  match(Set ex_oop (CreateEx));
  ins_cost(0);

  size(0);
  format %{ " -- \t// exception oop; no code emitted" %}
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "Jmp rethrow_stub" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    cbuf.set_insts_mark();
    __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
  %}
  ins_pipe(pipe_class_call);
%}

// Die now.
instruct ShouldNotReachHere() %{
  // Halt node: emits the trap_should_not_reach_here() sequence (4 bytes).
  match(Halt);
  ins_cost(CALL_COST);

  size(4);
  format %{ "ShouldNotReachHere" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
    __ trap_should_not_reach_here();
  %}
  ins_pipe(pipe_class_default);
%}

// This name is KNOWN by the ADLC and cannot be changed. The ADLC
// forces a 'TypeRawPtr::BOTTOM' output type for this guy.
// Get a DEF on threadRegP, no costs, no encoding, use
// 'ins_should_rematerialize(true)' to avoid spilling.
instruct tlsLoadP(threadRegP dst) %{
  match(Set dst (ThreadLocal));
  ins_cost(0);

  ins_should_rematerialize(true);

  size(0);
  format %{ " -- \t// $dst=Thread::current(), empty" %}
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_empty);
%}

//---Some PPC specific nodes---------------------------------------------------

// Stop a group.
instruct endGroup() %{
  // Flagged as a nop for the scheduler; emits endgroup() (4 bytes).
  ins_cost(0);
  size(4);
  format %{ "End Bundle (ori r1, r1, 0)" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_endgroup);
    __ endgroup();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

// Nop instructions
// Each rule below is flagged ins_is_nop(true), has zero cost, and emits
// one 4-byte instruction through the named assembler routine.

instruct fxNop() %{
  ins_cost(0);
  size(4);
  format %{ "fxNop" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ nop();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

instruct fpNop0() %{
  ins_cost(0);
  size(4);
  format %{ "fpNop0" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ fpnop0();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

instruct fpNop1() %{
  ins_cost(0);
  size(4);
  format %{ "fpNop1" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ fpnop1();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

instruct brNop0() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "brNop0" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
    __ brnop0();
  %}
  ins_pipe(pipe_class_default);
%}

instruct brNop1() %{
  ins_cost(0);
  size(4);
  format %{ "brNop1" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
    __ brnop1();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

instruct brNop2() %{
  ins_cost(0);
  size(4);
  format %{ "brNop2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
    __ brnop2();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(eRegI dst, eRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, eRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(eRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//

// Int variant: instruction 0 is the load, instruction 1 the store. When the
// store's source register equals the load's destination register and both
// use the same memory operand, the pair is replaced by one storeI built
// from the store's own operands.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

// Long variant of the rule above (loadL / storeL).
peephole %{
  peepmatch ( loadL storeL );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeL( 1.mem 1.mem 1.src ) );
%}

// Pointer variant (loadP / storeP).
// NOTE(review): the store's memory operand is referred to as 'dst' here,
// not 'mem' as in the int/long rules -- presumably this follows storeP's
// operand naming; confirm against the storeP instruct definition.
peephole %{
  peepmatch ( loadP storeP );
  peepconstraint ( 1.src == 0.dst, 1.dst == 0.mem );
  peepreplace ( storeP( 1.dst 1.dst 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.