/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_MATCHER_HPP
#define SHARE_VM_OPTO_MATCHER_HPP

#include "libadt/vectset.hpp"
#include "memory/resourceArea.hpp"
#include "opto/node.hpp"
#include "opto/phaseX.hpp"
#include "opto/regmask.hpp"

class Compile;
class Node;
class MachNode;
class MachTypeNode;
class MachOper;

//---------------------------Matcher-------------------------------------------
// Instruction-selection phase: walks the ideal graph and rewrites it into
// machine nodes, using the ADLC-generated DFA tables. Declarations only;
// bodies live in matcher.cpp and the per-platform AD output.
class Matcher : public PhaseTransform {
  friend class VMStructs;
  // Private arena of State objects
  ResourceArea _states_arena;

  VectorSet _visited;  // Visit bits

  // Used to control the Label pass
  VectorSet _shared;   // Shared Ideal Node
  VectorSet _dontcare; // Nothing the matcher cares about

  // Private methods which perform the actual matching and reduction
  // Walks the label tree, generating machine nodes
  MachNode *ReduceInst( State *s, int rule, Node *&mem);
  void ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach);
  uint ReduceInst_Interior(State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds);
  void ReduceOper( State *s, int newrule, Node *&mem, MachNode *mach );

  // If this node already matched using "rule", return the MachNode for it.
  MachNode* find_shared_node(Node* n, uint rule);

  // Convert a dense opcode number to an expanded rule number
  const int *_reduceOp;
  const int *_leftOp;
  const int *_rightOp;

  // Map dense opcode number to info on when rule is swallowed constant.
  const bool *_swallowed;

  // Map dense rule number to determine if this is an instruction chain rule
  const uint _begin_inst_chain_rule;
  const uint _end_inst_chain_rule;

  // We want to clone constants and possible CmpI-variants.
  // If we do not clone CmpI, then we can have many instances of
  // condition codes alive at once.  This is OK on some chips and
  // bad on others.  Hence the machine-dependent table lookup.
  const char *_must_clone;

  // Find shared Nodes, or Nodes that otherwise are Matcher roots
  void find_shared( Node *n );
#ifdef X86
  bool is_bmi_pattern(Node *n, Node *m);
#endif

  // Debug and profile information for nodes in old space:
  GrowableArray<Node_Notes*>* _old_node_note_array;

  // Node labeling iterator for instruction selection
  Node *Label_Root( const Node *n, State *svec, Node *control, const Node *mem );

  Node *transform( Node *dummy );

  Node_List _projection_list;  // For Machine nodes killing many values

  Node_Array _shared_nodes;

  debug_only(Node_Array _old2new_map;)  // Map roots of ideal-trees to machine-roots
  debug_only(Node_Array _new2old_map;)  // Maps machine nodes back to ideal

  // Accessors for the inherited field PhaseTransform::_nodes:
  void grow_new_node_array(uint idx_limit) {
    _nodes.map(idx_limit-1, NULL);
  }
  bool has_new_node(const Node* n) const {
    return _nodes.at(n->_idx) != NULL;
  }
  Node* new_node(const Node* n) const {
    assert(has_new_node(n), "set before get");
    return _nodes.at(n->_idx);
  }
  void set_new_node(const Node* n, Node *nn) {
    assert(!has_new_node(n), "set only once");
    _nodes.map(n->_idx, nn);
  }

#ifdef ASSERT
  // Make sure only new nodes are reachable from this node
  void verify_new_nodes_only(Node* root);

  Node* _mem_node;  // Ideal memory node consumed by mach node
#endif

  // Mach node for ConP #NULL
  MachNode* _mach_null;

public:
  int LabelRootDepth;
  // Convert ideal machine register to a register mask for spill-loads
  static const RegMask *idealreg2regmask[];
  RegMask *idealreg2spillmask  [_last_machine_leaf];
  RegMask *idealreg2debugmask  [_last_machine_leaf];
  RegMask *idealreg2mhdebugmask[_last_machine_leaf];
  void init_spill_mask( Node *ret );
  // Convert machine register number to register mask
  static uint mreg2regmask_max;
  static RegMask mreg2regmask[];
  static RegMask STACK_ONLY_mask;

  MachNode* mach_null() const { return _mach_null; }

  bool is_shared( Node *n ) { return _shared.test(n->_idx) != 0; }
  void set_shared( Node *n ) { _shared.set(n->_idx); }
  bool is_visited( Node *n ) { return _visited.test(n->_idx) != 0; }
  void set_visited( Node *n ) { _visited.set(n->_idx); }
  bool is_dontcare( Node *n ) { return _dontcare.test(n->_idx) != 0; }
  void set_dontcare( Node *n ) { _dontcare.set(n->_idx); }

  // Mode bit to tell DFA and expand rules whether we are running after
  // (or during) register selection.  Usually, the matcher runs before,
  // but it will also get called to generate post-allocation spill code.
  // In this situation, it is a deadly error to attempt to allocate more
  // temporary registers.
  bool _allocation_started;

  // Machine register names
  static const char *regName[];
  // Machine register encodings
  static const unsigned char _regEncode[];
  // Machine Node names
  const char **_ruleName;
  // Rules that are cheaper to rematerialize than to spill
  static const uint _begin_rematerialize;
  static const uint _end_rematerialize;

  // An array of chars, from 0 to _last_Mach_Reg.
  // No Save       = 'N' (for register windows)
  // Save on Entry = 'E'
  // Save on Call  = 'C'
  // Always Save   = 'A' (same as SOE + SOC)
  const char *_register_save_policy;
  const char *_c_reg_save_policy;
  // Convert a machine register to a machine register type, so-as to
  // properly match spill code.
  const int *_register_save_type;
  // Maps from machine register to boolean; true if machine register can
  // be holding a call argument in some signature.
  static bool can_be_java_arg( int reg );
  // Maps from machine register to boolean; true if machine register holds
  // a spillable argument.
  static bool is_spillable_arg( int reg );

  // List of IfFalse or IfTrue Nodes that indicate a taken null test.
  // List is valid in the post-matching space.
  Node_List _null_check_tests;
  void collect_null_checks( Node *proj, Node *orig_proj );
  void validate_null_checks( );

  Matcher();

  // Get a projection node at position pos
  Node* get_projection(uint pos) {
    return _projection_list[pos];
  }

  // Push a projection node onto the projection list
  void push_projection(Node* node) {
    _projection_list.push(node);
  }

  Node* pop_projection() {
    return _projection_list.pop();
  }

  // Number of nodes in the projection list
  uint number_of_projections() const {
    return _projection_list.size();
  }

  // Select instructions for entire method
  void match();

  // Helper for match
  OptoReg::Name warp_incoming_stk_arg( VMReg reg );

  // Transform, then walk.  Does implicit DCE while walking.
  // Name changed from "transform" to avoid it being virtual.
  Node *xform( Node *old_space_node, int Nodes );

  // Match a single Ideal Node - turn it into a 1-Node tree; Label & Reduce.
  MachNode *match_tree( const Node *n );
  MachNode *match_sfpt( SafePointNode *sfpt );
  // Helper for match_sfpt
  OptoReg::Name warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call );

  // Initialize first stack mask and related masks.
  void init_first_stack_mask();

  // If we should save-on-entry this register
  bool is_save_on_entry( int reg );

  // Fixup the save-on-entry registers
  void Fixup_Save_On_Entry( );

  // --- Frame handling ---

  // Register number of the stack slot corresponding to the incoming SP.
  // Per the Big Picture in the AD file, it is:
  //   SharedInfo::stack0 + locks + in_preserve_stack_slots + pad2.
  OptoReg::Name _old_SP;

  // Register number of the stack slot corresponding to the highest incoming
  // argument on the stack.  Per the Big Picture in the AD file, it is:
  //   _old_SP + out_preserve_stack_slots + incoming argument size.
  OptoReg::Name _in_arg_limit;

  // Register number of the stack slot corresponding to the new SP.
  // Per the Big Picture in the AD file, it is:
  //   _in_arg_limit + pad0
  OptoReg::Name _new_SP;

  // Register number of the stack slot corresponding to the highest outgoing
  // argument on the stack.  Per the Big Picture in the AD file, it is:
  //   _new_SP + max outgoing arguments of all calls
  OptoReg::Name _out_arg_limit;

  OptoRegPair *_parm_regs;           // Array of machine registers per argument
  RegMask *_calling_convention_mask; // Array of RegMasks per argument

  // Does matcher have a match rule for this ideal node?
  static const bool has_match_rule(int opcode);
  static const bool _hasMatchRule[_last_opcode];

  // Does matcher have a match rule for this ideal node and is the
  // predicate (if there is one) true?
  // NOTE: If this function is used more commonly in the future, ADLC
  // should generate this one.
  static const bool match_rule_supported(int opcode);

  // Used to determine if we have fast l2f conversion
  // USII has it, USIII doesn't
  static const bool convL2FSupported(void);

  // Vector width in bytes
  static const int vector_width_in_bytes(BasicType bt);

  // Limits on vector size (number of elements).
  static const int max_vector_size(const BasicType bt);
  static const int min_vector_size(const BasicType bt);
  static const bool vector_size_supported(const BasicType bt, int size) {
    return (Matcher::max_vector_size(bt) >= size &&
            Matcher::min_vector_size(bt) <= size);
  }

  // Vector ideal reg
  static const int vector_ideal_reg(int len);
  static const int vector_shift_count_ideal_reg(int len);

  // CPU supports misaligned vectors store/load.
  static const bool misaligned_vectors_ok();

  // Should original key array reference be passed to AES stubs
  static const bool pass_original_key_for_aes();

  // Used to determine a "low complexity" 64-bit constant.  (Zero is simple.)
  // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
  // Depends on the details of 64-bit constant generation on the CPU.
  static const bool isSimpleConstant64(jlong con);

  // These calls are all generated by the ADLC

  // TRUE - grows up, FALSE - grows down (Intel)
  virtual bool stack_direction() const;

  // Java-Java calling convention
  // (what you use when Java calls Java)

  // Alignment of stack in bytes, standard Intel word alignment is 4.
  // Sparc probably wants at least double-word (8).
  static uint stack_alignment_in_bytes();
  // Alignment of stack, measured in stack slots.
  // The size of stack slots is defined by VMRegImpl::stack_slot_size.
  static uint stack_alignment_in_slots() {
    return stack_alignment_in_bytes() / (VMRegImpl::stack_slot_size);
  }

  // Array mapping arguments to registers.  Argument 0 is usually the 'this'
  // pointer.  Registers can include stack-slots and regular registers.
  static void calling_convention( BasicType *, VMRegPair *, uint len, bool is_outgoing );

  // Convert a sig into a calling convention register layout
  // and find interesting things about it.
  static OptoReg::Name find_receiver( bool is_outgoing );
  // Return address register.  On Intel it is a stack-slot.  On PowerPC
  // it is the Link register.  On Sparc it is r31?
  virtual OptoReg::Name return_addr() const;
  RegMask _return_addr_mask;
  // Return value register.  On Intel it is EAX.  On Sparc i0/o0.
  static OptoRegPair   return_value(int ideal_reg, bool is_outgoing);
  static OptoRegPair c_return_value(int ideal_reg, bool is_outgoing);
  RegMask _return_value_mask;
  // Inline Cache Register
  static OptoReg::Name inline_cache_reg();
  static int           inline_cache_reg_encode();

  // Register for DIVI projection of divmodI
  static RegMask divI_proj_mask();
  // Register for MODI projection of divmodI
  static RegMask modI_proj_mask();

  // Register for DIVL projection of divmodL
  static RegMask divL_proj_mask();
  // Register for MODL projection of divmodL
  static RegMask modL_proj_mask();

  // Use hardware DIV instruction when it is faster than
  // code which uses a multiply for division by constant.
  static bool use_asm_for_ldiv_by_con( jlong divisor );

  static const RegMask method_handle_invoke_SP_save_mask();

  // Java-Interpreter calling convention
  // (what you use when calling between compiled-Java and Interpreted-Java)

  // Number of callee-save + always-save registers
  // Ignores frame pointer and "special" registers
  static int number_of_saved_registers();

  // The Method-klass-holder may be passed in the inline_cache_reg
  // and then expanded into the inline_cache_reg and a method_oop register

  static OptoReg::Name interpreter_method_oop_reg();
  static int           interpreter_method_oop_reg_encode();

  static OptoReg::Name  compiler_method_oop_reg();
  static const RegMask &compiler_method_oop_reg_mask();
  static int            compiler_method_oop_reg_encode();

  // Interpreter's Frame Pointer Register
  static OptoReg::Name interpreter_frame_pointer_reg();

  // Java-Native calling convention
  // (what you use when intercalling between Java and C++ code)

  // Array mapping arguments to registers.  Argument 0 is usually the 'this'
  // pointer.  Registers can include stack-slots and regular registers.
  static void c_calling_convention( BasicType*, VMRegPair *, uint );
  // Frame pointer.  The frame pointer is kept at the base of the stack
  // and so is probably the stack pointer for most machines.  On Intel
  // it is ESP.  On the PowerPC it is R1.  On Sparc it is SP.
  OptoReg::Name c_frame_pointer() const;
  static RegMask c_frame_ptr_mask;

  // !!!!! Special stuff for building ScopeDescs
  virtual int regnum_to_fpu_offset(int regnum);

  // Is this branch offset small enough to be addressed by a short branch?
  bool is_short_branch_offset(int rule, int br_size, int offset);

  // Optional scaling for the parameter to the ClearArray/CopyArray node.
  static const bool init_array_count_is_in_bytes;

  // Threshold small size (in bytes) for a ClearArray/CopyArray node.
  // Anything this size or smaller may get converted to discrete scalar stores.
  static const int init_array_short_size;

  // Some hardware needs 2 CMOV's for longs.
  static const int long_cmove_cost();

  // Some hardware has expensive CMOV for float and double.
  static const int float_cmove_cost();

  // Should the Matcher clone shifts on addressing modes, expecting them to
  // be subsumed into complex addressing expressions or compute them into
  // registers?  True for Intel but false for most RISCs
  static const bool clone_shift_expressions;

  static bool narrow_oop_use_complex_address();
  static bool narrow_klass_use_complex_address();

  // Generate implicit null check for narrow oops if it can fold
  // into address expression (x64).
  //
  // [R12 + narrow_oop_reg<<3 + offset] // fold into address expression
  // NullCheck narrow_oop_reg
  //
  // When narrow oops can't fold into address expression (Sparc) and
  // base is not null use decode_not_null and normal implicit null check.
  // Note, decode_not_null node can be used here since it is referenced
  // only on non null path but it requires special handling, see
  // collect_null_checks():
  //
  // decode_not_null narrow_oop_reg, oop_reg // 'shift' and 'add base'
  // [oop_reg + offset]
  // NullCheck oop_reg
  //
  // With Zero base and when narrow oops can not fold into address
  // expression use normal implicit null check since only shift
  // is needed to decode narrow oop.
  //
  // decode narrow_oop_reg, oop_reg // only 'shift'
  // [oop_reg + offset]
  // NullCheck oop_reg
  //
  inline static bool gen_narrow_oop_implicit_null_checks() {
    return Universe::narrow_oop_use_implicit_null_checks() &&
           (narrow_oop_use_complex_address() ||
            Universe::narrow_oop_base() != NULL);
  }

  // Is it better to copy float constants, or load them directly from memory?
  // Intel can load a float constant from a direct address, requiring no
  // extra registers.  Most RISCs will have to materialize an address into a
  // register first, so they may as well materialize the constant immediately.
  static const bool rematerialize_float_constants;

  // If CPU can load and store mis-aligned doubles directly then no fixup is
  // needed.  Else we split the double into 2 integer pieces and move it
  // piece-by-piece.  Only happens when passing doubles into C code or when
  // calling i2c adapters as the Java calling convention forces doubles to be
  // aligned.
  static const bool misaligned_doubles_ok;

  // Does the CPU require postalloc expand (see block.cpp for description of
  // postalloc expand)?
  static const bool require_postalloc_expand;

  // Perform a platform dependent implicit null fixup.  This is needed
  // on windows95 to take care of some unusual register constraints.
  void pd_implicit_null_fixup(MachNode *load, uint idx);

  // Advertise here if the CPU requires explicit rounding operations
  // to implement the UseStrictFP mode.
  static const bool strict_fp_requires_explicit_rounding;

  // Are floats converted to double when stored to stack during deoptimization?
  static bool float_in_double();
  // Do ints take an entire long register or just half?
  static const bool int_in_long;

  // Do the processor's shift instructions only use the low 5/6 bits
  // of the count for 32/64 bit ints?  If not we need to do the masking
  // ourselves.
  static const bool need_masked_shift_count;

  // This routine is run whenever a graph fails to match.
  // If it returns, the compiler should bailout to interpreter without error.
  // In non-product mode, SoftMatchFailure is false to detect non-canonical
  // graphs.  Print a message and exit.
  static void soft_match_failure() {
    if( SoftMatchFailure ) return;
    else { fatal("SoftMatchFailure is not allowed except in product"); }
  }

  // Check for a following volatile memory barrier without an
  // intervening load and thus we don't need a barrier here.  We
  // retain the Node to act as a compiler ordering barrier.
  static bool post_store_load_barrier(const Node* mb);

  // Does n lead to an uncommon trap that can cause deoptimization?
  static bool branches_to_uncommon_trap(const Node *n);

#ifdef ASSERT
  void dump_old2new_map();  // machine-independent to machine-dependent

  Node* find_old_node(Node* new_node) {
    return _new2old_map[new_node->_idx];
  }
#endif
};

#endif // SHARE_VM_OPTO_MATCHER_HPP