duke@435: /* duke@435: * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved. duke@435: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. duke@435: * duke@435: * This code is free software; you can redistribute it and/or modify it duke@435: * under the terms of the GNU General Public License version 2 only, as duke@435: * published by the Free Software Foundation. duke@435: * duke@435: * This code is distributed in the hope that it will be useful, but WITHOUT duke@435: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or duke@435: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License duke@435: * version 2 for more details (a copy is included in the LICENSE file that duke@435: * accompanied this code). duke@435: * duke@435: * You should have received a copy of the GNU General Public License version duke@435: * 2 along with this work; if not, write to the Free Software Foundation, duke@435: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. duke@435: * duke@435: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, duke@435: * CA 95054 USA or visit www.sun.com if you need additional information or duke@435: * have any questions. duke@435: */ duke@435: duke@435: // duke@435: // S U P E R W O R D T R A N S F O R M duke@435: // duke@435: // SuperWords are short, fixed length vectors. duke@435: // duke@435: // Algorithm from: duke@435: // duke@435: // Exploiting SuperWord Level Parallelism with duke@435: // Multimedia Instruction Sets duke@435: // by duke@435: // Samuel Larsen and Saman Amarasighe duke@435: // MIT Laboratory for Computer Science duke@435: // date duke@435: // May 2000 duke@435: // published in duke@435: // ACM SIGPLAN Notices duke@435: // Proceedings of ACM PLDI '00, Volume 35 Issue 5 duke@435: // duke@435: // Definition 3.1 A Pack is an n-tuple, , where duke@435: // s1,...,sn are independent isomorphic statements in a basic duke@435: // block. duke@435: // duke@435: // Definition 3.2 A PackSet is a set of Packs. duke@435: // duke@435: // Definition 3.3 A Pair is a Pack of size two, where the duke@435: // first statement is considered the left element, and the duke@435: // second statement is considered the right element. duke@435: duke@435: class SWPointer; duke@435: class OrderedPair; duke@435: duke@435: // ========================= Dependence Graph ===================== duke@435: duke@435: class DepMem; duke@435: duke@435: //------------------------------DepEdge--------------------------- duke@435: // An edge in the dependence graph. The edges incident to a dependence duke@435: // node are threaded through _next_in for incoming edges and _next_out duke@435: // for outgoing edges. duke@435: class DepEdge : public ResourceObj { duke@435: protected: duke@435: DepMem* _pred; duke@435: DepMem* _succ; duke@435: DepEdge* _next_in; // list of in edges, null terminated duke@435: DepEdge* _next_out; // list of out edges, null terminated duke@435: duke@435: public: duke@435: DepEdge(DepMem* pred, DepMem* succ, DepEdge* next_in, DepEdge* next_out) : duke@435: _pred(pred), _succ(succ), _next_in(next_in), _next_out(next_out) {} duke@435: duke@435: DepEdge* next_in() { return _next_in; } duke@435: DepEdge* next_out() { return _next_out; } duke@435: DepMem* pred() { return _pred; } duke@435: DepMem* succ() { return _succ; } duke@435: duke@435: void print(); duke@435: }; duke@435: duke@435: //------------------------------DepMem--------------------------- duke@435: // A node in the dependence graph. _in_head starts the threaded list of duke@435: // incoming edges, and _out_head starts the list of outgoing edges. duke@435: class DepMem : public ResourceObj { duke@435: protected: duke@435: Node* _node; // Corresponding ideal node duke@435: DepEdge* _in_head; // Head of list of in edges, null terminated duke@435: DepEdge* _out_head; // Head of list of out edges, null terminated duke@435: duke@435: public: duke@435: DepMem(Node* node) : _node(node), _in_head(NULL), _out_head(NULL) {} duke@435: duke@435: Node* node() { return _node; } duke@435: DepEdge* in_head() { return _in_head; } duke@435: DepEdge* out_head() { return _out_head; } duke@435: void set_in_head(DepEdge* hd) { _in_head = hd; } duke@435: void set_out_head(DepEdge* hd) { _out_head = hd; } duke@435: duke@435: int in_cnt(); // Incoming edge count duke@435: int out_cnt(); // Outgoing edge count duke@435: duke@435: void print(); duke@435: }; duke@435: duke@435: //------------------------------DepGraph--------------------------- duke@435: class DepGraph VALUE_OBJ_CLASS_SPEC { duke@435: protected: duke@435: Arena* _arena; duke@435: GrowableArray _map; duke@435: DepMem* _root; duke@435: DepMem* _tail; duke@435: duke@435: public: duke@435: DepGraph(Arena* a) : _arena(a), _map(a, 8, 0, NULL) { duke@435: _root = new (_arena) DepMem(NULL); duke@435: _tail = new (_arena) DepMem(NULL); duke@435: } duke@435: duke@435: DepMem* root() { return _root; } duke@435: DepMem* tail() { return _tail; } duke@435: duke@435: // Return dependence node corresponding to an ideal node duke@435: DepMem* dep(Node* node) { return _map.at(node->_idx); } duke@435: duke@435: // Make a new dependence graph node for an ideal node. duke@435: DepMem* make_node(Node* node); duke@435: duke@435: // Make a new dependence graph edge dprec->dsucc duke@435: DepEdge* make_edge(DepMem* dpred, DepMem* dsucc); duke@435: duke@435: DepEdge* make_edge(Node* pred, Node* succ) { return make_edge(dep(pred), dep(succ)); } duke@435: DepEdge* make_edge(DepMem* pred, Node* succ) { return make_edge(pred, dep(succ)); } duke@435: DepEdge* make_edge(Node* pred, DepMem* succ) { return make_edge(dep(pred), succ); } duke@435: duke@435: void init() { _map.clear(); } // initialize duke@435: duke@435: void print(Node* n) { dep(n)->print(); } duke@435: void print(DepMem* d) { d->print(); } duke@435: }; duke@435: duke@435: //------------------------------DepPreds--------------------------- duke@435: // Iterator over predecessors in the dependence graph and duke@435: // non-memory-graph inputs of ideal nodes. duke@435: class DepPreds : public StackObj { duke@435: private: duke@435: Node* _n; duke@435: int _next_idx, _end_idx; duke@435: DepEdge* _dep_next; duke@435: Node* _current; duke@435: bool _done; duke@435: duke@435: public: duke@435: DepPreds(Node* n, DepGraph& dg); duke@435: Node* current() { return _current; } duke@435: bool done() { return _done; } duke@435: void next(); duke@435: }; duke@435: duke@435: //------------------------------DepSuccs--------------------------- duke@435: // Iterator over successors in the dependence graph and duke@435: // non-memory-graph outputs of ideal nodes. duke@435: class DepSuccs : public StackObj { duke@435: private: duke@435: Node* _n; duke@435: int _next_idx, _end_idx; duke@435: DepEdge* _dep_next; duke@435: Node* _current; duke@435: bool _done; duke@435: duke@435: public: duke@435: DepSuccs(Node* n, DepGraph& dg); duke@435: Node* current() { return _current; } duke@435: bool done() { return _done; } duke@435: void next(); duke@435: }; duke@435: duke@435: duke@435: // ========================= SuperWord ===================== duke@435: duke@435: // -----------------------------SWNodeInfo--------------------------------- duke@435: // Per node info needed by SuperWord duke@435: class SWNodeInfo VALUE_OBJ_CLASS_SPEC { duke@435: public: duke@435: int _alignment; // memory alignment for a node duke@435: int _depth; // Max expression (DAG) depth from block start duke@435: const Type* _velt_type; // vector element type duke@435: Node_List* _my_pack; // pack containing this node duke@435: duke@435: SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {} duke@435: static const SWNodeInfo initial; duke@435: }; duke@435: duke@435: // -----------------------------SuperWord--------------------------------- duke@435: // Transforms scalar operations into packed (superword) operations. duke@435: class SuperWord : public ResourceObj { duke@435: private: duke@435: PhaseIdealLoop* _phase; duke@435: Arena* _arena; duke@435: PhaseIterGVN &_igvn; duke@435: duke@435: enum consts { top_align = -1, bottom_align = -666 }; duke@435: duke@435: GrowableArray _packset; // Packs for the current block duke@435: duke@435: GrowableArray _bb_idx; // Map from Node _idx to index within block duke@435: duke@435: GrowableArray _block; // Nodes in current block duke@435: GrowableArray _data_entry; // Nodes with all inputs from outside duke@435: GrowableArray _mem_slice_head; // Memory slice head nodes duke@435: GrowableArray _mem_slice_tail; // Memory slice tail nodes duke@435: duke@435: GrowableArray _node_info; // Info needed per node duke@435: duke@435: MemNode* _align_to_ref; // Memory reference that pre-loop will align to duke@435: duke@435: GrowableArray _disjoint_ptrs; // runtime disambiguated pointer pairs duke@435: duke@435: DepGraph _dg; // Dependence graph duke@435: duke@435: // Scratch pads duke@435: VectorSet _visited; // Visited set duke@435: VectorSet _post_visited; // Post-visited set duke@435: Node_Stack _n_idx_list; // List of (node,index) pairs duke@435: GrowableArray _nlist; // List of nodes duke@435: GrowableArray _stk; // Stack of nodes duke@435: duke@435: public: duke@435: SuperWord(PhaseIdealLoop* phase); duke@435: duke@435: void transform_loop(IdealLoopTree* lpt); duke@435: duke@435: // Accessors for SWPointer duke@435: PhaseIdealLoop* phase() { return _phase; } duke@435: IdealLoopTree* lpt() { return _lpt; } duke@435: PhiNode* iv() { return _iv; } duke@435: duke@435: private: duke@435: IdealLoopTree* _lpt; // Current loop tree node duke@435: LoopNode* _lp; // Current LoopNode duke@435: Node* _bb; // Current basic block duke@435: PhiNode* _iv; // Induction var duke@435: duke@435: // Accessors duke@435: Arena* arena() { return _arena; } duke@435: duke@435: Node* bb() { return _bb; } duke@435: void set_bb(Node* bb) { _bb = bb; } duke@435: duke@435: void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; } duke@435: duke@435: LoopNode* lp() { return _lp; } duke@435: void set_lp(LoopNode* lp) { _lp = lp; duke@435: _iv = lp->as_CountedLoop()->phi()->as_Phi(); } duke@435: int iv_stride() { return lp()->as_CountedLoop()->stride_con(); } duke@435: duke@435: int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); } duke@435: duke@435: MemNode* align_to_ref() { return _align_to_ref; } duke@435: void set_align_to_ref(MemNode* m) { _align_to_ref = m; } duke@435: duke@435: Node* ctrl(Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; } duke@435: duke@435: // block accessors duke@435: bool in_bb(Node* n) { return n != NULL && n->outcnt() > 0 && ctrl(n) == _bb; } duke@435: int bb_idx(Node* n) { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); } duke@435: void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); } duke@435: duke@435: // visited set accessors duke@435: void visited_clear() { _visited.Clear(); } duke@435: void visited_set(Node* n) { return _visited.set(bb_idx(n)); } duke@435: int visited_test(Node* n) { return _visited.test(bb_idx(n)); } duke@435: int visited_test_set(Node* n) { return _visited.test_set(bb_idx(n)); } duke@435: void post_visited_clear() { _post_visited.Clear(); } duke@435: void post_visited_set(Node* n) { return _post_visited.set(bb_idx(n)); } duke@435: int post_visited_test(Node* n) { return _post_visited.test(bb_idx(n)); } duke@435: duke@435: // Ensure node_info contains element "i" duke@435: void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); } duke@435: duke@435: // memory alignment for a node duke@435: int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; } duke@435: void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; } duke@435: duke@435: // Max expression (DAG) depth from beginning of the block for each node duke@435: int depth(Node* n) { return _node_info.adr_at(bb_idx(n))->_depth; } duke@435: void set_depth(Node* n, int d) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; } duke@435: duke@435: // vector element type duke@435: const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; } duke@435: void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; } duke@435: duke@435: // my_pack duke@435: Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } duke@435: void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } duke@435: duke@435: // methods duke@435: duke@435: // Extract the superword level parallelism duke@435: void SLP_extract(); duke@435: // Find the adjacent memory references and create pack pairs for them. duke@435: void find_adjacent_refs(); duke@435: // Find a memory reference to align the loop induction variable to. duke@435: void find_align_to_ref(Node_List &memops); duke@435: // Can the preloop align the reference to position zero in the vector? duke@435: bool ref_is_alignable(SWPointer& p); duke@435: // Construct dependency graph. duke@435: void dependence_graph(); duke@435: // Return a memory slice (node list) in predecessor order starting at "start" duke@435: void mem_slice_preds(Node* start, Node* stop, GrowableArray &preds); duke@435: // Can s1 and s2 be in a pack with s1 immediately preceeding s2 and s1 aligned at "align" duke@435: bool stmts_can_pack(Node* s1, Node* s2, int align); duke@435: // Does s exist in a pack at position pos? duke@435: bool exists_at(Node* s, uint pos); duke@435: // Is s1 immediately before s2 in memory? duke@435: bool are_adjacent_refs(Node* s1, Node* s2); duke@435: // Are s1 and s2 similar? duke@435: bool isomorphic(Node* s1, Node* s2); duke@435: // Is there no data path from s1 to s2 or s2 to s1? duke@435: bool independent(Node* s1, Node* s2); duke@435: // Helper for independent duke@435: bool independent_path(Node* shallow, Node* deep, uint dp=0); duke@435: void set_alignment(Node* s1, Node* s2, int align); duke@435: int data_size(Node* s); duke@435: // Extend packset by following use->def and def->use links from pack members. duke@435: void extend_packlist(); duke@435: // Extend the packset by visiting operand definitions of nodes in pack p duke@435: bool follow_use_defs(Node_List* p); duke@435: // Extend the packset by visiting uses of nodes in pack p duke@435: bool follow_def_uses(Node_List* p); duke@435: // Estimate the savings from executing s1 and s2 as a pack duke@435: int est_savings(Node* s1, Node* s2); duke@435: int adjacent_profit(Node* s1, Node* s2); duke@435: int pack_cost(int ct); duke@435: int unpack_cost(int ct); duke@435: // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last duke@435: void combine_packs(); duke@435: // Construct the map from nodes to packs. duke@435: void construct_my_pack_map(); duke@435: // Remove packs that are not implemented or not profitable. duke@435: void filter_packs(); duke@435: // Adjust the memory graph for the packed operations duke@435: void schedule(); duke@435: // Within a pack, move stores down to the last executed store, duke@435: // and move loads up to the first executed load. duke@435: void co_locate_pack(Node_List* p); duke@435: // Convert packs into vector node operations duke@435: void output(); duke@435: // Create a vector operand for the nodes in pack p for operand: in(opd_idx) duke@435: VectorNode* vector_opd(Node_List* p, int opd_idx); duke@435: // Can code be generated for pack p? duke@435: bool implemented(Node_List* p); duke@435: // For pack p, are all operands and all uses (with in the block) vector? duke@435: bool profitable(Node_List* p); duke@435: // If a use of pack p is not a vector use, then replace the use with an extract operation. duke@435: void insert_extracts(Node_List* p); duke@435: // Is use->in(u_idx) a vector use? duke@435: bool is_vector_use(Node* use, int u_idx); duke@435: // Construct reverse postorder list of block members duke@435: void construct_bb(); duke@435: // Initialize per node info duke@435: void initialize_bb(); duke@435: // Insert n into block after pos duke@435: void bb_insert_after(Node* n, int pos); duke@435: // Compute max depth for expressions from beginning of block duke@435: void compute_max_depth(); duke@435: // Compute necessary vector element type for expressions duke@435: void compute_vector_element_type(); duke@435: // Are s1 and s2 in a pack pair and ordered as s1,s2? duke@435: bool in_packset(Node* s1, Node* s2); duke@435: // Is s in pack p? duke@435: Node_List* in_pack(Node* s, Node_List* p); duke@435: // Remove the pack at position pos in the packset duke@435: void remove_pack_at(int pos); duke@435: // Return the node executed first in pack p. duke@435: Node* executed_first(Node_List* p); duke@435: // Return the node executed last in pack p. duke@435: Node* executed_last(Node_List* p); duke@435: // Alignment within a vector memory reference duke@435: int memory_alignment(MemNode* s, int iv_adjust_in_bytes); duke@435: // (Start, end] half-open range defining which operands are vector duke@435: void vector_opd_range(Node* n, uint* start, uint* end); duke@435: // Smallest type containing range of values duke@435: static const Type* container_type(const Type* t); duke@435: // Adjust pre-loop limit so that in main loop, a load/store reference duke@435: // to align_to_ref will be a position zero in the vector. duke@435: void align_initial_loop_index(MemNode* align_to_ref); duke@435: // Find pre loop end from main loop. Returns null if none. duke@435: CountedLoopEndNode* get_pre_loop_end(CountedLoopNode *cl); duke@435: // Is the use of d1 in u1 at the same operand position as d2 in u2? duke@435: bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2); duke@435: void init(); duke@435: duke@435: // print methods duke@435: void print_packset(); duke@435: void print_pack(Node_List* p); duke@435: void print_bb(); duke@435: void print_stmt(Node* s); duke@435: char* blank(uint depth); duke@435: }; duke@435: duke@435: duke@435: //------------------------------SWPointer--------------------------- duke@435: // Information about an address for dependence checking and vector alignment duke@435: class SWPointer VALUE_OBJ_CLASS_SPEC { duke@435: protected: duke@435: MemNode* _mem; // My memory reference node duke@435: SuperWord* _slp; // SuperWord class duke@435: duke@435: Node* _base; // NULL if unsafe nonheap reference duke@435: Node* _adr; // address pointer duke@435: jint _scale; // multipler for iv (in bytes), 0 if no loop iv duke@435: jint _offset; // constant offset (in bytes) duke@435: Node* _invar; // invariant offset (in bytes), NULL if none duke@435: bool _negate_invar; // if true then use: (0 - _invar) duke@435: duke@435: PhaseIdealLoop* phase() { return _slp->phase(); } duke@435: IdealLoopTree* lpt() { return _slp->lpt(); } duke@435: PhiNode* iv() { return _slp->iv(); } // Induction var duke@435: duke@435: bool invariant(Node* n) { duke@435: Node *n_c = phase()->get_ctrl(n); duke@435: return !lpt()->is_member(phase()->get_loop(n_c)); duke@435: } duke@435: duke@435: // Match: k*iv + offset duke@435: bool scaled_iv_plus_offset(Node* n); duke@435: // Match: k*iv where k is a constant that's not zero duke@435: bool scaled_iv(Node* n); duke@435: // Match: offset is (k [+/- invariant]) duke@435: bool offset_plus_k(Node* n, bool negate = false); duke@435: duke@435: public: duke@435: enum CMP { duke@435: Less = 1, duke@435: Greater = 2, duke@435: Equal = 4, duke@435: NotEqual = (Less | Greater), duke@435: NotComparable = (Less | Greater | Equal) duke@435: }; duke@435: duke@435: SWPointer(MemNode* mem, SuperWord* slp); duke@435: // Following is used to create a temporary object during duke@435: // the pattern match of an address expression. duke@435: SWPointer(SWPointer* p); duke@435: duke@435: bool valid() { return _adr != NULL; } duke@435: bool has_iv() { return _scale != 0; } duke@435: duke@435: Node* base() { return _base; } duke@435: Node* adr() { return _adr; } duke@435: int scale_in_bytes() { return _scale; } duke@435: Node* invar() { return _invar; } duke@435: bool negate_invar() { return _negate_invar; } duke@435: int offset_in_bytes() { return _offset; } duke@435: int memory_size() { return _mem->memory_size(); } duke@435: duke@435: // Comparable? duke@435: int cmp(SWPointer& q) { duke@435: if (valid() && q.valid() && duke@435: (_adr == q._adr || _base == _adr && q._base == q._adr) && duke@435: _scale == q._scale && duke@435: _invar == q._invar && duke@435: _negate_invar == q._negate_invar) { duke@435: bool overlap = q._offset < _offset + memory_size() && duke@435: _offset < q._offset + q.memory_size(); duke@435: return overlap ? Equal : (_offset < q._offset ? Less : Greater); duke@435: } else { duke@435: return NotComparable; duke@435: } duke@435: } duke@435: duke@435: bool not_equal(SWPointer& q) { return not_equal(cmp(q)); } duke@435: bool equal(SWPointer& q) { return equal(cmp(q)); } duke@435: bool comparable(SWPointer& q) { return comparable(cmp(q)); } duke@435: static bool not_equal(int cmp) { return cmp <= NotEqual; } duke@435: static bool equal(int cmp) { return cmp == Equal; } duke@435: static bool comparable(int cmp) { return cmp < NotComparable; } duke@435: duke@435: void print(); duke@435: }; duke@435: duke@435: duke@435: //------------------------------OrderedPair--------------------------- duke@435: // Ordered pair of Node*. duke@435: class OrderedPair VALUE_OBJ_CLASS_SPEC { duke@435: protected: duke@435: Node* _p1; duke@435: Node* _p2; duke@435: public: duke@435: OrderedPair() : _p1(NULL), _p2(NULL) {} duke@435: OrderedPair(Node* p1, Node* p2) { duke@435: if (p1->_idx < p2->_idx) { duke@435: _p1 = p1; _p2 = p2; duke@435: } else { duke@435: _p1 = p2; _p2 = p1; duke@435: } duke@435: } duke@435: duke@435: bool operator==(const OrderedPair &rhs) { duke@435: return _p1 == rhs._p1 && _p2 == rhs._p2; duke@435: } duke@435: void print() { tty->print(" (%d, %d)", _p1->_idx, _p2->_idx); } duke@435: duke@435: static const OrderedPair initial; duke@435: };