Fri, 20 Dec 2013 13:51:14 +0100
8030863: PPC64: (part 220): ConstantTableBase for calls between args and jvms
Summary: Add ConstantTableBase node edge after parameters and before jvms. Adapt jvms offsets.
Reviewed-by: kvn
1.1 --- a/src/cpu/ppc/vm/ppc.ad Mon Dec 23 10:26:08 2013 -0800 1.2 +++ b/src/cpu/ppc/vm/ppc.ad Fri Dec 20 13:51:14 2013 +0100 1.3 @@ -3563,9 +3563,6 @@ 1.4 1.5 // postalloc expand emitter for virtual calls. 1.6 enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{ 1.7 - // Toc is in return address field, though not accessible via postalloc_expand 1.8 - // functionaliy. 1.9 - Node *toc = in(TypeFunc::ReturnAdr); 1.10 1.11 // Create the nodes for loading the IC from the TOC. 1.12 loadConLNodesTuple loadConLNodes_IC = 1.13 @@ -3592,23 +3589,21 @@ 1.14 // New call needs all inputs of old call. 1.15 // Req... 1.16 for (uint i = 0; i < req(); ++i) { 1.17 - if (i != TypeFunc::ReturnAdr) { 1.18 + // The expanded node does not need toc any more. 1.19 + // Add the inline cache constant here instead. This expresses the 1.20 + // register of the inline cache must be live at the call. 1.21 + // Else we would have to adapt JVMState by -1. 1.22 + if (i == mach_constant_base_node_input()) { 1.23 + call->add_req(loadConLNodes_IC._last); 1.24 + } else { 1.25 call->add_req(in(i)); 1.26 - } else { 1.27 - // The expanded node does not need toc any more. 1.28 - call->add_req(C->top()); 1.29 } 1.30 } 1.31 // ...as well as prec 1.32 - for (uint i = req(); i < len() ; ++i) { 1.33 + for (uint i = req(); i < len(); ++i) { 1.34 call->add_prec(in(i)); 1.35 } 1.36 1.37 - // The cache must come before the call, but it's not a req edge. 1.38 - // GL: actually it should be a req edge to express that the 1.39 - // register must be live in the Call. But as R19 is declared to be 1.40 - // the inline_cache_reg that's fine. 1.41 - call->add_prec(loadConLNodes_IC._last); 1.42 // Remember nodes loading the inline cache into r19. 1.43 call->_load_ic_hi_node = loadConLNodes_IC._large_hi; 1.44 call->_load_ic_node = loadConLNodes_IC._small; 1.45 @@ -3638,13 +3633,13 @@ 1.46 // Must be invalid_vtable_index, not nonvirtual_vtable_index. 1.47 assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value"); 1.48 Register ic_reg = as_Register(Matcher::inline_cache_reg_encode()); 1.49 - AddressLiteral oop = __ allocate_metadata_address((Metadata *)Universe::non_oop_word()); 1.50 - 1.51 - address virtual_call_oop_addr = __ pc(); 1.52 - __ load_const_from_method_toc(ic_reg, oop, Rtoc); 1.53 + AddressLiteral meta = __ allocate_metadata_address((Metadata *)Universe::non_oop_word()); 1.54 + 1.55 + address virtual_call_meta_addr = __ pc(); 1.56 + __ load_const_from_method_toc(ic_reg, meta, Rtoc); 1.57 // CALL to fixup routine. Fixup routine uses ScopeDesc info 1.58 // to determine who we intended to call. 1.59 - __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr)); 1.60 + __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr)); 1.61 emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none); 1.62 assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset, 1.63 "Fix constant in ret_addr_offset()"); 1.64 @@ -3674,6 +3669,7 @@ 1.65 "Fix constant in ret_addr_offset()"); 1.66 } 1.67 #endif 1.68 + guarantee(0, "Fix handling of toc edge: messes up derived/base pairs."); 1.69 Unimplemented(); // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!). 1.70 %} 1.71 1.72 @@ -3775,16 +3771,14 @@ 1.73 // New call needs all inputs of old call. 1.74 // Req... 1.75 for (uint i = 0; i < req(); ++i) { 1.76 - if (i != TypeFunc::ReturnAdr) { 1.77 + if (i != mach_constant_base_node_input()) { 1.78 call->add_req(in(i)); 1.79 - } else { 1.80 - // put the mtctr where ReturnAdr would be 1.81 - call->add_req(mtctr); 1.82 } 1.83 } 1.84 1.85 // These must be reqired edges, as the registers are live up to 1.86 // the call. Else the constants are handled as kills. 1.87 + call->add_req(mtctr); 1.88 call->add_req(loadConLNodes_Env._last); 1.89 call->add_req(loadConLNodes_Toc._last); 1.90 1.91 @@ -3818,7 +3812,7 @@ 1.92 // These two registers define part of the calling convention between 1.93 // compiled code and the interpreter. 1.94 1.95 - // Inline Cache Register or methodOop for I2C. 1.96 + // Inline Cache Register or method for I2C. 1.97 inline_cache_reg(R19); // R19_method 1.98 1.99 // Method Oop Register when calling interpreter. 1.100 @@ -6149,8 +6143,8 @@ 1.101 size(4); 1.102 ins_encode %{ 1.103 // TODO: PPC port $archOpcode(ppc64Opcode_ld); 1.104 - int offset = ra_->C->in_scratch_emit_size() ? 0 : MacroAssembler::largeoffset_si16_si16_lo(_const_toc_offset_hi_node->_const_toc_offset); 1.105 - __ ld($dst$$Register, offset, $base$$Register); 1.106 + int offset = ra_->C->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset; 1.107 + __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register); 1.108 %} 1.109 ins_pipe(pipe_class_memory); 1.110 %} 1.111 @@ -6784,7 +6778,7 @@ 1.112 Label done; 1.113 __ beq($crx$$CondRegister, done); 1.114 __ add($dst$$Register, $src1$$Register, R30); 1.115 - // TODO PPC port __ endgroup_if_needed(_size == 12); 1.116 + // TODO PPC port __ endgroup_if_needed(_size == 12); 1.117 __ bind(done); 1.118 %} 1.119 ins_pipe(pipe_class_default);
2.1 --- a/src/share/vm/adlc/archDesc.cpp Mon Dec 23 10:26:08 2013 -0800 2.2 +++ b/src/share/vm/adlc/archDesc.cpp Fri Dec 20 13:51:14 2013 +0100 2.3 @@ -172,7 +172,8 @@ 2.4 _internalOps(cmpstr,hashstr, Form::arena), 2.5 _internalMatch(cmpstr,hashstr, Form::arena), 2.6 _chainRules(cmpstr,hashstr, Form::arena), 2.7 - _cisc_spill_operand(NULL) { 2.8 + _cisc_spill_operand(NULL), 2.9 + _needs_clone_jvms(false) { 2.10 2.11 // Initialize the opcode to MatchList table with NULLs 2.12 for( int i=0; i<_last_opcode; ++i ) {
3.1 --- a/src/share/vm/adlc/archDesc.hpp Mon Dec 23 10:26:08 2013 -0800 3.2 +++ b/src/share/vm/adlc/archDesc.hpp Fri Dec 20 13:51:14 2013 +0100 3.3 @@ -121,6 +121,12 @@ 3.4 // to access [stack_pointer + offset] 3.5 OperandForm *_cisc_spill_operand; 3.6 3.7 + // If a Call node uses $constanttablebase, it gets MachConstantBaseNode 3.8 + // by the matcher and the matcher will modify the jvms. If so, jvm states 3.9 + // always have to be cloned when a node is cloned. Adlc generates 3.10 + // Compile::needs_clone_jvms() accordingly. 3.11 + bool _needs_clone_jvms; 3.12 + 3.13 // Methods for outputting the DFA 3.14 void gen_match(FILE *fp, MatchList &mlist, ProductionState &status, Dict &operands_chained_from); 3.15 void chain_rule(FILE *fp, const char *indent, const char *ideal, 3.16 @@ -289,6 +295,7 @@ 3.17 void addPreHeaderBlocks(FILE *fp_hpp); 3.18 void addHeaderBlocks(FILE *fp_hpp); 3.19 void addSourceBlocks(FILE *fp_cpp); 3.20 + void generate_needs_clone_jvms(FILE *fp_cpp); 3.21 void generate_adlc_verification(FILE *fp_cpp); 3.22 3.23 // output declaration of class State
4.1 --- a/src/share/vm/adlc/main.cpp Mon Dec 23 10:26:08 2013 -0800 4.2 +++ b/src/share/vm/adlc/main.cpp Fri Dec 20 13:51:14 2013 +0100 4.3 @@ -306,6 +306,7 @@ 4.4 AD.buildInstructMatchCheck(AD._CPP_file._fp); // .cpp 4.5 // define methods for machine dependent frame management 4.6 AD.buildFrameMethods(AD._CPP_file._fp); // .cpp 4.7 + AD.generate_needs_clone_jvms(AD._CPP_file._fp); 4.8 4.9 // do this last: 4.10 AD.addPreprocessorChecks(AD._CPP_file._fp); // .cpp
5.1 --- a/src/share/vm/adlc/output_c.cpp Mon Dec 23 10:26:08 2013 -0800 5.2 +++ b/src/share/vm/adlc/output_c.cpp Fri Dec 20 13:51:14 2013 +0100 5.3 @@ -1842,17 +1842,23 @@ 5.4 // There are nodes that don't use $constantablebase, but still require that it 5.5 // is an input to the node. Example: divF_reg_immN, Repl32B_imm on x86_64. 5.6 if (node->is_mach_constant() || node->needs_constant_base()) { 5.7 - fprintf(fp," add_req(C->mach_constant_base_node());\n"); 5.8 + if (node->is_ideal_call() != Form::invalid_type && 5.9 + node->is_ideal_call() != Form::JAVA_LEAF) { 5.10 + fprintf(fp, " // MachConstantBaseNode added in matcher.\n"); 5.11 + _needs_clone_jvms = true; 5.12 + } else { 5.13 + fprintf(fp, " add_req(C->mach_constant_base_node());\n"); 5.14 + } 5.15 } 5.16 5.17 - fprintf(fp,"\n"); 5.18 - if( node->expands() ) { 5.19 - fprintf(fp," return result;\n"); 5.20 + fprintf(fp, "\n"); 5.21 + if (node->expands()) { 5.22 + fprintf(fp, " return result;\n"); 5.23 } else { 5.24 - fprintf(fp," return this;\n"); 5.25 + fprintf(fp, " return this;\n"); 5.26 } 5.27 - fprintf(fp,"}\n"); 5.28 - fprintf(fp,"\n"); 5.29 + fprintf(fp, "}\n"); 5.30 + fprintf(fp, "\n"); 5.31 } 5.32 5.33 5.34 @@ -3642,6 +3648,11 @@ 5.35 return callconv; 5.36 } 5.37 5.38 +void ArchDesc::generate_needs_clone_jvms(FILE *fp_cpp) { 5.39 + fprintf(fp_cpp, "bool Compile::needs_clone_jvms() { return %s; }\n\n", 5.40 + _needs_clone_jvms ? "true" : "false"); 5.41 +} 5.42 + 5.43 //---------------------------generate_assertion_checks------------------- 5.44 void ArchDesc::generate_adlc_verification(FILE *fp_cpp) { 5.45 fprintf(fp_cpp, "\n");
6.1 --- a/src/share/vm/adlc/output_h.cpp Mon Dec 23 10:26:08 2013 -0800 6.2 +++ b/src/share/vm/adlc/output_h.cpp Fri Dec 20 13:51:14 2013 +0100 6.3 @@ -1665,7 +1665,15 @@ 6.4 6.5 if (instr->needs_constant_base() && 6.6 !instr->is_mach_constant()) { // These inherit the funcion from MachConstantNode. 6.7 - fprintf(fp," virtual uint mach_constant_base_node_input() const { return req()-1; }\n"); 6.8 + fprintf(fp," virtual uint mach_constant_base_node_input() const { "); 6.9 + if (instr->is_ideal_call() != Form::invalid_type && 6.10 + instr->is_ideal_call() != Form::JAVA_LEAF) { 6.11 + // MachConstantBase goes behind arguments, but before jvms. 6.12 + fprintf(fp,"assert(tf() && tf()->domain(), \"\"); return tf()->domain()->cnt();"); 6.13 + } else { 6.14 + fprintf(fp,"return req()-1;"); 6.15 + } 6.16 + fprintf(fp," }\n"); 6.17 } 6.18 6.19 // Allow machine-independent optimization, invert the sense of the IF test
7.1 --- a/src/share/vm/opto/callnode.cpp Mon Dec 23 10:26:08 2013 -0800 7.2 +++ b/src/share/vm/opto/callnode.cpp Fri Dec 20 13:51:14 2013 +0100 7.3 @@ -595,6 +595,18 @@ 7.4 } 7.5 } 7.6 7.7 +// Adapt offsets in in-array after adding or removing an edge. 7.8 +// Prerequisite is that the JVMState is used by only one node. 7.9 +void JVMState::adapt_position(int delta) { 7.10 + for (JVMState* jvms = this; jvms != NULL; jvms = jvms->caller()) { 7.11 + jvms->set_locoff(jvms->locoff() + delta); 7.12 + jvms->set_stkoff(jvms->stkoff() + delta); 7.13 + jvms->set_monoff(jvms->monoff() + delta); 7.14 + jvms->set_scloff(jvms->scloff() + delta); 7.15 + jvms->set_endoff(jvms->endoff() + delta); 7.16 + } 7.17 +} 7.18 + 7.19 //============================================================================= 7.20 uint CallNode::cmp( const Node &n ) const 7.21 { return _tf == ((CallNode&)n)._tf && _jvms == ((CallNode&)n)._jvms; }
8.1 --- a/src/share/vm/opto/callnode.hpp Mon Dec 23 10:26:08 2013 -0800 8.2 +++ b/src/share/vm/opto/callnode.hpp Fri Dec 20 13:51:14 2013 +0100 8.3 @@ -299,6 +299,7 @@ 8.4 JVMState* clone_deep(Compile* C) const; // recursively clones caller chain 8.5 JVMState* clone_shallow(Compile* C) const; // retains uncloned caller 8.6 void set_map_deep(SafePointNode *map);// reset map for all callers 8.7 + void adapt_position(int delta); // Adapt offsets in in-array after adding an edge. 8.8 8.9 #ifndef PRODUCT 8.10 void format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const; 8.11 @@ -559,9 +560,15 @@ 8.12 // Are we guaranteed that this node is a safepoint? Not true for leaf calls and 8.13 // for some macro nodes whose expansion does not have a safepoint on the fast path. 8.14 virtual bool guaranteed_safepoint() { return true; } 8.15 - // For macro nodes, the JVMState gets modified during expansion, so when cloning 8.16 - // the node the JVMState must be cloned. 8.17 - virtual void clone_jvms(Compile* C) { } // default is not to clone 8.18 + // For macro nodes, the JVMState gets modified during expansion. If calls 8.19 + // use MachConstantBase, it gets modified during matching. So when cloning 8.20 + // the node the JVMState must be cloned. Default is not to clone. 8.21 + virtual void clone_jvms(Compile* C) { 8.22 + if (C->needs_clone_jvms() && jvms() != NULL) { 8.23 + set_jvms(jvms()->clone_deep(C)); 8.24 + jvms()->set_map_deep(this); 8.25 + } 8.26 + } 8.27 8.28 // Returns true if the call may modify n 8.29 virtual bool may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase);
9.1 --- a/src/share/vm/opto/compile.hpp Mon Dec 23 10:26:08 2013 -0800 9.2 +++ b/src/share/vm/opto/compile.hpp Fri Dec 20 13:51:14 2013 +0100 9.3 @@ -758,6 +758,8 @@ 9.4 9.5 MachConstantBaseNode* mach_constant_base_node(); 9.6 bool has_mach_constant_base_node() const { return _mach_constant_base_node != NULL; } 9.7 + // Generated by adlc, true if CallNode requires MachConstantBase. 9.8 + bool needs_clone_jvms(); 9.9 9.10 // Handy undefined Node 9.11 Node* top() const { return _top; }
10.1 --- a/src/share/vm/opto/matcher.cpp Mon Dec 23 10:26:08 2013 -0800 10.2 +++ b/src/share/vm/opto/matcher.cpp Fri Dec 20 13:51:14 2013 +0100 10.3 @@ -1338,12 +1338,24 @@ 10.4 } 10.5 10.6 // Debug inputs begin just after the last incoming parameter 10.7 - assert( (mcall == NULL) || (mcall->jvms() == NULL) || 10.8 - (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "" ); 10.9 + assert((mcall == NULL) || (mcall->jvms() == NULL) || 10.10 + (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), ""); 10.11 10.12 // Move the OopMap 10.13 msfpt->_oop_map = sfpt->_oop_map; 10.14 10.15 + // Add additional edges. 10.16 + if (msfpt->mach_constant_base_node_input() != (uint)-1 && !msfpt->is_MachCallLeaf()) { 10.17 + // For these calls we can not add MachConstantBase in expand(), as the 10.18 + // ins are not complete then. 10.19 + msfpt->ins_req(msfpt->mach_constant_base_node_input(), C->mach_constant_base_node()); 10.20 + if (msfpt->jvms() && 10.21 + msfpt->mach_constant_base_node_input() <= msfpt->jvms()->debug_start() + msfpt->_jvmadj) { 10.22 + // We added an edge before jvms, so we must adapt the position of the ins. 10.23 + msfpt->jvms()->adapt_position(+1); 10.24 + } 10.25 + } 10.26 + 10.27 // Registers killed by the call are set in the local scheduling pass 10.28 // of Global Code Motion. 10.29 return msfpt;