src/cpu/x86/vm/frame_x86.cpp

Tue, 14 Oct 2008 15:10:26 -0700

author
kvn
date
Tue, 14 Oct 2008 15:10:26 -0700
changeset 840
2649e5276dd7
parent 739
dc7f315e41f7
child 1228
eacd97c88873
permissions
-rw-r--r--

6532536: Optimize arraycopy stubs for Intel cpus
Summary: Use SSE2 movdqu in arraycopy stubs on newest Intel's cpus
Reviewed-by: rasbold

     1 /*
     2  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 # include "incls/_precompiled.incl"
    26 # include "incls/_frame_x86.cpp.incl"
    28 #ifdef ASSERT
    29 void RegisterMap::check_location_valid() {
    30 }
    31 #endif
    34 // Profiling/safepoint support
    36 bool frame::safe_for_sender(JavaThread *thread) {
    37   address   sp = (address)_sp;
    38   address   fp = (address)_fp;
    39   address   unextended_sp = (address)_unextended_sp;
    40   // sp must be within the stack
    41   bool sp_safe = (sp <= thread->stack_base()) &&
    42                  (sp >= thread->stack_base() - thread->stack_size());
    44   if (!sp_safe) {
    45     return false;
    46   }
    48   // unextended sp must be within the stack and above or equal sp
    49   bool unextended_sp_safe = (unextended_sp <= thread->stack_base()) &&
    50                             (unextended_sp >= sp);
    52   if (!unextended_sp_safe) {
    53     return false;
    54   }
    56   // an fp must be within the stack and above (but not equal) sp
    57   bool fp_safe = (fp <= thread->stack_base()) && (fp > sp);
    59   // We know sp/unextended_sp are safe only fp is questionable here
    61   // If the current frame is known to the code cache then we can attempt to
    62   // to construct the sender and do some validation of it. This goes a long way
    63   // toward eliminating issues when we get in frame construction code
    65   if (_cb != NULL ) {
    67     // First check if frame is complete and tester is reliable
    68     // Unfortunately we can only check frame complete for runtime stubs and nmethod
    69     // other generic buffer blobs are more problematic so we just assume they are
    70     // ok. adapter blobs never have a frame complete and are never ok.
    72     if (!_cb->is_frame_complete_at(_pc)) {
    73       if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
    74         return false;
    75       }
    76     }
    77     // Entry frame checks
    78     if (is_entry_frame()) {
    79       // an entry frame must have a valid fp.
    81       if (!fp_safe) return false;
    83       // Validate the JavaCallWrapper an entry frame must have
    85       address jcw = (address)entry_frame_call_wrapper();
    87       bool jcw_safe = (jcw <= thread->stack_base()) && ( jcw > fp);
    89       return jcw_safe;
    91     }
    93     intptr_t* sender_sp = NULL;
    94     address   sender_pc = NULL;
    96     if (is_interpreted_frame()) {
    97       // fp must be safe
    98       if (!fp_safe) {
    99         return false;
   100       }
   102       sender_pc = (address) this->fp()[return_addr_offset];
   103       sender_sp = (intptr_t*) addr_at(sender_sp_offset);
   105     } else {
   106       // must be some sort of compiled/runtime frame
   107       // fp does not have to be safe (although it could be check for c1?)
   109       sender_sp = _unextended_sp + _cb->frame_size();
   110       // On Intel the return_address is always the word on the stack
   111       sender_pc = (address) *(sender_sp-1);
   112     }
   114     // We must always be able to find a recognizable pc
   115     CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
   116     if (sender_pc == NULL ||  sender_blob == NULL) {
   117       return false;
   118     }
   121     // If the potential sender is the interpreter then we can do some more checking
   122     if (Interpreter::contains(sender_pc)) {
   124       // ebp is always saved in a recognizable place in any code we generate. However
   125       // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved ebp
   126       // is really a frame pointer.
   128       intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset);
   129       bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp > sender_sp);
   131       if (!saved_fp_safe) {
   132         return false;
   133       }
   135       // construct the potential sender
   137       frame sender(sender_sp, saved_fp, sender_pc);
   139       return sender.is_interpreted_frame_valid(thread);
   141     }
   143     // Could just be some random pointer within the codeBlob
   145     if (!sender_blob->instructions_contains(sender_pc)) return false;
   147     // We should never be able to see an adapter if the current frame is something from code cache
   149     if ( sender_blob->is_adapter_blob()) {
   150       return false;
   151     }
   153     // Could be the call_stub
   155     if (StubRoutines::returns_to_call_stub(sender_pc)) {
   156       intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset);
   157       bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp > sender_sp);
   159       if (!saved_fp_safe) {
   160         return false;
   161       }
   163       // construct the potential sender
   165       frame sender(sender_sp, saved_fp, sender_pc);
   167       // Validate the JavaCallWrapper an entry frame must have
   168       address jcw = (address)sender.entry_frame_call_wrapper();
   170       bool jcw_safe = (jcw <= thread->stack_base()) && ( jcw > (address)sender.fp());
   172       return jcw_safe;
   173     }
   175     // If the frame size is 0 something is bad because every nmethod has a non-zero frame size
   176     // because the return address counts against the callee's frame.
   178     if (sender_blob->frame_size() == 0) {
   179       assert(!sender_blob->is_nmethod(), "should count return address at least");
   180       return false;
   181     }
   183     // We should never be able to see anything here except an nmethod. If something in the
   184     // code cache (current frame) is called by an entity within the code cache that entity
   185     // should not be anything but the call stub (already covered), the interpreter (already covered)
   186     // or an nmethod.
   188     assert(sender_blob->is_nmethod(), "Impossible call chain");
   190     // Could put some more validation for the potential non-interpreted sender
   191     // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
   193     // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
   195     // We've validated the potential sender that would be created
   196     return true;
   197   }
   199   // Must be native-compiled frame. Since sender will try and use fp to find
   200   // linkages it must be safe
   202   if (!fp_safe) {
   203     return false;
   204   }
   206   // Will the pc we fetch be non-zero (which we'll find at the oldest frame)
   208   if ( (address) this->fp()[return_addr_offset] == NULL) return false;
   211   // could try and do some more potential verification of native frame if we could think of some...
   213   return true;
   215 }
   218 void frame::patch_pc(Thread* thread, address pc) {
   219   if (TracePcPatching) {
   220     tty->print_cr("patch_pc at address" INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "] ",
   221                   &((address *)sp())[-1], ((address *)sp())[-1], pc);
   222   }
   223   ((address *)sp())[-1] = pc;
   224   _cb = CodeCache::find_blob(pc);
   225   if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) {
   226     address orig = (((nmethod*)_cb)->get_original_pc(this));
   227     assert(orig == _pc, "expected original to be stored before patching");
   228     _deopt_state = is_deoptimized;
   229     // leave _pc as is
   230   } else {
   231     _deopt_state = not_deoptimized;
   232     _pc = pc;
   233   }
   234 }
   236 bool frame::is_interpreted_frame() const  {
   237   return Interpreter::contains(pc());
   238 }
   240 int frame::frame_size() const {
   241   RegisterMap map(JavaThread::current(), false);
   242   frame sender = this->sender(&map);
   243   return sender.sp() - sp();
   244 }
   246 intptr_t* frame::entry_frame_argument_at(int offset) const {
   247   // convert offset to index to deal with tsi
   248   int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
   249   // Entry frame's arguments are always in relation to unextended_sp()
   250   return &unextended_sp()[index];
   251 }
   253 // sender_sp
   254 #ifdef CC_INTERP
   255 intptr_t* frame::interpreter_frame_sender_sp() const {
   256   assert(is_interpreted_frame(), "interpreted frame expected");
   257   // QQQ why does this specialize method exist if frame::sender_sp() does same thing?
   258   // seems odd and if we always know interpreted vs. non then sender_sp() is really
   259   // doing too much work.
   260   return get_interpreterState()->sender_sp();
   261 }
   263 // monitor elements
   265 BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
   266   return get_interpreterState()->monitor_base();
   267 }
   269 BasicObjectLock* frame::interpreter_frame_monitor_end() const {
   270   return (BasicObjectLock*) get_interpreterState()->stack_base();
   271 }
   273 #else // CC_INTERP
   275 intptr_t* frame::interpreter_frame_sender_sp() const {
   276   assert(is_interpreted_frame(), "interpreted frame expected");
   277   return (intptr_t*) at(interpreter_frame_sender_sp_offset);
   278 }
   280 void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
   281   assert(is_interpreted_frame(), "interpreted frame expected");
   282   ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp);
   283 }
   286 // monitor elements
   288 BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
   289   return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset);
   290 }
   292 BasicObjectLock* frame::interpreter_frame_monitor_end() const {
   293   BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);
   294   // make sure the pointer points inside the frame
   295   assert((intptr_t) fp() >  (intptr_t) result, "result must <  than frame pointer");
   296   assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer");
   297   return result;
   298 }
   300 void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
   301   *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;
   302 }
   304 // Used by template based interpreter deoptimization
   305 void frame::interpreter_frame_set_last_sp(intptr_t* sp) {
   306     *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp;
   307 }
   308 #endif // CC_INTERP
   310 frame frame::sender_for_entry_frame(RegisterMap* map) const {
   311   assert(map != NULL, "map must be set");
   312   // Java frame called from C; skip all C frames and return top C
   313   // frame of that chunk as the sender
   314   JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
   315   assert(!entry_frame_is_first(), "next Java fp must be non zero");
   316   assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
   317   map->clear();
   318   assert(map->include_argument_oops(), "should be set by clear");
   319   if (jfa->last_Java_pc() != NULL ) {
   320     frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
   321     return fr;
   322   }
   323   frame fr(jfa->last_Java_sp(), jfa->last_Java_fp());
   324   return fr;
   325 }
   327 frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
   328   // sp is the raw sp from the sender after adapter or interpreter extension
   329   intptr_t* sp = (intptr_t*) addr_at(sender_sp_offset);
   331   // This is the sp before any possible extension (adapter/locals).
   332   intptr_t* unextended_sp = interpreter_frame_sender_sp();
   334   // The interpreter and compiler(s) always save EBP/RBP in a known
   335   // location on entry. We must record where that location is
   336   // so this if EBP/RBP was live on callout from c2 we can find
   337   // the saved copy no matter what it called.
   339   // Since the interpreter always saves EBP/RBP if we record where it is then
   340   // we don't have to always save EBP/RBP on entry and exit to c2 compiled
   341   // code, on entry will be enough.
   342 #ifdef COMPILER2
   343   if (map->update_map()) {
   344     map->set_location(rbp->as_VMReg(), (address) addr_at(link_offset));
   345 #ifdef AMD64
   346     // this is weird "H" ought to be at a higher address however the
   347     // oopMaps seems to have the "H" regs at the same address and the
   348     // vanilla register.
   349     // XXXX make this go away
   350     if (true) {
   351       map->set_location(rbp->as_VMReg()->next(), (address)addr_at(link_offset));
   352     }
   353 #endif // AMD64
   354   }
   355 #endif /* COMPILER2 */
   356   return frame(sp, unextended_sp, link(), sender_pc());
   357 }
   360 //------------------------------sender_for_compiled_frame-----------------------
   361 frame frame::sender_for_compiled_frame(RegisterMap* map) const {
   362   assert(map != NULL, "map must be set");
   363   const bool c1_compiled = _cb->is_compiled_by_c1();
   365   // frame owned by optimizing compiler
   366   intptr_t* sender_sp = NULL;
   368   assert(_cb->frame_size() >= 0, "must have non-zero frame size");
   369   sender_sp = unextended_sp() + _cb->frame_size();
   371   // On Intel the return_address is always the word on the stack
   372   address sender_pc = (address) *(sender_sp-1);
   374   // This is the saved value of ebp which may or may not really be an fp.
   375   // it is only an fp if the sender is an interpreter frame (or c1?)
   377   intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset);
   379   if (map->update_map()) {
   380     // Tell GC to use argument oopmaps for some runtime stubs that need it.
   381     // For C1, the runtime stub might not have oop maps, so set this flag
   382     // outside of update_register_map.
   383     map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
   384     if (_cb->oop_maps() != NULL) {
   385       OopMapSet::update_register_map(this, map);
   386     }
   387     // Since the prolog does the save and restore of epb there is no oopmap
   388     // for it so we must fill in its location as if there was an oopmap entry
   389     // since if our caller was compiled code there could be live jvm state in it.
   390     map->set_location(rbp->as_VMReg(), (address) (sender_sp - frame::sender_sp_offset));
   391 #ifdef AMD64
   392     // this is weird "H" ought to be at a higher address however the
   393     // oopMaps seems to have the "H" regs at the same address and the
   394     // vanilla register.
   395     // XXXX make this go away
   396     if (true) {
   397       map->set_location(rbp->as_VMReg()->next(), (address) (sender_sp - frame::sender_sp_offset));
   398     }
   399 #endif // AMD64
   400   }
   402   assert(sender_sp != sp(), "must have changed");
   403   return frame(sender_sp, saved_fp, sender_pc);
   404 }
   406 frame frame::sender(RegisterMap* map) const {
   407   // Default is we done have to follow them. The sender_for_xxx will
   408   // update it accordingly
   409   map->set_include_argument_oops(false);
   411   if (is_entry_frame())       return sender_for_entry_frame(map);
   412   if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
   413   assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
   415   if (_cb != NULL) {
   416     return sender_for_compiled_frame(map);
   417   }
   418   // Must be native-compiled frame, i.e. the marshaling code for native
   419   // methods that exists in the core system.
   420   return frame(sender_sp(), link(), sender_pc());
   421 }
   424 bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) {
   425   assert(is_interpreted_frame(), "must be interpreter frame");
   426   methodOop method = interpreter_frame_method();
   427   // When unpacking an optimized frame the frame pointer is
   428   // adjusted with:
   429   int diff = (method->max_locals() - method->size_of_parameters()) *
   430              Interpreter::stackElementWords();
   431   return _fp == (fp - diff);
   432 }
   434 void frame::pd_gc_epilog() {
   435   // nothing done here now
   436 }
   438 bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
   439 // QQQ
   440 #ifdef CC_INTERP
   441 #else
   442   assert(is_interpreted_frame(), "Not an interpreted frame");
   443   // These are reasonable sanity checks
   444   if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) {
   445     return false;
   446   }
   447   if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) {
   448     return false;
   449   }
   450   if (fp() + interpreter_frame_initial_sp_offset < sp()) {
   451     return false;
   452   }
   453   // These are hacks to keep us out of trouble.
   454   // The problem with these is that they mask other problems
   455   if (fp() <= sp()) {        // this attempts to deal with unsigned comparison above
   456     return false;
   457   }
   459   // do some validation of frame elements
   461   // first the method
   463   methodOop m = *interpreter_frame_method_addr();
   465   // validate the method we'd find in this potential sender
   466   if (!Universe::heap()->is_valid_method(m)) return false;
   468   // stack frames shouldn't be much larger than max_stack elements
   470   if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) {
   471     return false;
   472   }
   474   // validate bci/bcx
   476   intptr_t  bcx    = interpreter_frame_bcx();
   477   if (m->validate_bci_from_bcx(bcx) < 0) {
   478     return false;
   479   }
   481   // validate constantPoolCacheOop
   483   constantPoolCacheOop cp = *interpreter_frame_cache_addr();
   485   if (cp == NULL ||
   486       !Space::is_aligned(cp) ||
   487       !Universe::heap()->is_permanent((void*)cp)) return false;
   489   // validate locals
   491   address locals =  (address) *interpreter_frame_locals_addr();
   493   if (locals > thread->stack_base() || locals < (address) fp()) return false;
   495   // We'd have to be pretty unlucky to be mislead at this point
   497 #endif // CC_INTERP
   498   return true;
   499 }
   501 BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
   502 #ifdef CC_INTERP
   503   // Needed for JVMTI. The result should always be in the interpreterState object
   504   assert(false, "NYI");
   505   interpreterState istate = get_interpreterState();
   506 #endif // CC_INTERP
   507   assert(is_interpreted_frame(), "interpreted frame expected");
   508   methodOop method = interpreter_frame_method();
   509   BasicType type = method->result_type();
   511   intptr_t* tos_addr;
   512   if (method->is_native()) {
   513     // Prior to calling into the runtime to report the method_exit the possible
   514     // return value is pushed to the native stack. If the result is a jfloat/jdouble
   515     // then ST0 is saved before EAX/EDX. See the note in generate_native_result
   516     tos_addr = (intptr_t*)sp();
   517     if (type == T_FLOAT || type == T_DOUBLE) {
   518     // QQQ seems like this code is equivalent on the two platforms
   519 #ifdef AMD64
   520       // This is times two because we do a push(ltos) after pushing XMM0
   521       // and that takes two interpreter stack slots.
   522       tos_addr += 2 * Interpreter::stackElementWords();
   523 #else
   524       tos_addr += 2;
   525 #endif // AMD64
   526     }
   527   } else {
   528     tos_addr = (intptr_t*)interpreter_frame_tos_address();
   529   }
   531   switch (type) {
   532     case T_OBJECT  :
   533     case T_ARRAY   : {
   534       oop obj;
   535       if (method->is_native()) {
   536 #ifdef CC_INTERP
   537         obj = istate->_oop_temp;
   538 #else
   539         obj = (oop) at(interpreter_frame_oop_temp_offset);
   540 #endif // CC_INTERP
   541       } else {
   542         oop* obj_p = (oop*)tos_addr;
   543         obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
   544       }
   545       assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
   546       *oop_result = obj;
   547       break;
   548     }
   549     case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
   550     case T_BYTE    : value_result->b = *(jbyte*)tos_addr; break;
   551     case T_CHAR    : value_result->c = *(jchar*)tos_addr; break;
   552     case T_SHORT   : value_result->s = *(jshort*)tos_addr; break;
   553     case T_INT     : value_result->i = *(jint*)tos_addr; break;
   554     case T_LONG    : value_result->j = *(jlong*)tos_addr; break;
   555     case T_FLOAT   : {
   556 #ifdef AMD64
   557         value_result->f = *(jfloat*)tos_addr;
   558 #else
   559       if (method->is_native()) {
   560         jdouble d = *(jdouble*)tos_addr;  // Result was in ST0 so need to convert to jfloat
   561         value_result->f = (jfloat)d;
   562       } else {
   563         value_result->f = *(jfloat*)tos_addr;
   564       }
   565 #endif // AMD64
   566       break;
   567     }
   568     case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr; break;
   569     case T_VOID    : /* Nothing to do */ break;
   570     default        : ShouldNotReachHere();
   571   }
   573   return type;
   574 }
   577 intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
   578   int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
   579   return &interpreter_frame_tos_address()[index];
   580 }

mercurial