src/cpu/ppc/vm/sharedRuntime_ppc.cpp

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,3255 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright 2012, 2014 SAP AG. All rights reserved.
     1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8 + *
     1.9 + * This code is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License version 2 only, as
    1.11 + * published by the Free Software Foundation.
    1.12 + *
    1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.16 + * version 2 for more details (a copy is included in the LICENSE file that
    1.17 + * accompanied this code).
    1.18 + *
    1.19 + * You should have received a copy of the GNU General Public License version
    1.20 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.22 + *
    1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.24 + * or visit www.oracle.com if you need additional information or have any
    1.25 + * questions.
    1.26 + *
    1.27 + */
    1.28 +
    1.29 +#include "precompiled.hpp"
    1.30 +#include "asm/macroAssembler.inline.hpp"
    1.31 +#include "code/debugInfoRec.hpp"
    1.32 +#include "code/icBuffer.hpp"
    1.33 +#include "code/vtableStubs.hpp"
    1.34 +#include "interpreter/interpreter.hpp"
    1.35 +#include "oops/compiledICHolder.hpp"
    1.36 +#include "prims/jvmtiRedefineClassesTrace.hpp"
    1.37 +#include "runtime/sharedRuntime.hpp"
    1.38 +#include "runtime/vframeArray.hpp"
    1.39 +#include "vmreg_ppc.inline.hpp"
    1.40 +#include "adfiles/ad_ppc_64.hpp"
    1.41 +#ifdef COMPILER1
    1.42 +#include "c1/c1_Runtime1.hpp"
    1.43 +#endif
    1.44 +#ifdef COMPILER2
    1.45 +#include "opto/runtime.hpp"
    1.46 +#endif
    1.47 +
    1.48 +#define __ masm->
    1.49 +
    1.50 +#ifdef PRODUCT
    1.51 +#define BLOCK_COMMENT(str) // nothing
    1.52 +#else
    1.53 +#define BLOCK_COMMENT(str) __ block_comment(str)
    1.54 +#endif
    1.55 +
    1.56 +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
    1.57 +
    1.58 +
    1.59 +class RegisterSaver {
    1.60 + // Used for saving volatile registers.
    1.61 + public:
    1.62 +
    1.63 +  // Support different return pc locations.
    1.64 +  enum ReturnPCLocation {
    1.65 +    return_pc_is_lr,
    1.66 +    return_pc_is_r4,
    1.67 +    return_pc_is_thread_saved_exception_pc
    1.68 +  };
    1.69 +
    1.70 +  static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
    1.71 +                         int* out_frame_size_in_bytes,
    1.72 +                         bool generate_oop_map,
    1.73 +                         int return_pc_adjustment,
    1.74 +                         ReturnPCLocation return_pc_location);
    1.75 +  static void    restore_live_registers_and_pop_frame(MacroAssembler* masm,
    1.76 +                         int frame_size_in_bytes,
    1.77 +                         bool restore_ctr);
    1.78 +
    1.79 +  static void push_frame_and_save_argument_registers(MacroAssembler* masm,
    1.80 +                         Register r_temp,
    1.81 +                         int frame_size,
    1.82 +                         int total_args,
    1.83 +                         const VMRegPair *regs, const VMRegPair *regs2 = NULL);
    1.84 +  static void restore_argument_registers_and_pop_frame(MacroAssembler* masm,
    1.85 +                         int frame_size,
    1.86 +                         int total_args,
    1.87 +                         const VMRegPair *regs, const VMRegPair *regs2 = NULL);
    1.88 +
    1.89 +  // During deoptimization only the result registers need to be restored
    1.90 +  // all the other values have already been extracted.
    1.91 +  static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes);
    1.92 +
    1.93 +  // Constants and data structures:
    1.94 +
    1.95 +  typedef enum {
    1.96 +    int_reg           = 0,
    1.97 +    float_reg         = 1,
    1.98 +    special_reg       = 2
    1.99 +  } RegisterType;
   1.100 +
   1.101 +  typedef enum {
   1.102 +    reg_size          = 8,
   1.103 +    half_reg_size     = reg_size / 2,
   1.104 +    half_reg_size     = reg_size / 2
   1.105 +
   1.106 +  typedef struct {
   1.107 +    RegisterType        reg_type;
   1.108 +    int                 reg_num;
   1.109 +    VMReg               vmreg;
   1.110 +  } LiveRegType;
   1.111 +};
   1.112 +
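         +// Editor's sketch (hypothetical usage, not part of this change): a stub
         +// that must preserve all volatile registers around a VM call would pair
         +// the two entry points declared above roughly like this:
         +//
         +//   int frame_size_in_bytes;
         +//   OopMap* map = RegisterSaver::push_frame_reg_args_and_save_live_registers(
         +//                   masm, &frame_size_in_bytes, /*generate_oop_map=*/true,
         +//                   /*return_pc_adjustment=*/0, RegisterSaver::return_pc_is_lr);
         +//   // ... emit the runtime call here ...
         +//   RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes,
         +//                                                       /*restore_ctr=*/true);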
   1.113 +
   1.114 +#define RegisterSaver_LiveSpecialReg(regname) \
   1.115 +  { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() }
   1.116 +
   1.117 +#define RegisterSaver_LiveIntReg(regname) \
   1.118 +  { RegisterSaver::int_reg,     regname->encoding(), regname->as_VMReg() }
   1.119 +
   1.120 +#define RegisterSaver_LiveFloatReg(regname) \
   1.121 +  { RegisterSaver::float_reg,   regname->encoding(), regname->as_VMReg() }
   1.122 +
   1.123 +static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
   1.124 +  // Live registers which get spilled to the stack. Register
   1.125 +  // positions in this array correspond directly to the stack layout.
   1.126 +
   1.127 +  //
   1.128 +  // live special registers:
   1.129 +  //
   1.130 +  RegisterSaver_LiveSpecialReg(SR_CTR),
   1.131 +  //
   1.132 +  // live float registers:
   1.133 +  //
   1.134 +  RegisterSaver_LiveFloatReg( F0  ),
   1.135 +  RegisterSaver_LiveFloatReg( F1  ),
   1.136 +  RegisterSaver_LiveFloatReg( F2  ),
   1.137 +  RegisterSaver_LiveFloatReg( F3  ),
   1.138 +  RegisterSaver_LiveFloatReg( F4  ),
   1.139 +  RegisterSaver_LiveFloatReg( F5  ),
   1.140 +  RegisterSaver_LiveFloatReg( F6  ),
   1.141 +  RegisterSaver_LiveFloatReg( F7  ),
   1.142 +  RegisterSaver_LiveFloatReg( F8  ),
   1.143 +  RegisterSaver_LiveFloatReg( F9  ),
   1.144 +  RegisterSaver_LiveFloatReg( F10 ),
   1.145 +  RegisterSaver_LiveFloatReg( F11 ),
   1.146 +  RegisterSaver_LiveFloatReg( F12 ),
   1.147 +  RegisterSaver_LiveFloatReg( F13 ),
   1.148 +  RegisterSaver_LiveFloatReg( F14 ),
   1.149 +  RegisterSaver_LiveFloatReg( F15 ),
   1.150 +  RegisterSaver_LiveFloatReg( F16 ),
   1.151 +  RegisterSaver_LiveFloatReg( F17 ),
   1.152 +  RegisterSaver_LiveFloatReg( F18 ),
   1.153 +  RegisterSaver_LiveFloatReg( F19 ),
   1.154 +  RegisterSaver_LiveFloatReg( F20 ),
   1.155 +  RegisterSaver_LiveFloatReg( F21 ),
   1.156 +  RegisterSaver_LiveFloatReg( F22 ),
   1.157 +  RegisterSaver_LiveFloatReg( F23 ),
   1.158 +  RegisterSaver_LiveFloatReg( F24 ),
   1.159 +  RegisterSaver_LiveFloatReg( F25 ),
   1.160 +  RegisterSaver_LiveFloatReg( F26 ),
   1.161 +  RegisterSaver_LiveFloatReg( F27 ),
   1.162 +  RegisterSaver_LiveFloatReg( F28 ),
   1.163 +  RegisterSaver_LiveFloatReg( F29 ),
   1.164 +  RegisterSaver_LiveFloatReg( F30 ),
   1.165 +  RegisterSaver_LiveFloatReg( F31 ),
   1.166 +  //
   1.167 +  // live integer registers:
   1.168 +  //
   1.169 +  RegisterSaver_LiveIntReg(   R0  ),
   1.170 +  //RegisterSaver_LiveIntReg( R1  ), // stack pointer
   1.171 +  RegisterSaver_LiveIntReg(   R2  ),
   1.172 +  RegisterSaver_LiveIntReg(   R3  ),
   1.173 +  RegisterSaver_LiveIntReg(   R4  ),
   1.174 +  RegisterSaver_LiveIntReg(   R5  ),
   1.175 +  RegisterSaver_LiveIntReg(   R6  ),
   1.176 +  RegisterSaver_LiveIntReg(   R7  ),
   1.177 +  RegisterSaver_LiveIntReg(   R8  ),
   1.178 +  RegisterSaver_LiveIntReg(   R9  ),
   1.179 +  RegisterSaver_LiveIntReg(   R10 ),
   1.180 +  RegisterSaver_LiveIntReg(   R11 ),
   1.181 +  RegisterSaver_LiveIntReg(   R12 ),
   1.182 +  //RegisterSaver_LiveIntReg( R13 ), // system thread id
   1.183 +  RegisterSaver_LiveIntReg(   R14 ),
   1.184 +  RegisterSaver_LiveIntReg(   R15 ),
   1.185 +  RegisterSaver_LiveIntReg(   R16 ),
   1.186 +  RegisterSaver_LiveIntReg(   R17 ),
   1.187 +  RegisterSaver_LiveIntReg(   R18 ),
   1.188 +  RegisterSaver_LiveIntReg(   R19 ),
   1.189 +  RegisterSaver_LiveIntReg(   R20 ),
   1.190 +  RegisterSaver_LiveIntReg(   R21 ),
   1.191 +  RegisterSaver_LiveIntReg(   R22 ),
   1.192 +  RegisterSaver_LiveIntReg(   R23 ),
   1.193 +  RegisterSaver_LiveIntReg(   R24 ),
   1.194 +  RegisterSaver_LiveIntReg(   R25 ),
   1.195 +  RegisterSaver_LiveIntReg(   R26 ),
   1.196 +  RegisterSaver_LiveIntReg(   R27 ),
   1.197 +  RegisterSaver_LiveIntReg(   R28 ),
   1.198 +  RegisterSaver_LiveIntReg(   R29 ),
   1.199 +  RegisterSaver_LiveIntReg(   R31 ),
   1.200 +  RegisterSaver_LiveIntReg(   R30 ), // r30 must be the last register
   1.201 +};
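         +
         +// Editor's note (worked example, assuming 16-byte frame alignment and the
         +// 112-byte ELFv1 abi_reg_args area): the table above lists 1 special +
         +// 32 float + 30 int = 63 registers, so register_save_size = 63 * 8 = 504
         +// bytes, rounded up to 512; push_frame_reg_args_and_save_live_registers
         +// below then pushes a frame of 512 + 112 = 624 bytes.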
   1.202 +
   1.203 +OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
   1.204 +                         int* out_frame_size_in_bytes,
   1.205 +                         bool generate_oop_map,
   1.206 +                         int return_pc_adjustment,
   1.207 +                         ReturnPCLocation return_pc_location) {
   1.208 +  // Push an abi_reg_args-frame and store all registers which may be live.
   1.209 +  // If requested, create an OopMap: Record volatile registers as
   1.210 +  // callee-save values in an OopMap so their save locations will be
   1.211 +  // propagated to the RegisterMap of the caller frame during
   1.212 +  // StackFrameStream construction (needed for deoptimization; see
   1.213 +  // compiledVFrame::create_stack_value).
   1.214 +  // If return_pc_adjustment != 0 adjust the return pc by return_pc_adjustment.
   1.215 +
   1.217 +  int offset;
   1.218 +
   1.219 +  // Calculate the frame size.
   1.220 +  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
   1.221 +                                   sizeof(RegisterSaver::LiveRegType);
   1.222 +  const int register_save_size   = regstosave_num * reg_size;
   1.223 +  const int frame_size_in_bytes  = round_to(register_save_size, frame::alignment_in_bytes)
   1.224 +                                   + frame::abi_reg_args_size;
   1.225 +  *out_frame_size_in_bytes       = frame_size_in_bytes;
   1.226 +  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
   1.227 +  const int register_save_offset = frame_size_in_bytes - register_save_size;
   1.228 +
   1.229 +  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
   1.230 +  OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : NULL;
   1.231 +
   1.232 +  BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
   1.233 +
   1.234 +  // Save r30 in the last slot of the not yet pushed frame so that we
   1.235 +  // can use it as scratch reg.
   1.236 +  __ std(R30, -reg_size, R1_SP);
   1.237 +  assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
   1.238 +         "consistency check");
   1.239 +
   1.240 +  // save the flags
   1.241 +  // Do the save_LR_CR by hand and adjust the return pc if requested.
   1.242 +  __ mfcr(R30);
   1.243 +  __ std(R30, _abi(cr), R1_SP);
   1.244 +  switch (return_pc_location) {
   1.245 +    case return_pc_is_lr:    __ mflr(R30);           break;
   1.246 +    case return_pc_is_r4:    __ mr(R30, R4);         break;
   1.247 +    case return_pc_is_thread_saved_exception_pc:
   1.248 +                                 __ ld(R30, thread_(saved_exception_pc)); break;
   1.249 +    default: ShouldNotReachHere();
   1.250 +  }
   1.251 +  if (return_pc_adjustment != 0) {
   1.252 +    __ addi(R30, R30, return_pc_adjustment);
         +  }
   1.253 +  __ std(R30, _abi(lr), R1_SP);
   1.254 +
   1.255 +  // push a new frame
   1.256 +  __ push_frame(frame_size_in_bytes, R30);
   1.257 +
   1.258 +  // save all registers (ints and floats)
   1.259 +  offset = register_save_offset;
   1.260 +  for (int i = 0; i < regstosave_num; i++) {
   1.261 +    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
   1.262 +    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
   1.263 +
   1.264 +    switch (reg_type) {
   1.265 +      case RegisterSaver::int_reg: {
   1.266 +        if (reg_num != 30) { // We spilled R30 right at the beginning.
   1.267 +          __ std(as_Register(reg_num), offset, R1_SP);
   1.268 +        }
   1.269 +        break;
   1.270 +      }
   1.271 +      case RegisterSaver::float_reg: {
   1.272 +        __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
   1.273 +        break;
   1.274 +      }
   1.275 +      case RegisterSaver::special_reg: {
   1.276 +        if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
   1.277 +          __ mfctr(R30);
   1.278 +          __ std(R30, offset, R1_SP);
   1.279 +        } else {
   1.280 +          Unimplemented();
   1.281 +        }
   1.282 +        break;
   1.283 +      }
   1.284 +      default:
   1.285 +        ShouldNotReachHere();
   1.286 +    }
   1.287 +
   1.288 +    if (generate_oop_map) {
   1.289 +      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
   1.290 +                            RegisterSaver_LiveRegs[i].vmreg);
   1.291 +      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2),
   1.292 +                            RegisterSaver_LiveRegs[i].vmreg->next());
   1.293 +    }
   1.294 +    offset += reg_size;
   1.295 +  }
   1.296 +
   1.297 +  BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");
   1.298 +
   1.299 +  // And we're done.
   1.300 +  return map;
   1.301 +}
   1.302 +
   1.303 +
   1.304 +// Pop the current frame and restore all the registers that we
   1.305 +// saved.
   1.306 +void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
   1.307 +                                                         int frame_size_in_bytes,
   1.308 +                                                         bool restore_ctr) {
   1.310 +  int offset;
   1.311 +  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
   1.312 +                                   sizeof(RegisterSaver::LiveRegType);
   1.313 +  const int register_save_size   = regstosave_num * reg_size;
   1.314 +  const int register_save_offset = frame_size_in_bytes - register_save_size;
   1.315 +
   1.316 +  BLOCK_COMMENT("restore_live_registers_and_pop_frame {");
   1.317 +
   1.318 +  // restore all registers (ints and floats)
   1.319 +  offset = register_save_offset;
   1.320 +  for (int i = 0; i < regstosave_num; i++) {
   1.321 +    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
   1.322 +    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
   1.323 +
   1.324 +    switch (reg_type) {
   1.325 +      case RegisterSaver::int_reg: {
   1.326 +        if (reg_num != 30) { // R30 is restored at the end; it's the tmp reg.
   1.327 +          __ ld(as_Register(reg_num), offset, R1_SP);
         +        }
   1.328 +        break;
   1.329 +      }
   1.330 +      case RegisterSaver::float_reg: {
   1.331 +        __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
   1.332 +        break;
   1.333 +      }
   1.334 +      case RegisterSaver::special_reg: {
   1.335 +        if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
   1.336 +          if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
   1.337 +            __ ld(R30, offset, R1_SP);
   1.338 +            __ mtctr(R30);
   1.339 +          }
   1.340 +        } else {
   1.341 +          Unimplemented();
   1.342 +        }
   1.343 +        break;
   1.344 +      }
   1.345 +      default:
   1.346 +        ShouldNotReachHere();
   1.347 +    }
   1.348 +    offset += reg_size;
   1.349 +  }
   1.350 +
   1.351 +  // pop the frame
   1.352 +  __ pop_frame();
   1.353 +
   1.354 +  // restore the flags
   1.355 +  __ restore_LR_CR(R30);
   1.356 +
   1.357 +  // restore scratch register's value
   1.358 +  __ ld(R30, -reg_size, R1_SP);
   1.359 +
   1.360 +  BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
   1.361 +}
   1.362 +
   1.363 +void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
   1.364 +                                                           int frame_size, int total_args, const VMRegPair *regs,
   1.365 +                                                           const VMRegPair *regs2) {
   1.366 +  __ push_frame(frame_size, r_temp);
   1.367 +  int st_off = frame_size - wordSize;
   1.368 +  for (int i = 0; i < total_args; i++) {
   1.369 +    VMReg r_1 = regs[i].first();
   1.370 +    VMReg r_2 = regs[i].second();
   1.371 +    if (!r_1->is_valid()) {
   1.372 +      assert(!r_2->is_valid(), "");
   1.373 +      continue;
   1.374 +    }
   1.375 +    if (r_1->is_Register()) {
   1.376 +      Register r = r_1->as_Register();
   1.377 +      __ std(r, st_off, R1_SP);
   1.378 +      st_off -= wordSize;
   1.379 +    } else if (r_1->is_FloatRegister()) {
   1.380 +      FloatRegister f = r_1->as_FloatRegister();
   1.381 +      __ stfd(f, st_off, R1_SP);
   1.382 +      st_off -= wordSize;
   1.383 +    }
   1.384 +  }
   1.385 +  if (regs2 != NULL) {
   1.386 +    for (int i = 0; i < total_args; i++) {
   1.387 +      VMReg r_1 = regs2[i].first();
   1.388 +      VMReg r_2 = regs2[i].second();
   1.389 +      if (!r_1->is_valid()) {
   1.390 +        assert(!r_2->is_valid(), "");
   1.391 +        continue;
   1.392 +      }
   1.393 +      if (r_1->is_Register()) {
   1.394 +        Register r = r_1->as_Register();
   1.395 +        __ std(r, st_off, R1_SP);
   1.396 +        st_off -= wordSize;
   1.397 +      } else if (r_1->is_FloatRegister()) {
   1.398 +        FloatRegister f = r_1->as_FloatRegister();
   1.399 +        __ stfd(f, st_off, R1_SP);
   1.400 +        st_off -= wordSize;
   1.401 +      }
   1.402 +    }
   1.403 +  }
   1.404 +}
   1.405 +
   1.406 +void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler* masm, int frame_size,
   1.407 +                                                             int total_args, const VMRegPair *regs,
   1.408 +                                                             const VMRegPair *regs2) {
   1.409 +  int st_off = frame_size - wordSize;
   1.410 +  for (int i = 0; i < total_args; i++) {
   1.411 +    VMReg r_1 = regs[i].first();
   1.412 +    VMReg r_2 = regs[i].second();
   1.413 +    if (r_1->is_Register()) {
   1.414 +      Register r = r_1->as_Register();
   1.415 +      __ ld(r, st_off, R1_SP);
   1.416 +      st_off -= wordSize;
   1.417 +    } else if (r_1->is_FloatRegister()) {
   1.418 +      FloatRegister f = r_1->as_FloatRegister();
   1.419 +      __ lfd(f, st_off, R1_SP);
   1.420 +      st_off -= wordSize;
   1.421 +    }
   1.422 +  }
   1.423 +  if (regs2 != NULL) {
   1.424 +    for (int i = 0; i < total_args; i++) {
   1.425 +      VMReg r_1 = regs2[i].first();
   1.426 +      VMReg r_2 = regs2[i].second();
   1.427 +      if (r_1->is_Register()) {
   1.428 +        Register r = r_1->as_Register();
   1.429 +        __ ld(r, st_off, R1_SP);
   1.430 +        st_off -= wordSize;
   1.431 +      } else if (r_1->is_FloatRegister()) {
   1.432 +        FloatRegister f = r_1->as_FloatRegister();
   1.433 +        __ lfd(f, st_off, R1_SP);
   1.434 +        st_off -= wordSize;
   1.435 +      }
   1.436 +    }
         +  }
   1.437 +  __ pop_frame();
   1.438 +}
   1.439 +
   1.440 +// Restore the registers that might be holding a result.
   1.441 +void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes) {
   1.443 +  int offset;
   1.444 +  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
   1.445 +                                   sizeof(RegisterSaver::LiveRegType);
   1.446 +  const int register_save_size   = regstosave_num * reg_size;
   1.447 +  const int register_save_offset = frame_size_in_bytes - register_save_size;
   1.448 +
   1.449 +  // restore all result registers (ints and floats)
   1.450 +  offset = register_save_offset;
   1.451 +  for (int i = 0; i < regstosave_num; i++) {
   1.452 +    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
   1.453 +    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
   1.454 +    switch (reg_type) {
   1.455 +      case RegisterSaver::int_reg: {
   1.456 +        if (as_Register(reg_num)==R3_RET) // int result_reg
   1.457 +          __ ld(as_Register(reg_num), offset, R1_SP);
   1.458 +        break;
   1.459 +      }
   1.460 +      case RegisterSaver::float_reg: {
   1.461 +        if (as_FloatRegister(reg_num)==F1_RET) // float result_reg
   1.462 +          __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
   1.463 +        break;
   1.464 +      }
   1.465 +      case RegisterSaver::special_reg: {
   1.466 +        // Special registers don't hold a result.
   1.467 +        break;
   1.468 +      }
   1.469 +      default:
   1.470 +        ShouldNotReachHere();
   1.471 +    }
   1.472 +    offset += reg_size;
   1.473 +  }
   1.474 +}
   1.475 +
   1.476 +// Is vector's size (in bytes) bigger than a size saved by default?
   1.477 +bool SharedRuntime::is_wide_vector(int size) {
   1.478 +  ResourceMark rm;
   1.479 +  // Note, MaxVectorSize == 8 on PPC64.
   1.480 +  assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
   1.481 +  return size > 8;
   1.482 +}
   1.483 +#ifdef COMPILER2
   1.484 +static int reg2slot(VMReg r) {
   1.485 +  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
   1.486 +}
   1.487 +
   1.488 +static int reg2offset(VMReg r) {
   1.489 +  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
   1.490 +}
   1.491 +#endif
   1.492 +
   1.493 +// ---------------------------------------------------------------------------
   1.494 +// Read the array of BasicTypes from a signature, and compute where the
   1.495 +// arguments should go. Values in the VMRegPair regs array refer to 4-byte
   1.496 +// quantities. Values less than VMRegImpl::stack0 are registers, those above
   1.497 +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
   1.498 +// as framesizes are fixed.
   1.499 +// VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
   1.500 +// refers to the memory word 4 bytes higher. Registers 0 up to
   1.501 +// RegisterImpl::number_of_registers are the 64-bit
   1.502 +// integer registers.
   1.503 +
   1.504 +// Note: the INPUTS in sig_bt are in units of Java argument words, which are
   1.505 +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
   1.506 +// units regardless of build. Of course, for i486 there is no 64-bit build.
   1.507 +
   1.508 +// The Java calling convention is a "shifted" version of the C ABI.
   1.509 +// By skipping the first C ABI register we can call non-static jni methods
   1.510 +// with small numbers of arguments without having to shuffle the arguments
   1.511 +// at all. Since we control the java ABI we ought to at least get some
   1.512 +// advantage out of it.
   1.513 +
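         +// Editor's sketch (worked example, not part of the original change): for a
         +// static method with signature (int, long, double, Object), the loop in
         +// java_calling_convention below sees
         +//   sig_bt = { T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID, T_OBJECT }
         +// and assigns R3 to the int, R4 to the long, F1 to the double and R5 to
         +// the Object; no stack slots are used, so the routine returns 0.
         +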
   1.514 +const VMReg java_iarg_reg[8] = {
   1.515 +  R3->as_VMReg(),
   1.516 +  R4->as_VMReg(),
   1.517 +  R5->as_VMReg(),
   1.518 +  R6->as_VMReg(),
   1.519 +  R7->as_VMReg(),
   1.520 +  R8->as_VMReg(),
   1.521 +  R9->as_VMReg(),
   1.522 +  R10->as_VMReg()
   1.523 +};
   1.524 +
   1.525 +const VMReg java_farg_reg[13] = {
   1.526 +  F1->as_VMReg(),
   1.527 +  F2->as_VMReg(),
   1.528 +  F3->as_VMReg(),
   1.529 +  F4->as_VMReg(),
   1.530 +  F5->as_VMReg(),
   1.531 +  F6->as_VMReg(),
   1.532 +  F7->as_VMReg(),
   1.533 +  F8->as_VMReg(),
   1.534 +  F9->as_VMReg(),
   1.535 +  F10->as_VMReg(),
   1.536 +  F11->as_VMReg(),
   1.537 +  F12->as_VMReg(),
   1.538 +  F13->as_VMReg()
   1.539 +};
   1.540 +
   1.541 +const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
   1.542 +const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);
   1.543 +
   1.544 +int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
   1.545 +                                           VMRegPair *regs,
   1.546 +                                           int total_args_passed,
   1.547 +                                           int is_outgoing) {
   1.548 +  // C2C calling convention for compiled-to-compiled calls.
   1.549 +  // Put 8 ints/longs into registers _AND_ 13 floats/doubles into
   1.550 +  // registers _AND_ put the rest on the stack.
   1.551 +
   1.552 +  const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats
   1.553 +  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
   1.554 +
   1.556 +  VMReg reg;
   1.557 +  int stk = 0;
   1.558 +  int ireg = 0;
   1.559 +  int freg = 0;
   1.560 +
   1.561 +  // We put the first 8 arguments into registers and the rest on the
   1.562 +  // stack; float arguments are already in their argument registers
   1.563 +  // due to c2c calling conventions (see calling_convention).
   1.564 +  for (int i = 0; i < total_args_passed; ++i) {
   1.565 +    switch(sig_bt[i]) {
   1.566 +    case T_BOOLEAN:
   1.567 +    case T_CHAR:
   1.568 +    case T_BYTE:
   1.569 +    case T_SHORT:
   1.570 +    case T_INT:
   1.571 +      if (ireg < num_java_iarg_registers) {
   1.572 +        // Put int/ptr in register
   1.573 +        reg = java_iarg_reg[ireg];
   1.574 +        ++ireg;
   1.575 +      } else {
   1.576 +        // Put int/ptr on stack.
   1.577 +        reg = VMRegImpl::stack2reg(stk);
   1.578 +        stk += inc_stk_for_intfloat;
   1.579 +      }
   1.580 +      regs[i].set1(reg);
   1.581 +      break;
   1.582 +    case T_LONG:
   1.583 +      assert(sig_bt[i+1] == T_VOID, "expecting half");
   1.584 +      if (ireg < num_java_iarg_registers) {
   1.585 +        // Put long in register.
   1.586 +        reg = java_iarg_reg[ireg];
   1.587 +        ++ireg;
   1.588 +      } else {
   1.589 +        // Put long on stack. They must be aligned to 2 slots.
   1.590 +        if (stk & 0x1) ++stk;
   1.591 +        reg = VMRegImpl::stack2reg(stk);
   1.592 +        stk += inc_stk_for_longdouble;
   1.593 +      }
   1.594 +      regs[i].set2(reg);
   1.595 +      break;
   1.596 +    case T_OBJECT:
   1.597 +    case T_ARRAY:
   1.598 +    case T_ADDRESS:
   1.599 +      if (ireg < num_java_iarg_registers) {
   1.600 +        // Put ptr in register.
   1.601 +        reg = java_iarg_reg[ireg];
   1.602 +        ++ireg;
   1.603 +      } else {
   1.604 +        // Put ptr on stack. Objects must be aligned to 2 slots too,
   1.605 +        // because "64-bit pointers record oop-ishness on 2 aligned
   1.606 +        // adjacent registers." (see OopFlow::build_oop_map).
   1.607 +        if (stk & 0x1) ++stk;
   1.608 +        reg = VMRegImpl::stack2reg(stk);
   1.609 +        stk += inc_stk_for_longdouble;
   1.610 +      }
   1.611 +      regs[i].set2(reg);
   1.612 +      break;
   1.613 +    case T_FLOAT:
   1.614 +      if (freg < num_java_farg_registers) {
   1.615 +        // Put float in register.
   1.616 +        reg = java_farg_reg[freg];
   1.617 +        ++freg;
   1.618 +      } else {
   1.619 +        // Put float on stack.
   1.620 +        reg = VMRegImpl::stack2reg(stk);
   1.621 +        stk += inc_stk_for_intfloat;
   1.622 +      }
   1.623 +      regs[i].set1(reg);
   1.624 +      break;
   1.625 +    case T_DOUBLE:
   1.626 +      assert(sig_bt[i+1] == T_VOID, "expecting half");
   1.627 +      if (freg < num_java_farg_registers) {
   1.628 +        // Put double in register.
   1.629 +        reg = java_farg_reg[freg];
   1.630 +        ++freg;
   1.631 +      } else {
   1.632 +        // Put double on stack. They must be aligned to 2 slots.
   1.633 +        if (stk & 0x1) ++stk;
   1.634 +        reg = VMRegImpl::stack2reg(stk);
   1.635 +        stk += inc_stk_for_longdouble;
   1.636 +      }
   1.637 +      regs[i].set2(reg);
   1.638 +      break;
   1.639 +    case T_VOID:
   1.640 +      // Do not count halves.
   1.641 +      regs[i].set_bad();
   1.642 +      break;
   1.643 +    default:
   1.644 +      ShouldNotReachHere();
   1.645 +    }
   1.646 +  }
   1.647 +  return round_to(stk, 2);
   1.648 +}
   1.649 +
   1.650 +#ifdef COMPILER2
   1.651 +// Calling convention for calling C code.
   1.652 +int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
   1.653 +                                        VMRegPair *regs,
   1.654 +                                        VMRegPair *regs2,
   1.655 +                                        int total_args_passed) {
   1.656 +  // Calling conventions for C runtime calls and calls to JNI native methods.
   1.657 +  //
   1.658 +  // PPC64 convention: Hoist the first 8 int/ptr/long args into the first 8
   1.659 +  // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
   1.660 +  // the first 13 flt/dbl args into the first 13 fp regs, but additionally
   1.661 +  // copy a flt/dbl to the stack if it is beyond the 8th argument.
   1.662 +
   1.663 +  const VMReg iarg_reg[8] = {
   1.664 +    R3->as_VMReg(),
   1.665 +    R4->as_VMReg(),
   1.666 +    R5->as_VMReg(),
   1.667 +    R6->as_VMReg(),
   1.668 +    R7->as_VMReg(),
   1.669 +    R8->as_VMReg(),
   1.670 +    R9->as_VMReg(),
   1.671 +    R10->as_VMReg()
   1.672 +  };
   1.673 +
   1.674 +  const VMReg farg_reg[13] = {
   1.675 +    F1->as_VMReg(),
   1.676 +    F2->as_VMReg(),
   1.677 +    F3->as_VMReg(),
   1.678 +    F4->as_VMReg(),
   1.679 +    F5->as_VMReg(),
   1.680 +    F6->as_VMReg(),
   1.681 +    F7->as_VMReg(),
   1.682 +    F8->as_VMReg(),
   1.683 +    F9->as_VMReg(),
   1.684 +    F10->as_VMReg(),
   1.685 +    F11->as_VMReg(),
   1.686 +    F12->as_VMReg(),
   1.687 +    F13->as_VMReg()
   1.688 +  };
   1.689 +
   1.690 +  // Check calling conventions consistency.
   1.691 +  assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
   1.692 +         sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
   1.693 +         "consistency");
   1.694 +
   1.695 +  // `Stk' counts stack slots. Due to alignment, 32 bit values occupy
   1.696 +  // 2 such slots, like 64 bit values do.
   1.697 +  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats
   1.698 +  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
   1.699 +
   1.701 +  VMReg reg;
   1.702 +  // Leave room for C-compatible ABI_REG_ARGS.
   1.703 +  int stk = (frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size;
   1.704 +  int arg = 0;
   1.705 +  int freg = 0;
   1.706 +
   1.707 +  // Avoid passing C arguments in the wrong stack slots.
   1.708 +#if defined(ABI_ELFv2)
   1.709 +  assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 96,
   1.710 +         "passing C arguments in wrong stack slots");
   1.711 +#else
   1.712 +  assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 112,
   1.713 +         "passing C arguments in wrong stack slots");
   1.714 +#endif
   1.715 +  // We fill out regs AND regs2 if an argument must be passed in a
   1.716 +  // register AND in a stack slot. If regs2 is NULL in such a
   1.717 +  // situation, we bail out with a fatal error.
   1.718 +  for (int i = 0; i < total_args_passed; ++i, ++arg) {
   1.719 +    // Initialize regs2 to BAD.
   1.720 +    if (regs2 != NULL) regs2[i].set_bad();
   1.721 +
   1.722 +    switch(sig_bt[i]) {
   1.723 +
   1.724 +    //
   1.725 +    // If arguments 0-7 are integers, they are passed in integer registers.
   1.726 +    // Argument i is placed in iarg_reg[i].
   1.727 +    //
   1.728 +    case T_BOOLEAN:
   1.729 +    case T_CHAR:
   1.730 +    case T_BYTE:
   1.731 +    case T_SHORT:
   1.732 +    case T_INT:
   1.733 +      // We must cast ints to longs and use full 64-bit stack slots
   1.734 +      // here. We do the cast in GraphKit::gen_stub() and just guard
   1.735 +      // here against losing that change.
   1.736 +      assert(CCallingConventionRequiresIntsAsLongs,
   1.737 +             "argument of type int should be promoted to type long");
   1.738 +      guarantee(i > 0 && sig_bt[i-1] == T_LONG,
   1.739 +                "argument of type (bt) should have been promoted to type (T_LONG,bt) for bt in "
   1.740 +                "{T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
   1.741 +      // Do not count halves.
   1.742 +      regs[i].set_bad();
   1.743 +      --arg;
   1.744 +      break;
   1.745 +    case T_LONG:
   1.746 +      guarantee(sig_bt[i+1] == T_VOID    ||
   1.747 +                sig_bt[i+1] == T_BOOLEAN || sig_bt[i+1] == T_CHAR  ||
   1.748 +                sig_bt[i+1] == T_BYTE    || sig_bt[i+1] == T_SHORT ||
   1.749 +                sig_bt[i+1] == T_INT,
   1.750 +                "expecting type (T_LONG,half) or type (T_LONG,bt) with bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
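         +      // fall through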
   1.751 +    case T_OBJECT:
   1.752 +    case T_ARRAY:
   1.753 +    case T_ADDRESS:
   1.754 +    case T_METADATA:
   1.755 +      // Oops are already boxed if required (JNI).
   1.756 +      if (arg < Argument::n_int_register_parameters_c) {
   1.757 +        reg = iarg_reg[arg];
   1.758 +      } else {
   1.759 +        reg = VMRegImpl::stack2reg(stk);
   1.760 +        stk += inc_stk_for_longdouble;
   1.761 +      }
   1.762 +      regs[i].set2(reg);
   1.763 +      break;
   1.764 +
   1.765 +    //
   1.766 +    // Floats are treated differently from int regs:  The first 13 float arguments
   1.767 +    // are passed in registers (not the float args among the first 13 args).
   1.768 +    // Thus argument i is NOT passed in farg_reg[i] if it is float.  It is passed
   1.769 +    // in farg_reg[j] if argument i is the j-th float argument of this call.
   1.770 +    //
   1.771 +    case T_FLOAT:
   1.772 +      if (freg < Argument::n_float_register_parameters_c) {
   1.773 +        // Put float in register ...
   1.774 +        reg = farg_reg[freg];
   1.775 +        ++freg;
   1.776 +
   1.777 +        // Argument i for i > 8 is placed on the stack even if it's
   1.778 +        // placed in a register (if it's a float arg). Aix disassembly
   1.779 +        // shows that xlC places these float args on the stack AND in
   1.780 +        // a register. This is not documented, but we follow this
   1.781 +        // convention, too.
   1.782 +        if (arg >= Argument::n_regs_not_on_stack_c) {
   1.783 +          // ... and on the stack.
   1.784 +          guarantee(regs2 != NULL, "must pass float in register and stack slot");
   1.785 +          VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
   1.786 +          regs2[i].set1(reg2);
   1.787 +          stk += inc_stk_for_intfloat;
   1.788 +        }
   1.789 +
   1.790 +      } else {
   1.791 +        // Put float on stack.
   1.792 +        reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
   1.793 +        stk += inc_stk_for_intfloat;
   1.794 +      }
   1.795 +      regs[i].set1(reg);
   1.796 +      break;
   1.797 +    case T_DOUBLE:
   1.798 +      assert(sig_bt[i+1] == T_VOID, "expecting half");
   1.799 +      if (freg < Argument::n_float_register_parameters_c) {
   1.800 +        // Put double in register ...
   1.801 +        reg = farg_reg[freg];
   1.802 +        ++freg;
   1.803 +
   1.804 +        // Argument i for i > 8 is placed on the stack even if it's
   1.805 +        // placed in a register (if it's a double arg). Aix disassembly
   1.806 +        // shows that xlC places these double args on the stack AND in
   1.807 +        // a register. This is not documented, but we follow this
   1.808 +        // convention, too.
   1.809 +        if (arg >= Argument::n_regs_not_on_stack_c) {
   1.810 +          // ... and on the stack.
   1.811 +          guarantee(regs2 != NULL, "must pass double in register and stack slot");
   1.812 +          VMReg reg2 = VMRegImpl::stack2reg(stk);
   1.813 +          regs2[i].set2(reg2);
   1.814 +          stk += inc_stk_for_longdouble;
   1.815 +        }
   1.816 +      } else {
   1.817 +        // Put double on stack.
   1.818 +        reg = VMRegImpl::stack2reg(stk);
   1.819 +        stk += inc_stk_for_longdouble;
   1.820 +      }
   1.821 +      regs[i].set2(reg);
   1.822 +      break;
   1.823 +
   1.824 +    case T_VOID:
   1.825 +      // Do not count halves.
   1.826 +      regs[i].set_bad();
   1.827 +      --arg;
   1.828 +      break;
   1.829 +    default:
   1.830 +      ShouldNotReachHere();
   1.831 +    }
   1.832 +  }
   1.833 +
   1.834 +  return round_to(stk, 2);
   1.835 +}
   1.836 +#endif // COMPILER2
   1.837 +
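         +// Editor's note (worked example under the conventions above): with
         +// CCallingConventionRequiresIntsAsLongs, a runtime call taking (jint, jfloat)
         +// reaches c_calling_convention as sig_bt = { T_LONG, T_INT, T_FLOAT }. The
         +// T_INT half is skipped (set_bad), the promoted long lands in R3, and the
         +// float lands in F1; regs2 stays BAD because the float's argument index is
         +// below Argument::n_regs_not_on_stack_c.
         +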
   1.838 +static address gen_c2i_adapter(MacroAssembler *masm,
   1.839 +                               int total_args_passed,
   1.840 +                               int comp_args_on_stack,
   1.841 +                               const BasicType *sig_bt,
   1.842 +                               const VMRegPair *regs,
   1.843 +                               Label& call_interpreter,
   1.844 +                               const Register& ientry) {
   1.845 +
   1.846 +  address c2i_entrypoint;
   1.847 +
   1.848 +  const Register sender_SP = R21_sender_SP; // == R21_tmp1
   1.849 +  const Register code      = R22_tmp2;
   1.850 +  //const Register ientry  = R23_tmp3;
   1.851 +  const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
   1.852 +  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
   1.853 +  int value_regs_index = 0;
   1.854 +
   1.855 +  const Register return_pc = R27_tmp7;
   1.856 +  const Register tmp       = R28_tmp8;
   1.857 +
   1.858 +  assert_different_registers(sender_SP, code, ientry, return_pc, tmp);
   1.859 +
   1.860 +  // Adapter needs TOP_IJAVA_FRAME_ABI.
   1.861 +  const int adapter_size = frame::top_ijava_frame_abi_size +
   1.862 +                           round_to(total_args_passed * wordSize, frame::alignment_in_bytes);
   1.863 +
   1.864 +  // regular (verified) c2i entry point
   1.865 +  c2i_entrypoint = __ pc();
   1.866 +
   1.867 +  // Does compiled code exist? If yes, patch the caller's callsite.
   1.868 +  __ ld(code, method_(code));
   1.869 +  __ cmpdi(CCR0, code, 0);
   1.870 +  __ ld(ientry, method_(interpreter_entry)); // preloaded
   1.871 +  __ beq(CCR0, call_interpreter);
   1.872 +
   1.873 +
   1.874 +  // Patch the caller's callsite: method_(code) was not NULL, which means
   1.875 +  // that compiled code exists.
   1.876 +  __ mflr(return_pc);
   1.877 +  __ std(return_pc, _abi(lr), R1_SP);
   1.878 +  RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);
   1.879 +
   1.880 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);
   1.881 +
   1.882 +  RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
   1.883 +  __ ld(return_pc, _abi(lr), R1_SP);
   1.884 +  __ ld(ientry, method_(interpreter_entry)); // preloaded
   1.885 +  __ mtlr(return_pc);
   1.886 +
   1.887 +
   1.888 +  // Call the interpreter.
   1.889 +  __ BIND(call_interpreter);
   1.890 +  __ mtctr(ientry);
   1.891 +
   1.892 +  // Get a copy of the current SP for loading caller's arguments.
   1.893 +  __ mr(sender_SP, R1_SP);
   1.894 +
   1.895 +  // Add space for the adapter.
   1.896 +  __ resize_frame(-adapter_size, R12_scratch2);
   1.897 +
   1.898 +  int st_off = adapter_size - wordSize;
   1.899 +
   1.900 +  // Write the args into the outgoing interpreter space.
   1.901 +  for (int i = 0; i < total_args_passed; i++) {
   1.902 +    VMReg r_1 = regs[i].first();
   1.903 +    VMReg r_2 = regs[i].second();
   1.904 +    if (!r_1->is_valid()) {
   1.905 +      assert(!r_2->is_valid(), "");
   1.906 +      continue;
   1.907 +    }
   1.908 +    if (r_1->is_stack()) {
   1.909 +      Register tmp_reg = value_regs[value_regs_index];
   1.910 +      value_regs_index = (value_regs_index + 1) % num_value_regs;
   1.911 +      // The calling convention produces OptoRegs that ignore the out
   1.912 +      // preserve area (JIT's ABI). We must account for it here.
   1.913 +      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
   1.914 +      if (!r_2->is_valid()) {
   1.915 +        __ lwz(tmp_reg, ld_off, sender_SP);
   1.916 +      } else {
   1.917 +        __ ld(tmp_reg, ld_off, sender_SP);
   1.918 +      }
   1.919 +      // Pretend stack targets were loaded into tmp_reg.
   1.920 +      r_1 = tmp_reg->as_VMReg();
   1.921 +    }
   1.922 +
   1.923 +    if (r_1->is_Register()) {
   1.924 +      Register r = r_1->as_Register();
   1.925 +      if (!r_2->is_valid()) {
   1.926 +        __ stw(r, st_off, R1_SP);
   1.927 +        st_off-=wordSize;
   1.928 +      } else {
   1.929 +        // Longs are given 2 64-bit slots in the interpreter, but the
   1.930 +        // data is passed in only 1 slot.
   1.931 +        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
   1.932 +          DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
   1.933 +          st_off-=wordSize;
   1.934 +        }
   1.935 +        __ std(r, st_off, R1_SP);
   1.936 +        st_off-=wordSize;
   1.937 +      }
   1.938 +    } else {
   1.939 +      assert(r_1->is_FloatRegister(), "");
   1.940 +      FloatRegister f = r_1->as_FloatRegister();
   1.941 +      if (!r_2->is_valid()) {
   1.942 +        __ stfs(f, st_off, R1_SP);
   1.943 +        st_off-=wordSize;
   1.944 +      } else {
   1.945 +        // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
   1.946 +        // data is passed in only 1 slot.
   1.947 +        // One of these should get known junk...
   1.948 +        DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
   1.949 +        st_off-=wordSize;
   1.950 +        __ stfd(f, st_off, R1_SP);
   1.951 +        st_off-=wordSize;
   1.952 +      }
   1.953 +    }
   1.954 +  }
   1.955 +
   1.956 +  // Jump to the interpreter just as if the interpreter was doing it.
   1.957 +
   1.958 +#ifdef CC_INTERP
   1.959 +  const Register tos = R17_tos;
   1.960 +#else
   1.961 +  const Register tos = R15_esp;
   1.962 +  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
   1.963 +#endif
   1.964 +
   1.965 +  // load TOS
   1.966 +  __ addi(tos, R1_SP, st_off);
   1.967 +
   1.968 +  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
   1.969 +  assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
   1.970 +  __ bctr();
   1.971 +
   1.972 +  return c2i_entrypoint;
   1.973 +}
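         +
         +// Editor's sketch of the outgoing interpreter argument area built above
         +// (illustration only, assuming two one-slot args): st_off starts at
         +// adapter_size - wordSize and walks down, giving
         +//   [R1_SP + adapter_size - 1*wordSize]  arg 0
         +//   [R1_SP + adapter_size - 2*wordSize]  arg 1
         +// with tos (R15_esp/R17_tos) left pointing one slot below the last
         +// argument written.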
   1.974 +
   1.975 +static void gen_i2c_adapter(MacroAssembler *masm,
   1.976 +                            int total_args_passed,
   1.977 +                            int comp_args_on_stack,
   1.978 +                            const BasicType *sig_bt,
   1.979 +                            const VMRegPair *regs) {
   1.980 +
   1.981 +  // Load method's entry-point from method.
   1.982 +  __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
   1.983 +  __ mtctr(R12_scratch2);
   1.984 +
   1.985 +  // We will only enter here from an interpreted frame and never from after
   1.986 +  // passing thru a c2i. Azul allowed this but we do not. If we lose the
   1.987 +  // race and use a c2i we will remain interpreted for the race loser(s).
   1.988 +  // This removes all sorts of headaches on the x86 side and also eliminates
   1.989 +  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
   1.990 +
   1.991 +  // Note: R21_sender_SP contains the sender SP on entry. We must
   1.992 +  // preserve it since we may do an i2c -> c2i transition if we lose a
   1.993 +  // race where compiled code goes non-entrant while we get args ready.
   1.994 +  // In addition we must keep the stack aligned to 16 bytes on an i2c
   1.995 +  // entry, else we lose the alignment all compiled code expects and
   1.996 +  // the register save code can fail on a misaligned stack pointer.
   1.999 +
  1.1000 +#ifdef CC_INTERP
  1.1001 +  const Register ld_ptr = R17_tos;
  1.1002 +#else
  1.1003 +  const Register ld_ptr = R15_esp;
  1.1004 +#endif
  1.1005 +
  1.1006 +  const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
  1.1007 +  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  1.1008 +  int value_regs_index = 0;
  1.1009 +
  1.1010 +  int ld_offset = total_args_passed*wordSize;
  1.1011 +
  1.1012 +  // Cut-out for having no stack args. Since up to 8 int/oop args and 13
  1.1013 +  // float args are passed in registers, we will frequently have no stack args.
  1.1014 +  int comp_words_on_stack = 0;
  1.1015 +  if (comp_args_on_stack) {
  1.1016 +    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
  1.1017 +    // registers are below. By subtracting stack0, we either get a negative
  1.1018 +    // number (all values in registers) or the maximum stack slot accessed.
  1.1019 +
  1.1020 +    // Convert 4-byte c2 stack slots to words.
  1.1021 +    comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
  1.1022 +    // Round up to minimum stack alignment, in units of wordSize.
  1.1023 +    comp_words_on_stack = round_to(comp_words_on_stack, 2);
  1.1024 +    __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
  1.1025 +  }
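         +
         +  // Editor's note (worked example): for comp_args_on_stack == 5, the five
         +  // 4-byte slots round up to 24 bytes = 3 words; rounding to the 2-word
         +  // stack alignment gives 4 words, so the frame grows by 32 bytes.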
  1.1026 +
  1.1027 +  // Now generate the shuffle code. Pick up all register args and move the
  1.1028 +  // rest through the value_regs temp registers.
  1.1029 +  BLOCK_COMMENT("Shuffle arguments");
  1.1030 +  for (int i = 0; i < total_args_passed; i++) {
  1.1031 +    if (sig_bt[i] == T_VOID) {
  1.1032 +      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
  1.1033 +      continue;
  1.1034 +    }
  1.1035 +
  1.1036 +    // Pick up 0, 1 or 2 words from ld_ptr.
  1.1037 +    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
  1.1038 +            "scrambled load targets?");
  1.1039 +    VMReg r_1 = regs[i].first();
  1.1040 +    VMReg r_2 = regs[i].second();
  1.1041 +    if (!r_1->is_valid()) {
  1.1042 +      assert(!r_2->is_valid(), "");
  1.1043 +      continue;
  1.1044 +    }
  1.1045 +    if (r_1->is_FloatRegister()) {
  1.1046 +      if (!r_2->is_valid()) {
  1.1047 +        __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
  1.1048 +        ld_offset-=wordSize;
  1.1049 +      } else {
  1.1050 +        // Skip the unused interpreter slot.
  1.1051 +        __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
  1.1052 +        ld_offset-=2*wordSize;
  1.1053 +      }
  1.1054 +    } else {
  1.1055 +      Register r;
  1.1056 +      if (r_1->is_stack()) {
  1.1057 +        // Must do a memory-to-memory move through a value register.
  1.1058 +        r = value_regs[value_regs_index];
  1.1059 +        value_regs_index = (value_regs_index + 1) % num_value_regs;
  1.1060 +      } else {
  1.1061 +        r = r_1->as_Register();
  1.1062 +      }
  1.1063 +      if (!r_2->is_valid()) {
  1.1064 +        // Not sure we need to do this but it shouldn't hurt.
  1.1065 +        if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
  1.1066 +          __ ld(r, ld_offset, ld_ptr);
  1.1067 +          ld_offset-=wordSize;
  1.1068 +        } else {
  1.1069 +          __ lwz(r, ld_offset, ld_ptr);
  1.1070 +          ld_offset-=wordSize;
  1.1071 +        }
  1.1072 +      } else {
  1.1073 +        // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
  1.1074 +        // data is passed in only 1 slot.
  1.1075 +        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
  1.1076 +          ld_offset-=wordSize;
  1.1077 +        }
  1.1078 +        __ ld(r, ld_offset, ld_ptr);
  1.1079 +        ld_offset-=wordSize;
  1.1080 +      }
  1.1081 +
  1.1082 +      if (r_1->is_stack()) {
  1.1083 +        // Now store value where the compiler expects it
  1.1084 +        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;
  1.1085 +
  1.1086 +        if (sig_bt[i] == T_INT   || sig_bt[i] == T_FLOAT || sig_bt[i] == T_BOOLEAN ||
  1.1087 +            sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR  || sig_bt[i] == T_BYTE) {
  1.1088 +          __ stw(r, st_off, R1_SP);
  1.1089 +        } else {
  1.1090 +          __ std(r, st_off, R1_SP);
  1.1091 +        }
  1.1092 +      }
  1.1093 +    }
  1.1094 +  }
  1.1095 +
  1.1096 +  BLOCK_COMMENT("Store method");
  1.1097 +  // Store method into thread->callee_target.
  1.1098 +  // We might end up in handle_wrong_method if the callee is
  1.1099 +  // deoptimized as we race thru here. If that happens we don't want
  1.1100 +  // to take a safepoint because the caller frame will look
  1.1101 +  // interpreted and arguments are now "compiled" so it is much better
  1.1102 +  // to make this transition invisible to the stack walking
  1.1103 +  // code. Unfortunately if we try and find the callee by normal means
  1.1104 +  // a safepoint is possible. So we stash the desired callee in the
  1.1105 +  // thread and the vm will find it there should this case occur.
  1.1106 +  __ std(R19_method, thread_(callee_target));
  1.1107 +
  1.1108 +  // Jump to the compiled code just as if compiled code was doing it.
  1.1109 +  __ bctr();
  1.1110 +}
  1.1111 +
  1.1112 +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
  1.1113 +                                                            int total_args_passed,
  1.1114 +                                                            int comp_args_on_stack,
  1.1115 +                                                            const BasicType *sig_bt,
  1.1116 +                                                            const VMRegPair *regs,
  1.1117 +                                                            AdapterFingerPrint* fingerprint) {
  1.1118 +  address i2c_entry;
  1.1119 +  address c2i_unverified_entry;
  1.1120 +  address c2i_entry;
  1.1121 +
  1.1122 +
  1.1123 +  // entry: i2c
  1.1124 +
  1.1125 +  __ align(CodeEntryAlignment);
  1.1126 +  i2c_entry = __ pc();
  1.1127 +  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
  1.1128 +
  1.1129 +
  1.1130 +  // entry: c2i unverified
  1.1131 +
  1.1132 +  __ align(CodeEntryAlignment);
  1.1133 +  BLOCK_COMMENT("c2i unverified entry");
  1.1134 +  c2i_unverified_entry = __ pc();
  1.1135 +
  1.1136 +  // inline_cache contains a CompiledICHolder
  1.1137 +  const Register ic             = R19_method;
  1.1138 +  const Register ic_klass       = R11_scratch1;
  1.1139 +  const Register receiver_klass = R12_scratch2;
  1.1140 +  const Register code           = R21_tmp1;
  1.1141 +  const Register ientry         = R23_tmp3;
  1.1142 +
  1.1143 +  assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry);
  1.1144 +  assert(R11_scratch1 == R11, "need prologue scratch register");
  1.1145 +
  1.1146 +  Label call_interpreter;
  1.1147 +
  1.1148 +  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()),
  1.1149 +         "klass offset should reach into any page");
  1.1150 +  // Check for NULL argument if we don't have implicit null checks.
  1.1151 +  if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
  1.1152 +    if (TrapBasedNullChecks) {
  1.1153 +      __ trap_null_check(R3_ARG1);
  1.1154 +    } else {
  1.1155 +      Label valid;
  1.1156 +      __ cmpdi(CCR0, R3_ARG1, 0);
  1.1157 +      __ bne_predict_taken(CCR0, valid);
  1.1158 +      // We have a null argument, branch to ic_miss_stub.
  1.1159 +      __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
  1.1160 +                       relocInfo::runtime_call_type);
  1.1161 +      __ BIND(valid);
  1.1162 +    }
  1.1163 +  }
  1.1164 +  // Assume argument is not NULL, load klass from receiver.
  1.1165 +  __ load_klass(receiver_klass, R3_ARG1);
  1.1166 +
  1.1167 +  __ ld(ic_klass, CompiledICHolder::holder_klass_offset(), ic);
  1.1168 +
  1.1169 +  if (TrapBasedICMissChecks) {
  1.1170 +    __ trap_ic_miss_check(receiver_klass, ic_klass);
  1.1171 +  } else {
  1.1172 +    Label valid;
  1.1173 +    __ cmpd(CCR0, receiver_klass, ic_klass);
  1.1174 +    __ beq_predict_taken(CCR0, valid);
  1.1175 +    // We have an unexpected klass, branch to ic_miss_stub.
  1.1176 +    __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
  1.1177 +                     relocInfo::runtime_call_type);
  1.1178 +    __ BIND(valid);
  1.1179 +  }
  1.1180 +
  1.1181 +  // Argument is valid and klass is as expected, continue.
  1.1182 +
  1.1183 +  // Extract method from inline cache, verified entry point needs it.
  1.1184 +  __ ld(R19_method, CompiledICHolder::holder_method_offset(), ic);
  1.1185 +  assert(R19_method == ic, "the inline cache register is dead here");
  1.1186 +
  1.1187 +  __ ld(code, method_(code));
  1.1188 +  __ cmpdi(CCR0, code, 0);
  1.1189 +  __ ld(ientry, method_(interpreter_entry)); // preloaded
  1.1190 +  __ beq_predict_taken(CCR0, call_interpreter);
  1.1191 +
  1.1192 +  // Branch to ic_miss_stub.
  1.1193 +  __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
  1.1194 +
  1.1195 +  // entry: c2i
  1.1196 +
  1.1197 +  c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry);
  1.1198 +
  1.1199 +  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
  1.1200 +}
  1.1201 +
  1.1202 +#ifdef COMPILER2
  1.1203 +// An oop arg. Must pass a handle not the oop itself.
  1.1204 +static void object_move(MacroAssembler* masm,
  1.1205 +                        int frame_size_in_slots,
  1.1206 +                        OopMap* oop_map, int oop_handle_offset,
  1.1207 +                        bool is_receiver, int* receiver_offset,
  1.1208 +                        VMRegPair src, VMRegPair dst,
  1.1209 +                        Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
  1.1210 +  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
  1.1211 +         "receiver has already been moved");
  1.1212 +
  1.1213 +  // We must pass a handle. First figure out the location we use as a handle.
  1.1214 +
  1.1215 +  if (src.first()->is_stack()) {
  1.1216 +    // stack to stack or reg
  1.1217 +
  1.1218 +    const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
  1.1219 +    Label skip;
  1.1220 +    const int oop_slot_in_callers_frame = reg2slot(src.first());
  1.1221 +
  1.1222 +    guarantee(!is_receiver, "expecting receiver in register");
  1.1223 +    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));
  1.1224 +
  1.1225 +    __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
  1.1226 +    __ ld(  r_temp_2, reg2offset(src.first()), r_caller_sp);
  1.1227 +    __ cmpdi(CCR0, r_temp_2, 0);
  1.1228 +    __ bne(CCR0, skip);
  1.1229 +    // Use a NULL handle if oop is NULL.
  1.1230 +    __ li(r_handle, 0);
  1.1231 +    __ bind(skip);
  1.1232 +
  1.1233 +    if (dst.first()->is_stack()) {
  1.1234 +      // stack to stack
  1.1235 +      __ std(r_handle, reg2offset(dst.first()), R1_SP);
  1.1236 +    } else {
  1.1237 +      // stack to reg
  1.1238 +      // Nothing to do, r_handle is already the dst register.
  1.1239 +    }
  1.1240 +  } else {
  1.1241 +    // reg to stack or reg
  1.1242 +    const Register r_oop      = src.first()->as_Register();
  1.1243 +    const Register r_handle   = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
  1.1244 +    const int oop_slot        = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
  1.1245 +                                + oop_handle_offset; // in slots
  1.1246 +    const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
  1.1247 +    Label skip;
  1.1248 +
  1.1249 +    if (is_receiver) {
  1.1250 +      *receiver_offset = oop_offset;
  1.1251 +    }
  1.1252 +    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));
  1.1253 +
  1.1254 +    __ std( r_oop,    oop_offset, R1_SP);
  1.1255 +    __ addi(r_handle, R1_SP, oop_offset);
  1.1256 +
  1.1257 +    __ cmpdi(CCR0, r_oop, 0);
  1.1258 +    __ bne(CCR0, skip);
  1.1259 +    // Use a NULL handle if oop is NULL.
  1.1260 +    __ li(r_handle, 0);
  1.1261 +    __ bind(skip);
  1.1262 +
  1.1263 +    if (dst.first()->is_stack()) {
  1.1264 +      // reg to stack
  1.1265 +      __ std(r_handle, reg2offset(dst.first()), R1_SP);
  1.1266 +    } else {
  1.1267 +      // reg to reg
  1.1268 +      // Nothing to do, r_handle is already the dst register.
  1.1269 +    }
  1.1270 +  }
  1.1271 +}
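          +
          +// From the callee's point of view the handle arrives as a plain
          +// jobject, e.g. (illustrative signature only):
          +//   JNIEXPORT void JNICALL Java_Pkg_C_m(JNIEnv* env, jobject obj);
          +// where obj is either NULL or the address of the slot written above.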
  1.1272 +
   1.1273 +static void int_move(MacroAssembler* masm,
  1.1274 +                     VMRegPair src, VMRegPair dst,
  1.1275 +                     Register r_caller_sp, Register r_temp) {
  1.1276 +  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long-int");
  1.1277 +  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
  1.1278 +
  1.1279 +  if (src.first()->is_stack()) {
  1.1280 +    if (dst.first()->is_stack()) {
  1.1281 +      // stack to stack
  1.1282 +      __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
  1.1283 +      __ std(r_temp, reg2offset(dst.first()), R1_SP);
  1.1284 +    } else {
  1.1285 +      // stack to reg
  1.1286 +      __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
  1.1287 +    }
  1.1288 +  } else if (dst.first()->is_stack()) {
  1.1289 +    // reg to stack
  1.1290 +    __ extsw(r_temp, src.first()->as_Register());
  1.1291 +    __ std(r_temp, reg2offset(dst.first()), R1_SP);
  1.1292 +  } else {
  1.1293 +    // reg to reg
  1.1294 +    __ extsw(dst.first()->as_Register(), src.first()->as_Register());
  1.1295 +  }
  1.1296 +}
  1.1297 +
   1.1298 +static void long_move(MacroAssembler* masm,
  1.1299 +                      VMRegPair src, VMRegPair dst,
  1.1300 +                      Register r_caller_sp, Register r_temp) {
  1.1301 +  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
  1.1302 +  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
  1.1303 +
  1.1304 +  if (src.first()->is_stack()) {
  1.1305 +    if (dst.first()->is_stack()) {
  1.1306 +      // stack to stack
  1.1307 +      __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
  1.1308 +      __ std(r_temp, reg2offset(dst.first()), R1_SP);
  1.1309 +    } else {
  1.1310 +      // stack to reg
  1.1311 +      __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
  1.1312 +    }
  1.1313 +  } else if (dst.first()->is_stack()) {
  1.1314 +    // reg to stack
  1.1315 +    __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
  1.1316 +  } else {
  1.1317 +    // reg to reg
  1.1318 +    if (dst.first()->as_Register() != src.first()->as_Register())
  1.1319 +      __ mr(dst.first()->as_Register(), src.first()->as_Register());
  1.1320 +  }
  1.1321 +}
  1.1322 +
   1.1323 +static void float_move(MacroAssembler* masm,
  1.1324 +                       VMRegPair src, VMRegPair dst,
  1.1325 +                       Register r_caller_sp, Register r_temp) {
  1.1326 +  assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
  1.1327 +  assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");
  1.1328 +
  1.1329 +  if (src.first()->is_stack()) {
  1.1330 +    if (dst.first()->is_stack()) {
  1.1331 +      // stack to stack
  1.1332 +      __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
  1.1333 +      __ stw(r_temp, reg2offset(dst.first()), R1_SP);
  1.1334 +    } else {
  1.1335 +      // stack to reg
  1.1336 +      __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
  1.1337 +    }
  1.1338 +  } else if (dst.first()->is_stack()) {
  1.1339 +    // reg to stack
  1.1340 +    __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
  1.1341 +  } else {
  1.1342 +    // reg to reg
  1.1343 +    if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
  1.1344 +      __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  1.1345 +  }
  1.1346 +}
  1.1347 +
   1.1348 +static void double_move(MacroAssembler* masm,
  1.1349 +                        VMRegPair src, VMRegPair dst,
  1.1350 +                        Register r_caller_sp, Register r_temp) {
  1.1351 +  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
  1.1352 +  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");
  1.1353 +
  1.1354 +  if (src.first()->is_stack()) {
  1.1355 +    if (dst.first()->is_stack()) {
  1.1356 +      // stack to stack
  1.1357 +      __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
  1.1358 +      __ std(r_temp, reg2offset(dst.first()), R1_SP);
  1.1359 +    } else {
  1.1360 +      // stack to reg
  1.1361 +      __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
  1.1362 +    }
  1.1363 +  } else if (dst.first()->is_stack()) {
  1.1364 +    // reg to stack
  1.1365 +    __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
  1.1366 +  } else {
  1.1367 +    // reg to reg
  1.1368 +    if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
  1.1369 +      __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  1.1370 +  }
  1.1371 +}
  1.1372 +
  1.1373 +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1.1374 +  switch (ret_type) {
  1.1375 +    case T_BOOLEAN:
  1.1376 +    case T_CHAR:
  1.1377 +    case T_BYTE:
  1.1378 +    case T_SHORT:
  1.1379 +    case T_INT:
  1.1380 +      __ stw (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1381 +      break;
  1.1382 +    case T_ARRAY:
  1.1383 +    case T_OBJECT:
  1.1384 +    case T_LONG:
  1.1385 +      __ std (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1386 +      break;
  1.1387 +    case T_FLOAT:
  1.1388 +      __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1389 +      break;
  1.1390 +    case T_DOUBLE:
  1.1391 +      __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1392 +      break;
  1.1393 +    case T_VOID:
  1.1394 +      break;
  1.1395 +    default:
  1.1396 +      ShouldNotReachHere();
  1.1397 +      break;
  1.1398 +  }
  1.1399 +}
  1.1400 +
  1.1401 +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1.1402 +  switch (ret_type) {
  1.1403 +    case T_BOOLEAN:
  1.1404 +    case T_CHAR:
  1.1405 +    case T_BYTE:
  1.1406 +    case T_SHORT:
  1.1407 +    case T_INT:
  1.1408 +      __ lwz(R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1409 +      break;
  1.1410 +    case T_ARRAY:
  1.1411 +    case T_OBJECT:
  1.1412 +    case T_LONG:
  1.1413 +      __ ld (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1414 +      break;
  1.1415 +    case T_FLOAT:
  1.1416 +      __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1417 +      break;
  1.1418 +    case T_DOUBLE:
  1.1419 +      __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
  1.1420 +      break;
  1.1421 +    case T_VOID:
  1.1422 +      break;
  1.1423 +    default:
  1.1424 +      ShouldNotReachHere();
  1.1425 +      break;
  1.1426 +  }
  1.1427 +}
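          +
          +// Both helpers spill to/from whatever slot index the caller chooses;
          +// the native wrapper below passes its workspace area (item 6 of the
          +// frame layout) as frame_slots.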
  1.1428 +
  1.1429 +static void save_or_restore_arguments(MacroAssembler* masm,
  1.1430 +                                      const int stack_slots,
  1.1431 +                                      const int total_in_args,
  1.1432 +                                      const int arg_save_area,
  1.1433 +                                      OopMap* map,
  1.1434 +                                      VMRegPair* in_regs,
  1.1435 +                                      BasicType* in_sig_bt) {
  1.1436 +  // If map is non-NULL then the code should store the values,
  1.1437 +  // otherwise it should load them.
  1.1438 +  int slot = arg_save_area;
   1.1439 +  // Save down double-word (64-bit) values first.
  1.1440 +  for (int i = 0; i < total_in_args; i++) {
  1.1441 +    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
  1.1442 +      int offset = slot * VMRegImpl::stack_slot_size;
  1.1443 +      slot += VMRegImpl::slots_per_word;
  1.1444 +      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
  1.1445 +      if (map != NULL) {
  1.1446 +        __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
  1.1447 +      } else {
  1.1448 +        __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
  1.1449 +      }
  1.1450 +    } else if (in_regs[i].first()->is_Register() &&
  1.1451 +        (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
  1.1452 +      int offset = slot * VMRegImpl::stack_slot_size;
  1.1453 +      if (map != NULL) {
  1.1454 +        __ std(in_regs[i].first()->as_Register(), offset, R1_SP);
  1.1455 +        if (in_sig_bt[i] == T_ARRAY) {
  1.1456 +          map->set_oop(VMRegImpl::stack2reg(slot));
  1.1457 +        }
  1.1458 +      } else {
  1.1459 +        __ ld(in_regs[i].first()->as_Register(), offset, R1_SP);
  1.1460 +      }
  1.1461 +      slot += VMRegImpl::slots_per_word;
  1.1462 +      assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
  1.1463 +    }
  1.1464 +  }
  1.1465 +  // Save or restore single word registers.
  1.1466 +  for (int i = 0; i < total_in_args; i++) {
  1.1467 +    // PPC64: pass ints as longs: must only deal with floats here.
  1.1468 +    if (in_regs[i].first()->is_FloatRegister()) {
  1.1469 +      if (in_sig_bt[i] == T_FLOAT) {
  1.1470 +        int offset = slot * VMRegImpl::stack_slot_size;
  1.1471 +        slot++;
  1.1472 +        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
  1.1473 +        if (map != NULL) {
  1.1474 +          __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
  1.1475 +        } else {
  1.1476 +          __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
  1.1477 +        }
  1.1478 +      }
  1.1479 +    } else if (in_regs[i].first()->is_stack()) {
  1.1480 +      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
  1.1481 +        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
  1.1482 +        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
  1.1483 +      }
  1.1484 +    }
  1.1485 +  }
  1.1486 +}
  1.1487 +
  1.1488 +// Check GC_locker::needs_gc and enter the runtime if it's true. This
  1.1489 +// keeps a new JNI critical region from starting until a GC has been
  1.1490 +// forced. Save down any oops in registers and describe them in an
  1.1491 +// OopMap.
  1.1492 +static void check_needs_gc_for_critical_native(MacroAssembler* masm,
  1.1493 +                                               const int stack_slots,
  1.1494 +                                               const int total_in_args,
  1.1495 +                                               const int arg_save_area,
  1.1496 +                                               OopMapSet* oop_maps,
  1.1497 +                                               VMRegPair* in_regs,
  1.1498 +                                               BasicType* in_sig_bt,
  1.1499 +                                               Register tmp_reg ) {
  1.1500 +  __ block_comment("check GC_locker::needs_gc");
  1.1501 +  Label cont;
  1.1502 +  __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GC_locker::needs_gc_address());
  1.1503 +  __ cmplwi(CCR0, tmp_reg, 0);
  1.1504 +  __ beq(CCR0, cont);
  1.1505 +
  1.1506 +  // Save down any values that are live in registers and call into the
  1.1507 +  // runtime to halt for a GC.
  1.1508 +  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  1.1509 +  save_or_restore_arguments(masm, stack_slots, total_in_args,
  1.1510 +                            arg_save_area, map, in_regs, in_sig_bt);
  1.1511 +
  1.1512 +  __ mr(R3_ARG1, R16_thread);
  1.1513 +  __ set_last_Java_frame(R1_SP, noreg);
  1.1514 +
  1.1515 +  __ block_comment("block_for_jni_critical");
  1.1516 +  address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
  1.1517 +#if defined(ABI_ELFv2)
  1.1518 +  __ call_c(entry_point, relocInfo::runtime_call_type);
  1.1519 +#else
  1.1520 +  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type);
  1.1521 +#endif
  1.1522 +  address start           = __ pc() - __ offset(),
  1.1523 +          calls_return_pc = __ last_calls_return_pc();
  1.1524 +  oop_maps->add_gc_map(calls_return_pc - start, map);
  1.1525 +
  1.1526 +  __ reset_last_Java_frame();
  1.1527 +
  1.1528 +  // Reload all the register arguments.
  1.1529 +  save_or_restore_arguments(masm, stack_slots, total_in_args,
  1.1530 +                            arg_save_area, NULL, in_regs, in_sig_bt);
  1.1531 +
  1.1532 +  __ BIND(cont);
  1.1533 +
  1.1534 +#ifdef ASSERT
  1.1535 +  if (StressCriticalJNINatives) {
  1.1536 +    // Stress register saving.
  1.1537 +    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  1.1538 +    save_or_restore_arguments(masm, stack_slots, total_in_args,
  1.1539 +                              arg_save_area, map, in_regs, in_sig_bt);
  1.1540 +    // Destroy argument registers.
  1.1541 +    for (int i = 0; i < total_in_args; i++) {
  1.1542 +      if (in_regs[i].first()->is_Register()) {
  1.1543 +        const Register reg = in_regs[i].first()->as_Register();
  1.1544 +        __ neg(reg, reg);
  1.1545 +      } else if (in_regs[i].first()->is_FloatRegister()) {
  1.1546 +        __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
  1.1547 +      }
  1.1548 +    }
  1.1549 +
  1.1550 +    save_or_restore_arguments(masm, stack_slots, total_in_args,
  1.1551 +                              arg_save_area, NULL, in_regs, in_sig_bt);
  1.1552 +  }
  1.1553 +#endif
  1.1554 +}
  1.1555 +
  1.1556 +static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) {
  1.1557 +  if (src.first()->is_stack()) {
  1.1558 +    if (dst.first()->is_stack()) {
  1.1559 +      // stack to stack
  1.1560 +      __ ld(r_temp, reg2offset(src.first()), r_caller_sp);
  1.1561 +      __ std(r_temp, reg2offset(dst.first()), R1_SP);
  1.1562 +    } else {
  1.1563 +      // stack to reg
  1.1564 +      __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
  1.1565 +    }
  1.1566 +  } else if (dst.first()->is_stack()) {
  1.1567 +    // reg to stack
  1.1568 +    __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
  1.1569 +  } else {
  1.1570 +    if (dst.first() != src.first()) {
  1.1571 +      __ mr(dst.first()->as_Register(), src.first()->as_Register());
  1.1572 +    }
  1.1573 +  }
  1.1574 +}
  1.1575 +
  1.1576 +// Unpack an array argument into a pointer to the body and the length
  1.1577 +// if the array is non-null, otherwise pass 0 for both.
  1.1578 +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type,
  1.1579 +                                  VMRegPair body_arg, VMRegPair length_arg, Register r_caller_sp,
  1.1580 +                                  Register tmp_reg, Register tmp2_reg) {
  1.1581 +  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
  1.1582 +         "possible collision");
  1.1583 +  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
  1.1584 +         "possible collision");
  1.1585 +
  1.1586 +  // Pass the length, ptr pair.
  1.1587 +  Label set_out_args;
  1.1588 +  VMRegPair tmp, tmp2;
  1.1589 +  tmp.set_ptr(tmp_reg->as_VMReg());
  1.1590 +  tmp2.set_ptr(tmp2_reg->as_VMReg());
  1.1591 +  if (reg.first()->is_stack()) {
  1.1592 +    // Load the arg up from the stack.
  1.1593 +    move_ptr(masm, reg, tmp, r_caller_sp, /*unused*/ R0);
  1.1594 +    reg = tmp;
  1.1595 +  }
   1.1596 +  __ li(tmp2_reg, 0); // Pass zeros if the array is NULL.
  1.1597 +  if (tmp_reg != reg.first()->as_Register()) __ li(tmp_reg, 0);
  1.1598 +  __ cmpdi(CCR0, reg.first()->as_Register(), 0);
  1.1599 +  __ beq(CCR0, set_out_args);
  1.1600 +  __ lwa(tmp2_reg, arrayOopDesc::length_offset_in_bytes(), reg.first()->as_Register());
  1.1601 +  __ addi(tmp_reg, reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type));
  1.1602 +  __ bind(set_out_args);
  1.1603 +  move_ptr(masm, tmp, body_arg, r_caller_sp, /*unused*/ R0);
  1.1604 +  move_ptr(masm, tmp2, length_arg, r_caller_sp, /*unused*/ R0); // Same as move32_64 on PPC64.
  1.1605 +}
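          +
          +// A Java array parameter of a critical native therefore expands into
          +// the C parameter pair (length, body pointer), e.g. (illustrative):
          +//   JNIEXPORT jint JNICALL JavaCritical_Pkg_C_sum(jint len, jbyte* body);
          +// with both passed as zero when the incoming array is NULL.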
  1.1606 +
  1.1607 +static void verify_oop_args(MacroAssembler* masm,
  1.1608 +                            methodHandle method,
  1.1609 +                            const BasicType* sig_bt,
  1.1610 +                            const VMRegPair* regs) {
  1.1611 +  Register temp_reg = R19_method;  // not part of any compiled calling seq
  1.1612 +  if (VerifyOops) {
  1.1613 +    for (int i = 0; i < method->size_of_parameters(); i++) {
  1.1614 +      if (sig_bt[i] == T_OBJECT ||
  1.1615 +          sig_bt[i] == T_ARRAY) {
  1.1616 +        VMReg r = regs[i].first();
  1.1617 +        assert(r->is_valid(), "bad oop arg");
  1.1618 +        if (r->is_stack()) {
  1.1619 +          __ ld(temp_reg, reg2offset(r), R1_SP);
  1.1620 +          __ verify_oop(temp_reg);
  1.1621 +        } else {
  1.1622 +          __ verify_oop(r->as_Register());
  1.1623 +        }
  1.1624 +      }
  1.1625 +    }
  1.1626 +  }
  1.1627 +}
  1.1628 +
  1.1629 +static void gen_special_dispatch(MacroAssembler* masm,
  1.1630 +                                 methodHandle method,
  1.1631 +                                 const BasicType* sig_bt,
  1.1632 +                                 const VMRegPair* regs) {
  1.1633 +  verify_oop_args(masm, method, sig_bt, regs);
  1.1634 +  vmIntrinsics::ID iid = method->intrinsic_id();
  1.1635 +
  1.1636 +  // Now write the args into the outgoing interpreter space
  1.1637 +  bool     has_receiver   = false;
  1.1638 +  Register receiver_reg   = noreg;
  1.1639 +  int      member_arg_pos = -1;
  1.1640 +  Register member_reg     = noreg;
  1.1641 +  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  1.1642 +  if (ref_kind != 0) {
  1.1643 +    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
  1.1644 +    member_reg = R19_method;  // known to be free at this point
  1.1645 +    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  1.1646 +  } else if (iid == vmIntrinsics::_invokeBasic) {
  1.1647 +    has_receiver = true;
  1.1648 +  } else {
  1.1649 +    fatal(err_msg_res("unexpected intrinsic id %d", iid));
  1.1650 +  }
  1.1651 +
  1.1652 +  if (member_reg != noreg) {
  1.1653 +    // Load the member_arg into register, if necessary.
  1.1654 +    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
  1.1655 +    VMReg r = regs[member_arg_pos].first();
  1.1656 +    if (r->is_stack()) {
  1.1657 +      __ ld(member_reg, reg2offset(r), R1_SP);
  1.1658 +    } else {
  1.1659 +      // no data motion is needed
  1.1660 +      member_reg = r->as_Register();
  1.1661 +    }
  1.1662 +  }
  1.1663 +
  1.1664 +  if (has_receiver) {
  1.1665 +    // Make sure the receiver is loaded into a register.
  1.1666 +    assert(method->size_of_parameters() > 0, "oob");
  1.1667 +    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
  1.1668 +    VMReg r = regs[0].first();
  1.1669 +    assert(r->is_valid(), "bad receiver arg");
  1.1670 +    if (r->is_stack()) {
  1.1671 +      // Porting note:  This assumes that compiled calling conventions always
  1.1672 +      // pass the receiver oop in a register.  If this is not true on some
  1.1673 +      // platform, pick a temp and load the receiver from stack.
  1.1674 +      fatal("receiver always in a register");
  1.1675 +      receiver_reg = R11_scratch1;  // TODO (hs24): is R11_scratch1 really free at this point?
  1.1676 +      __ ld(receiver_reg, reg2offset(r), R1_SP);
  1.1677 +    } else {
  1.1678 +      // no data motion is needed
  1.1679 +      receiver_reg = r->as_Register();
  1.1680 +    }
  1.1681 +  }
  1.1682 +
  1.1683 +  // Figure out which address we are really jumping to:
  1.1684 +  MethodHandles::generate_method_handle_dispatch(masm, iid,
  1.1685 +                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
  1.1686 +}
  1.1687 +
  1.1688 +#endif // COMPILER2
  1.1689 +
  1.1690 +// ---------------------------------------------------------------------------
  1.1691 +// Generate a native wrapper for a given method. The method takes arguments
  1.1692 +// in the Java compiled code convention, marshals them to the native
  1.1693 +// convention (handlizes oops, etc), transitions to native, makes the call,
  1.1694 +// returns to java state (possibly blocking), unhandlizes any result and
  1.1695 +// returns.
  1.1696 +//
   1.1697 +// Critical native functions are a shorthand for the use of
   1.1698 +// GetPrimitiveArrayCritical and disallow the use of any other JNI
   1.1699 +// functions.  The wrapper is expected to unpack the arguments before
   1.1700 +// passing them to the callee and perform checks before and after the
   1.1701 +// native call to ensure that the GC_locker
   1.1702 +// lock_critical/unlock_critical semantics are followed.  Some other
   1.1703 +// parts of JNI setup are skipped, like the tear-down of the JNI handle
   1.1704 +// block and the check for pending exceptions, because it's impossible
   1.1705 +// for them to be thrown.
  1.1706 +//
  1.1707 +// They are roughly structured like this:
  1.1708 +//   if (GC_locker::needs_gc())
  1.1709 +//     SharedRuntime::block_for_jni_critical();
   1.1710 +//   transition to thread_in_native
   1.1711 +//   unpack array arguments and call native entry point
  1.1712 +//   check for safepoint in progress
  1.1713 +//   check if any thread suspend flags are set
   1.1714 +//     call into JVM and possibly unlock the JNI critical lock
   1.1715 +//     if a GC was suppressed while in the critical native.
  1.1716 +//   transition back to thread_in_Java
  1.1717 +//   return to caller
  1.1718 +//
  1.1719 +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
  1.1720 +                                                methodHandle method,
  1.1721 +                                                int compile_id,
  1.1722 +                                                BasicType *in_sig_bt,
  1.1723 +                                                VMRegPair *in_regs,
  1.1724 +                                                BasicType ret_type) {
  1.1725 +#ifdef COMPILER2
  1.1726 +  if (method->is_method_handle_intrinsic()) {
  1.1727 +    vmIntrinsics::ID iid = method->intrinsic_id();
  1.1728 +    intptr_t start = (intptr_t)__ pc();
  1.1729 +    int vep_offset = ((intptr_t)__ pc()) - start;
  1.1730 +    gen_special_dispatch(masm,
  1.1731 +                         method,
  1.1732 +                         in_sig_bt,
  1.1733 +                         in_regs);
  1.1734 +    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
  1.1735 +    __ flush();
  1.1736 +    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
  1.1737 +    return nmethod::new_native_nmethod(method,
  1.1738 +                                       compile_id,
  1.1739 +                                       masm->code(),
  1.1740 +                                       vep_offset,
  1.1741 +                                       frame_complete,
  1.1742 +                                       stack_slots / VMRegImpl::slots_per_word,
  1.1743 +                                       in_ByteSize(-1),
  1.1744 +                                       in_ByteSize(-1),
  1.1745 +                                       (OopMapSet*)NULL);
  1.1746 +  }
  1.1747 +
  1.1748 +  bool is_critical_native = true;
  1.1749 +  address native_func = method->critical_native_function();
  1.1750 +  if (native_func == NULL) {
  1.1751 +    native_func = method->native_function();
  1.1752 +    is_critical_native = false;
  1.1753 +  }
  1.1754 +  assert(native_func != NULL, "must have function");
  1.1755 +
  1.1756 +  // First, create signature for outgoing C call
  1.1757 +  // --------------------------------------------------------------------------
  1.1758 +
  1.1759 +  int total_in_args = method->size_of_parameters();
  1.1760 +  // We have received a description of where all the java args are located
  1.1761 +  // on entry to the wrapper. We need to convert these args to where
  1.1762 +  // the jni function will expect them. To figure out where they go
  1.1763 +  // we convert the java signature to a C signature by inserting
  1.1764 +  // the hidden arguments as arg[0] and possibly arg[1] (static method)
  1.1765 +  //
  1.1766 +  // Additionally, on ppc64 we must convert integers to longs in the C
  1.1767 +  // signature. We do this in advance in order to have no trouble with
  1.1768 +  // indexes into the bt-arrays.
  1.1769 +  // So convert the signature and registers now, and adjust the total number
  1.1770 +  // of in-arguments accordingly.
  1.1771 +  int i2l_argcnt = convert_ints_to_longints_argcnt(total_in_args, in_sig_bt); // PPC64: pass ints as longs.
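          +  // After this conversion every int-like argument occupies two entries
          +  // in the bt array, (T_LONG, bt), which is exactly what the guarantees
          +  // in the argument shuffle loop below check for.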
  1.1772 +
  1.1773 +  // Calculate the total number of C arguments and create arrays for the
  1.1774 +  // signature and the outgoing registers.
  1.1775 +  // On ppc64, we have two arrays for the outgoing registers, because
  1.1776 +  // some floating-point arguments must be passed in registers _and_
  1.1777 +  // in stack locations.
  1.1778 +  bool method_is_static = method->is_static();
  1.1779 +  int  total_c_args     = i2l_argcnt;
  1.1780 +
  1.1781 +  if (!is_critical_native) {
  1.1782 +    int n_hidden_args = method_is_static ? 2 : 1;
  1.1783 +    total_c_args += n_hidden_args;
  1.1784 +  } else {
  1.1785 +    // No JNIEnv*, no this*, but unpacked arrays (base+length).
  1.1786 +    for (int i = 0; i < total_in_args; i++) {
  1.1787 +      if (in_sig_bt[i] == T_ARRAY) {
  1.1788 +        total_c_args += 2; // PPC64: T_LONG, T_INT, T_ADDRESS (see convert_ints_to_longints and c_calling_convention)
  1.1789 +      }
  1.1790 +    }
  1.1791 +  }
  1.1792 +
  1.1793 +  BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  1.1794 +  VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
  1.1795 +  VMRegPair *out_regs2  = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
  1.1796 +  BasicType* in_elem_bt = NULL;
  1.1797 +
  1.1798 +  // Create the signature for the C call:
  1.1799 +  //   1) add the JNIEnv*
  1.1800 +  //   2) add the class if the method is static
  1.1801 +  //   3) copy the rest of the incoming signature (shifted by the number of
  1.1802 +  //      hidden arguments).
  1.1803 +
  1.1804 +  int argc = 0;
  1.1805 +  if (!is_critical_native) {
  1.1806 +    convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs.
  1.1807 +
  1.1808 +    out_sig_bt[argc++] = T_ADDRESS;
  1.1809 +    if (method->is_static()) {
  1.1810 +      out_sig_bt[argc++] = T_OBJECT;
  1.1811 +    }
  1.1812 +
  1.1813 +    for (int i = 0; i < total_in_args ; i++ ) {
  1.1814 +      out_sig_bt[argc++] = in_sig_bt[i];
  1.1815 +    }
  1.1816 +  } else {
  1.1817 +    Thread* THREAD = Thread::current();
  1.1818 +    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, i2l_argcnt);
  1.1819 +    SignatureStream ss(method->signature());
  1.1820 +    int o = 0;
  1.1821 +    for (int i = 0; i < total_in_args ; i++, o++) {
  1.1822 +      if (in_sig_bt[i] == T_ARRAY) {
  1.1823 +        // Arrays are passed as int, elem* pair
  1.1824 +        Symbol* atype = ss.as_symbol(CHECK_NULL);
  1.1825 +        const char* at = atype->as_C_string();
  1.1826 +        if (strlen(at) == 2) {
  1.1827 +          assert(at[0] == '[', "must be");
  1.1828 +          switch (at[1]) {
  1.1829 +            case 'B': in_elem_bt[o] = T_BYTE; break;
  1.1830 +            case 'C': in_elem_bt[o] = T_CHAR; break;
  1.1831 +            case 'D': in_elem_bt[o] = T_DOUBLE; break;
  1.1832 +            case 'F': in_elem_bt[o] = T_FLOAT; break;
  1.1833 +            case 'I': in_elem_bt[o] = T_INT; break;
  1.1834 +            case 'J': in_elem_bt[o] = T_LONG; break;
  1.1835 +            case 'S': in_elem_bt[o] = T_SHORT; break;
  1.1836 +            case 'Z': in_elem_bt[o] = T_BOOLEAN; break;
  1.1837 +            default: ShouldNotReachHere();
  1.1838 +          }
  1.1839 +        }
  1.1840 +      } else {
  1.1841 +        in_elem_bt[o] = T_VOID;
  1.1842 +        switch(in_sig_bt[i]) { // PPC64: pass ints as longs.
  1.1843 +          case T_BOOLEAN:
  1.1844 +          case T_CHAR:
  1.1845 +          case T_BYTE:
  1.1846 +          case T_SHORT:
  1.1847 +          case T_INT: in_elem_bt[++o] = T_VOID; break;
  1.1848 +          default: break;
  1.1849 +        }
  1.1850 +      }
  1.1851 +      if (in_sig_bt[i] != T_VOID) {
  1.1852 +        assert(in_sig_bt[i] == ss.type(), "must match");
  1.1853 +        ss.next();
  1.1854 +      }
  1.1855 +    }
  1.1856 +    assert(i2l_argcnt==o, "must match");
  1.1857 +
  1.1858 +    convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs.
  1.1859 +
  1.1860 +    for (int i = 0; i < total_in_args ; i++ ) {
  1.1861 +      if (in_sig_bt[i] == T_ARRAY) {
  1.1862 +        // Arrays are passed as int, elem* pair.
  1.1863 +        out_sig_bt[argc++] = T_LONG; // PPC64: pass ints as longs.
  1.1864 +        out_sig_bt[argc++] = T_INT;
  1.1865 +        out_sig_bt[argc++] = T_ADDRESS;
  1.1866 +      } else {
  1.1867 +        out_sig_bt[argc++] = in_sig_bt[i];
  1.1868 +      }
  1.1869 +    }
  1.1870 +  }
  1.1871 +
  1.1872 +
  1.1873 +  // Compute the wrapper's frame size.
  1.1874 +  // --------------------------------------------------------------------------
  1.1875 +
  1.1876 +  // Now figure out where the args must be stored and how much stack space
  1.1877 +  // they require.
  1.1878 +  //
  1.1879 +  // Compute framesize for the wrapper. We need to handlize all oops in
  1.1880 +  // incoming registers.
  1.1881 +  //
  1.1882 +  // Calculate the total number of stack slots we will need:
  1.1883 +  //   1) abi requirements
  1.1884 +  //   2) outgoing arguments
  1.1885 +  //   3) space for inbound oop handle area
  1.1886 +  //   4) space for handlizing a klass if static method
  1.1887 +  //   5) space for a lock if synchronized method
  1.1888 +  //   6) workspace for saving return values, int <-> float reg moves, etc.
  1.1889 +  //   7) alignment
  1.1890 +  //
  1.1891 +  // Layout of the native wrapper frame:
  1.1892 +  // (stack grows upwards, memory grows downwards)
  1.1893 +  //
  1.1894 +  // NW     [ABI_REG_ARGS]             <-- 1) R1_SP
  1.1895 +  //        [outgoing arguments]       <-- 2) R1_SP + out_arg_slot_offset
  1.1896 +  //        [oopHandle area]           <-- 3) R1_SP + oop_handle_offset (save area for critical natives)
  1.1897 +  //        klass                      <-- 4) R1_SP + klass_offset
  1.1898 +  //        lock                       <-- 5) R1_SP + lock_offset
  1.1899 +  //        [workspace]                <-- 6) R1_SP + workspace_offset
  1.1900 +  //        [alignment] (optional)     <-- 7)
  1.1901 +  // caller [JIT_TOP_ABI_48]           <-- r_callers_sp
  1.1902 +  //
  1.1903 +  // - *_slot_offset Indicates offset from SP in number of stack slots.
  1.1904 +  // - *_offset      Indicates offset from SP in bytes.
  1.1905 +
  1.1906 +  int stack_slots = c_calling_convention(out_sig_bt, out_regs, out_regs2, total_c_args) // 1+2)
  1.1907 +                  + SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
  1.1908 +
  1.1909 +  // Now the space for the inbound oop handle area.
  1.1910 +  int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
  1.1911 +  if (is_critical_native) {
  1.1912 +    // Critical natives may have to call out so they need a save area
  1.1913 +    // for register arguments.
  1.1914 +    int double_slots = 0;
  1.1915 +    int single_slots = 0;
  1.1916 +    for (int i = 0; i < total_in_args; i++) {
  1.1917 +      if (in_regs[i].first()->is_Register()) {
  1.1918 +        const Register reg = in_regs[i].first()->as_Register();
  1.1919 +        switch (in_sig_bt[i]) {
  1.1920 +          case T_BOOLEAN:
  1.1921 +          case T_BYTE:
  1.1922 +          case T_SHORT:
  1.1923 +          case T_CHAR:
  1.1924 +          case T_INT:  /*single_slots++;*/ break; // PPC64: pass ints as longs.
  1.1925 +          case T_ARRAY:
  1.1926 +          case T_LONG: double_slots++; break;
  1.1927 +          default:  ShouldNotReachHere();
  1.1928 +        }
  1.1929 +      } else if (in_regs[i].first()->is_FloatRegister()) {
  1.1930 +        switch (in_sig_bt[i]) {
  1.1931 +          case T_FLOAT:  single_slots++; break;
  1.1932 +          case T_DOUBLE: double_slots++; break;
  1.1933 +          default:  ShouldNotReachHere();
  1.1934 +        }
  1.1935 +      }
  1.1936 +    }
  1.1937 +    total_save_slots = double_slots * 2 + round_to(single_slots, 2); // round to even
  1.1938 +  }
  1.1939 +
  1.1940 +  int oop_handle_slot_offset = stack_slots;
  1.1941 +  stack_slots += total_save_slots;                                                // 3)
  1.1942 +
  1.1943 +  int klass_slot_offset = 0;
  1.1944 +  int klass_offset      = -1;
  1.1945 +  if (method_is_static && !is_critical_native) {                                  // 4)
  1.1946 +    klass_slot_offset  = stack_slots;
  1.1947 +    klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
  1.1948 +    stack_slots       += VMRegImpl::slots_per_word;
  1.1949 +  }
  1.1950 +
  1.1951 +  int lock_slot_offset = 0;
  1.1952 +  int lock_offset      = -1;
  1.1953 +  if (method->is_synchronized()) {                                                // 5)
  1.1954 +    lock_slot_offset   = stack_slots;
  1.1955 +    lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
  1.1956 +    stack_slots       += VMRegImpl::slots_per_word;
  1.1957 +  }
  1.1958 +
  1.1959 +  int workspace_slot_offset = stack_slots;                                        // 6)
  1.1960 +  stack_slots         += 2;
  1.1961 +
  1.1962 +  // Now compute actual number of stack words we need.
  1.1963 +  // Rounding to make stack properly aligned.
  1.1964 +  stack_slots = round_to(stack_slots,                                             // 7)
  1.1965 +                         frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
  1.1966 +  int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
  1.1967 +
  1.1968 +
  1.1969 +  // Now we can start generating code.
  1.1970 +  // --------------------------------------------------------------------------
  1.1971 +
  1.1972 +  intptr_t start_pc = (intptr_t)__ pc();
  1.1973 +  intptr_t vep_start_pc;
  1.1974 +  intptr_t frame_done_pc;
  1.1975 +  intptr_t oopmap_pc;
  1.1976 +
  1.1977 +  Label    ic_miss;
  1.1978 +  Label    handle_pending_exception;
  1.1979 +
  1.1980 +  Register r_callers_sp = R21;
  1.1981 +  Register r_temp_1     = R22;
  1.1982 +  Register r_temp_2     = R23;
  1.1983 +  Register r_temp_3     = R24;
  1.1984 +  Register r_temp_4     = R25;
  1.1985 +  Register r_temp_5     = R26;
  1.1986 +  Register r_temp_6     = R27;
  1.1987 +  Register r_return_pc  = R28;
  1.1988 +
  1.1989 +  Register r_carg1_jnienv        = noreg;
  1.1990 +  Register r_carg2_classorobject = noreg;
  1.1991 +  if (!is_critical_native) {
  1.1992 +    r_carg1_jnienv        = out_regs[0].first()->as_Register();
  1.1993 +    r_carg2_classorobject = out_regs[1].first()->as_Register();
  1.1994 +  }
  1.1995 +
  1.1996 +
  1.1997 +  // Generate the Unverified Entry Point (UEP).
  1.1998 +  // --------------------------------------------------------------------------
  1.1999 +  assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
  1.2000 +
  1.2001 +  // Check ic: object class == cached class?
  1.2002 +  if (!method_is_static) {
   1.2003 +    Register ic = as_Register(Matcher::inline_cache_reg_encode());
   1.2004 +    Register receiver_klass = r_temp_1;
   1.2005 +
   1.2006 +    __ cmpdi(CCR0, R3_ARG1, 0);
   1.2007 +    __ beq(CCR0, ic_miss);
   1.2008 +    __ verify_oop(R3_ARG1);
   1.2009 +    __ load_klass(receiver_klass, R3_ARG1);
   1.2010 +
   1.2011 +    __ cmpd(CCR0, receiver_klass, ic);
   1.2012 +    __ bne(CCR0, ic_miss);
  1.2013 +  }
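          +  // A compiled caller enters at the UEP with the expected klass in the
          +  // inline cache register; on a mismatch the ic-miss stub re-resolves
          +  // the call.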
  1.2014 +
  1.2015 +
  1.2016 +  // Generate the Verified Entry Point (VEP).
  1.2017 +  // --------------------------------------------------------------------------
  1.2018 +  vep_start_pc = (intptr_t)__ pc();
  1.2019 +
  1.2020 +  __ save_LR_CR(r_temp_1);
  1.2021 +  __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
   1.2022 +  __ mr(r_callers_sp, R1_SP);                             // Remember frame pointer.
   1.2023 +  __ push_frame(frame_size_in_bytes, r_temp_1);           // Push the c2n adapter's frame.
  1.2024 +  frame_done_pc = (intptr_t)__ pc();
  1.2025 +
   1.2026 +  // Native nmethod wrappers never take possession of the oop arguments.
   1.2027 +  // So the caller will GC the arguments.
  1.2028 +  // The only thing we need an oopMap for is if the call is static.
  1.2029 +  //
  1.2030 +  // An OopMap for lock (and class if static), and one for the VM call itself.
  1.2031 +  OopMapSet *oop_maps = new OopMapSet();
  1.2032 +  OopMap    *oop_map  = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  1.2033 +
  1.2034 +  if (is_critical_native) {
  1.2035 +    check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt, r_temp_1);
  1.2036 +  }
  1.2037 +
  1.2038 +  // Move arguments from register/stack to register/stack.
  1.2039 +  // --------------------------------------------------------------------------
  1.2040 +  //
  1.2041 +  // We immediately shuffle the arguments so that for any vm call we have
  1.2042 +  // to make from here on out (sync slow path, jvmti, etc.) we will have
  1.2043 +  // captured the oops from our caller and have a valid oopMap for them.
  1.2044 +  //
  1.2045 +  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  1.2046 +  // (derived from JavaThread* which is in R16_thread) and, if static,
  1.2047 +  // the class mirror instead of a receiver. This pretty much guarantees that
  1.2048 +  // register layout will not match. We ignore these extra arguments during
  1.2049 +  // the shuffle. The shuffle is described by the two calling convention
  1.2050 +  // vectors we have in our possession. We simply walk the java vector to
  1.2051 +  // get the source locations and the c vector to get the destinations.
  1.2052 +
  1.2053 +  // Record sp-based slot for receiver on stack for non-static methods.
  1.2054 +  int receiver_offset = -1;
  1.2055 +
   1.2056 +  // We move the arguments backward: a floating point destination is
   1.2057 +  // always a register with a greater or equal register number, or a
   1.2058 +  // stack slot, so no source is clobbered before it has been read.
  1.2059 +  //   in  is the index of the incoming Java arguments
  1.2060 +  //   out is the index of the outgoing C arguments
  1.2061 +
  1.2062 +#ifdef ASSERT
  1.2063 +  bool reg_destroyed[RegisterImpl::number_of_registers];
  1.2064 +  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  1.2065 +  for (int r = 0 ; r < RegisterImpl::number_of_registers ; r++) {
  1.2066 +    reg_destroyed[r] = false;
  1.2067 +  }
  1.2068 +  for (int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++) {
  1.2069 +    freg_destroyed[f] = false;
  1.2070 +  }
  1.2071 +#endif // ASSERT
  1.2072 +
  1.2073 +  for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
  1.2074 +
  1.2075 +#ifdef ASSERT
  1.2076 +    if (in_regs[in].first()->is_Register()) {
  1.2077 +      assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
  1.2078 +    } else if (in_regs[in].first()->is_FloatRegister()) {
  1.2079 +      assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
  1.2080 +    }
  1.2081 +    if (out_regs[out].first()->is_Register()) {
  1.2082 +      reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
  1.2083 +    } else if (out_regs[out].first()->is_FloatRegister()) {
  1.2084 +      freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
  1.2085 +    }
  1.2086 +    if (out_regs2[out].first()->is_Register()) {
  1.2087 +      reg_destroyed[out_regs2[out].first()->as_Register()->encoding()] = true;
  1.2088 +    } else if (out_regs2[out].first()->is_FloatRegister()) {
  1.2089 +      freg_destroyed[out_regs2[out].first()->as_FloatRegister()->encoding()] = true;
  1.2090 +    }
  1.2091 +#endif // ASSERT
  1.2092 +
  1.2093 +    switch (in_sig_bt[in]) {
  1.2094 +      case T_BOOLEAN:
  1.2095 +      case T_CHAR:
  1.2096 +      case T_BYTE:
  1.2097 +      case T_SHORT:
  1.2098 +      case T_INT:
  1.2099 +        guarantee(in > 0 && in_sig_bt[in-1] == T_LONG,
  1.2100 +                  "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
  1.2101 +        break;
  1.2102 +      case T_LONG:
  1.2103 +        if (in_sig_bt[in+1] == T_VOID) {
  1.2104 +          long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
  1.2105 +        } else {
  1.2106 +          guarantee(in_sig_bt[in+1] == T_BOOLEAN || in_sig_bt[in+1] == T_CHAR  ||
  1.2107 +                    in_sig_bt[in+1] == T_BYTE    || in_sig_bt[in+1] == T_SHORT ||
  1.2108 +                    in_sig_bt[in+1] == T_INT,
  1.2109 +                 "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
  1.2110 +          int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
  1.2111 +        }
  1.2112 +        break;
  1.2113 +      case T_ARRAY:
  1.2114 +        if (is_critical_native) {
  1.2115 +          int body_arg = out;
  1.2116 +          out -= 2; // Point to length arg. PPC64: pass ints as longs.
  1.2117 +          unpack_array_argument(masm, in_regs[in], in_elem_bt[in], out_regs[body_arg], out_regs[out],
  1.2118 +                                r_callers_sp, r_temp_1, r_temp_2);
  1.2119 +          break;
  1.2120 +        }
  1.2121 +      case T_OBJECT:
  1.2122 +        assert(!is_critical_native, "no oop arguments");
  1.2123 +        object_move(masm, stack_slots,
  1.2124 +                    oop_map, oop_handle_slot_offset,
  1.2125 +                    ((in == 0) && (!method_is_static)), &receiver_offset,
  1.2126 +                    in_regs[in], out_regs[out],
  1.2127 +                    r_callers_sp, r_temp_1, r_temp_2);
  1.2128 +        break;
  1.2129 +      case T_VOID:
  1.2130 +        break;
  1.2131 +      case T_FLOAT:
  1.2132 +        float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
  1.2133 +        if (out_regs2[out].first()->is_valid()) {
  1.2134 +          float_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
  1.2135 +        }
  1.2136 +        break;
  1.2137 +      case T_DOUBLE:
  1.2138 +        double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
  1.2139 +        if (out_regs2[out].first()->is_valid()) {
  1.2140 +          double_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
  1.2141 +        }
  1.2142 +        break;
  1.2143 +      case T_ADDRESS:
  1.2144 +        fatal("found type (T_ADDRESS) in java args");
  1.2145 +        break;
  1.2146 +      default:
  1.2147 +        ShouldNotReachHere();
  1.2148 +        break;
  1.2149 +    }
  1.2150 +  }
  1.2151 +
  1.2152 +  // Pre-load a static method's oop into ARG2.
  1.2153 +  // Used both by locking code and the normal JNI call code.
  1.2154 +  if (method_is_static && !is_critical_native) {
  1.2155 +    __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
  1.2156 +                        r_carg2_classorobject);
  1.2157 +
  1.2158 +    // Now handlize the static class mirror in carg2. It's known not-null.
  1.2159 +    __ std(r_carg2_classorobject, klass_offset, R1_SP);
  1.2160 +    oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
  1.2161 +    __ addi(r_carg2_classorobject, R1_SP, klass_offset);
  1.2162 +  }
  1.2163 +
  1.2164 +  // Get JNIEnv* which is first argument to native.
  1.2165 +  if (!is_critical_native) {
  1.2166 +    __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
  1.2167 +  }
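          +  // I.e. the JNIEnv* handed to the native code is just the address of
          +  // the jni_environment field embedded in the current JavaThread.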
  1.2168 +
  1.2169 +  // NOTE:
  1.2170 +  //
  1.2171 +  // We have all of the arguments setup at this point.
  1.2172 +  // We MUST NOT touch any outgoing regs from this point on.
  1.2173 +  // So if we must call out we must push a new frame.
  1.2174 +
  1.2175 +  // Get current pc for oopmap, and load it patchable relative to global toc.
  1.2176 +  oopmap_pc = (intptr_t) __ pc();
  1.2177 +  __ calculate_address_from_global_toc(r_return_pc, (address)oopmap_pc, true, true, true, true);
  1.2178 +
  1.2179 +  // We use the same pc/oopMap repeatedly when we call out.
  1.2180 +  oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
  1.2181 +
  1.2182 +  // r_return_pc now has the pc loaded that we will use when we finally call
  1.2183 +  // to native.
  1.2184 +
  1.2185 +  // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
  1.2186 +  assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
  1.2187 +
  1.2188 +
  1.2189 +# if 0
  1.2190 +  // DTrace method entry
  1.2191 +# endif
  1.2192 +
  1.2193 +  // Lock a synchronized method.
  1.2194 +  // --------------------------------------------------------------------------
  1.2195 +
  1.2196 +  if (method->is_synchronized()) {
  1.2197 +    assert(!is_critical_native, "unhandled");
  1.2198 +    ConditionRegister r_flag = CCR1;
  1.2199 +    Register          r_oop  = r_temp_4;
  1.2200 +    const Register    r_box  = r_temp_5;
  1.2201 +    Label             done, locked;
  1.2202 +
  1.2203 +    // Load the oop for the object or class. r_carg2_classorobject contains
  1.2204 +    // either the handlized oop from the incoming arguments or the handlized
  1.2205 +    // class mirror (if the method is static).
  1.2206 +    __ ld(r_oop, 0, r_carg2_classorobject);
  1.2207 +
  1.2208 +    // Get the lock box slot's address.
  1.2209 +    __ addi(r_box, R1_SP, lock_offset);
  1.2210 +
  1.2211 +#   ifdef ASSERT
  1.2212 +    if (UseBiasedLocking) {
  1.2213 +      // Making the box point to itself will make it clear it went unused
  1.2214 +      // but also be obviously invalid.
  1.2215 +      __ std(r_box, 0, r_box);
  1.2216 +    }
  1.2217 +#   endif // ASSERT
  1.2218 +
  1.2219 +    // Try fastpath for locking.
  1.2220 +    // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
  1.2221 +    __ compiler_fast_lock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
  1.2222 +    __ beq(r_flag, locked);
  1.2223 +
  1.2224 +    // None of the above fast optimizations worked so we have to get into the
  1.2225 +    // slow case of monitor enter. Inline a special case of call_VM that
  1.2226 +    // disallows any pending_exception.
  1.2227 +
  1.2228 +    // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
  1.2229 +    int frame_size = frame::abi_reg_args_size +
  1.2230 +                     round_to(total_c_args * wordSize, frame::alignment_in_bytes);
  1.2231 +    __ mr(R11_scratch1, R1_SP);
  1.2232 +    RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs, out_regs2);
  1.2233 +
  1.2234 +    // Do the call.
  1.2235 +    __ set_last_Java_frame(R11_scratch1, r_return_pc);
  1.2236 +    assert(r_return_pc->is_nonvolatile(), "expecting return pc to be in non-volatile register");
  1.2237 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
  1.2238 +    __ reset_last_Java_frame();
  1.2239 +
  1.2240 +    RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs, out_regs2);
  1.2241 +
  1.2242 +    __ asm_assert_mem8_is_zero(thread_(pending_exception),
  1.2243 +       "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C", 0);
  1.2244 +
  1.2245 +    __ bind(locked);
  1.2246 +  }
  1.2247 +
  1.2248 +
  1.2249 +  // Publish thread state
  1.2250 +  // --------------------------------------------------------------------------
  1.2251 +
  1.2252 +  // Use that pc we placed in r_return_pc a while back as the current frame anchor.
  1.2253 +  __ set_last_Java_frame(R1_SP, r_return_pc);
  1.2254 +
  1.2255 +  // Transition from _thread_in_Java to _thread_in_native.
  1.2256 +  __ li(R0, _thread_in_native);
  1.2257 +  __ release();
  1.2258 +  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
  1.2259 +  __ stw(R0, thread_(thread_state));
  1.2260 +  if (UseMembar) {
  1.2261 +    __ fence();
  1.2262 +  }
  1.2263 +
  1.2264 +
  1.2265 +  // The JNI call
  1.2266 +  // --------------------------------------------------------------------------
  1.2267 +#if defined(ABI_ELFv2)
  1.2268 +  __ call_c(native_func, relocInfo::runtime_call_type);
  1.2269 +#else
  1.2270 +  FunctionDescriptor* fd_native_method = (FunctionDescriptor*) native_func;
  1.2271 +  __ call_c(fd_native_method, relocInfo::runtime_call_type);
  1.2272 +#endif
  1.2273 +
  1.2274 +
  1.2275 +  // Now, we are back from the native code.
  1.2276 +
  1.2277 +
  1.2278 +  // Unpack the native result.
  1.2279 +  // --------------------------------------------------------------------------
  1.2280 +
  1.2281 +  // For int-types, we do any needed sign-extension required.
  1.2282 +  // Care must be taken that the return values (R3_RET and F1_RET)
  1.2283 +  // will survive any VM calls for blocking or unlocking.
  1.2284 +  // An OOP result (handle) is done specially in the slow-path code.
  1.2285 +
  1.2286 +  switch (ret_type) {
  1.2287 +    case T_VOID:    break;        // Nothing to do!
  1.2288 +    case T_FLOAT:   break;        // Got it where we want it (unless slow-path).
  1.2289 +    case T_DOUBLE:  break;        // Got it where we want it (unless slow-path).
  1.2290 +    case T_LONG:    break;        // Got it where we want it (unless slow-path).
  1.2291 +    case T_OBJECT:  break;        // Really a handle.
  1.2292 +                                  // Cannot de-handlize until after reclaiming jvm_lock.
  1.2293 +    case T_ARRAY:   break;
  1.2294 +
  1.2295 +    case T_BOOLEAN: {             // 0 -> false(0); !0 -> true(1)
  1.2296 +      Label skip_modify;
  1.2297 +      __ cmpwi(CCR0, R3_RET, 0);
  1.2298 +      __ beq(CCR0, skip_modify);
  1.2299 +      __ li(R3_RET, 1);
  1.2300 +      __ bind(skip_modify);
  1.2301 +      break;
  1.2302 +      }
  1.2303 +    case T_BYTE: {                // sign extension
  1.2304 +      __ extsb(R3_RET, R3_RET);
  1.2305 +      break;
  1.2306 +      }
  1.2307 +    case T_CHAR: {                // unsigned result
  1.2308 +      __ andi(R3_RET, R3_RET, 0xffff);
  1.2309 +      break;
  1.2310 +      }
  1.2311 +    case T_SHORT: {               // sign extension
  1.2312 +      __ extsh(R3_RET, R3_RET);
  1.2313 +      break;
  1.2314 +      }
  1.2315 +    case T_INT:                   // nothing to do
  1.2316 +      break;
  1.2317 +    default:
  1.2318 +      ShouldNotReachHere();
  1.2319 +      break;
  1.2320 +  }
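          +  // Note: jboolean is normalized to 0/1 and the narrower int types are
          +  // re-extended because only their low-order bits are significant here;
          +  // jchar is unsigned, hence the 0xffff mask instead of sign extension.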
  1.2321 +
  1.2322 +
  1.2323 +  // Publish thread state
  1.2324 +  // --------------------------------------------------------------------------
  1.2325 +
  1.2326 +  // Switch thread to "native transition" state before reading the
  1.2327 +  // synchronization state. This additional state is necessary because reading
  1.2328 +  // and testing the synchronization state is not atomic w.r.t. GC, as this
  1.2329 +  // scenario demonstrates:
  1.2330 +  //   - Java thread A, in _thread_in_native state, loads _not_synchronized
  1.2331 +  //     and is preempted.
  1.2332 +  //   - VM thread changes sync state to synchronizing and suspends threads
  1.2333 +  //     for GC.
  1.2334 +  //   - Thread A is resumed to finish this native method, but doesn't block
  1.2335 +  //     here since it didn't see any synchronization in progress, and escapes.
  1.2336 +
  1.2337 +  // Transition from _thread_in_native to _thread_in_native_trans.
  1.2338 +  __ li(R0, _thread_in_native_trans);
  1.2339 +  __ release();
  1.2340 +  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
  1.2341 +  __ stw(R0, thread_(thread_state));
  1.2342 +
  1.2343 +
  1.2344 +  // Must we block?
  1.2345 +  // --------------------------------------------------------------------------
  1.2346 +
  1.2347 +  // Block, if necessary, before resuming in _thread_in_Java state.
  1.2348 +  // In order for GC to work, don't clear the last_Java_sp until after blocking.
  1.2349 +  Label after_transition;
  1.2350 +  {
  1.2351 +    Label no_block, sync;
  1.2352 +
  1.2353 +    if (os::is_MP()) {
  1.2354 +      if (UseMembar) {
  1.2355 +        // Force this write out before the read below.
  1.2356 +        __ fence();
  1.2357 +      } else {
  1.2358 +        // Write serialization page so VM thread can do a pseudo remote membar.
  1.2359 +        // We use the current thread pointer to calculate a thread specific
  1.2360 +        // offset to write to within the page. This minimizes bus traffic
  1.2361 +        // due to cache line collision.
  1.2362 +        __ serialize_memory(R16_thread, r_temp_4, r_temp_5);
  1.2363 +      }
  1.2364 +    }
  1.2365 +
  1.2366 +    Register sync_state_addr = r_temp_4;
  1.2367 +    Register sync_state      = r_temp_5;
  1.2368 +    Register suspend_flags   = r_temp_6;
  1.2369 +
  1.2370 +    __ load_const(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/ sync_state);
  1.2371 +
  1.2372 +    // TODO: PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
  1.2373 +    __ lwz(sync_state, 0, sync_state_addr);
  1.2374 +
  1.2375 +    // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
  1.2376 +    __ lwz(suspend_flags, thread_(suspend_flags));
  1.2377 +
  1.2378 +    __ acquire();
  1.2379 +
  1.2380 +    Label do_safepoint;
  1.2381 +    // No synchronization in progress nor yet synchronized.
  1.2382 +    __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
  1.2383 +    // Not suspended.
  1.2384 +    __ cmpwi(CCR1, suspend_flags, 0);
  1.2385 +
  1.2386 +    __ bne(CCR0, sync);
  1.2387 +    __ beq(CCR1, no_block);
  1.2388 +
  1.2389 +    // Block. Save any potential method result value before the operation and
  1.2390 +    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
  1.2391 +    // lets us share the oopMap we used when we went native rather than create
  1.2392 +    // a distinct one for this pc.
  1.2393 +    __ bind(sync);
  1.2394 +
  1.2395 +    address entry_point = is_critical_native
  1.2396 +      ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
  1.2397 +      : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
  1.2398 +    save_native_result(masm, ret_type, workspace_slot_offset);
  1.2399 +    __ call_VM_leaf(entry_point, R16_thread);
  1.2400 +    restore_native_result(masm, ret_type, workspace_slot_offset);
  1.2401 +
  1.2402 +    if (is_critical_native) {
  1.2403 +      __ b(after_transition); // No thread state transition here.
  1.2404 +    }
  1.2405 +    __ bind(no_block);
  1.2406 +  }
  1.2407 +
  1.2408 +  // Publish thread state.
  1.2409 +  // --------------------------------------------------------------------------
  1.2410 +
   1.2411 +  // Thread state is _thread_in_native_trans. Any safepoint blocking has
   1.2412 +  // already happened, so we can now change state to _thread_in_Java.
  1.2413 +
  1.2414 +  // Transition from _thread_in_native_trans to _thread_in_Java.
  1.2415 +  __ li(R0, _thread_in_Java);
  1.2416 +  __ release();
  1.2417 +  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
  1.2418 +  __ stw(R0, thread_(thread_state));
  1.2419 +  if (UseMembar) {
  1.2420 +    __ fence();
  1.2421 +  }
  1.2422 +  __ bind(after_transition);
  1.2423 +
  1.2424 +  // Reguard any pages if necessary.
  1.2425 +  // --------------------------------------------------------------------------
  1.2426 +
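          +  // A stack overflow during the native call may have disabled the yellow
          +  // guard zone; if so, re-enable it before returning to Java code.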
  1.2427 +  Label no_reguard;
  1.2428 +  __ lwz(r_temp_1, thread_(stack_guard_state));
  1.2429 +  __ cmpwi(CCR0, r_temp_1, JavaThread::stack_guard_yellow_disabled);
  1.2430 +  __ bne(CCR0, no_reguard);
  1.2431 +
  1.2432 +  save_native_result(masm, ret_type, workspace_slot_offset);
  1.2433 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
  1.2434 +  restore_native_result(masm, ret_type, workspace_slot_offset);
  1.2435 +
  1.2436 +  __ bind(no_reguard);
  1.2437 +
  1.2438 +
  1.2439 +  // Unlock
  1.2440 +  // --------------------------------------------------------------------------
  1.2441 +
  1.2442 +  if (method->is_synchronized()) {
  1.2443 +
  1.2444 +    ConditionRegister r_flag   = CCR1;
  1.2445 +    const Register r_oop       = r_temp_4;
  1.2446 +    const Register r_box       = r_temp_5;
  1.2447 +    const Register r_exception = r_temp_6;
  1.2448 +    Label done;
  1.2449 +
  1.2450 +    // Get oop and address of lock object box.
  1.2451 +    if (method_is_static) {
  1.2452 +      assert(klass_offset != -1, "");
  1.2453 +      __ ld(r_oop, klass_offset, R1_SP);
  1.2454 +    } else {
  1.2455 +      assert(receiver_offset != -1, "");
  1.2456 +      __ ld(r_oop, receiver_offset, R1_SP);
  1.2457 +    }
  1.2458 +    __ addi(r_box, R1_SP, lock_offset);
  1.2459 +
  1.2460 +    // Try fastpath for unlocking.
  1.2461 +    __ compiler_fast_unlock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
  1.2462 +    __ beq(r_flag, done);
  1.2463 +
  1.2464 +    // Save and restore any potential method result value around the unlocking operation.
  1.2465 +    save_native_result(masm, ret_type, workspace_slot_offset);
  1.2466 +
  1.2467 +    // Must save pending exception around the slow-path VM call. Since it's a
  1.2468 +    // leaf call, the pending exception (if any) can be kept in a register.
  1.2469 +    __ ld(r_exception, thread_(pending_exception));
  1.2470 +    assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
  1.2471 +    __ li(R0, 0);
  1.2472 +    __ std(R0, thread_(pending_exception));
  1.2473 +
   1.2474 +    // Slow case of monitor exit.
  1.2475 +    // Inline a special case of call_VM that disallows any pending_exception.
  1.2476 +    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box);
  1.2477 +
  1.2478 +    __ asm_assert_mem8_is_zero(thread_(pending_exception),
  1.2479 +       "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0);
  1.2480 +
  1.2481 +    restore_native_result(masm, ret_type, workspace_slot_offset);
  1.2482 +
   1.2483 +    // Check_forward_pending_exception jumps to forward_exception if any pending
  1.2484 +    // exception is set. The forward_exception routine expects to see the
  1.2485 +    // exception in pending_exception and not in a register. Kind of clumsy,
  1.2486 +    // since all folks who branch to forward_exception must have tested
  1.2487 +    // pending_exception first and hence have it in a register already.
  1.2488 +    __ std(r_exception, thread_(pending_exception));
  1.2489 +
  1.2490 +    __ bind(done);
  1.2491 +  }
  1.2492 +
  1.2493 +# if 0
  1.2494 +  // DTrace method exit
  1.2495 +# endif
  1.2496 +
  1.2497 +  // Clear "last Java frame" SP and PC.
  1.2498 +  // --------------------------------------------------------------------------
  1.2499 +
  1.2500 +  __ reset_last_Java_frame();
  1.2501 +
  1.2502 +  // Unpack oop result.
  1.2503 +  // --------------------------------------------------------------------------
  1.2504 +
  1.2505 +  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
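          +    // Object results are returned as JNI handles; a non-NULL handle must
          +    // be dereferenced to obtain the actual oop.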
  1.2506 +    Label skip_unboxing;
  1.2507 +    __ cmpdi(CCR0, R3_RET, 0);
  1.2508 +    __ beq(CCR0, skip_unboxing);
  1.2509 +    __ ld(R3_RET, 0, R3_RET);
  1.2510 +    __ bind(skip_unboxing);
  1.2511 +    __ verify_oop(R3_RET);
  1.2512 +  }
  1.2513 +
  1.2514 +
  1.2515 +  // Reset handle block.
  1.2516 +  // --------------------------------------------------------------------------
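          +  // Storing zero to the handle block's top offset releases all JNI local
          +  // handles created during this native call.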
   1.2517 +  if (!is_critical_native) {
   1.2518 +    __ ld(r_temp_1, thread_(active_handles));
   1.2519 +    // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
   1.2520 +    __ li(r_temp_2, 0);
   1.2521 +    __ stw(r_temp_2, JNIHandleBlock::top_offset_in_bytes(), r_temp_1);
   1.2522 +
   1.2523 +
   1.2524 +    // Check for pending exceptions.
   1.2525 +    // ------------------------------------------------------------------------
   1.2526 +    __ ld(r_temp_2, thread_(pending_exception));
   1.2527 +    __ cmpdi(CCR0, r_temp_2, 0);
   1.2528 +    __ bne(CCR0, handle_pending_exception);
   1.2529 +  }
  1.2530 +
  1.2531 +  // Return
  1.2532 +  // --------------------------------------------------------------------------
  1.2533 +
  1.2534 +  __ pop_frame();
  1.2535 +  __ restore_LR_CR(R11);
  1.2536 +  __ blr();
  1.2537 +
  1.2538 +
  1.2539 +  // Handler for pending exceptions (out-of-line).
  1.2540 +  // --------------------------------------------------------------------------
  1.2541 +
  1.2542 +  // Since this is a native call, we know the proper exception handler
  1.2543 +  // is the empty function. We just pop this frame and then jump to
  1.2544 +  // forward_exception_entry.
   1.2545 +  if (!is_critical_native) {
   1.2546 +    __ align(InteriorEntryAlignment);
   1.2547 +    __ bind(handle_pending_exception);
   1.2548 +
   1.2549 +    __ pop_frame();
   1.2550 +    __ restore_LR_CR(R11);
   1.2551 +    __ b64_patchable((address)StubRoutines::forward_exception_entry(),
   1.2552 +                     relocInfo::runtime_call_type);
   1.2553 +  }
  1.2554 +
   1.2555 +  // Handler for an inline cache miss (out-of-line).
  1.2556 +  // --------------------------------------------------------------------------
  1.2557 +
   1.2558 +  if (!method_is_static) {
   1.2559 +    __ align(InteriorEntryAlignment);
   1.2560 +    __ bind(ic_miss);
   1.2561 +
   1.2562 +    __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
   1.2563 +                     relocInfo::runtime_call_type);
   1.2564 +  }
  1.2565 +
  1.2566 +  // Done.
  1.2567 +  // --------------------------------------------------------------------------
  1.2568 +
  1.2569 +  __ flush();
  1.2570 +
  1.2571 +  nmethod *nm = nmethod::new_native_nmethod(method,
  1.2572 +                                            compile_id,
  1.2573 +                                            masm->code(),
  1.2574 +                                            vep_start_pc-start_pc,
  1.2575 +                                            frame_done_pc-start_pc,
  1.2576 +                                            stack_slots / VMRegImpl::slots_per_word,
  1.2577 +                                            (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
  1.2578 +                                            in_ByteSize(lock_offset),
  1.2579 +                                            oop_maps);
  1.2580 +
  1.2581 +  if (is_critical_native) {
  1.2582 +    nm->set_lazy_critical_native(true);
  1.2583 +  }
  1.2584 +
  1.2585 +  return nm;
  1.2586 +#else
  1.2587 +  ShouldNotReachHere();
  1.2588 +  return NULL;
  1.2589 +#endif // COMPILER2
  1.2590 +}
  1.2591 +
   1.2592 +// This function returns the adjustment size (in number of words) to a c2i adapter
   1.2593 +// activation for use during deoptimization.
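          +//
          +// For example, assuming Interpreter::stackElementWords == 1 and
          +// frame::alignment_in_bytes == 16 (typical values for this port), a callee
          +// with 10 locals and 3 parameters yields round_to(7, 16) == 16 words.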
  1.2594 +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  1.2595 +  return round_to((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::alignment_in_bytes);
  1.2596 +}
  1.2597 +
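          +// Size of the JIT ABI's out-preserve area, frame::jit_out_preserve_size,
          +// expressed in 32-bit VMReg stack slots.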
  1.2598 +uint SharedRuntime::out_preserve_stack_slots() {
  1.2599 +#ifdef COMPILER2
  1.2600 +  return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
  1.2601 +#else
  1.2602 +  return 0;
  1.2603 +#endif
  1.2604 +}
  1.2605 +
  1.2606 +#ifdef COMPILER2
  1.2607 +// Frame generation for deopt and uncommon trap blobs.
  1.2608 +static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
  1.2609 +                                /* Read */
  1.2610 +                                Register unroll_block_reg,
  1.2611 +                                /* Update */
  1.2612 +                                Register frame_sizes_reg,
  1.2613 +                                Register number_of_frames_reg,
  1.2614 +                                Register pcs_reg,
  1.2615 +                                /* Invalidate */
  1.2616 +                                Register frame_size_reg,
  1.2617 +                                Register pc_reg) {
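          +  // Fetch the next frame's pc and size from the UnrollBlock arrays, store
          +  // the pc as the return address of the current top frame, push the new
          +  // skeletal frame, and advance the array cursors.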
  1.2618 +
  1.2619 +  __ ld(pc_reg, 0, pcs_reg);
  1.2620 +  __ ld(frame_size_reg, 0, frame_sizes_reg);
  1.2621 +  __ std(pc_reg, _abi(lr), R1_SP);
  1.2622 +  __ push_frame(frame_size_reg, R0/*tmp*/);
  1.2623 +#ifdef CC_INTERP
  1.2624 +  __ std(R1_SP, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
  1.2625 +#else
  1.2626 +#ifdef ASSERT
  1.2627 +  __ load_const_optimized(pc_reg, 0x5afe);
  1.2628 +  __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
  1.2629 +#endif
  1.2630 +  __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
  1.2631 +#endif // CC_INTERP
  1.2632 +  __ addi(number_of_frames_reg, number_of_frames_reg, -1);
  1.2633 +  __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
  1.2634 +  __ addi(pcs_reg, pcs_reg, wordSize);
  1.2635 +}
  1.2636 +
  1.2637 +// Loop through the UnrollBlock info and create new frames.
  1.2638 +static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
  1.2639 +                                 /* read */
  1.2640 +                                 Register unroll_block_reg,
  1.2641 +                                 /* invalidate */
  1.2642 +                                 Register frame_sizes_reg,
  1.2643 +                                 Register number_of_frames_reg,
  1.2644 +                                 Register pcs_reg,
  1.2645 +                                 Register frame_size_reg,
  1.2646 +                                 Register pc_reg) {
  1.2647 +  Label loop;
  1.2648 +
   1.2649 +  // _number_of_frames is of type int (deoptimization.hpp).
  1.2650 +  __ lwa(number_of_frames_reg,
  1.2651 +             Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(),
  1.2652 +             unroll_block_reg);
  1.2653 +  __ ld(pcs_reg,
  1.2654 +            Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(),
  1.2655 +            unroll_block_reg);
  1.2656 +  __ ld(frame_sizes_reg,
  1.2657 +            Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(),
  1.2658 +            unroll_block_reg);
  1.2659 +
  1.2660 +  // stack: (caller_of_deoptee, ...).
  1.2661 +
   1.2662 +  // At this point we either have an interpreter frame or a compiled
   1.2663 +  // frame on top of the stack. If it is a compiled frame, we push a new
   1.2664 +  // c2i adapter here.
  1.2665 +
  1.2666 +  // Memorize top-frame stack-pointer.
  1.2667 +  __ mr(frame_size_reg/*old_sp*/, R1_SP);
  1.2668 +
  1.2669 +  // Resize interpreter top frame OR C2I adapter.
  1.2670 +
  1.2671 +  // At this moment, the top frame (which is the caller of the deoptee) is
  1.2672 +  // an interpreter frame or a newly pushed C2I adapter or an entry frame.
  1.2673 +  // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
  1.2674 +  // outgoing arguments.
  1.2675 +  //
  1.2676 +  // In order to push the interpreter frame for the deoptee, we need to
  1.2677 +  // resize the top frame such that we are able to place the deoptee's
  1.2678 +  // locals in the frame.
  1.2679 +  // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
  1.2680 +  // into a valid PARENT_IJAVA_FRAME_ABI.
  1.2681 +
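          +  // caller_adjustment is the number of bytes by which the caller's frame
          +  // must grow; negate it because the stack grows toward lower addresses.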
  1.2682 +  __ lwa(R11_scratch1,
  1.2683 +             Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(),
  1.2684 +             unroll_block_reg);
  1.2685 +  __ neg(R11_scratch1, R11_scratch1);
  1.2686 +
  1.2687 +  // R11_scratch1 contains size of locals for frame resizing.
  1.2688 +  // R12_scratch2 contains top frame's lr.
  1.2689 +
   1.2690 +  // Resizing the frame by the complete frame size prevents the TOC from
   1.2691 +  // being overwritten by locals. A more stack-space-saving way would be
   1.2692 +  // to copy the TOC to its location in the new ABI.
   1.2693 +  __ addi(R11_scratch1, R11_scratch1, -frame::parent_ijava_frame_abi_size);
  1.2694 +
   1.2695 +  // Now resize the frame.
  1.2696 +  __ resize_frame(R11_scratch1, pc_reg/*tmp*/);
  1.2697 +
  1.2698 +  // In the case where we have resized a c2i frame above, the optional
  1.2699 +  // alignment below the locals has size 32 (why?).
  1.2700 +  __ std(R12_scratch2, _abi(lr), R1_SP);
  1.2701 +
  1.2702 +  // Initialize initial_caller_sp.
  1.2703 +#ifdef CC_INTERP
  1.2704 +  __ std(frame_size_reg/*old_sp*/, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
  1.2705 +#else
  1.2706 +#ifdef ASSERT
   1.2707 +  __ load_const_optimized(pc_reg, 0x5afe);
   1.2708 +  __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
   1.2709 +#endif
   1.2710 +  __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);
  1.2711 +#endif // CC_INTERP
  1.2712 +
  1.2713 +#ifdef ASSERT
  1.2714 +  // Make sure that there is at least one entry in the array.
  1.2715 +  __ cmpdi(CCR0, number_of_frames_reg, 0);
  1.2716 +  __ asm_assert_ne("array_size must be > 0", 0x205);
  1.2717 +#endif
  1.2718 +
  1.2719 +  // Now push the new interpreter frames.
  1.2720 +  //
  1.2721 +  __ bind(loop);
  1.2722 +  // Allocate a new frame, fill in the pc.
  1.2723 +  push_skeleton_frame(masm, deopt,
  1.2724 +                      unroll_block_reg,
  1.2725 +                      frame_sizes_reg,
  1.2726 +                      number_of_frames_reg,
  1.2727 +                      pcs_reg,
  1.2728 +                      frame_size_reg,
  1.2729 +                      pc_reg);
  1.2730 +  __ cmpdi(CCR0, number_of_frames_reg, 0);
  1.2731 +  __ bne(CCR0, loop);
  1.2732 +
  1.2733 +  // Get the return address pointing into the frame manager.
  1.2734 +  __ ld(R0, 0, pcs_reg);
  1.2735 +  // Store it in the top interpreter frame.
  1.2736 +  __ std(R0, _abi(lr), R1_SP);
  1.2737 +  // Initialize frame_manager_lr of interpreter top frame.
  1.2738 +#ifdef CC_INTERP
  1.2739 +  __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
  1.2740 +#endif
  1.2741 +}
  1.2742 +#endif
  1.2743 +
  1.2744 +void SharedRuntime::generate_deopt_blob() {
  1.2745 +  // Allocate space for the code
  1.2746 +  ResourceMark rm;
  1.2747 +  // Setup code generation tools
  1.2748 +  CodeBuffer buffer("deopt_blob", 2048, 1024);
  1.2749 +  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  1.2750 +  Label exec_mode_initialized;
  1.2751 +  int frame_size_in_words;
  1.2752 +  OopMap* map = NULL;
  1.2753 +  OopMapSet *oop_maps = new OopMapSet();
  1.2754 +
   1.2755 +  // Size of ABI112 plus spill slots for R3_RET and F1_RET.
  1.2756 +  const int frame_size_in_bytes = frame::abi_reg_args_spill_size;
  1.2757 +  const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
  1.2758 +  int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.
  1.2759 +
  1.2760 +  const Register exec_mode_reg = R21_tmp1;
  1.2761 +
  1.2762 +  const address start = __ pc();
  1.2763 +
  1.2764 +#ifdef COMPILER2
  1.2765 +  // --------------------------------------------------------------------------
   1.2766 +  // Prolog for the non-exception case!
  1.2767 +
  1.2768 +  // We have been called from the deopt handler of the deoptee.
  1.2769 +  //
  1.2770 +  // deoptee:
  1.2771 +  //                      ...
  1.2772 +  //                      call X
  1.2773 +  //                      ...
  1.2774 +  //  deopt_handler:      call_deopt_stub
  1.2775 +  //  cur. return pc  --> ...
  1.2776 +  //
  1.2777 +  // So currently SR_LR points behind the call in the deopt handler.
  1.2778 +  // We adjust it such that it points to the start of the deopt handler.
  1.2779 +  // The return_pc has been stored in the frame of the deoptee and
  1.2780 +  // will replace the address of the deopt_handler in the call
  1.2781 +  // to Deoptimization::fetch_unroll_info below.
  1.2782 +  // We can't grab a free register here, because all registers may
  1.2783 +  // contain live values, so let the RegisterSaver do the adjustment
  1.2784 +  // of the return pc.
  1.2785 +  const int return_pc_adjustment_no_exception = -HandlerImpl::size_deopt_handler();
  1.2786 +
   1.2787 +  // Push the "unpack frame".
  1.2788 +  // Save everything in sight.
  1.2789 +  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
  1.2790 +                                                                   &first_frame_size_in_bytes,
  1.2791 +                                                                   /*generate_oop_map=*/ true,
  1.2792 +                                                                   return_pc_adjustment_no_exception,
  1.2793 +                                                                   RegisterSaver::return_pc_is_lr);
  1.2794 +  assert(map != NULL, "OopMap must have been created");
  1.2795 +
  1.2796 +  __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
  1.2797 +  // Save exec mode for unpack_frames.
  1.2798 +  __ b(exec_mode_initialized);
  1.2799 +
  1.2800 +  // --------------------------------------------------------------------------
  1.2801 +  // Prolog for exception case
  1.2802 +
  1.2803 +  // An exception is pending.
  1.2804 +  // We have been called with a return (interpreter) or a jump (exception blob).
  1.2805 +  //
  1.2806 +  // - R3_ARG1: exception oop
  1.2807 +  // - R4_ARG2: exception pc
  1.2808 +
  1.2809 +  int exception_offset = __ pc() - start;
  1.2810 +
  1.2811 +  BLOCK_COMMENT("Prolog for exception case");
  1.2812 +
   1.2813 +  // The RegisterSaver doesn't need to adjust the return pc for this situation.
  1.2814 +  const int return_pc_adjustment_exception = 0;
  1.2815 +
  1.2816 +  // Push the "unpack frame".
  1.2817 +  // Save everything in sight.
  1.2818 +  assert(R4 == R4_ARG2, "exception pc must be in r4");
  1.2819 +  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
  1.2820 +                                                             &first_frame_size_in_bytes,
  1.2821 +                                                             /*generate_oop_map=*/ false,
  1.2822 +                                                             return_pc_adjustment_exception,
  1.2823 +                                                             RegisterSaver::return_pc_is_r4);
  1.2824 +
  1.2825 +  // Deopt during an exception. Save exec mode for unpack_frames.
  1.2826 +  __ li(exec_mode_reg, Deoptimization::Unpack_exception);
  1.2827 +
  1.2828 +  // Store exception oop and pc in thread (location known to GC).
  1.2829 +  // This is needed since the call to "fetch_unroll_info()" may safepoint.
  1.2830 +  __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  1.2831 +  __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
  1.2832 +
  1.2833 +  // fall through
  1.2834 +
  1.2835 +  // --------------------------------------------------------------------------
  1.2836 +  __ BIND(exec_mode_initialized);
  1.2837 +
  1.2838 +  {
  1.2839 +  const Register unroll_block_reg = R22_tmp2;
  1.2840 +
  1.2841 +  // We need to set `last_Java_frame' because `fetch_unroll_info' will
  1.2842 +  // call `last_Java_frame()'. The value of the pc in the frame is not
  1.2843 +  // particularly important. It just needs to identify this blob.
  1.2844 +  __ set_last_Java_frame(R1_SP, noreg);
  1.2845 +
  1.2846 +  // With EscapeAnalysis turned on, this call may safepoint!
  1.2847 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread);
  1.2848 +  address calls_return_pc = __ last_calls_return_pc();
  1.2849 +  // Set an oopmap for the call site that describes all our saved registers.
  1.2850 +  oop_maps->add_gc_map(calls_return_pc - start, map);
  1.2851 +
  1.2852 +  __ reset_last_Java_frame();
  1.2853 +  // Save the return value.
  1.2854 +  __ mr(unroll_block_reg, R3_RET);
  1.2855 +
   1.2856 +  // Restore only the result registers that have been saved by
   1.2857 +  // push_frame_reg_args_and_save_live_registers(...).
  1.2858 +  RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);
  1.2859 +
  1.2860 +  // In excp_deopt_mode, restore and clear exception oop which we
  1.2861 +  // stored in the thread during exception entry above. The exception
  1.2862 +  // oop will be the return value of this stub.
  1.2863 +  Label skip_restore_excp;
  1.2864 +  __ cmpdi(CCR0, exec_mode_reg, Deoptimization::Unpack_exception);
  1.2865 +  __ bne(CCR0, skip_restore_excp);
  1.2866 +  __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  1.2867 +  __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  1.2868 +  __ li(R0, 0);
  1.2869 +  __ std(R0, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
  1.2870 +  __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  1.2871 +  __ BIND(skip_restore_excp);
  1.2872 +
   1.2873 +  // Reload narrow_oop_base.
  1.2874 +  if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
  1.2875 +    __ load_const_optimized(R30, Universe::narrow_oop_base());
  1.2876 +  }
  1.2877 +
  1.2878 +  __ pop_frame();
  1.2879 +
  1.2880 +  // stack: (deoptee, optional i2c, caller of deoptee, ...).
  1.2881 +
  1.2882 +  // pop the deoptee's frame
  1.2883 +  __ pop_frame();
  1.2884 +
  1.2885 +  // stack: (caller_of_deoptee, ...).
  1.2886 +
  1.2887 +  // Loop through the `UnrollBlock' info and create interpreter frames.
  1.2888 +  push_skeleton_frames(masm, true/*deopt*/,
  1.2889 +                       unroll_block_reg,
  1.2890 +                       R23_tmp3,
  1.2891 +                       R24_tmp4,
  1.2892 +                       R25_tmp5,
  1.2893 +                       R26_tmp6,
  1.2894 +                       R27_tmp7);
  1.2895 +
  1.2896 +  // stack: (skeletal interpreter frame, ..., optional skeletal
  1.2897 +  // interpreter frame, optional c2i, caller of deoptee, ...).
  1.2898 +  }
  1.2899 +
   1.2900 +  // Push an `unpack_frame' taking care of float / int return values.
  1.2901 +  __ push_frame(frame_size_in_bytes, R0/*tmp*/);
  1.2902 +
  1.2903 +  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  1.2904 +  // skeletal interpreter frame, optional c2i, caller of deoptee,
  1.2905 +  // ...).
  1.2906 +
  1.2907 +  // Spill live volatile registers since we'll do a call.
  1.2908 +  __ std( R3_RET, _abi_reg_args_spill(spill_ret),  R1_SP);
  1.2909 +  __ stfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);
  1.2910 +
   1.2911 +  // Let the unpacker lay out information in the skeletal frames just
   1.2912 +  // allocated.
  1.2913 +  __ get_PC_trash_LR(R3_RET);
  1.2914 +  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
  1.2915 +  // This is a call to a LEAF method, so no oop map is required.
  1.2916 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
  1.2917 +                  R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
  1.2918 +  __ reset_last_Java_frame();
  1.2919 +
  1.2920 +  // Restore the volatiles saved above.
  1.2921 +  __ ld( R3_RET, _abi_reg_args_spill(spill_ret),  R1_SP);
  1.2922 +  __ lfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);
  1.2923 +
  1.2924 +  // Pop the unpack frame.
  1.2925 +  __ pop_frame();
  1.2926 +  __ restore_LR_CR(R0);
  1.2927 +
  1.2928 +  // stack: (top interpreter frame, ..., optional interpreter frame,
  1.2929 +  // optional c2i, caller of deoptee, ...).
  1.2930 +
  1.2931 +  // Initialize R14_state.
  1.2932 +#ifdef CC_INTERP
  1.2933 +  __ ld(R14_state, 0, R1_SP);
  1.2934 +  __ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
   1.2935 +  // Also initialize R15_prev_state.
  1.2936 +  __ restore_prev_state();
  1.2937 +#else
  1.2938 +  __ restore_interpreter_state(R11_scratch1);
  1.2939 +  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
  1.2940 +#endif // CC_INTERP
  1.2941 +
  1.2942 +
  1.2943 +  // Return to the interpreter entry point.
  1.2944 +  __ blr();
  1.2945 +  __ flush();
  1.2946 +#else // COMPILER2
  1.2947 +  __ unimplemented("deopt blob needed only with compiler");
  1.2948 +  int exception_offset = __ pc() - start;
  1.2949 +#endif // COMPILER2
  1.2950 +
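          +  // Entry point offsets recorded in the blob: 0 (the blob start) for the
          +  // normal case and exception_offset for the exception case.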
  1.2951 +  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, 0, first_frame_size_in_bytes / wordSize);
  1.2952 +}
  1.2953 +
  1.2954 +#ifdef COMPILER2
  1.2955 +void SharedRuntime::generate_uncommon_trap_blob() {
  1.2956 +  // Allocate space for the code.
  1.2957 +  ResourceMark rm;
  1.2958 +  // Setup code generation tools.
  1.2959 +  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
  1.2960 +  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  1.2961 +  address start = __ pc();
  1.2962 +
  1.2963 +  Register unroll_block_reg = R21_tmp1;
  1.2964 +  Register klass_index_reg  = R22_tmp2;
  1.2965 +  Register unc_trap_reg     = R23_tmp3;
  1.2966 +
  1.2967 +  OopMapSet* oop_maps = new OopMapSet();
  1.2968 +  int frame_size_in_bytes = frame::abi_reg_args_size;
  1.2969 +  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
  1.2970 +
  1.2971 +  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
  1.2972 +
  1.2973 +  // Push a dummy `unpack_frame' and call
  1.2974 +  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  1.2975 +  // vframe array and return the `UnrollBlock' information.
  1.2976 +
  1.2977 +  // Save LR to compiled frame.
  1.2978 +  __ save_LR_CR(R11_scratch1);
  1.2979 +
  1.2980 +  // Push an "uncommon_trap" frame.
  1.2981 +  __ push_frame_reg_args(0, R11_scratch1);
  1.2982 +
  1.2983 +  // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).
  1.2984 +
  1.2985 +  // Set the `unpack_frame' as last_Java_frame.
  1.2986 +  // `Deoptimization::uncommon_trap' expects it and considers its
  1.2987 +  // sender frame as the deoptee frame.
  1.2988 +  // Remember the offset of the instruction whose address will be
  1.2989 +  // moved to R11_scratch1.
  1.2990 +  address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);
  1.2991 +
  1.2992 +  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
  1.2993 +
  1.2994 +  __ mr(klass_index_reg, R3);
  1.2995 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
  1.2996 +                  R16_thread, klass_index_reg);
  1.2997 +
  1.2998 +  // Set an oopmap for the call site.
  1.2999 +  oop_maps->add_gc_map(gc_map_pc - start, map);
  1.3000 +
  1.3001 +  __ reset_last_Java_frame();
  1.3002 +
  1.3003 +  // Pop the `unpack frame'.
  1.3004 +  __ pop_frame();
  1.3005 +
  1.3006 +  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
  1.3007 +
  1.3008 +  // Save the return value.
  1.3009 +  __ mr(unroll_block_reg, R3_RET);
  1.3010 +
  1.3011 +  // Pop the uncommon_trap frame.
  1.3012 +  __ pop_frame();
  1.3013 +
  1.3014 +  // stack: (caller_of_deoptee, ...).
  1.3015 +
  1.3016 +  // Allocate new interpreter frame(s) and possibly a c2i adapter
  1.3017 +  // frame.
  1.3018 +  push_skeleton_frames(masm, false/*deopt*/,
  1.3019 +                       unroll_block_reg,
  1.3020 +                       R22_tmp2,
  1.3021 +                       R23_tmp3,
  1.3022 +                       R24_tmp4,
  1.3023 +                       R25_tmp5,
  1.3024 +                       R26_tmp6);
  1.3025 +
  1.3026 +  // stack: (skeletal interpreter frame, ..., optional skeletal
  1.3027 +  // interpreter frame, optional c2i, caller of deoptee, ...).
  1.3028 +
  1.3029 +  // Push a dummy `unpack_frame' taking care of float return values.
   1.3030 +  // Call `Deoptimization::unpack_frames' to lay out information in the
   1.3031 +  // interpreter frames just created.
  1.3032 +
  1.3033 +  // Push a simple "unpack frame" here.
  1.3034 +  __ push_frame_reg_args(0, R11_scratch1);
  1.3035 +
  1.3036 +  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  1.3037 +  // skeletal interpreter frame, optional c2i, caller of deoptee,
  1.3038 +  // ...).
  1.3039 +
  1.3040 +  // Set the "unpack_frame" as last_Java_frame.
  1.3041 +  __ get_PC_trash_LR(R11_scratch1);
  1.3042 +  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
  1.3043 +
  1.3044 +  // Indicate it is the uncommon trap case.
  1.3045 +  __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
   1.3046 +  // Let the unpacker lay out information in the skeletal frames just
   1.3047 +  // allocated.
  1.3048 +  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
  1.3049 +                  R16_thread, unc_trap_reg);
  1.3050 +
  1.3051 +  __ reset_last_Java_frame();
  1.3052 +  // Pop the `unpack frame'.
  1.3053 +  __ pop_frame();
  1.3054 +  // Restore LR from top interpreter frame.
  1.3055 +  __ restore_LR_CR(R11_scratch1);
  1.3056 +
  1.3057 +  // stack: (top interpreter frame, ..., optional interpreter frame,
  1.3058 +  // optional c2i, caller of deoptee, ...).
  1.3059 +
  1.3060 +#ifdef CC_INTERP
  1.3061 +  // Initialize R14_state, ...
  1.3062 +  __ ld(R11_scratch1, 0, R1_SP);
  1.3063 +  __ addi(R14_state, R11_scratch1, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
   1.3064 +  // Also initialize R15_prev_state.
  1.3065 +  __ restore_prev_state();
  1.3066 +#else
  1.3067 +  __ restore_interpreter_state(R11_scratch1);
  1.3068 +  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
  1.3069 +#endif // CC_INTERP
  1.3070 +
  1.3071 +  // Return to the interpreter entry point.
  1.3072 +  __ blr();
  1.3073 +
  1.3074 +  masm->flush();
  1.3075 +
  1.3076 +  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
  1.3077 +}
  1.3078 +#endif // COMPILER2
  1.3079 +
   1.3080 +// Generate a special Compile2Runtime blob that saves all registers and sets up the oopmap.
  1.3081 +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
  1.3082 +  assert(StubRoutines::forward_exception_entry() != NULL,
  1.3083 +         "must be generated before");
  1.3084 +
  1.3085 +  ResourceMark rm;
  1.3086 +  OopMapSet *oop_maps = new OopMapSet();
  1.3087 +  OopMap* map;
  1.3088 +
  1.3089 +  // Allocate space for the code. Setup code generation tools.
  1.3090 +  CodeBuffer buffer("handler_blob", 2048, 1024);
  1.3091 +  MacroAssembler* masm = new MacroAssembler(&buffer);
  1.3092 +
  1.3093 +  address start = __ pc();
  1.3094 +  int frame_size_in_bytes = 0;
  1.3095 +
  1.3096 +  RegisterSaver::ReturnPCLocation return_pc_location;
  1.3097 +  bool cause_return = (poll_type == POLL_AT_RETURN);
  1.3098 +  if (cause_return) {
  1.3099 +    // Nothing to do here. The frame has already been popped in MachEpilogNode.
  1.3100 +    // Register LR already contains the return pc.
  1.3101 +    return_pc_location = RegisterSaver::return_pc_is_lr;
  1.3102 +  } else {
  1.3103 +    // Use thread()->saved_exception_pc() as return pc.
  1.3104 +    return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
  1.3105 +  }
  1.3106 +
  1.3107 +  // Save registers, fpu state, and flags.
  1.3108 +  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
  1.3109 +                                                                   &frame_size_in_bytes,
  1.3110 +                                                                   /*generate_oop_map=*/ true,
  1.3111 +                                                                   /*return_pc_adjustment=*/0,
  1.3112 +                                                                   return_pc_location);
  1.3113 +
   1.3114 +  // The following is basically a call_VM. However, we need the precise
   1.3115 +  // address of the call in order to generate an oopmap. Hence, we do all the
   1.3116 +  // work ourselves.
  1.3117 +  __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);
  1.3118 +
  1.3119 +  // The return address must always be correct so that the frame constructor
  1.3120 +  // never sees an invalid pc.
  1.3121 +
  1.3122 +  // Do the call
  1.3123 +  __ call_VM_leaf(call_ptr, R16_thread);
  1.3124 +  address calls_return_pc = __ last_calls_return_pc();
  1.3125 +
  1.3126 +  // Set an oopmap for the call site. This oopmap will map all
  1.3127 +  // oop-registers and debug-info registers as callee-saved. This
  1.3128 +  // will allow deoptimization at this safepoint to find all possible
  1.3129 +  // debug-info recordings, as well as let GC find all oops.
  1.3130 +  oop_maps->add_gc_map(calls_return_pc - start, map);
  1.3131 +
  1.3132 +  Label noException;
  1.3133 +
  1.3134 +  // Clear the last Java frame.
  1.3135 +  __ reset_last_Java_frame();
  1.3136 +
  1.3137 +  BLOCK_COMMENT("  Check pending exception.");
  1.3138 +  const Register pending_exception = R0;
  1.3139 +  __ ld(pending_exception, thread_(pending_exception));
  1.3140 +  __ cmpdi(CCR0, pending_exception, 0);
  1.3141 +  __ beq(CCR0, noException);
  1.3142 +
  1.3143 +  // Exception pending
  1.3144 +  RegisterSaver::restore_live_registers_and_pop_frame(masm,
  1.3145 +                                                      frame_size_in_bytes,
  1.3146 +                                                      /*restore_ctr=*/true);
  1.3147 +
  1.3148 +  BLOCK_COMMENT("  Jump to forward_exception_entry.");
  1.3149 +  // Jump to forward_exception_entry, with the issuing PC in LR
  1.3150 +  // so it looks like the original nmethod called forward_exception_entry.
  1.3151 +  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1.3152 +
  1.3153 +  // No exception case.
  1.3154 +  __ BIND(noException);
  1.3155 +
  1.3156 +
  1.3157 +  // Normal exit, restore registers and exit.
  1.3158 +  RegisterSaver::restore_live_registers_and_pop_frame(masm,
  1.3159 +                                                      frame_size_in_bytes,
  1.3160 +                                                      /*restore_ctr=*/true);
  1.3161 +
  1.3162 +  __ blr();
  1.3163 +
  1.3164 +  // Make sure all code is generated
  1.3165 +  masm->flush();
  1.3166 +
  1.3167 +  // Fill-out other meta info
  1.3168 +  // CodeBlob frame size is in words.
  1.3169 +  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
  1.3170 +}
  1.3171 +
  1.3172 +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
  1.3173 +//
   1.3174 +// Generate a stub that calls into the VM to find out the proper destination
   1.3175 +// of a Java call. All the argument registers are live at this point,
   1.3176 +// but since this is generic code we don't know what they are and the caller
   1.3177 +// must do any GC of the args.
  1.3178 +//
  1.3179 +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  1.3180 +
  1.3181 +  // allocate space for the code
  1.3182 +  ResourceMark rm;
  1.3183 +
  1.3184 +  CodeBuffer buffer(name, 1000, 512);
  1.3185 +  MacroAssembler* masm = new MacroAssembler(&buffer);
  1.3186 +
  1.3187 +  int frame_size_in_bytes;
  1.3188 +
  1.3189 +  OopMapSet *oop_maps = new OopMapSet();
  1.3190 +  OopMap* map = NULL;
  1.3191 +
  1.3192 +  address start = __ pc();
  1.3193 +
  1.3194 +  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
  1.3195 +                                                                   &frame_size_in_bytes,
  1.3196 +                                                                   /*generate_oop_map*/ true,
  1.3197 +                                                                   /*return_pc_adjustment*/ 0,
  1.3198 +                                                                   RegisterSaver::return_pc_is_lr);
  1.3199 +
   1.3200 +  // Use noreg as last_Java_pc; the return pc will be reconstructed
   1.3201 +  // from the physical frame.
  1.3202 +  __ set_last_Java_frame(/*sp*/R1_SP, noreg);
  1.3203 +
  1.3204 +  int frame_complete = __ offset();
  1.3205 +
  1.3206 +  // Pass R19_method as 2nd (optional) argument, used by
  1.3207 +  // counter_overflow_stub.
  1.3208 +  __ call_VM_leaf(destination, R16_thread, R19_method);
  1.3209 +  address calls_return_pc = __ last_calls_return_pc();
  1.3210 +  // Set an oopmap for the call site.
  1.3211 +  // We need this not only for callee-saved registers, but also for volatile
  1.3212 +  // registers that the compiler might be keeping live across a safepoint.
  1.3213 +  // Create the oopmap for the call's return pc.
  1.3214 +  oop_maps->add_gc_map(calls_return_pc - start, map);
  1.3215 +
   1.3216 +  // R3_RET contains the address we are going to jump to, assuming no exception was installed.
  1.3217 +
   1.3218 +  // Clear last_Java_sp.
  1.3219 +  __ reset_last_Java_frame();
  1.3220 +
  1.3221 +  // Check for pending exceptions.
  1.3222 +  BLOCK_COMMENT("Check for pending exceptions.");
  1.3223 +  Label pending;
  1.3224 +  __ ld(R11_scratch1, thread_(pending_exception));
  1.3225 +  __ cmpdi(CCR0, R11_scratch1, 0);
  1.3226 +  __ bne(CCR0, pending);
  1.3227 +
  1.3228 +  __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.
  1.3229 +
  1.3230 +  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);
  1.3231 +
  1.3232 +  // Get the returned method.
  1.3233 +  __ get_vm_result_2(R19_method);
  1.3234 +
  1.3235 +  __ bctr();
  1.3236 +
  1.3237 +
  1.3238 +  // Pending exception after the safepoint.
  1.3239 +  __ BIND(pending);
  1.3240 +
  1.3241 +  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);
  1.3242 +
   1.3243 +  // Exception pending => remove activation and forward to the exception handler.
  1.3244 +
  1.3245 +  __ li(R11_scratch1, 0);
  1.3246 +  __ ld(R3_ARG1, thread_(pending_exception));
  1.3247 +  __ std(R11_scratch1, in_bytes(JavaThread::vm_result_offset()), R16_thread);
  1.3248 +  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  1.3249 +
  1.3250 +  // -------------
  1.3251 +  // Make sure all code is generated.
  1.3252 +  masm->flush();
  1.3253 +
   1.3254 +  // Return the blob.
   1.3255 +  // The frame size is passed in words (frame_size_in_bytes / wordSize).
  1.3256 +  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
  1.3257 +                                       oop_maps, true);
  1.3258 +}
