diff -r e7a91a357527 -r 018d5b58dd4f src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Wed Apr 16 17:36:29 2008 -0400 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Apr 17 22:18:15 2008 -0400 @@ -1880,6 +1880,379 @@ } +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace's point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger than this is truncated. + +nmethod *SharedRuntime::generate_dtrace_nmethod( + MacroAssembler *masm, methodHandle method) { + + // generate_dtrace_nmethod is guarded by a mutex so we are sure to + // be single threaded in this method. + assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); + + // Fill in the signature array, for the calling-convention call. + int total_args_passed = method->size_of_parameters(); + + BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); + + // The signature we are going to use for the trap that dtrace will see: + // java/lang/String is converted. We drop "this" and any other object + // is converted to NULL. 
(A one-slot java/lang/Long object reference + // is converted to a two-slot long, which is why we double the allocation). + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); + + int i=0; + int total_strings = 0; + int first_arg_to_pass = 0; + int total_c_args = 0; + int box_offset = java_lang_boxing_object::value_offset_in_bytes(); + + if( !method->is_static() ) { // Pass in receiver first + in_sig_bt[i++] = T_OBJECT; + first_arg_to_pass = 1; + } + + // We need to convert the java args to where a native (non-jni) function + // would expect them. To figure out where they go we convert the java + // signature to a C signature. + + SignatureStream ss(method->signature()); + for ( ; !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type(); + in_sig_bt[i++] = bt; // Collect remaining bits of signature + out_sig_bt[total_c_args++] = bt; + if( bt == T_OBJECT) { + symbolOop s = ss.as_symbol_or_null(); + if (s == vmSymbols::java_lang_String()) { + total_strings++; + out_sig_bt[total_c_args-1] = T_ADDRESS; + } else if (s == vmSymbols::java_lang_Boolean() || + s == vmSymbols::java_lang_Character() || + s == vmSymbols::java_lang_Byte() || + s == vmSymbols::java_lang_Short() || + s == vmSymbols::java_lang_Integer() || + s == vmSymbols::java_lang_Float()) { + out_sig_bt[total_c_args-1] = T_INT; + } else if (s == vmSymbols::java_lang_Long() || + s == vmSymbols::java_lang_Double()) { + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } + } else if ( bt == T_LONG || bt == T_DOUBLE ) { + in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + out_sig_bt[total_c_args++] = T_VOID; + } + } + + assert(i==total_args_passed, "validly parsed signature"); + + // Now get the compiled-Java layout as input arguments + int comp_args_on_stack; + comp_args_on_stack = SharedRuntime::java_calling_convention( + in_sig_bt, in_regs, total_args_passed, 
false); + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots). + + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now space for the string(s) we must convert + + int* string_locs = NEW_RESOURCE_ARRAY(int, total_strings + 1); + for (i = 0; i < total_strings ; i++) { + string_locs[i] = stack_slots; + stack_slots += max_dtrace_string_size / VMRegImpl::stack_slot_size; + } + + // + 2 for return address (which we own) and saved rbp, + + stack_slots += 2; + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | string[n] | + // |---------------------| <- string_locs[n] + // | string[n-1] | + // |---------------------| <- string_locs[n-1] + // | ... | + // | ... | + // |---------------------| <- string_locs[1] + // | string[0] | + // |---------------------| <- string_locs[0] + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + + // We are free to use all registers as temps without saving them and + // restoring them except rbp. rbp is the only callee-saved register + // as far as the interpreter and the compiler(s) are concerned. 
+ + const Register ic_reg = rax; + const Register receiver = rcx; + Label hit; + Label exception_pending; + + + __ verify_oop(receiver); + __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes())); + __ jcc(Assembler::equal, hit); + + __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // verified entry must be aligned for code patching. + // and the first 5 bytes must be in the same cache line + // if we align at 8 then we will be sure 5 bytes are in the same line + __ align(8); + + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + + // The instruction at the verified entry point must be 5 bytes or longer + // because it can be patched on the fly by make_non_entrant. The stack bang + // instruction fits that requirement. + + // Generate stack overflow check + + + if (UseStackBanging) { + if (stack_size <= StackShadowPages*os::vm_page_size()) { + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + } else { + __ movl(rax, stack_size); + __ bang_stack_size(rax, rbx); + } + } else { + // need a 5 byte instruction to allow MT safe patching to non-entrant + __ fat_nop(); + } + + assert(((int)__ pc() - start - vep_offset) >= 5, + "valid size for make_non_entrant"); + + // Generate a new frame for the wrapper. + __ enter(); + + // -2 because return address is already present and so is saved rbp, + if (stack_size - 2*wordSize != 0) { + __ subl(rsp, stack_size - 2*wordSize); + } + + // Frame is now completed as far as size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // First thing we do is store all the args as if we are doing the call. + // Since the C calling convention is stack based that ensures that + // all the Java register args are stored before we need to convert any + // string we might have. 
+ + int sid = 0; + int c_arg, j_arg; + int string_reg = 0; + + for (j_arg = first_arg_to_pass, c_arg = 0 ; + j_arg < total_args_passed ; j_arg++, c_arg++ ) { + + VMRegPair src = in_regs[j_arg]; + VMRegPair dst = out_regs[c_arg]; + assert(dst.first()->is_stack() || in_sig_bt[j_arg] == T_VOID, + "stack based abi assumed"); + + switch (in_sig_bt[j_arg]) { + + case T_ARRAY: + case T_OBJECT: + if (out_sig_bt[c_arg] == T_ADDRESS) { + // Any register based arg for a java string after the first + // will be destroyed by the call to get_utf so we store + // the original value in the location the utf string address + // will eventually be stored. + if (src.first()->is_reg()) { + if (string_reg++ != 0) { + simple_move32(masm, src, dst); + } + } + } else if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { + // need to unbox a one-word (or, for T_LONG, two-word) value + Register in_reg = rax; + if ( src.first()->is_reg() ) { + in_reg = src.first()->as_Register(); + } else { + simple_move32(masm, src, in_reg->as_VMReg()); + } + Label skipUnbox; + __ movl(Address(rsp, reg2offset_out(dst.first())), NULL_WORD); + if ( out_sig_bt[c_arg] == T_LONG ) { + __ movl(Address(rsp, reg2offset_out(dst.second())), NULL_WORD); + } + __ testl(in_reg, in_reg); + __ jcc(Assembler::zero, skipUnbox); + assert(dst.first()->is_stack() && + (!dst.second()->is_valid() || dst.second()->is_stack()), + "value(s) must go into stack slots"); + if ( out_sig_bt[c_arg] == T_LONG ) { + __ movl(rbx, Address(in_reg, + box_offset + VMRegImpl::stack_slot_size)); + __ movl(Address(rsp, reg2offset_out(dst.second())), rbx); + } + __ movl(in_reg, Address(in_reg, box_offset)); + __ movl(Address(rsp, reg2offset_out(dst.first())), in_reg); + __ bind(skipUnbox); + } else { + // Convert the arg to NULL + __ movl(Address(rsp, reg2offset_out(dst.first())), NULL_WORD); + } + if (out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // Move over the T_VOID to keep the loop indices in sync + } + break; + 
+ + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, src, dst); + break; + + case T_DOUBLE: + assert( j_arg + 1 < total_args_passed && + in_sig_bt[j_arg + 1] == T_VOID, "bad arg list"); + double_move(masm, src, dst); + break; + + case T_LONG : + long_move(masm, src, dst); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + simple_move32(masm, src, dst); + } + } + + // Now we must convert any string we have to utf8 + // + + for (sid = 0, j_arg = first_arg_to_pass, c_arg = 0 ; + sid < total_strings ; j_arg++, c_arg++ ) { + + if (out_sig_bt[c_arg] == T_ADDRESS) { + + Address utf8_addr = Address( + rsp, string_locs[sid++] * VMRegImpl::stack_slot_size); + __ leal(rax, utf8_addr); + + // The first string we find might still be in the original java arg + // register + VMReg orig_loc = in_regs[j_arg].first(); + Register string_oop; + + // This is where the argument will eventually reside + Address dest = Address(rsp, reg2offset_out(out_regs[c_arg].first())); + + if (sid == 1 && orig_loc->is_reg()) { + string_oop = orig_loc->as_Register(); + assert(string_oop != rax, "smashed arg"); + } else { + + if (orig_loc->is_reg()) { + // Get the copy of the jls (java/lang/String) object + __ movl(rcx, dest); + } else { + // arg is still in the original location + __ movl(rcx, Address(rbp, reg2offset_in(orig_loc))); + } + string_oop = rcx; + + } + Label nullString; + __ movl(dest, NULL_WORD); + __ testl(string_oop, string_oop); + __ jcc(Assembler::zero, nullString); + + // Now we can store the address of the utf string as the argument + __ movl(dest, rax); + + // And do the conversion + __ call_VM_leaf(CAST_FROM_FN_PTR( + address, SharedRuntime::get_utf), string_oop, rax); + __ bind(nullString); + } + + if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // Move over the T_VOID to keep the loop indices in sync + } + } + + + // Ok now we are done. 
Need to place the nop that dtrace wants in order to + // patch in the trap + + int patch_offset = ((intptr_t)__ pc()) - start; + + __ nop(); + + + // Return + + __ leave(); + __ ret(0); + + __ flush(); + + nmethod *nm = nmethod::new_dtrace_nmethod( + method, masm->code(), vep_offset, patch_offset, frame_complete, + stack_slots / VMRegImpl::slots_per_word); + return nm; + +} + +#endif // HAVE_DTRACE_H + // this function returns the adjust size (in number of words) to a c2i adapter // activation for use during deoptimization int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {