1.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Mar 21 14:58:37 2016 -0700 1.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Feb 17 13:40:12 2016 +0300 1.3 @@ -3743,6 +3743,107 @@ 1.4 return start; 1.5 } 1.6 1.7 +/** 1.8 + * Arguments: 1.9 + * 1.10 + * Input: 1.11 + * c_rarg0 - x address 1.12 + * c_rarg1 - x length 1.13 + * c_rarg2 - z address 1.14 + * c_rarg3 - z length 1.15 + * 1.16 + */ 1.17 + address generate_squareToLen() { 1.18 + 1.19 + __ align(CodeEntryAlignment); 1.20 + StubCodeMark mark(this, "StubRoutines", "squareToLen"); 1.21 + 1.22 + address start = __ pc(); 1.23 + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 1.24 + // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...) 1.25 + const Register x = rdi; 1.26 + const Register len = rsi; 1.27 + const Register z = r8; 1.28 + const Register zlen = rcx; 1.29 + 1.30 + const Register tmp1 = r12; 1.31 + const Register tmp2 = r13; 1.32 + const Register tmp3 = r14; 1.33 + const Register tmp4 = r15; 1.34 + const Register tmp5 = rbx; 1.35 + 1.36 + BLOCK_COMMENT("Entry:"); 1.37 + __ enter(); // required for proper stackwalking of RuntimeStub frame 1.38 + 1.39 + setup_arg_regs(4); // x => rdi, len => rsi, z => rdx 1.40 + // zlen => rcx 1.41 + // r9 and r10 may be used to save non-volatile registers 1.42 + __ movptr(r8, rdx); // move z in rdx to z(r8); rdx itself is passed to square_to_len() below 1.43 + __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); 1.44 + 1.45 + restore_arg_regs(); 1.46 + 1.47 + __ leave(); // required for proper stackwalking of RuntimeStub frame 1.48 + __ ret(0); 1.49 + 1.50 + return start; 1.51 + } 1.52 + 1.53 + /** 1.54 + * Arguments: 1.55 + * 1.56 + * Input: 1.57 + * c_rarg0 - out address 1.58 + * c_rarg1 - in address 1.59 + * c_rarg2 - offset 1.60 + * c_rarg3 - len 1.61 + * not Win64 1.62 + * c_rarg4 - k 1.63 + * Win64 1.64 + * rsp+40 - k 1.65 + */ 1.66 + address generate_mulAdd() { 1.67 + __ align(CodeEntryAlignment); 1.68 + StubCodeMark mark(this, "StubRoutines", "mulAdd"); 1.69 + 1.70 + address start = __ pc(); 1.71 + 
// Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 1.72 + // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 1.73 + const Register out = rdi; 1.74 + const Register in = rsi; 1.75 + const Register offset = r11; 1.76 + const Register len = rcx; 1.77 + const Register k = r8; 1.78 + 1.79 + // Next registers will be saved on stack in mul_add(). 1.80 + const Register tmp1 = r12; 1.81 + const Register tmp2 = r13; 1.82 + const Register tmp3 = r14; 1.83 + const Register tmp4 = r15; 1.84 + const Register tmp5 = rbx; 1.85 + 1.86 + BLOCK_COMMENT("Entry:"); 1.87 + __ enter(); // required for proper stackwalking of RuntimeStub frame 1.88 + 1.89 + setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx 1.90 + // len => rcx, k => r8 1.91 + // r9 and r10 may be used to save non-volatile registers 1.92 +#ifdef _WIN64 1.93 + // last argument is on stack on Win64; read at 6 * wordSize here vs. rsp+40 in the header, presumably because enter() pushed one more word (TODO confirm) 1.94 + __ movl(k, Address(rsp, 6 * wordSize)); 1.95 +#endif 1.96 + __ movptr(r11, rdx); // move offset in rdx to offset(r11) 1.97 + __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); 1.98 + 1.99 + restore_arg_regs(); 1.100 + 1.101 + __ leave(); // required for proper stackwalking of RuntimeStub frame 1.102 + __ ret(0); 1.103 + 1.104 + return start; 1.105 + } 1.106 + 1.107 + 1.108 #undef __ 1.109 #define __ masm-> 1.110 1.111 @@ -3987,6 +4088,12 @@ 1.112 if (UseMultiplyToLenIntrinsic) { 1.113 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 1.114 } 1.115 + if (UseSquareToLenIntrinsic) { 1.116 + StubRoutines::_squareToLen = generate_squareToLen(); 1.117 + } 1.118 + if (UseMulAddIntrinsic) { 1.119 + StubRoutines::_mulAdd = generate_mulAdd(); 1.120 + } 1.121 #endif 1.122 } 1.123