#5250 Enabled PS instructions and added PS instructions handler

Thu, 06 Apr 2017 14:44:34 +0800

author
aoqi
date
Thu, 06 Apr 2017 14:44:34 +0800
changeset 395
eeea63acbe68
parent 394
61b347fb2a89
child 396
474ce9f32bce

#5250 Enabled PS instructions and added PS instructions handler
fix changeset 513:a7e5d5bbfda5
Since the kernel does not yet emulate PS instructions, the emulation must be handled in the JVM.
The method is to split a PS instruction into two float instructions, each of which triggers the kernel's emulation of float instructions.
The patch passes the jtreg test hotspot/test/compiler/6340864/TestFloatVect.java; however, the sys time is high.

src/cpu/mips/vm/mips_64.ad file | annotate | diff | comparison | revisions
src/os_cpu/linux_mips/vm/os_linux_mips.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/mips_64.ad	Wed Apr 05 12:41:12 2017 +0800
     1.2 +++ b/src/cpu/mips/vm/mips_64.ad	Thu Apr 06 14:44:34 2017 +0800
     1.3 @@ -13987,7 +13987,7 @@
     1.4  // Floats vector add
     1.5  // The kernel does not have emulation of PS instructions yet, so PS instructions are disabled.
     1.6  instruct vadd2F(vecD dst, vecD src) %{
     1.7 -  predicate(n->as_Vector()->length() == 2 && !UseLoongsonISA);
     1.8 +  predicate(n->as_Vector()->length() == 2);
     1.9    match(Set dst (AddVF dst src));
    1.10    format %{ "add.ps   $dst,$src\t! add packed2F" %}
    1.11    ins_encode %{
    1.12 @@ -13997,7 +13997,7 @@
    1.13  %}
    1.14  
    1.15  instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{
    1.16 -  predicate(n->as_Vector()->length() == 2 && !UseLoongsonISA);
    1.17 +  predicate(n->as_Vector()->length() == 2);
    1.18    match(Set dst (AddVF src1 src2));
    1.19    format %{ "add.ps   $dst,$src1,$src2\t! add packed2F" %}
    1.20    ins_encode %{
    1.21 @@ -14010,7 +14010,7 @@
    1.22  
    1.23  // Floats vector sub
    1.24  instruct vsub2F(vecD dst, vecD src) %{
    1.25 -  predicate(n->as_Vector()->length() == 2 && !UseLoongsonISA);
    1.26 +  predicate(n->as_Vector()->length() == 2);
    1.27    match(Set dst (SubVF dst src));
    1.28    format %{ "sub.ps   $dst,$src\t! sub packed2F" %}
    1.29    ins_encode %{
    1.30 @@ -14023,7 +14023,7 @@
    1.31  
    1.32  // Floats vector mul
    1.33  instruct vmul2F(vecD dst, vecD src) %{
    1.34 -  predicate(n->as_Vector()->length() == 2 && !UseLoongsonISA);
    1.35 +  predicate(n->as_Vector()->length() == 2);
    1.36    match(Set dst (MulVF dst src));
    1.37    format %{ "mul.ps   $dst, $src\t! mul packed2F" %}
    1.38    ins_encode %{
    1.39 @@ -14033,7 +14033,7 @@
    1.40  %}
    1.41  
    1.42  instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{
    1.43 -  predicate(n->as_Vector()->length() == 2 && !UseLoongsonISA);
    1.44 +  predicate(n->as_Vector()->length() == 2);
    1.45    match(Set dst (MulVF src1 src2));
    1.46    format %{ "mul.ps   $dst, $src1, $src2\t! mul packed2F" %}
    1.47    ins_encode %{
    1.48 @@ -14045,6 +14045,19 @@
    1.49  // --------------------------------- DIV --------------------------------------
    1.50  // MIPS do not have div.ps
    1.51  
    1.52 +// --------------------------------- MADD --------------------------------------
    1.53 +// Floats vector madd
    1.54 +//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{
    1.55 +//  predicate(n->as_Vector()->length() == 2);
    1.56 +//  match(Set dst (AddVF (MulVF src1 src2) src3));
    1.57 +//  ins_cost(50);
    1.58 +//  format %{ "madd.ps   $dst, $src3, $src1, $src2\t! madd packed2F" %}
    1.59 +//  ins_encode %{
    1.60 +//    __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
    1.61 +//  %}
    1.62 +//  ins_pipe( fpu_regF_regF );
    1.63 +//%}
    1.64 +
    1.65  
    1.66  //----------PEEPHOLE RULES-----------------------------------------------------
    1.67  // These must follow all instruction definitions as they use the names
     2.1 --- a/src/os_cpu/linux_mips/vm/os_linux_mips.cpp	Wed Apr 05 12:41:12 2017 +0800
     2.2 +++ b/src/os_cpu/linux_mips/vm/os_linux_mips.cpp	Thu Apr 06 14:44:34 2017 +0800
     2.3 @@ -411,6 +411,106 @@
     2.4  #endif
     2.5            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
     2.6        }
     2.7 +      else if (thread->thread_state() == _thread_in_Java && sig == SIGILL) {
     2.8 +        // Since the kernel does not have emulation of PS instructions yet, the emulation must be handled here.
     2.9 +        // The method is to trigger the kernel's emulation of float instructions.
    2.10 +        int inst = *(int*)pc;
    2.11 +        int ops = (inst >> 26) & 0x3f;
    2.12 +        int ops_fmt = (inst >> 21) & 0x1f;
    2.13 +        int op = inst & 0x3f;
    2.14 +        if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) {
    2.15 +          int ft, fs, fd;
    2.16 +          ft = (inst >> 16) & 0x1f;
    2.17 +          fs = (inst >> 11) & 0x1f;
    2.18 +          fd = (inst >> 6) & 0x1f;
    2.19 +          float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower;
    2.20 +          double ft_value, fs_value, fd_value;
    2.21 +          ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft];
    2.22 +          fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs];
    2.23 +          asm volatile(
    2.24 +            "cvt.s.pl %0, %4\n\t"
    2.25 +            "cvt.s.pu %1, %4\n\t"
    2.26 +            "cvt.s.pl %2, %5\n\t"
    2.27 +            "cvt.s.pu %3, %5\n\t"
    2.28 +            : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper)
    2.29 +            : "f" (fs_value), "f" (ft_value)
    2.30 +          );
    2.31 +
    2.32 +          switch (op) {
    2.33 +            case Assembler::fadd_op:
    2.34 +              asm volatile(
    2.35 +                "add.s  %1, %3, %5\n\t"
    2.36 +                "add.s  %2, %4, %6\n\t"
    2.37 +                "pll.ps %0, %1, %2\n\t"
    2.38 +                : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower)
    2.39 +                : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower)
    2.40 +              );
    2.41 +              uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value;
    2.42 +              stub = pc + 4;
    2.43 +              break;
    2.44 +            case Assembler::fsub_op:
    2.45 +              //fd = fs - ft
    2.46 +              asm volatile(
    2.47 +                "sub.s  %1, %3, %5\n\t"
    2.48 +                "sub.s  %2, %4, %6\n\t"
    2.49 +                "pll.ps %0, %1, %2\n\t"
    2.50 +                : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower)
    2.51 +                : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower)
    2.52 +              );
    2.53 +              uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value;
    2.54 +              stub = pc + 4;
    2.55 +              break;
    2.56 +            case Assembler::fmul_op:
    2.57 +              asm volatile(
    2.58 +                "mul.s  %1, %3, %5\n\t"
    2.59 +                "mul.s  %2, %4, %6\n\t"
    2.60 +                "pll.ps %0, %1, %2\n\t"
    2.61 +                : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower)
    2.62 +                : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower)
    2.63 +              );
    2.64 +              uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value;
    2.65 +              stub = pc + 4;
    2.66 +              break;
    2.67 +            default:
    2.68 +              tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op);
    2.69 +          }
    2.70 +        }
    2.71 +        else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) {
    2.72 +          // madd.ps is not used; the code below was not tested
    2.73 +          int fr, ft, fs, fd;
    2.74 +          float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower;
    2.75 +          double fr_value, ft_value, fs_value, fd_value;
    2.76 +          switch (op) {
    2.77 +            case Assembler::madd_ps_op:
    2.78 +              // fd = (fs * ft) + fr
    2.79 +              fr = (inst >> 21) & 0x1f;
    2.80 +              ft = (inst >> 16) & 0x1f;
    2.81 +              fs = (inst >> 11) & 0x1f;
    2.82 +              fd = (inst >> 6) & 0x1f;
    2.83 +              fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr];
    2.84 +              ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft];
    2.85 +              fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs];
    2.86 +              asm volatile(
    2.87 +                "cvt.s.pu %3, %9\n\t"
    2.88 +                "cvt.s.pl %4, %9\n\t"
    2.89 +                "cvt.s.pu %5, %10\n\t"
    2.90 +                "cvt.s.pl %6, %10\n\t"
    2.91 +                "cvt.s.pu %7, %11\n\t"
    2.92 +                "cvt.s.pl %8, %11\n\t"
    2.93 +                "madd.s %1, %3, %5, %7\n\t"
    2.94 +                "madd.s %2, %4, %6, %8\n\t"
    2.95 +                "pll.ps %0, %1, %2\n\t"
    2.96 +                : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower)
    2.97 +                : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/
    2.98 +              );
    2.99 +              uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value;
   2.100 +              stub = pc + 4;
   2.101 +              break;
   2.102 +            default:
   2.103 +              tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op);
   2.104 +          }
   2.105 +        }
   2.106 +      } //SIGILL
   2.107      } else if (thread->thread_state() == _thread_in_vm &&
   2.108                 sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
   2.109                 thread->doing_unsafe_access()) {

mercurial