Wed, 24 Apr 2019 11:48:37 -0400
8154156: PPC64: improve array copy stubs by using vector instructions
Reviewed-by: goetz, mdoerr
Contributed-by: Kazunori Ogata <ogatak@jp.ibm.com>
1.1 --- a/src/cpu/ppc/vm/assembler_ppc.hpp Mon Apr 15 16:27:50 2019 +0000 1.2 +++ b/src/cpu/ppc/vm/assembler_ppc.hpp Wed Apr 24 11:48:37 2019 -0400 1.3 @@ -469,6 +469,8 @@ 1.4 LVSR_OPCODE = (31u << OPCODE_SHIFT | 38u << 1), 1.5 1.6 // Vector-Scalar (VSX) instruction support. 1.7 + LXVD2X_OPCODE = (31u << OPCODE_SHIFT | 844u << 1), 1.8 + STXVD2X_OPCODE = (31u << OPCODE_SHIFT | 972u << 1), 1.9 MTVSRD_OPCODE = (31u << OPCODE_SHIFT | 179u << 1), 1.10 MFVSRD_OPCODE = (31u << OPCODE_SHIFT | 51u << 1), 1.11 1.12 @@ -670,8 +672,10 @@ 1.13 // Atomics. 1.14 LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1), 1.15 LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1), 1.16 + LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1), 1.17 STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1), 1.18 - STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1) 1.19 + STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1), 1.20 + STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1) 1.21 1.22 }; 1.23 1.24 @@ -1052,6 +1056,19 @@ 1.25 static int vrs( VectorRegister r) { return vrs(r->encoding());} 1.26 static int vrt( VectorRegister r) { return vrt(r->encoding());} 1.27 1.28 + // Support Vector-Scalar (VSX) instructions. 
1.29 + static int vsra( int x) { return opp_u_field(x, 15, 11); } 1.30 + static int vsrb( int x) { return opp_u_field(x, 20, 16); } 1.31 + static int vsrc( int x) { return opp_u_field(x, 25, 21); } 1.32 + static int vsrs( int x) { return opp_u_field(x, 10, 6); } 1.33 + static int vsrt( int x) { return opp_u_field(x, 10, 6); } 1.34 + 1.35 + static int vsra( VectorSRegister r) { return vsra(r->encoding());} 1.36 + static int vsrb( VectorSRegister r) { return vsrb(r->encoding());} 1.37 + static int vsrc( VectorSRegister r) { return vsrc(r->encoding());} 1.38 + static int vsrs( VectorSRegister r) { return vsrs(r->encoding());} 1.39 + static int vsrt( VectorSRegister r) { return vsrt(r->encoding());} 1.40 + 1.41 static int vsplt_uim( int x) { return opp_u_field(x, 15, 12); } // for vsplt* instructions 1.42 static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions 1.43 static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction 1.44 @@ -1663,11 +1680,14 @@ 1.45 // atomics 1.46 inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); 1.47 inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); 1.48 + inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); 1.49 inline bool lxarx_hint_exclusive_access(); 1.50 inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); 1.51 inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false); 1.52 + inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false); 1.53 inline void stwcx_( Register s, Register a, Register b); 1.54 inline void stdcx_( Register s, Register a, Register b); 1.55 + inline void stqcx_( Register s, Register a, Register b); 1.56 1.57 // Instructions for adjusting thread priority for simultaneous 1.58 // multithreading (SMT) on Power5. 
1.59 @@ -1943,6 +1963,8 @@ 1.60 inline void mfvscr( VectorRegister d); 1.61 1.62 // Vector-Scalar (VSX) instructions. 1.63 + inline void lxvd2x( VectorSRegister d, Register a, Register b); 1.64 + inline void stxvd2x( VectorSRegister d, Register a, Register b); 1.65 inline void mtvrd( VectorRegister d, Register a); 1.66 inline void mfvrd( Register a, VectorRegister d); 1.67 1.68 @@ -2022,10 +2044,13 @@ 1.69 // Atomics: use ra0mem to disallow R0 as base. 1.70 inline void lwarx_unchecked(Register d, Register b, int eh1); 1.71 inline void ldarx_unchecked(Register d, Register b, int eh1); 1.72 + inline void lqarx_unchecked(Register d, Register b, int eh1); 1.73 inline void lwarx( Register d, Register b, bool hint_exclusive_access); 1.74 inline void ldarx( Register d, Register b, bool hint_exclusive_access); 1.75 + inline void lqarx( Register d, Register b, bool hint_exclusive_access); 1.76 inline void stwcx_(Register s, Register b); 1.77 inline void stdcx_(Register s, Register b); 1.78 + inline void stqcx_(Register s, Register b); 1.79 inline void lfs( FloatRegister d, int si16); 1.80 inline void lfsx( FloatRegister d, Register b); 1.81 inline void lfd( FloatRegister d, int si16);
2.1 --- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp Mon Apr 15 16:27:50 2019 +0000 2.2 +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp Wed Apr 24 11:48:37 2019 -0400 2.3 @@ -1,6 +1,6 @@ 2.4 /* 2.5 - * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. 2.6 - * Copyright (c) 2012, 2018, SAP SE. All rights reserved. 2.7 + * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. 2.8 + * Copyright (c) 2012, 2019, SAP SE. All rights reserved. 2.9 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.10 * 2.11 * This code is free software; you can redistribute it and/or modify it 2.12 @@ -504,11 +504,14 @@ 2.13 // Use ra0mem to disallow R0 as base. 2.14 inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } 2.15 inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } 2.16 +inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } 2.17 inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); } 2.18 inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } 2.19 inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } 2.20 +inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 
1 : 0); } 2.21 inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } 2.22 inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } 2.23 +inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } 2.24 2.25 // Instructions for adjusting thread priority 2.26 // for simultaneous multithreading (SMT) on POWER5. 2.27 @@ -624,6 +627,8 @@ 2.28 inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } 2.29 2.30 // Vector-Scalar (VSX) instructions. 2.31 +inline void Assembler::lxvd2x (VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(s1) | rb(s2)); } 2.32 +inline void Assembler::stxvd2x(VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(s1) | rb(s2)); } 2.33 inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec). 2.34 inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec). 
2.35 2.36 @@ -833,10 +838,13 @@ 2.37 // ra0 version 2.38 inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } 2.39 inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } 2.40 +inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } 2.41 inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } 2.42 inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } 2.43 +inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } 2.44 inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); } 2.45 inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); } 2.46 +inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); } 2.47 2.48 // ra0 version 2.49 inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); }
3.1 --- a/src/cpu/ppc/vm/globals_ppc.hpp Mon Apr 15 16:27:50 2019 +0000 3.2 +++ b/src/cpu/ppc/vm/globals_ppc.hpp Wed Apr 24 11:48:37 2019 -0400 3.3 @@ -1,6 +1,6 @@ 3.4 /* 3.5 - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. 3.6 - * Copyright 2012, 2013 SAP AG. All rights reserved. 3.7 + * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. 3.8 + * Copyright 2012, 2018 SAP AG. All rights reserved. 3.9 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3.10 * 3.11 * This code is free software; you can redistribute it and/or modify it 3.12 @@ -81,6 +81,19 @@ 3.13 product(bool, ReoptimizeCallSequences, true, \ 3.14 "Reoptimize code-sequences of calls at runtime.") \ 3.15 \ 3.16 + /* Power 8: Configure Data Stream Control Register. */ \ 3.17 + product(uint64_t,DSCR_PPC64, (uintx)-1, \ 3.18 + "Power8 or later: Specify encoded value for Data Stream Control " \ 3.19 + "Register") \ 3.20 + product(uint64_t,DSCR_DPFD_PPC64, 8, \ 3.21 + "Power8 or later: DPFD (default prefetch depth) value of the " \ 3.22 + "Data Stream Control Register." \ 3.23 + " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \ 3.24 + product(uint64_t,DSCR_URG_PPC64, 8, \ 3.25 + "Power8 or later: URG (depth attainment urgency) value of the " \ 3.26 + "Data Stream Control Register." \ 3.27 + " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \ 3.28 + \ 3.29 product(bool, UseLoadInstructionsForStackBangingPPC64, false, \ 3.30 "Use load instructions for stack banging.") \ 3.31 \
4.1 --- a/src/cpu/ppc/vm/register_ppc.cpp Mon Apr 15 16:27:50 2019 +0000 4.2 +++ b/src/cpu/ppc/vm/register_ppc.cpp Wed Apr 24 11:48:37 2019 -0400 4.3 @@ -1,6 +1,6 @@ 4.4 /* 4.5 - * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 4.6 - * Copyright 2012, 2013 SAP AG. All rights reserved. 4.7 + * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. 4.8 + * Copyright 2012, 2018 SAP AG. All rights reserved. 4.9 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.10 * 4.11 * This code is free software; you can redistribute it and/or modify it 4.12 @@ -75,3 +75,14 @@ 4.13 }; 4.14 return is_valid() ? names[encoding()] : "vnoreg"; 4.15 } 4.16 + 4.17 +const char* VectorSRegisterImpl::name() const { 4.18 + const char* names[number_of_registers] = { 4.19 + "VSR0", "VSR1", "VSR2", "VSR3", "VSR4", "VSR5", "VSR6", "VSR7", 4.20 + "VSR8", "VSR9", "VSR10", "VSR11", "VSR12", "VSR13", "VSR14", "VSR15", 4.21 + "VSR16", "VSR17", "VSR18", "VSR19", "VSR20", "VSR21", "VSR22", "VSR23", 4.22 + "VSR24", "VSR25", "VSR26", "VSR27", "VSR28", "VSR29", "VSR30", "VSR31" 4.23 + }; 4.24 + return is_valid() ? names[encoding()] : "vsnoreg"; 4.25 +} 4.26 +
5.1 --- a/src/cpu/ppc/vm/register_ppc.hpp Mon Apr 15 16:27:50 2019 +0000 5.2 +++ b/src/cpu/ppc/vm/register_ppc.hpp Wed Apr 24 11:48:37 2019 -0400 5.3 @@ -1,6 +1,6 @@ 5.4 /* 5.5 - * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 5.6 - * Copyright 2012, 2014 SAP AG. All rights reserved. 5.7 + * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. 5.8 + * Copyright 2012, 2018 SAP AG. All rights reserved. 5.9 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5.10 * 5.11 * This code is free software; you can redistribute it and/or modify it 5.12 @@ -492,6 +492,106 @@ 5.13 #endif // DONT_USE_REGISTER_DEFINES 5.14 5.15 5.16 +// Use VectorSRegister as a shortcut. 5.17 +class VectorSRegisterImpl; 5.18 +typedef VectorSRegisterImpl* VectorSRegister; 5.19 + 5.20 +inline VectorSRegister as_VectorSRegister(int encoding) { 5.21 + return (VectorSRegister)(intptr_t)encoding; 5.22 +} 5.23 + 5.24 +// The implementation of Vector-Scalar (VSX) registers on POWER architecture. 5.25 +class VectorSRegisterImpl: public AbstractRegisterImpl { 5.26 + public: 5.27 + enum { 5.28 + number_of_registers = 32 5.29 + }; 5.30 + 5.31 + // construction 5.32 + inline friend VectorSRegister as_VectorSRegister(int encoding); 5.33 + 5.34 + // accessors 5.35 + int encoding() const { assert(is_valid(), "invalid register"); return value(); } 5.36 + 5.37 + // testers 5.38 + bool is_valid() const { return 0 <= value() && value() < number_of_registers; } 5.39 + 5.40 + const char* name() const; 5.41 +}; 5.42 + 5.43 +// The Vector-Scalar (VSX) registers of the POWER architecture. 
5.44 + 5.45 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, vsnoreg, (-1)); 5.46 + 5.47 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR0, ( 0)); 5.48 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR1, ( 1)); 5.49 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR2, ( 2)); 5.50 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR3, ( 3)); 5.51 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR4, ( 4)); 5.52 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR5, ( 5)); 5.53 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR6, ( 6)); 5.54 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR7, ( 7)); 5.55 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR8, ( 8)); 5.56 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR9, ( 9)); 5.57 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR10, (10)); 5.58 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR11, (11)); 5.59 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR12, (12)); 5.60 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR13, (13)); 5.61 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR14, (14)); 5.62 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR15, (15)); 5.63 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR16, (16)); 5.64 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR17, (17)); 5.65 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR18, (18)); 5.66 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR19, (19)); 5.67 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR20, (20)); 5.68 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR21, (21)); 5.69 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR22, (22)); 5.70 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR23, (23)); 5.71 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR24, (24)); 5.72 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR25, (25)); 5.73 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR26, (26)); 5.74 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR27, (27)); 5.75 
+CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR28, (28)); 5.76 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR29, (29)); 5.77 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR30, (30)); 5.78 +CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR31, (31)); 5.79 + 5.80 +#ifndef DONT_USE_REGISTER_DEFINES 5.81 +#define vsnoregi ((VectorSRegister)(vsnoreg_VectorSRegisterEnumValue)) 5.82 +#define VSR0 ((VectorSRegister)( VSR0_VectorSRegisterEnumValue)) 5.83 +#define VSR1 ((VectorSRegister)( VSR1_VectorSRegisterEnumValue)) 5.84 +#define VSR2 ((VectorSRegister)( VSR2_VectorSRegisterEnumValue)) 5.85 +#define VSR3 ((VectorSRegister)( VSR3_VectorSRegisterEnumValue)) 5.86 +#define VSR4 ((VectorSRegister)( VSR4_VectorSRegisterEnumValue)) 5.87 +#define VSR5 ((VectorSRegister)( VSR5_VectorSRegisterEnumValue)) 5.88 +#define VSR6 ((VectorSRegister)( VSR6_VectorSRegisterEnumValue)) 5.89 +#define VSR7 ((VectorSRegister)( VSR7_VectorSRegisterEnumValue)) 5.90 +#define VSR8 ((VectorSRegister)( VSR8_VectorSRegisterEnumValue)) 5.91 +#define VSR9 ((VectorSRegister)( VSR9_VectorSRegisterEnumValue)) 5.92 +#define VSR10 ((VectorSRegister)( VSR10_VectorSRegisterEnumValue)) 5.93 +#define VSR11 ((VectorSRegister)( VSR11_VectorSRegisterEnumValue)) 5.94 +#define VSR12 ((VectorSRegister)( VSR12_VectorSRegisterEnumValue)) 5.95 +#define VSR13 ((VectorSRegister)( VSR13_VectorSRegisterEnumValue)) 5.96 +#define VSR14 ((VectorSRegister)( VSR14_VectorSRegisterEnumValue)) 5.97 +#define VSR15 ((VectorSRegister)( VSR15_VectorSRegisterEnumValue)) 5.98 +#define VSR16 ((VectorSRegister)( VSR16_VectorSRegisterEnumValue)) 5.99 +#define VSR17 ((VectorSRegister)( VSR17_VectorSRegisterEnumValue)) 5.100 +#define VSR18 ((VectorSRegister)( VSR18_VectorSRegisterEnumValue)) 5.101 +#define VSR19 ((VectorSRegister)( VSR19_VectorSRegisterEnumValue)) 5.102 +#define VSR20 ((VectorSRegister)( VSR20_VectorSRegisterEnumValue)) 5.103 +#define VSR21 ((VectorSRegister)( VSR21_VectorSRegisterEnumValue)) 5.104 +#define VSR22 
((VectorSRegister)( VSR22_VectorSRegisterEnumValue)) 5.105 +#define VSR23 ((VectorSRegister)( VSR23_VectorSRegisterEnumValue)) 5.106 +#define VSR24 ((VectorSRegister)( VSR24_VectorSRegisterEnumValue)) 5.107 +#define VSR25 ((VectorSRegister)( VSR25_VectorSRegisterEnumValue)) 5.108 +#define VSR26 ((VectorSRegister)( VSR26_VectorSRegisterEnumValue)) 5.109 +#define VSR27 ((VectorSRegister)( VSR27_VectorSRegisterEnumValue)) 5.110 +#define VSR28 ((VectorSRegister)( VSR28_VectorSRegisterEnumValue)) 5.111 +#define VSR29 ((VectorSRegister)( VSR29_VectorSRegisterEnumValue)) 5.112 +#define VSR30 ((VectorSRegister)( VSR30_VectorSRegisterEnumValue)) 5.113 +#define VSR31 ((VectorSRegister)( VSR31_VectorSRegisterEnumValue)) 5.114 +#endif // DONT_USE_REGISTER_DEFINES 5.115 + 5.116 // Maximum number of incoming arguments that can be passed in i registers. 5.117 const int PPC_ARGS_IN_REGS_NUM = 8; 5.118
6.1 --- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp Mon Apr 15 16:27:50 2019 +0000 6.2 +++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp Wed Apr 24 11:48:37 2019 -0400 6.3 @@ -1,6 +1,6 @@ 6.4 /* 6.5 - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 6.6 - * Copyright (c) 2012, 2018, SAP SE. All rights reserved. 6.7 + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. 6.8 + * Copyright (c) 2012, 2019, SAP SE. All rights reserved. 6.9 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6.10 * 6.11 * This code is free software; you can redistribute it and/or modify it 6.12 @@ -1352,9 +1352,13 @@ 6.13 Register tmp3 = R8_ARG6; 6.14 Register tmp4 = R9_ARG7; 6.15 6.16 + VectorSRegister tmp_vsr1 = VSR1; 6.17 + VectorSRegister tmp_vsr2 = VSR2; 6.18 + 6.19 address start = __ function_entry(); 6.20 6.21 - Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8; 6.22 + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9; 6.23 + 6.24 // don't try anything fancy if arrays don't have many elements 6.25 __ li(tmp3, 0); 6.26 __ cmpwi(CCR0, R5_ARG3, 9); 6.27 @@ -1412,22 +1416,60 @@ 6.28 __ andi_(R5_ARG3, R5_ARG3, 15); 6.29 __ mtctr(tmp1); 6.30 6.31 - __ bind(l_8); 6.32 - // Use unrolled version for mass copying (copy 16 elements a time). 6.33 - // Load feeding store gets zero latency on Power6, however not on Power5. 6.34 - // Therefore, the following sequence is made for the good of both. 6.35 - __ ld(tmp1, 0, R3_ARG1); 6.36 - __ ld(tmp2, 8, R3_ARG1); 6.37 - __ ld(tmp3, 16, R3_ARG1); 6.38 - __ ld(tmp4, 24, R3_ARG1); 6.39 - __ std(tmp1, 0, R4_ARG2); 6.40 - __ std(tmp2, 8, R4_ARG2); 6.41 - __ std(tmp3, 16, R4_ARG2); 6.42 - __ std(tmp4, 24, R4_ARG2); 6.43 - __ addi(R3_ARG1, R3_ARG1, 32); 6.44 - __ addi(R4_ARG2, R4_ARG2, 32); 6.45 - __ bdnz(l_8); 6.46 - } 6.47 + if (!VM_Version::has_vsx()) { 6.48 + 6.49 + __ bind(l_8); 6.50 + // Use unrolled version for mass copying (copy 16 elements a time). 
6.51 + // Load feeding store gets zero latency on Power6, however not on Power5. 6.52 + // Therefore, the following sequence is made for the good of both. 6.53 + __ ld(tmp1, 0, R3_ARG1); 6.54 + __ ld(tmp2, 8, R3_ARG1); 6.55 + __ ld(tmp3, 16, R3_ARG1); 6.56 + __ ld(tmp4, 24, R3_ARG1); 6.57 + __ std(tmp1, 0, R4_ARG2); 6.58 + __ std(tmp2, 8, R4_ARG2); 6.59 + __ std(tmp3, 16, R4_ARG2); 6.60 + __ std(tmp4, 24, R4_ARG2); 6.61 + __ addi(R3_ARG1, R3_ARG1, 32); 6.62 + __ addi(R4_ARG2, R4_ARG2, 32); 6.63 + __ bdnz(l_8); 6.64 + 6.65 + } else { // Processor supports VSX, so use it to mass copy. 6.66 + 6.67 + // Prefetch src data into L2 cache. 6.68 + __ dcbt(R3_ARG1, 0); 6.69 + 6.70 + // If supported set DSCR pre-fetch to deepest. 6.71 + if (VM_Version::has_mfdscr()) { 6.72 + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); 6.73 + __ mtdscr(tmp2); 6.74 + } 6.75 + __ li(tmp1, 16); 6.76 + 6.77 + // Backbranch target aligned to 32-byte. It's not aligned 16-byte 6.78 + // as loop contains < 8 instructions that fit inside a single 6.79 + // i-cache sector. 6.80 + __ align(32); 6.81 + 6.82 + __ bind(l_9); 6.83 + // Use loop with VSX load/store instructions to 6.84 + // copy 16 elements a time. 6.85 + __ lxvd2x(tmp_vsr1, 0, R3_ARG1); // Load from src. 6.86 + __ stxvd2x(tmp_vsr1, 0, R4_ARG2); // Store to dst. 6.87 + __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1); // Load from src + 16. 6.88 + __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16. 6.89 + __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32. 6.90 + __ addi(R4_ARG2, R4_ARG2, 32); // Update dst+=32. 6.91 + __ bdnz(l_9); // Dec CTR and loop if not zero. 6.92 + 6.93 + // Restore DSCR pre-fetch value. 6.94 + if (VM_Version::has_mfdscr()) { 6.95 + __ load_const_optimized(tmp2, VM_Version::_dscr_val); 6.96 + __ mtdscr(tmp2); 6.97 + } 6.98 + 6.99 + } 6.100 + } // FasterArrayCopy 6.101 __ bind(l_6); 6.102 6.103 // copy 2 elements at a time
7.1 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp Mon Apr 15 16:27:50 2019 +0000 7.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp Wed Apr 24 11:48:37 2019 -0400 7.3 @@ -1,6 +1,6 @@ 7.4 /* 7.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 7.6 - * Copyright 2012, 2014 SAP AG. All rights reserved. 7.7 + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 7.8 + * Copyright 2012, 2018 SAP AG. All rights reserved. 7.9 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 7.10 * 7.11 * This code is free software; you can redistribute it and/or modify it 7.12 @@ -45,7 +45,7 @@ 7.13 int VM_Version::_measured_cache_line_size = 128; // default value 7.14 const char* VM_Version::_features_str = ""; 7.15 bool VM_Version::_is_determine_features_test_running = false; 7.16 - 7.17 +uint64_t VM_Version::_dscr_val = 0; 7.18 7.19 #define MSG(flag) \ 7.20 if (flag && !FLAG_IS_DEFAULT(flag)) \ 7.21 @@ -60,7 +60,9 @@ 7.22 7.23 // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features. 7.24 if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) { 7.25 - if (VM_Version::has_popcntw()) { 7.26 + if (VM_Version::has_lqarx()) { 7.27 + FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8); 7.28 + } else if (VM_Version::has_popcntw()) { 7.29 FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7); 7.30 } else if (VM_Version::has_cmpb()) { 7.31 FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6); 7.32 @@ -71,8 +73,14 @@ 7.33 } 7.34 } 7.35 guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 || 7.36 - PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7, 7.37 - "PowerArchitecturePPC64 should be 0, 5, 6 or 7"); 7.38 + PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 || 7.39 + PowerArchitecturePPC64 == 8, 7.40 + "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8"); 7.41 + 7.42 + // Power 8: Configure Data Stream Control Register. 
7.43 + if (PowerArchitecturePPC64 >= 8) { 7.44 + config_dscr(); 7.45 + } 7.46 7.47 if (!UseSIGTRAP) { 7.48 MSG(TrapBasedICMissChecks); 7.49 @@ -102,7 +110,7 @@ 7.50 // Create and print feature-string. 7.51 char buf[(num_features+1) * 16]; // Max 16 chars per feature. 7.52 jio_snprintf(buf, sizeof(buf), 7.53 - "ppc64%s%s%s%s%s%s%s%s%s%s", 7.54 + "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s", 7.55 (has_fsqrt() ? " fsqrt" : ""), 7.56 (has_isel() ? " isel" : ""), 7.57 (has_lxarxeh() ? " lxarxeh" : ""), 7.58 @@ -112,12 +120,17 @@ 7.59 (has_popcntw() ? " popcntw" : ""), 7.60 (has_fcfids() ? " fcfids" : ""), 7.61 (has_vand() ? " vand" : ""), 7.62 + (has_lqarx() ? " lqarx" : ""), 7.63 (has_vcipher() ? " aes" : ""), 7.64 - (has_vpmsumb() ? " vpmsumb" : "") 7.65 + (has_vpmsumb() ? " vpmsumb" : ""), 7.66 + (has_mfdscr() ? " mfdscr" : ""), 7.67 + (has_vsx() ? " vsx" : "") 7.68 // Make sure number of %s matches num_features! 7.69 ); 7.70 _features_str = strdup(buf); 7.71 - NOT_PRODUCT(if (Verbose) print_features();); 7.72 + if (Verbose) { 7.73 + print_features(); 7.74 + } 7.75 7.76 // PPC64 supports 8-byte compare-exchange operations (see 7.77 // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr) 7.78 @@ -485,8 +498,11 @@ 7.79 a->popcntw(R7, R5); // code[7] -> popcntw 7.80 a->fcfids(F3, F4); // code[8] -> fcfids 7.81 a->vand(VR0, VR0, VR0); // code[9] -> vand 7.82 - a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher 7.83 - a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb 7.84 + a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[10] -> lqarx_m 7.85 + a->vcipher(VR0, VR1, VR2); // code[11] -> vcipher 7.86 + a->vpmsumb(VR0, VR1, VR2); // code[12] -> vpmsumb 7.87 + a->mfdscr(R0); // code[13] -> mfdscr 7.88 + a->lxvd2x(VSR0, 0, R3_ARG1); // code[14] -> vsx 7.89 a->blr(); 7.90 7.91 // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. 
7.92 @@ -530,8 +546,11 @@ 7.93 if (code[feature_cntr++]) features |= popcntw_m; 7.94 if (code[feature_cntr++]) features |= fcfids_m; 7.95 if (code[feature_cntr++]) features |= vand_m; 7.96 + if (code[feature_cntr++]) features |= lqarx_m; 7.97 if (code[feature_cntr++]) features |= vcipher_m; 7.98 if (code[feature_cntr++]) features |= vpmsumb_m; 7.99 + if (code[feature_cntr++]) features |= mfdscr_m; 7.100 + if (code[feature_cntr++]) features |= vsx_m; 7.101 7.102 // Print the detection code. 7.103 if (PrintAssembly) { 7.104 @@ -543,6 +562,69 @@ 7.105 _features = features; 7.106 } 7.107 7.108 +// Power 8: Configure Data Stream Control Register. 7.109 +void VM_Version::config_dscr() { 7.110 + assert(has_lqarx(), "Only execute on Power 8 or later!"); 7.111 + 7.112 + // 7 InstWords for each call (function descriptor + blr instruction). 7.113 + const int code_size = (2+2*7)*BytesPerInstWord; 7.114 + 7.115 + // Allocate space for the code. 7.116 + ResourceMark rm; 7.117 + CodeBuffer cb("config_dscr", code_size, 0); 7.118 + MacroAssembler* a = new MacroAssembler(&cb); 7.119 + 7.120 + // Emit code. 7.121 + uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry(); 7.122 + uint32_t *code = (uint32_t *)a->pc(); 7.123 + a->mfdscr(R3); 7.124 + a->blr(); 7.125 + 7.126 + void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry(); 7.127 + a->mtdscr(R3); 7.128 + a->blr(); 7.129 + 7.130 + uint32_t *code_end = (uint32_t *)a->pc(); 7.131 + a->flush(); 7.132 + 7.133 + // Print the detection code. 7.134 + if (PrintAssembly) { 7.135 + ttyLocker ttyl; 7.136 + tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", p2i(code)); 7.137 + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); 7.138 + } 7.139 + 7.140 + // Apply the configuration if needed. 
7.141 + _dscr_val = (*get_dscr)(); 7.142 + if (Verbose) { 7.143 + tty->print_cr("dscr value was 0x%lx" , _dscr_val); 7.144 + } 7.145 + bool change_requested = false; 7.146 + if (DSCR_PPC64 != (uintx)-1) { 7.147 + _dscr_val = DSCR_PPC64; 7.148 + change_requested = true; 7.149 + } 7.150 + if (DSCR_DPFD_PPC64 <= 7) { 7.151 + uint64_t mask = 0x7; 7.152 + if ((_dscr_val & mask) != DSCR_DPFD_PPC64) { 7.153 + _dscr_val = (_dscr_val & ~mask) | (DSCR_DPFD_PPC64); 7.154 + change_requested = true; 7.155 + } 7.156 + } 7.157 + if (DSCR_URG_PPC64 <= 7) { 7.158 + uint64_t mask = 0x7 << 6; 7.159 + if ((_dscr_val & mask) != DSCR_URG_PPC64 << 6) { 7.160 + _dscr_val = (_dscr_val & ~mask) | (DSCR_URG_PPC64 << 6); 7.161 + change_requested = true; 7.162 + } 7.163 + } 7.164 + if (change_requested) { 7.165 + (*set_dscr)(_dscr_val); 7.166 + if (Verbose) { 7.167 + tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)()); 7.168 + } 7.169 + } 7.170 +} 7.171 7.172 static int saved_features = 0; 7.173
8.1 --- a/src/cpu/ppc/vm/vm_version_ppc.hpp Mon Apr 15 16:27:50 2019 +0000 8.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.hpp Wed Apr 24 11:48:37 2019 -0400 8.3 @@ -1,6 +1,6 @@ 8.4 /* 8.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 8.6 - * Copyright 2012, 2014 SAP AG. All rights reserved. 8.7 + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 8.8 + * Copyright 2012, 2018 SAP AG. All rights reserved. 8.9 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 8.10 * 8.11 * This code is free software; you can redistribute it and/or modify it 8.12 @@ -42,8 +42,11 @@ 8.13 fcfids, 8.14 vand, 8.15 dcba, 8.16 + lqarx, 8.17 vcipher, 8.18 vpmsumb, 8.19 + mfdscr, 8.20 + vsx, 8.21 num_features // last entry to count features 8.22 }; 8.23 enum Feature_Flag_Set { 8.24 @@ -58,8 +61,11 @@ 8.25 fcfids_m = (1 << fcfids ), 8.26 vand_m = (1 << vand ), 8.27 dcba_m = (1 << dcba ), 8.28 + lqarx_m = (1 << lqarx ), 8.29 vcipher_m = (1 << vcipher), 8.30 vpmsumb_m = (1 << vpmsumb), 8.31 + mfdscr_m = (1 << mfdscr ), 8.32 + vsx_m = (1 << vsx ), 8.33 all_features_m = -1 8.34 }; 8.35 static int _features; 8.36 @@ -69,6 +75,7 @@ 8.37 8.38 static void print_features(); 8.39 static void determine_features(); // also measures cache line size 8.40 + static void config_dscr(); // Power 8: Configure Data Stream Control Register. 
8.41 static void determine_section_size(); 8.42 static void power6_micro_bench(); 8.43 public: 8.44 @@ -87,8 +94,11 @@ 8.45 static bool has_fcfids() { return (_features & fcfids_m) != 0; } 8.46 static bool has_vand() { return (_features & vand_m) != 0; } 8.47 static bool has_dcba() { return (_features & dcba_m) != 0; } 8.48 + static bool has_lqarx() { return (_features & lqarx_m) != 0; } 8.49 static bool has_vcipher() { return (_features & vcipher_m) != 0; } 8.50 static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; } 8.51 + static bool has_mfdscr() { return (_features & mfdscr_m) != 0; } 8.52 + static bool has_vsx() { return (_features & vsx_m) != 0; } 8.53 8.54 static const char* cpu_features() { return _features_str; } 8.55 8.56 @@ -97,6 +107,9 @@ 8.57 // Assembler testing 8.58 static void allow_all(); 8.59 static void revert(); 8.60 + 8.61 + // POWER 8: DSCR current value. 8.62 + static uint64_t _dscr_val; 8.63 }; 8.64 8.65 #endif // CPU_PPC_VM_VM_VERSION_PPC_HPP