/*
 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

/**
 * @test
 * @bug 6340864
 * @summary Implement vectorization optimizations in hotspot-server
 *
 * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect
 */

/*
 * NOTE(review): this file was restored from a damaged copy in which a blame
 * prefix was fused into every line and every span between a '<' and the next
 * '>' had been dropped. The constants, main(), the srl/sra kernels, the
 * pack/unpack kernels and the verify() overloads were recovered as they
 * stood. The sll kernels, the arithmetic/logic kernels and the dropped parts
 * of test() were rebuilt to be consistent with what survived -- TODO confirm
 * against a pristine copy before relying on exact coverage.
 */

/**
 * Runs a set of simple byte-array loops many times, then checks each loop's
 * output element by element and finally prints a wall-clock time per loop.
 * Every kernel is a plain counted loop over {@code a0.length} (or the packed
 * array's length) with a single store per element.
 */
public class TestByteVect {
  private static final int ARRLEN = 997;
  private static final int ITERS  = 11000;
  private static final int ADD_INIT = 63;
  private static final int BIT_MASK = 0xB7;
  private static final int VALUE = 3;
  private static final int SHIFT = 8;

  /**
   * Entry point: runs {@link #test()} and exits with status 97 when any
   * element mismatch was counted.
   */
  public static void main(String args[]) {
    System.out.println("Testing Byte vectors");
    int errn = test();
    if (errn > 0) {
      System.err.println("FAILED: " + errn + " errors");
      System.exit(97);
    }
    System.out.println("PASSED");
  }

  /**
   * Warm-up, verification and timing of every kernel in this class.
   *
   * @return the number of mismatching elements found during verification
   */
  static int test() {
    byte[] a0 = new byte[ARRLEN];
    byte[] a1 = new byte[ARRLEN];
    byte[] a2 = new byte[ARRLEN];
    byte[] a3 = new byte[ARRLEN];
    byte[] a4 = new byte[ARRLEN];
    short[] p2 = new short[ARRLEN/2];
    int[] p4 = new int[ARRLEN/4];
    long[] p8 = new long[ARRLEN/8];
    // Initialize
    int gold_sum = 0;
    for (int i=0; i<ARRLEN; i++) {
      byte val = (byte)(ADD_INIT+i);
      gold_sum += val;
      a1[i] = val;
      a2[i] = (byte)VALUE;
      a3[i] = (byte)(-VALUE);
      a4[i] = (byte)BIT_MASK;
    }

    System.out.println("Warmup");
    for (int i=0; i<ITERS; i++) {
      test_sum(a1);
      test_addc(a0, a1);
      test_addv(a0, a1, (byte)VALUE);
      test_adda(a0, a1, a2);
      test_subc(a0, a1);
      test_subv(a0, a1, (byte)VALUE);
      test_suba(a0, a1, a2);

      test_mulc(a0, a1);
      test_mulv(a0, a1, (byte)VALUE);
      test_mula(a0, a1, a2);
      test_divc(a0, a1);
      test_divv(a0, a1, (byte)VALUE);
      test_diva(a0, a1, a2);
      test_mulc_n(a0, a1);
      test_mulv(a0, a1, (byte)(-VALUE));
      test_mula(a0, a1, a3);
      test_divc_n(a0, a1);
      test_divv(a0, a1, (byte)(-VALUE));
      test_diva(a0, a1, a3);

      test_andc(a0, a1);
      test_andv(a0, a1, (byte)BIT_MASK);
      test_anda(a0, a1, a4);
      test_orc(a0, a1);
      test_orv(a0, a1, (byte)BIT_MASK);
      test_ora(a0, a1, a4);
      test_xorc(a0, a1);
      test_xorv(a0, a1, (byte)BIT_MASK);
      test_xora(a0, a1, a4);

      test_sllc(a0, a1);
      test_sllv(a0, a1, VALUE);
      test_srlc(a0, a1);
      test_srlv(a0, a1, VALUE);
      test_srac(a0, a1);
      test_srav(a0, a1, VALUE);

      test_sllc_n(a0, a1);
      test_sllv(a0, a1, -VALUE);
      test_srlc_n(a0, a1);
      test_srlv(a0, a1, -VALUE);
      test_srac_n(a0, a1);
      test_srav(a0, a1, -VALUE);

      test_sllc_o(a0, a1);
      test_sllv(a0, a1, SHIFT);
      test_srlc_o(a0, a1);
      test_srlv(a0, a1, SHIFT);
      test_srac_o(a0, a1);
      test_srav(a0, a1, SHIFT);

      test_sllc_on(a0, a1);
      test_sllv(a0, a1, -SHIFT);
      test_srlc_on(a0, a1);
      test_srlv(a0, a1, -SHIFT);
      test_srac_on(a0, a1);
      test_srav(a0, a1, -SHIFT);

      test_sllc_add(a0, a1);
      test_sllv_add(a0, a1, ADD_INIT);
      test_srlc_add(a0, a1);
      test_srlv_add(a0, a1, ADD_INIT);
      test_srac_add(a0, a1);
      test_srav_add(a0, a1, ADD_INIT);

      test_sllc_and(a0, a1);
      test_sllv_and(a0, a1, BIT_MASK);
      test_srlc_and(a0, a1);
      test_srlv_and(a0, a1, BIT_MASK);
      test_srac_and(a0, a1);
      test_srav_and(a0, a1, BIT_MASK);

      test_pack2(p2, a1);
      test_unpack2(a0, p2);
      test_pack2_swap(p2, a1);
      test_unpack2_swap(a0, p2);
      test_pack4(p4, a1);
      test_unpack4(a0, p4);
      test_pack4_swap(p4, a1);
      test_unpack4_swap(a0, p4);
      test_pack8(p8, a1);
      test_unpack8(a0, p8);
      test_pack8_swap(p8, a1);
      test_unpack8_swap(a0, p8);
    }

    // Test and verify results. Expected values are recomputed from the
    // initialization formula (byte)(ADD_INIT+i) rather than read from a1.
    System.out.println("Verification");
    int errn = 0;
    {
      int sum = test_sum(a1);
      if (sum != gold_sum) {
        System.err.println("test_sum:  " + sum + " != " + gold_sum);
        errn++;
      }

      test_addc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
      }
      test_addv(a0, a1, (byte)VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
      }
      test_adda(a0, a1, a2);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
      }

      test_subc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
      }
      test_subv(a0, a1, (byte)VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
      }
      test_suba(a0, a1, a2);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
      }

      test_mulc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
      }
      test_mulv(a0, a1, (byte)VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
      }
      test_mula(a0, a1, a2);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
      }

      test_divc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
      }
      test_divv(a0, a1, (byte)VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
      }
      test_diva(a0, a1, a2);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
      }

      test_mulc_n(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
      }
      test_mulv(a0, a1, (byte)(-VALUE));
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
      }
      test_mula(a0, a1, a3);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
      }

      test_divc_n(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
      }
      test_divv(a0, a1, (byte)(-VALUE));
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
      }
      test_diva(a0, a1, a3);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
      }

      test_andc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
      }
      test_andv(a0, a1, (byte)BIT_MASK);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
      }
      test_anda(a0, a1, a4);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
      }

      test_orc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
      }
      test_orv(a0, a1, (byte)BIT_MASK);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
      }
      test_ora(a0, a1, a4);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
      }

      test_xorc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
      }
      test_xorv(a0, a1, (byte)BIT_MASK);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
      }
      test_xora(a0, a1, a4);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
      }

      // Shifts by +VALUE.
      test_sllc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
      }
      test_sllv(a0, a1, VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
      }

      test_srlc(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
      }
      test_srlv(a0, a1, VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
      }

      test_srac(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
      }
      test_srav(a0, a1, VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
      }

      // Shifts by -VALUE (Java masks an int shift count to its low 5 bits).
      test_sllc_n(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
      }
      test_sllv(a0, a1, -VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
      }

      test_srlc_n(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
      }
      test_srlv(a0, a1, -VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
      }

      test_srac_n(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
      }
      test_srav(a0, a1, -VALUE);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
      }

      // Shifts by SHIFT, i.e. by the full width of a byte.
      test_sllc_o(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
      }
      test_sllv(a0, a1, SHIFT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
      }

      test_srlc_o(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
      }
      test_srlv(a0, a1, SHIFT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
      }

      test_srac_o(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
      }
      test_srav(a0, a1, SHIFT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
      }

      // Shifts by -SHIFT.
      test_sllc_on(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
      }
      test_sllv(a0, a1, -SHIFT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
      }

      test_srlc_on(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
      }
      test_srlv(a0, a1, -SHIFT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
      }

      test_srac_on(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
      }
      test_srav(a0, a1, -SHIFT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
      }

      // Add-then-shift: the sum is an int, so bits above the byte take part.
      test_sllc_add(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
      }
      test_sllv_add(a0, a1, ADD_INIT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
      }

      test_srlc_add(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
      }
      test_srlv_add(a0, a1, ADD_INIT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
      }

      test_srac_add(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
      }
      test_srav_add(a0, a1, ADD_INIT);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
      }

      // Mask-then-shift.
      test_sllc_and(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
      }
      test_sllv_and(a0, a1, BIT_MASK);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
      }

      test_srlc_and(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
      }
      test_srlv_and(a0, a1, BIT_MASK);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
      }

      test_srac_and(a0, a1);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
      }
      test_srav_and(a0, a1, BIT_MASK);
      for (int i=0; i<ARRLEN; i++) {
        errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
      }

      // Pack/unpack round trips. a0 is filled with -1 before each unpack so
      // that elements the kernel fails to write are detected. Only the
      // prefix covered by whole packed elements is compared.
      test_pack2(p2, a1);
      for (int i=0; i<ARRLEN/2; i++) {
        errn += verify("test_pack2: ", i, p2[i],
                       (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
      }
      for (int i=0; i<ARRLEN; i++) {
        a0[i] = -1;
      }
      test_unpack2(a0, p2);
      for (int i=0; i<(ARRLEN&(-2)); i++) {
        errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
      }

      test_pack2_swap(p2, a1);
      for (int i=0; i<ARRLEN/2; i++) {
        errn += verify("test_pack2_swap: ", i, p2[i],
                       (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
      }
      for (int i=0; i<ARRLEN; i++) {
        a0[i] = -1;
      }
      test_unpack2_swap(a0, p2);
      for (int i=0; i<(ARRLEN&(-2)); i++) {
        errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
      }

      test_pack4(p4, a1);
      for (int i=0; i<ARRLEN/4; i++) {
        errn += verify("test_pack4: ", i, p4[i],
                       ((ADD_INIT+4*i+0) & 0xFF) |
                       (((ADD_INIT+4*i+1) & 0xFF) <<  8) |
                       (((ADD_INIT+4*i+2) & 0xFF) << 16) |
                       (((ADD_INIT+4*i+3) & 0xFF) << 24));
      }
      for (int i=0; i<ARRLEN; i++) {
        a0[i] = -1;
      }
      test_unpack4(a0, p4);
      for (int i=0; i<(ARRLEN&(-4)); i++) {
        errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
      }

      test_pack4_swap(p4, a1);
      for (int i=0; i<ARRLEN/4; i++) {
        errn += verify("test_pack4_swap: ", i, p4[i],
                       ((ADD_INIT+4*i+3) & 0xFF) |
                       (((ADD_INIT+4*i+2) & 0xFF) <<  8) |
                       (((ADD_INIT+4*i+1) & 0xFF) << 16) |
                       (((ADD_INIT+4*i+0) & 0xFF) << 24));
      }
      for (int i=0; i<ARRLEN; i++) {
        a0[i] = -1;
      }
      test_unpack4_swap(a0, p4);
      for (int i=0; i<(ARRLEN&(-4)); i++) {
        errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
      }

      test_pack8(p8, a1);
      for (int i=0; i<ARRLEN/8; i++) {
        errn += verify("test_pack8: ", i, p8[i],
                       ((long)(ADD_INIT+8*i+0) & 0xFFL) |
                       (((long)(ADD_INIT+8*i+1) & 0xFFL) <<  8) |
                       (((long)(ADD_INIT+8*i+2) & 0xFFL) << 16) |
                       (((long)(ADD_INIT+8*i+3) & 0xFFL) << 24) |
                       (((long)(ADD_INIT+8*i+4) & 0xFFL) << 32) |
                       (((long)(ADD_INIT+8*i+5) & 0xFFL) << 40) |
                       (((long)(ADD_INIT+8*i+6) & 0xFFL) << 48) |
                       (((long)(ADD_INIT+8*i+7) & 0xFFL) << 56));
      }
      for (int i=0; i<ARRLEN; i++) {
        a0[i] = -1;
      }
      test_unpack8(a0, p8);
      for (int i=0; i<(ARRLEN&(-8)); i++) {
        errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
      }

      test_pack8_swap(p8, a1);
      for (int i=0; i<ARRLEN/8; i++) {
        errn += verify("test_pack8_swap: ", i, p8[i],
                       ((long)(ADD_INIT+8*i+7) & 0xFFL) |
                       (((long)(ADD_INIT+8*i+6) & 0xFFL) <<  8) |
                       (((long)(ADD_INIT+8*i+5) & 0xFFL) << 16) |
                       (((long)(ADD_INIT+8*i+4) & 0xFFL) << 24) |
                       (((long)(ADD_INIT+8*i+3) & 0xFFL) << 32) |
                       (((long)(ADD_INIT+8*i+2) & 0xFFL) << 40) |
                       (((long)(ADD_INIT+8*i+1) & 0xFFL) << 48) |
                       (((long)(ADD_INIT+8*i+0) & 0xFFL) << 56));
      }
      for (int i=0; i<ARRLEN; i++) {
        a0[i] = -1;
      }
      test_unpack8_swap(a0, p8);
      for (int i=0; i<(ARRLEN&(-8)); i++) {
        errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
      }
    }

    // Timing is only reported for a run whose results were correct.
    if (errn > 0)
      return errn;

    System.out.println("Time");
    long start, end;

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sum(a1); }
    end = System.currentTimeMillis();
    System.out.println("test_sum: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_addc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_addc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_addv(a0, a1, (byte)VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_addv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_adda(a0, a1, a2); }
    end = System.currentTimeMillis();
    System.out.println("test_adda: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_subc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_subc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_subv(a0, a1, (byte)VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_subv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_suba(a0, a1, a2); }
    end = System.currentTimeMillis();
    System.out.println("test_suba: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_mulc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_mulc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_mulv(a0, a1, (byte)VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_mulv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_mula(a0, a1, a2); }
    end = System.currentTimeMillis();
    System.out.println("test_mula: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_divc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_divc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_divv(a0, a1, (byte)VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_divv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_diva(a0, a1, a2); }
    end = System.currentTimeMillis();
    System.out.println("test_diva: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_mulc_n(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_mulc_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_mulv(a0, a1, (byte)(-VALUE)); }
    end = System.currentTimeMillis();
    System.out.println("test_mulv_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_mula(a0, a1, a3); }
    end = System.currentTimeMillis();
    System.out.println("test_mula_n: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_divc_n(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_divc_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_divv(a0, a1, (byte)(-VALUE)); }
    end = System.currentTimeMillis();
    System.out.println("test_divv_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_diva(a0, a1, a3); }
    end = System.currentTimeMillis();
    System.out.println("test_diva_n: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_andc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_andc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_andv(a0, a1, (byte)BIT_MASK); }
    end = System.currentTimeMillis();
    System.out.println("test_andv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_anda(a0, a1, a4); }
    end = System.currentTimeMillis();
    System.out.println("test_anda: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_orc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_orc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_orv(a0, a1, (byte)BIT_MASK); }
    end = System.currentTimeMillis();
    System.out.println("test_orv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_ora(a0, a1, a4); }
    end = System.currentTimeMillis();
    System.out.println("test_ora: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_xorc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_xorc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_xorv(a0, a1, (byte)BIT_MASK); }
    end = System.currentTimeMillis();
    System.out.println("test_xorv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_xora(a0, a1, a4); }
    end = System.currentTimeMillis();
    System.out.println("test_xora: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_sllc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllv(a0, a1, VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_sllv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlc(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srlc: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlv(a0, a1, VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_srlv: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srac(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srac: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srav(a0, a1, VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_srav: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllc_n(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_sllc_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllv(a0, a1, -VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_sllv_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlc_n(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srlc_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlv(a0, a1, -VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_srlv_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srac_n(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srac_n: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srav(a0, a1, -VALUE); }
    end = System.currentTimeMillis();
    System.out.println("test_srav_n: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllc_o(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_sllc_o: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllv(a0, a1, SHIFT); }
    end = System.currentTimeMillis();
    System.out.println("test_sllv_o: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlc_o(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srlc_o: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlv(a0, a1, SHIFT); }
    end = System.currentTimeMillis();
    System.out.println("test_srlv_o: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srac_o(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srac_o: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srav(a0, a1, SHIFT); }
    end = System.currentTimeMillis();
    System.out.println("test_srav_o: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllc_on(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_sllc_on: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllv(a0, a1, -SHIFT); }
    end = System.currentTimeMillis();
    System.out.println("test_sllv_on: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlc_on(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srlc_on: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlv(a0, a1, -SHIFT); }
    end = System.currentTimeMillis();
    System.out.println("test_srlv_on: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srac_on(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srac_on: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srav(a0, a1, -SHIFT); }
    end = System.currentTimeMillis();
    System.out.println("test_srav_on: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllc_add(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_sllc_add: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllv_add(a0, a1, ADD_INIT); }
    end = System.currentTimeMillis();
    System.out.println("test_sllv_add: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlc_add(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srlc_add: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlv_add(a0, a1, ADD_INIT); }
    end = System.currentTimeMillis();
    System.out.println("test_srlv_add: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srac_add(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srac_add: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srav_add(a0, a1, ADD_INIT); }
    end = System.currentTimeMillis();
    System.out.println("test_srav_add: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllc_and(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_sllc_and: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_sllv_and(a0, a1, BIT_MASK); }
    end = System.currentTimeMillis();
    System.out.println("test_sllv_and: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlc_and(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srlc_and: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srlv_and(a0, a1, BIT_MASK); }
    end = System.currentTimeMillis();
    System.out.println("test_srlv_and: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srac_and(a0, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_srac_and: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_srav_and(a0, a1, BIT_MASK); }
    end = System.currentTimeMillis();
    System.out.println("test_srav_and: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_pack2(p2, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_pack2: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_unpack2(a0, p2); }
    end = System.currentTimeMillis();
    System.out.println("test_unpack2: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_pack2_swap(p2, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_pack2_swap: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_unpack2_swap(a0, p2); }
    end = System.currentTimeMillis();
    System.out.println("test_unpack2_swap: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_pack4(p4, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_pack4: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_unpack4(a0, p4); }
    end = System.currentTimeMillis();
    System.out.println("test_unpack4: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_pack4_swap(p4, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_pack4_swap: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_unpack4_swap(a0, p4); }
    end = System.currentTimeMillis();
    System.out.println("test_unpack4_swap: " + (end - start));

    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_pack8(p8, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_pack8: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_unpack8(a0, p8); }
    end = System.currentTimeMillis();
    System.out.println("test_unpack8: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_pack8_swap(p8, a1); }
    end = System.currentTimeMillis();
    System.out.println("test_pack8_swap: " + (end - start));
    start = System.currentTimeMillis();
    for (int i=0; i<ITERS; i++) { test_unpack8_swap(a0, p8); }
    end = System.currentTimeMillis();
    System.out.println("test_unpack8_swap: " + (end - start));

    return errn;
  }

  // ---- Reduction -----------------------------------------------------------

  /** Returns the int sum of all elements of {@code a1}. */
  static int test_sum(byte[] a1) {
    int sum = 0;
    for (int i = 0; i < a1.length; i+=1) {
      sum += a1[i];
    }
    return sum;
  }

  // ---- Arithmetic: suffix c = constant operand, v = scalar argument,
  // ---- a = per-element array operand, _n = negated constant ----------------

  static void test_addc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]+VALUE);
    }
  }
  static void test_addv(byte[] a0, byte[] a1, byte b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]+b);
    }
  }
  static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]+a2[i]);
    }
  }

  static void test_subc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]-VALUE);
    }
  }
  static void test_subv(byte[] a0, byte[] a1, byte b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]-b);
    }
  }
  static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]-a2[i]);
    }
  }

  static void test_mulc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]*VALUE);
    }
  }
  static void test_mulc_n(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]*(-VALUE));
    }
  }
  static void test_mulv(byte[] a0, byte[] a1, byte b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]*b);
    }
  }
  static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]*a2[i]);
    }
  }

  static void test_divc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]/VALUE);
    }
  }
  static void test_divc_n(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]/(-VALUE));
    }
  }
  static void test_divv(byte[] a0, byte[] a1, byte b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]/b);
    }
  }
  static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]/a2[i]);
    }
  }

  // ---- Bitwise logic -------------------------------------------------------

  static void test_andc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]&BIT_MASK);
    }
  }
  static void test_andv(byte[] a0, byte[] a1, byte b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]&b);
    }
  }
  static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]&a2[i]);
    }
  }

  static void test_orc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]|BIT_MASK);
    }
  }
  static void test_orv(byte[] a0, byte[] a1, byte b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]|b);
    }
  }
  static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]|a2[i]);
    }
  }

  static void test_xorc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]^BIT_MASK);
    }
  }
  static void test_xorv(byte[] a0, byte[] a1, byte b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]^b);
    }
  }
  static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]^a2[i]);
    }
  }

  // ---- Shifts. Suffixes: c = constant count, v = count passed in,
  // ---- _n = -VALUE, _o = SHIFT, _on = -SHIFT,
  // ---- _add / _and = add or mask the element before shifting by VALUE ------

  // Left shift (<<)
  static void test_sllc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]<<VALUE);
    }
  }
  static void test_sllc_n(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]<<(-VALUE));
    }
  }
  static void test_sllc_o(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]<<SHIFT);
    }
  }
  static void test_sllc_on(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]<<(-SHIFT));
    }
  }
  static void test_sllv(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]<<b);
    }
  }
  static void test_sllc_add(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
    }
  }
  static void test_sllv_add(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] + b)<<VALUE);
    }
  }
  static void test_sllc_and(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
    }
  }
  static void test_sllv_and(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] & b)<<VALUE);
    }
  }

  // Logical right shift (>>>); the element is widened to int before shifting.
  static void test_srlc(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>>VALUE);
    }
  }
  static void test_srlc_n(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>>(-VALUE));
    }
  }
  static void test_srlc_o(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>>SHIFT);
    }
  }
  static void test_srlc_on(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>>(-SHIFT));
    }
  }
  static void test_srlv(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>>b);
    }
  }
  static void test_srlc_add(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
    }
  }
  static void test_srlv_add(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] + b)>>>VALUE);
    }
  }
  static void test_srlc_and(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
    }
  }
  static void test_srlv_and(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] & b)>>>VALUE);
    }
  }

  // Arithmetic right shift (>>)
  static void test_srac(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>VALUE);
    }
  }
  static void test_srac_n(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>(-VALUE));
    }
  }
  static void test_srac_o(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>SHIFT);
    }
  }
  static void test_srac_on(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>(-SHIFT));
    }
  }
  static void test_srav(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)(a1[i]>>b);
    }
  }
  static void test_srac_add(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
    }
  }
  static void test_srav_add(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] + b)>>VALUE);
    }
  }
  static void test_srac_and(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
    }
  }
  static void test_srav_and(byte[] a0, byte[] a1, int b) {
    for (int i = 0; i < a0.length; i+=1) {
      a0[i] = (byte)((a1[i] & b)>>VALUE);
    }
  }

  // ---- Pack / unpack. packN combines N consecutive bytes into one wider
  // ---- element with byte 0 in the low bits; the _swap variants put byte 0
  // ---- in the high bits. Each method returns without writing anything when
  // ---- the byte array is too short for the packed array. -------------------

  static void test_pack2(short[] p2, byte[] a1) {
    if (p2.length*2 > a1.length) return;
    for (int i = 0; i < p2.length; i+=1) {
      short l0 = (short)a1[i*2+0];
      short l1 = (short)a1[i*2+1];
      p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
    }
  }
  static void test_unpack2(byte[] a0, short[] p2) {
    if (p2.length*2 > a0.length) return;
    for (int i = 0; i < p2.length; i+=1) {
      short l = p2[i];
      a0[i*2+0] = (byte)(l & 0xFF);
      a0[i*2+1] = (byte)(l >> 8);
    }
  }
  static void test_pack2_swap(short[] p2, byte[] a1) {
    if (p2.length*2 > a1.length) return;
    for (int i = 0; i < p2.length; i+=1) {
      short l0 = (short)a1[i*2+0];
      short l1 = (short)a1[i*2+1];
      p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
    }
  }
  static void test_unpack2_swap(byte[] a0, short[] p2) {
    if (p2.length*2 > a0.length) return;
    for (int i = 0; i < p2.length; i+=1) {
      short l = p2[i];
      a0[i*2+0] = (byte)(l >> 8);
      a0[i*2+1] = (byte)(l & 0xFF);
    }
  }

  static void test_pack4(int[] p4, byte[] a1) {
    if (p4.length*4 > a1.length) return;
    for (int i = 0; i < p4.length; i+=1) {
      int l0 = (int)a1[i*4+0];
      int l1 = (int)a1[i*4+1];
      int l2 = (int)a1[i*4+2];
      int l3 = (int)a1[i*4+3];
      p4[i] = (l0 & 0xFF) |
             ((l1 & 0xFF) <<  8) |
             ((l2 & 0xFF) << 16) |
             ((l3 & 0xFF) << 24);
    }
  }
  static void test_unpack4(byte[] a0, int[] p4) {
    if (p4.length*4 > a0.length) return;
    for (int i = 0; i < p4.length; i+=1) {
      int l = p4[i];
      a0[i*4+0] = (byte)(l & 0xFF);
      a0[i*4+1] = (byte)(l >>  8);
      a0[i*4+2] = (byte)(l >> 16);
      a0[i*4+3] = (byte)(l >> 24);
    }
  }
  static void test_pack4_swap(int[] p4, byte[] a1) {
    if (p4.length*4 > a1.length) return;
    for (int i = 0; i < p4.length; i+=1) {
      int l0 = (int)a1[i*4+0];
      int l1 = (int)a1[i*4+1];
      int l2 = (int)a1[i*4+2];
      int l3 = (int)a1[i*4+3];
      p4[i] = (l3 & 0xFF) |
             ((l2 & 0xFF) <<  8) |
             ((l1 & 0xFF) << 16) |
             ((l0 & 0xFF) << 24);
    }
  }
  static void test_unpack4_swap(byte[] a0, int[] p4) {
    if (p4.length*4 > a0.length) return;
    for (int i = 0; i < p4.length; i+=1) {
      int l = p4[i];
      a0[i*4+0] = (byte)(l >> 24);
      a0[i*4+1] = (byte)(l >> 16);
      a0[i*4+2] = (byte)(l >>  8);
      a0[i*4+3] = (byte)(l & 0xFF);
    }
  }

  static void test_pack8(long[] p8, byte[] a1) {
    if (p8.length*8 > a1.length) return;
    for (int i = 0; i < p8.length; i+=1) {
      long l0 = (long)a1[i*8+0];
      long l1 = (long)a1[i*8+1];
      long l2 = (long)a1[i*8+2];
      long l3 = (long)a1[i*8+3];
      long l4 = (long)a1[i*8+4];
      long l5 = (long)a1[i*8+5];
      long l6 = (long)a1[i*8+6];
      long l7 = (long)a1[i*8+7];
      p8[i] = (l0 & 0xFFL) |
             ((l1 & 0xFFL) <<  8) |
             ((l2 & 0xFFL) << 16) |
             ((l3 & 0xFFL) << 24) |
             ((l4 & 0xFFL) << 32) |
             ((l5 & 0xFFL) << 40) |
             ((l6 & 0xFFL) << 48) |
             ((l7 & 0xFFL) << 56);
    }
  }
  static void test_unpack8(byte[] a0, long[] p8) {
    if (p8.length*8 > a0.length) return;
    for (int i = 0; i < p8.length; i+=1) {
      long l = p8[i];
      a0[i*8+0] = (byte)(l & 0xFFL);
      a0[i*8+1] = (byte)(l >>  8);
      a0[i*8+2] = (byte)(l >> 16);
      a0[i*8+3] = (byte)(l >> 24);
      a0[i*8+4] = (byte)(l >> 32);
      a0[i*8+5] = (byte)(l >> 40);
      a0[i*8+6] = (byte)(l >> 48);
      a0[i*8+7] = (byte)(l >> 56);
    }
  }
  static void test_pack8_swap(long[] p8, byte[] a1) {
    if (p8.length*8 > a1.length) return;
    for (int i = 0; i < p8.length; i+=1) {
      long l0 = (long)a1[i*8+0];
      long l1 = (long)a1[i*8+1];
      long l2 = (long)a1[i*8+2];
      long l3 = (long)a1[i*8+3];
      long l4 = (long)a1[i*8+4];
      long l5 = (long)a1[i*8+5];
      long l6 = (long)a1[i*8+6];
      long l7 = (long)a1[i*8+7];
      p8[i] = (l7 & 0xFFL) |
             ((l6 & 0xFFL) <<  8) |
             ((l5 & 0xFFL) << 16) |
             ((l4 & 0xFFL) << 24) |
             ((l3 & 0xFFL) << 32) |
             ((l2 & 0xFFL) << 40) |
             ((l1 & 0xFFL) << 48) |
             ((l0 & 0xFFL) << 56);
    }
  }
  static void test_unpack8_swap(byte[] a0, long[] p8) {
    if (p8.length*8 > a0.length) return;
    for (int i = 0; i < p8.length; i+=1) {
      long l = p8[i];
      a0[i*8+0] = (byte)(l >> 56);
      a0[i*8+1] = (byte)(l >> 48);
      a0[i*8+2] = (byte)(l >> 40);
      a0[i*8+3] = (byte)(l >> 32);
      a0[i*8+4] = (byte)(l >> 24);
      a0[i*8+5] = (byte)(l >> 16);
      a0[i*8+6] = (byte)(l >>  8);
      a0[i*8+7] = (byte)(l & 0xFFL);
    }
  }

  // ---- Element checks ------------------------------------------------------

  /**
   * Compares one computed element with its expected value.
   *
   * @param text label printed in front of the mismatch report
   * @param i    index of the element, used only in the report
   * @param elem computed value
   * @param val  expected value
   * @return 1 when the values differ (after printing a report to stderr),
   *         0 otherwise
   */
  static int verify(String text, int i, byte elem, byte val) {
    if (elem != val) {
      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
      return 1;
    }
    return 0;
  }

  /** Same as the byte overload, for packed short elements. */
  static int verify(String text, int i, short elem, short val) {
    if (elem != val) {
      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
      return 1;
    }
    return 0;
  }

  /** Same as the byte overload, for packed int elements; reports in hex. */
  static int verify(String text, int i, int elem, int val) {
    if (elem != val) {
      System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
      return 1;
    }
    return 0;
  }

  /** Same as the byte overload, for packed long elements; reports in hex. */
  static int verify(String text, int i, long elem, long val) {
    if (elem != val) {
      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
      return 1;
    }
    return 0;
  }
}