1 /*
  2  *  Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
  3  *  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  *  This code is free software; you can redistribute it and/or modify it
  6  *  under the terms of the GNU General Public License version 2 only, as
  7  *  published by the Free Software Foundation.
  8  *
  9  *  This code is distributed in the hope that it will be useful, but WITHOUT
 10  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  *  version 2 for more details (a copy is included in the LICENSE file that
 13  *  accompanied this code).
 14  *
 15  *  You should have received a copy of the GNU General Public License version
 16  *  2 along with this work; if not, write to the Free Software Foundation,
 17  *  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  *  Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  *  or visit www.oracle.com if you need additional information or have any
 21  *  questions.
 22  *
 23  */
 24 package org.openjdk.bench.jdk.incubator.vector;
 25 
 26 import java.nio.ByteBuffer;
 27 import java.nio.ByteOrder;
 28 import java.util.concurrent.TimeUnit;
 29 import jdk.incubator.foreign.CLinker;
 30 import jdk.incubator.foreign.MemoryAddress;
 31 import jdk.incubator.foreign.MemorySegment;
 32 import jdk.incubator.foreign.ResourceScope;
 33 import jdk.incubator.vector.ShortVector;
 34 import jdk.incubator.vector.VectorOperators;
 35 import jdk.incubator.vector.VectorSpecies;
 36 import org.openjdk.jmh.annotations.Benchmark;
 37 import org.openjdk.jmh.annotations.BenchmarkMode;
 38 import org.openjdk.jmh.annotations.CompilerControl;
 39 import org.openjdk.jmh.annotations.Fork;
 40 import org.openjdk.jmh.annotations.Measurement;
 41 import org.openjdk.jmh.annotations.Mode;
 42 import org.openjdk.jmh.annotations.OutputTimeUnit;
 43 import org.openjdk.jmh.annotations.Param;
 44 import org.openjdk.jmh.annotations.Setup;
 45 import org.openjdk.jmh.annotations.State;
 46 import org.openjdk.jmh.annotations.TearDown;
 47 import org.openjdk.jmh.annotations.Warmup;
 48 
 49 @BenchmarkMode(Mode.AverageTime)
 50 @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
 51 @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
 52 @State(org.openjdk.jmh.annotations.Scope.Thread)
 53 @OutputTimeUnit(TimeUnit.NANOSECONDS)
 54 @Fork(value = 1, jvmArgsAppend = {
 55     "--add-modules=jdk.incubator.foreign,jdk.incubator.vector",
 56     "-Dforeign.restricted=permit",
 57     "--enable-native-access", "ALL-UNNAMED"})
 58 public class TestLoadStoreShort {
 59   private static final VectorSpecies<Short> SPECIES = VectorSpecies.ofLargestShape(short.class);
 60 
 61   @Param("256")
 62   private int size;
 63 
 64   private int longSize;
 65 
 66   private short[] srcArray;
 67 
 68   private short[] dstArray;
 69 
 70 
 71   private ByteBuffer srcBufferHeap;
 72 
 73   private ByteBuffer dstBufferHeap;
 74 
 75   private ByteBuffer srcBufferNative;
 76 
 77   private ByteBuffer dstBufferNative;
 78 
 79 
 80   private ResourceScope implicitScope;
 81 
 82   private MemorySegment srcSegmentImplicit;
 83 
 84   private MemorySegment dstSegmentImplicit;
 85 
 86   private ByteBuffer srcBufferSegmentImplicit;
 87 
 88   private ByteBuffer dstBufferSegmentImplicit;
 89 
 90 
 91   private MemoryAddress srcAddress;
 92 
 93   private MemoryAddress dstAddress;
 94 
 95 //  private byte[] bigArray = new byte[Integer.MAX_VALUE];
 96 
 97   private volatile short[] a, b, c;
 98   @Setup
 99   public void setup() {
100     var longSize = size / Short.BYTES;
101     srcArray = new short[longSize];
102     dstArray = srcArray.clone();
103     for (int i = 0; i < srcArray.length; i++) {
104       srcArray[i] = (short) i;
105     }
106 
107 
108     srcBufferHeap = ByteBuffer.allocate(size);
109     dstBufferHeap = ByteBuffer.allocate(size);
110 
111     srcBufferNative = ByteBuffer.allocateDirect(size);
112     dstBufferNative = ByteBuffer.allocateDirect(size);
113 
114 
115     implicitScope = ResourceScope.newImplicitScope();
116     srcSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
117     srcBufferSegmentImplicit = srcSegmentImplicit.asByteBuffer();
118     dstSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
119     dstBufferSegmentImplicit = dstSegmentImplicit.asByteBuffer();
120 
121 
122     srcAddress = CLinker.allocateMemory(size);
123     dstAddress = CLinker.allocateMemory(size);
124 
125     this.longSize = longSize;
126 
127     a = new short[size];
128     b = new short[size];
129     c = new short[size];
130 
131   }
132 
133   @TearDown
134   public void tearDown() {
135     CLinker.freeMemory(srcAddress);
136     CLinker.freeMemory(dstAddress);
137   }
138 
139   @Benchmark
140   @CompilerControl(CompilerControl.Mode.PRINT)
141   public void array() {
142     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
143       var v = ShortVector.fromArray(SPECIES, srcArray, i);
144       v.intoArray(dstArray, i);
145     }
146   }
147 
148   @Benchmark
149   public void vectAdd1() {
150     var a = this.a;
151     var b = this.b;
152     var c = this.c;
153 
154     for (int i = 0; i < a.length; i += SPECIES.length()) {
155       ShortVector av = ShortVector.fromArray(SPECIES, a, i);
156       ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
157       av.lanewise(VectorOperators.ADD, bv).intoArray(c, i);
158     }
159   }
160 
161   @Benchmark
162   public void vectAdd2() {
163     var a = this.a;
164     var b = this.b;
165     var c = this.c;
166 
167     for (int i = 0; i < a.length/SPECIES.length(); i++) {
168       ShortVector av = ShortVector.fromArray(SPECIES, a, (i*SPECIES.length()));
169       ShortVector bv = ShortVector.fromArray(SPECIES, b, (i*SPECIES.length()));
170       av.lanewise(VectorOperators.ADD, bv).intoArray(c, (i*SPECIES.length()));
171     }
172   }
173 
174   @Benchmark
175   public void arrayAdd() {
176     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
177       var v = ShortVector.fromArray(SPECIES, srcArray, i);
178       v = v.add(v);
179       v.intoArray(dstArray, i);
180     }
181   }
182 
183   @Benchmark
184   public void bufferHeap() {
185     for (int i = 0; i < SPECIES.loopBound(longSize); i += SPECIES.length()) {
186       var v = ShortVector.fromByteBuffer(SPECIES, srcBufferHeap, i, ByteOrder.nativeOrder());
187       v.intoByteBuffer(dstBufferHeap, i, ByteOrder.nativeOrder());
188     }
189   }
190 
191   @Benchmark
192   public void bufferNative() {
193     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
194       var v = ShortVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder());
195       v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder());
196     }
197   }
198 
199   @Benchmark
200   public void bufferNativeAdd() {
201     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
202       var v = ShortVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder());
203       v = v.add(v);
204       v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder());
205     }
206   }
207 
208   @Benchmark
209   public void bufferSegmentImplicit() {
210     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
211       var v = ShortVector.fromByteBuffer(SPECIES, srcBufferSegmentImplicit, i, ByteOrder.nativeOrder());
212       v.intoByteBuffer(dstBufferSegmentImplicit, i, ByteOrder.nativeOrder());
213     }
214   }
215 
216   @Benchmark
217   public void bufferSegmentConfined() {
218     try (final var scope = ResourceScope.newConfinedScope()) {
219       final var srcBufferSegmentConfined = srcAddress.asSegment(size, scope).asByteBuffer();
220       final var dstBufferSegmentConfined = dstAddress.asSegment(size, scope).asByteBuffer();
221 
222       for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
223         var v = ShortVector.fromByteBuffer(SPECIES, srcBufferSegmentConfined, i, ByteOrder.nativeOrder());
224         v.intoByteBuffer(dstBufferSegmentConfined, i, ByteOrder.nativeOrder());
225       }
226     }
227   }
228 }