1 /*
  2  *  Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
  3  *  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  *  This code is free software; you can redistribute it and/or modify it
  6  *  under the terms of the GNU General Public License version 2 only, as
  7  *  published by the Free Software Foundation.
  8  *
  9  *  This code is distributed in the hope that it will be useful, but WITHOUT
 10  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  *  version 2 for more details (a copy is included in the LICENSE file that
 13  *  accompanied this code).
 14  *
 15  *  You should have received a copy of the GNU General Public License version
 16  *  2 along with this work; if not, write to the Free Software Foundation,
 17  *  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  *  Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  *  or visit www.oracle.com if you need additional information or have any
 21  *  questions.
 22  *
 23  */
 24 package org.openjdk.bench.jdk.incubator.vector;
 25 
 26 import java.nio.ByteBuffer;
 27 import java.nio.ByteOrder;
 28 import java.util.concurrent.TimeUnit;
 29 import jdk.incubator.foreign.CLinker;
 30 import jdk.incubator.foreign.MemoryAccess;
 31 import jdk.incubator.foreign.MemoryAddress;
 32 import jdk.incubator.foreign.MemorySegment;
 33 import jdk.incubator.foreign.ResourceScope;
 34 import jdk.incubator.vector.ByteVector;
 35 import jdk.incubator.vector.VectorOperators;
 36 import jdk.incubator.vector.VectorSpecies;
 37 import org.openjdk.jmh.annotations.Benchmark;
 38 import org.openjdk.jmh.annotations.BenchmarkMode;
 39 import org.openjdk.jmh.annotations.CompilerControl;
 40 import org.openjdk.jmh.annotations.Fork;
 41 import org.openjdk.jmh.annotations.Measurement;
 42 import org.openjdk.jmh.annotations.Mode;
 43 import org.openjdk.jmh.annotations.OutputTimeUnit;
 44 import org.openjdk.jmh.annotations.Param;
 45 import org.openjdk.jmh.annotations.Setup;
 46 import org.openjdk.jmh.annotations.State;
 47 import org.openjdk.jmh.annotations.Warmup;
 48 
 49 @BenchmarkMode(Mode.AverageTime)
 50 @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
 51 @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
 52 @State(org.openjdk.jmh.annotations.Scope.Thread)
 53 @OutputTimeUnit(TimeUnit.NANOSECONDS)
 54 @Fork(value = 1, jvmArgsAppend = {
 55     "--add-modules=jdk.incubator.foreign,jdk.incubator.vector",
 56     "-Dforeign.restricted=permit",
 57     "--enable-native-access", "ALL-UNNAMED",
 58     "-Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=1"})
 59 public class TestLoadStoreBytes {
 60   private static final VectorSpecies<Byte> SPECIES = VectorSpecies.ofLargestShape(byte.class);
 61 
 62   @Param("1024")
 63   private int size;
 64 
 65   private byte[] srcArray;
 66 
 67   private byte[] dstArray;
 68 
 69 
 70   private ByteBuffer srcBufferHeap;
 71 
 72   private ByteBuffer dstBufferHeap;
 73 
 74   private ByteBuffer srcBufferNative;
 75 
 76   private ByteBuffer dstBufferNative;
 77 
 78 
 79   private ResourceScope implicitScope;
 80 
 81   private MemorySegment srcSegmentImplicit;
 82 
 83   private MemorySegment dstSegmentImplicit;
 84 
 85   private ByteBuffer srcBufferSegmentImplicit;
 86 
 87   private ByteBuffer dstBufferSegmentImplicit;
 88 
 89 
 90   private MemoryAddress srcAddress;
 91 
 92   private MemoryAddress dstAddress;
 93 
 94   byte[] a, b, c;
 95 
 96   @Setup
 97   public void setup() {
 98     srcArray = new byte[size];
 99     dstArray = srcArray.clone();
100     for (int i = 0; i < srcArray.length; i++) {
101       srcArray[i] = (byte) i;
102     }
103 
104 
105     srcBufferHeap = ByteBuffer.allocate(size);
106     dstBufferHeap = ByteBuffer.allocate(size);
107 
108     srcBufferNative = ByteBuffer.allocateDirect(size);
109     dstBufferNative = ByteBuffer.allocateDirect(size);
110 
111 
112     implicitScope = ResourceScope.newImplicitScope();
113     srcSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
114     srcBufferSegmentImplicit = srcSegmentImplicit.asByteBuffer();
115     dstSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
116     dstBufferSegmentImplicit = dstSegmentImplicit.asByteBuffer();
117 
118 
119     srcAddress = CLinker.allocateMemory(size);
120     dstAddress = CLinker.allocateMemory(size);
121 
122     a = new byte[size];
123     b = new byte[size];
124     c = new byte[size];
125   }
126 
127 
128   @Benchmark
129   public void array() {
130 //    final var srcArray = this.srcArray;
131     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
132       var v = ByteVector.fromArray(SPECIES, srcArray, i);
133       v.intoArray(dstArray, i);
134     }
135   }
136 
137   @Benchmark
138   public void array2() {
139 //    final var srcArray = this.srcArray;
140     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
141       var v = ByteVector.fromByteArray(SPECIES, srcArray, i, ByteOrder.nativeOrder());
142       v.intoByteArray(dstArray, i, ByteOrder.nativeOrder());
143     }
144   }
145 
146   @Benchmark
147   public void arrayScalar() {
148     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i ++) {
149       var v = srcArray[i];
150       dstArray[i] = v;
151     }
152   }
153 
154   @Benchmark
155   public void vectAdd1() {
156     var a = this.a;
157     var b = this.b;
158     var c = this.c;
159 
160     for (int i = 0; i < a.length; i += SPECIES.length()) {
161       ByteVector av = ByteVector.fromArray(SPECIES, a, i);
162       ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
163       av.lanewise(VectorOperators.ADD, bv).intoArray(c, i);
164     }
165   }
166 
167   @Benchmark
168   public void vectAdd2() {
169     var a = this.a;
170     var b = this.b;
171     var c = this.c;
172 
173     for (int i = 0; i < a.length/SPECIES.length(); i++) {
174       ByteVector av = ByteVector.fromArray(SPECIES, a, (i*SPECIES.length()));
175       ByteVector bv = ByteVector.fromArray(SPECIES, b, (i*SPECIES.length()));
176       av.lanewise(VectorOperators.ADD, bv).intoArray(c, (i*SPECIES.length()));
177     }
178   }
179 
180   @Benchmark
181   public void arrayAdd() {
182     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
183       var v = ByteVector.fromArray(SPECIES, srcArray, i);
184       v = v.add(v);
185       v.intoArray(dstArray, i);
186     }
187   }
188 
189   @Benchmark
190   public void bufferHeap() {
191     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
192       var v = ByteVector.fromByteBuffer(SPECIES, srcBufferHeap, i, ByteOrder.nativeOrder());
193       v.intoByteBuffer(dstBufferHeap, i, ByteOrder.nativeOrder());
194     }
195   }
196 
197   @Benchmark
198   public void bufferHeapScalar() {
199     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
200       var v = srcBufferHeap.get(i);
201       dstBufferHeap.put(i, v);
202     }
203   }
204 
205   @Benchmark
206   @CompilerControl(CompilerControl.Mode.PRINT)
207   public void bufferNative() {
208     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
209       var v = ByteVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder());
210       v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder());
211     }
212   }
213 
214   @Benchmark
215   public void bufferNativeScalar() {
216     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
217       var v = srcBufferNative.get(i);
218       dstBufferNative.put(i, v);
219     }
220   }
221 
222 
223   @Benchmark
224   public void bufferSegmentImplicit() {
225     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
226       var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentImplicit, i, ByteOrder.nativeOrder());
227       v.intoByteBuffer(dstBufferSegmentImplicit, i, ByteOrder.nativeOrder());
228     }
229   }
230 
231   @Benchmark
232   @CompilerControl(CompilerControl.Mode.PRINT)
233   public void segmentImplicitScalar() {
234     for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
235       var v = MemoryAccess.getByteAtOffset(srcSegmentImplicit, i);
236       MemoryAccess.setByteAtOffset(dstSegmentImplicit, i, v);
237     }
238   }
239 
240   @Benchmark
241   public void bufferSegmentConfined() {
242     try (final var scope = ResourceScope.newConfinedScope()) {
243       final var srcBufferSegmentConfined = srcAddress.asSegment(size, scope).asByteBuffer();
244       final var dstBufferSegmentConfined = dstAddress.asSegment(size, scope).asByteBuffer();
245 
246       for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
247         var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentConfined, i, ByteOrder.nativeOrder());
248         v.intoByteBuffer(dstBufferSegmentConfined, i, ByteOrder.nativeOrder());
249       }
250     }
251   }
252 }