1 /* 2 * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 package org.openjdk.bench.jdk.incubator.vector; 25 26 import java.nio.ByteBuffer; 27 import java.nio.ByteOrder; 28 import java.util.concurrent.TimeUnit; 29 import jdk.incubator.foreign.MemoryAddress; 30 import jdk.incubator.foreign.MemorySegment; 31 import jdk.incubator.foreign.ResourceScope; 32 import jdk.incubator.vector.ByteVector; 33 import jdk.incubator.vector.VectorOperators; 34 import jdk.incubator.vector.VectorSpecies; 35 import org.openjdk.jmh.annotations.Benchmark; 36 import org.openjdk.jmh.annotations.BenchmarkMode; 37 import org.openjdk.jmh.annotations.CompilerControl; 38 import org.openjdk.jmh.annotations.Fork; 39 import org.openjdk.jmh.annotations.Measurement; 40 import org.openjdk.jmh.annotations.Mode; 41 import org.openjdk.jmh.annotations.OutputTimeUnit; 42 import org.openjdk.jmh.annotations.Param; 43 import org.openjdk.jmh.annotations.Setup; 44 import org.openjdk.jmh.annotations.State; 45 import org.openjdk.jmh.annotations.Warmup; 46 47 import static jdk.incubator.foreign.ValueLayout.JAVA_BYTE; 48 49 @BenchmarkMode(Mode.AverageTime) 50 @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) 51 @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) 52 @State(org.openjdk.jmh.annotations.Scope.Thread) 53 @OutputTimeUnit(TimeUnit.NANOSECONDS) 54 @Fork(value = 1, jvmArgsAppend = { 55 "--add-modules=jdk.incubator.foreign,jdk.incubator.vector", 56 "-Dforeign.restricted=permit", 57 "--enable-native-access", "ALL-UNNAMED", 58 "-Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=1"}) 59 public class TestLoadStoreBytes { 60 private static final VectorSpecies<Byte> SPECIES = VectorSpecies.ofLargestShape(byte.class); 61 62 @Param("1024") 63 private int size; 64 65 private byte[] srcArray; 66 67 private byte[] dstArray; 68 69 70 private ByteBuffer srcBufferHeap; 71 72 private ByteBuffer dstBufferHeap; 73 74 private ByteBuffer srcBufferNative; 75 76 private ByteBuffer dstBufferNative; 77 78 79 private ResourceScope implicitScope; 80 81 private MemorySegment srcSegmentImplicit; 82 83 private MemorySegment dstSegmentImplicit; 84 85 private ByteBuffer srcBufferSegmentImplicit; 86 87 private ByteBuffer dstBufferSegmentImplicit; 88 89 90 private MemoryAddress srcAddress; 91 92 private MemoryAddress dstAddress; 93 94 byte[] a, b, c; 95 96 @Setup 97 public void setup() { 98 srcArray = new byte[size]; 99 dstArray = srcArray.clone(); 100 for (int i = 0; i < srcArray.length; i++) { 101 srcArray[i] = (byte) i; 102 } 103 104 105 srcBufferHeap = ByteBuffer.allocate(size); 106 dstBufferHeap = ByteBuffer.allocate(size); 107 108 srcBufferNative = ByteBuffer.allocateDirect(size); 109 dstBufferNative = ByteBuffer.allocateDirect(size); 110 111 112 implicitScope = ResourceScope.newImplicitScope(); 113 srcSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope); 114 srcBufferSegmentImplicit = srcSegmentImplicit.asByteBuffer(); 115 dstSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope); 116 dstBufferSegmentImplicit = dstSegmentImplicit.asByteBuffer(); 117 118 119 srcAddress = MemorySegment.allocateNative(size, implicitScope).address(); 120 dstAddress = MemorySegment.allocateNative(size, implicitScope).address(); 121 122 a = new byte[size]; 123 b = new byte[size]; 124 c = new byte[size]; 125 } 126 127 128 @Benchmark 129 public void array() { 130 // final var srcArray = this.srcArray; 131 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) { 132 var v = ByteVector.fromArray(SPECIES, srcArray, i); 133 v.intoArray(dstArray, i); 134 } 135 } 136 137 @Benchmark 138 public void array2() { 139 // final var srcArray = this.srcArray; 140 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) { 141 var v = ByteVector.fromByteArray(SPECIES, srcArray, i, ByteOrder.nativeOrder()); 142 v.intoByteArray(dstArray, i, ByteOrder.nativeOrder()); 143 } 144 } 145 146 @Benchmark 147 public void arrayScalar() { 148 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i ++) { 149 var v = srcArray[i]; 150 dstArray[i] = v; 151 } 152 } 153 154 @Benchmark 155 public void vectAdd1() { 156 var a = this.a; 157 var b = this.b; 158 var c = this.c; 159 160 for (int i = 0; i < a.length; i += SPECIES.length()) { 161 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 162 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 163 av.lanewise(VectorOperators.ADD, bv).intoArray(c, i); 164 } 165 } 166 167 @Benchmark 168 public void vectAdd2() { 169 var a = this.a; 170 var b = this.b; 171 var c = this.c; 172 173 for (int i = 0; i < a.length/SPECIES.length(); i++) { 174 ByteVector av = ByteVector.fromArray(SPECIES, a, (i*SPECIES.length())); 175 ByteVector bv = ByteVector.fromArray(SPECIES, b, (i*SPECIES.length())); 176 av.lanewise(VectorOperators.ADD, bv).intoArray(c, (i*SPECIES.length())); 177 } 178 } 179 180 @Benchmark 181 public void arrayAdd() { 182 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) { 183 var v = ByteVector.fromArray(SPECIES, srcArray, i); 184 v = v.add(v); 185 v.intoArray(dstArray, i); 186 } 187 } 188 189 @Benchmark 190 public void bufferHeap() { 191 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) { 192 var v = ByteVector.fromByteBuffer(SPECIES, srcBufferHeap, i, ByteOrder.nativeOrder()); 193 v.intoByteBuffer(dstBufferHeap, i, ByteOrder.nativeOrder()); 194 } 195 } 196 197 @Benchmark 198 public void bufferHeapScalar() { 199 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) { 200 var v = srcBufferHeap.get(i); 201 dstBufferHeap.put(i, v); 202 } 203 } 204 205 @Benchmark 206 @CompilerControl(CompilerControl.Mode.PRINT) 207 public void bufferNative() { 208 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) { 209 var v = ByteVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder()); 210 v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder()); 211 } 212 } 213 214 @Benchmark 215 public void bufferNativeScalar() { 216 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) { 217 var v = srcBufferNative.get(i); 218 dstBufferNative.put(i, v); 219 } 220 } 221 222 223 @Benchmark 224 public void bufferSegmentImplicit() { 225 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) { 226 var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentImplicit, i, ByteOrder.nativeOrder()); 227 v.intoByteBuffer(dstBufferSegmentImplicit, i, ByteOrder.nativeOrder()); 228 } 229 } 230 231 @Benchmark 232 @CompilerControl(CompilerControl.Mode.PRINT) 233 public void segmentImplicitScalar() { 234 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) { 235 var v = srcSegmentImplicit.get(JAVA_BYTE, i); 236 dstSegmentImplicit.set(JAVA_BYTE, i, v); 237 } 238 } 239 240 @Benchmark 241 public void bufferSegmentConfined() { 242 try (final var scope = ResourceScope.newConfinedScope()) { 243 final var srcBufferSegmentConfined = MemorySegment.ofAddress(srcAddress, size, scope).asByteBuffer(); 244 final var dstBufferSegmentConfined = MemorySegment.ofAddress(dstAddress, size, scope).asByteBuffer(); 245 246 for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) { 247 var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentConfined, i, ByteOrder.nativeOrder()); 248 v.intoByteBuffer(dstBufferSegmentConfined, i, ByteOrder.nativeOrder()); 249 } 250 } 251 } 252 }