1 /*
2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package org.openjdk.bench.jdk.incubator.vector.bigdata;
25
26 import jdk.incubator.vector.*;
27 import java.util.Base64;
28 import java.util.concurrent.TimeUnit;
29 import org.openjdk.jmh.annotations.*;
30
31 @BenchmarkMode(Mode.Throughput)
32 @Warmup(iterations = 3, time = 1)
33 @Measurement(iterations = 5, time = 1)
34 @OutputTimeUnit(TimeUnit.MILLISECONDS)
35 @State(Scope.Benchmark)
36 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
37 public class VectorDistance {
38
// Vector species used by the float benchmarks: fixed 128-bit, fixed 256-bit, and SPECIES_MAX.
static final VectorSpecies<Float> SPECIES_FLOAT_128 = FloatVector.SPECIES_128;
static final VectorSpecies<Float> SPECIES_FLOAT_256 = FloatVector.SPECIES_256;
static final VectorSpecies<Float> SPECIES_FLOAT_MAX = FloatVector.SPECIES_MAX;
// Matching species for the double benchmarks.
static final VectorSpecies<Double> SPECIES_DOUBLE_128 = DoubleVector.SPECIES_128;
static final VectorSpecies<Double> SPECIES_DOUBLE_256 = DoubleVector.SPECIES_256;
static final VectorSpecies<Double> SPECIES_DOUBLE_MAX = DoubleVector.SPECIES_MAX;


// NOTE(review): not referenced anywhere in the visible part of this file -- confirm before removing.
static long num = 0;
48 private static String x1 = "L5GSwXhHpEH05mNBHnmcQMTw3EBnagFCW1DGQHe/nUFO1B1BlJOpwCBJ9j" +
49 "+RkY1BzqKeQSglN0Gy7krB5CSfQFzxB8Djn5nB2KNFwKcSRMGYzRQ7qMGWQZ0FF0FTceDAIKjxv/zhdkHFZMHB6hU4QZbo2cCAryRB+7OOQCxbfEHRtBlBxPG6P0BYSD+Pgz9BqzOLv/nVO8C9x5/BQOY/wTTIx0GfW1BBGv2lQQwdDcGCqBfB12t/QKUBoEEejIXBPN9kQWsFbEGsGcnBkqJkwKhLgr/IQZxAelAWQfcYpcFQv0HBeiGCQWExhEDrKAnBpAwBQV4bVcFpGNjAyDsNQVOc+0CSc4nBgG/ZQQGRccEXts9BKhYzQNK5+MAlU0DBzPGWwPGRCcEZC5/ADxOcv7lUkEBomM5BuqKiwV2MU8HNGHDBSB84QZRSyMB8RZlBVFdZQXSVgcBTQQBCdWa/QBQ0qkGILUW/6NA9QQnkmsG+5PPBj0UowT6nYD9cwpjAS/w5wTbX2UH8Gb5AR/HUQMTNAMJ9MN9AgHoqPbbUyUFbe47BBHANQWZJBsGBuPlBy94EQADeXsG5eOtBnA+yQCRka8EMcGLBjuoRwb4k7sAasB5Bmk/UwaI1akErp6xBq5G5wNo1E8KHa7tB3IiKQTCffcHphK1BTgJzwVY3JEEip/VAlmgXQSeKCsLEABs/n1/xwL5u58CgQY49ahUWQoAJjj1hhqBASXrrQb6nM0H2fY+/thtbQAQobMAohvXAxM3xv7xyqD+MvpDBrlDiQfBvPcGA8X5AQE4SwXhGx7+uLA1AxY8xu2mVjEE7KlFBArveQFNMtUD3N7DB12BbQcyH4cFhSw3Bu5VWQeTW0z9o03TBxtMlQctp/8E/lLVAGUtTwZsGJMKv/R5A1HKVQV6RhsC1Ji5AcXLFQJd6f0HbB+e+ZDi8wV9tQ0FwCN/B+A89v2DrU0Bcpc5BglTeQH5dT0HePS9Al4XPwdA6YEFlueXAbWKSQSBWzkBy2RnCt9Yawl9b77+xgxBC9eCqQd8f0kFoBG9BVxrkQZh2QkHNW/zBEQiawLJEocDhutTA8zEYwbIvEUIO1T9BmlOTwIhbNEDhrtlAVk9BQARQaj89NQNC6usGwDfQrkBSJrlAON7FQQ8FqsEEc/TAY3zeQYsqUEHV8QPBHJoYQQdn5kGyCiJBlDMYQBBNoUFrxbw/NlmPP3B24j6ChIdBXk2bwdxdDMFQw1rA4hybQXTchr8d9wvBuCbLQSMKmMBH4RpBQIXePa5DT8IjgvtBgAetQZgGgMEprc1BAOeSPJ5XpEEMa0NBgX4uwX7XIsG2Ie0688iqQSpJPsCAy9LBAGHkPw==";
50 private static String x2 = "5R3ZwGPrxEFMKyNBLFSeQdYav0BQtDFCur7WQAgRYEGHFYC/MKZtvkiFUT+RNXfBVsGBP2KWSUCmAUTBIf+EQG57kMCtXo7BV1DuwLd98r+YzRQ7qKXNwBMSPUFNQffBPrxeQYw1t7/7JjFAKNaXP+cMSEG6GI5BuEx0wUANDMEvDqdAT9YEworQTEEiVBZBiMejQP7t67+iRwzB3HadQB1be0Ei5g5BMt+cQXvYTUHwZsLAuoy3QfrR6EFrIiHB5X8Dwc8XbUH8Yr8/AvGEwa5GkUH3F5tAP8YJQTiDyz+gKsRAFl/rwDxJuUAPyyxBvg2gQU6bjMEPEa7Bz6wYQpQy7MDF5LvB8HP+QCJdicHQDjpC6RpWQcGeY8FMK6vBoeUjQcPYmUG2QmRBBI0nwScESsGMAcxBvRmawRL2A8IByKNAgTQBQuxdDEGq8JBBHJWmQSBDfz8sLe9BE3gFwTdCPkHEaMxBhX8Xwe7BCcE/783Bt6EHwdpbpkHc5L/BCPzRwUdIQUEd/k3AoGNcQQwNmMEyuKRBtnWlwdCBAUI5Y5DBwOZYvdI+MsEu/ixBnpMrwRtYt8FECytC6JjEQW3RHcBtfn3B+sgQQcyQKcEI5ytByvw2wPZdaUH+aqLAQFQ+QPi4REBF/9lBCvJNQTdlEcIAMbzBtD+hwZWufsAEjus/YRyjwR1YuMHj0ZhBa4w+QORAhMEq9qdB/L8JQrjhyUAJBeBAKqoIQUnAq0GsLFdBkfrvQHc1zMHH6THBeggSwaJIOsAawwBBDDWqPwrAlkBYDqe/maUcQabhwsFF2VBBxY8xu5aMQUFDkHVBKhRRwHhgWsEA5jXBlh9NQVMaT0CWlhTAroaFQRyciUHQlp7BF4trQa8unsE4TfI+9XLJQDNpLcIXLZdAuX2MwShiTsFcQh5BrHMqQVI1+UBWe4fBAzi0wfe11UFAIjq9Y1iAQDxrTsEY6plB/JiXQfjFwkHkYGRBVNOhwCMxtEFbqZTA378WQeA/Sb+FrSXCqlYywtb5SsDcqlZBk1EtQZ/RREHZIxG/kcv8QekDIkHPsDXCBL4VQHN8CMGtNvvAC3YwweUuAkKkJCnANEtVQG9z/0DrwyTBQ9hnwWX3kMEdLB1CvIlKwQ0IO0HK1ErBvdRQQVpjMMCJDI/Bb4X8QYVipEGpG2nBeLGUvmBlBT7ISgRB4iGAQUunkkFDFLm/HNaqPzKTVkCITJG/XzlYwbj0XcGD60PBbpLwQbvrs8Az8RXB4ubxQXh/HEDtXLU/kONrwVBs4MGc2X1BJaHkQd0ByEAKXLJBTq7JwPPkJUGJIIRBlh57wX3FjcC2Ie060Qc6Qal5xcCfqQrCl7edQQ==";
// Float operands, decoded from the Base64 constants x1 and x2 when the class is initialized.
static float[] queryVectorFloat = parseBase64ToVector(x1);
static float[] inputVectorFloat = parseBase64ToVector(x2);
// Double copies of the operands; allocated and filled element by element in init().
static double[] queryVectorDouble;
static double[] inputVectorDouble;

// Sum of squares of the query vector (not its square root), assigned in init() and read by
// the cosinesimilOptimized* benchmarks.
static float normQueryVectorFloat;
static double normQueryVectorDouble;
58
59 public static float[] parseArray(byte[] input) {
60 if (input == null) {
61 return null;
62 }
63 float[] floatArr = new float[input.length / 4];
64 for (int i = 0; i < floatArr.length; i++) {
65 int l;
66 l = input[i << 2];
67 l &= 0xff;
68 l |= ((int) input[(i << 2) + 1] << 8);
69 l &= 0xffff;
70 l |= ((int) input[(i << 2) + 2] << 16);
71 l &= 0xffffff;
72 l |= ((int) input[(i << 2) + 3] << 24);
73 floatArr[i] = Float.intBitsToFloat(l);
74 }
75 return floatArr;
76 }
77
78 public static float[] parseBase64ToVector(String vectorBase64) {
79 return parseArray(Base64.getDecoder().decode(vectorBase64));
80 }
81
82 @Setup
83 public void init() {
84 queryVectorDouble = new double[queryVectorFloat.length];
85 inputVectorDouble = new double[inputVectorFloat.length];
86 for (int i = 0; i < queryVectorFloat.length; i++) {
87 queryVectorDouble[i] = (double)(queryVectorFloat[i]);
88 }
89 for (int i = 0; i < inputVectorFloat.length; i++) {
90 inputVectorDouble[i] = (double)(inputVectorFloat[i]);
91 }
92 float xSquare = 0;
93 for (int i = 0; i < queryVectorFloat.length; i++) {
94 xSquare += (float)(queryVectorFloat[i] * queryVectorFloat[i]);
95 }
96 normQueryVectorFloat = xSquare;
97 normQueryVectorDouble = (double)xSquare;
98 }
99
100 @Benchmark
101 public float cosinesimilOptimizedScalarFloat() {
102 float dotProduct = 0.0f;
103 float normInputVector = 0.0f;
104 for (int i = 0; i < queryVectorFloat.length; i++) {
105 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
106 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
107 }
108 float normalizedProduct = normQueryVectorFloat * normInputVector;
109 if (normalizedProduct == 0) {
110 return Float.MIN_VALUE;
111 }
112 return (float) (dotProduct / (Math.sqrt(normalizedProduct)));
113 }
114
115 @Benchmark
116 public float cosinesimilOptimizedVectorFloat128() {
117 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
118 vecSum = FloatVector.zero(SPECIES_FLOAT_128);
119 xSquareV = FloatVector.zero(SPECIES_FLOAT_128);
120 ySquareV = FloatVector.zero(SPECIES_FLOAT_128);;
121 int i = 0;
122 for (i = 0; i + (SPECIES_FLOAT_128.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_128.length()) {
123 vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i);
124 vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i);
125 vecSum = vecX.fma(vecY, vecSum);
126 ySquareV = vecY.fma(vecY, ySquareV);
127 }
128 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
129 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
130 for (; i < queryVectorFloat.length; i++) {
131 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
132 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
133 }
134 float normalizedProduct = normQueryVectorFloat * normInputVector;
135 return (float)(dotProduct / Math.sqrt(normalizedProduct));
136 }
137
138 @Benchmark
139 public float cosinesimilOptimizedVectorFloat256() {
140 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
141 vecSum = FloatVector.zero(SPECIES_FLOAT_256);
142 xSquareV = FloatVector.zero(SPECIES_FLOAT_256);
143 ySquareV = FloatVector.zero(SPECIES_FLOAT_256);;
144 int i = 0;
145 for (i = 0; i + (SPECIES_FLOAT_256.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_256.length()) {
146 vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i);
147 vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i);
148 vecSum = vecX.fma(vecY, vecSum);
149 ySquareV = vecY.fma(vecY, ySquareV);
150 }
151 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
152 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
153 for (; i < queryVectorFloat.length; i++) {
154 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
155 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
156 }
157 float normalizedProduct = normQueryVectorFloat * normInputVector;
158 return (float)(dotProduct / Math.sqrt(normalizedProduct));
159 }
160
161 @Benchmark
162 public float cosinesimilOptimizedVectorFloatMax() {
163 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
164 vecSum = FloatVector.zero(SPECIES_FLOAT_MAX);
165 xSquareV = FloatVector.zero(SPECIES_FLOAT_MAX);
166 ySquareV = FloatVector.zero(SPECIES_FLOAT_MAX);;
167 int i = 0;
168 for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) {
169 vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i);
170 vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i);
171 vecSum = vecX.fma(vecY, vecSum);
172 ySquareV = vecY.fma(vecY, ySquareV);
173 }
174 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
175 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
176 for (; i < queryVectorFloat.length; i++) {
177 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
178 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
179 }
180 float normalizedProduct = normQueryVectorFloat * normInputVector;
181 return (float)(dotProduct / Math.sqrt(normalizedProduct));
182 }
183
184 @Benchmark
185 public float cosinesimilScalarFloat() {
186 float dotProduct = 0.0f;
187 float normQueryVectorFloat = 0.0f;
188 float normInputVector = 0.0f;
189 for (int i = 0; i < queryVectorFloat.length; i++) {
190 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
191 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
192 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
193 }
194 float normalizedProduct = normQueryVectorFloat * normInputVector;
195 if (normalizedProduct == 0) {
196 return Float.MIN_VALUE;
197 }
198 return (float) (dotProduct / (Math.sqrt(normalizedProduct)));
199 }
200
201 @Benchmark
202 public float cosinesimilVectorFloat128() {
203 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
204 vecSum = FloatVector.zero(SPECIES_FLOAT_128);
205 xSquareV = FloatVector.zero(SPECIES_FLOAT_128);
206 ySquareV = FloatVector.zero(SPECIES_FLOAT_128);;
207 int i = 0;
208 for (i = 0; i + (SPECIES_FLOAT_128.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_128.length()) {
209 vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i);
210 vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i);
211 vecSum = vecX.fma(vecY, vecSum);
212 xSquareV = vecX.fma(vecX, xSquareV);
213 ySquareV = vecY.fma(vecY, ySquareV);
214 }
215 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
216 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
217 float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD);
218 for (; i < queryVectorFloat.length; i++) {
219 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
220 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
221 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
222 }
223 float normalizedProduct = normQueryVectorFloat * normInputVector;
224 return (float)(dotProduct / Math.sqrt(normalizedProduct));
225 }
226
227 @Benchmark
228 public float cosinesimilVectorFloat256() {
229 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
230 vecSum = FloatVector.zero(SPECIES_FLOAT_256);
231 xSquareV = FloatVector.zero(SPECIES_FLOAT_256);
232 ySquareV = FloatVector.zero(SPECIES_FLOAT_256);;
233 int i = 0;
234 for (i = 0; i + (SPECIES_FLOAT_256.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_256.length()) {
235 vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i);
236 vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i);
237 vecSum = vecX.fma(vecY, vecSum);
238 xSquareV = vecX.fma(vecX, xSquareV);
239 ySquareV = vecY.fma(vecY, ySquareV);
240 }
241 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
242 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
243 float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD);
244 for (; i < queryVectorFloat.length; i++) {
245 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
246 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
247 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
248 }
249 float normalizedProduct = normQueryVectorFloat * normInputVector;
250 return (float)(dotProduct / Math.sqrt(normalizedProduct));
251 }
252
253 @Benchmark
254 public float cosinesimilVectorFloatMax() {
255 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
256 vecSum = FloatVector.zero(SPECIES_FLOAT_MAX);
257 xSquareV = FloatVector.zero(SPECIES_FLOAT_MAX);
258 ySquareV = FloatVector.zero(SPECIES_FLOAT_MAX);
259 int i = 0;
260 for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) {
261 vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i);
262 vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i);
263 vecSum = vecX.fma(vecY, vecSum);
264 xSquareV = vecX.fma(vecX, xSquareV);
265 ySquareV = vecY.fma(vecY, ySquareV);
266 }
267 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
268 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
269 float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD);
270 for (; i < queryVectorFloat.length; i++) {
271 dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
272 normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
273 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
274 }
275 float normalizedProduct = normQueryVectorFloat * normInputVector;
276 return (float)(dotProduct / Math.sqrt(normalizedProduct));
277 }
278
279 @Benchmark
280 public double cosinesimilOptimizedScalarDouble() {
281 double dotProduct = 0.0;
282 double normInputVector = 0.0;
283 for (int i = 0; i < queryVectorDouble.length; i++) {
284 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
285 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
286 }
287 double normalizedProduct = normQueryVectorDouble * normInputVector;
288 return dotProduct / (Math.sqrt(normalizedProduct));
289 }
290
291 @Benchmark
292 public double cosinesimilOptimizedVectorDouble128() {
293 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
294 vecSum = DoubleVector.zero(SPECIES_DOUBLE_128);
295 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
296 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
297 int i = 0;
298 for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_128.length()) {
299 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i);
300 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i);
301 vecSum = vecX.fma(vecY, vecSum);
302 ySquareV = vecY.fma(vecY, ySquareV);
303 }
304 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
305 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
306 for (; i < queryVectorDouble.length; i++) {
307 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
308 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
309 }
310 double normalizedProduct = normQueryVectorDouble * normInputVector;
311 return (double)(dotProduct / Math.sqrt(normalizedProduct));
312 }
313
314 @Benchmark
315 public double cosinesimilOptimizedVectorDouble256() {
316 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
317 vecSum = DoubleVector.zero(SPECIES_DOUBLE_256);
318 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
319 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
320 int i = 0;
321 for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_256.length()) {
322 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i);
323 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i);
324 vecSum = vecX.fma(vecY, vecSum);
325 ySquareV = vecY.fma(vecY, ySquareV);
326 }
327 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
328 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
329 for (; i < queryVectorDouble.length; i++) {
330 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
331 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
332 }
333 double normalizedProduct = normQueryVectorDouble * normInputVector;
334 return (double)(dotProduct / Math.sqrt(normalizedProduct));
335 }
336
337 @Benchmark
338 public double cosinesimilOptimizedVectorDoubleMax() {
339 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
340 vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX);
341 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
342 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
343 int i = 0;
344 for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) {
345 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i);
346 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i);
347 vecSum = vecX.fma(vecY, vecSum);
348 ySquareV = vecY.fma(vecY, ySquareV);
349 }
350 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
351 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
352 for (; i < queryVectorDouble.length; i++) {
353 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
354 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
355 }
356 double normalizedProduct = normQueryVectorDouble * normInputVector;
357 return (double)(dotProduct / Math.sqrt(normalizedProduct));
358 }
359
360 @Benchmark
361 public double cosinesimilScalarDouble() {
362 double dotProduct = 0.0f;
363 double normQueryVectorDouble = 0.0f;
364 double normInputVector = 0.0f;
365 for (int i = 0; i < queryVectorDouble.length; i++) {
366 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
367 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
368 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
369 }
370 double normalizedProduct = normQueryVectorDouble * normInputVector;
371 return (double) (dotProduct / (Math.sqrt(normalizedProduct)));
372 }
373
374 @Benchmark
375 public double cosinesimilVectorDouble128() {
376 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
377 vecSum = DoubleVector.zero(SPECIES_DOUBLE_128);
378 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
379 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
380 int i = 0;
381 for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_128.length()) {
382 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i);
383 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i);
384 vecSum = vecX.fma(vecY, vecSum);
385 xSquareV = vecX.fma(vecX, xSquareV);
386 ySquareV = vecY.fma(vecY, ySquareV);
387 }
388 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
389 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
390 double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD);
391 for (; i < queryVectorDouble.length; i++) {
392 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
393 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
394 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
395 }
396 double normalizedProduct = normQueryVectorDouble * normInputVector;
397 return (double)(dotProduct / Math.sqrt(normalizedProduct));
398 }
399
400 @Benchmark
401 public double cosinesimilVectorDouble256() {
402 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
403 vecSum = DoubleVector.zero(SPECIES_DOUBLE_256);
404 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
405 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
406 int i = 0;
407 for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_256.length()) {
408 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i);
409 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i);
410 vecSum = vecX.fma(vecY, vecSum);
411 xSquareV = vecX.fma(vecX, xSquareV);
412 ySquareV = vecY.fma(vecY, ySquareV);
413 }
414 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
415 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
416 double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD);
417 for (; i < queryVectorDouble.length; i++) {
418 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
419 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
420 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
421 }
422 double normalizedProduct = normQueryVectorDouble * normInputVector;
423 return (double)(dotProduct / Math.sqrt(normalizedProduct));
424 }
425
426 @Benchmark
427 public double cosinesimilVectorDoubleMax() {
428 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
429 vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX);
430 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
431 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
432 int i = 0;
433 for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) {
434 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i);
435 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i);
436 vecSum = vecX.fma(vecY, vecSum);
437 xSquareV = vecX.fma(vecX, xSquareV);
438 ySquareV = vecY.fma(vecY, ySquareV);
439 }
440 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
441 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
442 double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD);
443 for (; i < queryVectorDouble.length; i++) {
444 dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
445 normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
446 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
447 }
448 double normalizedProduct = normQueryVectorDouble * normInputVector;
449 return (double)(dotProduct / Math.sqrt(normalizedProduct));
450 }
451
452 // l2Squared is used to compute Euclidean distance
453 @Benchmark
454 public float l2SquaredVectorFloat128() {
455 FloatVector vecX, vecY, vecSum, vecSquare, vecDiff;
456 vecSum = FloatVector.zero(SPECIES_FLOAT_128);
457 int i = 0;
458 for (i = 0; i + (SPECIES_FLOAT_128.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_128.length()) {
459 vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i);
460 vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i);
461 vecDiff = vecX.sub(vecY);
462 vecSquare = vecDiff.mul(vecDiff);
463 vecSum = vecDiff.fma(vecDiff, vecSum);
464 }
465 float sum = vecSum.reduceLanes(VectorOperators.ADD);
466 for (; i < inputVectorFloat.length; i++) {
467 float diff = queryVectorFloat[i] - inputVectorFloat[i];
468 sum += diff * diff;
469 }
470 return sum;
471 }
472
473 @Benchmark
474 public float l2SquaredVectorFloat256() {
475 FloatVector vecX, vecY, vecSum, vecSquare, vecDiff;
476 vecSum = FloatVector.zero(SPECIES_FLOAT_256);
477 int i = 0;
478 for (i = 0; i + (SPECIES_FLOAT_256.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_256.length()) {
479 vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i);
480 vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i);
481 vecDiff = vecX.sub(vecY);
482 vecSquare = vecDiff.mul(vecDiff);
483 vecSum = vecDiff.fma(vecDiff, vecSum);
484 }
485 float sum = vecSum.reduceLanes(VectorOperators.ADD);
486 for (; i < inputVectorFloat.length; i++) {
487 float diff = queryVectorFloat[i] - inputVectorFloat[i];
488 sum += diff * diff;
489 }
490 return sum;
491 }
492
493 @Benchmark
494 public float l2SquaredVectorFloatMax() {
495 FloatVector vecX, vecY, vecSum, vecSquare, vecDiff;
496 vecSum = FloatVector.zero(SPECIES_FLOAT_MAX);
497 int i = 0;
498 for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) {
499 vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i);
500 vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i);
501 vecDiff = vecX.sub(vecY);
502 vecSquare = vecDiff.mul(vecDiff);
503 vecSum = vecDiff.fma(vecDiff, vecSum);
504 }
505 float sum = vecSum.reduceLanes(VectorOperators.ADD);
506 for (; i < inputVectorFloat.length; i++) {
507 float diff = queryVectorFloat[i] - inputVectorFloat[i];
508 sum += diff * diff;
509 }
510 return sum;
511 }
512
513 @Benchmark
514 public double l2SquaredVectorDouble128() {
515 DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff;
516 vecSum = DoubleVector.zero(SPECIES_DOUBLE_128);
517 int i = 0;
518 for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_128.length()) {
519 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i);
520 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i);
521 vecDiff = vecX.sub(vecY);
522 vecSquare = vecDiff.mul(vecDiff);
523 vecSum = vecDiff.fma(vecDiff, vecSum);
524 }
525 double sum = vecSum.reduceLanes(VectorOperators.ADD);
526 for (; i < inputVectorDouble.length; i++) {
527 double diff = queryVectorDouble[i] - inputVectorDouble[i];
528 sum += diff * diff;
529 }
530 return sum;
531 }
532
533 @Benchmark
534 public double l2SquaredVectorDouble256() {
535 DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff;
536 vecSum = DoubleVector.zero(SPECIES_DOUBLE_256);
537 int i = 0;
538 for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_256.length()) {
539 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i);
540 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i);
541 vecDiff = vecX.sub(vecY);
542 vecSquare = vecDiff.mul(vecDiff);
543 vecSum = vecDiff.fma(vecDiff, vecSum);
544 }
545 double sum = vecSum.reduceLanes(VectorOperators.ADD);
546 for (; i < inputVectorDouble.length; i++) {
547 double diff = queryVectorDouble[i] - inputVectorDouble[i];
548 sum += diff * diff;
549 }
550 return sum;
551 }
552
553 @Benchmark
554 public double l2SquaredVectorDoubleMax() {
555 DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff;
556 vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX);
557 int i = 0;
558 for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) {
559 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i);
560 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i);
561 vecDiff = vecX.sub(vecY);
562 vecSquare = vecDiff.mul(vecDiff);
563 vecSum = vecDiff.fma(vecDiff, vecSum);
564 }
565 double sum = vecSum.reduceLanes(VectorOperators.ADD);
566 for (; i < inputVectorDouble.length; i++) {
567 double diff = queryVectorDouble[i] - inputVectorDouble[i];
568 sum += diff * diff;
569 }
570 return sum;
571 }
572
573 @Benchmark
574 public float l2SquaredScalar() {
575 float squaredDistance = 0;
576 for (int i = 0; i < inputVectorFloat.length; i++) {
577 float diff = queryVectorFloat[i] - inputVectorFloat[i];
578 squaredDistance += diff * diff;
579 }
580 return squaredDistance;
581 }
582
583 }