1 /*
  2  * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 package org.openjdk.bench.jdk.incubator.vector.bigdata;
 25 
 26 import jdk.incubator.vector.*;
 27 import java.util.Base64;
 28 import java.util.concurrent.TimeUnit;
 29 import org.openjdk.jmh.annotations.*;
 30 
 31 @BenchmarkMode(Mode.Throughput)
 32 @Warmup(iterations = 3, time = 1)
 33 @Measurement(iterations = 5, time = 1)
 34 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 35 @State(Scope.Benchmark)
 36 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
 37 public class VectorDistance {
 38 
    // Vector species used by the vectorized kernels in this class: fixed 128-bit and
    // 256-bit shapes plus the SPECIES_MAX shape, for float and for double lanes.
    static final VectorSpecies<Float> SPECIES_FLOAT_128 = FloatVector.SPECIES_128;
    static final VectorSpecies<Float> SPECIES_FLOAT_256 = FloatVector.SPECIES_256;
    static final VectorSpecies<Float> SPECIES_FLOAT_MAX = FloatVector.SPECIES_MAX;
    static final VectorSpecies<Double> SPECIES_DOUBLE_128 = DoubleVector.SPECIES_128;
    static final VectorSpecies<Double> SPECIES_DOUBLE_256 = DoubleVector.SPECIES_256;
    static final VectorSpecies<Double> SPECIES_DOUBLE_MAX = DoubleVector.SPECIES_MAX;


    // NOTE(review): not referenced by any code visible in this class - confirm whether it is still needed.
    static long num = 0;
    // Base64-encoded benchmark payloads. They are decoded by parseBase64ToVector into
    // queryVectorFloat (from x1) and inputVectorFloat (from x2).
    private static String x1 = "L5GSwXhHpEH05mNBHnmcQMTw3EBnagFCW1DGQHe/nUFO1B1BlJOpwCBJ9j" +
                "+RkY1BzqKeQSglN0Gy7krB5CSfQFzxB8Djn5nB2KNFwKcSRMGYzRQ7qMGWQZ0FF0FTceDAIKjxv/zhdkHFZMHB6hU4QZbo2cCAryRB+7OOQCxbfEHRtBlBxPG6P0BYSD+Pgz9BqzOLv/nVO8C9x5/BQOY/wTTIx0GfW1BBGv2lQQwdDcGCqBfB12t/QKUBoEEejIXBPN9kQWsFbEGsGcnBkqJkwKhLgr/IQZxAelAWQfcYpcFQv0HBeiGCQWExhEDrKAnBpAwBQV4bVcFpGNjAyDsNQVOc+0CSc4nBgG/ZQQGRccEXts9BKhYzQNK5+MAlU0DBzPGWwPGRCcEZC5/ADxOcv7lUkEBomM5BuqKiwV2MU8HNGHDBSB84QZRSyMB8RZlBVFdZQXSVgcBTQQBCdWa/QBQ0qkGILUW/6NA9QQnkmsG+5PPBj0UowT6nYD9cwpjAS/w5wTbX2UH8Gb5AR/HUQMTNAMJ9MN9AgHoqPbbUyUFbe47BBHANQWZJBsGBuPlBy94EQADeXsG5eOtBnA+yQCRka8EMcGLBjuoRwb4k7sAasB5Bmk/UwaI1akErp6xBq5G5wNo1E8KHa7tB3IiKQTCffcHphK1BTgJzwVY3JEEip/VAlmgXQSeKCsLEABs/n1/xwL5u58CgQY49ahUWQoAJjj1hhqBASXrrQb6nM0H2fY+/thtbQAQobMAohvXAxM3xv7xyqD+MvpDBrlDiQfBvPcGA8X5AQE4SwXhGx7+uLA1AxY8xu2mVjEE7KlFBArveQFNMtUD3N7DB12BbQcyH4cFhSw3Bu5VWQeTW0z9o03TBxtMlQctp/8E/lLVAGUtTwZsGJMKv/R5A1HKVQV6RhsC1Ji5AcXLFQJd6f0HbB+e+ZDi8wV9tQ0FwCN/B+A89v2DrU0Bcpc5BglTeQH5dT0HePS9Al4XPwdA6YEFlueXAbWKSQSBWzkBy2RnCt9Yawl9b77+xgxBC9eCqQd8f0kFoBG9BVxrkQZh2QkHNW/zBEQiawLJEocDhutTA8zEYwbIvEUIO1T9BmlOTwIhbNEDhrtlAVk9BQARQaj89NQNC6usGwDfQrkBSJrlAON7FQQ8FqsEEc/TAY3zeQYsqUEHV8QPBHJoYQQdn5kGyCiJBlDMYQBBNoUFrxbw/NlmPP3B24j6ChIdBXk2bwdxdDMFQw1rA4hybQXTchr8d9wvBuCbLQSMKmMBH4RpBQIXePa5DT8IjgvtBgAetQZgGgMEprc1BAOeSPJ5XpEEMa0NBgX4uwX7XIsG2Ie0688iqQSpJPsCAy9LBAGHkPw==";
    private static String x2 = "5R3ZwGPrxEFMKyNBLFSeQdYav0BQtDFCur7WQAgRYEGHFYC/MKZtvkiFUT+RNXfBVsGBP2KWSUCmAUTBIf+EQG57kMCtXo7BV1DuwLd98r+YzRQ7qKXNwBMSPUFNQffBPrxeQYw1t7/7JjFAKNaXP+cMSEG6GI5BuEx0wUANDMEvDqdAT9YEworQTEEiVBZBiMejQP7t67+iRwzB3HadQB1be0Ei5g5BMt+cQXvYTUHwZsLAuoy3QfrR6EFrIiHB5X8Dwc8XbUH8Yr8/AvGEwa5GkUH3F5tAP8YJQTiDyz+gKsRAFl/rwDxJuUAPyyxBvg2gQU6bjMEPEa7Bz6wYQpQy7MDF5LvB8HP+QCJdicHQDjpC6RpWQcGeY8FMK6vBoeUjQcPYmUG2QmRBBI0nwScESsGMAcxBvRmawRL2A8IByKNAgTQBQuxdDEGq8JBBHJWmQSBDfz8sLe9BE3gFwTdCPkHEaMxBhX8Xwe7BCcE/783Bt6EHwdpbpkHc5L/BCPzRwUdIQUEd/k3AoGNcQQwNmMEyuKRBtnWlwdCBAUI5Y5DBwOZYvdI+MsEu/ixBnpMrwRtYt8FECytC6JjEQW3RHcBtfn3B+sgQQcyQKcEI5ytByvw2wPZdaUH+aqLAQFQ+QPi4REBF/9lBCvJNQTdlEcIAMbzBtD+hwZWufsAEjus/YRyjwR1YuMHj0ZhBa4w+QORAhMEq9qdB/L8JQrjhyUAJBeBAKqoIQUnAq0GsLFdBkfrvQHc1zMHH6THBeggSwaJIOsAawwBBDDWqPwrAlkBYDqe/maUcQabhwsFF2VBBxY8xu5aMQUFDkHVBKhRRwHhgWsEA5jXBlh9NQVMaT0CWlhTAroaFQRyciUHQlp7BF4trQa8unsE4TfI+9XLJQDNpLcIXLZdAuX2MwShiTsFcQh5BrHMqQVI1+UBWe4fBAzi0wfe11UFAIjq9Y1iAQDxrTsEY6plB/JiXQfjFwkHkYGRBVNOhwCMxtEFbqZTA378WQeA/Sb+FrSXCqlYywtb5SsDcqlZBk1EtQZ/RREHZIxG/kcv8QekDIkHPsDXCBL4VQHN8CMGtNvvAC3YwweUuAkKkJCnANEtVQG9z/0DrwyTBQ9hnwWX3kMEdLB1CvIlKwQ0IO0HK1ErBvdRQQVpjMMCJDI/Bb4X8QYVipEGpG2nBeLGUvmBlBT7ISgRB4iGAQUunkkFDFLm/HNaqPzKTVkCITJG/XzlYwbj0XcGD60PBbpLwQbvrs8Az8RXB4ubxQXh/HEDtXLU/kONrwVBs4MGc2X1BJaHkQd0ByEAKXLJBTq7JwPPkJUGJIIRBlh57wX3FjcC2Ie060Qc6Qal5xcCfqQrCl7edQQ==";
    // Float vectors decoded from the Base64 payloads x1 and x2 when the class is initialized.
    static float[] queryVectorFloat = parseBase64ToVector(x1);
    static float[] inputVectorFloat = parseBase64ToVector(x2);
    // Double-typed copies of the two float vectors; assigned in init().
    static double[] queryVectorDouble;
    static double[] inputVectorDouble;

    // Sum of squares of the query vector, assigned in init() and read by the
    // cosinesimilOptimized* benchmarks. Despite the "norm" prefix, no square root is stored.
    static float normQueryVectorFloat;
    static double normQueryVectorDouble;
 58 
 59     public static float[] parseArray(byte[] input) {
 60         if (input == null) {
 61             return null;
 62         }
 63         float[] floatArr = new float[input.length / 4];
 64         for (int i = 0; i < floatArr.length; i++) {
 65             int l;
 66             l = input[i << 2];
 67             l &= 0xff;
 68             l |= ((long) input[(i << 2) + 1] << 8);
 69             l &= 0xffff;
 70             l |= ((long) input[(i << 2) + 2] << 16);
 71             l &= 0xffffff;
 72             l |= ((long) input[(i << 2) + 3] << 24);
 73             floatArr[i] = Float.intBitsToFloat(l);
 74         }
 75         return floatArr;
 76     }
 77 
 78     public static float[] parseBase64ToVector(String vectorBase64) {
 79         return parseArray(Base64.getDecoder().decode(vectorBase64));
 80     }
 81 
 82     @Setup
 83     public void init() {
 84         queryVectorDouble = new double[queryVectorFloat.length];
 85         inputVectorDouble = new double[inputVectorFloat.length];
 86         for (int i = 0; i < queryVectorFloat.length; i++) {
 87             queryVectorDouble[i] = (double)(queryVectorFloat[i]);
 88         }
 89         for (int i = 0; i < inputVectorFloat.length; i++) {
 90             inputVectorDouble[i] = (double)(inputVectorFloat[i]);
 91         }
 92         float xSquare = 0;
 93         for (int i = 0; i < queryVectorFloat.length; i++) {
 94             xSquare += (float)(queryVectorFloat[i] * queryVectorFloat[i]);
 95         }
 96         normQueryVectorFloat = xSquare;
 97         normQueryVectorDouble = (double)xSquare;
 98     }
 99 
100     @Benchmark
101     public float cosinesimilOptimizedScalarFloat() {
102         float dotProduct = 0.0f;
103         float normInputVector = 0.0f;
104         for (int i = 0; i < queryVectorFloat.length; i++) {
105             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
106             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
107         }
108         float normalizedProduct = normQueryVectorFloat * normInputVector;
109         if (normalizedProduct == 0) {
110             return Float.MIN_VALUE;
111         }
112         return (float) (dotProduct / (Math.sqrt(normalizedProduct)));
113     }
114 
115     @Benchmark
116     public float cosinesimilOptimizedVectorFloat128() {
117         FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
118         vecSum = FloatVector.zero(SPECIES_FLOAT_128);
119         xSquareV = FloatVector.zero(SPECIES_FLOAT_128);
120         ySquareV = FloatVector.zero(SPECIES_FLOAT_128);;
121         int i = 0;
122         for (i = 0; i + (SPECIES_FLOAT_128.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_128.length()) {
123             vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i);
124             vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i);
125             vecSum = vecX.fma(vecY, vecSum);
126             ySquareV = vecY.fma(vecY, ySquareV);
127         }
128         float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
129         float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
130         for (; i < queryVectorFloat.length; i++) {
131             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
132             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
133         }
134         float normalizedProduct = normQueryVectorFloat * normInputVector;
135         return (float)(dotProduct / Math.sqrt(normalizedProduct));
136     }
137 
138     @Benchmark
139     public float cosinesimilOptimizedVectorFloat256() {
140         FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
141         vecSum = FloatVector.zero(SPECIES_FLOAT_256);
142         xSquareV = FloatVector.zero(SPECIES_FLOAT_256);
143         ySquareV = FloatVector.zero(SPECIES_FLOAT_256);;
144         int i = 0;
145         for (i = 0; i + (SPECIES_FLOAT_256.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_256.length()) {
146             vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i);
147             vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i);
148             vecSum = vecX.fma(vecY, vecSum);
149             ySquareV = vecY.fma(vecY, ySquareV);
150         }
151         float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
152         float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
153         for (; i < queryVectorFloat.length; i++) {
154             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
155             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
156         }
157         float normalizedProduct = normQueryVectorFloat * normInputVector;
158         return (float)(dotProduct / Math.sqrt(normalizedProduct));
159     }
160 
161     @Benchmark
162     public float cosinesimilOptimizedVectorFloatMax() {
163         FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
164         vecSum = FloatVector.zero(SPECIES_FLOAT_MAX);
165         xSquareV = FloatVector.zero(SPECIES_FLOAT_MAX);
166         ySquareV = FloatVector.zero(SPECIES_FLOAT_MAX);;
167         int i = 0;
168         for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) {
169             vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i);
170             vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i);
171             vecSum = vecX.fma(vecY, vecSum);
172             ySquareV = vecY.fma(vecY, ySquareV);
173         }
174         float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
175         float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
176         for (; i < queryVectorFloat.length; i++) {
177             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
178             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
179         }
180         float normalizedProduct = normQueryVectorFloat * normInputVector;
181         return (float)(dotProduct / Math.sqrt(normalizedProduct));
182     }
183 
184     @Benchmark
185     public float cosinesimilScalarFloat() {
186         float dotProduct = 0.0f;
187         float normQueryVectorFloat = 0.0f;
188         float normInputVector = 0.0f;
189         for (int i = 0; i < queryVectorFloat.length; i++) {
190             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
191             normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
192             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
193         }
194         float normalizedProduct = normQueryVectorFloat * normInputVector;
195         if (normalizedProduct == 0) {
196             return Float.MIN_VALUE;
197         }
198         return (float) (dotProduct / (Math.sqrt(normalizedProduct)));
199     }
200 
201     @Benchmark
202     public float cosinesimilVectorFloat128() {
203         FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
204         vecSum = FloatVector.zero(SPECIES_FLOAT_128);
205         xSquareV = FloatVector.zero(SPECIES_FLOAT_128);
206         ySquareV = FloatVector.zero(SPECIES_FLOAT_128);;
207         int i = 0;
208         for (i = 0; i + (SPECIES_FLOAT_128.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_128.length()) {
209             vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i);
210             vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i);
211             vecSum = vecX.fma(vecY, vecSum);
212             xSquareV = vecX.fma(vecX, xSquareV);
213             ySquareV = vecY.fma(vecY, ySquareV);
214         }
215         float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
216         float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
217         float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD);
218         for (; i < queryVectorFloat.length; i++) {
219             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
220             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
221             normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
222         }
223         float normalizedProduct = normQueryVectorFloat * normInputVector;
224         return (float)(dotProduct / Math.sqrt(normalizedProduct));
225     }
226 
227     @Benchmark
228     public float cosinesimilVectorFloat256() {
229         FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
230         vecSum = FloatVector.zero(SPECIES_FLOAT_256);
231         xSquareV = FloatVector.zero(SPECIES_FLOAT_256);
232         ySquareV = FloatVector.zero(SPECIES_FLOAT_256);;
233         int i = 0;
234         for (i = 0; i + (SPECIES_FLOAT_256.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_256.length()) {
235             vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i);
236             vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i);
237             vecSum = vecX.fma(vecY, vecSum);
238             xSquareV = vecX.fma(vecX, xSquareV);
239             ySquareV = vecY.fma(vecY, ySquareV);
240         }
241         float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
242         float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
243         float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD);
244         for (; i < queryVectorFloat.length; i++) {
245             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
246             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
247             normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
248         }
249         float normalizedProduct = normQueryVectorFloat * normInputVector;
250         return (float)(dotProduct / Math.sqrt(normalizedProduct));
251     }
252 
253     @Benchmark
254     public float cosinesimilVectorFloatMax() {
255         FloatVector vecX, vecY, vecSum, xSquareV, ySquareV;
256         vecSum = FloatVector.zero(SPECIES_FLOAT_MAX);
257         xSquareV = FloatVector.zero(SPECIES_FLOAT_MAX);
258         ySquareV = FloatVector.zero(SPECIES_FLOAT_MAX);
259         int i = 0;
260         for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) {
261             vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i);
262             vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i);
263             vecSum = vecX.fma(vecY, vecSum);
264             xSquareV = vecX.fma(vecX, xSquareV);
265             ySquareV = vecY.fma(vecY, ySquareV);
266         }
267         float dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
268         float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
269         float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD);
270         for (; i < queryVectorFloat.length; i++) {
271             dotProduct += queryVectorFloat[i] * inputVectorFloat[i];
272             normInputVector += inputVectorFloat[i] * inputVectorFloat[i];
273             normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i];
274         }
275         float normalizedProduct = normQueryVectorFloat * normInputVector;
276         return (float)(dotProduct / Math.sqrt(normalizedProduct));
277     }
278 
279     @Benchmark
280     public double cosinesimilOptimizedScalarDouble() {
281         double dotProduct = 0.0;
282         double normInputVector = 0.0;
283         for (int i = 0; i < queryVectorDouble.length; i++) {
284             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
285             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
286         }
287         double normalizedProduct = normQueryVectorDouble * normInputVector;
288         return dotProduct / (Math.sqrt(normalizedProduct));
289     }
290 
291     @Benchmark
292     public double cosinesimilOptimizedVectorDouble128() {
293         DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
294         vecSum = DoubleVector.zero(SPECIES_DOUBLE_128);
295         xSquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
296         ySquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
297         int i = 0;
298         for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_128.length()) {
299             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i);
300             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i);
301             vecSum = vecX.fma(vecY, vecSum);
302             ySquareV = vecY.fma(vecY, ySquareV);
303         }
304         double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
305         double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
306         for (; i < queryVectorDouble.length; i++) {
307             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
308             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
309         }
310         double normalizedProduct = normQueryVectorDouble * normInputVector;
311         return (double)(dotProduct / Math.sqrt(normalizedProduct));
312     }
313 
314     @Benchmark
315     public double cosinesimilOptimizedVectorDouble256() {
316         DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
317         vecSum = DoubleVector.zero(SPECIES_DOUBLE_256);
318         xSquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
319         ySquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
320         int i = 0;
321         for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_256.length()) {
322             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i);
323             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i);
324             vecSum = vecX.fma(vecY, vecSum);
325             ySquareV = vecY.fma(vecY, ySquareV);
326         }
327         double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
328         double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
329         for (; i < queryVectorDouble.length; i++) {
330             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
331             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
332         }
333         double normalizedProduct = normQueryVectorDouble * normInputVector;
334         return (double)(dotProduct / Math.sqrt(normalizedProduct));
335     }
336 
337     @Benchmark
338     public double cosinesimilOptimizedVectorDoubleMax() {
339         DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
340         vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX);
341         xSquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
342         ySquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
343         int i = 0;
344         for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) {
345             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i);
346             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i);
347             vecSum = vecX.fma(vecY, vecSum);
348             ySquareV = vecY.fma(vecY, ySquareV);
349         }
350         double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
351         double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
352         for (; i < queryVectorDouble.length; i++) {
353             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
354             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
355         }
356         double normalizedProduct = normQueryVectorDouble * normInputVector;
357         return (double)(dotProduct / Math.sqrt(normalizedProduct));
358     }
359 
360     @Benchmark
361     public double cosinesimilScalarDouble() {
362         double dotProduct = 0.0f;
363         double normQueryVectorDouble = 0.0f;
364         double normInputVector = 0.0f;
365         for (int i = 0; i < queryVectorDouble.length; i++) {
366             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
367             normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
368             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
369         }
370         double normalizedProduct = normQueryVectorDouble * normInputVector;
371         return (double) (dotProduct / (Math.sqrt(normalizedProduct)));
372     }
373 
374     @Benchmark
375     public double cosinesimilVectorDouble128() {
376         DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
377         vecSum = DoubleVector.zero(SPECIES_DOUBLE_128);
378         xSquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
379         ySquareV = DoubleVector.zero(SPECIES_DOUBLE_128);
380         int i = 0;
381         for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_128.length()) {
382             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i);
383             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i);
384             vecSum = vecX.fma(vecY, vecSum);
385             xSquareV = vecX.fma(vecX, xSquareV);
386             ySquareV = vecY.fma(vecY, ySquareV);
387         }
388         double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
389         double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
390         double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD);
391         for (; i < queryVectorDouble.length; i++) {
392             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
393             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
394             normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
395         }
396         double normalizedProduct = normQueryVectorDouble * normInputVector;
397         return (double)(dotProduct / Math.sqrt(normalizedProduct));
398     }
399 
400     @Benchmark
401     public double cosinesimilVectorDouble256() {
402         DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
403         vecSum = DoubleVector.zero(SPECIES_DOUBLE_256);
404         xSquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
405         ySquareV = DoubleVector.zero(SPECIES_DOUBLE_256);
406         int i = 0;
407         for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_256.length()) {
408             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i);
409             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i);
410             vecSum = vecX.fma(vecY, vecSum);
411             xSquareV = vecX.fma(vecX, xSquareV);
412             ySquareV = vecY.fma(vecY, ySquareV);
413         }
414         double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
415         double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
416         double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD);
417         for (; i < queryVectorDouble.length; i++) {
418             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
419             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
420             normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
421         }
422         double normalizedProduct = normQueryVectorDouble * normInputVector;
423         return (double)(dotProduct / Math.sqrt(normalizedProduct));
424     }
425 
426     @Benchmark
427     public double cosinesimilVectorDoubleMax() {
428         DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV;
429         vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX);
430         xSquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
431         ySquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX);
432         int i = 0;
433         for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) {
434             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i);
435             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i);
436             vecSum = vecX.fma(vecY, vecSum);
437             xSquareV = vecX.fma(vecX, xSquareV);
438             ySquareV = vecY.fma(vecY, ySquareV);
439         }
440         double dotProduct = vecSum.reduceLanes(VectorOperators.ADD);
441         double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD);
442         double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD);
443         for (; i < queryVectorDouble.length; i++) {
444             dotProduct += queryVectorDouble[i] * inputVectorDouble[i];
445             normInputVector += inputVectorDouble[i] * inputVectorDouble[i];
446             normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i];
447         }
448         double normalizedProduct = normQueryVectorDouble * normInputVector;
449         return (double)(dotProduct / Math.sqrt(normalizedProduct));
450     }
451 
452     // l2Squared is used to compute Euclidean distance
453     @Benchmark
454     public float l2SquaredVectorFloat128() {
455         FloatVector vecX, vecY, vecSum, vecSquare, vecDiff;
456         vecSum = FloatVector.zero(SPECIES_FLOAT_128);
457         int i = 0;
458         for (i = 0; i + (SPECIES_FLOAT_128.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_128.length()) {
459             vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i);
460             vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i);
461             vecDiff = vecX.sub(vecY);
462             vecSquare = vecDiff.mul(vecDiff);
463             vecSum = vecDiff.fma(vecDiff, vecSum);
464         }
465         float sum = vecSum.reduceLanes(VectorOperators.ADD);
466         for (; i < inputVectorFloat.length; i++) {
467             float diff = queryVectorFloat[i] - inputVectorFloat[i];
468             sum += diff * diff;
469         }
470         return sum;
471     }
472 
473     @Benchmark
474     public float l2SquaredVectorFloat256() {
475         FloatVector vecX, vecY, vecSum, vecSquare, vecDiff;
476         vecSum = FloatVector.zero(SPECIES_FLOAT_256);
477         int i = 0;
478         for (i = 0; i + (SPECIES_FLOAT_256.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_256.length()) {
479             vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i);
480             vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i);
481             vecDiff = vecX.sub(vecY);
482             vecSquare = vecDiff.mul(vecDiff);
483             vecSum = vecDiff.fma(vecDiff, vecSum);
484         }
485         float sum = vecSum.reduceLanes(VectorOperators.ADD);
486         for (; i < inputVectorFloat.length; i++) {
487             float diff = queryVectorFloat[i] - inputVectorFloat[i];
488             sum += diff * diff;
489         }
490         return sum;
491     }
492 
493     @Benchmark
494     public float l2SquaredVectorFloatMax() {
495         FloatVector vecX, vecY, vecSum, vecSquare, vecDiff;
496         vecSum = FloatVector.zero(SPECIES_FLOAT_MAX);
497         int i = 0;
498         for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) {
499             vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i);
500             vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i);
501             vecDiff = vecX.sub(vecY);
502             vecSquare = vecDiff.mul(vecDiff);
503             vecSum = vecDiff.fma(vecDiff, vecSum);
504         }
505         float sum = vecSum.reduceLanes(VectorOperators.ADD);
506         for (; i < inputVectorFloat.length; i++) {
507             float diff = queryVectorFloat[i] - inputVectorFloat[i];
508             sum += diff * diff;
509         }
510         return sum;
511     }
512 
513     @Benchmark
514     public double l2SquaredVectorDouble128() {
515         DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff;
516         vecSum = DoubleVector.zero(SPECIES_DOUBLE_128);
517         int i = 0;
518         for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_128.length()) {
519             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i);
520             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i);
521             vecDiff = vecX.sub(vecY);
522             vecSquare = vecDiff.mul(vecDiff);
523             vecSum = vecDiff.fma(vecDiff, vecSum);
524         }
525         double sum = vecSum.reduceLanes(VectorOperators.ADD);
526         for (; i < inputVectorDouble.length; i++) {
527             double diff = queryVectorDouble[i] - inputVectorDouble[i];
528             sum += diff * diff;
529         }
530         return sum;
531     }
532 
533     @Benchmark
534     public double l2SquaredVectorDouble256() {
535         DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff;
536         vecSum = DoubleVector.zero(SPECIES_DOUBLE_256);
537         int i = 0;
538         for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_256.length()) {
539             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i);
540             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i);
541             vecDiff = vecX.sub(vecY);
542             vecSquare = vecDiff.mul(vecDiff);
543             vecSum = vecDiff.fma(vecDiff, vecSum);
544         }
545         double sum = vecSum.reduceLanes(VectorOperators.ADD);
546         for (; i < inputVectorDouble.length; i++) {
547             double diff = queryVectorDouble[i] - inputVectorDouble[i];
548             sum += diff * diff;
549         }
550         return sum;
551     }
552 
553     @Benchmark
554     public double l2SquaredVectorDoubleMax() {
555         DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff;
556         vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX);
557         int i = 0;
558         for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) {
559             vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i);
560             vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i);
561             vecDiff = vecX.sub(vecY);
562             vecSquare = vecDiff.mul(vecDiff);
563             vecSum = vecDiff.fma(vecDiff, vecSum);
564         }
565         double sum = vecSum.reduceLanes(VectorOperators.ADD);
566         for (; i < inputVectorDouble.length; i++) {
567             double diff = queryVectorDouble[i] - inputVectorDouble[i];
568             sum += diff * diff;
569         }
570         return sum;
571     }
572 
573     @Benchmark
574     public float l2SquaredScalar() {
575         float squaredDistance = 0;
576         for (int i = 0; i < inputVectorFloat.length; i++) {
577             float diff = queryVectorFloat[i] - inputVectorFloat[i];
578             squaredDistance += diff * diff;
579         }
580         return squaredDistance;
581     }
582 
583 }