1 /* 2 * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package org.openjdk.bench.jdk.incubator.vector.bigdata; 25 26 import jdk.incubator.vector.*; 27 import java.util.Base64; 28 import java.util.concurrent.TimeUnit; 29 import org.openjdk.jmh.annotations.*; 30 31 @BenchmarkMode(Mode.Throughput) 32 @Warmup(iterations = 3, time = 1) 33 @Measurement(iterations = 5, time = 1) 34 @OutputTimeUnit(TimeUnit.MILLISECONDS) 35 @State(Scope.Benchmark) 36 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 37 public class VectorDistance { 38 39 static final VectorSpecies<Float> SPECIES_FLOAT_128 = FloatVector.SPECIES_128; 40 static final VectorSpecies<Float> SPECIES_FLOAT_256 = FloatVector.SPECIES_256; 41 static final VectorSpecies<Float> SPECIES_FLOAT_MAX = FloatVector.SPECIES_MAX; 42 static final VectorSpecies<Double> SPECIES_DOUBLE_128 = DoubleVector.SPECIES_128; 43 static final VectorSpecies<Double> SPECIES_DOUBLE_256 = DoubleVector.SPECIES_256; 44 static final VectorSpecies<Double> SPECIES_DOUBLE_MAX = DoubleVector.SPECIES_MAX; 45 46 47 static long num = 0; 48 private static String x1 = "L5GSwXhHpEH05mNBHnmcQMTw3EBnagFCW1DGQHe/nUFO1B1BlJOpwCBJ9j" + 49 "+RkY1BzqKeQSglN0Gy7krB5CSfQFzxB8Djn5nB2KNFwKcSRMGYzRQ7qMGWQZ0FF0FTceDAIKjxv/zhdkHFZMHB6hU4QZbo2cCAryRB+7OOQCxbfEHRtBlBxPG6P0BYSD+Pgz9BqzOLv/nVO8C9x5/BQOY/wTTIx0GfW1BBGv2lQQwdDcGCqBfB12t/QKUBoEEejIXBPN9kQWsFbEGsGcnBkqJkwKhLgr/IQZxAelAWQfcYpcFQv0HBeiGCQWExhEDrKAnBpAwBQV4bVcFpGNjAyDsNQVOc+0CSc4nBgG/ZQQGRccEXts9BKhYzQNK5+MAlU0DBzPGWwPGRCcEZC5/ADxOcv7lUkEBomM5BuqKiwV2MU8HNGHDBSB84QZRSyMB8RZlBVFdZQXSVgcBTQQBCdWa/QBQ0qkGILUW/6NA9QQnkmsG+5PPBj0UowT6nYD9cwpjAS/w5wTbX2UH8Gb5AR/HUQMTNAMJ9MN9AgHoqPbbUyUFbe47BBHANQWZJBsGBuPlBy94EQADeXsG5eOtBnA+yQCRka8EMcGLBjuoRwb4k7sAasB5Bmk/UwaI1akErp6xBq5G5wNo1E8KHa7tB3IiKQTCffcHphK1BTgJzwVY3JEEip/VAlmgXQSeKCsLEABs/n1/xwL5u58CgQY49ahUWQoAJjj1hhqBASXrrQb6nM0H2fY+/thtbQAQobMAohvXAxM3xv7xyqD+MvpDBrlDiQfBvPcGA8X5AQE4SwXhGx7+uLA1AxY8xu2mVjEE7KlFBArveQFNMtUD3N7DB12BbQcyH4cFhSw3Bu5VWQeTW0z9o03TBxtMlQctp/8E/lLVAGUtTwZsGJMKv/R5A1HKVQV6RhsC1Ji5AcXLFQJd6f0HbB+e+ZDi8wV9tQ0FwCN/B+A89v2DrU0Bcpc5BglTeQH5dT0HePS9Al4XPwdA6YEFlueXAbWKSQSBWzkBy2RnCt9Yawl9b77+xgxBC9eCqQd8f0kFoBG9BVxrkQZh2QkHNW/zBEQiawLJEocDhutTA8zEYwbIvEUIO1T9BmlOTwIhbNEDhrtlAVk9BQARQaj89NQNC6usGwDfQrkBSJrlAON7FQQ8FqsEEc/TAY3zeQYsqUEHV8QPBHJoYQQdn5kGyCiJBlDMYQBBNoUFrxbw/NlmPP3B24j6ChIdBXk2bwdxdDMFQw1rA4hybQXTchr8d9wvBuCbLQSMKmMBH4RpBQIXePa5DT8IjgvtBgAetQZgGgMEprc1BAOeSPJ5XpEEMa0NBgX4uwX7XIsG2Ie0688iqQSpJPsCAy9LBAGHkPw=="; 50 private static String x2 = "5R3ZwGPrxEFMKyNBLFSeQdYav0BQtDFCur7WQAgRYEGHFYC/MKZtvkiFUT+RNXfBVsGBP2KWSUCmAUTBIf+EQG57kMCtXo7BV1DuwLd98r+YzRQ7qKXNwBMSPUFNQffBPrxeQYw1t7/7JjFAKNaXP+cMSEG6GI5BuEx0wUANDMEvDqdAT9YEworQTEEiVBZBiMejQP7t67+iRwzB3HadQB1be0Ei5g5BMt+cQXvYTUHwZsLAuoy3QfrR6EFrIiHB5X8Dwc8XbUH8Yr8/AvGEwa5GkUH3F5tAP8YJQTiDyz+gKsRAFl/rwDxJuUAPyyxBvg2gQU6bjMEPEa7Bz6wYQpQy7MDF5LvB8HP+QCJdicHQDjpC6RpWQcGeY8FMK6vBoeUjQcPYmUG2QmRBBI0nwScESsGMAcxBvRmawRL2A8IByKNAgTQBQuxdDEGq8JBBHJWmQSBDfz8sLe9BE3gFwTdCPkHEaMxBhX8Xwe7BCcE/783Bt6EHwdpbpkHc5L/BCPzRwUdIQUEd/k3AoGNcQQwNmMEyuKRBtnWlwdCBAUI5Y5DBwOZYvdI+MsEu/ixBnpMrwRtYt8FECytC6JjEQW3RHcBtfn3B+sgQQcyQKcEI5ytByvw2wPZdaUH+aqLAQFQ+QPi4REBF/9lBCvJNQTdlEcIAMbzBtD+hwZWufsAEjus/YRyjwR1YuMHj0ZhBa4w+QORAhMEq9qdB/L8JQrjhyUAJBeBAKqoIQUnAq0GsLFdBkfrvQHc1zMHH6THBeggSwaJIOsAawwBBDDWqPwrAlkBYDqe/maUcQabhwsFF2VBBxY8xu5aMQUFDkHVBKhRRwHhgWsEA5jXBlh9NQVMaT0CWlhTAroaFQRyciUHQlp7BF4trQa8unsE4TfI+9XLJQDNpLcIXLZdAuX2MwShiTsFcQh5BrHMqQVI1+UBWe4fBAzi0wfe11UFAIjq9Y1iAQDxrTsEY6plB/JiXQfjFwkHkYGRBVNOhwCMxtEFbqZTA378WQeA/Sb+FrSXCqlYywtb5SsDcqlZBk1EtQZ/RREHZIxG/kcv8QekDIkHPsDXCBL4VQHN8CMGtNvvAC3YwweUuAkKkJCnANEtVQG9z/0DrwyTBQ9hnwWX3kMEdLB1CvIlKwQ0IO0HK1ErBvdRQQVpjMMCJDI/Bb4X8QYVipEGpG2nBeLGUvmBlBT7ISgRB4iGAQUunkkFDFLm/HNaqPzKTVkCITJG/XzlYwbj0XcGD60PBbpLwQbvrs8Az8RXB4ubxQXh/HEDtXLU/kONrwVBs4MGc2X1BJaHkQd0ByEAKXLJBTq7JwPPkJUGJIIRBlh57wX3FjcC2Ie060Qc6Qal5xcCfqQrCl7edQQ=="; 51 static float[] queryVectorFloat = parseBase64ToVector(x1); 52 static float[] inputVectorFloat = parseBase64ToVector(x2); 53 static double[] queryVectorDouble; 54 static double[] inputVectorDouble; 55 56 static float normQueryVectorFloat; 57 static double normQueryVectorDouble; 58 59 public static float[] parseArray(byte[] input) { 60 if (input == null) { 61 return null; 62 } 63 float[] floatArr = new float[input.length / 4]; 64 for (int i = 0; i < floatArr.length; i++) { 65 int l; 66 l = input[i << 2]; 67 l &= 0xff; 68 l |= ((long) input[(i << 2) + 1] << 8); 69 l &= 0xffff; 70 l |= ((long) input[(i << 2) + 2] << 16); 71 l &= 0xffffff; 72 l |= ((long) input[(i << 2) + 3] << 24); 73 floatArr[i] = Float.intBitsToFloat(l); 74 } 75 return floatArr; 76 } 77 78 public static float[] parseBase64ToVector(String vectorBase64) { 79 return parseArray(Base64.getDecoder().decode(vectorBase64)); 80 } 81 82 @Setup 83 public void init() { 84 queryVectorDouble = new double[queryVectorFloat.length]; 85 inputVectorDouble = new double[inputVectorFloat.length]; 86 for (int i = 0; i < queryVectorFloat.length; i++) { 87 queryVectorDouble[i] = (double)(queryVectorFloat[i]); 88 } 89 for (int i = 0; i < inputVectorFloat.length; i++) { 90 inputVectorDouble[i] = (double)(inputVectorFloat[i]); 91 } 92 float xSquare = 0; 93 for (int i = 0; i < queryVectorFloat.length; i++) { 94 xSquare += (float)(queryVectorFloat[i] * queryVectorFloat[i]); 95 } 96 normQueryVectorFloat = xSquare; 97 normQueryVectorDouble = (double)xSquare; 98 } 99 100 @Benchmark 101 public float cosinesimilOptimizedScalarFloat() { 102 float dotProduct = 0.0f; 103 float normInputVector = 0.0f; 104 for (int i = 0; i < queryVectorFloat.length; i++) { 105 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 106 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 107 } 108 float normalizedProduct = normQueryVectorFloat * normInputVector; 109 if (normalizedProduct == 0) { 110 return Float.MIN_VALUE; 111 } 112 return (float) (dotProduct / (Math.sqrt(normalizedProduct))); 113 } 114 115 @Benchmark 116 public float cosinesimilOptimizedVectorFloat128() { 117 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV; 118 vecSum = FloatVector.zero(SPECIES_FLOAT_128); 119 xSquareV = FloatVector.zero(SPECIES_FLOAT_128); 120 ySquareV = FloatVector.zero(SPECIES_FLOAT_128);; 121 int i = 0; 122 for (i = 0; i + (SPECIES_FLOAT_128.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_128.length()) { 123 vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i); 124 vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i); 125 vecSum = vecX.fma(vecY, vecSum); 126 ySquareV = vecY.fma(vecY, ySquareV); 127 } 128 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 129 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 130 for (; i < queryVectorFloat.length; i++) { 131 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 132 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 133 } 134 float normalizedProduct = normQueryVectorFloat * normInputVector; 135 return (float)(dotProduct / Math.sqrt(normalizedProduct)); 136 } 137 138 @Benchmark 139 public float cosinesimilOptimizedVectorFloat256() { 140 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV; 141 vecSum = FloatVector.zero(SPECIES_FLOAT_256); 142 xSquareV = FloatVector.zero(SPECIES_FLOAT_256); 143 ySquareV = FloatVector.zero(SPECIES_FLOAT_256);; 144 int i = 0; 145 for (i = 0; i + (SPECIES_FLOAT_256.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_256.length()) { 146 vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i); 147 vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i); 148 vecSum = vecX.fma(vecY, vecSum); 149 ySquareV = vecY.fma(vecY, ySquareV); 150 } 151 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 152 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 153 for (; i < queryVectorFloat.length; i++) { 154 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 155 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 156 } 157 float normalizedProduct = normQueryVectorFloat * normInputVector; 158 return (float)(dotProduct / Math.sqrt(normalizedProduct)); 159 } 160 161 @Benchmark 162 public float cosinesimilOptimizedVectorFloatMax() { 163 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV; 164 vecSum = FloatVector.zero(SPECIES_FLOAT_MAX); 165 xSquareV = FloatVector.zero(SPECIES_FLOAT_MAX); 166 ySquareV = FloatVector.zero(SPECIES_FLOAT_MAX);; 167 int i = 0; 168 for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) { 169 vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i); 170 vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i); 171 vecSum = vecX.fma(vecY, vecSum); 172 ySquareV = vecY.fma(vecY, ySquareV); 173 } 174 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 175 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 176 for (; i < queryVectorFloat.length; i++) { 177 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 178 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 179 } 180 float normalizedProduct = normQueryVectorFloat * normInputVector; 181 return (float)(dotProduct / Math.sqrt(normalizedProduct)); 182 } 183 184 @Benchmark 185 public float cosinesimilScalarFloat() { 186 float dotProduct = 0.0f; 187 float normQueryVectorFloat = 0.0f; 188 float normInputVector = 0.0f; 189 for (int i = 0; i < queryVectorFloat.length; i++) { 190 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 191 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i]; 192 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 193 } 194 float normalizedProduct = normQueryVectorFloat * normInputVector; 195 if (normalizedProduct == 0) { 196 return Float.MIN_VALUE; 197 } 198 return (float) (dotProduct / (Math.sqrt(normalizedProduct))); 199 } 200 201 @Benchmark 202 public float cosinesimilVectorFloat128() { 203 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV; 204 vecSum = FloatVector.zero(SPECIES_FLOAT_128); 205 xSquareV = FloatVector.zero(SPECIES_FLOAT_128); 206 ySquareV = FloatVector.zero(SPECIES_FLOAT_128);; 207 int i = 0; 208 for (i = 0; i + (SPECIES_FLOAT_128.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_128.length()) { 209 vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i); 210 vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i); 211 vecSum = vecX.fma(vecY, vecSum); 212 xSquareV = vecX.fma(vecX, xSquareV); 213 ySquareV = vecY.fma(vecY, ySquareV); 214 } 215 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 216 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 217 float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD); 218 for (; i < queryVectorFloat.length; i++) { 219 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 220 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 221 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i]; 222 } 223 float normalizedProduct = normQueryVectorFloat * normInputVector; 224 return (float)(dotProduct / Math.sqrt(normalizedProduct)); 225 } 226 227 @Benchmark 228 public float cosinesimilVectorFloat256() { 229 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV; 230 vecSum = FloatVector.zero(SPECIES_FLOAT_256); 231 xSquareV = FloatVector.zero(SPECIES_FLOAT_256); 232 ySquareV = FloatVector.zero(SPECIES_FLOAT_256);; 233 int i = 0; 234 for (i = 0; i + (SPECIES_FLOAT_256.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_256.length()) { 235 vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i); 236 vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i); 237 vecSum = vecX.fma(vecY, vecSum); 238 xSquareV = vecX.fma(vecX, xSquareV); 239 ySquareV = vecY.fma(vecY, ySquareV); 240 } 241 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 242 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 243 float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD); 244 for (; i < queryVectorFloat.length; i++) { 245 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 246 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 247 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i]; 248 } 249 float normalizedProduct = normQueryVectorFloat * normInputVector; 250 return (float)(dotProduct / Math.sqrt(normalizedProduct)); 251 } 252 253 @Benchmark 254 public float cosinesimilVectorFloatMax() { 255 FloatVector vecX, vecY, vecSum, xSquareV, ySquareV; 256 vecSum = FloatVector.zero(SPECIES_FLOAT_MAX); 257 xSquareV = FloatVector.zero(SPECIES_FLOAT_MAX); 258 ySquareV = FloatVector.zero(SPECIES_FLOAT_MAX); 259 int i = 0; 260 for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= queryVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) { 261 vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i); 262 vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i); 263 vecSum = vecX.fma(vecY, vecSum); 264 xSquareV = vecX.fma(vecX, xSquareV); 265 ySquareV = vecY.fma(vecY, ySquareV); 266 } 267 float dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 268 float normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 269 float normQueryVectorFloat = xSquareV.reduceLanes(VectorOperators.ADD); 270 for (; i < queryVectorFloat.length; i++) { 271 dotProduct += queryVectorFloat[i] * inputVectorFloat[i]; 272 normInputVector += inputVectorFloat[i] * inputVectorFloat[i]; 273 normQueryVectorFloat += queryVectorFloat[i] * queryVectorFloat[i]; 274 } 275 float normalizedProduct = normQueryVectorFloat * normInputVector; 276 return (float)(dotProduct / Math.sqrt(normalizedProduct)); 277 } 278 279 @Benchmark 280 public double cosinesimilOptimizedScalarDouble() { 281 double dotProduct = 0.0; 282 double normInputVector = 0.0; 283 for (int i = 0; i < queryVectorDouble.length; i++) { 284 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 285 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 286 } 287 double normalizedProduct = normQueryVectorDouble * normInputVector; 288 return dotProduct / (Math.sqrt(normalizedProduct)); 289 } 290 291 @Benchmark 292 public double cosinesimilOptimizedVectorDouble128() { 293 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV; 294 vecSum = DoubleVector.zero(SPECIES_DOUBLE_128); 295 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_128); 296 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_128); 297 int i = 0; 298 for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_128.length()) { 299 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i); 300 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i); 301 vecSum = vecX.fma(vecY, vecSum); 302 ySquareV = vecY.fma(vecY, ySquareV); 303 } 304 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 305 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 306 for (; i < queryVectorDouble.length; i++) { 307 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 308 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 309 } 310 double normalizedProduct = normQueryVectorDouble * normInputVector; 311 return (double)(dotProduct / Math.sqrt(normalizedProduct)); 312 } 313 314 @Benchmark 315 public double cosinesimilOptimizedVectorDouble256() { 316 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV; 317 vecSum = DoubleVector.zero(SPECIES_DOUBLE_256); 318 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_256); 319 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_256); 320 int i = 0; 321 for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_256.length()) { 322 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i); 323 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i); 324 vecSum = vecX.fma(vecY, vecSum); 325 ySquareV = vecY.fma(vecY, ySquareV); 326 } 327 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 328 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 329 for (; i < queryVectorDouble.length; i++) { 330 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 331 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 332 } 333 double normalizedProduct = normQueryVectorDouble * normInputVector; 334 return (double)(dotProduct / Math.sqrt(normalizedProduct)); 335 } 336 337 @Benchmark 338 public double cosinesimilOptimizedVectorDoubleMax() { 339 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV; 340 vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX); 341 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX); 342 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX); 343 int i = 0; 344 for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) { 345 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i); 346 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i); 347 vecSum = vecX.fma(vecY, vecSum); 348 ySquareV = vecY.fma(vecY, ySquareV); 349 } 350 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 351 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 352 for (; i < queryVectorDouble.length; i++) { 353 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 354 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 355 } 356 double normalizedProduct = normQueryVectorDouble * normInputVector; 357 return (double)(dotProduct / Math.sqrt(normalizedProduct)); 358 } 359 360 @Benchmark 361 public double cosinesimilScalarDouble() { 362 double dotProduct = 0.0f; 363 double normQueryVectorDouble = 0.0f; 364 double normInputVector = 0.0f; 365 for (int i = 0; i < queryVectorDouble.length; i++) { 366 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 367 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i]; 368 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 369 } 370 double normalizedProduct = normQueryVectorDouble * normInputVector; 371 return (double) (dotProduct / (Math.sqrt(normalizedProduct))); 372 } 373 374 @Benchmark 375 public double cosinesimilVectorDouble128() { 376 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV; 377 vecSum = DoubleVector.zero(SPECIES_DOUBLE_128); 378 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_128); 379 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_128); 380 int i = 0; 381 for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_128.length()) { 382 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i); 383 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i); 384 vecSum = vecX.fma(vecY, vecSum); 385 xSquareV = vecX.fma(vecX, xSquareV); 386 ySquareV = vecY.fma(vecY, ySquareV); 387 } 388 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 389 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 390 double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD); 391 for (; i < queryVectorDouble.length; i++) { 392 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 393 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 394 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i]; 395 } 396 double normalizedProduct = normQueryVectorDouble * normInputVector; 397 return (double)(dotProduct / Math.sqrt(normalizedProduct)); 398 } 399 400 @Benchmark 401 public double cosinesimilVectorDouble256() { 402 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV; 403 vecSum = DoubleVector.zero(SPECIES_DOUBLE_256); 404 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_256); 405 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_256); 406 int i = 0; 407 for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_256.length()) { 408 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i); 409 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i); 410 vecSum = vecX.fma(vecY, vecSum); 411 xSquareV = vecX.fma(vecX, xSquareV); 412 ySquareV = vecY.fma(vecY, ySquareV); 413 } 414 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 415 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 416 double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD); 417 for (; i < queryVectorDouble.length; i++) { 418 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 419 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 420 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i]; 421 } 422 double normalizedProduct = normQueryVectorDouble * normInputVector; 423 return (double)(dotProduct / Math.sqrt(normalizedProduct)); 424 } 425 426 @Benchmark 427 public double cosinesimilVectorDoubleMax() { 428 DoubleVector vecX, vecY, vecSum, xSquareV, ySquareV; 429 vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX); 430 xSquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX); 431 ySquareV = DoubleVector.zero(SPECIES_DOUBLE_MAX); 432 int i = 0; 433 for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= queryVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) { 434 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i); 435 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i); 436 vecSum = vecX.fma(vecY, vecSum); 437 xSquareV = vecX.fma(vecX, xSquareV); 438 ySquareV = vecY.fma(vecY, ySquareV); 439 } 440 double dotProduct = vecSum.reduceLanes(VectorOperators.ADD); 441 double normInputVector = ySquareV.reduceLanes(VectorOperators.ADD); 442 double normQueryVectorDouble = xSquareV.reduceLanes(VectorOperators.ADD); 443 for (; i < queryVectorDouble.length; i++) { 444 dotProduct += queryVectorDouble[i] * inputVectorDouble[i]; 445 normInputVector += inputVectorDouble[i] * inputVectorDouble[i]; 446 normQueryVectorDouble += queryVectorDouble[i] * queryVectorDouble[i]; 447 } 448 double normalizedProduct = normQueryVectorDouble * normInputVector; 449 return (double)(dotProduct / Math.sqrt(normalizedProduct)); 450 } 451 452 // l2Squared is used to compute Euclidean distance 453 @Benchmark 454 public float l2SquaredVectorFloat128() { 455 FloatVector vecX, vecY, vecSum, vecSquare, vecDiff; 456 vecSum = FloatVector.zero(SPECIES_FLOAT_128); 457 int i = 0; 458 for (i = 0; i + (SPECIES_FLOAT_128.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_128.length()) { 459 vecX = FloatVector.fromArray(SPECIES_FLOAT_128, queryVectorFloat, i); 460 vecY = FloatVector.fromArray(SPECIES_FLOAT_128, inputVectorFloat, i); 461 vecDiff = vecX.sub(vecY); 462 vecSquare = vecDiff.mul(vecDiff); 463 vecSum = vecDiff.fma(vecDiff, vecSum); 464 } 465 float sum = vecSum.reduceLanes(VectorOperators.ADD); 466 for (; i < inputVectorFloat.length; i++) { 467 float diff = queryVectorFloat[i] - inputVectorFloat[i]; 468 sum += diff * diff; 469 } 470 return sum; 471 } 472 473 @Benchmark 474 public float l2SquaredVectorFloat256() { 475 FloatVector vecX, vecY, vecSum, vecSquare, vecDiff; 476 vecSum = FloatVector.zero(SPECIES_FLOAT_256); 477 int i = 0; 478 for (i = 0; i + (SPECIES_FLOAT_256.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_256.length()) { 479 vecX = FloatVector.fromArray(SPECIES_FLOAT_256, queryVectorFloat, i); 480 vecY = FloatVector.fromArray(SPECIES_FLOAT_256, inputVectorFloat, i); 481 vecDiff = vecX.sub(vecY); 482 vecSquare = vecDiff.mul(vecDiff); 483 vecSum = vecDiff.fma(vecDiff, vecSum); 484 } 485 float sum = vecSum.reduceLanes(VectorOperators.ADD); 486 for (; i < inputVectorFloat.length; i++) { 487 float diff = queryVectorFloat[i] - inputVectorFloat[i]; 488 sum += diff * diff; 489 } 490 return sum; 491 } 492 493 @Benchmark 494 public float l2SquaredVectorFloatMax() { 495 FloatVector vecX, vecY, vecSum, vecSquare, vecDiff; 496 vecSum = FloatVector.zero(SPECIES_FLOAT_MAX); 497 int i = 0; 498 for (i = 0; i + (SPECIES_FLOAT_MAX.length()) <= inputVectorFloat.length; i += SPECIES_FLOAT_MAX.length()) { 499 vecX = FloatVector.fromArray(SPECIES_FLOAT_MAX, queryVectorFloat, i); 500 vecY = FloatVector.fromArray(SPECIES_FLOAT_MAX, inputVectorFloat, i); 501 vecDiff = vecX.sub(vecY); 502 vecSquare = vecDiff.mul(vecDiff); 503 vecSum = vecDiff.fma(vecDiff, vecSum); 504 } 505 float sum = vecSum.reduceLanes(VectorOperators.ADD); 506 for (; i < inputVectorFloat.length; i++) { 507 float diff = queryVectorFloat[i] - inputVectorFloat[i]; 508 sum += diff * diff; 509 } 510 return sum; 511 } 512 513 @Benchmark 514 public double l2SquaredVectorDouble128() { 515 DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff; 516 vecSum = DoubleVector.zero(SPECIES_DOUBLE_128); 517 int i = 0; 518 for (i = 0; i + (SPECIES_DOUBLE_128.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_128.length()) { 519 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_128, queryVectorDouble, i); 520 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_128, inputVectorDouble, i); 521 vecDiff = vecX.sub(vecY); 522 vecSquare = vecDiff.mul(vecDiff); 523 vecSum = vecDiff.fma(vecDiff, vecSum); 524 } 525 double sum = vecSum.reduceLanes(VectorOperators.ADD); 526 for (; i < inputVectorDouble.length; i++) { 527 double diff = queryVectorDouble[i] - inputVectorDouble[i]; 528 sum += diff * diff; 529 } 530 return sum; 531 } 532 533 @Benchmark 534 public double l2SquaredVectorDouble256() { 535 DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff; 536 vecSum = DoubleVector.zero(SPECIES_DOUBLE_256); 537 int i = 0; 538 for (i = 0; i + (SPECIES_DOUBLE_256.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_256.length()) { 539 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_256, queryVectorDouble, i); 540 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_256, inputVectorDouble, i); 541 vecDiff = vecX.sub(vecY); 542 vecSquare = vecDiff.mul(vecDiff); 543 vecSum = vecDiff.fma(vecDiff, vecSum); 544 } 545 double sum = vecSum.reduceLanes(VectorOperators.ADD); 546 for (; i < inputVectorDouble.length; i++) { 547 double diff = queryVectorDouble[i] - inputVectorDouble[i]; 548 sum += diff * diff; 549 } 550 return sum; 551 } 552 553 @Benchmark 554 public double l2SquaredVectorDoubleMax() { 555 DoubleVector vecX, vecY, vecSum, vecSquare, vecDiff; 556 vecSum = DoubleVector.zero(SPECIES_DOUBLE_MAX); 557 int i = 0; 558 for (i = 0; i + (SPECIES_DOUBLE_MAX.length()) <= inputVectorDouble.length; i += SPECIES_DOUBLE_MAX.length()) { 559 vecX = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, queryVectorDouble, i); 560 vecY = DoubleVector.fromArray(SPECIES_DOUBLE_MAX, inputVectorDouble, i); 561 vecDiff = vecX.sub(vecY); 562 vecSquare = vecDiff.mul(vecDiff); 563 vecSum = vecDiff.fma(vecDiff, vecSum); 564 } 565 double sum = vecSum.reduceLanes(VectorOperators.ADD); 566 for (; i < inputVectorDouble.length; i++) { 567 double diff = queryVectorDouble[i] - inputVectorDouble[i]; 568 sum += diff * diff; 569 } 570 return sum; 571 } 572 573 @Benchmark 574 public float l2SquaredScalar() { 575 float squaredDistance = 0; 576 for (int i = 0; i < inputVectorFloat.length; i++) { 577 float diff = queryVectorFloat[i] - inputVectorFloat[i]; 578 squaredDistance += diff * diff; 579 } 580 return squaredDistance; 581 } 582 583 }