1 /* 2 * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package org.openjdk.bench.jdk.incubator.vector.crypto; 25 26 import jdk.incubator.foreign.MemorySegment; 27 import org.openjdk.jmh.annotations.*; 28 import jdk.incubator.vector.*; 29 30 import java.nio.ByteOrder; 31 import java.util.Arrays; 32 33 @State(Scope.Thread) 34 @BenchmarkMode(Mode.Throughput) 35 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 36 @Warmup(iterations = 3, time = 3) 37 @Measurement(iterations = 8, time = 2) 38 public class Poly1305Bench { 39 40 @Param({"16384", "65536"}) 41 private int dataSize; 42 43 private Poly1305Vector poly1305_S128 = makePoly1305(VectorShape.S_128_BIT); 44 private Poly1305Vector poly1305_S256 = makePoly1305(VectorShape.S_256_BIT); 45 private Poly1305Vector poly1305_S512 = makePoly1305(VectorShape.S_512_BIT); 46 47 private byte[] in; 48 private byte[] out = new byte[16]; 49 private byte[] key = new byte[32]; 50 51 private static Poly1305Vector makePoly1305(VectorShape shape) { 52 Poly1305Vector poly = new Poly1305Vector(shape); 53 runKAT(poly); 54 return poly; 55 } 56 57 @Setup 58 public void setup() { 59 in = new byte[dataSize]; 60 } 61 62 @Benchmark 63 public void auth128() { 64 poly1305_S128.computeTag(key, in, out); 65 } 66 67 @Benchmark 68 public void auth256() { 69 poly1305_S256.computeTag(key, in, out); 70 } 71 72 @Benchmark 73 public void auth512() { 74 poly1305_S512.computeTag(key, in, out); 75 } 76 77 private static class Poly1305Vector { 78 79 private static final int BITS_PER_LIMB = 26; 80 private static final int LIMB_MASK = (1 << BITS_PER_LIMB) - 1; 81 private static final int KEY_LENGTH = 32; 82 private static final int RS_LENGTH = KEY_LENGTH / 2; 83 84 private final VectorSpecies<Long> longSpecies; 85 private final VectorSpecies<Integer> intSpecies; 86 private final int vectorWidth; 87 private final int parBlockCount; 88 89 private final VectorShuffle<Long> inShuffle0; 90 private final VectorShuffle<Long> inShuffle1; 91 private final VectorMask<Long> inMask; 92 93 public Poly1305Vector(VectorShape shape) { 94 95 this.longSpecies = VectorSpecies.of(long.class, shape); 96 int intSize = shape.vectorBitSize() / 2; 97 VectorShape intShape = VectorShape.forBitSize(intSize); 98 this.intSpecies = VectorSpecies.of(int.class, intShape); 99 this.vectorWidth = longSpecies.length(); 100 this.parBlockCount = vectorWidth * 16; 101 102 this.inShuffle0 = makeInShuffle0(); 103 this.inShuffle1 = makeInShuffle1(); 104 this.inMask = makeInMask(); 105 } 106 107 private VectorShuffle<Long> makeInShuffle0() { 108 int[] indexArr = new int[vectorWidth]; 109 for (int i = 0; i < indexArr.length; i++) { 110 indexArr[i] = (2 * i) % vectorWidth; 111 } 112 return VectorShuffle.fromArray(longSpecies, indexArr, 0); 113 } 114 private VectorShuffle<Long> makeInShuffle1() { 115 int[] indexArr = new int[vectorWidth]; 116 for (int i = 0; i < indexArr.length; i++) { 117 indexArr[i] = ((2 * i) % vectorWidth) + 1; 118 } 119 return VectorShuffle.fromArray(longSpecies, indexArr, 0); 120 } 121 private VectorMask<Long> makeInMask() { 122 boolean[] maskArr = new boolean[vectorWidth]; 123 for (int i = vectorWidth / 2; i < vectorWidth; i++) { 124 maskArr[i] = true; 125 } 126 return VectorMask.fromArray(longSpecies, maskArr, 0); 127 } 128 129 private static int[] fromByteArray(byte[] buf) { 130 int[] result = new int[5]; 131 132 result[0] 133 = (buf[0] & 0xFF) 134 + ((buf[1] & 0xFF) << 8) 135 + ((buf[2] & 0xFF) << 16) 136 + ((buf[3] & 0x03) << 24); 137 result[1] 138 = ((buf[3] & 0xFF) >> 2) 139 + ((buf[4] & 0xFF) << 6) 140 + ((buf[5] & 0xFF) << 14) 141 + ((buf[6] & 0x0F) << 22); 142 result[2] 143 = ((buf[6] & 0xFF) >> 4) 144 + ((buf[7] & 0xFF) << 4) 145 + ((buf[8] & 0xFF) << 12) 146 + ((buf[9] & 0x3F) << 20); 147 result[3] 148 = ((buf[9] & 0xFF) >> 6) 149 + ((buf[10] & 0xFF) << 2) 150 + ((buf[11] & 0xFF) << 10) 151 + ((buf[12] & 0xFF) << 18); 152 result[4] 153 = (buf[13] & 0xFF) 154 + ((buf[14] & 0xFF) << 8) 155 + ((buf[15] & 0xFF) << 16); 156 157 return result; 158 } 159 160 private static void toByteArray(long v0, long v1, long v2, long v3, 161 long v4, byte[] dst) { 162 163 dst[0] = (byte) v0; 164 v0 >>= 8; 165 dst[1] = (byte) v0; 166 v0 >>= 8; 167 dst[2] = (byte) v0; 168 v0 >>= 8; 169 dst[3] = (byte) v0; 170 171 dst[3] += (v1 & 0x3F) << 2; 172 v1 >>= 6; 173 dst[4] = (byte) v1; 174 v1 >>= 8; 175 dst[5] = (byte) v1; 176 v1 >>= 8; 177 dst[6] = (byte) v1; 178 179 dst[6] += (v2 & 0xF) << 4; 180 v2 >>= 4; 181 dst[7] = (byte) v2; 182 v2 >>= 8; 183 dst[8] = (byte) v2; 184 v2 >>= 8; 185 dst[9] = (byte) v2; 186 187 dst[9] += (v3 & 0x3) << 6; 188 v3 >>= 2; 189 dst[10] = (byte) v3; 190 v3 >>= 8; 191 dst[11] = (byte) v3; 192 v3 >>= 8; 193 dst[12] = (byte) v3; 194 195 dst[13] = (byte) v4; 196 v4 >>= 8; 197 dst[14] = (byte) v4; 198 v4 >>= 8; 199 dst[15] = (byte) v4; 200 } 201 202 protected static long carryValue(long x) { 203 return x >> BITS_PER_LIMB; 204 } 205 206 public static void carryReduce(int[] r, long c0, long c1, long c2, 207 long c3, long c4) { 208 209 long c; 210 211 c = carryValue(c3); c3 &= LIMB_MASK; c4 += c; 212 c = carryValue(c4); c4 &= LIMB_MASK; c0 += c * 5; 213 c = carryValue(c0); c0 &= LIMB_MASK; c1 += c; 214 c = carryValue(c1); c1 &= LIMB_MASK; c2 += c; 215 c = carryValue(c2); c2 &= LIMB_MASK; c3 += c; 216 c = carryValue(c3); c3 &= LIMB_MASK; c4 += c; 217 218 r[0] = (int) c0; 219 r[1] = (int) c1; 220 r[2] = (int) c2; 221 r[3] = (int) c3; 222 r[4] = (int) c4; 223 } 224 225 private int[] multiply(int[] a, int[] b) { 226 int[] result = new int[5]; 227 228 long a0 = a[0]; 229 long a1 = a[1]; 230 long a2 = a[2]; 231 long a3 = a[3]; 232 long a4 = a[4]; 233 234 long c0 = (a0 * b[0]) + 5 * (a1 * b[4]) + 5 * (a2 * b[3]) + 235 5 * (a3 * b[2]) + 5 * (a4 * b[1]); 236 long c1 = (a0 * b[1]) + (a1 * b[0]) + 5 * (a2 * b[4]) + 237 5 * (a3 * b[3]) + 5 * (a4 * b[2]); 238 long c2 = (a0 * b[2]) + (a1 * b[1]) + (a2 * b[0]) + 239 5 * (a3 * b[4]) + 5 * (a4 * b[3]); 240 long c3 = (a0 * b[3]) + (a1 * b[2]) + (a2 * b[1]) + (a3 * b[0]) + 241 5 * (a4 * b[4]); 242 long c4 = (a0 * b[4]) + (a1 * b[3]) + (a2 * b[2]) + (a3 * b[1]) + 243 (a4 * b[0]); 244 245 carryReduce(result, c0, c1, c2, c3, c4); 246 247 return result; 248 } 249 250 private LongVector rPowerVec(int[][] r, long[] temp, int maxIndex, 251 int secondIndex) { 252 253 for (int i = 0; i < temp.length; i++) { 254 temp[i] = r[maxIndex - i][secondIndex]; 255 } 256 return LongVector.fromArray(longSpecies, temp, 0); 257 } 258 259 public void computeTag(byte[] key, byte[] msg, byte[] out) { 260 261 byte[] keyBytes = key.clone(); 262 263 // setup key values 264 // Clamp the bytes in the "r" half of the key. 265 keyBytes[3] &= 15; 266 keyBytes[7] &= 15; 267 keyBytes[11] &= 15; 268 keyBytes[15] &= 15; 269 keyBytes[4] &= 252; 270 keyBytes[8] &= 252; 271 keyBytes[12] &= 252; 272 273 // Create IntegerModuloP elements from the r and s values 274 int[][] r = new int[vectorWidth][]; 275 r[0] = fromByteArray(keyBytes); 276 for (int i = 1; i < vectorWidth; i++) { 277 r[i] = multiply(r[i - 1], r[0]); 278 } 279 280 int rUpIndex = vectorWidth - 1; 281 IntVector rUp0_int = IntVector.broadcast(intSpecies, r[rUpIndex][0]); 282 IntVector rUp1_int = IntVector.broadcast(intSpecies, r[rUpIndex][1]); 283 IntVector rUp2_int = IntVector.broadcast(intSpecies, r[rUpIndex][2]); 284 IntVector rUp3_int = IntVector.broadcast(intSpecies, r[rUpIndex][3]); 285 IntVector rUp4_int = IntVector.broadcast(intSpecies, r[rUpIndex][4]); 286 287 IntVector r5Up1_int = rUp1_int.mul(5); 288 IntVector r5Up2_int = rUp2_int.mul(5); 289 IntVector r5Up3_int = rUp3_int.mul(5); 290 IntVector r5Up4_int = rUp4_int.mul(5); 291 292 MemorySegment msMsg = MemorySegment.ofArray(msg); 293 LongVector longMsg0 = LongVector.fromMemorySegment(longSpecies, msMsg, 0, ByteOrder.LITTLE_ENDIAN); 294 LongVector longMsg1 = 295 LongVector.fromMemorySegment(longSpecies, msMsg, vectorWidth * 8L, ByteOrder.LITTLE_ENDIAN); 296 297 LongVector inAlign0 = 298 longMsg0.rearrange(inShuffle0).blend(longMsg1.rearrange(inShuffle0), inMask); 299 LongVector inAlign1 = 300 longMsg0.rearrange(inShuffle1).blend(longMsg1.rearrange(inShuffle1), inMask); 301 302 IntVector a0 = (IntVector) 303 inAlign0.and(LIMB_MASK).castShape(intSpecies, 0); 304 IntVector a1 = (IntVector) 305 inAlign0.lanewise(VectorOperators.LSHR,26).and(LIMB_MASK).castShape(intSpecies, 0); 306 IntVector a2 = (IntVector) 307 inAlign0.lanewise(VectorOperators.LSHR,52).and(0xFFF).castShape(intSpecies, 0); 308 a2 = a2.or(inAlign1.and(0x3FFF).lanewise(VectorOperators.LSHL,12).castShape(intSpecies, 0)); 309 IntVector a3 = (IntVector) 310 inAlign1.lanewise(VectorOperators.LSHR,14).and(LIMB_MASK).castShape(intSpecies, 0); 311 IntVector a4 = (IntVector) 312 inAlign1.lanewise(VectorOperators.LSHR,40).and(0xFFFFFF).castShape(intSpecies, 0); 313 a4 = a4.or(1 << 24); 314 315 int numParBlocks = msg.length / parBlockCount - 1; 316 for (int i = 0; i < numParBlocks; i++) { 317 318 // multiply and reduce 319 LongVector c0 = (LongVector) 320 a0.castShape(longSpecies, 0).mul(rUp0_int.castShape(longSpecies, 0)) 321 .add(a1.castShape(longSpecies, 0).mul(r5Up4_int.castShape(longSpecies, 0))) 322 .add(a2.castShape(longSpecies, 0).mul(r5Up3_int.castShape(longSpecies, 0))) 323 .add(a3.castShape(longSpecies, 0).mul(r5Up2_int.castShape(longSpecies, 0))) 324 .add(a4.castShape(longSpecies, 0).mul(r5Up1_int.castShape(longSpecies, 0))); 325 326 LongVector c1 = (LongVector) 327 a0.castShape(longSpecies, 0).mul(rUp1_int.castShape(longSpecies, 0)) 328 .add(a1.castShape(longSpecies, 0).mul(rUp0_int.castShape(longSpecies, 0))) 329 .add(a2.castShape(longSpecies, 0).mul(r5Up4_int.castShape(longSpecies, 0))) 330 .add(a3.castShape(longSpecies, 0).mul(r5Up3_int.castShape(longSpecies, 0))) 331 .add(a4.castShape(longSpecies, 0).mul(r5Up2_int.castShape(longSpecies, 0))); 332 333 LongVector c2 = (LongVector) 334 a0.castShape(longSpecies, 0).mul(rUp2_int.castShape(longSpecies, 0)) 335 .add(a1.castShape(longSpecies, 0).mul(rUp1_int.castShape(longSpecies, 0))) 336 .add(a2.castShape(longSpecies, 0).mul(rUp0_int.castShape(longSpecies, 0))) 337 .add(a3.castShape(longSpecies, 0).mul(r5Up4_int.castShape(longSpecies, 0))) 338 .add(a4.castShape(longSpecies, 0).mul(r5Up3_int.castShape(longSpecies, 0))); 339 340 LongVector c3 = (LongVector) 341 a0.castShape(longSpecies, 0).mul(rUp3_int.castShape(longSpecies, 0)) 342 .add(a1.castShape(longSpecies, 0).mul(rUp2_int.castShape(longSpecies, 0))) 343 .add(a2.castShape(longSpecies, 0).mul(rUp1_int.castShape(longSpecies, 0))) 344 .add(a3.castShape(longSpecies, 0).mul(rUp0_int.castShape(longSpecies, 0))) 345 .add(a4.castShape(longSpecies, 0).mul(r5Up4_int.castShape(longSpecies, 0))); 346 347 LongVector c4 = (LongVector) 348 a0.castShape(longSpecies, 0).mul(rUp4_int.castShape(longSpecies, 0)) 349 .add(a1.castShape(longSpecies, 0).mul(rUp3_int.castShape(longSpecies, 0))) 350 .add(a2.castShape(longSpecies, 0).mul(rUp2_int.castShape(longSpecies, 0))) 351 .add(a3.castShape(longSpecies, 0).mul(rUp1_int.castShape(longSpecies, 0))) 352 .add(a4.castShape(longSpecies, 0).mul(rUp0_int.castShape(longSpecies, 0))); 353 354 // carry/reduce 355 // Note: this carry/reduce sequence might not be correct 356 c4 = c4.add(c3.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 357 c3 = c3.and(LIMB_MASK); 358 c0 = c0.add(c4.lanewise(VectorOperators.LSHR, BITS_PER_LIMB).mul(5)); 359 c4 = c4.and(LIMB_MASK); 360 c1 = c1.add(c0.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 361 c0 = c0.and(LIMB_MASK); 362 c2 = c2.add(c1.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 363 c1 = c1.and(LIMB_MASK); 364 c3 = c3.add(c2.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 365 c2 = c2.and(LIMB_MASK); 366 c4 = c4.add(c3.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 367 c3 = c3.and(LIMB_MASK); 368 369 a0 = (IntVector) c0.castShape(intSpecies, 0); 370 a1 = (IntVector) c1.castShape(intSpecies, 0); 371 a2 = (IntVector) c2.castShape(intSpecies, 0); 372 a3 = (IntVector) c3.castShape(intSpecies, 0); 373 a4 = (IntVector) c4.castShape(intSpecies, 0); 374 375 // fromByteArray and add next part of message 376 int start = parBlockCount * (i + 1); 377 378 longMsg0 = LongVector.fromMemorySegment(longSpecies, msMsg, start, ByteOrder.LITTLE_ENDIAN); 379 longMsg1 = LongVector.fromMemorySegment(longSpecies, msMsg, 380 start + vectorWidth * 8L, ByteOrder.LITTLE_ENDIAN); 381 382 inAlign0 = 383 longMsg0.rearrange(inShuffle0).blend(longMsg1.rearrange(inShuffle0), inMask); 384 inAlign1 = 385 longMsg0.rearrange(inShuffle1).blend(longMsg1.rearrange(inShuffle1), inMask); 386 387 IntVector in0 = (IntVector) 388 inAlign0.and(LIMB_MASK).castShape(intSpecies, 0); 389 IntVector in1 = (IntVector) 390 inAlign0.lanewise(VectorOperators.LSHR, 26).and(LIMB_MASK).castShape(intSpecies, 0); 391 IntVector in2 = (IntVector) 392 inAlign0.lanewise(VectorOperators.LSHR, 52).and(0xFFF).castShape(intSpecies, 0); 393 in2 = in2.or(inAlign1.and(0x3FFF).lanewise(VectorOperators.LSHL, 12).castShape(intSpecies, 0)); 394 IntVector in3 = (IntVector) 395 inAlign1.lanewise(VectorOperators.LSHR, 14).and(LIMB_MASK).castShape(intSpecies, 0); 396 IntVector in4 = (IntVector) 397 inAlign1.lanewise(VectorOperators.LSHR, 40).and(0xFFFFFF).castShape(intSpecies, 0); 398 in4 = in4.or(1 << 24); 399 400 a0 = a0.add(in0); 401 a1 = a1.add(in1); 402 a2 = a2.add(in2); 403 a3 = a3.add(in3); 404 a4 = a4.add(in4); 405 } 406 407 // multiply by powers of r 408 long[] rTemp = new long[vectorWidth]; 409 LongVector rFin0 = rPowerVec(r, rTemp, rUpIndex, 0); 410 LongVector rFin1 = rPowerVec(r, rTemp, rUpIndex, 1); 411 LongVector rFin2 = rPowerVec(r, rTemp, rUpIndex, 2); 412 LongVector rFin3 = rPowerVec(r, rTemp, rUpIndex, 3); 413 LongVector rFin4 = rPowerVec(r, rTemp, rUpIndex, 4); 414 415 LongVector r5Fin_1 = rFin1.mul(5); 416 LongVector r5Fin_2 = rFin2.mul(5); 417 LongVector r5Fin_3 = rFin3.mul(5); 418 LongVector r5Fin_4 = rFin4.mul(5); 419 420 LongVector c0 = (LongVector) a0.castShape(longSpecies, 0).mul(rFin0) 421 .add(a1.castShape(longSpecies, 0).mul(r5Fin_4)) 422 .add(a2.castShape(longSpecies, 0).mul(r5Fin_3)) 423 .add(a3.castShape(longSpecies, 0).mul(r5Fin_2)) 424 .add(a4.castShape(longSpecies, 0).mul(r5Fin_1)); 425 LongVector c1 = (LongVector) a0.castShape(longSpecies, 0).mul(rFin1) 426 .add(a1.castShape(longSpecies, 0).mul(rFin0)) 427 .add(a2.castShape(longSpecies, 0).mul(r5Fin_4)) 428 .add(a3.castShape(longSpecies, 0).mul(r5Fin_3)) 429 .add(a4.castShape(longSpecies, 0).mul(r5Fin_2)); 430 LongVector c2 = (LongVector) a0.castShape(longSpecies, 0).mul(rFin2) 431 .add(a1.castShape(longSpecies, 0).mul(rFin1)) 432 .add(a2.castShape(longSpecies, 0).mul(rFin0)) 433 .add(a3.castShape(longSpecies, 0).mul(r5Fin_4)) 434 .add(a4.castShape(longSpecies, 0).mul(r5Fin_3)); 435 LongVector c3 = (LongVector) a0.castShape(longSpecies, 0).mul(rFin3) 436 .add(a1.castShape(longSpecies, 0).mul(rFin2)) 437 .add(a2.castShape(longSpecies, 0).mul(rFin1)) 438 .add(a3.castShape(longSpecies, 0).mul(rFin0)) 439 .add(a4.castShape(longSpecies, 0).mul(r5Fin_4)); 440 LongVector c4 = (LongVector) a0.castShape(longSpecies, 0).mul(rFin4) 441 .add(a1.castShape(longSpecies, 0).mul(rFin3)) 442 .add(a2.castShape(longSpecies, 0).mul(rFin2)) 443 .add(a3.castShape(longSpecies, 0).mul(rFin1)) 444 .add(a4.castShape(longSpecies, 0).mul(rFin0)); 445 446 c4 = c4.add(c3.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 447 c3 = c3.and(LIMB_MASK); 448 c0 = c0.add(c4.lanewise(VectorOperators.LSHR, BITS_PER_LIMB).mul(5)); 449 c4 = c4.and(LIMB_MASK); 450 c1 = c1.add(c0.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 451 c0 = c0.and(LIMB_MASK); 452 c2 = c2.add(c1.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 453 c1 = c1.and(LIMB_MASK); 454 c3 = c3.add(c2.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 455 c2 = c2.and(LIMB_MASK); 456 c4 = c4.add(c3.lanewise(VectorOperators.LSHR, BITS_PER_LIMB)); 457 c3 = c3.and(LIMB_MASK); 458 459 a0 = (IntVector) c0.castShape(intSpecies, 0); 460 a1 = (IntVector) c1.castShape(intSpecies, 0); 461 a2 = (IntVector) c2.castShape(intSpecies, 0); 462 a3 = (IntVector) c3.castShape(intSpecies, 0); 463 a4 = (IntVector) c4.castShape(intSpecies, 0); 464 465 // collect lanes and calculate tag 466 long a0Fin = a0.reduceLanes(VectorOperators.ADD); 467 long a1Fin = a1.reduceLanes(VectorOperators.ADD); 468 long a2Fin = a2.reduceLanes(VectorOperators.ADD); 469 long a3Fin = a3.reduceLanes(VectorOperators.ADD); 470 long a4Fin = a4.reduceLanes(VectorOperators.ADD); 471 472 // carry/reduce the result 473 a4Fin = a4Fin + (a3Fin >>> BITS_PER_LIMB); 474 a3Fin = a3Fin & LIMB_MASK; 475 a0Fin = a0Fin + ((a4Fin >>> BITS_PER_LIMB) * 5); 476 a4Fin = a4Fin & LIMB_MASK; 477 a1Fin = a1Fin + (a0Fin >>> BITS_PER_LIMB); 478 a0Fin = a0Fin & LIMB_MASK; 479 a2Fin = a2Fin + (a1Fin >>> BITS_PER_LIMB); 480 a1Fin = a1Fin & LIMB_MASK; 481 a3Fin = a3Fin + (a2Fin >>> BITS_PER_LIMB); 482 a2Fin = a2Fin & LIMB_MASK; 483 a4Fin = a4Fin + (a3Fin >>> BITS_PER_LIMB); 484 a3Fin = a3Fin & LIMB_MASK; 485 486 byte[] s_arr = 487 Arrays.copyOfRange(keyBytes, RS_LENGTH, 2 * RS_LENGTH); 488 int[] s = fromByteArray(s_arr); 489 490 // Add in the s-half of the key to the accumulator 491 a0Fin += s[0]; 492 a1Fin += s[1]; 493 a2Fin += s[2]; 494 a3Fin += s[3]; 495 a4Fin += s[4]; 496 497 // final carry mod 2^130 498 a1Fin = a1Fin + (a0Fin >> BITS_PER_LIMB); 499 a0Fin = a0Fin & LIMB_MASK; 500 a2Fin = a2Fin + (a1Fin >> BITS_PER_LIMB); 501 a1Fin = a1Fin & LIMB_MASK; 502 a3Fin = a3Fin + (a2Fin >> BITS_PER_LIMB); 503 a2Fin = a2Fin & LIMB_MASK; 504 a4Fin = a4Fin + (a3Fin >> BITS_PER_LIMB); 505 a3Fin = a3Fin & LIMB_MASK; 506 a4Fin = a4Fin & LIMB_MASK; 507 508 // put result in buffer 509 toByteArray(a0Fin, a1Fin, a2Fin, a3Fin, a4Fin, out); 510 } 511 } 512 513 514 private static byte[] hexStringToByteArray(String str) { 515 byte[] result = new byte[str.length() / 2]; 516 for (int i = 0; i < result.length; i++) { 517 result[i] = (byte) Character.digit(str.charAt(2 * i), 16); 518 result[i] <<= 4; 519 result[i] += Character.digit(str.charAt(2 * i + 1), 16); 520 } 521 return result; 522 } 523 524 public static String byteArrayToHexString(byte[] arr) { 525 StringBuilder result = new StringBuilder(); 526 for (int i = 0; i < arr.length; ++i) { 527 byte curVal = arr[i]; 528 result.append(Character.forDigit(curVal >> 4 & 0xF, 16)); 529 result.append(Character.forDigit(curVal & 0xF, 16)); 530 } 531 return result.toString(); 532 } 533 534 private static void kat(Poly1305Vector poly1305, String key, String msg, 535 String expectedTag) { 536 537 kat(poly1305, hexStringToByteArray(key), hexStringToByteArray(msg), 538 hexStringToByteArray(expectedTag)); 539 } 540 541 private static void kat(Poly1305Vector poly1305, byte[] key, byte[] msg, 542 byte[] expectedTag) { 543 544 byte[] tag = new byte[expectedTag.length]; 545 poly1305.computeTag(key, msg, tag); 546 if (!Arrays.equals(tag, expectedTag)) { 547 throw new RuntimeException( 548 "bad tag: " + byteArrayToHexString(tag) + 549 " expected: " + byteArrayToHexString(expectedTag)); 550 } 551 } 552 553 /* 554 * Poly1305 Known Answer Tests to ensure that the implementation is correct. 555 */ 556 private static void runKAT(Poly1305Vector poly1305) { 557 kat(poly1305, 558 "d212b886dd4682a41f1759e6c5aef84760e5a63d4423ca7d1fb5c7ecfc5dac27", 559 "5d2ad39e2a7b0bc5f375488643acf391188d01ad936971457427bc053c4262a1" + 560 "598532850def8573213c5f79fa736703c57c03ec49b55617210998c8af408698" + 561 "866632a7ecf7e9a688605cbca919e17e2badd090a7a6d83ad90be0617fa44642" + 562 "cc9a1ca38514a026cbea51c287ec0b56719fc61183c88e9450ba85aa8ab7d390", 563 "7ccdfa8e82df540276e8172f705adce2"); 564 565 kat(poly1305, 566 "2b0b684c86910104aee1d261ac4d5a0f5443b4b7746cf7f8ba03921d273f6a9b", 567 "027b359f44a5d60f81073ceb74749207742529dcefa4a26a1817db2c8d50ba2b" + 568 "d9e170cd1930946872d95e4eae41389f362087871a749897e0fbe42494e6f0b3" + 569 "8db01e2059510b6fda4f422ce7d226433ba00940e1761baaff80d9b8f3a61d11" + 570 "a109e6082d231cf85aa718199e6eaaaf07bad562469ef1b8e639c727967bf6da" + 571 "bcd16fcb0fc102095325e2fac92e599e81c26900df1deb7b0a0b5c321a658024" + 572 "26506740509ece646fecf33a517b66e57577372156aae85765c6b473521d1019" + 573 "4f5fbe0e932cfee716e1d41c9154fb8e15b82ab7e807fb54f3d7d3e4c589cc9a" + 574 "492d17ea4fd27894fa9d22a9db6d5df674cd1e97e7e8758a360291f22dfe1cc3", 575 "84ca3a778faf0ab9f840fe5fb38ace27"); 576 577 kat(poly1305, 578 "870c6fa7da2eadb845ac8b0eeaed4cf856eca67bf96b64a29a2e6a881821fa8a", 579 "ff1a3b67a4f575be5f05c4054e4c7365838c2cbe786ba78900c8b43f197c3c4d" + 580 "120432a287e434669af579bcd56f3320e54d2f97a306f917f2f41b1c97cc69db" + 581 "4ac2051adccd687fa89f92504d1ab5c3006681d846c8051aabccca0024ef5ec4" + 582 "c43b8701ffc9d14fef8d55e229ed210a2b9bde996f5d7b545d15e1fc32764604" + 583 "b2a0384dd173aa800b7526c8ff397c05130bb6a1f2194968adaef6979b023cd8" + 584 "d9195d2739351c7e4ac6c43508634f813641f669e78cbcf732ccb1321a2cd2c4" + 585 "14c7df5b9ea3408f2e12fbf3a3cbdb98699dd5402725ec25f9fff9bcd0f93cb3" + 586 "cf0dac016fec41a5ef3ae8b8d258a09f530ad14ad2e52186041592eac0ea22ff" + 587 "8c3751009b516c60f7764cccbb816394ef035bd8cb2a38d5c6b9229e528e56cd" + 588 "62600b5219b64212642384e628f01d790eeef4963a7d1a63a9faff79d4acfa09" + 589 "78b58b0b623ae89389661aa408b16814d3baaca20978dce6888c3365f4ffd2fa" + 590 "8f031a44f2e870a06da21d7becf450d335e1386268bc189435e7955a477bc368", 591 "ff4e0ee6feb1c6a57e638a79fafc7c60"); 592 593 kat(poly1305, 594 "c27987ae88a833ae2ea90371b2e257c15773da3bc34516b6b075446e1f844a81", 595 "64e5a2e2940b173c7103ae931ced302a8f8c778f4e5c0b3677c51552655005d8" + 596 "504b724107e7262448c94db83fc9c6a2a26fc973360dce15c0553b73bb733d3e" + 597 "f61fcba8977e76c32523b80c3b45b1226b23ee17522f9b677880c69b356917ae" + 598 "3c792a0c5b0c77b90dfa51483626323b7a73fffb1b128c595d553bf62a8f5bb9" + 599 "fa48b4a850a932481bf607e8da84730c9052bba9316ec7eb84007a4eb5cbed5c" + 600 "7c67ef32d4c5cb6cfbccd738d239857c240de6d3d4e5af14d480feb63541e5d8" + 601 "036e088b2e32431e6fe0c4d3505aebe2e14bd02b6b15325f89aef048cd1236db" + 602 "4461a59304b7c61ece2c52ef8ac4cf2326e6aaff013494b1b191be4ae4381f57" + 603 "e72b947ee23d0a528087db9338bc28c68484929fc3436995b2083b06a765ceb7" + 604 "09e9dd41ba896d99832d6851189766e844137d9a83d2890bc2be7afc82f9ebb8" + 605 "bafe08ef5f7ff0cce9a1d08e6b797a17df04731f384a34b16e72e9f2ab070114" + 606 "8008945509fe378658dc51eb752248f48364be327cd1b6bd148c518a976ae95d" + 607 "d391f3b0d447251988c7e77400c9d44395b8f9f10cbc442a6804d0ad83e8c3e3" + 608 "9fa09c2140fac143c90f09a7d907c57e29b528d54c8bd927f39aee2cec671213" + 609 "c50fe657b29682d57a419e3e52dafb348cbe44b6c17e4be18f5c5e411734fcfc" + 610 "99b9ca26f29a21cc93374ef1bfa86ca2bb3be76b94b4ef69ec790c968a51e4d0", 611 "d4c09727f68fa3beb57ce9e74205b652"); 612 613 kat(poly1305, 614 "2593adf2efc0e49c7fde0d45de4f7a21ceb76df45c0e5a917ef1f6b7fbf4fb7e", 615 "23665b9a6d4f04c9d58347d32d64d4cbf8d4ca993a8bb73758e6eb2db9f97096" + 616 "d0f00aca8cb16460d2bc15c0136fa92482602f47b3ec78244c4dc619d9b28afa" + 617 "19b063c196bcff848eff179102cce29dfcc58bf90a2f6311e6d021e2573ccbb4" + 618 "4e06947167c9865127c0b7362196523f97c8157058f7aebff475f77e23393dc1" + 619 "a3031bbaf31270db3eadc00cca6ae073aa53160d095afdce0a202de8a23d9a38" + 620 "b0bed20cbe64e1ec77fc13ba0cfc9be52edb70475bf1aeaaff25e11f2e0ae47f" + 621 "f23cbd4a6219d276fcc6c8f12a739f11434c86d4e24397f96ef6e36d9195fa8a" + 622 "48eb55990d69feacfb754b5019a3ebafa98d5544077b46c136cc11de8ee7f8a6" + 623 "76a6696600088696233f4e8f060ba8a64890fb638469639bfb727ed758c36250" + 624 "a553b7ce1115509f2bb19c13cea87003a8eff45ce9e1cff0a21ba5ae19226d50" + 625 "e108db212a588e5f4c502468859b9b607922c3311b5d912bd9400e696d7debbb" + 626 "9ac5454cc7d0f95fc242c491f095a02f0d3bd7ead0f0b7358c9b1d85e4e9ab75" + 627 "24bb43867c94a21a4e0db6470a210c9dd937e4801396bd687127fa7c83014c85" + 628 "372553c56dfd6cd9b75fa10483aea825f8e3fa53c6bf17467e37c2e7439ed0ea" + 629 "6fb24d13d428965c44f1ac943c7bc77fa84711c91b41f5ee6d9a7d9091648a96" + 630 "cc7c261d7fc5d964446d1e3dcc41d32ecaa8d7791b8462563fcf7f96cd1d11d4" + 631 "34923e0150321356866f5bdafebc96f2661bfd3c1f104e96b6492cafcbe25fc6" + 632 "ec0c92a3bbec7328e1905d5951fae04625a2452f596027a5d9c64eed55165c8a" + 633 "23bc3f944b4fa9c7ad83ebc1777c7153d5de13d04c0a12e774b17906a62f5134" + 634 "685c2de31da08bd04840299fd62d56ffe95248365034e7ba95961cebf0542b24", 635 "b9f68b0996caf5135136b10b37fe5f81"); 636 637 kat(poly1305, 638 "e9c8c78bc0ad5751f094fd4657fe5ef2a3c232f6930eef3431cde76659f04210", 639 "914e57a2745fd475d7b8f982483fe11a05d7b55853239112d5ae99616c718b3c" + 640 "4a0c2d05e3ca1df509614c0fe051b414d404149ec422e0998e192e51518518c4" + 641 "b8acd9e3e3ff9f3b4ef931d3052755785d38e75821ceefa7da0bfe3f1fb2dc6d" + 642 "738e2a2332e53ce77d44547621bb7aa724dd8805c7c795088db865d6b13d9b3e" + 643 "8acec846efb072d105ab6e599f8292a7601087e0ba13af9f503dcfd426e26e4d" + 644 "fb22bf5a1ff1a82d67d9bd8871e6adc17aa39d221f2865f81da9ed566192c269" + 645 "3c85f0442924e603b9ae54b88dd0f21e92eedc40c08dd484c552e297894eeee8" + 646 "b5acc91d5ae16f56257bb0836b48e1a8fa72e83a8b10b7026a7f466c8b08eac5" + 647 "4359b70e639117cf688e263b891f004db94d77941380f3ab0559538c9398c859" + 648 "b76d2bbcd6b635e753160583e7adc263097a80520d003514e134a21597c1ec57" + 649 "55da3a70acc6951b4d4d81e98b9eb962d9e3bc37d5e8ebd61e2a3f61cc452a65" + 650 "56571e12c190d4e3d0f8cc61ffcb60324b4a6987e7375a832ff807682e0b4595" + 651 "66ef1f765638f3d2e837ed43ce2c1c7837f271c866908d865c3d9174fd4f8056" + 652 "265abfb88fbc207db7a12c0a0ad035e5a728725e98cb682d41fd0bcf3aef2fd7" + 653 "ab261727f310fc7cf3b34286c9e9ee235995315167191f3b4d77e5642fb57dbd" + 654 "fdb5ccadefc5d03866918ab1a3eff54b405d8946e2b0c2fa444d1b2be4c3d41d" + 655 "990515e7534190d66d10e38c36c5d3df0315db85ba10c924bef97d1faa07a5f8" + 656 "f04998a7d38689237a1912bea3f821357d8383d7c5cfa66ba5965b5a94bb702c" + 657 "e6583e59879021139355c5b90e0f9cd13b34f3357ffde404bbf34c97f9fd55b5" + 658 "53e42d8a6b370eded02c8a5221e15db701da56918412520e12fd1ef9f4748647" + 659 "858488d5e0abd5b9e01457768907e1d24581f9591771304192711292e4025fce" + 660 "bd92adb2297e9496852f80bd78578bbdb292ca209f7584ff76e9eb66ec8a111e" + 661 "add30dc7ef364c4f1339312f226fe0cfa7a5b1602417e469cf2c8e3874c51232" + 662 "00f2d90dbe7f3c3ff5c6c6484052a80eb6229a6ed6176ad600da185da624bea6", 663 "c1de44dd8ea245ca43e5587460feb514"); 664 } 665 }