src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp

  13 * version 2 for more details (a copy is included in the LICENSE file that
  14 * accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License version
  17 * 2 along with this work; if not, write to the Free Software Foundation,
  18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 *
  20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 * or visit www.oracle.com if you need additional information or have any
  22 * questions.
  23 *
  24 */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/assembler.hpp"
  28 #include "asm/assembler.inline.hpp"
  29 #include "runtime/stubRoutines.hpp"
  30 #include "macroAssembler_x86.hpp"
  31 
  32 #ifdef _LP64
  33 
  34 void MacroAssembler::roundEnc(XMMRegister key, int rnum) {
  35     for (int xmm_reg_no = 0; xmm_reg_no <=rnum; xmm_reg_no++) {
  36       vaesenc(as_XMMRegister(xmm_reg_no), as_XMMRegister(xmm_reg_no), key, Assembler::AVX_512bit);
  37     }
  38 }
  39 
  40 void MacroAssembler::lastroundEnc(XMMRegister key, int rnum) {
  41     for (int xmm_reg_no = 0; xmm_reg_no <=rnum; xmm_reg_no++) {
  42       vaesenclast(as_XMMRegister(xmm_reg_no), as_XMMRegister(xmm_reg_no), key, Assembler::AVX_512bit);
  43     }
  44 }
  45 
  46 void MacroAssembler::roundDec(XMMRegister key, int rnum) {
  47     for (int xmm_reg_no = 0; xmm_reg_no <=rnum; xmm_reg_no++) {
  48       vaesdec(as_XMMRegister(xmm_reg_no), as_XMMRegister(xmm_reg_no), key, Assembler::AVX_512bit);
  49     }
  50 }
  51 
  52 void MacroAssembler::lastroundDec(XMMRegister key, int rnum) {
  53     for (int xmm_reg_no = 0; xmm_reg_no <=rnum; xmm_reg_no++) {
  54       vaesdeclast(as_XMMRegister(xmm_reg_no), as_XMMRegister(xmm_reg_no), key, Assembler::AVX_512bit);
  55     }
  56 }
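     // Note: each helper above applies one AES round to registers zmm0..zmm<rnum> in place,
     // so a call with rnum = 7 advances eight 512-bit registers (32 blocks, 512 bytes) by one round.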
  57 
  58 // Load key and shuffle operation
  59 void MacroAssembler::ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  60     movdqu(xmmdst, Address(key, offset));
  61     if (xmm_shuf_mask != NULL) {
  62         pshufb(xmmdst, xmm_shuf_mask);
  63     } else {
  64        pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  65     }
  66    evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
  67 }
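     // Note: pshufb byte-swaps the 128-bit round key using the key shuffle mask, and
     // evshufi64x2 with imm8 = 0x0 then broadcasts those low 128 bits into all four lanes
     // of the 512-bit register, so one key can be applied to four blocks at once.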
  68 
  69 // AES-ECB Encrypt Operation
  70 void MacroAssembler::aesecb_encrypt(Register src_addr, Register dest_addr, Register key, Register len) {
  71 
  72     const Register pos = rax;
  73     const Register rounds = r12;
  74 
  75     Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT;
  76     push(r13);
  77     push(r12);
  78 
  79     // For EVEX with VL and BW, provide a standard mask; VL = 128 guides the merge
  80     // context for the registers used, since all instructions below operate in 128-bit mode.
  81     // On EVEX without VL and BW, these instructions will all be AVX.
  82     if (VM_Version::supports_avx512vlbw()) {
  83        movl(rax, 0xffff);
  84        kmovql(k1, rax);
  85     }
  86     push(len); // Save
  87     push(rbx);
  88 
  89     vzeroupper();
  90 
  91     xorptr(pos, pos);
  92 
  93     // Determine the number of rounds from the expanded key length (in ints): 44 for 10 rounds (128-bit key), 52 for 12 rounds (192-bit), 60 for 14 rounds (256-bit)
  94     movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  95 
  96     // Load Key shuf mask
  97     const XMMRegister xmm_key_shuf_mask = xmm31;  // used temporarily to swap key bytes up front
  98     movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  99 
 100     // Load and shuffle key based on number of rounds
 101     ev_load_key(xmm8, key, 0 * 16, xmm_key_shuf_mask);
 102     ev_load_key(xmm9, key, 1 * 16, xmm_key_shuf_mask);
 103     ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask);
 104     ev_load_key(xmm23, key, 3 * 16, xmm_key_shuf_mask);
 105     ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask);
 106     ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask);
 107     ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask);
 108     ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask);
 109     ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask);
 110     ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask);
 111     ev_load_key(xmm24, key, 10 * 16, xmm_key_shuf_mask);
 112     cmpl(rounds, 52);
 113     jcc(Assembler::greaterEqual, KEY_192);
 114     jmp(Loop_start);
 115 
 116     bind(KEY_192);
 117     ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask);
 118     ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask);
 119     cmpl(rounds, 60);
 120     jcc(Assembler::equal, KEY_256);
 121     jmp(Loop_start);
 122 
 123     bind(KEY_256);
 124     ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask);
 125     ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask);
 126 
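         // Note: at this point xmm8-xmm10, xmm23, xmm12-xmm17 and xmm24 hold round keys 0-10
         // (with xmm19/xmm20 holding keys 11-12 and xmm21/xmm22 keys 13-14 for the larger key sizes),
         // each broadcast across the full 512-bit register by ev_load_key.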
 127     bind(Loop_start);
 128     movq(rbx, len);
 129     // Divide length by 16 to convert it to number of blocks
 130     shrq(len, 4);
 131     shlq(rbx, 60);
 132     jcc(Assembler::equal, NO_PARTS);
 133     addq(len, 1);
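         // Note: shlq(rbx, 60) keeps only the low 4 bits of the original byte length; if they are
         // all zero (ZF set) the length is an exact multiple of 16, otherwise the block count in
         // len is rounded up by one to cover the partial final block.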
 134     // Check if number of blocks is greater than or equal to 32
 135     // If true, 512 bytes are processed at a time (code marked by label LOOP)
 136     // If not, 16 bytes are processed (code marked by REMAINDER label)
 137     bind(NO_PARTS);
 138     movq(rbx, len);
 139     shrq(len, 5);
 140     jcc(Assembler::equal, REMAINDER);
 141     movl(r13, len);
 142     // Compute number of blocks that will be processed 512 bytes at a time
 143     // Subtract this from the total number of blocks which will then be processed by REMAINDER loop
 144     shlq(r13, 5);
 145     subq(rbx, r13);
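         // Note: after shrq(len, 5), len holds the number of 32-block (512-byte) iterations for LOOP,
         // while rbx keeps the leftover blocks (total blocks minus len * 32) for the REMAINDER loop.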
 146     // Begin processing 512 bytes
 147     bind(LOOP);
 148     // Load 64 bytes of PT data into each zmm register, so that 512 bytes of PT end up in zmm0-zmm7
 149     evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
 150     evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
 151     evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
 152     evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
 153     evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit);
 154     evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit);
 155     evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit);
 156     evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit);
 157     // Xor with the first round key
 158     evpxorq(xmm0, xmm0, xmm8, Assembler::AVX_512bit);
 159     evpxorq(xmm1, xmm1, xmm8, Assembler::AVX_512bit);
 160     evpxorq(xmm2, xmm2, xmm8, Assembler::AVX_512bit);
 161     evpxorq(xmm3, xmm3, xmm8, Assembler::AVX_512bit);
 162     evpxorq(xmm4, xmm4, xmm8, Assembler::AVX_512bit);
 163     evpxorq(xmm5, xmm5, xmm8, Assembler::AVX_512bit);
 164     evpxorq(xmm6, xmm6, xmm8, Assembler::AVX_512bit);
 165     evpxorq(xmm7, xmm7, xmm8, Assembler::AVX_512bit);
 166     // 9 Aes encode round operations
 167     roundEnc(xmm9,  7);
 168     roundEnc(xmm10, 7);
 169     roundEnc(xmm23, 7);
 170     roundEnc(xmm12, 7);
 171     roundEnc(xmm13, 7);
 172     roundEnc(xmm14, 7);
 173     roundEnc(xmm15, 7);
 174     roundEnc(xmm16, 7);
 175     roundEnc(xmm17, 7);
 176     cmpl(rounds, 52);
 177     jcc(Assembler::aboveEqual, AES192);
 178     // Aesenclast round operation for keysize = 128
 179     lastroundEnc(xmm24, 7);
 180     jmp(END_LOOP);
 181     // 2 additional rounds of Aesenc operation for keysize = 192
 182     bind(AES192);
 183     roundEnc(xmm24, 7);
 184     roundEnc(xmm19, 7);
 185     cmpl(rounds, 60);
 186     jcc(Assembler::aboveEqual, AES256);
 187     // Aesenclast round for keysize = 192
 188     lastroundEnc(xmm20, 7);
 189     jmp(END_LOOP);
 190     // 2 rounds of Aesenc operation and Aesenclast for keysize = 256
 191     bind(AES256);
 192     roundEnc(xmm20, 7);
 193     roundEnc(xmm21, 7);
 194     lastroundEnc(xmm22, 7);
 195 
 196     bind(END_LOOP);
 197     // Move 512 bytes of CT to destination
 198     evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit);
 199     evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
 200     evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
 201     evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
 202     evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit);
 203     evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit);
 204     evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit);
 205     evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit);
 206 
 207     addq(pos, 512);
 208     decq(len);
 209     jcc(Assembler::notEqual, LOOP);
 210 
 211     bind(REMAINDER);
 212     vzeroupper();
 213     cmpq(rbx, 0);
 214     jcc(Assembler::equal, END);
 215     // Process 16 bytes at a time
 216     bind(LOOP2);
 217     movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0));
 218     vpxor(xmm1, xmm1, xmm8, Assembler::AVX_128bit);
 219     // xmm2 contains shuffled key for Aesenclast operation.
 220     vmovdqu(xmm2, xmm24);
 221 
 222     vaesenc(xmm1, xmm1, xmm9, Assembler::AVX_128bit);
 223     vaesenc(xmm1, xmm1, xmm10, Assembler::AVX_128bit);
 224     vaesenc(xmm1, xmm1, xmm23, Assembler::AVX_128bit);
 225     vaesenc(xmm1, xmm1, xmm12, Assembler::AVX_128bit);
 226     vaesenc(xmm1, xmm1, xmm13, Assembler::AVX_128bit);
 227     vaesenc(xmm1, xmm1, xmm14, Assembler::AVX_128bit);
 228     vaesenc(xmm1, xmm1, xmm15, Assembler::AVX_128bit);
 229     vaesenc(xmm1, xmm1, xmm16, Assembler::AVX_128bit);
 230     vaesenc(xmm1, xmm1, xmm17, Assembler::AVX_128bit);
 231 
 232     cmpl(rounds, 52);
 233     jcc(Assembler::below, LAST2);
 234     vmovdqu(xmm2, xmm20);
 235     vaesenc(xmm1, xmm1, xmm24, Assembler::AVX_128bit);
 236     vaesenc(xmm1, xmm1, xmm19, Assembler::AVX_128bit);
 237     cmpl(rounds, 60);
 238     jcc(Assembler::below, LAST2);
 239     vmovdqu(xmm2, xmm22);
 240     vaesenc(xmm1, xmm1, xmm20, Assembler::AVX_128bit);
 241     vaesenc(xmm1, xmm1, xmm21, Assembler::AVX_128bit);
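         // Note: xmm2 was preset to the 10-round last key (xmm24) and is replaced above with the
         // 12-round (xmm20) or 14-round (xmm22) last key, so LAST2 always uses the correct key.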
 242 
 243     bind(LAST2);
 244     // Aesenclast round
 245     vaesenclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit);
 246     // Write 16 bytes of CT to destination
 247     movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1);
 248     addq(pos, 16);
 249     decq(rbx);
 250     jcc(Assembler::notEqual, LOOP2);
 251 
 252     bind(END);
 253     // Zero out the round keys
 254     evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit);
 255     evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit);
 256     evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit);
 257     evpxorq(xmm23, xmm23, xmm23, Assembler::AVX_512bit);
 258     evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit);
 259     evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit);
 260     evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit);
 261     evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit);
 262     evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit);
 263     evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit);
 264     evpxorq(xmm24, xmm24, xmm24, Assembler::AVX_512bit);
 265     cmpl(rounds, 44);
 266     jcc(Assembler::belowEqual, EXIT);
 267     evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit);
 268     evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit);
 269     cmpl(rounds, 52);
 270     jcc(Assembler::belowEqual, EXIT);
 271     evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit);
 272     evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit);
 273     bind(EXIT);
 274     pop(rbx);
 275     pop(rax); // return length
 276     pop(r12);
 277     pop(r13);
 278 }
 279 
 280 // AES-ECB Decrypt Operation
 281 void MacroAssembler::aesecb_decrypt(Register src_addr, Register dest_addr, Register key, Register len)  {
 282 
 283     Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT;
 284     const Register pos = rax;
 285     const Register rounds = r12;
 286     push(r13);
 287     push(r12);
 288 
 289     // For EVEX with VL and BW, provide a standard mask; VL = 128 guides the merge
 290     // context for the registers used, since all instructions below operate in 128-bit mode.
 291     // On EVEX without VL and BW, these instructions will all be AVX.
 292     if (VM_Version::supports_avx512vlbw()) {
 293        movl(rax, 0xffff);
 294        kmovql(k1, rax);
 295     }
 296 
 297     push(len); // Save
 298     push(rbx);
 299 
 300     vzeroupper();
 301 
 302     xorptr(pos, pos);
 303     // Determine the number of rounds from the expanded key length (in ints): 44 for 10 rounds (128-bit key), 52 for 12 rounds (192-bit), 60 for 14 rounds (256-bit)
 304     movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
 305 
 306     // Load Key shuf mask
 307     const XMMRegister xmm_key_shuf_mask = xmm31;  // used temporarily to swap key bytes up front
 308     movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
 309 
 310     // Load and shuffle the round keys. The Java expanded key ordering is rotated by one position for decryption,
 311     // so the first round key is loaded from offset 1*16 here and the last round key from offset 0*16.
 312     ev_load_key(xmm9,  key, 1 * 16, xmm_key_shuf_mask);
 313     ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask);
 314     ev_load_key(xmm11, key, 3 * 16, xmm_key_shuf_mask);
 315     ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask);
 316     ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask);
 317     ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask);
 318     ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask);
 319     ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask);
 320     ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask);
 321     ev_load_key(xmm18, key, 10 * 16, xmm_key_shuf_mask);
 322     ev_load_key(xmm27, key, 0 * 16, xmm_key_shuf_mask);
 323     cmpl(rounds, 52);
 324     jcc(Assembler::greaterEqual, KEY_192);
 325     jmp(Loop_start);
 326 
 327     bind(KEY_192);
 328     ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask);
 329     ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask);
 330     cmpl(rounds, 60);
 331     jcc(Assembler::equal, KEY_256);
 332     jmp(Loop_start);
 333 
 334     bind(KEY_256);
 335     ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask);
 336     ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask);
 337     bind(Loop_start);
 338     movq(rbx, len);
 339     // Convert input length to number of blocks
 340     shrq(len, 4);
 341     shlq(rbx, 60);
 342     jcc(Assembler::equal, NO_PARTS);
 343     addq(len, 1);
 344     // Check if the number of blocks is greater than or equal to 32
 345     // If true, 512 bytes are processed at a time (code marked by label LOOP)
 346     // If not, 16 bytes are processed at a time (code marked by label REMAINDER)
 347     bind(NO_PARTS);
 348     movq(rbx, len);
 349     shrq(len, 5);
 350     jcc(Assembler::equal, REMAINDER);
 351     movl(r13, len);
 352     // Compute the number of blocks that will be processed 512 bytes at a time
 353     // Subtract this from the total number of blocks; the rest will be processed by the REMAINDER loop.
 354     shlq(r13, 5);
 355     subq(rbx, r13);
 356 
 357     bind(LOOP);
 358     // Load 64 bytes of CT data into each zmm register, so that 512 bytes of CT end up in zmm0-zmm7
 359     evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
 360     evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
 361     evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
 362     evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
 363     evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit);
 364     evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit);
 365     evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit);
 366     evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit);
 367     // Xor with the first round key
 368     evpxorq(xmm0, xmm0, xmm9, Assembler::AVX_512bit);
 369     evpxorq(xmm1, xmm1, xmm9, Assembler::AVX_512bit);
 370     evpxorq(xmm2, xmm2, xmm9, Assembler::AVX_512bit);
 371     evpxorq(xmm3, xmm3, xmm9, Assembler::AVX_512bit);
 372     evpxorq(xmm4, xmm4, xmm9, Assembler::AVX_512bit);
 373     evpxorq(xmm5, xmm5, xmm9, Assembler::AVX_512bit);
 374     evpxorq(xmm6, xmm6, xmm9, Assembler::AVX_512bit);
 375     evpxorq(xmm7, xmm7, xmm9, Assembler::AVX_512bit);
 376     // 9 rounds of Aesdec
 377     roundDec(xmm10, 7);
 378     roundDec(xmm11, 7);
 379     roundDec(xmm12, 7);
 380     roundDec(xmm13, 7);
 381     roundDec(xmm14, 7);
 382     roundDec(xmm15, 7);
 383     roundDec(xmm16, 7);
 384     roundDec(xmm17, 7);
 385     roundDec(xmm18, 7);
 386     cmpl(rounds, 52);
 387     jcc(Assembler::aboveEqual, AES192);
 388     // Aesdeclast round for keysize = 128
 389     lastroundDec(xmm27, 7);
 390     jmp(END_LOOP);
 391 
 392     bind(AES192);
 393     // 2 Additional rounds for keysize = 192
 394     roundDec(xmm19, 7);
 395     roundDec(xmm20, 7);
 396     cmpl(rounds, 60);
 397     jcc(Assembler::aboveEqual, AES256);
 398     // Aesdeclast round for keysize = 192
 399     lastroundDec(xmm27, 7);
 400     jmp(END_LOOP);
 401     bind(AES256);
 402     // 2 Additional rounds and Aesdeclast for keysize = 256
 403     roundDec(xmm21, 7);
 404     roundDec(xmm22, 7);
 405     lastroundDec(xmm27, 7);
 406 
 407     bind(END_LOOP);
 408     // Write 512 bytes of PT to the destination
 409     evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit);
 410     evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
 411     evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
 412     evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
 413     evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit);
 414     evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit);
 415     evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit);
 416     evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit);
 417 
 418     addq(pos, 512);
 419     decq(len);
 420     jcc(Assembler::notEqual, LOOP);
 421 
 422     bind(REMAINDER);
 423     vzeroupper();
 424     cmpq(rbx, 0);
 425     jcc(Assembler::equal, END);
 426     // Process 16 bytes at a time
 427     bind(LOOP2);
 428     movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0));
 429     vpxor(xmm1, xmm1, xmm9, Assembler::AVX_128bit);
 430     // xmm2 contains shuffled key for Aesdeclast operation.
 431     vmovdqu(xmm2, xmm27);
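         // Note: unlike the encrypt path, the Aesdeclast key is the same for all key sizes
         // (xmm27, loaded from offset 0*16), so xmm2 is not updated in the 192/256-bit branches below.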
 432 
 433     vaesdec(xmm1, xmm1, xmm10, Assembler::AVX_128bit);
 434     vaesdec(xmm1, xmm1, xmm11, Assembler::AVX_128bit);
 435     vaesdec(xmm1, xmm1, xmm12, Assembler::AVX_128bit);
 436     vaesdec(xmm1, xmm1, xmm13, Assembler::AVX_128bit);
 437     vaesdec(xmm1, xmm1, xmm14, Assembler::AVX_128bit);
 438     vaesdec(xmm1, xmm1, xmm15, Assembler::AVX_128bit);
 439     vaesdec(xmm1, xmm1, xmm16, Assembler::AVX_128bit);
 440     vaesdec(xmm1, xmm1, xmm17, Assembler::AVX_128bit);
 441     vaesdec(xmm1, xmm1, xmm18, Assembler::AVX_128bit);
 442 
 443     cmpl(rounds, 52);
 444     jcc(Assembler::below, LAST2);
 445     vaesdec(xmm1, xmm1, xmm19, Assembler::AVX_128bit);
 446     vaesdec(xmm1, xmm1, xmm20, Assembler::AVX_128bit);
 447     cmpl(rounds, 60);
 448     jcc(Assembler::below, LAST2);
 449     vaesdec(xmm1, xmm1, xmm21, Assembler::AVX_128bit);
 450     vaesdec(xmm1, xmm1, xmm22, Assembler::AVX_128bit);
 451 
 452     bind(LAST2);
 453     // Aesdeclast round
 454     vaesdeclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit);
 455     // Write 16 bytes of PT to destination
 456     movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1);
 457     addq(pos, 16);
 458     decq(rbx);
 459     jcc(Assembler::notEqual, LOOP2);
 460 
 461     bind(END);
 462     // Zero out the round keys
 463     evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit);
 464     evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit);
 465     evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit);
 466     evpxorq(xmm11, xmm11, xmm11, Assembler::AVX_512bit);
 467     evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit);
 468     evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit);
 469     evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit);
 470     evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit);
 471     evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit);
 472     evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit);
 473     evpxorq(xmm18, xmm18, xmm18, Assembler::AVX_512bit);
 474     evpxorq(xmm27, xmm27, xmm27, Assembler::AVX_512bit);
 475     cmpl(rounds, 44);
 476     jcc(Assembler::belowEqual, EXIT);
 477     evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit);
 478     evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit);
 479     cmpl(rounds, 52);
 480     jcc(Assembler::belowEqual, EXIT);
 481     evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit);
 482     evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit);
 483     bind(EXIT);
 484     pop(rbx);
 485     pop(rax); // return length
 486     pop(r12);
 487     pop(r13);
 488 }
 489 
 490 // Multiply 128 x 128 bits, using 4 pclmulqdq operations
 491 void MacroAssembler::schoolbookAAD(int i, Register htbl, XMMRegister data,
 492     XMMRegister tmp0, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3) {
 493     movdqu(xmm15, Address(htbl, i * 16));
 494     vpclmulhqlqdq(tmp3, data, xmm15); // 0x01
 495     vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);
 496     vpclmulldq(tmp3, data, xmm15); // 0x00
 497     vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit);
 498     vpclmulhdq(tmp3, data, xmm15); // 0x11
 499     vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit);
 500     vpclmullqhqdq(tmp3, data, xmm15); // 0x10
 501     vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);
 502 }
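     // Note: the four pclmulqdq variants above compute the partial products of a carry-less
     // schoolbook multiply: tmp0 accumulates the low-half product (0x00), tmp1 the high-half
     // product (0x11), and tmp2 the two cross products (0x01 and 0x10).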
 503 
 504 // Multiply two 128 bit numbers resulting in a 256 bit value
 505 // Result of the multiplication followed by reduction stored in state
 506 void MacroAssembler::gfmul(XMMRegister tmp0, XMMRegister state) {
 507     const XMMRegister tmp1 = xmm4;
 508     const XMMRegister tmp2 = xmm5;
 509     const XMMRegister tmp3 = xmm6;



