/*
 * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_STUBGENERATOR_X86_64_HPP
#define CPU_X86_STUBGENERATOR_X86_64_HPP

#include "code/codeBlob.hpp"
#include "runtime/continuation.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"

// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

  // Call stubs are used to call Java from C.
  address generate_call_stub(address& return_address);

  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to set up
  // rsp.
  //
  // rax: exception oop

  address generate_catch_exception();

  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // rax: exception
  // rdx: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be on stack !!

  address generate_forward_exception();

  // Support for intptr_t OrderAccess::fence()
  address generate_orderaccess_fence();

  // Support for intptr_t get_previous_sp()
  //
  // This routine is used to find the previous stack pointer for the
  // caller.
  address generate_get_previous_sp();

  //----------------------------------------------------------------------------------------------------
  // Support for void verify_mxcsr()
  //
  // This routine is used with -Xcheck:jni to verify that native
  // JNI code does not return to Java code without restoring the
  // MXCSR register to our expected state.

  address generate_verify_mxcsr();
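  // A minimal illustrative sketch of what the -Xcheck:jni verification
  // amounts to, not the generated stub itself (the exact save slot and
  // label names here, mxcsr_save/mxcsr_std/L_ok, are hypothetical):
  // read the live MXCSR, compare it with the expected value, and on a
  // mismatch warn and restore the expected state.
  //
  //   __ stmxcsr(mxcsr_save);            // store the live MXCSR
  //   __ cmp32(mxcsr_save, mxcsr_std);   // compare with the expected value
  //   __ jcc(Assembler::equal, L_ok);    // fast path: nothing changed
  //   __ warn("MXCSR changed by native JNI code");
  //   __ ldmxcsr(mxcsr_std);             // put the expected state back
  //   __ bind(L_ok);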
  address generate_f2i_fixup();
  address generate_f2l_fixup();
  address generate_d2i_fixup();
  address generate_d2l_fixup();

  address generate_count_leading_zeros_lut();
  address generate_popcount_avx_lut();
  address generate_iota_indices();
  address generate_vector_reverse_bit_lut();

  address generate_vector_reverse_byte_perm_mask_long();
  address generate_vector_reverse_byte_perm_mask_int();
  address generate_vector_reverse_byte_perm_mask_short();
  address generate_vector_byte_shuffle_mask();

  address generate_fp_mask(StubGenStubId stub_id, int64_t mask);

  address generate_compress_perm_table(StubGenStubId stub_id);

  address generate_expand_perm_table(StubGenStubId stub_id);

  address generate_vector_mask(StubGenStubId stub_id, int64_t mask);

  address generate_vector_byte_perm_mask();

  address generate_vector_fp_mask(StubGenStubId stub_id, int64_t mask);

  address generate_vector_custom_i32(StubGenStubId stub_id, Assembler::AvxVectorLen len,
                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0);

  // Non-destructive plausibility checks for oops
  address generate_verify_oop();

  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it can be used in 64-bit shifts.
  void assert_clean_int(Register Rint, Register Rtmp);

  // Generate overlap test for array copy stubs
  void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf);

  void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
    assert(no_overlap_target != nullptr, "must be generated");
    array_overlap_test(no_overlap_target, nullptr, sf);
  }
  void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
    array_overlap_test(nullptr, &L_no_overlap, sf);
  }


  // Shuffle first three arg regs on Windows into Linux/Solaris locations.
  void setup_arg_regs(int nargs = 3);
  void restore_arg_regs();

#ifdef ASSERT
  bool _regs_in_thread;
#endif

  // This is used in places where r10 is a scratch register, and can
  // be adapted if r9 is needed also.
  void setup_arg_regs_using_thread(int nargs = 3);

  void restore_arg_regs_using_thread();

  // Copy big chunks forward
  void copy_bytes_forward(Register end_from, Register end_to,
                          Register qword_count, Register tmp1,
                          Register tmp2, Label& L_copy_bytes,
                          Label& L_copy_8_bytes, DecoratorSet decorators,
                          BasicType type);

  // Copy big chunks backward
  void copy_bytes_backward(Register from, Register dest,
                           Register qword_count, Register tmp1,
                           Register tmp2, Label& L_copy_bytes,
                           Label& L_copy_8_bytes, DecoratorSet decorators,
                           BasicType type);

  void setup_argument_regs(BasicType type);

  void restore_argument_regs(BasicType type);

#if COMPILER2_OR_JVMCI
  // The following rules apply to the AVX3-optimized arraycopy stubs
  // (see the dispatch sketch below):
  // - If the target supports the AVX3 features (BW+VL+F), the implementation
  //   uses 32-byte vectors (YMMs) for both the special cases (various small
  //   block sizes) and the aligned copy loop. This is the default configuration.
  // - If the copy length is above AVX3Threshold, the implementation uses
  //   64-byte vectors (ZMMs) for the main copy loop (and the subsequent tail),
  //   since the bulk of the cycles is consumed there.
  // - If the user forces MaxVectorSize=32, then above 4096 bytes REP MOVS
  //   shows better performance for disjoint copies. For conjoint/backward
  //   copies, the vector-based copy performs better.
  // - If the user sets AVX3Threshold=0, the special cases for small block
  //   sizes also operate on 64-byte vector registers (ZMMs).
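  //
  // A hedged sketch (simplified, not the generated code) of the dispatch
  // policy those rules describe; copy_length and disjoint stand in for the
  // stub's actual operands:
  //
  //   bool use_zmm  = (AVX3Threshold == 0) || (copy_length > AVX3Threshold);
  //   bool rep_movs = disjoint && MaxVectorSize == 32 && copy_length > 4096;
  //   // otherwise: YMM (32-byte) special cases and aligned copy loop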
  address generate_disjoint_copy_avx3_masked(StubGenStubId stub_id, address* entry);

  address generate_conjoint_copy_avx3_masked(StubGenStubId stub_id, address* entry,
                                             address nooverlap_target);

  void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
                                    Register to, Register count, int shift,
                                    Register index, Register temp,
                                    bool use64byteVector, Label& L_entry, Label& L_exit);

  void arraycopy_avx3_special_cases_256(XMMRegister xmm, KRegister mask, Register from,
                                        Register to, Register count, int shift,
                                        Register index, Register temp, Label& L_exit);

  void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
                                             Register to, Register start_index, Register end_index,
                                             Register count, int shift, Register temp,
                                             bool use64byteVector, Label& L_entry, Label& L_exit);

  void arraycopy_avx3_large(Register to, Register from, Register temp1, Register temp2,
                            Register temp3, Register temp4, Register count,
                            XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                            XMMRegister xmm4, int shift);

  void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  int shift = Address::times_1, int offset = 0);

  void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  bool conjoint, int shift = Address::times_1, int offset = 0,
                  bool use64byteVector = false);

  void copy256_avx3(Register dst, Register src, Register index, XMMRegister xmm1, XMMRegister xmm2,
                    XMMRegister xmm3, XMMRegister xmm4, int shift, int offset = 0);

  void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0,
                         bool use64byteVector = false);

  void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0);
#endif // COMPILER2_OR_JVMCI

  address generate_disjoint_byte_copy(address* entry);

  address generate_conjoint_byte_copy(address nooverlap_target, address* entry);

  address generate_disjoint_short_copy(address* entry);

  address generate_fill(StubGenStubId stub_id);

  address generate_conjoint_short_copy(address nooverlap_target, address* entry);
  address generate_disjoint_int_oop_copy(StubGenStubId stub_id, address* entry);
  address generate_conjoint_int_oop_copy(StubGenStubId stub_id, address nooverlap_target,
                                         address* entry);
  address generate_disjoint_long_oop_copy(StubGenStubId stub_id, address* entry);
  address generate_conjoint_long_oop_copy(StubGenStubId stub_id, address nooverlap_target,
                                          address* entry);
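  // Illustrative sketch (a hypothetical helper, not the stub code) of the
  // criterion array_overlap_test applies before branching to the disjoint
  // ("no overlap") entry of a conjoint stub:
  //
  //   static bool needs_backward_copy(address from, address to,
  //                                   size_t byte_count) {
  //     // Unsigned distance: if 'to' precedes 'from', the subtraction wraps
  //     // and compares high, so a forward copy is safe in that case too.
  //     return (size_t)(to - from) < byte_count;
  //   }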
  // Helper for generating a dynamic type check.
  // Smashes no registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success);

  // Generate checkcasting array copy stub
  address generate_checkcast_copy(StubGenStubId stub_id, address* entry);

  // Generate 'unsafe' array copy stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  address generate_unsafe_copy(address byte_copy_entry, address short_copy_entry,
                               address int_copy_entry, address long_copy_entry);

  // Generate 'unsafe' set memory stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Examines the alignment of the operands and dispatches
  // to an int, short, or byte copy loop.
  address generate_unsafe_setmemory(address byte_copy_entry);

  // Perform range checks on the proposed arraycopy.
  // Kills temp, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
                              Register src_pos, // source position (c_rarg1)
                              Register dst,     // destination array oop (c_rarg2)
                              Register dst_pos, // destination position (c_rarg3)
                              Register length,
                              Register temp,
                              Label& L_failed);

  // Generate generic array copy stubs
  address generate_generic_copy(address byte_copy_entry, address short_copy_entry,
                                address int_copy_entry, address oop_copy_entry,
                                address long_copy_entry, address checkcast_copy_entry);

  address generate_data_cache_writeback();

  address generate_data_cache_writeback_sync();

  void generate_arraycopy_stubs();


  // MD5 stubs

  // ofs and limit are used for multi-block byte arrays.
  // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
  address generate_md5_implCompress(StubGenStubId stub_id);


  // SHA stubs

  // ofs and limit are used for multi-block byte arrays.
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  address generate_sha1_implCompress(StubGenStubId stub_id);

  // ofs and limit are used for multi-block byte arrays.
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  address generate_sha256_implCompress(StubGenStubId stub_id);
  address generate_sha512_implCompress(StubGenStubId stub_id);
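  // Hedged sketch (assumed shape, written C-style) of the multi-block
  // contract the digest stubs above implement: blocks are consumed from
  // b[ofs..limit] and the new offset is returned so the Java caller can
  // resume where the intrinsic stopped. block_size is a hypothetical name
  // for the per-algorithm block length (64 bytes for MD5/SHA-1/SHA-256,
  // 128 bytes for SHA-512).
  //
  //   int implCompressMultiBlock(jbyte* b, int ofs, int limit) {
  //     for (; ofs <= limit; ofs += block_size) {
  //       implCompress(b, ofs);   // one block
  //     }
  //     return ofs;               // new offset
  //   }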
  // Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
  address generate_pshuffle_byte_flip_mask_sha512();

  address generate_upper_word_mask();
  address generate_shuffle_byte_flip_mask();
  address generate_pshuffle_byte_flip_mask();


  // AES intrinsic stubs

  address generate_aescrypt_encryptBlock();

  address generate_aescrypt_decryptBlock();

  address generate_cipherBlockChaining_encryptAESCrypt();

  // A version of CBC/AES Decrypt which does 4 blocks in a loop at a time
  // to hide instruction latency
  address generate_cipherBlockChaining_decryptAESCrypt_Parallel();

  address generate_electronicCodeBook_encryptAESCrypt();

  void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);

  address generate_electronicCodeBook_decryptAESCrypt();

  void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);

  // Vector AES Galois Counter Mode implementation
  address generate_galoisCounterMode_AESCrypt();
  void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
                      Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);

  // AVX2 AES Galois Counter Mode implementation
  address generate_avx2_galoisCounterMode_AESCrypt();
  void aesgcm_avx2(Register in, Register len, Register ct, Register out, Register key,
                   Register state, Register subkeyHtbl, Register counter);

  // Vector AES Counter implementation
  address generate_counterMode_VectorAESCrypt();
  void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
                      Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);

  // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
  // to hide instruction latency
  address generate_counterMode_AESCrypt_Parallel();

  address generate_cipherBlockChaining_decryptVectorAESCrypt();

  address generate_key_shuffle_mask();
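  // Hedged sketch (notation assumed) of why CBC decryption can be unrolled
  // several blocks at a time while CBC encryption cannot: each plaintext
  // block depends only on already-known ciphertext, so multiple AESDEC
  // chains can be kept in flight at once.
  //
  //   P[i] = AES_decrypt(C[i], key) ^ C[i-1]        // C[-1] is the IV
  //   // encrypt side, for contrast, is a serial chain:
  //   // C[i] = AES_encrypt(P[i] ^ C[i-1], key)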
  void roundDec(XMMRegister xmm_reg);
  void roundDeclast(XMMRegister xmm_reg);
  void roundEnc(XMMRegister key, int rnum);
  void lastroundEnc(XMMRegister key, int rnum);
  void roundDec(XMMRegister key, int rnum);
  void lastroundDec(XMMRegister key, int rnum);
  void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
  void ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl,
                                         Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR, XMMRegister GHASH,
                                         XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK,
                                         bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor,
                                         bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset);
  void generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl);
  void initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
                                Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
                                XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset);
  void gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl,
                               int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction);
  void ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
                      Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
                      int in_disp, int displacement, int hashkey_offset);
  void aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key,
                     Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
  // AVX2 AES-GCM related functions
  void initial_blocks_avx2(XMMRegister ctr, Register rounds, Register key, Register len,
                           Register in, Register out, Register ct, XMMRegister aad_hashx, Register pos);
  void gfmul_avx2(XMMRegister GH, XMMRegister HK);
  void generateHtbl_8_block_avx2(Register htbl);
  void ghash8_encrypt8_parallel_avx2(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, Register in,
                                     Register out, Register ct, Register pos, bool out_order, Register rounds,
                                     XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
                                     XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm8);
  void ghash_last_8_avx2(Register subkeyHtbl);

  // Load key and shuffle operation
  void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
  void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

  // Utility routine for loading a 128-bit key word in little-endian format;
  // can optionally specify that the shuffle mask is already in an XMM register.
  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
  void load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

  // Utility routines for increasing a 128-bit counter (the IV in CTR mode)
  void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
  void ev_add128(XMMRegister xmmdst, XMMRegister xmmsrc1, XMMRegister xmmsrc2,
                 int vector_len, KRegister ktmp, XMMRegister ones);
  void generate_aes_stubs();


  // GHASH stubs

  void generate_ghash_stubs();

  void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
                     XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
  void gfmul(XMMRegister tmp0, XMMRegister t);
  void generateHtbl_one_block(Register htbl, Register rscratch);
  void generateHtbl_eight_blocks(Register htbl);
  void avx_ghash(Register state, Register htbl, Register data, Register blocks);
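  // Hedged mathematical sketch (presentation assumed) of the GHASH update
  // the helpers above compute: with hash key H = AES_K(0^128) and "*" the
  // carry-less product in GF(2^128) reduced modulo x^128 + x^7 + x^2 + x + 1,
  // each 16-byte block X folds into the running state as
  //
  //   state = (state ^ X) * H
  //
  // The gfmul variants implement "*" with PCLMULQDQ on 64-bit halves, and
  // the Htbl routines precompute powers of H so that several blocks can be
  // folded per iteration.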
  // Used by GHASH and AES stubs.
  address ghash_polynomial_addr();
  address ghash_shufflemask_addr();
  address ghash_long_swap_mask_addr(); // byte swap x86 long
  address ghash_byte_swap_mask_addr(); // byte swap x86 byte array

  // Single and multi-block GHASH operations
  address generate_ghash_processBlocks();

  // GHASH single and multi-block operations using AVX instructions
  address generate_avx_ghash_processBlocks();

  // ChaCha20 stubs and helper functions
  void generate_chacha_stubs();
  address generate_chacha20Block_avx();
  address generate_chacha20Block_avx512();
  void cc20_quarter_round_avx(XMMRegister aVec, XMMRegister bVec,
                              XMMRegister cVec, XMMRegister dVec, XMMRegister scratch,
                              XMMRegister lrot8, XMMRegister lrot16, int vector_len);
  void cc20_shift_lane_org(XMMRegister bVec, XMMRegister cVec,
                           XMMRegister dVec, int vector_len, bool colToDiag);
  void cc20_keystream_collate_avx512(XMMRegister aVec, XMMRegister bVec,
                                     XMMRegister cVec, XMMRegister dVec, Register baseAddr, int baseOffset);
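  // Hedged sketch (scalar form, assumed) of the quarter round that
  // cc20_quarter_round_avx applies lane-wise across vector registers:
  //
  //   a += b;  d ^= a;  d = rotl32(d, 16);
  //   c += d;  b ^= c;  b = rotl32(b, 12);
  //   a += b;  d ^= a;  d = rotl32(d,  8);
  //   c += d;  b ^= c;  b = rotl32(b,  7);
  //
  // The 16- and 8-bit rotations can be done with byte shuffles, which is
  // presumably why the lrot8/lrot16 mask registers are passed in.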
  // Poly1305 multiblock using IFMA instructions
  address generate_poly1305_processBlocks();
  void poly1305_process_blocks_avx512(const Register input, const Register length,
                                      const Register A0, const Register A1, const Register A2,
                                      const Register R0, const Register R1, const Register C1);
  void poly1305_multiply_scalar(const Register a0, const Register a1, const Register a2,
                                const Register r0, const Register r1, const Register c1, bool only128,
                                const Register t0, const Register t1, const Register t2,
                                const Register mulql, const Register mulqh);
  void poly1305_multiply8_avx512(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                 const XMMRegister R0, const XMMRegister R1, const XMMRegister R2,
                                 const XMMRegister R1P, const XMMRegister R2P,
                                 const XMMRegister P0L, const XMMRegister P0H,
                                 const XMMRegister P1L, const XMMRegister P1H,
                                 const XMMRegister P2L, const XMMRegister P2H,
                                 const XMMRegister TMP, const Register rscratch);
  void poly1305_limbs(const Register limbs, const Register a0, const Register a1, const Register a2,
                      const Register t0, const Register t1);
  void poly1305_limbs_out(const Register a0, const Register a1, const Register a2,
                          const Register limbs, const Register t0, const Register t1);
  void poly1305_limbs_avx512(const XMMRegister D0, const XMMRegister D1,
                             const XMMRegister L0, const XMMRegister L1, const XMMRegister L2, bool padMSG,
                             const XMMRegister TMP, const Register rscratch);
  // Poly1305 AVX2 implementation
  void poly1305_process_blocks_avx2(const Register input, const Register length,
                                    const Register a0, const Register a1, const Register a2,
                                    const Register r0, const Register r1, const Register c1);
  void poly1305_msg_mul_reduce_vec4_avx2(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                         const Address R0, const Address R1, const Address R2,
                                         const Address R1P, const Address R2P,
                                         const XMMRegister P0L, const XMMRegister P0H,
                                         const XMMRegister P1L, const XMMRegister P1H,
                                         const XMMRegister P2L, const XMMRegister P2H,
                                         const XMMRegister YTMP1, const XMMRegister YTMP2,
                                         const XMMRegister YTMP3, const XMMRegister YTMP4,
                                         const XMMRegister YTMP5, const XMMRegister YTMP6,
                                         const Register input, const Register length, const Register rscratch);
  void poly1305_mul_reduce_vec4_avx2(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                     const XMMRegister R0, const XMMRegister R1, const XMMRegister R2,
                                     const XMMRegister R1P, const XMMRegister R2P,
                                     const XMMRegister P0L, const XMMRegister P0H,
                                     const XMMRegister P1L, const XMMRegister P1H,
                                     const XMMRegister P2L, const XMMRegister P2H,
                                     const XMMRegister YTMP1, const Register rscratch);

  address generate_intpoly_montgomeryMult_P256();
  address generate_intpoly_assign();

  // SHA3 stubs
  void generate_sha3_stubs();
  address generate_sha3_implCompress(StubGenStubId stub_id);

  // BASE64 stubs

  address base64_shuffle_addr();
  address base64_avx2_shuffle_addr();
  address base64_avx2_input_mask_addr();
  address base64_avx2_lut_addr();
  address base64_encoding_table_addr();

  // Code for generating Base64 encoding.
  // Intrinsic function prototype in Base64.java:
  // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL)
  address generate_base64_encodeBlock();

  // base64 AVX512vbmi tables
  address base64_vbmi_lookup_lo_addr();
  address base64_vbmi_lookup_hi_addr();
  address base64_vbmi_lookup_lo_url_addr();
  address base64_vbmi_lookup_hi_url_addr();
  address base64_vbmi_pack_vec_addr();
  address base64_vbmi_join_0_1_addr();
  address base64_vbmi_join_1_2_addr();
  address base64_vbmi_join_2_3_addr();
  address base64_decoding_table_addr();
  address base64_AVX2_decode_tables_addr();
  address base64_AVX2_decode_LUT_tables_addr();

  // Code for generating Base64 decoding.
  //
  // Based on the article (and associated code) from https://arxiv.org/abs/1910.05109.
  //
  // Intrinsic function prototype in Base64.java:
  // private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, boolean isMIME)
  address generate_base64_decodeBlock();
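  // Hedged scalar sketch (reference shape, assumed; decode_table is a
  // hypothetical name) of what decodeBlock computes per group of four input
  // characters; the vector code performs the same bit regrouping via the
  // lookup tables above:
  //
  //   int b0 = decode_table[src[sp]],     b1 = decode_table[src[sp + 1]];
  //   int b2 = decode_table[src[sp + 2]], b3 = decode_table[src[sp + 3]];
  //   dst[dp]     = (jbyte)((b0 << 2) | (b1 >> 4));
  //   dst[dp + 1] = (jbyte)((b1 << 4) | (b2 >> 2));
  //   dst[dp + 2] = (jbyte)((b2 << 6) |  b3);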
  address generate_updateBytesCRC32();
  address generate_updateBytesCRC32C(bool is_pclmulqdq_supported);

  address generate_updateBytesAdler32();

  address generate_multiplyToLen();

  address generate_vectorizedMismatch();

  address generate_squareToLen();

  address generate_method_entry_barrier();

  address generate_mulAdd();

  address generate_bigIntegerRightShift();
  address generate_bigIntegerLeftShift();

  address generate_float16ToFloat();
  address generate_floatToFloat16();

  // Libm math stubs (trigonometric and other elementary functions)

  address generate_libmSin();
  address generate_libmCos();
  address generate_libmTan();
  address generate_libmTanh();
  address generate_libmExp();
  address generate_libmPow();
  address generate_libmLog();
  address generate_libmLog10();
  address generate_libmFmod();

  // Shared constants
  static address ZERO;
  static address NEG_ZERO;
  static address ONE;
  static address ONEHALF;
  static address SIGN_MASK;
  static address TWO_POW_55;
  static address TWO_POW_M55;
  static address SHIFTER;
  static address PI32INV;
  static address PI_INV_TABLE;
  static address Ctable;
  static address SC_1;
  static address SC_2;
  static address SC_3;
  static address SC_4;
  static address PI_4;
  static address P_1;
  static address P_3;
  static address P_2;

  void generate_libm_stubs();

#ifdef COMPILER2
  void generate_string_indexof(address* fnptrs);
#endif

  address generate_cont_thaw(StubGenStubId stub_id);
  address generate_cont_thaw();

  // TODO: will probably need multiple return barriers depending on return type
  address generate_cont_returnBarrier();
  address generate_cont_returnBarrier_exception();

  address generate_cont_preempt_stub();

  // TODO -- delete this as it is not implemented?
  //
  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs. If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller-saved registers were assumed volatile in the compiler.
  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   Register arg1 = noreg,
                                   Register arg2 = noreg);

  // Shared exception handler for FFM upcall stubs
  address generate_upcall_stub_exception_handler();
  address generate_upcall_stub_load_target();

  // Specialized stub implementations for UseSecondarySupersTable.
  void generate_lookup_secondary_supers_table_stub();

  // Slow path implementation for UseSecondarySupersTable.
  address generate_lookup_secondary_supers_table_slow_path_stub();

  void create_control_words();

  // Initialization
  void generate_initial_stubs();
  void generate_continuation_stubs();
  void generate_compiler_stubs();
  void generate_final_stubs();

 public:
  StubGenerator(CodeBuffer* code, StubGenBlobId blob_id);
};

#endif // CPU_X86_STUBGENERATOR_X86_64_HPP