/*
 * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_STUBGENERATOR_X86_64_HPP
#define CPU_X86_STUBGENERATOR_X86_64_HPP

#include "code/codeBlob.hpp"
#include "runtime/continuation.hpp"
#include "runtime/stubCodeGenerator.hpp"

// Stub Code definitions
//
// StubGenerator for x86_64: a StubCodeGenerator subclass. Its private section
// declares the per-stub generator routines (the generate_* members, most of
// which return an `address`) together with the helper routines they share.
// Apart from the two inline array_overlap_test() overloads, only declarations
// appear in this header. The constructor is the only public member.

class StubGenerator: public StubCodeGenerator {
 private:

  // Call stubs are used to call Java from C.
  address generate_call_stub(address& return_address);

  // Return point for a Java call if there's an exception thrown in
  // Java code.  The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to setup the
  // rsp.
  //
  // rax: exception oop

  address generate_catch_exception();

  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime
  // or native call stub.  The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // rax: exception
  // rdx: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be on stack !!

  address generate_forward_exception();

  // Support for intptr_t OrderAccess::fence()
  address generate_orderaccess_fence();

  // Support for intptr_t get_previous_sp()
  //
  // This routine is used to find the previous stack pointer for the
  // caller.
  address generate_get_previous_sp();

  //----------------------------------------------------------------------------------------------------
  // Support for void verify_mxcsr()
  //
  // This routine is used with -Xcheck:jni to verify that native
  // JNI code does not return to Java code without restoring the
  // MXCSR register to our expected state.

  address generate_verify_mxcsr();

  address generate_f2i_fixup();
  address generate_f2l_fixup();
  address generate_d2i_fixup();
  address generate_d2l_fixup();

  address generate_count_leading_zeros_lut(const char *stub_name);
  address generate_popcount_avx_lut(const char *stub_name);
  address generate_iota_indices(const char *stub_name);
  address generate_vector_reverse_bit_lut(const char *stub_name);

  address generate_vector_reverse_byte_perm_mask_long(const char *stub_name);
  address generate_vector_reverse_byte_perm_mask_int(const char *stub_name);
  address generate_vector_reverse_byte_perm_mask_short(const char *stub_name);
  address generate_vector_byte_shuffle_mask(const char *stub_name);

  address generate_fp_mask(const char *stub_name, int64_t mask);

  address generate_compress_perm_table(const char *stub_name, int32_t esize);

  address generate_expand_perm_table(const char *stub_name, int32_t esize);

  address generate_vector_mask(const char *stub_name, int64_t mask);

  address generate_vector_byte_perm_mask(const char *stub_name);

  address generate_vector_fp_mask(const char *stub_name, int64_t mask);

  address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0);

  // Non-destructive plausibility checks for oops
  address generate_verify_oop();

  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it could be used in 64-bit shifts.
  void assert_clean_int(Register Rint, Register Rtmp);

  // Generate overlap test for array copy stubs
  void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf);

  // Overload taking only a target address: asserts that the address is
  // non-null and forwards it with a null label.
  void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
    assert(no_overlap_target != nullptr, "must be generated");
    array_overlap_test(no_overlap_target, nullptr, sf);
  }
  // Overload taking only a label: forwards it with a null target address.
  void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
    array_overlap_test(nullptr, &L_no_overlap, sf);
  }


  // Shuffle first three arg regs on Windows into Linux/Solaris locations.
  void setup_arg_regs(int nargs = 3);
  void restore_arg_regs();

  // Debug-only state: this member exists only when ASSERT is defined.
  // NOTE(review): presumably tracks whether the *_using_thread variants below
  // are in effect -- confirm against the definitions.
#ifdef ASSERT
  bool _regs_in_thread;
#endif

  // This is used in places where r10 is a scratch register, and can
  // be adapted if r9 is needed also.
  void setup_arg_regs_using_thread(int nargs = 3);

  void restore_arg_regs_using_thread();

  // Copy big chunks forward
  void copy_bytes_forward(Register end_from, Register end_to,
                          Register qword_count, Register tmp1,
                          Register tmp2, Label& L_copy_bytes,
                          Label& L_copy_8_bytes, DecoratorSet decorators,
                          BasicType type);

  // Copy big chunks backward
  void copy_bytes_backward(Register from, Register dest,
                           Register qword_count, Register tmp1,
                           Register tmp2, Label& L_copy_bytes,
                           Label& L_copy_8_bytes, DecoratorSet decorators,
                           BasicType type);

  void setup_argument_regs(BasicType type);

  void restore_argument_regs(BasicType type);

#if COMPILER2_OR_JVMCI
  // Following rules apply to AVX3 optimized arraycopy stubs:
  // - If target supports AVX3 features (BW+VL+F) then implementation uses 32 byte vectors (YMMs)
  //   for both special cases (various small block sizes) and aligned copy loop. This is the
  //   default configuration.
  // - If copy length is above AVX3Threshold, then implementation uses 64 byte vectors (ZMMs)
  //   for main copy loop (and subsequent tail) since bulk of the cycles will be consumed in it.
  // - If user forces MaxVectorSize=32 then above 4096 bytes it is seen that REP MOVs show
  //   better performance for disjoint copies. For conjoint/backward copy vector based
  //   copy performs better.
  // - If user sets AVX3Threshold=0, then special cases for small block sizes operate over
  //   64 byte vector registers (ZMMs).

  address generate_disjoint_copy_avx3_masked(address* entry, const char *name, int shift,
                                             bool aligned, bool is_oop, bool dest_uninitialized);

  address generate_conjoint_copy_avx3_masked(address* entry, const char *name, int shift,
                                             address nooverlap_target, bool aligned, bool is_oop,
                                             bool dest_uninitialized);

  void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
                                    Register to, Register count, int shift,
                                    Register index, Register temp,
                                    bool use64byteVector, Label& L_entry, Label& L_exit);

  void arraycopy_avx3_special_cases_256(XMMRegister xmm, KRegister mask, Register from,
                                        Register to, Register count, int shift,
                                        Register index, Register temp, Label& L_exit);

  void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
                                             Register to, Register start_index, Register end_index,
                                             Register count, int shift, Register temp,
                                             bool use64byteVector, Label& L_entry, Label& L_exit);

  void arraycopy_avx3_large(Register to, Register from, Register temp1, Register temp2,
                            Register temp3, Register temp4, Register count,
                            XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                            XMMRegister xmm4, int shift);

  void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  int shift = Address::times_1, int offset = 0);

  void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  bool conjoint, int shift = Address::times_1, int offset = 0,
                  bool use64byteVector = false);

  void copy256_avx3(Register dst, Register src, Register index, XMMRegister xmm1, XMMRegister xmm2,
                    XMMRegister xmm3, XMMRegister xmm4, int shift, int offset = 0);

  void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0,
                         bool use64byteVector = false);

  void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0);
#endif // COMPILER2_OR_JVMCI

  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name);

  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address* entry, const char *name);

  address generate_disjoint_short_copy(bool aligned, address *entry, const char *name);

  address generate_fill(BasicType t, bool aligned, const char *name);

  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
                                       address *entry, const char *name);
  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
                                         const char *name, bool dest_uninitialized = false);
  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
                                         address *entry, const char *name,
                                         bool dest_uninitialized = false);
  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
                                          const char *name, bool dest_uninitialized = false);
  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
                                          address nooverlap_target, address *entry,
                                          const char *name, bool dest_uninitialized = false);

  // Helper for generating a dynamic type check.
  // Smashes no registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success);

  // Generate checkcasting array copy stub
  address generate_checkcast_copy(const char *name, address *entry,
                                  bool dest_uninitialized = false);

  // Generate 'unsafe' array copy stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  address generate_unsafe_copy(const char *name,
                               address byte_copy_entry, address short_copy_entry,
                               address int_copy_entry, address long_copy_entry);

  // Generate 'unsafe' set memory stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Examines the alignment of the operands and dispatches
  // to an int, short, or byte copy loop.
  address generate_unsafe_setmemory(const char *name, address byte_copy_entry);

  // Perform range checks on the proposed arraycopy.
  // Kills temp, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
                              Register src_pos, // source position (c_rarg1)
                              Register dst,     // destination array oop (c_rarg2)
                              Register dst_pos, // destination position (c_rarg3)
                              Register length,
                              Register temp,
                              Label& L_failed);

  // Generate generic array copy stubs
  address generate_generic_copy(const char *name,
                                address byte_copy_entry, address short_copy_entry,
                                address int_copy_entry, address oop_copy_entry,
                                address long_copy_entry, address checkcast_copy_entry);

  address generate_data_cache_writeback();

  address generate_data_cache_writeback_sync();

  void generate_arraycopy_stubs();


  // MD5 stubs

  // ofs and limit are used for multi-block byte array.
  // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
  address generate_md5_implCompress(bool multi_block, const char *name);


  // SHA stubs

  // ofs and limit are used for multi-block byte array.
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  address generate_sha1_implCompress(bool multi_block, const char *name);

  // ofs and limit are used for multi-block byte array.
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  address generate_sha256_implCompress(bool multi_block, const char *name);
  address generate_sha512_implCompress(bool multi_block, const char *name);

  // Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
  address generate_pshuffle_byte_flip_mask_sha512();

  address generate_upper_word_mask();
  address generate_shuffle_byte_flip_mask();
  address generate_pshuffle_byte_flip_mask();


  // AES intrinsic stubs

  address generate_aescrypt_encryptBlock();

  address generate_aescrypt_decryptBlock();

  address generate_cipherBlockChaining_encryptAESCrypt();

  // A version of CBC/AES Decrypt which does 4 blocks in a loop at a time
  // to hide instruction latency
  address generate_cipherBlockChaining_decryptAESCrypt_Parallel();

  address generate_electronicCodeBook_encryptAESCrypt();

  void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);

  address generate_electronicCodeBook_decryptAESCrypt();

  void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);

  // Vector AES Galois Counter Mode implementation
  address generate_galoisCounterMode_AESCrypt();
  void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
                      Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);

  // AVX2 AES Galois Counter Mode implementation
  address generate_avx2_galoisCounterMode_AESCrypt();
  void aesgcm_avx2(Register in, Register len, Register ct, Register out, Register key,
                   Register state, Register subkeyHtbl, Register counter);

  // Vector AES Counter implementation
  address generate_counterMode_VectorAESCrypt();
  void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
                      Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);

  // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
  // to hide instruction latency
  address generate_counterMode_AESCrypt_Parallel();

  address generate_cipherBlockChaining_decryptVectorAESCrypt();

  address generate_key_shuffle_mask();

  void roundDec(XMMRegister xmm_reg);
  void roundDeclast(XMMRegister xmm_reg);
  void roundEnc(XMMRegister key, int rnum);
  void lastroundEnc(XMMRegister key, int rnum);
  void roundDec(XMMRegister key, int rnum);
  void lastroundDec(XMMRegister key, int rnum);
  void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
  void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
  void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
                                  XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
                                  XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
                                  bool final_reduction, int index, XMMRegister counter_inc_mask);
  // AVX2 AES-GCM related functions
  void initial_blocks_avx2(XMMRegister ctr, Register rounds, Register key, Register len,
                           Register in, Register out, Register ct, XMMRegister aad_hashx, Register pos);
  void gfmul_avx2(XMMRegister GH, XMMRegister HK);
  void generateHtbl_8_block_avx2(Register htbl);
  void ghash8_encrypt8_parallel_avx2(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, Register in,
                                     Register out, Register ct, Register pos, bool out_order, Register rounds,
                                     XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
                                     XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm8);
  void ghash_last_8_avx2(Register subkeyHtbl);

  // Load key and shuffle operation
  void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
  void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

  // Utility routine for loading a 128-bit key word in little endian format
  // can optionally specify that the shuffle mask is already in an xmmregister
  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
  void load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

  // Utility routine for incrementing a 128-bit counter (iv in CTR mode)
  void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
  void ev_add128(XMMRegister xmmdst, XMMRegister xmmsrc1, XMMRegister xmmsrc2,
                 int vector_len, KRegister ktmp, XMMRegister ones);
  void generate_aes_stubs();


  // GHASH stubs

  void generate_ghash_stubs();

  void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
                     XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
  void gfmul(XMMRegister tmp0, XMMRegister t);
  void generateHtbl_one_block(Register htbl, Register rscratch);
  void generateHtbl_eight_blocks(Register htbl);
  void avx_ghash(Register state, Register htbl, Register data, Register blocks);

  // Used by GHASH and AES stubs.
  address ghash_polynomial_addr();
  address ghash_shufflemask_addr();
  address ghash_long_swap_mask_addr(); // byte swap x86 long
  address ghash_byte_swap_mask_addr(); // byte swap x86 byte array

  // Single and multi-block ghash operations
  address generate_ghash_processBlocks();

  // Ghash single and multi block operations using AVX instructions
  address generate_avx_ghash_processBlocks();

  // ChaCha20 stubs and helper functions
  void generate_chacha_stubs();
  address generate_chacha20Block_avx();
  address generate_chacha20Block_avx512();
  void cc20_quarter_round_avx(XMMRegister aVec, XMMRegister bVec,
                              XMMRegister cVec, XMMRegister dVec, XMMRegister scratch,
                              XMMRegister lrot8, XMMRegister lrot16, int vector_len);
  void cc20_shift_lane_org(XMMRegister bVec, XMMRegister cVec,
                           XMMRegister dVec, int vector_len, bool colToDiag);
  void cc20_keystream_collate_avx512(XMMRegister aVec, XMMRegister bVec,
                                     XMMRegister cVec, XMMRegister dVec, Register baseAddr, int baseOffset);

  // Poly1305 multiblock using IFMA instructions
  address generate_poly1305_processBlocks();
  void poly1305_process_blocks_avx512(const Register input, const Register length,
                                      const Register A0, const Register A1, const Register A2,
                                      const Register R0, const Register R1, const Register C1);
  void poly1305_multiply_scalar(const Register a0, const Register a1, const Register a2,
                                const Register r0, const Register r1, const Register c1, bool only128,
                                const Register t0, const Register t1, const Register t2,
                                const Register mulql, const Register mulqh);
  void poly1305_multiply8_avx512(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                 const XMMRegister R0, const XMMRegister R1, const XMMRegister R2, const XMMRegister R1P, const XMMRegister R2P,
                                 const XMMRegister P0L, const XMMRegister P0H, const XMMRegister P1L, const XMMRegister P1H, const XMMRegister P2L, const XMMRegister P2H,
                                 const XMMRegister TMP, const Register rscratch);
  void poly1305_limbs(const Register limbs, const Register a0, const Register a1, const Register a2, const Register t0, const Register t1);
  void poly1305_limbs_out(const Register a0, const Register a1, const Register a2, const Register limbs, const Register t0, const Register t1);
  void poly1305_limbs_avx512(const XMMRegister D0, const XMMRegister D1,
                             const XMMRegister L0, const XMMRegister L1, const XMMRegister L2, bool padMSG,
                             const XMMRegister TMP, const Register rscratch);
  // Poly1305 AVX2 implementation
  void poly1305_process_blocks_avx2(const Register input, const Register length,
                                    const Register a0, const Register a1, const Register a2,
                                    const Register r0, const Register r1, const Register c1);
  void poly1305_msg_mul_reduce_vec4_avx2(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                         const Address R0, const Address R1, const Address R2,
                                         const Address R1P, const Address R2P,
                                         const XMMRegister P0L, const XMMRegister P0H,
                                         const XMMRegister P1L, const XMMRegister P1H,
                                         const XMMRegister P2L, const XMMRegister P2H,
                                         const XMMRegister YTMP1, const XMMRegister YTMP2,
                                         const XMMRegister YTMP3, const XMMRegister YTMP4,
                                         const XMMRegister YTMP5, const XMMRegister YTMP6,
                                         const Register input, const Register length, const Register rscratch);
  void poly1305_mul_reduce_vec4_avx2(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                     const XMMRegister R0, const XMMRegister R1, const XMMRegister R2,
                                     const XMMRegister R1P, const XMMRegister R2P,
                                     const XMMRegister P0L, const XMMRegister P0H,
                                     const XMMRegister P1L, const XMMRegister P1H,
                                     const XMMRegister P2L, const XMMRegister P2H,
                                     const XMMRegister YTMP1, const Register rscratch);

  address generate_intpoly_montgomeryMult_P256();
  address generate_intpoly_assign();

  // BASE64 stubs

  address base64_shuffle_addr();
  address base64_avx2_shuffle_addr();
  address base64_avx2_input_mask_addr();
  address base64_avx2_lut_addr();
  address base64_encoding_table_addr();

  // Code for generating Base64 encoding.
  // Intrinsic function prototype in Base64.java:
  // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL)
  address generate_base64_encodeBlock();

  // base64 AVX512vbmi tables
  address base64_vbmi_lookup_lo_addr();
  address base64_vbmi_lookup_hi_addr();
  address base64_vbmi_lookup_lo_url_addr();
  address base64_vbmi_lookup_hi_url_addr();
  address base64_vbmi_pack_vec_addr();
  address base64_vbmi_join_0_1_addr();
  address base64_vbmi_join_1_2_addr();
  address base64_vbmi_join_2_3_addr();
  address base64_decoding_table_addr();
  address base64_AVX2_decode_tables_addr();
  address base64_AVX2_decode_LUT_tables_addr();

  // Code for generating Base64 decoding.
  //
  // Based on the article (and associated code) from https://arxiv.org/abs/1910.05109.
  //
  // Intrinsic function prototype in Base64.java:
  // private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, boolean isMIME);
  // NOTE(review): confirm the exact Java signature (return type) against Base64.java.
  address generate_base64_decodeBlock();

  address generate_updateBytesCRC32();
  address generate_updateBytesCRC32C(bool is_pclmulqdq_supported);

  address generate_updateBytesAdler32();

  address generate_multiplyToLen();

  address generate_vectorizedMismatch();

  address generate_squareToLen();

  address generate_method_entry_barrier();

  address generate_mulAdd();

  address generate_bigIntegerRightShift();
  address generate_bigIntegerLeftShift();

  address generate_float16ToFloat();
  address generate_floatToFloat16();

  // Libm math stubs (sin/cos/tan, exp, pow, log, log10, fmod)

  address generate_libmSin();
  address generate_libmCos();
  address generate_libmTan();
  address generate_libmExp();
  address generate_libmPow();
  address generate_libmLog();
  address generate_libmLog10();
  address generate_libmFmod();

  // Shared constants
  static address ZERO;
  static address NEG_ZERO;
  static address ONE;
  static address ONEHALF;
  static address SIGN_MASK;
  static address TWO_POW_55;
  static address TWO_POW_M55;
  static address SHIFTER;
  static address PI32INV;
  static address PI_INV_TABLE;
  static address Ctable;
  static address SC_1;
  static address SC_2;
  static address SC_3;
  static address SC_4;
  static address PI_4;
  static address P_1;
  static address P_3;
  static address P_2;

  void generate_libm_stubs();

#ifdef COMPILER2
  void generate_string_indexof(address *fnptrs);
#endif

  address generate_cont_thaw(const char* label, Continuation::thaw_kind kind);
  address generate_cont_thaw();

  // TODO: will probably need multiple return barriers depending on return type
  address generate_cont_returnBarrier();
  address generate_cont_returnBarrier_exception();

#if INCLUDE_JFR
  void generate_jfr_stubs();
  // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
  // It returns a jobject handle to the event writer.
  // The handle is dereferenced and the return value is the event writer oop.
  RuntimeStub* generate_jfr_write_checkpoint();
  // For c2: call to runtime to return a buffer lease.
  RuntimeStub* generate_jfr_return_lease();
#endif // INCLUDE_JFR

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs.  If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.
  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   Register arg1 = noreg,
                                   Register arg2 = noreg);

  // shared exception handler for FFM upcall stubs
  address generate_upcall_stub_exception_handler();

  // interpreter or compiled code marshalling registers to/from inline type instance
  address generate_return_value_stub(address destination, const char* name, bool has_res);

  // Specialized stub implementations for UseSecondarySupersTable.
  address generate_lookup_secondary_supers_table_stub(u1 super_klass_index);

  // Slow path implementation for UseSecondarySupersTable.
  address generate_lookup_secondary_supers_table_slow_path_stub();

  void create_control_words();

  // Initialization
  void generate_initial_stubs();
  void generate_continuation_stubs();
  void generate_compiler_stubs();
  void generate_final_stubs();

 public:
  StubGenerator(CodeBuffer* code, StubsKind kind);
};

#endif // CPU_X86_STUBGENERATOR_X86_64_HPP