/*
 * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_STUBGENERATOR_X86_64_HPP
#define CPU_X86_STUBGENERATOR_X86_64_HPP

#include "code/codeBlob.hpp"
#include "runtime/continuation.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"

// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

  // Call stubs are used to call Java from C.
  address generate_call_stub(address& return_address);
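  //
  // The generated stub is published as StubRoutines::call_stub() and is the
  // VM's entry point for Java calls. As a rough sketch (the authoritative
  // signature is the CallStub typedef in runtime/stubRoutines.hpp), the VM
  // invokes it from C++ roughly like this:
  //
  //   StubRoutines::call_stub()(
  //     (address)&link,                  // connection to the caller's JavaCallWrapper
  //     result, result_type,             // where and how to store the Java result
  //     method, entry_point,             // callee Method* and its entry point
  //     parameters, size_of_parameters,  // argument words
  //     CHECK);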

  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code, so we need to set
  // up rsp ourselves.
  //
  // rax: exception oop

  address generate_catch_exception();

  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // rax: exception
  // rdx: throwing pc
  //
  // NOTE: At entry to this stub, the exception pc must be on the stack!

  address generate_forward_exception();
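  //
  // The address returned here is published as StubRoutines::forward_exception_entry().
  // Runtime stubs that detect a pending exception after a VM call typically jump
  // there so the exception is re-dispatched to the Java-level handler of the caller
  // (a summary of the usual use, not a complete contract).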

  // Support for intptr_t OrderAccess::fence()
  address generate_orderaccess_fence();

  //----------------------------------------------------------------------------------------------------
  // Support for void verify_mxcsr()
  //
  // This routine is used with -Xcheck:jni to verify that native
  // JNI code does not return to Java code without restoring the
  // MXCSR register to our expected state.

  address generate_verify_mxcsr();

  address generate_f2i_fixup();
  address generate_f2l_fixup();
  address generate_d2i_fixup();
  address generate_d2l_fixup();

  address generate_count_leading_zeros_lut();
  address generate_popcount_avx_lut();
  address generate_iota_indices();
  address generate_vector_reverse_bit_lut();

  address generate_vector_reverse_byte_perm_mask_long();
  address generate_vector_reverse_byte_perm_mask_int();
  address generate_vector_reverse_byte_perm_mask_short();
  address generate_vector_byte_shuffle_mask();

  address generate_fp_mask(StubId stub_id, int64_t mask);

  address generate_compress_perm_table(StubId stub_id);

  address generate_expand_perm_table(StubId stub_id);

  address generate_vector_mask(StubId stub_id, int64_t mask);

  address generate_vector_byte_perm_mask();

  address generate_vector_fp_mask(StubId stub_id, int64_t mask);

  address generate_vector_custom_i32(StubId stub_id, Assembler::AvxVectorLen len,
                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0);

  // Non-destructive plausibility checks for oops
  address generate_verify_oop();

  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it can be used in 64-bit shifts.
  void assert_clean_int(Register Rint, Register Rtmp);

  // Generate overlap test for array copy stubs
  void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf);

  void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
    assert(no_overlap_target != nullptr, "must be generated");
    array_overlap_test(no_overlap_target, nullptr, sf);
  }
  void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
    array_overlap_test(nullptr, &L_no_overlap, sf);
  }
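  // In rough terms, the emitted test compares the destination against the source
  // plus the copy length scaled by sf and branches to no_overlap_target / L_no_overlap
  // when the regions do not overlap in a way that requires a backward copy; otherwise
  // it falls through to the conjoint copy path. (Summary for orientation only; the
  // exact instruction sequence is in the .cpp file.)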


  // Shuffle first three arg regs on Windows into Linux/Solaris locations.
  void setup_arg_regs(int nargs = 3);
  void restore_arg_regs();

#ifdef ASSERT
  bool _regs_in_thread;
#endif

  // This is used in places where r10 is a scratch register, and can
  // be adapted if r9 is needed also.
  void setup_arg_regs_using_thread(int nargs = 3);

  void restore_arg_regs_using_thread();

  // Copy big chunks forward
  void copy_bytes_forward(Register end_from, Register end_to,
                          Register qword_count, Register tmp1,
                          Register tmp2, Label& L_copy_bytes,
                          Label& L_copy_8_bytes, DecoratorSet decorators,
                          BasicType type);

  // Copy big chunks backward
  void copy_bytes_backward(Register from, Register dest,
                           Register qword_count, Register tmp1,
                           Register tmp2, Label& L_copy_bytes,
                           Label& L_copy_8_bytes, DecoratorSet decorators,
                           BasicType type);

  void setup_argument_regs(BasicType type);

  void restore_argument_regs(BasicType type);

#if COMPILER2_OR_JVMCI
  // The following rules apply to the AVX3-optimized arraycopy stubs:
  // - If the target supports the AVX3 features (BW+VL+F), then the implementation uses
  //   32-byte vectors (YMMs) for both the special cases (various small block sizes) and
  //   the aligned copy loop. This is the default configuration.
  // - If the copy length is above AVX3Threshold, then the implementation uses 64-byte
  //   vectors (ZMMs) for the main copy loop (and the subsequent tail), since the bulk of
  //   the cycles is consumed there.
  // - If the user forces MaxVectorSize=32, then above 4096 bytes REP MOVS is seen to give
  //   better performance for disjoint copies. For conjoint/backward copies, the
  //   vector-based copy performs better.
  // - If the user sets AVX3Threshold=0, then the special cases for small block sizes also
  //   operate over 64-byte vector registers (ZMMs).
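  //
  // For example, with the default AVX3Threshold of 4096, a 2 KB disjoint copy stays on
  // the 32-byte YMM path while an 8 KB copy switches to the 64-byte ZMM main loop
  // (illustration only; AVX3Threshold is a product flag and may be tuned per platform).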

  address generate_disjoint_copy_avx3_masked(StubId stub_id, address* entry);

  address generate_conjoint_copy_avx3_masked(StubId stub_id, address* entry,
                                             address nooverlap_target);

  void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
                                    Register to, Register count, int shift,
                                    Register index, Register temp,
                                    bool use64byteVector, Label& L_entry, Label& L_exit);

  void arraycopy_avx3_special_cases_256(XMMRegister xmm, KRegister mask, Register from,
                                        Register to, Register count, int shift,
                                        Register index, Register temp, Label& L_exit);

  void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
                                             Register to, Register start_index, Register end_index,
                                             Register count, int shift, Register temp,
                                             bool use64byteVector, Label& L_entry, Label& L_exit);

  void arraycopy_avx3_large(Register to, Register from, Register temp1, Register temp2,
                            Register temp3, Register temp4, Register count,
                            XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                            XMMRegister xmm4, int shift);

  void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  int shift = Address::times_1, int offset = 0);

  void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  bool conjoint, int shift = Address::times_1, int offset = 0,
                  bool use64byteVector = false);

  void copy256_avx3(Register dst, Register src, Register index, XMMRegister xmm1, XMMRegister xmm2,
                    XMMRegister xmm3, XMMRegister xmm4, int shift, int offset = 0);

  void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0,
                         bool use64byteVector = false);

  void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0);
#endif // COMPILER2_OR_JVMCI

  address generate_disjoint_byte_copy(address* entry);

  address generate_conjoint_byte_copy(address nooverlap_target, address* entry);

  address generate_disjoint_short_copy(address *entry);

  address generate_fill(StubId stub_id);

  address generate_conjoint_short_copy(address nooverlap_target, address *entry);
  address generate_disjoint_int_oop_copy(StubId stub_id, address* entry);
  address generate_conjoint_int_oop_copy(StubId stub_id, address nooverlap_target,
                                         address *entry);
  address generate_disjoint_long_oop_copy(StubId stub_id, address* entry);
  address generate_conjoint_long_oop_copy(StubId stub_id, address nooverlap_target,
                                          address *entry);

  // Helper for generating a dynamic type check.
  // Smashes no registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success);
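  // (On success the generated code branches to L_success; on failure it falls
  // through, which is how the checkcast copy stub detects an element that does
  // not fit the destination array. Summary only; see the definition for the
  // exact register contract.)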

  // Generate checkcasting array copy stub
  address generate_checkcast_copy(StubId stub_id, address *entry);

  // Generate 'unsafe' array copy stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  address generate_unsafe_copy(address byte_copy_entry, address short_copy_entry,
                               address int_copy_entry, address long_copy_entry);

  // Generate 'unsafe' set memory stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Examines the alignment of the operands and dispatches
  // to an int, short, or byte copy loop.
  address generate_unsafe_setmemory(address byte_copy_entry);

  // Perform range checks on the proposed arraycopy.
  // Kills temp, but nothing else.
  // Also cleans the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
                              Register src_pos, // source position (c_rarg1)
                              Register dst,     // destination array oop (c_rarg2)
                              Register dst_pos, // destination position (c_rarg3)
                              Register length,
                              Register temp,
                              Label& L_failed);

  // Generate generic array copy stubs
  address generate_generic_copy(address byte_copy_entry, address short_copy_entry,
                                address int_copy_entry, address oop_copy_entry,
                                address long_copy_entry, address checkcast_copy_entry);

  address generate_data_cache_writeback();

  address generate_data_cache_writeback_sync();

  void generate_arraycopy_stubs();


  // MD5 stubs

  // ofs and limit are used for the multi-block byte array.
  // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
  address generate_md5_implCompress(StubId stub_id);


  // SHA stubs

  // ofs and limit are used for the multi-block byte array.
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  address generate_sha1_implCompress(StubId stub_id);

  // ofs and limit are used for the multi-block byte array.
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  address generate_sha256_implCompress(StubId stub_id);
  address generate_sha512_implCompress(StubId stub_id);

  // Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
  address generate_pshuffle_byte_flip_mask_sha512();

  address generate_upper_word_mask();
  address generate_shuffle_byte_flip_mask();
  address generate_pshuffle_byte_flip_mask();


  // AES intrinsic stubs

  address generate_aescrypt_encryptBlock();

  address generate_aescrypt_decryptBlock();

  address generate_cipherBlockChaining_encryptAESCrypt();

  // A version of CBC/AES Decrypt which does 4 blocks in a loop at a time
  // to hide instruction latency
  address generate_cipherBlockChaining_decryptAESCrypt_Parallel();

  address generate_electronicCodeBook_encryptAESCrypt();

  void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);

  address generate_electronicCodeBook_decryptAESCrypt();

  void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);

  // Vector AES Galois Counter Mode implementation
  address generate_galoisCounterMode_AESCrypt();
  void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
                      Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);

  // AVX2 AES Galois Counter Mode implementation
  address generate_avx2_galoisCounterMode_AESCrypt();
  void aesgcm_avx2(Register in, Register len, Register ct, Register out, Register key,
                   Register state, Register subkeyHtbl, Register counter);

  // Vector AES Counter implementation
  address generate_counterMode_VectorAESCrypt();
  void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
                      Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);

  // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
  // to hide instruction latency
  address generate_counterMode_AESCrypt_Parallel();

  address generate_cipherBlockChaining_decryptVectorAESCrypt();

  address generate_key_shuffle_mask();

  void roundDec(XMMRegister xmm_reg);
  void roundDeclast(XMMRegister xmm_reg);
  void roundEnc(XMMRegister key, int rnum);
  void lastroundEnc(XMMRegister key, int rnum);
  void roundDec(XMMRegister key, int rnum);
  void lastroundDec(XMMRegister key, int rnum);
  void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
  void ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl,
                                         Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR, XMMRegister GHASH,
                                         XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK,
                                         bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor,
                                         bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset);
  void generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl);
  void initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
                                Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
                                XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset);
  void gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl,
                               int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction);
  void ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
                      Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
                      int in_disp, int displacement, int hashkey_offset);
  void aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key,
                     Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
  // AVX2 AES-GCM related functions
  void initial_blocks_avx2(XMMRegister ctr, Register rounds, Register key, Register len,
                           Register in, Register out, Register ct, XMMRegister aad_hashx, Register pos);
  void gfmul_avx2(XMMRegister GH, XMMRegister HK);
  void generateHtbl_8_block_avx2(Register htbl);
  void ghash8_encrypt8_parallel_avx2(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, Register in,
                                     Register out, Register ct, Register pos, bool out_order, Register rounds,
                                     XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
                                     XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm8);
  void ghash_last_8_avx2(Register subkeyHtbl);

  void check_key_offset(Register key, int offset, int load_size);

  // Load key and shuffle operation
  void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
  void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

  // Utility routine for loading a 128-bit key word in little-endian format;
  // can optionally specify that the shuffle mask is already in an XMM register.
  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
  void load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

  // Utility routine for increasing the 128-bit counter (the IV in CTR mode)
  void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
  void ev_add128(XMMRegister xmmdst, XMMRegister xmmsrc1, XMMRegister xmmsrc2,
                 int vector_len, KRegister ktmp, XMMRegister ones);
  void generate_aes_stubs();


  // GHASH stubs

  void generate_ghash_stubs();

  void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
                     XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
  void gfmul(XMMRegister tmp0, XMMRegister t);
  void generateHtbl_one_block(Register htbl, Register rscratch);
  void generateHtbl_eight_blocks(Register htbl);
  void avx_ghash(Register state, Register htbl, Register data, Register blocks);

  // Used by GHASH and AES stubs.
  address ghash_polynomial_addr();
  address ghash_shufflemask_addr();
  address ghash_long_swap_mask_addr(); // byte swap x86 long
  address ghash_byte_swap_mask_addr(); // byte swap x86 byte array

  // Single and multi-block GHASH operations
  address generate_ghash_processBlocks();

  // GHASH single and multi-block operations using AVX instructions
  address generate_avx_ghash_processBlocks();

  // ChaCha20 stubs and helper functions
  void generate_chacha_stubs();
  address generate_chacha20Block_avx();
  address generate_chacha20Block_avx512();
  void cc20_quarter_round_avx(XMMRegister aVec, XMMRegister bVec,
                              XMMRegister cVec, XMMRegister dVec, XMMRegister scratch,
                              XMMRegister lrot8, XMMRegister lrot16, int vector_len);
  void cc20_shift_lane_org(XMMRegister bVec, XMMRegister cVec,
                           XMMRegister dVec, int vector_len, bool colToDiag);
  void cc20_keystream_collate_avx512(XMMRegister aVec, XMMRegister bVec,
                                     XMMRegister cVec, XMMRegister dVec, Register baseAddr, int baseOffset);

  // Poly1305 multiblock using IFMA instructions
  address generate_poly1305_processBlocks();
  void poly1305_process_blocks_avx512(const Register input, const Register length,
                                      const Register A0, const Register A1, const Register A2,
                                      const Register R0, const Register R1, const Register C1);
  void poly1305_multiply_scalar(const Register a0, const Register a1, const Register a2,
                                const Register r0, const Register r1, const Register c1, bool only128,
                                const Register t0, const Register t1, const Register t2,
                                const Register mulql, const Register mulqh);
  void poly1305_multiply8_avx512(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                 const XMMRegister R0, const XMMRegister R1, const XMMRegister R2, const XMMRegister R1P, const XMMRegister R2P,
                                 const XMMRegister P0L, const XMMRegister P0H, const XMMRegister P1L, const XMMRegister P1H, const XMMRegister P2L, const XMMRegister P2H,
                                 const XMMRegister TMP, const Register rscratch);
  void poly1305_limbs(const Register limbs, const Register a0, const Register a1, const Register a2, const Register t0, const Register t1);
  void poly1305_limbs_out(const Register a0, const Register a1, const Register a2, const Register limbs, const Register t0, const Register t1);
  void poly1305_limbs_avx512(const XMMRegister D0, const XMMRegister D1,
                             const XMMRegister L0, const XMMRegister L1, const XMMRegister L2, bool padMSG,
                             const XMMRegister TMP, const Register rscratch);
  // Poly1305 AVX2 implementation
  void poly1305_process_blocks_avx2(const Register input, const Register length,
                                    const Register a0, const Register a1, const Register a2,
                                    const Register r0, const Register r1, const Register c1);
  void poly1305_msg_mul_reduce_vec4_avx2(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                         const Address R0, const Address R1, const Address R2,
                                         const Address R1P, const Address R2P,
                                         const XMMRegister P0L, const XMMRegister P0H,
                                         const XMMRegister P1L, const XMMRegister P1H,
                                         const XMMRegister P2L, const XMMRegister P2H,
                                         const XMMRegister YTMP1, const XMMRegister YTMP2,
                                         const XMMRegister YTMP3, const XMMRegister YTMP4,
                                         const XMMRegister YTMP5, const XMMRegister YTMP6,
                                         const Register input, const Register length, const Register rscratch);
  void poly1305_mul_reduce_vec4_avx2(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
                                     const XMMRegister R0, const XMMRegister R1, const XMMRegister R2,
                                     const XMMRegister R1P, const XMMRegister R2P,
                                     const XMMRegister P0L, const XMMRegister P0H,
                                     const XMMRegister P1L, const XMMRegister P1H,
                                     const XMMRegister P2L, const XMMRegister P2H,
                                     const XMMRegister YTMP1, const Register rscratch);

  address generate_intpoly_montgomeryMult_P256();
  address generate_intpoly_assign();

  // SHA3 stubs
  void generate_sha3_stubs();

  // Kyber stubs
  void generate_kyber_stubs();

  // Dilithium stubs
  void generate_dilithium_stubs();

  // BASE64 stubs
  address base64_shuffle_addr();
  address base64_avx2_shuffle_addr();
  address base64_avx2_input_mask_addr();
  address base64_avx2_lut_addr();
  address base64_encoding_table_addr();

  // Code for generating Base64 encoding.
  // Intrinsic function prototype in Base64.java:
  // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL)
  address generate_base64_encodeBlock();

  // base64 AVX512vbmi tables
  address base64_vbmi_lookup_lo_addr();
  address base64_vbmi_lookup_hi_addr();
  address base64_vbmi_lookup_lo_url_addr();
  address base64_vbmi_lookup_hi_url_addr();
  address base64_vbmi_pack_vec_addr();
  address base64_vbmi_join_0_1_addr();
  address base64_vbmi_join_1_2_addr();
  address base64_vbmi_join_2_3_addr();
  address base64_decoding_table_addr();
  address base64_AVX2_decode_tables_addr();
  address base64_AVX2_decode_LUT_tables_addr();

  // Code for generating Base64 decoding.
  //
  // Based on the article (and associated code) from https://arxiv.org/abs/1910.05109.
  //
  // Intrinsic function prototype in Base64.java:
  // private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, boolean isMIME);
  address generate_base64_decodeBlock();

  address generate_updateBytesCRC32();
  address generate_updateBytesCRC32C(bool is_pclmulqdq_supported);

  address generate_updateBytesAdler32();

  address generate_multiplyToLen();

  address generate_vectorizedMismatch();

  address generate_squareToLen();

  address generate_method_entry_barrier();

  address generate_mulAdd();

  address generate_bigIntegerRightShift();
  address generate_bigIntegerLeftShift();

  address generate_float16ToFloat();
  address generate_floatToFloat16();

  // Libm math stubs

  address generate_libmSin();
  address generate_libmCos();
  address generate_libmTan();
  address generate_libmSinh();
  address generate_libmTanh();
  address generate_libmCbrt();
  address generate_libmExp();
  address generate_libmPow();
  address generate_libmLog();
  address generate_libmLog10();
  address generate_libmFmod();

  // Shared constants
  static address ZERO;
  static address NEG_ZERO;
  static address ONE;
  static address ONEHALF;
  static address SIGN_MASK;
  static address TWO_POW_55;
  static address TWO_POW_M55;
  static address SHIFTER;
  static address PI32INV;
  static address PI_INV_TABLE;
  static address Ctable;
  static address SC_1;
  static address SC_2;
  static address SC_3;
  static address SC_4;
  static address PI_4;
  static address P_1;
  static address P_3;
  static address P_2;

  void generate_libm_stubs();

#ifdef COMPILER2
  void generate_string_indexof(address *fnptrs);
#endif

  address generate_cont_thaw(StubId stub_id);
  address generate_cont_thaw();

  // TODO: will probably need multiple return barriers depending on return type
  address generate_cont_returnBarrier();
  address generate_cont_returnBarrier_exception();

  address generate_cont_preempt_stub();

  // TODO -- delete this as it is not implemented?
  //
  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs. If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller-saved registers were assumed to be volatile in the compiler.
  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   Register arg1 = noreg,
                                   Register arg2 = noreg);

  // shared exception handler for FFM upcall stubs
  address generate_upcall_stub_exception_handler();
  address generate_upcall_stub_load_target();

  // interpreter or compiled code marshalling registers to/from inline type instance
  address generate_return_value_stub(address destination, const char* name, bool has_res);

  // Specialized stub implementations for UseSecondarySupersTable.
  void generate_lookup_secondary_supers_table_stub();

  // Slow path implementation for UseSecondarySupersTable.
  address generate_lookup_secondary_supers_table_slow_path_stub();

  void create_control_words();

  // Initialization
  void generate_preuniverse_stubs();
  void generate_initial_stubs();
  void generate_continuation_stubs();
  void generate_compiler_stubs();
  void generate_final_stubs();

 public:
  StubGenerator(CodeBuffer* code, BlobId blob_id);
};

#endif // CPU_X86_STUBGENERATOR_X86_64_HPP