1 /*
  2  * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #ifndef CPU_X86_STUBGENERATOR_X86_64_HPP
 26 #define CPU_X86_STUBGENERATOR_X86_64_HPP
 27 
 28 #include "code/codeBlob.hpp"
 29 #include "runtime/continuation.hpp"
 30 #include "runtime/stubCodeGenerator.hpp"
 31 
 32 // Stub Code definitions
 33 
 34 class StubGenerator: public StubCodeGenerator {
      // Declares the routines used to generate x86_64 stub code. Apart from
      // the two array_overlap_test() convenience overloads and the constructor,
      // every member is only declared in this class body. Constructing a
      // StubGenerator runs exactly one generation pass, selected by the
      // constructor's `phase` argument (see the constructor at the end of
      // the class).
 35  private:
 36 
 37   // Call stubs are used to call Java from C.
 38   address generate_call_stub(address& return_address);
 39 
 40   // Return point for a Java call if there's an exception thrown in
 41   // Java code.  The exception is caught and transformed into a
 42   // pending exception stored in JavaThread that can be tested from
 43   // within the VM.
 44   //
 45   // Note: Usually the parameters are removed by the callee. In case
 46   // of an exception crossing an activation frame boundary, that is
 47   // not the case if the callee is compiled code => need to setup the
 48   // rsp.
 49   //
 50   // rax: exception oop
 51 
 52   address generate_catch_exception();
 53 
 54   // Continuation point for runtime calls returning with a pending
 55   // exception.  The pending exception check happened in the runtime
 56   // or native call stub.  The pending exception in Thread is
 57   // converted into a Java-level exception.
 58   //
 59   // Contract with Java-level exception handlers:
 60   // rax: exception
 61   // rdx: throwing pc
 62   //
 63   // NOTE: At entry of this stub, exception-pc must be on stack !!
 64 
 65   address generate_forward_exception();
 66 
 67   // Support for intptr_t OrderAccess::fence()
 68   address generate_orderaccess_fence();
 69 
 70   // Support for intptr_t get_previous_sp()
 71   //
 72   // This routine is used to find the previous stack pointer for the
 73   // caller.
 74   address generate_get_previous_sp();
 75 
 76   //----------------------------------------------------------------------------------------------------
 77   // Support for void verify_mxcsr()
 78   //
 79   // This routine is used with -Xcheck:jni to verify that native
 80   // JNI code does not return to Java code without restoring the
 81   // MXCSR register to our expected state.
 82 
 83   address generate_verify_mxcsr();
 84 
 85   address generate_f2i_fixup();
 86   address generate_f2l_fixup();
 87   address generate_d2i_fixup();
 88   address generate_d2l_fixup();
 89 
 90   address generate_count_leading_zeros_lut(const char *stub_name);
 91   address generate_popcount_avx_lut(const char *stub_name);
 92   address generate_iota_indices(const char *stub_name);
 93   address generate_vector_reverse_bit_lut(const char *stub_name);
 94 
 95   address generate_vector_reverse_byte_perm_mask_long(const char *stub_name);
 96   address generate_vector_reverse_byte_perm_mask_int(const char *stub_name);
 97   address generate_vector_reverse_byte_perm_mask_short(const char *stub_name);
 98   address generate_vector_byte_shuffle_mask(const char *stub_name);
 99 
100   address generate_fp_mask(const char *stub_name, int64_t mask);
101 
102   address generate_vector_mask(const char *stub_name, int64_t mask);
103 
104   address generate_vector_byte_perm_mask(const char *stub_name);
105 
106   address generate_vector_fp_mask(const char *stub_name, int64_t mask);
107 
108   address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
109                                      int32_t val0, int32_t val1, int32_t val2, int32_t val3,
110                                      int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
111                                      int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
112                                      int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0);
113 
114   // Non-destructive plausibility checks for oops
115   address generate_verify_oop();
116 
117   // Verify that a register contains a clean 32-bit positive value
118   // (high 32 bits are 0) so it can be used in 64-bit shifts.
119   void assert_clean_int(Register Rint, Register Rtmp);
120 
121   // Generate overlap test for array copy stubs
122   void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf);
123 
      // Address-only form: asserts that no_overlap_target is non-NULL, then
      // delegates to the three-argument version with a NULL label pointer.
124   void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
125     assert(no_overlap_target != NULL, "must be generated");
126     array_overlap_test(no_overlap_target, NULL, sf);
127   }
      // Label-only form: delegates to the three-argument version with a NULL
      // target address and the address of L_no_overlap.
128   void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
129     array_overlap_test(NULL, &L_no_overlap, sf);
130   }
131 
132 
133   // Shuffle first three arg regs on Windows into Linux/Solaris locations.
134   void setup_arg_regs(int nargs = 3);
135   void restore_arg_regs();
136 
      // Present only when ASSERT is defined; the constructor resets it to
      // false through DEBUG_ONLY.
137 #ifdef ASSERT
138   bool _regs_in_thread;
139 #endif
140 
141   // This is used in places where r10 is a scratch register, and can
142   // be adapted if r9 is needed also.
143   void setup_arg_regs_using_thread();
144 
145   void restore_arg_regs_using_thread();
146 
147   // Copy big chunks forward
148   void copy_bytes_forward(Register end_from, Register end_to,
149                           Register qword_count, Register to,
150                           Label& L_copy_bytes, Label& L_copy_8_bytes);
151 
152   // Copy big chunks backward
153   void copy_bytes_backward(Register from, Register dest,
154                            Register qword_count, Register to,
155                            Label& L_copy_bytes, Label& L_copy_8_bytes);
156 
157   void setup_argument_regs(BasicType type);
158 
159   void restore_argument_regs(BasicType type);
160 
161 #if COMPILER2_OR_JVMCI
162   // Following rules apply to AVX3 optimized arraycopy stubs:
163   // - If target supports AVX3 features (BW+VL+F) then implementation uses 32 byte vectors (YMMs)
164   //   for both special cases (various small block sizes) and aligned copy loop. This is the
165   //   default configuration.
166   // - If copy length is above AVX3Threshold, then implementation uses 64 byte vectors (ZMMs)
167   //   for main copy loop (and subsequent tail) since bulk of the cycles will be consumed in it.
168   // - If user forces MaxVectorSize=32 then above 4096 bytes it is seen that REP MOVs shows
169   //   better performance for disjoint copies. For conjoint/backward copy vector based
170   //   copy performs better.
171   // - If user sets AVX3Threshold=0, then special cases for small block sizes operate over
172   //   64 byte vector registers (ZMMs).
173 
174   address generate_disjoint_copy_avx3_masked(address* entry, const char *name, int shift,
175                                              bool aligned, bool is_oop, bool dest_uninitialized);
176 
177   address generate_conjoint_copy_avx3_masked(address* entry, const char *name, int shift,
178                                              address nooverlap_target, bool aligned, bool is_oop,
179                                              bool dest_uninitialized);
180 
181   void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
182                                     Register to, Register count, int shift,
183                                     Register index, Register temp,
184                                     bool use64byteVector, Label& L_entry, Label& L_exit);
185 
186   void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
187                                              Register to, Register start_index, Register end_index,
188                                              Register count, int shift, Register temp,
189                                              bool use64byteVector, Label& L_entry, Label& L_exit);
190 
191   void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
192                   int shift = Address::times_1, int offset = 0);
193 
194   void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
195                   bool conjoint, int shift = Address::times_1, int offset = 0,
196                   bool use64byteVector = false);
197 
198   void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
199                          KRegister mask, Register length, Register index,
200                          Register temp, int shift = Address::times_1, int offset = 0,
201                          bool use64byteVector = false);
202 
203   void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
204                          KRegister mask, Register length, Register index,
205                          Register temp, int shift = Address::times_1, int offset = 0);
206 #endif // COMPILER2_OR_JVMCI
207 
208   address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name);
209 
210   address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
211                                       address* entry, const char *name);
212 
213   address generate_disjoint_short_copy(bool aligned, address *entry, const char *name);
214 
215   address generate_fill(BasicType t, bool aligned, const char *name);
216 
217   address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
218                                        address *entry, const char *name);
219   address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
220                                          const char *name, bool dest_uninitialized = false);
221   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
222                                          address *entry, const char *name,
223                                          bool dest_uninitialized = false);
224   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
225                                           const char *name, bool dest_uninitialized = false);
226   address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
227                                           address nooverlap_target, address *entry,
228                                           const char *name, bool dest_uninitialized = false);
229 
230   // Helper for generating a dynamic type check.
231   // Smashes no registers.
232   void generate_type_check(Register sub_klass,
233                            Register super_check_offset,
234                            Register super_klass,
235                            Label& L_success);
236 
237   // Generate checkcasting array copy stub
238   address generate_checkcast_copy(const char *name, address *entry,
239                                   bool dest_uninitialized = false);
240 
241   // Generate 'unsafe' array copy stub
242   // Though just as safe as the other stubs, it takes an unscaled
243   // size_t argument instead of an element count.
244   //
245   // Examines the alignment of the operands and dispatches
246   // to a long, int, short, or byte copy loop.
247   address generate_unsafe_copy(const char *name,
248                                address byte_copy_entry, address short_copy_entry,
249                                address int_copy_entry, address long_copy_entry);
250 
251   // Perform range checks on the proposed arraycopy.
252   // Kills temp, but nothing else.
253   // Also, clean the sign bits of src_pos and dst_pos.
254   void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
255                               Register src_pos, // source position (c_rarg1)
256                               Register dst,     // destination array oop (c_rarg2)
257                               Register dst_pos, // destination position (c_rarg3)
258                               Register length,
259                               Register temp,
260                               Label& L_failed);
261 
262   // Generate generic array copy stubs
263   address generate_generic_copy(const char *name,
264                                 address byte_copy_entry, address short_copy_entry,
265                                 address int_copy_entry, address oop_copy_entry,
266                                 address long_copy_entry, address checkcast_copy_entry);
267 
268   address generate_data_cache_writeback();
269 
270   address generate_data_cache_writeback_sync();
271 
272   void generate_arraycopy_stubs();
273 
274 
275   // MD5 stubs
276 
277   // ofs and limit are used for multi-block byte array.
278   // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
279   address generate_md5_implCompress(bool multi_block, const char *name);
280 
281 
282   // SHA stubs
283 
284   // ofs and limit are used for multi-block byte array.
285   // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
286   address generate_sha1_implCompress(bool multi_block, const char *name);
287 
288   // ofs and limit are used for multi-block byte array.
289   // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
290   address generate_sha256_implCompress(bool multi_block, const char *name);
291   address generate_sha512_implCompress(bool multi_block, const char *name);
292 
293   // Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
294   address generate_pshuffle_byte_flip_mask_sha512();
295 
296   address generate_upper_word_mask();
297   address generate_shuffle_byte_flip_mask();
298   address generate_pshuffle_byte_flip_mask();
299 
300 
301   // AES intrinsic stubs
302 
303   address generate_aescrypt_encryptBlock();
304 
305   address generate_aescrypt_decryptBlock();
306 
307   address generate_cipherBlockChaining_encryptAESCrypt();
308 
309   // A version of CBC/AES Decrypt which does 4 blocks in a loop at a time
310   // to hide instruction latency
311   address generate_cipherBlockChaining_decryptAESCrypt_Parallel();
312 
313   address generate_electronicCodeBook_encryptAESCrypt();
314 
315   void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
316 
317   address generate_electronicCodeBook_decryptAESCrypt();
318 
319   void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
320 
321   // Vector AES Galois Counter Mode implementation
322   address generate_galoisCounterMode_AESCrypt();
323   void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
324                       Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
325 
326 
327   // Vector AES Counter implementation
328   address generate_counterMode_VectorAESCrypt();
329   void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
330                       Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
331 
332   // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
333   // to hide instruction latency
334   address generate_counterMode_AESCrypt_Parallel();
335 
336   address generate_cipherBlockChaining_decryptVectorAESCrypt();
337 
338   address generate_key_shuffle_mask();
339 
340   void roundDec(XMMRegister xmm_reg);
341   void roundDeclast(XMMRegister xmm_reg);
342   void roundEnc(XMMRegister key, int rnum);
343   void lastroundEnc(XMMRegister key, int rnum);
344   void roundDec(XMMRegister key, int rnum);
345   void lastroundDec(XMMRegister key, int rnum);
346   void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
347   void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
348   void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
349                                   XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
350                                   XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
351                                   bool final_reduction, int index, XMMRegister counter_inc_mask);
352   // Load key and shuffle operation
353   void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
354   void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);
355 
356   // Utility routine for loading a 128-bit key word in little endian format
357   // can optionally specify that the shuffle mask is already in an xmmregister
358   void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
359   void load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);
360 
361   // Utility routine for incrementing a 128-bit counter (iv in CTR mode)
362   void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
363 
364   void generate_aes_stubs();
365 
366 
367   // GHASH stubs
368 
369   void generate_ghash_stubs();
370 
371   void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
372                      XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
373   void gfmul(XMMRegister tmp0, XMMRegister t);
374   void generateHtbl_one_block(Register htbl, Register rscratch);
375   void generateHtbl_eight_blocks(Register htbl);
376   void avx_ghash(Register state, Register htbl, Register data, Register blocks);
377 
378   // Used by GHASH and AES stubs.
379   address ghash_polynomial_addr();
380   address ghash_shufflemask_addr();
381   address ghash_long_swap_mask_addr(); // byte swap x86 long
382   address ghash_byte_swap_mask_addr(); // byte swap x86 byte array
383 
384   // Single and multi-block ghash operations
385   address generate_ghash_processBlocks();
386 
387   // Ghash single and multi block operations using AVX instructions
388   address generate_avx_ghash_processBlocks();
389 
390   // ChaCha20 stubs and helper functions
391   void generate_chacha_stubs();
392   address generate_chacha20Block_avx();
393   address generate_chacha20Block_avx512();
394   void cc20_quarter_round_avx(XMMRegister aVec, XMMRegister bVec,
395     XMMRegister cVec, XMMRegister dVec, XMMRegister scratch,
396     XMMRegister lrot8, XMMRegister lrot16, int vector_len);
397   void cc20_shift_lane_org(XMMRegister bVec, XMMRegister cVec,
398     XMMRegister dVec, int vector_len, bool colToDiag);
399   void cc20_keystream_collate_avx512(XMMRegister aVec, XMMRegister bVec,
400     XMMRegister cVec, XMMRegister dVec, Register baseAddr, int baseOffset);
401 
402   // Poly1305 multiblock using IFMA instructions
403   address generate_poly1305_processBlocks();
404   void poly1305_process_blocks_avx512(const Register input, const Register length,
405                                       const Register A0, const Register A1, const Register A2,
406                                       const Register R0, const Register R1, const Register C1);
407   void poly1305_multiply_scalar(const Register a0, const Register a1, const Register a2,
408                                 const Register r0, const Register r1, const Register c1, bool only128,
409                                 const Register t0, const Register t1, const Register t2,
410                                 const Register mulql, const Register mulqh);
411   void poly1305_multiply8_avx512(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
412                                  const XMMRegister R0, const XMMRegister R1, const XMMRegister R2, const XMMRegister R1P, const XMMRegister R2P,
413                                  const XMMRegister P0L, const XMMRegister P0H, const XMMRegister P1L, const XMMRegister P1H, const XMMRegister P2L, const XMMRegister P2H,
414                                  const XMMRegister TMP, const Register rscratch);
415   void poly1305_limbs(const Register limbs, const Register a0, const Register a1, const Register a2, const Register t0, const Register t1);
416   void poly1305_limbs_out(const Register a0, const Register a1, const Register a2, const Register limbs, const Register t0, const Register t1);
417   void poly1305_limbs_avx512(const XMMRegister D0, const XMMRegister D1,
418                              const XMMRegister L0, const XMMRegister L1, const XMMRegister L2, bool padMSG,
419                              const XMMRegister TMP, const Register rscratch);
420 
421   // BASE64 stubs
422 
423   address base64_shuffle_addr();
424   address base64_avx2_shuffle_addr();
425   address base64_avx2_input_mask_addr();
426   address base64_avx2_lut_addr();
427   address base64_encoding_table_addr();
428 
429   // Code for generating Base64 encoding.
430   // Intrinsic function prototype in Base64.java:
431   // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL)
432   address generate_base64_encodeBlock();
433 
434   // base64 AVX512vbmi tables
435   address base64_vbmi_lookup_lo_addr();
436   address base64_vbmi_lookup_hi_addr();
437   address base64_vbmi_lookup_lo_url_addr();
438   address base64_vbmi_lookup_hi_url_addr();
439   address base64_vbmi_pack_vec_addr();
440   address base64_vbmi_join_0_1_addr();
441   address base64_vbmi_join_1_2_addr();
442   address base64_vbmi_join_2_3_addr();
443   address base64_decoding_table_addr();
444 
445   // Code for generating Base64 decoding.
446   //
447   // Based on the article (and associated code) from https://arxiv.org/abs/1910.05109.
448   //
449   // Intrinsic function prototype in Base64.java:
450   // private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, isMIME);
451   address generate_base64_decodeBlock();
452 
453   address generate_updateBytesCRC32();
454   address generate_updateBytesCRC32C(bool is_pclmulqdq_supported);
455 
456   address generate_updateBytesAdler32();
457 
458   address generate_multiplyToLen();
459 
460   address generate_vectorizedMismatch();
461 
462   address generate_squareToLen();
463 
464   address generate_method_entry_barrier();
465 
466   address generate_mulAdd();
467 
468   address generate_bigIntegerRightShift();
469   address generate_bigIntegerLeftShift();
470 
471 
472   // Libm trigonometric stubs
473 
474   address generate_libmSin();
475   address generate_libmCos();
476   address generate_libmTan();
477   address generate_libmExp();
478   address generate_libmPow();
479   address generate_libmLog();
480   address generate_libmLog10();
481 
482   // Shared constants
483   static address ZERO;
484   static address NEG_ZERO;
485   static address ONE;
486   static address ONEHALF;
487   static address SIGN_MASK;
488   static address TWO_POW_55;
489   static address TWO_POW_M55;
490   static address SHIFTER;
491   static address PI32INV;
492   static address PI_INV_TABLE;
493   static address Ctable;
494   static address SC_1;
495   static address SC_2;
496   static address SC_3;
497   static address SC_4;
498   static address PI_4;
499   static address P_1;
500   static address P_3;
501   static address P_2;
502 
503   void generate_libm_stubs();
504 
505 
506   address generate_cont_thaw(const char* label, Continuation::thaw_kind kind);
507   address generate_cont_thaw();
508 
509   // TODO: will probably need multiple return barriers depending on return type
510   address generate_cont_returnBarrier();
511   address generate_cont_returnBarrier_exception();
512 
513 #if INCLUDE_JFR
514 
515   // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
516   // It returns a jobject handle to the event writer.
517   // The handle is dereferenced and the return value is the event writer oop.
518   RuntimeStub* generate_jfr_write_checkpoint();
519 
520 #endif // INCLUDE_JFR
521 
522   // Continuation point for throwing of implicit exceptions that are
523   // not handled in the current activation. Fabricates an exception
524   // oop and initiates normal exception dispatching in this
525   // frame. Since we need to preserve callee-saved values (currently
526   // only for C2, but done for C1 as well) we need a callee-saved oop
527   // map and therefore have to make these stubs into RuntimeStubs
528   // rather than BufferBlobs.  If the compiler needs all registers to
529   // be preserved between the fault point and the exception handler
530   // then it must assume responsibility for that in
531   // AbstractCompiler::continuation_for_implicit_null_exception or
532   // continuation_for_implicit_division_by_zero_exception. All other
533   // implicit exceptions (e.g., NullPointerException or
534   // AbstractMethodError on entry) are either at call sites or
535   // otherwise assume that stack unwinding will be initiated, so
536   // caller saved registers were assumed volatile in the compiler.
537   address generate_throw_exception(const char* name,
538                                    address runtime_entry,
539                                    Register arg1 = noreg,
540                                    Register arg2 = noreg);
541 
542   // interpreter or compiled code marshalling registers to/from inline type instance
543   address generate_return_value_stub(address destination, const char* name, bool has_res);
544 
545   void create_control_words();
546 
547   // Initialization passes; the constructor below calls exactly one of them.
548   void generate_initial();
549   void generate_phase1();
550   void generate_all();
551 
552  public:
      // Runs one stub-generation pass chosen by `phase`:
      //   0         -> generate_initial()
      //   1         -> generate_phase1()
      //   otherwise -> generate_all()
      // `code` is forwarded to the StubCodeGenerator base constructor.
553   StubGenerator(CodeBuffer* code, int phase) : StubCodeGenerator(code) {
554     DEBUG_ONLY( _regs_in_thread = false; )
555     if (phase == 0) {
556       generate_initial();
557     } else if (phase == 1) {
558       generate_phase1(); // stubs that must be available for the interpreter
559     } else {
560       generate_all();
561     }
562   }
563 };
564 
565 #endif // CPU_X86_STUBGENERATOR_X86_64_HPP