1 /*
  2  * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2012, 2023 SAP SE. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #ifndef CPU_PPC_MACROASSEMBLER_PPC_HPP
 27 #define CPU_PPC_MACROASSEMBLER_PPC_HPP
 28 
 29 #include "asm/assembler.hpp"
 30 #include "oops/accessDecorators.hpp"
 31 #include "runtime/rtmLocking.hpp"
 32 #include "utilities/macros.hpp"
 33 
 34 // MacroAssembler extends Assembler by a few frequently used macros.
 35 
 36 class ciTypeArray;
 37 class OopMap;
 38 
 39 class MacroAssembler: public Assembler {
 40  public:
 41   MacroAssembler(CodeBuffer* code) : Assembler(code) {}
 42 
 43   // Indicates whether and, if so, which registers must be preserved when calling runtime code.
 44   enum PreservationLevel {
 45     PRESERVATION_NONE,
 46     PRESERVATION_FRAME_LR,
 47     PRESERVATION_FRAME_LR_GP_REGS,
 48     PRESERVATION_FRAME_LR_GP_FP_REGS
 49   };
 50 
 51   //
 52   // Optimized instruction emitters
 53   //
 54 
 55   inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; }
 56   inline static int largeoffset_si16_si16_lo(int si31) { return si31 - (((si31 + (1<<15)) >> 16) << 16); }
 57 
 58   // load d = *[a+si31]
 59   // Emits several instructions if the offset is not encodable in one instruction.
 60   void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop);
 61   void ld_largeoffset          (Register d, int si31, Register a, int emit_filler_nop);
 62   inline static bool is_ld_largeoffset(address a);
 63   inline static int get_ld_largeoffset_offset(address a);
 64 
  // Round register r with respect to modulus.
  // NOTE(review): the rounding direction and any constraints on modulus are
  // defined by the inline implementation, which is not part of this header.
  inline void round_to(Register r, int modulus);
 66 
 67   // Load/store with type given by parameter.
 68   void load_sized_value( Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed);
 69   void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes);
 70 
  // Move register only if the destination register and the source register
  // are different (general-purpose and floating-point variant).
  inline void mr_if_needed(Register rd, Register rs);
  inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
  // Dedicated to emitting scheduled mach nodes; it is declared here to keep
  // the ad file readable.
  // Endgroups are not needed if
  //  - the scheduler is off
  //  - the scheduler found that there is a natural group end, in that
  //    case it reduced the size of the instruction used in the test
  //    yielding 'needed'.
  inline void endgroup_if_needed(bool needed);
 82 
  // Memory barriers.
  // membar() takes an int describing the requested barrier bits; release(),
  // acquire() and fence() take no arguments.
  // NOTE(review): the bit encoding and the emitted instructions are defined
  // with the inline implementations, not in this header.
  inline void membar(int bits);
  inline void release();
  inline void acquire();
  inline void fence();
 88 
  // nop padding
  // Defaults: max = 252, rem = 0.
  // NOTE(review): presumably max bounds the padding and rem is the desired
  // remainder modulo `modulus`; confirm in the implementation.
  void align(int modulus, int max = 252, int rem = 0);

  // Align prefix opcode to make sure it's not on the last word of a
  // 64-byte block.
  //
  // Note: do not call align_prefix() in a .ad file (e.g. ppc.ad).  Instead
  // add ins_alignment(2) to the instruct definition and implement the
  // compute_padding() method of the instruct node to use
  // compute_prefix_padding().  See loadConI32Node::compute_padding() in
  // ppc.ad for an example.
  void align_prefix();
101 
102   //
103   // Constants, loading constants, TOC support
104   //
105 
106   // Address of the global TOC.
107   inline static address global_toc();
108   // Offset of given address to the global TOC.
109   inline static int offset_to_global_toc(const address addr);
110 
111   // Address of TOC of the current method.
112   inline address method_toc();
113   // Offset of given address to TOC of the current method.
114   inline int offset_to_method_toc(const address addr);
115 
116   // Global TOC.
117   void calculate_address_from_global_toc(Register dst, address addr,
118                                          bool hi16 = true, bool lo16 = true,
119                                          bool add_relocation = true, bool emit_dummy_addr = false);
120   inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) {
121     calculate_address_from_global_toc(dst, addr, true, false);
122   };
123   inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) {
124     calculate_address_from_global_toc(dst, addr, false, true);
125   };
126 
127   inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
128   // Returns address of first instruction in sequence.
129   static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
130   static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
131 
132 #ifdef _LP64
133   // Patch narrow oop constant.
134   inline static bool is_set_narrow_oop(address a, address bound);
135   // Returns address of first instruction in sequence.
136   static address patch_set_narrow_oop(address a, address bound, narrowOop data);
137   static narrowOop get_narrow_oop(address a, address bound);
138 #endif
139 
140   inline static bool is_load_const_at(address a);
141 
142   // Emits an oop const to the constant pool, loads the constant, and
143   // sets a relocation info with address current_pc.
144   // Returns true if successful.
145   bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false);
146 
147   static bool is_load_const_from_method_toc_at(address a);
148   static int get_offset_of_load_const_from_method_toc_at(address a);
149 
150   // Get the 64 bit constant from a `load_const' sequence.
151   static long get_const(address load_const);
152 
153   // Patch the 64 bit constant of a `load_const' sequence. This is a
154   // low level procedure. It neither flushes the instruction cache nor
155   // is it atomic.
156   static void patch_const(address load_const, long x);
157 
158   // Metadata in code that we have to keep track of.
159   AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
160   AddressLiteral constant_metadata_address(Metadata* obj); // find_index
161   // Oops used directly in compiled code are stored in the constant pool,
162   // and loaded from there.
163   // Allocate new entry for oop in constant pool. Generate relocation.
164   AddressLiteral allocate_oop_address(jobject obj);
165   // Find oop obj in constant pool. Return relocation with it's index.
166   AddressLiteral constant_oop_address(jobject obj);
167 
168   // Find oop in constant pool and emit instructions to load it.
169   // Uses constant_oop_address.
170   inline void set_oop_constant(jobject obj, Register d);
171   // Same as load_address.
172   inline void set_oop         (AddressLiteral obj_addr, Register d);
173 
174   //
175   // branch, jump
176   //
177   // set dst to -1, 0, +1 as follows: if CCR0bi is "greater than", dst is set to 1,
178   // if CCR0bi is "equal", dst is set to 0, otherwise it's set to -1.
179   void inline set_cmp3(Register dst);
180   // set dst to (treat_unordered_like_less ? -1 : +1)
181   void inline set_cmpu3(Register dst, bool treat_unordered_like_less);
182 
183   inline void pd_patch_instruction(address branch, address target, const char* file, int line);
184   NOT_PRODUCT(static void pd_print_patched_instruction(address branch);)
185 
186   // Conditional far branch for destinations encodable in 24+2 bits.
187   // Same interface as bc, e.g. no inverse boint-field.
188   enum {
189     bc_far_optimize_not         = 0,
190     bc_far_optimize_on_relocate = 1
191   };
192   // optimize: flag for telling the conditional far branch to optimize
193   //           itself when relocated.
194   void bc_far(int boint, int biint, Label& dest, int optimize);
195   void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions
196   // Relocation of conditional far branches.
197   static bool    is_bc_far_at(address instruction_addr);
198   static address get_dest_of_bc_far_at(address instruction_addr);
199   static void    set_dest_of_bc_far_at(address instruction_addr, address dest);
200  private:
201   static bool inline is_bc_far_variant1_at(address instruction_addr);
202   static bool inline is_bc_far_variant2_at(address instruction_addr);
203   static bool inline is_bc_far_variant3_at(address instruction_addr);
204  public:
205 
  // Convenience bc_far versions, one per branch condition, each taking the
  // condition register to test, the target label and the bc_far `optimize`
  // flag (see bc_far_optimize_not / bc_far_optimize_on_relocate).
  inline void blt_far(ConditionRegister crx, Label& L, int optimize);
  inline void bgt_far(ConditionRegister crx, Label& L, int optimize);
  inline void beq_far(ConditionRegister crx, Label& L, int optimize);
  inline void bso_far(ConditionRegister crx, Label& L, int optimize);
  inline void bge_far(ConditionRegister crx, Label& L, int optimize);
  inline void ble_far(ConditionRegister crx, Label& L, int optimize);
  inline void bne_far(ConditionRegister crx, Label& L, int optimize);
  inline void bns_far(ConditionRegister crx, Label& L, int optimize);
215 
  // Emit, identify and patch a patchable 64-bit absolute call/jump. The patching is NOT MT-safe.
217  private:
  // Layout of the patchable sequence: number of instructions, size in bytes,
  // and offset of the return address from the start of the sequence (equal
  // to the size of the sequence).
  enum {
    bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*5load_const*/ + 1/*mtctr*/ + 1/*bctrl*/),
    bxx64_patchable_size              = bxx64_patchable_instruction_count * BytesPerInstWord,
    bxx64_patchable_ret_addr_offset   = bxx64_patchable_size
  };
223   void bxx64_patchable(address target, relocInfo::relocType rt, bool link);
224   static bool is_bxx64_patchable_at(            address instruction_addr, bool link);
225   // Does the instruction use a pc-relative encoding of the destination?
226   static bool is_bxx64_patchable_pcrelative_at( address instruction_addr, bool link);
227   static bool is_bxx64_patchable_variant1_at(   address instruction_addr, bool link);
228   // Load destination relative to global toc.
229   static bool is_bxx64_patchable_variant1b_at(  address instruction_addr, bool link);
230   static bool is_bxx64_patchable_variant2_at(   address instruction_addr, bool link);
231   static void set_dest_of_bxx64_patchable_at(   address instruction_addr, address target, bool link);
232   static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link);
233 
234  public:
235   // call
236   enum {
237     bl64_patchable_instruction_count = bxx64_patchable_instruction_count,
238     bl64_patchable_size              = bxx64_patchable_size,
239     bl64_patchable_ret_addr_offset   = bxx64_patchable_ret_addr_offset
240   };
241   inline void bl64_patchable(address target, relocInfo::relocType rt) {
242     bxx64_patchable(target, rt, /*link=*/true);
243   }
244   inline static bool is_bl64_patchable_at(address instruction_addr) {
245     return is_bxx64_patchable_at(instruction_addr, /*link=*/true);
246   }
247   inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) {
248     return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true);
249   }
250   inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) {
251     set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true);
252   }
253   inline static address get_dest_of_bl64_patchable_at(address instruction_addr) {
254     return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true);
255   }
256   // jump
257   enum {
258     b64_patchable_instruction_count = bxx64_patchable_instruction_count,
259     b64_patchable_size              = bxx64_patchable_size,
260   };
261   inline void b64_patchable(address target, relocInfo::relocType rt) {
262     bxx64_patchable(target, rt, /*link=*/false);
263   }
264   inline static bool is_b64_patchable_at(address instruction_addr) {
265     return is_bxx64_patchable_at(instruction_addr, /*link=*/false);
266   }
267   inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) {
268     return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false);
269   }
270   inline static void set_dest_of_b64_patchable_at(address instruction_addr, address target) {
271     set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false);
272   }
273   inline static address get_dest_of_b64_patchable_at(address instruction_addr) {
274     return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false);
275   }
276 
277   //
278   // Support for frame handling
279   //
280 
281   // some ABI-related functions
282 
283   // Clobbers all volatile, (non-floating-point) general-purpose registers for debugging purposes.
284   // This is especially useful for making calls to the JRT in places in which this hasn't been done before;
285   // e.g. with the introduction of LRBs (load reference barriers) for concurrent garbage collection.
286   void clobber_volatile_gprs(Register excluded_register = noreg);
287   void clobber_carg_stack_slots(Register tmp);
288 
289   void save_nonvolatile_gprs(   Register dst_base, int offset);
290   void restore_nonvolatile_gprs(Register src_base, int offset);
291 
292   enum {
293     num_volatile_gp_regs = 11,
294     num_volatile_fp_regs = 14,
295     num_volatile_regs = num_volatile_gp_regs + num_volatile_fp_regs
296   };
297 
298   void save_volatile_gprs(   Register dst_base, int offset,
299                              bool include_fp_regs = true, bool include_R3_RET_reg = true);
300   void restore_volatile_gprs(Register src_base, int offset,
301                              bool include_fp_regs = true, bool include_R3_RET_reg = true);
302   void save_LR_CR(   Register tmp);     // tmp contains LR on return.
303   void restore_LR_CR(Register tmp);
304 
305   // Get current PC using bl-next-instruction trick.
306   address get_PC_trash_LR(Register result);
307 
308   // Resize current frame either relatively wrt to current SP or absolute.
309   void resize_frame(Register offset, Register tmp);
310   void resize_frame(int      offset, Register tmp);
311   void resize_frame_absolute(Register addr, Register tmp1, Register tmp2);
312 
313   // Push a frame of size bytes.
314   void push_frame(Register bytes, Register tmp);
315 
316   // Push a frame of size `bytes'. No abi space provided.
317   void push_frame(unsigned int bytes, Register tmp);
318 
319   // Push a frame of size `bytes' plus native_abi_reg_args on top.
320   void push_frame_reg_args(unsigned int bytes, Register tmp);
321 
322   // Setup up a new C frame with a spill area for non-volatile GPRs and additional
323   // space for local variables
324   void push_frame_reg_args_nonvolatiles(unsigned int bytes, Register tmp);
325 
326   // pop current C frame
327   void pop_frame();
328 
329   //
330   // Calls
331   //
332 
333  private:
334   address _last_calls_return_pc;
335 
336 #if defined(ABI_ELFv2)
337   // Generic version of a call to C function.
338   // Updates and returns _last_calls_return_pc.
339   address branch_to(Register function_entry, bool and_link);
340 #else
341   // Generic version of a call to C function via a function descriptor
342   // with variable support for C calling conventions (TOC, ENV, etc.).
343   // updates and returns _last_calls_return_pc.
344   address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
345                     bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee);
346 #endif
347 
348  public:
349 
  // Get the pc where the last call will return to, i.e. the value of
  // _last_calls_return_pc as recorded by the most recent call emitter.
  inline address last_calls_return_pc();
352 
353 #if defined(ABI_ELFv2)
354   // Call a C function via a function descriptor and use full C
355   // calling conventions. Updates and returns _last_calls_return_pc.
356   address call_c(Register function_entry);
357   // For tail calls: only branch, don't link, so callee returns to caller of this function.
358   address call_c_and_return_to_caller(Register function_entry);
359   address call_c(address function_entry, relocInfo::relocType rt);
360 #else
361   // Call a C function via a function descriptor and use full C
362   // calling conventions. Updates and returns _last_calls_return_pc.
363   address call_c(Register function_descriptor);
364   // For tail calls: only branch, don't link, so callee returns to caller of this function.
365   address call_c_and_return_to_caller(Register function_descriptor);
366   address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
367   address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
368                            Register toc);
369 #endif
370 
371  protected:
372 
373   // It is imperative that all calls into the VM are handled via the
374   // call_VM macros. They make sure that the stack linkage is setup
375   // correctly. call_VM's correspond to ENTRY/ENTRY_X entry points
376   // while call_VM_leaf's correspond to LEAF entry points.
377   //
378   // This is the base routine called by the different versions of
379   // call_VM. The interpreter may customize this version by overriding
380   // it for its purposes (e.g., to save/restore additional registers
381   // when doing a VM call).
382   //
383   // If no last_java_sp is specified (noreg) then SP will be used instead.
384   virtual void call_VM_base(
385      // where an oop-result ends up if any; use noreg otherwise
386     Register        oop_result,
387     // to set up last_Java_frame in stubs; use noreg otherwise
388     Register        last_java_sp,
389     // the entry point
390     address         entry_point,
391     // flag which indicates if exception should be checked
392     bool            check_exception = true
393   );
394 
  // Support for VM calls. This is the base routine called by the
  // different versions of call_VM_leaf. The interpreter may customize
  // this version by overriding it for its purposes (e.g., to
  // save/restore additional registers when doing a VM call).
  // NOTE(review): unlike call_VM_base this is declared non-virtual here, so
  // a subclass version hides rather than overrides it -- confirm intent.
  void call_VM_leaf_base(address entry_point);
400 
401  public:
402   // Call into the VM.
403   // Passes the thread pointer (in R3_ARG1) as a prepended argument.
404   // Makes sure oop return values are visible to the GC.
405   void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
406   void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
407   void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
408   void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg3, bool check_exceptions = true);
409   void call_VM_leaf(address entry_point);
410   void call_VM_leaf(address entry_point, Register arg_1);
411   void call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
412   void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
413 
  // Call a stub function via a function descriptor, but don't save
  // TOC before call, don't setup TOC and ENV for call, and don't
  // restore TOC after call. Updates and returns _last_calls_return_pc.
  inline address call_stub(Register function_entry);
  // Variant that takes the return pc in a register and returns nothing.
  inline void call_stub_and_return_to(Register function_entry, Register return_pc);

  // NOTE(review): no description is available in this header; see the
  // implementation for what is emitted after a call.
  void post_call_nop();
421 
422   //
423   // Java utilities
424   //
425 
426   // Read from the polling page, its address is already in a register.
427   inline void load_from_polling_page(Register polling_page_address, int offset = 0);
428   // Check whether instruction is a read access to the polling page
429   // which was emitted by load_from_polling_page(..).
430   static bool is_load_from_polling_page(int instruction, void* ucontext/*may be nullptr*/,
431                                         address* polling_address_ptr = nullptr);
432 
433   // Support for null-checks
434   //
435   // Generates code that causes a null OS exception if the content of reg is null.
436   // If the accessed location is M[reg + offset] and the offset is known, provide the
437   // offset. No explicit code generation is needed if the offset is within a certain
438   // range (0 <= offset <= page_size).
439 
  // Stack overflow checking: bang the stack at the given offset.
  void bang_stack_with_offset(int offset);

  // If instruction is a stack bang of the form ld, stdu, or
  // stdux, return the banged address. Otherwise, return 0.
  static address get_stack_bang_address(int instruction, void* ucontext);

  // Check for reserved stack access in method being exited. If the reserved
  // stack area was accessed, protect it again and throw StackOverflowError.
  void reserved_stack_check(Register return_pc);
450 
451   // Atomics
452   // CmpxchgX sets condition register to cmpX(current, compare).
453   // (flag == ne) => (dest_current_value != compare_value), (!swapped)
454   // (flag == eq) => (dest_current_value == compare_value), ( swapped)
455   static inline bool cmpxchgx_hint_acquire_lock()  { return true; }
456   // The stxcx will probably not be succeeded by a releasing store.
457   static inline bool cmpxchgx_hint_release_lock()  { return false; }
458   static inline bool cmpxchgx_hint_atomic_update() { return false; }
459 
460   // Cmpxchg semantics
461   enum {
462     MemBarNone = 0,
463     MemBarRel  = 1,
464     MemBarAcq  = 2,
465     MemBarFenceAfter = 4 // use powers of 2
466   };
467  private:
468   // Helper functions for word/sub-word atomics.
469   void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
470                                      Register addr_base, Register tmp1, Register tmp2, Register tmp3,
471                                      bool cmpxchgx_hint, bool is_add, int size);
472   void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
473                          Register compare_value, Register exchange_value,
474                          Register addr_base, Register tmp1, Register tmp2,
475                          Label &retry, Label &failed, bool cmpxchgx_hint, int size);
476   void cmpxchg_generic(ConditionRegister flag,
477                        Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
478                        Register tmp1, Register tmp2,
479                        int semantics, bool cmpxchgx_hint, Register int_flag_success, bool contention_hint, bool weak, int size);
480  public:
481   // Temps and addr_base are killed if processor does not support Power 8 instructions.
482   // Result will be sign extended.
483   void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base,
484                   Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
485     atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 1);
486   }
487   // Temps and addr_base are killed if processor does not support Power 8 instructions.
488   // Result will be sign extended.
489   void getandseth(Register dest_current_value, Register exchange_value, Register addr_base,
490                   Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
491     atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 2);
492   }
493   void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base,
494                   bool cmpxchgx_hint) {
495     atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, false, 4);
496   }
497   void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
498                   bool cmpxchgx_hint);
499   // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
500   // Result will be sign extended.
501   void getandaddb(Register dest_current_value, Register inc_value, Register addr_base,
502                   Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
503     atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 1);
504   }
505   // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
506   // Result will be sign extended.
507   void getandaddh(Register dest_current_value, Register inc_value, Register addr_base,
508                   Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
509     atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 2);
510   }
511   void getandaddw(Register dest_current_value, Register inc_value, Register addr_base,
512                   Register tmp1, bool cmpxchgx_hint) {
513     atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, true, 4);
514   }
515   void getandaddd(Register dest_current_value, Register exchange_value, Register addr_base,
516                   Register tmp, bool cmpxchgx_hint);
517   // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
518   // compare_value must be at least 32 bit sign extended. Result will be sign extended.
519   void cmpxchgb(ConditionRegister flag,
520                 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
521                 Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false,
522                 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
523     cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
524                     semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 1);
525   }
526   // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
527   // compare_value must be at least 32 bit sign extended. Result will be sign extended.
528   void cmpxchgh(ConditionRegister flag,
529                 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
530                 Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false,
531                 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
532     cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
533                     semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 2);
534   }
535   void cmpxchgw(ConditionRegister flag,
536                 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
537                 int semantics, bool cmpxchgx_hint = false,
538                 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
539     cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, noreg, noreg,
540                     semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 4);
541   }
542   void cmpxchgd(ConditionRegister flag,
543                 Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
544                 Register addr_base, int semantics, bool cmpxchgx_hint = false,
545                 Register int_flag_success = noreg, Label* failed = nullptr, bool contention_hint = false, bool weak = false);
546 
547   // interface method calling
548   void lookup_interface_method(Register recv_klass,
549                                Register intf_klass,
550                                RegisterOrConstant itable_index,
551                                Register method_result,
552                                Register temp_reg, Register temp2_reg,
553                                Label& no_such_interface,
554                                bool return_method = true);
555 
556   // virtual method calling
557   void lookup_virtual_method(Register recv_klass,
558                              RegisterOrConstant vtable_index,
559                              Register method_result);
560 
561   // Test sub_klass against super_klass, with fast and slow paths.
562 
563   // The fast path produces a tri-state answer: yes / no / maybe-slow.
564   // One of the three labels can be null, meaning take the fall-through.
565   // If super_check_offset is -1, the value is loaded up from super_klass.
566   // No registers are killed, except temp_reg and temp2_reg.
567   // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
568   void check_klass_subtype_fast_path(Register sub_klass,
569                                      Register super_klass,
570                                      Register temp1_reg,
571                                      Register temp2_reg,
572                                      Label* L_success,
573                                      Label* L_failure,
574                                      Label* L_slow_path = nullptr, // default fall through
575                                      RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
576 
577   // The rest of the type check; must be wired to a corresponding fast path.
578   // It does not repeat the fast path logic, so don't use it standalone.
579   // The temp_reg can be noreg, if no temps are available.
580   // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
581   // Updates the sub's secondary super cache as necessary.
582   void check_klass_subtype_slow_path(Register sub_klass,
583                                      Register super_klass,
584                                      Register temp1_reg,
585                                      Register temp2_reg,
586                                      Label* L_success = nullptr,
587                                      Register result_reg = noreg);
588 
589   // Simplified, combined version, good for typical uses.
590   // Falls through on failure.
591   void check_klass_subtype(Register sub_klass,
592                            Register super_klass,
593                            Register temp1_reg,
594                            Register temp2_reg,
595                            Label& L_success);
596 
597   void clinit_barrier(Register klass,
598                       Register thread,
599                       Label* L_fast_path = nullptr,
600                       Label* L_slow_path = nullptr);
601 
  // Method handle support (JSR 292).
  // Takes an argument slot (register or constant) and returns an offset that
  // is again a register or a constant; extra_slot_offset defaults to 0.
  // NOTE(review): how temp_reg is used is defined by the implementation.
  RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0);
604 
  // Continuation fast-path bookkeeping (push/pop pair).
  void push_cont_fastpath();
  void pop_cont_fastpath();
  // Increment / decrement the held monitor count; tmp is a scratch register.
  void inc_held_monitor_count(Register tmp);
  void dec_held_monitor_count(Register tmp);
  // Lightweight locking support. `failed` / `slow` are the labels for the
  // case that the operation cannot be completed here; `semantics` is an int
  // (NOTE(review): presumably a combination of the MemBar* values -- confirm).
  void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
  void lightweight_lock(Register obj, Register hdr, Register t1, Label& slow);
  void lightweight_unlock(Register obj, Register hdr, Label& slow);
612 
613   // Allocation (for C1). tlab_allocate emits the fast path only; slow_case takes over if it fails.
614   void tlab_allocate(
615     Register obj,                      // result: pointer to object after successful allocation
616     Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
617     int      con_size_in_bytes,        // object size in bytes if   known at compile time
618     Register t1,                       // temp register
619     Label&   slow_case                 // continuation point if fast allocation fails
620   );
621   void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2); // size as register or constant; t1/t2 presumably temps
622 
623   enum { trampoline_stub_size = 6 * 4 }; // presumably 6 instructions of 4 bytes each -- TODO confirm
624   address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg); // Rtoc is optional (noreg by default)
625 
626   void atomic_inc_ptr(Register addr, Register result, int simm16 = 1); // simm16 defaults to 1; presumably the signed 16-bit increment
627   void atomic_ori_int(Register addr, Register result, int uimm16);     // uimm16: presumably the unsigned 16-bit mask to OR in
628 
629 #if INCLUDE_RTM_OPT // RTM locking emitters; declared only when INCLUDE_RTM_OPT is set.
630   void rtm_counters_update(Register abort_status, Register rtm_counters);
631   void branch_on_random_using_tb(Register tmp, int count, Label& brLabel); // 'tb' presumably refers to the time base -- TODO confirm
632   void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
633                                    Metadata* method_data);
634   void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
635                      RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
636   void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
637                                Label& retryLabel, Label* checkRetry = nullptr); // checkRetry is optional
638   void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
639   void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
640                          Register retry_on_abort_count,
641                          RTMLockingCounters* stack_rtm_counters,
642                          Metadata* method_data, bool profile_rtm,
643                          Label& DONE_LABEL, Label& IsInflated);
644   void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
645                             Register retry_on_busy_count, Register retry_on_abort_count,
646                             RTMLockingCounters* rtm_counters,
647                             Metadata* method_data, bool profile_rtm,
648                             Label& DONE_LABEL);
649 #endif // INCLUDE_RTM_OPT
650 
651   void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, // NOTE(review): outcome presumably reported in 'flag' -- confirm
652                                  Register tmp1, Register tmp2, Register tmp3,
653                                  RTMLockingCounters* rtm_counters = nullptr,          // the RTM-related arguments are optional ...
654                                  RTMLockingCounters* stack_rtm_counters = nullptr,
655                                  Metadata* method_data = nullptr,
656                                  bool use_rtm = false, bool profile_rtm = false);     // ... and RTM is off by default
657 
658   void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, // NOTE(review): outcome presumably reported in 'flag' -- confirm
659                                    Register tmp1, Register tmp2, Register tmp3,
660                                    bool use_rtm = false);                             // RTM is off by default
661 
662   // Check if a safepoint is requested and, if so, branch to slow_path.
663   void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);
664 
665   void resolve_jobject(Register value, Register tmp1, Register tmp2,
666                        MacroAssembler::PreservationLevel preservation_level);        // which registers to preserve if runtime code is called
667   void resolve_global_jobject(Register value, Register tmp1, Register tmp2,
668                               MacroAssembler::PreservationLevel preservation_level); // which registers to preserve if runtime code is called
669 
670   // Support for managing the JavaThread pointer (i.e., the reference to
671   // thread-local information).
672 
673   // Support for the last Java frame (but use call_VM instead where possible):
674   // accesses R16_thread->last_Java_sp.
675   void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
676   void reset_last_Java_frame(void);
677   void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1); // tmp1: presumably a scratch register -- TODO confirm
678 
679   // Read vm result from thread: oop_result = R16_thread->result;
680   void get_vm_result  (Register oop_result);
681   void get_vm_result_2(Register metadata_result); // metadata counterpart (presumably read from the thread as well -- TODO confirm)
682 
683   static bool needs_explicit_null_check(intptr_t offset); // query by access offset; exact rule is defined out of line
684   static bool uses_implicit_null_check(void* address);    // query by address; exact rule is defined out of line
685 
686   // Trap-instruction-based checks.
687   // Range checks can be distinguished from zero checks: range checks compare
688   // 32 bits (tw), zero checks compare all 64 bits (td).
689   inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual); // traps on "equal" unless another condition is given
690   static bool is_trap_null_check(int x) {  // Does x match one of the tdi forms accepted as a null-check trap?
691     if (is_tdi(x, traptoEqual, -1/*any reg*/, 0)) return true;      // tdi with the "equal" condition
692     return is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0);  // tdi with the "unsigned greater than" condition
693   }
694 
695   inline void trap_ic_miss_check(Register a, Register b);
696   static bool is_trap_ic_miss_check(int x) { // matches a td whose condition is "unsigned greater or unsigned less", i.e. not equal
697     return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/);
698   }
699 
700   // Implicit or explicit null check; jumps to the static address exception_entry.
701   inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
702   inline void null_check(Register a, int offset, Label *Lis_null); // implicit check only if Lis_null is not provided
703 
704   // Access heap oop, handle encoding and GC barriers.
705   // Some GC barriers call C, so pick the preservation_level that matches what must be preserved at the current call site.
706   inline void access_store_at(BasicType type, DecoratorSet decorators,
707                               Register base, RegisterOrConstant ind_or_offs, Register val,
708                               Register tmp1, Register tmp2, Register tmp3,
709                               MacroAssembler::PreservationLevel preservation_level);
710   inline void access_load_at(BasicType type, DecoratorSet decorators,
711                              Register base, RegisterOrConstant ind_or_offs, Register dst,
712                              Register tmp1, Register tmp2,
713                              MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null = nullptr); // L_handle_null is optional
714 
715  public:
716   // Specify tmp1 for better code in certain compressed oops cases. Specify L_handle_null to bail out on null oop.
717   // tmp1, tmp2 and preservation_level are used with decorators ON_PHANTOM_OOP_REF or ON_WEAK_OOP_REF.
718   inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1,
719                             Register tmp1, Register tmp2,
720                             MacroAssembler::PreservationLevel preservation_level,
721                             DecoratorSet decorators = 0, Label *L_handle_null = nullptr);
722 
723   inline void store_heap_oop(Register d, RegisterOrConstant offs, Register s1,  // NOTE(review): roles of d and s1 are not documented here -- confirm in the inline header
724                              Register tmp1, Register tmp2, Register tmp3,
725                              MacroAssembler::PreservationLevel preservation_level, DecoratorSet decorators = 0); // no extra decorators by default
726 
727   // Encode/decode heap oop. The oop must not be null, else en/decoding goes wrong.
728   // src == d is allowed; src defaults to noreg. Returns a Register (presumably the one holding the result -- TODO confirm).
729   inline Register encode_heap_oop_not_null(Register d, Register src = noreg);
730   inline Register decode_heap_oop_not_null(Register d, Register src = noreg);
731 
732   // Variants for which a null oop is allowed.
733   inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier!
734   inline void decode_heap_oop(Register d);                   // single register: presumably decodes d in place
735 
736   // Load/store klass oop from/to the klass field. Compress.
737   void load_klass(Register dst, Register src);
738   void load_klass_check_null(Register dst, Register src, Label* is_null = nullptr); // is_null is optional; presumably the branch target for a null src
739   void store_klass(Register dst_oop, Register klass, Register tmp = R0);            // tmp defaults to R0
740   void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
741 
742   void resolve_oop_handle(Register result, Register tmp1, Register tmp2,              // NOTE(review): presumably resolves the handle held in 'result' in place -- confirm
743                           MacroAssembler::PreservationLevel preservation_level);
744   void resolve_weak_handle(Register result, Register tmp1, Register tmp2,             // NOTE(review): presumably resolves the handle held in 'result' in place -- confirm
745                            MacroAssembler::PreservationLevel preservation_level);
746   void load_method_holder(Register holder, Register method);                          // by parameter naming: 'holder' is the destination, 'method' the source
747 
748   static int instr_size_for_decode_klass_not_null();              // size of the code emitted by decode_klass_not_null (unit not stated here)
749   void decode_klass_not_null(Register dst, Register src = noreg); // src is optional; presumably dst is used as source when src is noreg
750   Register encode_klass_not_null(Register dst, Register src = noreg); // src is optional; returns a Register (presumably the result register)
751 
752   // SIGTRAP-based range checks for arrays. Each check has a matching is_trap_range_check_* recognizer.
753   inline void trap_range_check_l(Register a, Register b);
754   inline void trap_range_check_l(Register a, int si16);
755   static bool is_trap_range_check_l(int x) {  // matches either the tw or the twi form with "unsigned less than"
756     if (is_tw(x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/)) return true;
757     return is_twi(x, traptoLessThanUnsigned, -1/*any reg*/);
758   }
759   inline void trap_range_check_le(Register a, int si16);
760   static bool is_trap_range_check_le(int x) { // twi form only, condition "equal or unsigned less than"
761     return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/);
762   }
763   inline void trap_range_check_g(Register a, int si16);
764   static bool is_trap_range_check_g(int x) {  // twi form only, condition "unsigned greater than"
765     return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/);
766   }
767   inline void trap_range_check_ge(Register a, Register b);
768   inline void trap_range_check_ge(Register a, int si16);
769   static bool is_trap_range_check_ge(int x) {  // matches either the tw or the twi form with "equal or unsigned greater than"
770     if (is_tw(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/)) return true;
771     return is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/);
772   }
773   static bool is_trap_range_check(int x) {  // true if x is any of the l / le / g / ge range-check traps
774     if (is_trap_range_check_l(x) || is_trap_range_check_le(x)) return true;
775     return is_trap_range_check_g(x) || is_trap_range_check_ge(x);
776   }
777 
778   void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0);             // count (cnt_dwords) is a C++ int, i.e. fixed when the code is emitted
779   void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0);                             // count (cnt_dwords) is a C++ int, i.e. fixed when the code is emitted
780   void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1); // count in a register; const_cnt = -1 presumably means "not known as a constant"
781 
782   // Emitters for the BigInteger.multiplyToLen intrinsic (multiply_to_len is the entry; the others are its building blocks, presumably).
783   inline void multiply64(Register dest_hi, Register dest_lo,   // NOTE(review): presumably dest_hi:dest_lo = x * y -- confirm
784                          Register x, Register y);
785   void add2_with_carry(Register dest_hi, Register dest_lo,     // NOTE(review): presumably adds src1 and src2 into dest_hi:dest_lo -- confirm
786                        Register src1, Register src2);
787   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
788                              Register y, Register y_idx, Register z,
789                              Register carry, Register product_high, Register product,
790                              Register idx, Register kdx, Register tmp);
791   void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
792                               Register yz_idx, Register idx, Register carry,
793                               Register product_high, Register product, Register tmp,
794                               int offset);
795   void multiply_128_x_128_loop(Register x_xstart,
796                                Register y, Register z,
797                                Register yz_idx, Register idx, Register carry,
798                                Register product_high, Register product,
799                                Register carry2, Register tmp);
800   void muladd(Register out, Register in, Register offset, Register len, Register k,
801               Register tmp1, Register tmp2, Register carry);
802   void multiply_to_len(Register x, Register xlen,               // x/xlen and y/ylen: the operands; z/zlen: the result (by parameter naming)
803                        Register y, Register ylen,
804                        Register z, Register zlen,
805                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
806                        Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
807                        Register tmp11, Register tmp12, Register tmp13);
808 
809   // Emitters for CRC32 calculation.
810   // A note on invertCRC:
811   //   Unfortunately, the internal representation of crc differs between CRC32 and CRC32C.
812   //   CRC32 holds its current crc value in the externally visible representation.
813   //   CRC32C holds its current crc value in internal format, ready for updating.
814   //   Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
815   //   In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
816   //   The bool invertCRC parameter indicates whether bit-flipping is required before updates.
817   void load_reverse_32(Register dst, Register src);
818   int  crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
819   void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
820   void update_byte_crc32(Register crc, Register val, Register table);
821   void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
822                              Register data, bool loopAlignment);
823   void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
824                           Register t0,  Register t1,  Register t2,  Register t3,
825                           Register tc0, Register tc1, Register tc2, Register tc3);
826   void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
827                           Register t0,  Register t1,  Register t2,  Register t3,
828                           Register tc0, Register tc1, Register tc2, Register tc3,
829                           bool invertCRC);
830   void kernel_crc32_vpmsum(Register crc, Register buf, Register len, Register constants,
831                            Register t0, Register t1, Register t2, Register t3, Register t4,
832                            Register t5, Register t6, bool invertCRC);
833   void kernel_crc32_vpmsum_aligned(Register crc, Register buf, Register len, Register constants,
834                                    Register t0, Register t1, Register t2, Register t3, Register t4,
835                                    Register t5, Register t6);
836   // Version which internally decides what to use; is_crc32c selects between CRC32 and CRC32C.
837   void crc32(Register crc, Register buf, Register len, Register t0, Register t1, Register t2,
838              Register t3, Register t4, Register t5, Register t6, Register t7, bool is_crc32c);
839 
840   void kernel_crc32_singleByteReg(Register crc, Register val, Register table, // by its name: the single byte is passed in register 'val'
841                                   bool invertCRC);                            // whether the crc must be bit-flipped before the update
842 
843   // SHA-2 auxiliary functions (private) and public interfaces (sha256, sha512).
844  private:
845   void sha256_deque(const VectorRegister src,
846       const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
847   void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
848   void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw); // h_cnt is passed by reference, so the callee can update it
849   void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
850       const int total_ws, const Register k, const VectorRegister* kpws,
851       const int total_kpws);
852   void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
853       const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
854       const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
855       const Register j, const Register k);
856   void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
857       const VectorRegister c, const VectorRegister d, const VectorRegister e,
858       const VectorRegister f, const VectorRegister g, const VectorRegister h,
859       const Register hptr);
860 
861   void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);            // ws/total_ws: array of vector registers and its length
862   void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);       // hs/total_hs: array of vector registers and its length
863   void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);  // h_cnt is passed by reference, so the callee can update it
864   void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
865   void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
866       const VectorRegister w2, const VectorRegister w3,
867       const VectorRegister w4, const VectorRegister w5,
868       const VectorRegister w6, const VectorRegister w7,
869       const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
870       const VectorRegister vRb, const Register k);
871 
872  public:
873   void sha256(bool multi_block); // public SHA-256 emitter; multi_block presumably selects the multi-block variant -- TODO confirm
874   void sha512(bool multi_block); // public SHA-512 emitter; multi_block presumably selects the multi-block variant -- TODO confirm
875 
876   void cache_wb(Address line);          // NOTE(review): presumably writes back the cache line at 'line' -- confirm
877   void cache_wbsync(bool is_presync);   // NOTE(review): is_presync presumably distinguishes the sync before from the one after the write-backs -- confirm
878 
879   //
880   // Debugging
881   //
882 
883   // Assert on cr0: check_equal says whether "equal" (true) or "not equal" (false) is expected.
884   void asm_assert(bool check_equal, const char* msg);
885   void asm_assert_eq(const char* msg) { asm_assert(true, msg); }  // shorthand for check_equal == true
886   void asm_assert_ne(const char* msg) { asm_assert(false, msg); } // shorthand for check_equal == false
887 
888  private:
889   void asm_assert_mems_zero(bool check_equal, int size, int mem_offset, Register mem_base, // size: the mem8 wrappers pass 8 (presumably bytes)
890                             const char* msg);                                              // check_equal: true = must be zero, false = must not be zero (as used by the wrappers)
891 
892  public:
893 
894   void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg) {    // asserts that the value at mem_base + mem_offset is zero
895     asm_assert_mems_zero(true,  8, mem_offset, mem_base, msg);
896   }
897   void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg) { // asserts that the value at mem_base + mem_offset is not zero
898     asm_assert_mems_zero(false, 8, mem_offset, mem_base, msg);
899   }
900 
901   // Calls verify_oop. If UseCompressedOops is on, decodes the oop first.
902   // Preserves reg.
903   void verify_coop(Register reg, const char*);
904   // Emit code to verify that reg contains a valid oop if +VerifyOops is set.
905   void verify_oop(Register reg, const char* s = "broken oop");
906   void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop"); // variant taking base + offs instead of a register holding the oop
907 
908   // TODO: verify method and klass metadata (compare against vptr?). Until then both functions are empty no-ops.
909   void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
910   void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
911 
912   // Convenience method returning the function entry. In the ELFv1 case it
913   // creates a function descriptor at the current address and returns
914   // the pointer to it. In the ELFv2 case it returns the current address.
915   inline address function_entry();
916 
917 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) // appends the stringified argument to the message
918 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)   // appends the stringified argument to the message
919 
920  private:
921   void stop(int type, const char* msg); // common implementation behind stop/untested/unimplemented/should_not_reach_here; 'type' is one of the stop_* codes
922 
923  public:
924   enum {                                   // 'type' codes handed to the private stop(int, const char*)
925     stop_stop               = 0,           // used by stop()
926     stop_untested           = 1,           // used by untested()
927     stop_unimplemented      = 2,           // used by unimplemented()
928     stop_shouldnotreachhere = 3,           // used by should_not_reach_here()
929     stop_msg_present        = -0x8000      // NOTE(review): presumably a flag marking that a message is attached -- confirm
930   };
931 
932   // Prints msg, dumps registers and stops execution. msg is optional; each wrapper passes its own stop_* type code.
933   void stop                 (const char* msg = nullptr) { stop(stop_stop,               msg); }
934   void untested             (const char* msg = nullptr) { stop(stop_untested,           msg); }
935   void unimplemented        (const char* msg = nullptr) { stop(stop_unimplemented,      msg); }
936   void should_not_reach_here(const char* msg = nullptr) { stop(stop_shouldnotreachhere, msg); }
937 
938   void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN; // PRODUCT_RETURN: presumably an empty body in product builds -- TODO confirm
939 };
940 
941 // class SkipIfEqualZero:
942 //
943 // Instantiating this class will result in assembly code being output that will
944 // jump around any code emitted between the creation of the instance and its
945 // automatic destruction at the end of a scope block, depending on the value of
946 // the flag passed to the constructor, which will be checked at run-time.
947 class SkipIfEqualZero : public StackObj {
948  private:
949   MacroAssembler* _masm;   // presumably the assembler the skip code is emitted with -- TODO confirm
950   Label _label;            // presumably the jump target bound at destruction -- TODO confirm
951 
952  public:
953    // 'temp' is a temp register that this object can use (and trash).
954    explicit SkipIfEqualZero(MacroAssembler*, Register temp, const bool* flag_addr);
955    static void skip_to_label_if_equal_zero(MacroAssembler*, Register temp,         // static variant taking an explicit label
956                                            const bool* flag_addr, Label& label);
957    ~SkipIfEqualZero();
958 };
959 
960 #endif // CPU_PPC_MACROASSEMBLER_PPC_HPP