1 /*
  2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2014, 2020 Red Hat Inc. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "asm/assembler.hpp"
 27 #include "asm/assembler.inline.hpp"
 28 #include "asm/macroAssembler.hpp"
 29 #include "compiler/disassembler.hpp"
 30 #include "immediate_aarch64.hpp"
 31 #include "memory/resourceArea.hpp"
 32 #include "metaprogramming/primitiveConversions.hpp"
 33 
#ifndef PRODUCT
// Debug-build-only breakpoint address for the assembler. The value is a
// hard-coded, developer-chosen pc -- presumably edited by hand when
// debugging a particular code buffer (its consumer is not in this file).
const uintptr_t Assembler::asm_bp = 0x0000ffffac221240;
#endif
 37 
 38 static float unpack(unsigned value);
 39 
// Total size in bytes of each SIMD arrangement, indexed by
// SIMD_Arrangement (elements-per-vector x element size).
short Assembler::SIMD_Size_in_bytes[] = {
  // T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
       8,   16,   8,  16,   8,  16,   8,  16,  16
};
 44 
// Lookup table from element size in bytes (row index) and the Q bit
// (column index) to the matching SIMD arrangement. Sizes that are not a
// power of two up to 8 map to INVALID_ARRANGEMENT; esize2arrangement()
// guarantees against returning those entries.
Assembler::SIMD_Arrangement Assembler::_esize2arrangement_table[9][2] = {
  // esize        isQ:false             isQ:true
  /*   0  */      {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
  /*   1  */      {T8B,                 T16B},
  /*   2  */      {T4H,                 T8H},
  /*   3  */      {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
  /*   4  */      {T2S,                 T4S},
  /*   5  */      {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
  /*   6  */      {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
  /*   7  */      {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
  /*   8  */      {T1D,                 T2D}
  };
 57 
// Lookup table from element size in bytes to the scalar SIMD register
// variant. Unsupported sizes map to INVALID, which
// elemBytes_to_regVariant() guarantees against.
Assembler::SIMD_RegVariant Assembler::_esize2regvariant[9] = {
  INVALID,
  B,       // 1 byte
  H,       // 2 bytes
  INVALID,
  S,       // 4 bytes
  INVALID,
  INVALID,
  INVALID,
  D,       // 8 bytes
};
 69 
 70 Assembler::SIMD_Arrangement Assembler::esize2arrangement(unsigned esize, bool isQ) {
 71   guarantee(esize < ARRAY_SIZE(_esize2arrangement_table) &&
 72          _esize2arrangement_table[esize][isQ] != INVALID_ARRANGEMENT, "unsupported element size");
 73   return _esize2arrangement_table[esize][isQ];
 74 }
 75 
 76 Assembler::SIMD_RegVariant Assembler::elemBytes_to_regVariant(unsigned esize) {
 77   guarantee(esize < ARRAY_SIZE(_esize2regvariant) && _esize2regvariant[esize] != INVALID,
 78          "unsupported element size");
 79   return _esize2regvariant[esize];
 80 }
 81 
 82 Assembler::SIMD_RegVariant Assembler::elemType_to_regVariant(BasicType bt) {
 83   return elemBytes_to_regVariant(type2aelembytes(bt));
 84 }
 85 
 86 unsigned Assembler::regVariant_to_elemBits(Assembler::SIMD_RegVariant T){
 87   guarantee(T != Q, "Invalid register variant");
 88   return 1 << (T + 3);
 89 }
 90 
 91 void Assembler::emit_data64(jlong data,
 92                             relocInfo::relocType rtype,
 93                             int format) {
 94   if (rtype == relocInfo::none) {
 95     emit_int64(data);
 96   } else {
 97     emit_data64(data, Relocation::spec_simple(rtype), format);
 98   }
 99 }
100 
// Emit a 64-bit data word with the given relocation attached. The
// relocation is recorded at the enclosing instruction's mark rather
// than at the data word itself, so callers must be inside an
// InstructionMark scope.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {

  assert(inst_mark() != nullptr, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
  emit_int64(data);
}
111 
112 extern "C" {
113   void das(uint64_t start, int len) {
114     ResourceMark rm;
115     len <<= 2;
116     if (len < 0)
117       Disassembler::decode((address)start + len, (address)start);
118     else
119       Disassembler::decode((address)start, (address)start + len);
120   }
121 
122   JNIEXPORT void das1(uintptr_t insn) {
123     das(insn, 1);
124   }
125 }
126 
127 #define __ as->
128 
// Materialize this Address's effective address into register r,
// emitting code through 'as'.
void Address::lea(MacroAssembler *as, Register r) const {
  switch(_mode) {
  case base_plus_offset: {
    if (offset() == 0 && base() == r) // it's a nop
      break;
    // Use add for non-negative offsets and sub for negative ones so
    // the immediate handed to the instruction is always non-negative.
    if (offset() > 0)
      __ add(r, base(), offset());
    else
      __ sub(r, base(), -offset());
    break;
  }
  case base_plus_offset_reg: {
    // r = base + extended/shifted index. A negative shift encodes "no
    // shift", hence the clamp to zero.
    __ add(r, base(), index(), ext().op(), MAX2(ext().shift(), 0));
    break;
  }
  case literal: {
    // Record the relocation at the current instruction mark, then load
    // the literal address. With no relocation, mov suffices; otherwise
    // movptr is used -- presumably the patchable fixed-length form
    // (movptr's definition is not visible in this file).
    as->code_section()->relocate(as->inst_mark(), rspec());
    if (rspec().type() == relocInfo::none)
      __ mov(r, target());
    else
      __ movptr(r, (uint64_t)target());
    break;
  }
  default:
    ShouldNotReachHere();
  }
}
156 
157 #undef __
158 
159 #define starti Instruction_aarch64 current_insn(this);
160 
161 #define f current_insn.f
162 #define sf current_insn.sf
163 #define rf current_insn.rf
164 #define srf current_insn.srf
165 #define zrf current_insn.zrf
166 #define prf current_insn.prf
167 #define pgrf current_insn.pgrf
168 #define fixed current_insn.fixed
169 
  // ADR: form a pc-relative address. The byte offset is split into its
  // low two bits (immlo, placed in bits 30:29) and the remaining signed
  // high part (immhi, placed in bits 23:5).
  void Assembler::adr(Register Rd, address adr) {
    intptr_t offset = adr - pc();
    int offset_lo = offset & 3;
    offset >>= 2;
    starti;
    f(0, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
    rf(Rd, 0);
  }
178 
  // ADRP: form a pc-relative address to a 4KB page. The offset is
  // computed between 4K pages (both addresses shifted right by 12) and
  // then encoded like adr: low two bits in 30:29, signed remainder in
  // bits 23:5. Bit 31 = 1 distinguishes ADRP from ADR.
  void Assembler::_adrp(Register Rd, address adr) {
    uint64_t pc_page = (uint64_t)pc() >> 12;
    uint64_t adr_page = (uint64_t)adr >> 12;
    intptr_t offset = adr_page - pc_page;
    int offset_lo = offset & 3;
    offset >>= 2;
    starti;
    f(1, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
    zrf(Rd, 0);
  }
189 
190 // This encoding is similar (but not quite identical) to the encoding used
191 // by literal ld/st. see JDK-8324123.
192 // PRFM does not support writeback or pre/post index.
// Emit a PRFM (prefetch memory) instruction for the given address and
// prefetch operation.
void Assembler::prfm(const Address &adr, prfop pfop) {
  Address::mode mode = adr.getMode();
  // PRFM does not support pre/post index
  guarantee((mode != Address::pre) && (mode != Address::post), "prfm does not support pre/post indexing");
  if (mode == Address::literal) {
    // PRFM (literal): pc-relative with a signed word offset in 23:5.
    starti;
    f(0b11, 31, 30), f(0b011, 29, 27), f(0b000, 26, 24);
    f(pfop, 4, 0);
    // Scale the byte offset down to a 4-byte-word offset.
    int64_t offset = (adr.target() - pc()) >> 2;
    sf(offset, 23, 5);
  } else {
    assert((mode == Address::base_plus_offset)
            || (mode == Address::base_plus_offset_reg), "must be base_plus_offset/base_plus_offset_reg");
    // Register/immediate forms share the general ld/st encoding; the
    // prefetch operation number goes where a target register would.
    ld_st2(as_Register(pfop), adr, 0b11, 0b10);
  }
}
209 
210 // An "all-purpose" add/subtract immediate, per ARM documentation:
211 // A "programmer-friendly" assembler may accept a negative immediate
212 // between -(2^24 -1) and -1 inclusive, causing it to convert a
213 // requested ADD operation to a SUB, or vice versa, and then encode
214 // the absolute value of the immediate as for uimm24.
// Emit an add/sub-immediate instruction, flipping the operation to its
// negated counterpart ('negated_op') when the caller passes a negative
// immediate, and selecting the LSL-#12 shifted form for large
// immediates that are exact multiples of 4096.
void Assembler::add_sub_immediate(Instruction_aarch64 &current_insn,
                                  Register Rd, Register Rn, unsigned uimm, int op,
                                  int negated_op) {
  bool sets_flags = op & 1;   // this op sets flags
  // View the caller's unsigned immediate as signed so a negative
  // request can be detected and converted (e.g. add -x -> sub x).
  union {
    unsigned u;
    int imm;
  };
  u = uimm;
  bool shift = false;
  bool neg = imm < 0;
  if (neg) {
    imm = -imm;
    op = negated_op;
  }
  assert(Rd != sp || imm % 16 == 0, "misaligned stack");
  // A nonzero exact multiple of 4096 does not fit the plain 12-bit
  // field; encode it with the shift (LSL #12) bit set instead.
  if (imm >= (1 << 11)
      && ((imm >> 12) << 12 == imm)) {
    imm >>= 12;
    shift = true;
  }
  f(op, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10);

  // add/subtract immediate ops with the S bit set treat r31 as zr;
  // with S unset they use sp.
  if (sets_flags)
    zrf(Rd, 0);
  else
    srf(Rd, 0);

  srf(Rn, 5);
}
247 
248 #undef f
249 #undef sf
250 #undef rf
251 #undef srf
252 #undef zrf
253 #undef prf
254 #undef pgrf
255 #undef fixed
256 
257 #undef starti
258 
259 #ifdef ASSERT
260 
// Debug check: this Address must use the literal addressing mode.
void Address::assert_is_literal() const {
  assert(_mode == literal, "addressing mode is non-literal: %d", _mode);
}
264 
// Debug check: this Address must have a real, non-literal addressing
// mode (neither literal nor no_mode).
void Address::assert_is_nonliteral() const {
  assert(_mode != literal, "unexpected literal addressing mode");
  assert(_mode != no_mode, "unexpected no_mode addressing mode");
}
269 
270 #endif // ASSERT
271 
// Build the RelocationHolder matching the given relocation type for a
// literal address; word-type relocations embed 'target' in the spec.
static RelocationHolder address_relocation(address target, relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    return RelocationHolder::none;
  case relocInfo::external_word_type:
    return external_word_Relocation::spec(target);
  case relocInfo::internal_word_type:
    return internal_word_Relocation::spec(target);
  case relocInfo::opt_virtual_call_type:
    return opt_virtual_call_Relocation::spec();
  case relocInfo::static_call_type:
    return static_call_Relocation::spec();
  case relocInfo::runtime_call_type:
    return runtime_call_Relocation::spec();
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    return Relocation::spec_simple(rtype);
  case relocInfo::none:
    return RelocationHolder::none;
  default:
    ShouldNotReachHere();
    return RelocationHolder::none;
  }
}
301 
// Construct a literal Address for 'target' carrying the relocation
// implied by 'rtype' (see address_relocation above).
Address::Address(address target, relocInfo::relocType rtype) :
  _mode(literal),
  _literal(target, address_relocation(target, rtype))
{}
306 
// Unconditional branch to an Address, recording its relocation (if
// any) at the branch instruction before emitting it.
void Assembler::b(const Address &dest) {
  code_section()->relocate(pc(), dest.rspec());
  b(dest.target());
}
311 
// Branch-with-link to an Address, recording its relocation (if any) at
// the branch instruction before emitting it.
void Assembler::bl(const Address &dest) {
  code_section()->relocate(pc(), dest.rspec());
  bl(dest.target());
}
316 
// ADR to an Address, recording its relocation (if any) at the
// instruction before emitting it.
void Assembler::adr(Register r, const Address &dest) {
  code_section()->relocate(pc(), dest.rspec());
  adr(r, dest.target());
}
321 
322 void Assembler::br(Condition cc, Label &L) {
323   if (L.is_bound()) {
324     br(cc, target(L));
325   } else {
326     L.add_patch_at(code(), locator());
327     br(cc, pc());
328   }
329 }
330 
331 void Assembler::wrap_label(Label &L,
332                                  Assembler::uncond_branch_insn insn) {
333   if (L.is_bound()) {
334     (this->*insn)(target(L));
335   } else {
336     L.add_patch_at(code(), locator());
337     (this->*insn)(pc());
338   }
339 }
340 
341 void Assembler::wrap_label(Register r, Label &L,
342                                  compare_and_branch_insn insn) {
343   if (L.is_bound()) {
344     (this->*insn)(r, target(L));
345   } else {
346     L.add_patch_at(code(), locator());
347     (this->*insn)(r, pc());
348   }
349 }
350 
351 void Assembler::wrap_label(Register r, int bitpos, Label &L,
352                                  test_and_branch_insn insn) {
353   if (L.is_bound()) {
354     (this->*insn)(r, bitpos, target(L));
355   } else {
356     L.add_patch_at(code(), locator());
357     (this->*insn)(r, bitpos, pc());
358   }
359 }
360 
361 void Assembler::wrap_label(Label &L, prfop op, prefetch_insn insn) {
362   if (L.is_bound()) {
363     (this->*insn)(target(L), op);
364   } else {
365     L.add_patch_at(code(), locator());
366     (this->*insn)(pc(), op);
367   }
368 }
369 
370 bool Assembler::operand_valid_for_add_sub_immediate(int64_t imm) {
371   return operand_valid_for_immediate_bits(imm, 12);
372 }
373 
374 bool Assembler::operand_valid_for_sve_add_sub_immediate(int64_t imm) {
375   return operand_valid_for_immediate_bits(imm, 8);
376 }
377 
378 bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) {
379   return encode_logical_immediate(is32, imm) != 0xffffffff;
380 }
381 
382 // Check immediate encoding for movi.
383 // Return the shift amount which can be {0, 8, 16, 24} for B/H/S types. As the D type
384 // movi does not have shift variant, in this case the return value is the immediate
385 // after encoding.
386 // Return -1 if the input imm64 can not be encoded.
int Assembler::operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T) {
  if (T == T1D || T == T2D) {
     // To encode into movi, the 64-bit imm must be in the form of
     // 'aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh'
     // and encoded in "a:b:c:d:e:f:g:h".
     uint64_t tmp = imm64;
     uint64_t one_byte = 0;
     // Every byte of imm64 must be all-ones or all-zeros.
     for (int i = 0; i < 8; i++) {
       one_byte = tmp & 0xffULL;
       if (one_byte != 0xffULL && one_byte != 0) {
         return -1; // can not be encoded
       }
       tmp = tmp >> 8;
     }

     // Gather one bit per byte (bit 0 of each) into the low 8 bits,
     // producing the packed "abcdefgh" immediate.
     imm64 &= 0x0101010101010101ULL;
     imm64 |= (imm64 >> 7);
     imm64 |= (imm64 >> 14);
     imm64 |= (imm64 >> 28);

     return imm64 & 0xff;
  }

  // For B/H/S element sizes the immediate must be a single byte,
  // optionally shifted left by 0/8/16/24 bits depending on the width;
  // the return value is that shift amount.
  uint32_t imm32 = imm64 & 0xffffffffULL;
  if (T == T8B || T == T16B) {       // 8-bit variant
    if (0 == (imm32 & ~0xff))        return 0;
  } else if(T == T4H || T == T8H) {  // 16-bit variant
    if (0 == (imm32 & ~0xff))        return 0;
    if (0 == (imm32 & ~0xff00))      return 8;
  } else if (T == T2S || T == T4S) { // 32-bit variant
    if (0 == (imm32 & ~0xff))        return 0;
    if (0 == (imm32 & ~0xff00))      return 8;
    if (0 == (imm32 & ~0xff0000))    return 16;
    if (0 == (imm32 & ~0xff000000))  return 24;
  } else {
    assert(false, "unsupported");
    ShouldNotReachHere();
  }

  // No byte/shift combination matched: not encodable.
  return -1;
}
428 
429 bool Assembler::operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm) {
430   return encode_sve_logical_immediate(elembits, imm) != 0xffffffff;
431 }
432 
433 static uint64_t doubleTo64Bits(jdouble d) {
434   union {
435     jdouble double_value;
436     uint64_t double_bits;
437   };
438 
439   double_value = d;
440   return double_bits;
441 }
442 
443 bool Assembler::operand_valid_for_float_immediate(double imm) {
444   // If imm is all zero bits we can use ZR as the source of a
445   // floating-point value.
446   if (doubleTo64Bits(imm) == 0)
447     return true;
448 
449   // Otherwise try to encode imm then convert the encoded value back
450   // and make sure it's the exact same bit pattern.
451   unsigned result = encoding_for_fp_immediate(imm);
452   return doubleTo64Bits(imm) == fp_immediate_for_encoding(result, true);
453 }
454 
455 int AbstractAssembler::code_fill_byte() {
456   return 0;
457 }
458 
// n.b. this is implemented in subclass MacroAssembler
// Calling the base-class version directly is an error.
void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); }
461 
462 bool asm_util::operand_valid_for_immediate_bits(int64_t imm, unsigned nbits) {
463   guarantee(nbits == 8 || nbits == 12, "invalid nbits value");
464   uint64_t uimm = (uint64_t)uabs((jlong)imm);
465   if (uimm < (UCONST64(1) << nbits))
466     return true;
467   if (uimm < (UCONST64(1) << (2 * nbits))
468       && ((uimm >> nbits) << nbits == uimm)) {
469     return true;
470   }
471   return false;
472 }
473 
474 // and now the routines called by the assembler which encapsulate the
475 // above encode and decode functions
476 
477 uint32_t
478 asm_util::encode_logical_immediate(bool is32, uint64_t imm)
479 {
480   if (is32) {
481     /* Allow all zeros or all ones in top 32-bits, so that
482        constant expressions like ~1 are permitted. */
483     if (imm >> 32 != 0 && imm >> 32 != 0xffffffff)
484       return 0xffffffff;
485     /* Replicate the 32 lower bits to the 32 upper bits.  */
486     imm &= 0xffffffff;
487     imm |= imm << 32;
488   }
489 
490   return encoding_for_logical_immediate(imm);
491 }
492 
// Encode an SVE logical (bitmask) immediate for the given element size
// in bits; 0xffffffff signals an unencodable value.
uint32_t
asm_util::encode_sve_logical_immediate(unsigned elembits, uint64_t imm) {
  guarantee(elembits == 8 || elembits == 16 ||
            elembits == 32 || elembits == 64, "unsupported element size");
  // Mask of all bits above one element. Shift in two halves because a
  // single shift by 64 (when elembits == 64) would be undefined.
  uint64_t upper = UCONST64(-1) << (elembits/2) << (elembits/2);
  /* Allow all zeros or all ones in top bits, so that
   * constant expressions like ~1 are permitted. */
  if ((imm & ~upper) != imm && (imm | upper) != imm)
    return 0xffffffff;

  // Replicate the immediate in different element sizes to 64 bits.
  imm &= ~upper;
  for (unsigned i = elembits; i < 64; i *= 2) {
    imm |= (imm << i);
  }

  return encoding_for_logical_immediate(imm);
}
511 
// Encode 'value' as an 8-bit floating-point immediate. The guarantee
// round-trips the encoding through unpack() (the float result is
// widened back to double for the comparison) to ensure the value is
// exactly representable.
unsigned Assembler::pack(double value) {
  float val = (float)value;
  unsigned result = encoding_for_fp_immediate(val);
  guarantee(unpack(result) == value,
            "Invalid floating-point immediate operand");
  return result;
}
519 
520 // Packed operands for  Floating-point Move (immediate)
521 
// Decode an 8-bit fp-immediate encoding back to its float value.
static float unpack(unsigned value) {
  // The 'unsigned' intermediate keeps only the low 32 bits of whatever
  // fp_immediate_for_encoding returns (the single-precision pattern,
  // selected by the 0 argument); cast<float> needs a 4-byte source.
  unsigned ival = fp_immediate_for_encoding(value, 0);
  return PrimitiveConversions::cast<float>(ival);
}
526 
527 address Assembler::locate_next_instruction(address inst) {
528   return inst + Assembler::instruction_size;
529 }