1 /*
  2  * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #ifndef CPU_AARCH64_REGISTER_AARCH64_HPP
 27 #define CPU_AARCH64_REGISTER_AARCH64_HPP
 28 
 29 #include "asm/register.hpp"
 30 #include "utilities/checkedCast.hpp"
 31 #include "utilities/powerOfTwo.hpp"
 32 
 33 class VMRegImpl;
 34 typedef VMRegImpl* VMReg;
 35 
 36 class Register {
 37  private:
 38   int _encoding;
 39 
 40   constexpr explicit Register(int encoding) : _encoding(encoding) {}
 41 
 42  public:
 43   enum {
 44     number_of_registers          = 32,
 45     number_of_declared_registers = 34,  // Including SP and ZR.
 46     max_slots_per_register       =  2
 47   };
 48 
 49   class RegisterImpl: public AbstractRegisterImpl {
 50     friend class Register;
 51 
 52     static constexpr const RegisterImpl* first();
 53 
 54    public:
 55     // accessors
 56     constexpr int raw_encoding() const { return checked_cast<int>(this - first()); }
 57     constexpr int     encoding() const { assert(is_valid(), "invalid register"); return raw_encoding(); }
 58     constexpr bool    is_valid() const { return 0 <= raw_encoding() && raw_encoding() < number_of_registers; }
 59 
 60     // derived registers, offsets, and addresses
 61     inline Register successor() const;
 62 
 63     VMReg as_VMReg() const;
 64 
 65     const char* name() const;
 66   };
 67 
 68   inline friend constexpr Register as_Register(int encoding);
 69 
 70   constexpr Register() : _encoding(-1) {} // noreg
 71 
 72   int operator==(const Register r) const { return _encoding == r._encoding; }
 73   int operator!=(const Register r) const { return _encoding != r._encoding; }
 74 
 75   constexpr const RegisterImpl* operator->() const { return RegisterImpl::first() + _encoding; }
 76 
 77   // Actually available GP registers for use, depending on actual CPU capabilities and flags.
 78   static int available_gp_registers() {
 79     return number_of_registers;
 80   }
 81 };
 82 
// Backing array for the RegisterImpl objects. It has one entry more than
// number_of_declared_registers: RegisterImpl::first() points at index 1, so
// index 0 is what Register::operator-> yields for encoding -1 (noreg).
extern Register::RegisterImpl all_RegisterImpls[Register::number_of_declared_registers + 1] INTERNAL_VISIBILITY;
 84 
 85 inline constexpr const Register::RegisterImpl* Register::RegisterImpl::first() {
 86   return all_RegisterImpls + 1;
 87 }
 88 
// The "no register" handle: a default-constructed Register, whose encoding is -1.
constexpr Register noreg = Register();
 90 
 91 inline constexpr Register as_Register(int encoding) {
 92   if (0 <= encoding && encoding < Register::number_of_declared_registers) {
 93     return Register(encoding);
 94   }
 95   return noreg;
 96 }
 97 
 98 inline Register Register::RegisterImpl::successor() const {
 99   assert(is_valid(), "sanity");
100   return as_Register(encoding() + 1);
101 }
102 
// The integer registers of the AArch64 architecture
// r0 .. r17 are compile-time handles with encodings 0 .. 17.
constexpr Register r0  = as_Register( 0);
constexpr Register r1  = as_Register( 1);
constexpr Register r2  = as_Register( 2);
constexpr Register r3  = as_Register( 3);
constexpr Register r4  = as_Register( 4);
constexpr Register r5  = as_Register( 5);
constexpr Register r6  = as_Register( 6);
constexpr Register r7  = as_Register( 7);
constexpr Register r8  = as_Register( 8);
constexpr Register r9  = as_Register( 9);
constexpr Register r10 = as_Register(10);
constexpr Register r11 = as_Register(11);
constexpr Register r12 = as_Register(12);
constexpr Register r13 = as_Register(13);
constexpr Register r14 = as_Register(14);
constexpr Register r15 = as_Register(15);
constexpr Register r16 = as_Register(16);
constexpr Register r17 = as_Register(17);
122 
// In the ABI for Windows+AArch64 the register r18 is used to store the pointer
// to the current thread's TEB (where TLS variables are stored). We could
// carefully save and restore r18 at key places, however Win32 Structured
// Exception Handling (SEH) is using TLS to unwind the stack. If r18 is used
// for any other purpose at the time of an exception happening, SEH would not
// be able to unwind the stack properly and most likely crash.
//
// It's easier to avoid allocating r18 altogether.
//
// See https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#integer-registers
//
// Encoding 18 is therefore exposed under the name r18_tls rather than r18;
// r19 .. r30 follow with encodings 19 .. 30.
constexpr Register r18_tls = as_Register(18);
constexpr Register r19     = as_Register(19);
constexpr Register r20     = as_Register(20);
constexpr Register r21     = as_Register(21);
constexpr Register r22     = as_Register(22);
constexpr Register r23     = as_Register(23);
constexpr Register r24     = as_Register(24);
constexpr Register r25     = as_Register(25);
constexpr Register r26     = as_Register(26);
constexpr Register r27     = as_Register(27);
constexpr Register r28     = as_Register(28);
constexpr Register r29     = as_Register(29);
constexpr Register r30     = as_Register(30);
146 
147 
// r31 is not a general purpose register, but represents either the
// stack pointer or the zero/discard register depending on the
// instruction.
//
// zr and sp are given the distinct encodings 32 and 33. These lie outside
// [0, Register::number_of_registers) but inside
// [0, Register::number_of_declared_registers), so as_Register() accepts them
// while RegisterImpl::is_valid() is false for them.
constexpr Register r31_sp = as_Register(31);
constexpr Register zr     = as_Register(32);
constexpr Register sp     = as_Register(33);

// Used as a filler in instructions where a register field is unused.
// It is a copy of r31_sp, i.e. encoding 31.
constexpr Register dummy_reg = r31_sp;
157 
158 
159 // The implementation of floating point registers for the architecture
160 class FloatRegister {
161  private:
162   int _encoding;
163 
164   constexpr explicit FloatRegister(int encoding) : _encoding(encoding) {}
165 
166  public:
167   inline friend constexpr FloatRegister as_FloatRegister(int encoding);
168 
169   enum {
170     number_of_registers     = 32,
171     max_slots_per_register  =  4,
172     save_slots_per_register =  2,
173     slots_per_neon_register =  4,
174     extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register,
175     neon_vl = 16,
176     // VLmax: The maximum sve vector length is determined by the hardware
177     // sve_vl_min <= VLmax <= sve_vl_max.
178     sve_vl_min = 16,
179     // Maximum supported vector length across all CPUs
180     sve_vl_max = 256
181   };
182 
183   class FloatRegisterImpl: public AbstractRegisterImpl {
184     friend class FloatRegister;
185 
186     static constexpr const FloatRegisterImpl* first();
187 
188    public:
189     // accessors
190     constexpr int raw_encoding() const { return checked_cast<int>(this - first()); }
191     constexpr int     encoding() const { assert(is_valid(), "invalid register"); return raw_encoding(); }
192     constexpr bool    is_valid() const { return 0 <= raw_encoding() && raw_encoding() < number_of_registers; }
193 
194     // derived registers, offsets, and addresses
195     inline FloatRegister successor() const;
196 
197     VMReg as_VMReg() const;
198 
199     const char* name() const;
200   };
201 
202   constexpr FloatRegister() : _encoding(-1) {} // fnoreg
203 
204   int operator==(const FloatRegister r) const { return _encoding == r._encoding; }
205   int operator!=(const FloatRegister r) const { return _encoding != r._encoding; }
206 
207   constexpr const FloatRegisterImpl* operator->() const { return FloatRegisterImpl::first() + _encoding; }
208 };
209 
// Backing array for the FloatRegisterImpl objects. It has one entry more than
// number_of_registers: FloatRegisterImpl::first() points at index 1, so
// index 0 is what FloatRegister::operator-> yields for encoding -1 (fnoreg).
extern FloatRegister::FloatRegisterImpl all_FloatRegisterImpls[FloatRegister::number_of_registers + 1] INTERNAL_VISIBILITY;
211 
212 inline constexpr const FloatRegister::FloatRegisterImpl* FloatRegister::FloatRegisterImpl::first() {
213   return all_FloatRegisterImpls + 1;
214 }
215 
// The "no register" handle: a default-constructed FloatRegister, whose encoding is -1.
constexpr FloatRegister fnoreg = FloatRegister();
217 
218 inline constexpr FloatRegister as_FloatRegister(int encoding) {
219   if (0 <= encoding && encoding < FloatRegister::number_of_registers) {
220     return FloatRegister(encoding);
221   }
222   return fnoreg;
223 }
224 
225 inline FloatRegister FloatRegister::FloatRegisterImpl::successor() const {
226   assert(is_valid(), "sanity");
227   return as_FloatRegister((encoding() + 1) % number_of_registers);
228 }
229 
// The float registers of the AArch64 architecture
// v0 .. v31 are compile-time handles with encodings 0 .. 31.
constexpr FloatRegister v0  = as_FloatRegister( 0);
constexpr FloatRegister v1  = as_FloatRegister( 1);
constexpr FloatRegister v2  = as_FloatRegister( 2);
constexpr FloatRegister v3  = as_FloatRegister( 3);
constexpr FloatRegister v4  = as_FloatRegister( 4);
constexpr FloatRegister v5  = as_FloatRegister( 5);
constexpr FloatRegister v6  = as_FloatRegister( 6);
constexpr FloatRegister v7  = as_FloatRegister( 7);
constexpr FloatRegister v8  = as_FloatRegister( 8);
constexpr FloatRegister v9  = as_FloatRegister( 9);
constexpr FloatRegister v10 = as_FloatRegister(10);
constexpr FloatRegister v11 = as_FloatRegister(11);
constexpr FloatRegister v12 = as_FloatRegister(12);
constexpr FloatRegister v13 = as_FloatRegister(13);
constexpr FloatRegister v14 = as_FloatRegister(14);
constexpr FloatRegister v15 = as_FloatRegister(15);
constexpr FloatRegister v16 = as_FloatRegister(16);
constexpr FloatRegister v17 = as_FloatRegister(17);
constexpr FloatRegister v18 = as_FloatRegister(18);
constexpr FloatRegister v19 = as_FloatRegister(19);
constexpr FloatRegister v20 = as_FloatRegister(20);
constexpr FloatRegister v21 = as_FloatRegister(21);
constexpr FloatRegister v22 = as_FloatRegister(22);
constexpr FloatRegister v23 = as_FloatRegister(23);
constexpr FloatRegister v24 = as_FloatRegister(24);
constexpr FloatRegister v25 = as_FloatRegister(25);
constexpr FloatRegister v26 = as_FloatRegister(26);
constexpr FloatRegister v27 = as_FloatRegister(27);
constexpr FloatRegister v28 = as_FloatRegister(28);
constexpr FloatRegister v29 = as_FloatRegister(29);
constexpr FloatRegister v30 = as_FloatRegister(30);
constexpr FloatRegister v31 = as_FloatRegister(31);
263 
// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0].
// Each zN is a copy of the vN handle, so zN == vN and both carry encoding N.
constexpr FloatRegister z0  = v0;
constexpr FloatRegister z1  = v1;
constexpr FloatRegister z2  = v2;
constexpr FloatRegister z3  = v3;
constexpr FloatRegister z4  = v4;
constexpr FloatRegister z5  = v5;
constexpr FloatRegister z6  = v6;
constexpr FloatRegister z7  = v7;
constexpr FloatRegister z8  = v8;
constexpr FloatRegister z9  = v9;
constexpr FloatRegister z10 = v10;
constexpr FloatRegister z11 = v11;
constexpr FloatRegister z12 = v12;
constexpr FloatRegister z13 = v13;
constexpr FloatRegister z14 = v14;
constexpr FloatRegister z15 = v15;
constexpr FloatRegister z16 = v16;
constexpr FloatRegister z17 = v17;
constexpr FloatRegister z18 = v18;
constexpr FloatRegister z19 = v19;
constexpr FloatRegister z20 = v20;
constexpr FloatRegister z21 = v21;
constexpr FloatRegister z22 = v22;
constexpr FloatRegister z23 = v23;
constexpr FloatRegister z24 = v24;
constexpr FloatRegister z25 = v25;
constexpr FloatRegister z26 = v26;
constexpr FloatRegister z27 = v27;
constexpr FloatRegister z28 = v28;
constexpr FloatRegister z29 = v29;
constexpr FloatRegister z30 = v30;
constexpr FloatRegister z31 = v31;
297 
298 
299 // The implementation of predicate registers for the architecture
300 class PRegister {
301   int _encoding;
302 
303   constexpr explicit PRegister(int encoding) : _encoding(encoding) {}
304 
305 public:
306   inline friend constexpr PRegister as_PRegister(int encoding);
307 
308   enum {
309     number_of_registers = 16,
310     number_of_governing_registers = 8,
311     max_slots_per_register = 1
312   };
313 
314   constexpr PRegister() : _encoding(-1) {} // pnoreg
315 
316   class PRegisterImpl: public AbstractRegisterImpl {
317     friend class PRegister;
318 
319     static constexpr const PRegisterImpl* first();
320 
321    public:
322     // accessors
323     int raw_encoding() const  { return checked_cast<int>(this - first()); }
324     int encoding() const      { assert(is_valid(), "invalid register"); return raw_encoding(); }
325     bool is_valid() const     { return 0 <= raw_encoding() && raw_encoding() < number_of_registers; }
326     bool is_governing() const { return 0 <= raw_encoding() && raw_encoding() < number_of_governing_registers; }
327 
328     // derived registers, offsets, and addresses
329     inline PRegister successor() const;
330 
331     VMReg as_VMReg() const;
332 
333     const char* name() const;
334   };
335 
336   int operator==(const PRegister r) const { return _encoding == r._encoding; }
337   int operator!=(const PRegister r) const { return _encoding != r._encoding; }
338 
339   const PRegisterImpl* operator->() const { return PRegisterImpl::first() + _encoding; }
340 };
341 
// Backing array for the PRegisterImpl objects. It has one entry more than
// number_of_registers: PRegisterImpl::first() points at index 1, so
// index 0 is what PRegister::operator-> yields for encoding -1 (pnoreg).
extern PRegister::PRegisterImpl all_PRegisterImpls[PRegister::number_of_registers + 1] INTERNAL_VISIBILITY;
343 
344 inline constexpr const PRegister::PRegisterImpl* PRegister::PRegisterImpl::first() {
345   return all_PRegisterImpls + 1;
346 }
347 
// The "no register" handle: a default-constructed PRegister, whose encoding is -1.
constexpr PRegister pnoreg = PRegister();
349 
350 inline constexpr PRegister as_PRegister(int encoding) {
351   if (0 <= encoding && encoding < PRegister::number_of_registers) {
352     return PRegister(encoding);
353   }
354   return pnoreg;
355 }
356 
357 inline PRegister PRegister::PRegisterImpl::successor() const {
358   assert(is_valid(), "sanity");
359   return as_PRegister(encoding() + 1);
360 }
361 
// The predicate registers of SVE.
// p0 .. p15 are compile-time handles with encodings 0 .. 15. Only p0 .. p7
// satisfy is_governing(), since number_of_governing_registers is 8.
constexpr PRegister p0  = as_PRegister( 0);
constexpr PRegister p1  = as_PRegister( 1);
constexpr PRegister p2  = as_PRegister( 2);
constexpr PRegister p3  = as_PRegister( 3);
constexpr PRegister p4  = as_PRegister( 4);
constexpr PRegister p5  = as_PRegister( 5);
constexpr PRegister p6  = as_PRegister( 6);
constexpr PRegister p7  = as_PRegister( 7);
constexpr PRegister p8  = as_PRegister( 8);
constexpr PRegister p9  = as_PRegister( 9);
constexpr PRegister p10 = as_PRegister(10);
constexpr PRegister p11 = as_PRegister(11);
constexpr PRegister p12 = as_PRegister(12);
constexpr PRegister p13 = as_PRegister(13);
constexpr PRegister p14 = as_PRegister(14);
constexpr PRegister p15 = as_PRegister(15);
379 
380 // Need to know the total number of registers of all sorts for SharedInfo.
381 // Define a class that exports it.
382 class ConcreteRegisterImpl : public AbstractRegisterImpl {
383  public:
384   enum {
385     max_gpr = Register::number_of_registers * Register::max_slots_per_register,
386     max_fpr = max_gpr + FloatRegister::number_of_registers * FloatRegister::max_slots_per_register,
387     max_pr  = max_fpr + PRegister::number_of_registers * PRegister::max_slots_per_register,
388 
389     // A big enough number for C2: all the registers plus flags
390     // This number must be large enough to cover REG_COUNT (defined by c2) registers.
391     // There is no requirement that any ordering here matches any ordering c2 gives
392     // it's optoregs.
393     number_of_registers = max_pr + 1 // gpr/fpr/pr + flags
394   };
395 };
396 
397 typedef AbstractRegSet<Register> RegSet;
398 typedef AbstractRegSet<FloatRegister> FloatRegSet;
399 typedef AbstractRegSet<PRegister> PRegSet;
400 
401 template <>
402 inline Register AbstractRegSet<Register>::first() {
403   if (_bitset == 0) { return noreg; }
404   return as_Register(count_trailing_zeros(_bitset));
405 }
406 
407 template <>
408 inline FloatRegister AbstractRegSet<FloatRegister>::first() {
409   if (_bitset == 0) { return fnoreg; }
410   return as_FloatRegister(count_trailing_zeros(_bitset));
411 }
412 
413 inline Register as_Register(FloatRegister reg) {
414   return as_Register(reg->encoding());
415 }
416 
// High-level register class of an OptoReg or a VMReg register.
// rc_bad is the first enumerator and therefore has the value 0.
enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
419 
420 // AArch64 Vector Register Sequence management support
421 //
422 // VSeq implements an indexable (by operator[]) vector register
423 // sequence starting from a fixed base register and with a fixed delta
424 // (defaulted to 1, but sometimes 0 or 2) e.g. VSeq<4>(16) will return
425 // registers v16, ... v19 for indices 0, ... 3.
426 //
427 // Generator methods may iterate across sets of VSeq<4> to schedule an
428 // operation 4 times using distinct input and output registers,
429 // profiting from 4-way instruction parallelism.
430 //
431 // A VSeq<2> can be used to specify registers loaded with special
432 // constants e.g. <v30, v31> --> <MONT_Q, MONT_Q_INV_MOD_R>.
433 //
434 // A VSeq with base n and delta 0 can be used to generate code that
435 // combines values in another VSeq with the constant in register vn.
436 //
437 // A VSeq with base n and delta 2 can be used to select an odd or even
438 // indexed set of registers.
439 //
440 // Methods which accept arguments of type VSeq<8>, may split their
441 // inputs into front and back halves or odd and even halves (see
442 // convenience methods below).
443 
// helper macro for computing register masks: yields an int in which only
// bit (base + delta * i) is set. Every argument is parenthesized so that a
// compound argument (e.g. a delta of `d + 1`) expands with the intended
// precedence.
#define VS_MASK_BIT(base, delta, i) (1 << ((base) + (delta) * (i)))
446 
447 template<int N> class VSeq {
448   static_assert(N >= 2, "vector sequence length must be greater than 1");
449 private:
450   int _base;  // index of first register in sequence
451   int _delta; // increment to derive successive indices
452 public:
453   VSeq(FloatRegister base_reg, int delta = 1) : VSeq(base_reg->encoding(), delta) { }
454   VSeq(int base, int delta = 1) : _base(base), _delta(delta) {
455     assert (_base >= 0 && _base <= 31, "invalid base register");
456     assert ((_base + (N - 1) * _delta) >= 0, "register range underflow");
457     assert ((_base + (N - 1) * _delta) < 32, "register range overflow");
458   }
459   // indexed access to sequence
460   FloatRegister operator [](int i) const {
461     assert (0 <= i && i < N, "index out of bounds");
462     return as_FloatRegister(_base + i * _delta);
463   }
464   int mask() const {
465     int m = 0;
466     for (int i = 0; i < N; i++) {
467       m |= VS_MASK_BIT(_base, _delta, i);
468     }
469     return m;
470   }
471   int base() const { return _base; }
472   int delta() const { return _delta; }
473   bool is_constant() const { return _delta == 0; }
474 };
475 
476 // methods for use in asserts to check VSeq inputs and outputs are
477 // either disjoint or equal
478 
479 template<int N, int M> bool vs_disjoint(const VSeq<N>& n, const VSeq<M>& m) { return (n.mask() & m.mask()) == 0; }
480 template<int N> bool vs_same(const VSeq<N>& n, const VSeq<N>& m) { return n.mask() == m.mask(); }
481 
482 // method for use in asserts to check whether registers appearing in
483 // an output sequence will be written before they are read from an
484 // input sequence.
485 
486 template<int N> bool vs_write_before_read(const VSeq<N>& vout, const VSeq<N>& vin) {
487   int b_in = vin.base();
488   int d_in = vin.delta();
489   int b_out = vout.base();
490   int d_out = vout.delta();
491   int bit_in = 1 << b_in;
492   int bit_out = 1 << b_out;
493   int mask_read = vin.mask();   // all pending reads
494   int mask_write = 0;         // no writes as yet
495 
496 
497   for (int i = 0; i < N - 1; i++) {
498     // check whether a pending read clashes with a write
499     if ((mask_write & mask_read) != 0) {
500       return true;
501     }
502     // remove the pending input (so long as this is a constant
503     // sequence)
504     if (d_in != 0) {
505       mask_read ^= VS_MASK_BIT(b_in, d_in, i);
506     }
507     // record the next write
508     mask_write |= VS_MASK_BIT(b_out, d_out, i);
509   }
510   // no write before read
511   return false;
512 }
513 
514 // convenience methods for splitting 8-way or 4-way vector register
515 // sequences in half -- needed because vector operations can normally
516 // benefit from 4-way instruction parallelism or, occasionally, 2-way
517 // parallelism
518 
519 template<int N>
520 VSeq<N/2> vs_front(const VSeq<N>& v) {
521   static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
522   return VSeq<N/2>(v.base(), v.delta());
523 }
524 
525 template<int N>
526 VSeq<N/2> vs_back(const VSeq<N>& v) {
527   static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
528   return VSeq<N/2>(v.base() + N / 2 * v.delta(), v.delta());
529 }
530 
531 template<int N>
532 VSeq<N/2> vs_even(const VSeq<N>& v) {
533   static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
534   return VSeq<N/2>(v.base(), v.delta() * 2);
535 }
536 
537 template<int N>
538 VSeq<N/2> vs_odd(const VSeq<N>& v) {
539   static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
540   return VSeq<N/2>(v.base() + v.delta(), v.delta() * 2);
541 }
542 
543 // convenience method to construct a vector register sequence that
544 // indexes its elements in reverse order to the original
545 
546 template<int N>
547 VSeq<N> vs_reverse(const VSeq<N>& v) {
548   return VSeq<N>(v.base() + (N - 1) * v.delta(), -v.delta());
549 }
550 
551 #endif // CPU_AARCH64_REGISTER_AARCH64_HPP