1 /*
  2  * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #ifndef SHARE_OPTO_REGMASK_HPP
 26 #define SHARE_OPTO_REGMASK_HPP
 27 
 28 #include "code/vmreg.hpp"
 29 #include "opto/optoreg.hpp"
 30 #include "utilities/count_leading_zeros.hpp"
 31 #include "utilities/count_trailing_zeros.hpp"
 32 
 33 class LRG;
 34 
 35 //-------------Non-zero bit search methods used by RegMask---------------------
 36 // Find lowest 1, undefined if empty/0
 37 static unsigned int find_lowest_bit(uintptr_t mask) {
 38   return count_trailing_zeros(mask);
 39 }
 40 // Find highest 1, undefined if empty/0
 41 static unsigned int find_highest_bit(uintptr_t mask) {
 42   return count_leading_zeros(mask) ^ (BitsPerWord - 1U);
 43 }
 44 
 45 //------------------------------RegMask----------------------------------------
 46 // The ADL file describes how to print the machine-specific registers, as well
 47 // as any notion of register classes.  We provide a register mask, which is
 48 // just a collection of Register numbers.
 49 
 50 // The ADLC defines 2 macros, RM_SIZE and FORALL_BODY.
 51 // RM_SIZE is the size of a register mask in 32-bit words.
 52 // FORALL_BODY replicates a BODY macro once per word in the register mask.
 53 // The usage is somewhat clumsy and limited to the regmask.[h,c]pp files.
 54 // However, it means the ADLC can redefine the unroll macro and all loops
 55 // over register masks will be unrolled by the correct amount.
 56 
 57 class RegMask {
 58 
 59   friend class RegMaskIterator;
 60 
 61   // The RM_SIZE is aligned to 64-bit - assert that this holds
 62   LP64_ONLY(STATIC_ASSERT(is_aligned(RM_SIZE, 2)));
 63 
 64   static const unsigned int _WordBitMask = BitsPerWord - 1U;
 65   static const unsigned int _LogWordBits = LogBitsPerWord;
 66   static const unsigned int _RM_SIZE     = LP64_ONLY(RM_SIZE >> 1) NOT_LP64(RM_SIZE);
 67   static const unsigned int _RM_MAX      = _RM_SIZE - 1U;
 68 
 69   union {
 70     // Array of Register Mask bits.  This array is large enough to cover
 71     // all the machine registers and all parameters that need to be passed
 72     // on the stack (stack registers) up to some interesting limit.  Methods
 73     // that need more parameters will NOT be compiled.  On Intel, the limit
 74     // is something like 90+ parameters.
 75     int       _RM_I[RM_SIZE];
 76     uintptr_t _RM_UP[_RM_SIZE];
 77   };
 78 
 79   // The low and high water marks represents the lowest and highest word
 80   // that might contain set register mask bits, respectively. We guarantee
 81   // that there are no bits in words outside this range, but any word at
 82   // and between the two marks can still be 0.
 83   unsigned int _lwm;
 84   unsigned int _hwm;
 85 
 86  public:
 87   enum { CHUNK_SIZE = _RM_SIZE * BitsPerWord };
 88 
 89   // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
 90   // Also, consider the maximum alignment size for a normally allocated
 91   // value.  Since we allocate register pairs but not register quads (at
 92   // present), this alignment is SlotsPerLong (== 2).  A normally
 93   // aligned allocated register is either a single register, or a pair
 94   // of adjacent registers, the lower-numbered being even.
 95   // See also is_aligned_Pairs() below, and the padding added before
 96   // Matcher::_new_SP to keep allocated pairs aligned properly.
 97   // If we ever go to quad-word allocations, SlotsPerQuad will become
 98   // the controlling alignment constraint.  Note that this alignment
 99   // requirement is internal to the allocator, and independent of any
100   // particular platform.
101   enum { SlotsPerLong = 2,
102          SlotsPerVecA = 8,
103          SlotsPerVecS = 1,
104          SlotsPerVecD = 2,
105          SlotsPerVecX = 4,
106          SlotsPerVecY = 8,
107          SlotsPerVecZ = 16,
108          SlotsPerRegVectMask = X86_ONLY(2) NOT_X86(1)
109          };
110 
111   // A constructor only used by the ADLC output.  All mask fields are filled
112   // in directly.  Calls to this look something like RM(1,2,3,4);
113   RegMask(
114 #   define BODY(I) int a##I,
115     FORALL_BODY
116 #   undef BODY
117     int dummy = 0) {
118 #if defined(VM_LITTLE_ENDIAN) || !defined(_LP64)
119 #   define BODY(I) _RM_I[I] = a##I;
120 #else
121     // We need to swap ints.
122 #   define BODY(I) _RM_I[I ^ 1] = a##I;
123 #endif
124     FORALL_BODY
125 #   undef BODY
126     _lwm = 0;
127     _hwm = _RM_MAX;
128     while (_hwm > 0      && _RM_UP[_hwm] == 0) _hwm--;
129     while ((_lwm < _hwm) && _RM_UP[_lwm] == 0) _lwm++;
130     assert(valid_watermarks(), "post-condition");
131   }
132 
133   // Handy copying constructor
134   RegMask(RegMask *rm) {
135     _hwm = rm->_hwm;
136     _lwm = rm->_lwm;
137     for (unsigned i = 0; i < _RM_SIZE; i++) {
138       _RM_UP[i] = rm->_RM_UP[i];
139     }
140     assert(valid_watermarks(), "post-condition");
141   }
142 
143   // Construct an empty mask
144   RegMask() : _RM_UP(), _lwm(_RM_MAX), _hwm(0) {
145     assert(valid_watermarks(), "post-condition");
146   }
147 
148   // Construct a mask with a single bit
149   RegMask(OptoReg::Name reg) : RegMask() {
150     Insert(reg);
151   }
152 
153   // Check for register being in mask
154   bool Member(OptoReg::Name reg) const {
155     assert(reg < CHUNK_SIZE, "");
156 
157     unsigned r = (unsigned)reg;
158     return _RM_UP[r >> _LogWordBits] & (uintptr_t(1) << (r & _WordBitMask));
159   }
160 
161   // The last bit in the register mask indicates that the mask should repeat
162   // indefinitely with ONE bits.  Returns TRUE if mask is infinite or
163   // unbounded in size.  Returns FALSE if mask is finite size.
164   bool is_AllStack() const {
165     return (_RM_UP[_RM_MAX] & (uintptr_t(1) << _WordBitMask)) != 0;
166   }
167 
168   void set_AllStack() {
169     _RM_UP[_RM_MAX] |= (uintptr_t(1) << _WordBitMask);
170   }
171 
172   // Test for being a not-empty mask.
173   bool is_NotEmpty() const {
174     assert(valid_watermarks(), "sanity");
175     uintptr_t tmp = 0;
176     for (unsigned i = _lwm; i <= _hwm; i++) {
177       tmp |= _RM_UP[i];
178     }
179     return tmp;
180   }
181 
182   // Find lowest-numbered register from mask, or BAD if mask is empty.
183   OptoReg::Name find_first_elem() const {
184     assert(valid_watermarks(), "sanity");
185     for (unsigned i = _lwm; i <= _hwm; i++) {
186       uintptr_t bits = _RM_UP[i];
187       if (bits) {
188         return OptoReg::Name((i << _LogWordBits) + find_lowest_bit(bits));
189       }
190     }
191     return OptoReg::Name(OptoReg::Bad);
192   }
193 
194   // Get highest-numbered register from mask, or BAD if mask is empty.
195   OptoReg::Name find_last_elem() const {
196     assert(valid_watermarks(), "sanity");
197     // Careful not to overflow if _lwm == 0
198     unsigned i = _hwm + 1;
199     while (i > _lwm) {
200       uintptr_t bits = _RM_UP[--i];
201       if (bits) {
202         return OptoReg::Name((i << _LogWordBits) + find_highest_bit(bits));
203       }
204     }
205     return OptoReg::Name(OptoReg::Bad);
206   }
207 
208   // Clear out partial bits; leave only aligned adjacent bit pairs.
209   void clear_to_pairs();
210 
211 #ifdef ASSERT
212   // Verify watermarks are sane, i.e., within bounds and that no
213   // register words below or above the watermarks have bits set.
214   bool valid_watermarks() const {
215     assert(_hwm < _RM_SIZE, "_hwm out of range: %d", _hwm);
216     assert(_lwm < _RM_SIZE, "_lwm out of range: %d", _lwm);
217     for (unsigned i = 0; i < _lwm; i++) {
218       assert(_RM_UP[i] == 0, "_lwm too high: %d regs at: %d", _lwm, i);
219     }
220     for (unsigned i = _hwm + 1; i < _RM_SIZE; i++) {
221       assert(_RM_UP[i] == 0, "_hwm too low: %d regs at: %d", _hwm, i);
222     }
223     return true;
224   }
225 #endif // !ASSERT
226 
227   // Test that the mask contains only aligned adjacent bit pairs
228   bool is_aligned_pairs() const;
229 
230   // mask is a pair of misaligned registers
231   bool is_misaligned_pair() const;
232   // Test for single register
233   bool is_bound1() const;
234   // Test for a single adjacent pair
235   bool is_bound_pair() const;
236   // Test for a single adjacent set of ideal register's size.
237   bool is_bound(uint ireg) const;
238 
239   // Check that whether given reg number with size is valid
240   // for current regmask, where reg is the highest number.
241   bool is_valid_reg(OptoReg::Name reg, const int size) const;
242 
243   // Find the lowest-numbered register set in the mask.  Return the
244   // HIGHEST register number in the set, or BAD if no sets.
245   // Assert that the mask contains only bit sets.
246   OptoReg::Name find_first_set(LRG &lrg, const int size) const;
247 
248   // Clear out partial bits; leave only aligned adjacent bit sets of size.
249   void clear_to_sets(const unsigned int size);
250   // Smear out partial bits to aligned adjacent bit sets.
251   void smear_to_sets(const unsigned int size);
252   // Test that the mask contains only aligned adjacent bit sets
253   bool is_aligned_sets(const unsigned int size) const;
254 
255   // Test for a single adjacent set
256   bool is_bound_set(const unsigned int size) const;
257 
258   static bool is_vector(uint ireg);
259   static int num_registers(uint ireg);
260   static int num_registers(uint ireg, LRG &lrg);
261 
262   // Fast overlap test.  Non-zero if any registers in common.
263   bool overlap(const RegMask &rm) const {
264     assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
265     unsigned hwm = MIN2(_hwm, rm._hwm);
266     unsigned lwm = MAX2(_lwm, rm._lwm);
267     uintptr_t result = 0;
268     for (unsigned i = lwm; i <= hwm; i++) {
269       result |= _RM_UP[i] & rm._RM_UP[i];
270     }
271     return result;
272   }
273 
274   // Special test for register pressure based splitting
275   // UP means register only, Register plus stack, or stack only is DOWN
276   bool is_UP() const;
277 
278   // Clear a register mask
279   void Clear() {
280     _lwm = _RM_MAX;
281     _hwm = 0;
282     memset(_RM_UP, 0, sizeof(uintptr_t) * _RM_SIZE);
283     assert(valid_watermarks(), "sanity");
284   }
285 
286   // Fill a register mask with 1's
287   void Set_All() {
288     _lwm = 0;
289     _hwm = _RM_MAX;
290     memset(_RM_UP, 0xFF, sizeof(uintptr_t) * _RM_SIZE);
291     assert(valid_watermarks(), "sanity");
292   }
293 
294   // Insert register into mask
295   void Insert(OptoReg::Name reg) {
296     assert(reg != OptoReg::Bad, "sanity");
297     assert(reg != OptoReg::Special, "sanity");
298     assert(reg < CHUNK_SIZE, "sanity");
299     assert(valid_watermarks(), "pre-condition");
300     unsigned r = (unsigned)reg;
301     unsigned index = r >> _LogWordBits;
302     if (index > _hwm) _hwm = index;
303     if (index < _lwm) _lwm = index;
304     _RM_UP[index] |= (uintptr_t(1) << (r & _WordBitMask));
305     assert(valid_watermarks(), "post-condition");
306   }
307 
308   // Remove register from mask
309   void Remove(OptoReg::Name reg) {
310     assert(reg < CHUNK_SIZE, "");
311     unsigned r = (unsigned)reg;
312     _RM_UP[r >> _LogWordBits] &= ~(uintptr_t(1) << (r & _WordBitMask));
313   }
314 
315   // OR 'rm' into 'this'
316   void OR(const RegMask &rm) {
317     assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
318     // OR widens the live range
319     if (_lwm > rm._lwm) _lwm = rm._lwm;
320     if (_hwm < rm._hwm) _hwm = rm._hwm;
321     for (unsigned i = _lwm; i <= _hwm; i++) {
322       _RM_UP[i] |= rm._RM_UP[i];
323     }
324     assert(valid_watermarks(), "sanity");
325   }
326 
327   // AND 'rm' into 'this'
328   void AND(const RegMask &rm) {
329     assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
330     // Do not evaluate words outside the current watermark range, as they are
331     // already zero and an &= would not change that
332     for (unsigned i = _lwm; i <= _hwm; i++) {
333       _RM_UP[i] &= rm._RM_UP[i];
334     }
335     // Narrow the watermarks if &rm spans a narrower range.
336     // Update after to ensure non-overlapping words are zeroed out.
337     if (_lwm < rm._lwm) _lwm = rm._lwm;
338     if (_hwm > rm._hwm) _hwm = rm._hwm;
339   }
340 
341   // Subtract 'rm' from 'this'
342   void SUBTRACT(const RegMask &rm) {
343     assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
344     unsigned hwm = MIN2(_hwm, rm._hwm);
345     unsigned lwm = MAX2(_lwm, rm._lwm);
346     for (unsigned i = lwm; i <= hwm; i++) {
347       _RM_UP[i] &= ~rm._RM_UP[i];
348     }
349   }
350 
351   // Compute size of register mask: number of bits
352   uint Size() const;
353 
354 #ifndef PRODUCT
355   void print() const { dump(); }
356   void dump(outputStream *st = tty) const; // Print a mask
357 #endif
358 
359   static const RegMask Empty;   // Common empty mask
360   static const RegMask All;     // Common all mask
361 
362   static bool can_represent(OptoReg::Name reg) {
363     // NOTE: -1 in computation reflects the usage of the last
364     //       bit of the regmask as an infinite stack flag and
365     //       -7 is to keep mask aligned for largest value (VecZ).
366     return (int)reg < (int)(CHUNK_SIZE - 1);
367   }
368   static bool can_represent_arg(OptoReg::Name reg) {
369     // NOTE: -SlotsPerVecZ in computation reflects the need
370     //       to keep mask aligned for largest value (VecZ).
371     return (int)reg < (int)(CHUNK_SIZE - SlotsPerVecZ);
372   }
373 };
374 
375 class RegMaskIterator {
376  private:
377   uintptr_t _current_bits;
378   unsigned int _next_index;
379   OptoReg::Name _reg;
380   const RegMask& _rm;
381  public:
382   RegMaskIterator(const RegMask& rm) : _current_bits(0), _next_index(rm._lwm), _reg(OptoReg::Bad), _rm(rm) {
383     // Calculate the first element
384     next();
385   }
386 
387   bool has_next() {
388     return _reg != OptoReg::Bad;
389   }
390 
391   // Get the current element and calculate the next
392   OptoReg::Name next() {
393     OptoReg::Name r = _reg;
394 
395     // This bit shift scheme, borrowed from IndexSetIterator,
396     // shifts the _current_bits down by the number of trailing
397     // zeros - which leaves the "current" bit on position zero,
398     // then subtracts by 1 to clear it. This quirk avoids the
399     // undefined behavior that could arise if trying to shift
400     // away the bit with a single >> (next_bit + 1) shift when
401     // next_bit is 31/63. It also keeps number of shifts and
402     // arithmetic ops to a minimum.
403 
404     // We have previously found bits at _next_index - 1, and
405     // still have some left at the same index.
406     if (_current_bits != 0) {
407       unsigned int next_bit = find_lowest_bit(_current_bits);
408       assert(_reg != OptoReg::Bad, "can't be in a bad state");
409       assert(next_bit > 0, "must be");
410       assert(((_current_bits >> next_bit) & 0x1) == 1, "lowest bit must be set after shift");
411       _current_bits = (_current_bits >> next_bit) - 1;
412       _reg = OptoReg::add(_reg, next_bit);
413       return r;
414     }
415 
416     // Find the next word with bits
417     while (_next_index <= _rm._hwm) {
418       _current_bits = _rm._RM_UP[_next_index++];
419       if (_current_bits != 0) {
420         // Found a word. Calculate the first register element and
421         // prepare _current_bits by shifting it down and clearing
422         // the lowest bit
423         unsigned int next_bit = find_lowest_bit(_current_bits);
424         assert(((_current_bits >> next_bit) & 0x1) == 1, "lowest bit must be set after shift");
425         _current_bits = (_current_bits >> next_bit) - 1;
426         _reg = OptoReg::Name(((_next_index - 1) << RegMask::_LogWordBits) + next_bit);
427         return r;
428       }
429     }
430 
431     // No more bits
432     _reg = OptoReg::Name(OptoReg::Bad);
433     return r;
434   }
435 };
436 
437 // Do not use this constant directly in client code!
438 #undef RM_SIZE
439 
440 #endif // SHARE_OPTO_REGMASK_HPP