/*
 * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_MATCHER_X86_HPP
#define CPU_X86_MATCHER_X86_HPP

  // Defined within class Matcher

  // The ecx parameter to rep stosq for the ClearArray node is in words.
  static const bool init_array_count_is_in_bytes = false;

  // Whether this platform implements the scalable vector feature
  static const bool implements_scalable_vector = false;

  static constexpr bool supports_scalable_vector() {
    return false;
  }

  // x86 supports misaligned vector stores/loads.
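  // Unaligned load/store forms (movdqu/vmovdqu and friends) are available,
  // so no alignment fixup is needed.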
  static constexpr bool misaligned_vectors_ok() {
    return true;
  }

  // Whether code generation needs accurate ConvI2L types.
  static const bool convi2l_type_required = true;

  // Do the processor's shift instructions only use the low 5/6 bits
  // of the count for 32/64 bit ints? If not, we need to do the masking
  // ourselves.
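  // On x86 the hardware already masks the count, e.g.
  //   shl eax, cl   ; uses only cl & 0x1F for a 32-bit operand
  //   shl rax, cl   ; uses only cl & 0x3F for a 64-bit operand
  // which matches the Java shift semantics, so no extra AND is emitted.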
  static const bool need_masked_shift_count = false;

  // Does the CPU require late expand (see block.cpp for description of late expand)?
  static const bool require_postalloc_expand = false;

  // x86 supports generic vector operands: vec and legVec.
  static const bool supports_generic_vector_operands = true;

  static constexpr bool isSimpleConstant64(jlong value) {
    // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
    //return value == (int) value;  // Cf. storeImmL and immL32.

    // Probably always true, even if a temp register is required.
#ifdef _LP64
    return true;
#else
    return false;
#endif
  }

#ifdef _LP64
  // No additional cost for CMOVL.
  static constexpr int long_cmove_cost() { return 0; }
#else
  // Needs 2 CMOV's for longs.
  static constexpr int long_cmove_cost() { return 1; }
#endif

#ifdef _LP64
  // No CMOVF/CMOVD with SSE2
  static int float_cmove_cost() { return ConditionalMoveLimit; }
#else
  // No CMOVF/CMOVD with SSE/SSE2
  static int float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
#endif

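  // With 8-byte (or smaller) object alignment the decode shift (<= 3) fits the
  // scale of an x86 addressing mode, so a field load through a narrow oop can
  // be matched as, for example,
  //   mov rax, [r12 + r10*8 + offset]   // heap base + (narrow oop << 3) + offset
  // (r10 holding the narrow oop is purely illustrative), which makes it
  // profitable to keep the DecodeN on the address path.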
  static bool narrow_oop_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedOops, "only for compressed oops code");
    return (LogMinObjAlignmentInBytes <= 3);
  }

  static bool narrow_klass_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedClassPointers, "only for compressed klass code");
    return (LogKlassAlignmentInBytes <= 3);
  }

  // Prefer ConN+DecodeN over ConP.
  static bool const_oop_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    // Prefer ConN+DecodeN over ConP.
    return true;
  }

  // Prefer ConP over ConNKlass+DecodeNKlass.
  static bool const_klass_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    return false;
  }

  // Is it better to copy float constants, or load them directly from memory?
  // Intel can load a float constant from a direct address, requiring no
  // extra registers.  Most RISCs will have to materialize an address into a
  // register first, so they would do better to copy the constant from stack.
  static const bool rematerialize_float_constants = true;

  // If CPU can load and store mis-aligned doubles directly then no fixup is
  // needed.  Else we split the double into 2 integer pieces and move it
  // piece-by-piece.  Only happens when passing doubles into C code as the
  // Java calling convention forces doubles to be aligned.
  static const bool misaligned_doubles_ok = true;

  // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
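  // 32-bit x87 code keeps intermediate results in 80-bit extended precision,
  // so strictfp needs an explicit store/reload to round to the declared
  // precision; x86_64 always uses SSE/SSE2, which rounds every operation to
  // float/double already.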
#ifdef _LP64
  static const bool strict_fp_requires_explicit_rounding = false;
#else
  static const bool strict_fp_requires_explicit_rounding = true;
#endif

  // Are floats converted to double when stored to stack during deoptimization?
  // On x64 it is stored without conversion so we can use normal access.
  // On x32 it is stored with conversion only when FPU is used for floats.
#ifdef _LP64
  static constexpr bool float_in_double() {
    return false;
  }
#else
  static bool float_in_double() {
    return (UseSSE == 0);
  }
#endif

  // Do ints take an entire long register or just half?
#ifdef _LP64
  static const bool int_in_long = true;
#else
  static const bool int_in_long = false;
#endif

  // Does the CPU support vector variable shift instructions?
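  // AVX2 adds per-element variable shifts (vpsllvd/vpsrlvd/vpsravd and their
  // 256-bit forms), hence the UseAVX >= 2 check below.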
  static bool supports_vector_variable_shifts(void) {
    return (UseAVX >= 2);
  }

  // Does the CPU support vector variable rotate instructions?
  static constexpr bool supports_vector_variable_rotates(void) {
    return true;
  }

  // Does the CPU support vector constant rotate instructions?
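  // The range check below corresponds to a rotate count that fits in the
  // signed 8-bit immediate taken by the rotate instructions.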
  static constexpr bool supports_vector_constant_rotates(int shift) {
    return -0x80 <= shift && shift < 0x80;
  }

  // Does the CPU support vector unsigned comparison instructions?
  static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
    return true;
  }

  // Some microarchitectures have mask registers used on vectors
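  // On x86 this means the AVX-512 opmask registers (k0-k7), i.e. EVEX support.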
  static bool has_predicated_vectors(void) {
    return VM_Version::supports_evex();
  }

  // true means we have fast l2f conversion
  // false means that conversion is done by runtime call
  static constexpr bool convL2FSupported(void) {
    return true;
  }

  // Implements a variant of EncodeISOArrayNode that encodes ASCII only
  static const bool supports_encode_ascii_array = true;

  // Without a predicated input, an all-ones vector is needed for the alltrue vector test
  static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
    return is_alltrue && !is_predicate;
  }

  // BoolTest mask for vector test intrinsics
  static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
    if (!is_alltrue) {
      return BoolTest::ne;
    }
    if (!is_predicate) {
      return BoolTest::lt;
    }
    if ((vlen == 8 && !VM_Version::supports_avx512dq()) || vlen < 8) {
      return BoolTest::eq;
    }
    return BoolTest::lt;
  }

  // Returns pre-selection estimated size of a vector operation.
  // Currently it is a rudimentary heuristic based on emitted code size for
  // complex IR nodes used by the unroll policy. The idea is to constrain the
  // unrolling factor and prevent generating bloated loop bodies.
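  // The non-zero estimates roughly track the length of the fallback instruction
  // sequences (e.g. the pshufb-based lookup-table expansion emitted for vector
  // popcount when AVX512_VPOPCNTDQ/AVX512_BITALG are not available).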
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    switch(vopc) {
      default:
        return 0;
      case Op_MulVB:
        return 7;
      case Op_MulVL:
        return VM_Version::supports_avx512vldq() ? 0 : 6;
      case Op_VectorCastF2X: // fall through
      case Op_VectorCastD2X:
        return is_floating_point_type(ety) ? 0 : (is_subword_type(ety) ? 35 : 30);
      case Op_CountTrailingZerosV:
      case Op_CountLeadingZerosV:
        return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
      case Op_PopCountVI:
        if (is_subword_type(ety)) {
          return VM_Version::supports_avx512_bitalg() ? 0 : 50;
        } else {
          assert(ety == T_INT, "sanity"); // for documentation purposes
          return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
        }
      case Op_PopCountVL:
        return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
      case Op_ReverseV:
        return VM_Version::supports_gfni() ? 0 : 30;
      case Op_RoundVF: // fall through
      case Op_RoundVD:
        return 30;
    }
  }

  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

  // Is SIMD sort supported for this CPU?
  static bool supports_simd_sort(BasicType bt) {
    if (VM_Version::supports_avx512dq()) {
      return true;
    } else if (VM_Version::supports_avx2() && !is_double_word_type(bt)) {
      return true;
    } else {
      return false;
    }
  }

#endif // CPU_X86_MATCHER_X86_HPP