/*
 * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_MATCHER_X86_HPP
#define CPU_X86_MATCHER_X86_HPP

  // Defined within class Matcher

  // The ecx parameter to rep stosq for the ClearArray node is in words.
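  // (rep stosq stores 8-byte words, so the count register holds a word count rather than a byte count.)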
  static const bool init_array_count_is_in_bytes = false;

  // Whether this platform implements the scalable vector feature
  static const bool implements_scalable_vector = false;

  static constexpr bool supports_scalable_vector() {
    return false;
  }

  // x86 supports misaligned vector stores/loads.
  static constexpr bool misaligned_vectors_ok() {
    return true;
  }

  // Whether code generation needs accurate ConvI2L types.
  static const bool convi2l_type_required = true;

  // Do the processor's shift instructions only use the low 5/6 bits
  // of the count for 32/64 bit ints? If not, we need to do the masking
  // ourselves.
  static const bool need_masked_shift_count = false;

  // Does the CPU require late expand (see block.cpp for a description of late expand)?
  static const bool require_postalloc_expand = false;

  // x86 supports generic vector operands: vec and legVec.
  static const bool supports_generic_vector_operands = true;

  static constexpr bool isSimpleConstant64(jlong value) {
    // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
    //return value == (int) value;  // Cf. storeImmL and immL32.

    // Probably always true, even if a temp register is required.
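    // A constant outside the int32 range needs a mov64 into a temp register before the
    // store, but a single 8-byte store is still usually no worse than two 4-byte stores.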
#ifdef _LP64
    return true;
#else
    return false;
#endif
  }

#ifdef _LP64
  // No additional cost for CMOVL.
  static constexpr int long_cmove_cost() { return 0; }
#else
  // Needs 2 CMOVs for longs.
  static constexpr int long_cmove_cost() { return 1; }
#endif

#ifdef _LP64
  // No CMOVF/CMOVD with SSE2
  static int float_cmove_cost() { return ConditionalMoveLimit; }
#else
  // No CMOVF/CMOVD with SSE/SSE2
  static int float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
#endif
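  // (Returning ConditionalMoveLimit as the cost effectively rules out CMoveF/CMoveD in the
  //  conditional-move heuristics.)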

  static bool narrow_oop_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedOops, "only for compressed oops code");
    return (LogMinObjAlignmentInBytes <= 3);
  }
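  // (A shift of at most 3 lets the decode be folded into an x86 addressing mode, which supports
  //  scale factors of 1, 2, 4 and 8, e.g. a load of the form [r12 + narrow_oop*8 + disp].)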

  static bool narrow_klass_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedClassPointers, "only for compressed klass code");
    return (CompressedKlassPointers::shift() <= 3);
  }

  // Prefer ConN+DecodeN over ConP.
  static bool const_oop_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    // Prefer ConN+DecodeN over ConP.
    return true;
  }

  // Prefer ConP over ConNKlass+DecodeNKlass.
  static bool const_klass_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    return false;
  }

  // Is it better to copy float constants, or load them directly from memory?
  // Intel can load a float constant from a direct address, requiring no
  // extra registers.  Most RISCs will have to materialize an address into a
  // register first, so they would do better to copy the constant from stack.
  static const bool rematerialize_float_constants = true;

  // If the CPU can load and store mis-aligned doubles directly then no fixup is
  // needed.  Else we split the double into 2 integer pieces and move it
  // piece-by-piece.  Only happens when passing doubles into C code as the
  // Java calling convention forces doubles to be aligned.
  static const bool misaligned_doubles_ok = true;

  // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
#ifdef _LP64
  static const bool strict_fp_requires_explicit_rounding = false;
#else
  static const bool strict_fp_requires_explicit_rounding = true;
#endif
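  // (The 64-bit build always uses SSE for floating point; the 32-bit build may still use the x87
  //  FPU, whose extended intermediate precision requires explicit rounding for strictfp semantics.)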

  // Are floats converted to double when stored to stack during deoptimization?
  // On x64 it is stored without conversion so we can use normal access.
  // On x32 it is stored with conversion only when the FPU is used for floats.
#ifdef _LP64
  static constexpr bool float_in_double() {
    return false;
  }
#else
  static bool float_in_double() {
    return (UseSSE == 0);
  }
#endif

  // Do ints take an entire long register or just half?
#ifdef _LP64
  static const bool int_in_long = true;
#else
  static const bool int_in_long = false;
#endif

  // Does the CPU support vector variable shift instructions?
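  // (AVX2 introduces per-element variable shifts such as vpsllvd, vpsrlvd and vpsravd.)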
  static bool supports_vector_variable_shifts(void) {
    return (UseAVX >= 2);
  }

  // Does the target support predicated operation emulation?
  static bool supports_vector_predicate_op_emulation(int vopc, int vlen, BasicType bt) {
    switch(vopc) {
      case Op_LoadVectorGatherMasked:
        return is_subword_type(bt) && VM_Version::supports_avx2();
      default:
        return false;
    }
  }

  // Does the CPU support vector variable rotate instructions?
  static constexpr bool supports_vector_variable_rotates(void) {
    return true;
  }

  // Does the CPU support vector constant rotate instructions?
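  // (The range check below corresponds to a rotate count that fits in an 8-bit immediate.)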
  static constexpr bool supports_vector_constant_rotates(int shift) {
    return -0x80 <= shift && shift < 0x80;
  }

  // Does the CPU support vector unsigned comparison instructions?
  static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
    return true;
  }

  // Some microarchitectures have mask registers used with vectors
  static bool has_predicated_vectors(void) {
    return VM_Version::supports_evex();
  }

  // true means we have fast l2f conversion
  // false means that conversion is done by a runtime call
  static constexpr bool convL2FSupported(void) {
    return true;
  }

  // Implements a variant of EncodeISOArrayNode that encodes ASCII only
  static const bool supports_encode_ascii_array = true;

  // Without a predicated input, an all-ones vector is needed for the alltrue vector test
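  // (the test is typically lowered to a ptest/vptest of the mask vector against an all-ones operand).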
  static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
    return is_alltrue && !is_predicate;
  }

  // BoolTest mask for vector test intrinsics
  static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
    if (!is_alltrue) {
      return BoolTest::ne;
    }
    if (!is_predicate) {
      return BoolTest::lt;
    }
    if ((vlen == 8 && !VM_Version::supports_avx512dq()) || vlen < 8) {
      return BoolTest::eq;
    }
    return BoolTest::lt;
  }

  // Returns the pre-selection estimated size of a vector operation.
  // Currently this is a rudimentary heuristic based on the emitted code size of complex
  // IR nodes, used by the unroll policy. The idea is to constrain the unrolling factor
  // and prevent generating bloated loop bodies.
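  // For example, a loop body dominated by expensive emulated operations (such as subword
  // gathers, estimated at 50) is unrolled less aggressively than one made of operations that
  // map to single instructions (estimated at 0).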
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    switch(vopc) {
      default:
        return 0;
      case Op_MulVB:
        return 7;
      case Op_MulVL:
        return VM_Version::supports_avx512vldq() ? 0 : 6;
      case Op_LoadVectorGather:
      case Op_LoadVectorGatherMasked:
        return is_subword_type(ety) ? 50 : 0;
      case Op_VectorCastF2X: // fall through
      case Op_VectorCastD2X:
        return is_floating_point_type(ety) ? 0 : (is_subword_type(ety) ? 35 : 30);
      case Op_CountTrailingZerosV:
      case Op_CountLeadingZerosV:
        return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
      case Op_PopCountVI:
        if (is_subword_type(ety)) {
          return VM_Version::supports_avx512_bitalg() ? 0 : 50;
        } else {
          assert(ety == T_INT, "sanity"); // for documentation purposes
          return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
        }
      case Op_PopCountVL:
        return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
      case Op_ReverseV:
        return VM_Version::supports_gfni() ? 0 : 30;
      case Op_RoundVF: // fall through
      case Op_RoundVD:
        return 30;
    }
  }

  // Returns the pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch(vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

  // Is SIMD sort supported for this CPU?
  static bool supports_simd_sort(BasicType bt) {
    if (VM_Version::supports_avx512dq()) {
      return true;
    } else if (VM_Version::supports_avx2() && !is_double_word_type(bt)) {
      return true;
    } else {
      return false;
    }
  }

#endif // CPU_X86_MATCHER_X86_HPP