/*
 * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_MATCHER_X86_HPP
#define CPU_X86_MATCHER_X86_HPP

  // Defined within class Matcher

  // The ecx parameter to rep stosq for the ClearArray node is in words.
  static const bool init_array_count_is_in_bytes = false;

  // Whether this platform implements the scalable vector feature
  static const bool implements_scalable_vector = false;

  static constexpr bool supports_scalable_vector() {
    return false;
  }

  // x86 supports misaligned vector store/load.
  static constexpr bool misaligned_vectors_ok() {
    return true;
  }
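
  // Unaligned SSE/AVX moves (movdqu/vmovdqu) accept any address, unlike
  // the aligned forms (movdqa), so no alignment fixup code is required.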

  // Whether code generation needs accurate ConvI2L types.
  static const bool convi2l_type_required = true;

  // Do the processor's shift instructions only use the low 5/6 bits
  // of the count for 32/64 bit ints? If not, we need to do the masking
  // ourselves.
  static const bool need_masked_shift_count = false;
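
  // x86 shifts (shl/shr/sar) already mask the count to the low 5 bits
  // (6 bits for 64-bit operands), which matches Java's shift semantics,
  // so no explicit masking has to be emitted.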

  // Does the CPU require late expand (see block.cpp for description of late expand)?
  static const bool require_postalloc_expand = false;

  // x86 supports generic vector operands: vec and legVec.
  static const bool supports_generic_vector_operands = true;

  static constexpr bool isSimpleConstant64(jlong value) {
    // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
    //return value == (int) value;  // Cf. storeImmL and immL32.

    // Probably always true, even if a temp register is required.
#ifdef _LP64
    return true;
#else
    return false;
#endif
  }
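
  // On 64-bit, a jlong immediate that does not fit the 32-bit store form
  // can still be materialized in a single mov r64, imm64, so a long store
  // is never worse than two int stores; 32-bit code must split the value
  // into two halves either way.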

#ifdef _LP64
  // No additional cost for CMOVL.
  static constexpr int long_cmove_cost() { return 0; }
#else
  // Needs 2 CMOVs for longs.
  static constexpr int long_cmove_cost() { return 1; }
#endif

#ifdef _LP64
  // No CMOVF/CMOVD with SSE2
  static int float_cmove_cost() { return ConditionalMoveLimit; }
#else
  // No CMOVF/CMOVD with SSE/SSE2
  static int float_cmove_cost() { return (UseSSE >= 1) ? ConditionalMoveLimit : 0; }
#endif


  static bool narrow_oop_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedOops, "only for compressed oops code");
    return (LogMinObjAlignmentInBytes <= 3);
  }
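
  // A shift of at most 3 lets the decode fold into an x86 addressing mode,
  // whose scale factor is limited to 1/2/4/8, e.g. with r12 holding the
  // heap base:
  //   mov rax, [r12 + r10*8]    // heap base + (narrow oop << 3)
  // The same scale limit motivates the narrow klass check below.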

  static bool narrow_klass_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedClassPointers, "only for compressed klass code");
    return (LogKlassAlignmentInBytes <= 3);
  }

  // Prefer ConN+DecodeN over ConP.
  static bool const_oop_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    return true;
  }

  // Prefer ConP over ConNKlass+DecodeNKlass.
  static bool const_klass_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    return false;
  }

  // Is it better to copy float constants, or load them directly from memory?
  // Intel can load a float constant from a direct address, requiring no
  // extra registers.  Most RISCs will have to materialize an address into a
  // register first, so they would do better to copy the constant from stack.
  static const bool rematerialize_float_constants = true;

  // If CPU can load and store mis-aligned doubles directly then no fixup is
  // needed.  Else we split the double into 2 integer pieces and move it
  // piece-by-piece.  Only happens when passing doubles into C code as the
  // Java calling convention forces doubles to be aligned.
  static const bool misaligned_doubles_ok = true;

  // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
#ifdef _LP64
  static const bool strict_fp_requires_explicit_rounding = false;
#else
  static const bool strict_fp_requires_explicit_rounding = true;
#endif
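
  // Rationale: 32-bit builds may evaluate floats and doubles on the x87
  // stack in 80-bit extended precision, so strictfp results have to be
  // rounded back to 24/53-bit precision explicitly; 64-bit builds use SSE2,
  // whose arithmetic is already performed in IEEE single/double precision.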

  // Are floats converted to double when stored to stack during deoptimization?
  // On x64 it is stored without conversion so we can use normal access.
  // On x32 it is stored with conversion only when FPU is used for floats.
#ifdef _LP64
  static constexpr bool float_in_double() {
    return false;
  }
#else
  static bool float_in_double() {
    return (UseSSE == 0);
  }
#endif

  // Do ints take an entire long register or just half?
#ifdef _LP64
  static const bool int_in_long = true;
#else
  static const bool int_in_long = false;
#endif

  // Does the CPU support vector variable shift instructions?
  static bool supports_vector_variable_shifts(void) {
    return (UseAVX >= 2);
  }
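
  // AVX2 introduced per-element variable shifts such as vpsllvd/vpsrlvd/
  // vpsravd; earlier SSE/AVX shifts apply one shared count to every element.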

  // Does the CPU support vector variable rotate instructions?
  static constexpr bool supports_vector_variable_rotates(void) {
    return true;
  }

  // Does the CPU support vector constant rotate instructions?
  static constexpr bool supports_vector_constant_rotates(int shift) {
    return -0x80 <= shift && shift < 0x80;
  }
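
  // The range check only verifies that the rotate amount fits in the
  // signed 8-bit immediate of the instruction encoding.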

  // Does the CPU support vector unsigned comparison instructions?
  static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
    return true;
  }

  // Some microarchitectures have mask registers used on vectors
  static bool has_predicated_vectors(void) {
    return VM_Version::supports_evex();
  }
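
  // AVX-512 (EVEX-encoded) instructions can take one of the k1-k7 opmask
  // registers to predicate the operation per element.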

  // true means we have fast l2f conversion
  // false means that conversion is done by runtime call
  static constexpr bool convL2FSupported(void) {
    return true;
  }
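
  // x86-64 converts directly with cvtsi2ss from a 64-bit register; 32-bit
  // builds can go through the x87 fild instruction, which accepts 64-bit
  // integers, so no runtime call is needed on either variant.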

  // Implements a variant of EncodeISOArrayNode that encodes ASCII only
  static const bool supports_encode_ascii_array = true;

  // Returns pre-selection estimated size of a vector operation.
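  // A nonzero estimate marks ops that, without the listed CPU features,
  // expand into long multi-instruction sequences and are therefore poor
  // candidates during vectorizer pre-selection.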
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    switch (vopc) {
      default: return 0;
      case Op_CountTrailingZerosV:
      case Op_CountLeadingZerosV:
        return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
      case Op_PopCountVI:
        return ((ety == T_INT && VM_Version::supports_avx512_vpopcntdq()) ||
                (is_subword_type(ety) && VM_Version::supports_avx512_bitalg())) ? 0 : 50;
      case Op_PopCountVL:
        return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
      case Op_ReverseV:  return VM_Version::supports_gfni() ? 0 : 30;
      case Op_RoundVF: // fall through
      case Op_RoundVD: {
        return 30;
      }
    }
  }

  // Returns pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch (vopc) {
      default: return 0;
      case Op_RoundF: // fall through
      case Op_RoundD: {
        return 30;
      }
    }
  }

#endif // CPU_X86_MATCHER_X86_HPP