1 /* 2 * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_X86_MATCHER_X86_HPP 26 #define CPU_X86_MATCHER_X86_HPP 27 28 // Defined within class Matcher 29 30 // The ecx parameter to rep stosq for the ClearArray node is in words. 31 static const bool init_array_count_is_in_bytes = false; 32 33 // Whether this platform implements the scalable vector feature 34 static const bool implements_scalable_vector = false; 35 36 static constexpr bool supports_scalable_vector() { 37 return false; 38 } 39 40 // x86 supports misaligned vectors store/load. 41 static constexpr bool misaligned_vectors_ok() { 42 return true; 43 } 44 45 // Whether code generation need accurate ConvI2L types. 46 static const bool convi2l_type_required = true; 47 48 // Do the processor's shift instructions only use the low 5/6 bits 49 // of the count for 32/64 bit ints? If not we need to do the masking 50 // ourselves. 51 static const bool need_masked_shift_count = false; 52 53 // Does the CPU require late expand (see block.cpp for description of late expand)? 54 static const bool require_postalloc_expand = false; 55 56 // x86 supports generic vector operands: vec and legVec. 57 static const bool supports_generic_vector_operands = true; 58 59 static constexpr bool isSimpleConstant64(jlong value) { 60 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 61 //return value == (int) value; // Cf. storeImmL and immL32. 62 63 // Probably always true, even if a temp register is required. 64 #ifdef _LP64 65 return true; 66 #else 67 return false; 68 #endif 69 } 70 71 #ifdef _LP64 72 // No additional cost for CMOVL. 73 static constexpr int long_cmove_cost() { return 0; } 74 #else 75 // Needs 2 CMOV's for longs. 76 static constexpr int long_cmove_cost() { return 1; } 77 #endif 78 79 #ifdef _LP64 80 // No CMOVF/CMOVD with SSE2 81 static int float_cmove_cost() { return ConditionalMoveLimit; } 82 #else 83 // No CMOVF/CMOVD with SSE/SSE2 84 static int float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 85 #endif 86 87 static bool narrow_oop_use_complex_address() { 88 NOT_LP64(ShouldNotCallThis();) 89 assert(UseCompressedOops, "only for compressed oops code"); 90 return (LogMinObjAlignmentInBytes <= 3); 91 } 92 93 static bool narrow_klass_use_complex_address() { 94 NOT_LP64(ShouldNotCallThis();) 95 assert(UseCompressedClassPointers, "only for compressed klass code"); 96 return (LogKlassAlignmentInBytes <= 3); 97 } 98 99 // Prefer ConN+DecodeN over ConP. 100 static bool const_oop_prefer_decode() { 101 NOT_LP64(ShouldNotCallThis();) 102 // Prefer ConN+DecodeN over ConP. 103 return true; 104 } 105 106 // Prefer ConP over ConNKlass+DecodeNKlass. 107 static bool const_klass_prefer_decode() { 108 NOT_LP64(ShouldNotCallThis();) 109 return false; 110 } 111 112 // Is it better to copy float constants, or load them directly from memory? 113 // Intel can load a float constant from a direct address, requiring no 114 // extra registers. Most RISCs will have to materialize an address into a 115 // register first, so they would do better to copy the constant from stack. 116 static const bool rematerialize_float_constants = true; 117 118 // If CPU can load and store mis-aligned doubles directly then no fixup is 119 // needed. Else we split the double into 2 integer pieces and move it 120 // piece-by-piece. Only happens when passing doubles into C code as the 121 // Java calling convention forces doubles to be aligned. 122 static const bool misaligned_doubles_ok = true; 123 124 // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. 125 #ifdef _LP64 126 static const bool strict_fp_requires_explicit_rounding = false; 127 #else 128 static const bool strict_fp_requires_explicit_rounding = true; 129 #endif 130 131 // Are floats converted to double when stored to stack during deoptimization? 132 // On x64 it is stored without conversion so we can use normal access. 133 // On x32 it is stored with conversion only when FPU is used for floats. 134 #ifdef _LP64 135 static constexpr bool float_in_double() { 136 return false; 137 } 138 #else 139 static bool float_in_double() { 140 return (UseSSE == 0); 141 } 142 #endif 143 144 // Do ints take an entire long register or just half? 145 #ifdef _LP64 146 static const bool int_in_long = true; 147 #else 148 static const bool int_in_long = false; 149 #endif 150 151 152 // Does the CPU supports vector variable shift instructions? 153 static bool supports_vector_variable_shifts(void) { 154 return (UseAVX >= 2); 155 } 156 157 // Does the CPU supports vector variable rotate instructions? 158 static constexpr bool supports_vector_variable_rotates(void) { 159 return true; 160 } 161 162 // Does the CPU supports vector constant rotate instructions? 163 static constexpr bool supports_vector_constant_rotates(int shift) { 164 return -0x80 <= shift && shift < 0x80; 165 } 166 167 // Does the CPU supports vector unsigned comparison instructions? 168 static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { 169 return true; 170 } 171 172 // Some microarchitectures have mask registers used on vectors 173 static bool has_predicated_vectors(void) { 174 return VM_Version::supports_evex(); 175 } 176 177 // true means we have fast l2f conversion 178 // false means that conversion is done by runtime call 179 static constexpr bool convL2FSupported(void) { 180 return true; 181 } 182 183 // Implements a variant of EncodeISOArrayNode that encode ASCII only 184 static const bool supports_encode_ascii_array = true; 185 186 // Without predicated input, an all-one vector is needed for the alltrue vector test 187 static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) { 188 return is_alltrue && !is_predicate; 189 } 190 191 // BoolTest mask for vector test intrinsics 192 static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) { 193 if (!is_alltrue) { 194 return BoolTest::ne; 195 } 196 if (!is_predicate) { 197 return BoolTest::lt; 198 } 199 if ((vlen == 8 && !VM_Version::supports_avx512dq()) || vlen < 8) { 200 return BoolTest::eq; 201 } 202 return BoolTest::lt; 203 } 204 205 // Returns pre-selection estimated size of a vector operation. 206 // Currently, it's a rudimentary heuristic based on emitted code size for complex 207 // IR nodes used by unroll policy. Idea is to constrain unrolling factor and prevent 208 // generating bloated loop bodies. 209 static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { 210 switch(vopc) { 211 default: 212 return 0; 213 case Op_MulVB: 214 return 7; 215 case Op_MulVL: 216 return VM_Version::supports_avx512vldq() ? 0 : 6; 217 case Op_VectorCastF2X: // fall through 218 case Op_VectorCastD2X: 219 return is_floating_point_type(ety) ? 0 : (is_subword_type(ety) ? 35 : 30); 220 case Op_CountTrailingZerosV: 221 case Op_CountLeadingZerosV: 222 return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40; 223 case Op_PopCountVI: 224 if (is_subword_type(ety)) { 225 return VM_Version::supports_avx512_bitalg() ? 0 : 50; 226 } else { 227 assert(ety == T_INT, "sanity"); // for documentation purposes 228 return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50; 229 } 230 case Op_PopCountVL: 231 return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40; 232 case Op_ReverseV: 233 return VM_Version::supports_gfni() ? 0 : 30; 234 case Op_RoundVF: // fall through 235 case Op_RoundVD: 236 return 30; 237 } 238 } 239 240 // Returns pre-selection estimated size of a scalar operation. 241 static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) { 242 switch(vopc) { 243 default: return 0; 244 case Op_RoundF: // fall through 245 case Op_RoundD: { 246 return 30; 247 } 248 } 249 } 250 251 // Is SIMD sort supported for this CPU? 252 static bool supports_simd_sort(BasicType bt) { 253 if (VM_Version::supports_avx512dq()) { 254 return true; 255 } 256 else if (VM_Version::supports_avx2() && !is_double_word_type(bt)) { 257 return true; 258 } 259 else { 260 return false; 261 } 262 } 263 264 #endif // CPU_X86_MATCHER_X86_HPP