/*
 * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_MATCHER_X86_HPP
#define CPU_X86_MATCHER_X86_HPP

  // Defined within class Matcher

  // The ecx parameter to rep stosq for the ClearArray node is in words.
  static const bool init_array_count_is_in_bytes = false;

  // Whether this platform implements the scalable vector feature
  static const bool implements_scalable_vector = false;

  static constexpr bool supports_scalable_vector() {
    return false;
  }

  // x86 supports misaligned vector stores/loads.
  static constexpr bool misaligned_vectors_ok() {
    return true;
  }

  // Whether code generation needs accurate ConvI2L types.
  static const bool convi2l_type_required = true;

  // Do the processor's shift instructions only use the low 5/6 bits
  // of the count for 32/64 bit ints? If not, we need to do the masking
  // ourselves.
  static const bool need_masked_shift_count = false;

  // Does the CPU require late expand (see block.cpp for description of late expand)?
  static const bool require_postalloc_expand = false;

  // x86 supports generic vector operands: vec and legVec.
  static const bool supports_generic_vector_operands = true;

  static constexpr bool isSimpleConstant64(jlong value) {
    // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
    //return value == (int) value;  // Cf. storeImmL and immL32.

    // Probably always true, even if a temp register is required.
#ifdef _LP64
    return true;
#else
    return false;
#endif
  }

#ifdef _LP64
  // No additional cost for CMOVL.
  static constexpr int long_cmove_cost() { return 0; }
#else
  // Needs 2 CMOVs for longs.
  static constexpr int long_cmove_cost() { return 1; }
#endif

#ifdef _LP64
  // No CMOVF/CMOVD with SSE2
  static int float_cmove_cost() { return ConditionalMoveLimit; }
#else
  // No CMOVF/CMOVD with SSE/SSE2
  static int float_cmove_cost() { return (UseSSE >= 1) ? ConditionalMoveLimit : 0; }
#endif

  static bool narrow_oop_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedOops, "only for compressed oops code");
    return (LogMinObjAlignmentInBytes <= 3);
  }

  static bool narrow_klass_use_complex_address() {
    NOT_LP64(ShouldNotCallThis();)
    assert(UseCompressedClassPointers, "only for compressed klass code");
    return (LogKlassAlignmentInBytes <= 3);
  }

  // Prefer ConN+DecodeN over ConP.
  static bool const_oop_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    // Prefer ConN+DecodeN over ConP.
    return true;
  }

  // Prefer ConP over ConNKlass+DecodeNKlass.
  static bool const_klass_prefer_decode() {
    NOT_LP64(ShouldNotCallThis();)
    return false;
  }

  // Is it better to copy float constants, or load them directly from memory?
  // Intel can load a float constant from a direct address, requiring no
  // extra registers. Most RISCs will have to materialize an address into a
  // register first, so they would do better to copy the constant from stack.
  static const bool rematerialize_float_constants = true;

  // If the CPU can load and store mis-aligned doubles directly then no fixup is
  // needed. Else we split the double into 2 integer pieces and move it
  // piece-by-piece. Only happens when passing doubles into C code as the
  // Java calling convention forces doubles to be aligned.
  static const bool misaligned_doubles_ok = true;

  // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
#ifdef _LP64
  static const bool strict_fp_requires_explicit_rounding = false;
#else
  static const bool strict_fp_requires_explicit_rounding = true;
#endif

  // Are floats converted to double when stored to stack during deoptimization?
  // On x64 they are stored without conversion so we can use normal access.
  // On x32 they are stored with conversion only when the FPU is used for floats.
#ifdef _LP64
  static constexpr bool float_in_double() {
    return false;
  }
#else
  static bool float_in_double() {
    return (UseSSE == 0);
  }
#endif

  // Do ints take an entire long register or just half?
#ifdef _LP64
  static const bool int_in_long = true;
#else
  static const bool int_in_long = false;
#endif

  // Does the CPU support vector variable shift instructions?
  static bool supports_vector_variable_shifts(void) {
    return (UseAVX >= 2);
  }

  // Does the target support predicated operation emulation?
  static bool supports_vector_predicate_op_emulation(int vopc, int vlen, BasicType bt) {
    switch (vopc) {
      case Op_LoadVectorGatherMasked:
        return is_subword_type(bt) && VM_Version::supports_avx2();
      default:
        return false;
    }
  }

  // Does the CPU support vector variable rotate instructions?
  static constexpr bool supports_vector_variable_rotates(void) {
    return true;
  }

  // Does the CPU support vector constant rotate instructions?
  static constexpr bool supports_vector_constant_rotates(int shift) {
    return -0x80 <= shift && shift < 0x80;
  }

  // Does the CPU support vector unsigned comparison instructions?
  static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
    return true;
  }

  // Some microarchitectures have mask registers used on vectors
  static bool has_predicated_vectors(void) {
    return VM_Version::supports_evex();
  }

  // true means we have fast l2f conversion
  // false means that conversion is done by runtime call
  static constexpr bool convL2FSupported(void) {
    return true;
  }

  // Implements a variant of EncodeISOArrayNode that encodes ASCII only
  static const bool supports_encode_ascii_array = true;

  // Without predicated input, an all-one vector is needed for the alltrue vector test
  static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
    return is_alltrue && !is_predicate;
  }

  // BoolTest mask for vector test intrinsics
  static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
    if (!is_alltrue) {
      return BoolTest::ne;
    }
    if (!is_predicate) {
      return BoolTest::lt;
    }
    if ((vlen == 8 && !VM_Version::supports_avx512dq()) || vlen < 8) {
      return BoolTest::eq;
    }
    return BoolTest::lt;
  }

  // Returns the pre-selection estimated size of a vector operation.
  // Currently, it's a rudimentary heuristic based on emitted code size for complex
  // IR nodes used by the unroll policy. The idea is to constrain the unrolling factor
  // and prevent generating bloated loop bodies.
  static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
    switch (vopc) {
      default:
        return 0;
      case Op_MulVB:
        return 7;
      case Op_MulVL:
        return VM_Version::supports_avx512vldq() ? 0 : 6;
      case Op_LoadVectorGather:
      case Op_LoadVectorGatherMasked:
        return is_subword_type(ety) ? 50 : 0;
      case Op_VectorCastF2X: // fall through
      case Op_VectorCastD2X:
        return is_floating_point_type(ety) ? 0 : (is_subword_type(ety) ? 35 : 30);
      case Op_CountTrailingZerosV:
      case Op_CountLeadingZerosV:
        return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
      case Op_PopCountVI:
        if (is_subword_type(ety)) {
          return VM_Version::supports_avx512_bitalg() ? 0 : 50;
        } else {
          assert(ety == T_INT, "sanity"); // for documentation purposes
          return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
        }
      case Op_PopCountVL:
        return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
      case Op_ReverseV:
        return VM_Version::supports_gfni() ? 0 : 30;
      case Op_RoundVF: // fall through
      case Op_RoundVD:
        return 30;
    }
  }

  // Returns the pre-selection estimated size of a scalar operation.
  static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
    switch (vopc) {
      default:
        return 0;
      case Op_RoundF: // fall through
      case Op_RoundD:
        return 30;
    }
  }

  // Is SIMD sort supported for this CPU?
  static bool supports_simd_sort(BasicType bt) {
    if (VM_Version::supports_avx512dq()) {
      return true;
    } else if (VM_Version::supports_avx2() && !is_double_word_type(bt)) {
      return true;
    } else {
      return false;
    }
  }

#endif // CPU_X86_MATCHER_X86_HPP