/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64;
  // if not, we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31 during signal
    // handling guarantees that their preserved values post signal handling were
    // re-instantiated by the operating system, not merely left unmodified externally.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
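  // Generates the stub that fills in a VM_Version::CpuidInfo record by
  // probing EFLAGS and walking the supported cpuid leaves; it is run once,
  // early at VM startup, before feature-dependent code is emitted.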
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
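    // (The AC and ID flags are EFLAGS bits 18 and 21, hence the masks
    // 0x40000 and 0x200000 above. A CPU that lets software toggle the ID
    // bit is guaranteed to implement the "cpuid" instruction; a 486 keeps
    // the bit pinned.)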
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
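    // (Each cpuid(0x4) sub-leaf selected via rcx describes one cache level;
    // eax[4:0] is the cache type and reads 0 once the levels are exhausted,
    // which is what the validity check below relies on.)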
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
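    // (The compare ladder above dispatches on the maximum supported extended
    // leaf; the sections below record the highest supported leaf first and
    // then fall through to each next-lower leaf in turn.)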
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
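    // (EDX bit 21 of cpuid(7,1) is mask 0x200000 and XCR0 bit 19 is mask
    // 0x80000; the constants below follow directly from those positions.)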
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump to the SIMD check if the OS saves SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
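    // (The HotSpot signal handler recognizes the faulting PC recorded via
    // set_cpuinfo_segv_addr() and resumes at the PC recorded via
    // set_cpuinfo_cont_addr(), so the deliberate null load below is safe.)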
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
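    // (The C++ side later compares the register values saved below against
    // ymm_test_value() to decide whether the OS restored the full YMM/ZMM
    // state across the signal; see os_supports_avx_vectors().)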
    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use the
    // uncached variant here directly to be able to bootstrap correctly.
    __ vzeroupper_uncached();
#   undef __
  }
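  // Executes cpuid for an arbitrary caller-supplied leaf and hands all four
  // output registers back through a uint32_t[4] array; used for
  // virtualization (hypervisor) detection.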
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);
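    // (Leaves 0x80000002..0x80000004 each return 16 bytes of the 48-byte
    // brand string in eax/ebx/ecx/edx, stored consecutively below.)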
    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features; // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;

  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
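  // (cpuid(0x1) reports clflush_size in 8-byte units, so the only value
  // accepted here, 8, corresponds to the 64-byte cache line the VM assumes.)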
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
     _model == 0xCC || _model == 0xDD)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }
  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
    _features &= ~CPU_AVX512_FP16;
  }

  // Currently APX support is only enabled for targets supporting AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
  }

  if (!UseAPX) {
    _features &= ~CPU_APX_F;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
    _features &= ~CPU_SHA512;
  }

  if (logical_processors_per_package() == 1) {
    // An HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
      _features &= ~CPU_AVX512_FP16;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  assert(supports_cpuid(), "Always present");
  assert(supports_clflush(), "Always present");
  if (X86ICacheSync == -1) {
    // Auto-detect, choosing the most performant one that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
    if (supports_clwb()) {
      FLAG_SET_ERGO(X86ICacheSync, 3);
    } else if (supports_clflushopt()) {
      FLAG_SET_ERGO(X86ICacheSync, 2);
    } else {
      FLAG_SET_ERGO(X86ICacheSync, 1);
    }
  } else {
    if ((X86ICacheSync == 2) && !supports_clflushopt()) {
      vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
    }
    if ((X86ICacheSync == 3) && !supports_clwb()) {
      vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
    }
    if ((X86ICacheSync == 5) && !supports_serialize()) {
      vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
    }
  }
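  // (As the checks above imply, X86ICacheSync selects the icache sync
  // instruction: 1 = CLFLUSH, 2 = CLFLUSHOPT, 3 = CLWB, with 4 = CPUID and
  // 5 = SERIALIZE as the serializing alternatives mentioned in the comment.)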
  char buf[1024];
  int cpu_info_size = jio_snprintf(
    buf, sizeof(buf),
    "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
    cores_per_cpu(), threads_per_core(),
    cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(cpu_info_size > 0, "not enough temporary space allocated");
  insert_features_names(buf + cpu_info_size, sizeof(buf) - cpu_info_size, _features_names);

  _cpu_info_string = os::strdup(buf);

  _features_string = extract_features_string(_cpu_info_string,
                                             strnlen(_cpu_info_string, sizeof(buf)),
                                             cpu_info_size);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }

  // Dilithium Intrinsics
  // Currently we only have them for AVX512
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
      UseDilithiumIntrinsics = true;
    }
  } else if (UseDilithiumIntrinsics) {
    warning("Intrinsics for ML-DSA are not available on this CPU.");
    FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires EVEX instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma()) {
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() || (supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }
#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 byte vectors (in ZMM) are only supported with AVX3
    max_vector_size = 64;
  }

  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 4;
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
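  // (movsd from memory zeroes the destination's upper half and thus breaks
  // the dependency on its old value; movlpd merges into the old value and
  // can therefore stall. The same reasoning applies to movaps/movapd versus
  // the merging movss/movsd register-to-register moves.)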
  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }

  // Use count leading zeros instruction if available.
1689 if (supports_lzcnt()) { 1690 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1691 UseCountLeadingZerosInstruction = true; 1692 } 1693 } else if (UseCountLeadingZerosInstruction) { 1694 warning("lzcnt instruction is not available on this CPU"); 1695 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1696 } 1697 1698 // Use count trailing zeros instruction if available 1699 if (supports_bmi1()) { 1700 // tzcnt does not require VEX prefix 1701 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1702 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1703 // Don't use tzcnt if BMI1 is switched off on command line. 1704 UseCountTrailingZerosInstruction = false; 1705 } else { 1706 UseCountTrailingZerosInstruction = true; 1707 } 1708 } 1709 } else if (UseCountTrailingZerosInstruction) { 1710 warning("tzcnt instruction is not available on this CPU"); 1711 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1712 } 1713 1714 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1715 // VEX prefix is generated only when AVX > 0. 1716 if (supports_bmi1() && supports_avx()) { 1717 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1718 UseBMI1Instructions = true; 1719 } 1720 } else if (UseBMI1Instructions) { 1721 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1722 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1723 } 1724 1725 if (supports_bmi2() && supports_avx()) { 1726 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1727 UseBMI2Instructions = true; 1728 } 1729 } else if (UseBMI2Instructions) { 1730 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1731 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1732 } 1733 1734 // Use population count instruction if available. 1735 if (supports_popcnt()) { 1736 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1737 UsePopCountInstruction = true; 1738 } 1739 } else if (UsePopCountInstruction) { 1740 warning("POPCNT instruction is not available on this CPU"); 1741 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1742 } 1743 1744 // Use fast-string operations if available. 1745 if (supports_erms()) { 1746 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1747 UseFastStosb = true; 1748 } 1749 } else if (UseFastStosb) { 1750 warning("fast-string operations are not available on this CPU"); 1751 FLAG_SET_DEFAULT(UseFastStosb, false); 1752 } 1753 1754 // For AMD Processors use XMM/YMM MOVDQU instructions 1755 // for Object Initialization as default 1756 if (is_amd() && cpu_family() >= 0x19) { 1757 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1758 UseFastStosb = false; 1759 } 1760 } 1761 1762 #ifdef COMPILER2 1763 if (is_intel() && MaxVectorSize > 16) { 1764 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1765 UseFastStosb = false; 1766 } 1767 } 1768 #endif 1769 1770 // Use XMM/YMM MOVDQU instruction for Object Initialization 1771 if (UseUnalignedLoadStores) { 1772 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1773 UseXMMForObjInit = true; 1774 } 1775 } else if (UseXMMForObjInit) { 1776 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1777 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1778 } 1779 1780 #ifdef COMPILER2 1781 if (FLAG_IS_DEFAULT(AlignVector)) { 1782 // Modern processors allow misaligned memory operations for vectors. 
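// Rough intuition for the assignment below: when unaligned vector
// loads/stores are known to be cheap (UseUnalignedLoadStores), C2 does
// not need to require aligned vector accesses, so it can skip the extra
// pre-loop iterations it would otherwise use to reach an aligned
// starting address; on older hardware aligned accesses remain the
// conservative default.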
AlignVector = !UseUnalignedLoadStores; 1784 } 1785 #endif // COMPILER2 1786 1787 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1788 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1789 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1790 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1791 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1792 } 1793 } 1794 1795 // Allocation prefetch settings 1796 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1797 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1798 (cache_line_size > AllocatePrefetchStepSize)) { 1799 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1800 } 1801 1802 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1803 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1804 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1805 warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag."); 1806 } 1807 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1808 } 1809 1810 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1811 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1812 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1813 } 1814 1815 if (is_intel() && cpu_family() == 6 && supports_sse3()) { 1816 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1817 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1818 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1819 } 1820 #ifdef COMPILER2 1821 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1822 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1823 } 1824 #endif 1825 } 1826 1827 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1828 #ifdef COMPILER2 1829 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1830 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1831 } 1832 #endif 1833 } 1834 1835 // Prefetch settings 1836 1837 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1838 // 50-warehouse specjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap. 1839 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1840 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1841 1842 // gc copy/scan is disabled if prefetchw isn't supported, because 1843 // Prefetch::write emits an inlined prefetchw on Linux. 1844 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1845 // The prefetcht0 instruction that is used instead works on both amd64 and em64t. 
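// Worked example of the chosen default: with the common 64-byte dcache
// line, a 576-byte prefetch-ahead distance covers 576 / 64 = 9 lines,
// which is the "9 dcache lines" figure quoted above.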
1846 1847 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1848 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1849 } 1850 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1851 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1852 } 1853 1854 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1855 (cache_line_size > ContendedPaddingWidth)) 1856 ContendedPaddingWidth = cache_line_size; 1857 1858 // This machine allows unaligned memory accesses 1859 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1860 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1861 } 1862 1863 #ifndef PRODUCT 1864 if (log_is_enabled(Info, os, cpu)) { 1865 LogStream ls(Log(os, cpu)::info()); 1866 outputStream* log = &ls; 1867 log->print_cr("Logical CPUs per core: %u", 1868 logical_processors_per_package()); 1869 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1870 log->print("UseSSE=%d", UseSSE); 1871 if (UseAVX > 0) { 1872 log->print(" UseAVX=%d", UseAVX); 1873 } 1874 if (UseAES) { 1875 log->print(" UseAES=1"); 1876 } 1877 #ifdef COMPILER2 1878 if (MaxVectorSize > 0) { 1879 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1880 } 1881 #endif 1882 log->cr(); 1883 log->print("Allocation"); 1884 if (AllocatePrefetchStyle <= 0) { 1885 log->print_cr(": no prefetching"); 1886 } else { 1887 log->print(" prefetching: "); 1888 if (AllocatePrefetchInstr == 0) { 1889 log->print("PREFETCHNTA"); 1890 } else if (AllocatePrefetchInstr == 1) { 1891 log->print("PREFETCHT0"); 1892 } else if (AllocatePrefetchInstr == 2) { 1893 log->print("PREFETCHT2"); 1894 } else if (AllocatePrefetchInstr == 3) { 1895 log->print("PREFETCHW"); 1896 } 1897 if (AllocatePrefetchLines > 1) { 1898 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1899 } else { 1900 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1901 } 1902 } 1903 1904 if (PrefetchCopyIntervalInBytes > 0) { 1905 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1906 } 1907 if (PrefetchScanIntervalInBytes > 0) { 1908 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1909 } 1910 if (ContendedPaddingWidth > 0) { 1911 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1912 } 1913 } 1914 #endif // !PRODUCT 1915 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1916 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1917 } 1918 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1919 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1920 } 1921 } 1922 1923 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1924 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1925 if (vrt == XenHVM) { 1926 st->print_cr("Xen hardware-assisted virtualization detected"); 1927 } else if (vrt == KVM) { 1928 st->print_cr("KVM virtualization detected"); 1929 } else if (vrt == VMWare) { 1930 st->print_cr("VMWare virtualization detected"); 1931 VirtualizationSupport::print_virtualization_info(st); 1932 } else if (vrt == HyperV) { 1933 st->print_cr("Hyper-V virtualization detected"); 1934 } else if (vrt == HyperVRole) { 1935 st->print_cr("Hyper-V role detected"); 1936 } 1937 } 1938 1939 bool VM_Version::compute_has_intel_jcc_erratum() { 1940 if (!is_intel_family_core()) { 1941 // Only Intel CPUs are affected. 
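// Background: on affected parts, the microcode update that fixes the
// erratum prevents jump instructions that cross or end on a 32-byte
// boundary from being cached in the decoded icache, which can cost
// noticeable performance. When this predicate returns true, the
// assembler can pad code so that branches avoid that placement.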
1942 return false; 1943 } 1944 // The following table of affected CPUs is based on the following document released by Intel: 1945 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 1946 switch (_model) { 1947 case 0x8E: 1948 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1949 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 1950 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 1951 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 1952 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 1953 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1954 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1955 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 1956 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1957 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 1958 case 0x4E: 1959 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 1960 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 1961 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 1962 return _stepping == 0x3; 1963 case 0x55: 1964 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1965 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1966 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 1967 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 1968 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 1969 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 1970 return _stepping == 0x4 || _stepping == 0x7; 1971 case 0x5E: 1972 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 1973 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 1974 return _stepping == 0x3; 1975 case 0x9E: 1976 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 1977 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 1978 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 1979 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 1980 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 1981 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name 
Coffee Lake H 1982 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 1983 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP 1984 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 1985 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 1986 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 1987 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 1988 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2) 1989 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 1990 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 1991 case 0xA5: 1992 // Not in Intel documentation. 1993 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 1994 return true; 1995 case 0xA6: 1996 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 1997 return _stepping == 0x0; 1998 case 0xAE: 1999 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2000 return _stepping == 0xA; 2001 default: 2002 // If we are running on another Intel machine not recognized in the table, we are okay. 2003 return false; 2004 } 2005 } 2006 2007 // On Xen, the cpuid instruction returns 2008 // eax / registers[0]: Version of Xen 2009 // ebx / registers[1]: chars 'XenV' 2010 // ecx / registers[2]: chars 'MMXe' 2011 // edx / registers[3]: chars 'nVMM' 2012 // 2013 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2014 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2015 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2016 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2017 // 2018 // more information: 2019 // https://kb.vmware.com/s/article/1009458 2020 // 2021 void VM_Version::check_virtualizations() { 2022 uint32_t registers[4] = {0}; 2023 char signature[13] = {0}; 2024 2025 // Xen cpuid leaves can be found at a 0x100 aligned boundary starting 2026 // from 0x40000000 until 0x40010000. 2027 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2028 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2029 detect_virt_stub(leaf, registers); 2030 memcpy(signature, &registers[1], 12); 2031 2032 if (strncmp("VMwareVMware", signature, 12) == 0) { 2033 Abstract_VM_Version::_detected_virtualization = VMWare; 2034 // check for extended metrics from guestlib 2035 VirtualizationSupport::initialize(); 2036 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2037 Abstract_VM_Version::_detected_virtualization = HyperV; 2038 #ifdef _WINDOWS 2039 // CPUID leaf 0x40000007 is available to the root partition only. 2040 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details. 
2041 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2042 detect_virt_stub(0x40000007, registers); 2043 if ((registers[0] != 0x0) || 2044 (registers[1] != 0x0) || 2045 (registers[2] != 0x0) || 2046 (registers[3] != 0x0)) { 2047 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2048 } 2049 #endif 2050 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2051 Abstract_VM_Version::_detected_virtualization = KVM; 2052 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2053 Abstract_VM_Version::_detected_virtualization = XenHVM; 2054 } 2055 } 2056 } 2057 2058 #ifdef COMPILER2 2059 // Determine if it's running on Cascade Lake using default options. 2060 bool VM_Version::is_default_intel_cascade_lake() { 2061 return FLAG_IS_DEFAULT(UseAVX) && 2062 FLAG_IS_DEFAULT(MaxVectorSize) && 2063 UseAVX > 2 && 2064 is_intel_cascade_lake(); 2065 } 2066 #endif 2067 2068 bool VM_Version::is_intel_cascade_lake() { 2069 return is_intel_skylake() && _stepping >= 5; 2070 } 2071 2072 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2073 // for implementing the array copy and clear operations. 2074 // Intel platforms that support the serialize instruction 2075 // have an improved implementation of 64-byte load/stores, so the default 2076 // threshold is set to 0 for these platforms. 2077 int VM_Version::avx3_threshold() { 2078 return (is_intel_family_core() && 2079 supports_serialize() && 2080 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2081 } 2082 2083 void VM_Version::clear_apx_test_state() { 2084 clear_apx_test_state_stub(); 2085 } 2086 2087 static bool _vm_version_initialized = false; 2088 2089 void VM_Version::initialize() { 2090 ResourceMark rm; 2091 // Generating this stub must be the FIRST use of the assembler 2092 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2093 if (stub_blob == nullptr) { 2094 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2095 } 2096 CodeBuffer c(stub_blob); 2097 VM_Version_StubGenerator g(&c); 2098 2099 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2100 g.generate_get_cpu_info()); 2101 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2102 g.generate_detect_virt()); 2103 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, 2104 g.clear_apx_test_state()); 2105 get_processor_features(); 2106 2107 Assembler::precompute_instructions(); 2108 2109 if (VM_Version::supports_hv()) { // Supports hypervisor 2110 check_virtualizations(); 2111 } 2112 _vm_version_initialized = true; 2113 } 2114 2115 typedef enum { 2116 CPU_FAMILY_8086_8088 = 0, 2117 CPU_FAMILY_INTEL_286 = 2, 2118 CPU_FAMILY_INTEL_386 = 3, 2119 CPU_FAMILY_INTEL_486 = 4, 2120 CPU_FAMILY_PENTIUM = 5, 2121 CPU_FAMILY_PENTIUMPRO = 6, // Same family several models 2122 CPU_FAMILY_PENTIUM_4 = 0xF 2123 } FamilyFlag; 2124 2125 typedef enum { 2126 RDTSCP_FLAG = 0x08000000, // bit 27 2127 INTEL64_FLAG = 0x20000000 // bit 29 2128 } _featureExtendedEdxFlag; 2129 2130 typedef enum { 2131 FPU_FLAG = 0x00000001, 2132 VME_FLAG = 0x00000002, 2133 DE_FLAG = 0x00000004, 2134 PSE_FLAG = 0x00000008, 2135 TSC_FLAG = 0x00000010, 2136 MSR_FLAG = 0x00000020, 2137 PAE_FLAG = 0x00000040, 2138 MCE_FLAG = 0x00000080, 2139 CX8_FLAG = 0x00000100, 2140 APIC_FLAG = 0x00000200, 2141 SEP_FLAG = 0x00000800, 2142 MTRR_FLAG = 0x00001000, 2143 PGE_FLAG = 0x00002000, 2144 MCA_FLAG = 0x00004000, 2145 CMOV_FLAG = 0x00008000, 2146 PAT_FLAG = 0x00010000, 2147 
PSE36_FLAG = 0x00020000, 2148 PSNUM_FLAG = 0x00040000, 2149 CLFLUSH_FLAG = 0x00080000, 2150 DTS_FLAG = 0x00200000, 2151 ACPI_FLAG = 0x00400000, 2152 MMX_FLAG = 0x00800000, 2153 FXSR_FLAG = 0x01000000, 2154 SSE_FLAG = 0x02000000, 2155 SSE2_FLAG = 0x04000000, 2156 SS_FLAG = 0x08000000, 2157 HTT_FLAG = 0x10000000, 2158 TM_FLAG = 0x20000000 2159 } FeatureEdxFlag; 2160 2161 static BufferBlob* cpuid_brand_string_stub_blob; 2162 static const int cpuid_brand_string_stub_size = 550; 2163 2164 extern "C" { 2165 typedef void (*getCPUIDBrandString_stub_t)(void*); 2166 } 2167 2168 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2169 2170 // VM_Version statics 2171 enum { 2172 ExtendedFamilyIdLength_INTEL = 16, 2173 ExtendedFamilyIdLength_AMD = 24 2174 }; 2175 2176 const size_t VENDOR_LENGTH = 13; 2177 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2178 static char* _cpu_brand_string = nullptr; 2179 static int64_t _max_qualified_cpu_frequency = 0; 2180 2181 static int _no_of_threads = 0; 2182 static int _no_of_cores = 0; 2183 2184 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2185 "8086/8088", 2186 "", 2187 "286", 2188 "386", 2189 "486", 2190 "Pentium", 2191 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2192 "", 2193 "", 2194 "", 2195 "", 2196 "", 2197 "", 2198 "", 2199 "", 2200 "Pentium 4" 2201 }; 2202 2203 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2204 "", 2205 "", 2206 "", 2207 "", 2208 "5x86", 2209 "K5/K6", 2210 "Athlon/AthlonXP", 2211 "", 2212 "", 2213 "", 2214 "", 2215 "", 2216 "", 2217 "", 2218 "", 2219 "Opteron/Athlon64", 2220 "Opteron QC/Phenom", // Barcelona et.al. 2221 "", 2222 "", 2223 "", 2224 "", 2225 "", 2226 "", 2227 "Zen" 2228 }; 2229 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2230 // September 2013, Vol 3C Table 35-1 2231 const char* const _model_id_pentium_pro[] = { 2232 "", 2233 "Pentium Pro", 2234 "", 2235 "Pentium II model 3", 2236 "", 2237 "Pentium II model 5/Xeon/Celeron", 2238 "Celeron", 2239 "Pentium III/Pentium III Xeon", 2240 "Pentium III/Pentium III Xeon", 2241 "Pentium M model 9", // Yonah 2242 "Pentium III, model A", 2243 "Pentium III, model B", 2244 "", 2245 "Pentium M model D", // Dothan 2246 "", 2247 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2248 "", 2249 "", 2250 "", 2251 "", 2252 "", 2253 "", 2254 "Celeron", // 0x16 Celeron 65nm 2255 "Core 2", // 0x17 Penryn / Harpertown 2256 "", 2257 "", 2258 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2259 "Atom", // 0x1B Z5xx series Silverthorn 2260 "", 2261 "Core 2", // 0x1D Dunnington (6-core) 2262 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2263 "", 2264 "", 2265 "", 2266 "", 2267 "", 2268 "", 2269 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2270 "", 2271 "", 2272 "", // 0x28 2273 "", 2274 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2275 "", 2276 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2277 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2278 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2279 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2280 "", 2281 "", 2282 "", 2283 "", 2284 "", 2285 "", 2286 "", 2287 "", 2288 "", 2289 "", 2290 "Ivy Bridge", // 0x3a 2291 "", 2292 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2293 "", // 0x3d "Next Generation Intel Core Processor" 2294 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2295 "", // 0x3f "Future Generation Intel Xeon Processor" 2296 "", 2297 "", 2298 "", 2299 "", 2300 "", 2301 "Haswell", // 0x45 
"4th Generation Intel Core Processor" 2302 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2303 nullptr 2304 }; 2305 2306 /* Brand ID is for back compatibility 2307 * Newer CPUs uses the extended brand string */ 2308 const char* const _brand_id[] = { 2309 "", 2310 "Celeron processor", 2311 "Pentium III processor", 2312 "Intel Pentium III Xeon processor", 2313 "", 2314 "", 2315 "", 2316 "", 2317 "Intel Pentium 4 processor", 2318 nullptr 2319 }; 2320 2321 2322 const char* const _feature_edx_id[] = { 2323 "On-Chip FPU", 2324 "Virtual Mode Extensions", 2325 "Debugging Extensions", 2326 "Page Size Extensions", 2327 "Time Stamp Counter", 2328 "Model Specific Registers", 2329 "Physical Address Extension", 2330 "Machine Check Exceptions", 2331 "CMPXCHG8B Instruction", 2332 "On-Chip APIC", 2333 "", 2334 "Fast System Call", 2335 "Memory Type Range Registers", 2336 "Page Global Enable", 2337 "Machine Check Architecture", 2338 "Conditional Mov Instruction", 2339 "Page Attribute Table", 2340 "36-bit Page Size Extension", 2341 "Processor Serial Number", 2342 "CLFLUSH Instruction", 2343 "", 2344 "Debug Trace Store feature", 2345 "ACPI registers in MSR space", 2346 "Intel Architecture MMX Technology", 2347 "Fast Float Point Save and Restore", 2348 "Streaming SIMD extensions", 2349 "Streaming SIMD extensions 2", 2350 "Self-Snoop", 2351 "Hyper Threading", 2352 "Thermal Monitor", 2353 "", 2354 "Pending Break Enable" 2355 }; 2356 2357 const char* const _feature_extended_edx_id[] = { 2358 "", 2359 "", 2360 "", 2361 "", 2362 "", 2363 "", 2364 "", 2365 "", 2366 "", 2367 "", 2368 "", 2369 "SYSCALL/SYSRET", 2370 "", 2371 "", 2372 "", 2373 "", 2374 "", 2375 "", 2376 "", 2377 "", 2378 "Execute Disable Bit", 2379 "", 2380 "", 2381 "", 2382 "", 2383 "", 2384 "", 2385 "RDTSCP", 2386 "", 2387 "Intel 64 Architecture", 2388 "", 2389 "" 2390 }; 2391 2392 const char* const _feature_ecx_id[] = { 2393 "Streaming SIMD Extensions 3", 2394 "PCLMULQDQ", 2395 "64-bit DS Area", 2396 "MONITOR/MWAIT instructions", 2397 "CPL Qualified Debug Store", 2398 "Virtual Machine Extensions", 2399 "Safer Mode Extensions", 2400 "Enhanced Intel SpeedStep technology", 2401 "Thermal Monitor 2", 2402 "Supplemental Streaming SIMD Extensions 3", 2403 "L1 Context ID", 2404 "", 2405 "Fused Multiply-Add", 2406 "CMPXCHG16B", 2407 "xTPR Update Control", 2408 "Perfmon and Debug Capability", 2409 "", 2410 "Process-context identifiers", 2411 "Direct Cache Access", 2412 "Streaming SIMD extensions 4.1", 2413 "Streaming SIMD extensions 4.2", 2414 "x2APIC", 2415 "MOVBE", 2416 "Popcount instruction", 2417 "TSC-Deadline", 2418 "AESNI", 2419 "XSAVE", 2420 "OSXSAVE", 2421 "AVX", 2422 "F16C", 2423 "RDRAND", 2424 "" 2425 }; 2426 2427 const char* const _feature_extended_ecx_id[] = { 2428 "LAHF/SAHF instruction support", 2429 "Core multi-processor legacy mode", 2430 "", 2431 "", 2432 "", 2433 "Advanced Bit Manipulations: LZCNT", 2434 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2435 "Misaligned SSE mode", 2436 "", 2437 "", 2438 "", 2439 "", 2440 "", 2441 "", 2442 "", 2443 "", 2444 "", 2445 "", 2446 "", 2447 "", 2448 "", 2449 "", 2450 "", 2451 "", 2452 "", 2453 "", 2454 "", 2455 "", 2456 "", 2457 "", 2458 "", 2459 "" 2460 }; 2461 2462 void VM_Version::initialize_tsc(void) { 2463 ResourceMark rm; 2464 2465 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2466 if (cpuid_brand_string_stub_blob == nullptr) { 2467 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2468 } 2469 
CodeBuffer c(cpuid_brand_string_stub_blob); 2470 VM_Version_StubGenerator g(&c); 2471 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2472 g.generate_getCPUIDBrandString()); 2473 } 2474 2475 const char* VM_Version::cpu_model_description(void) { 2476 uint32_t cpu_family = extended_cpu_family(); 2477 uint32_t cpu_model = extended_cpu_model(); 2478 const char* model = nullptr; 2479 2480 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2481 for (uint32_t i = 0; i <= cpu_model; i++) { 2482 model = _model_id_pentium_pro[i]; 2483 if (model == nullptr) { 2484 break; 2485 } 2486 } 2487 } 2488 return model; 2489 } 2490 2491 const char* VM_Version::cpu_brand_string(void) { 2492 if (_cpu_brand_string == nullptr) { 2493 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2494 if (nullptr == _cpu_brand_string) { 2495 return nullptr; 2496 } 2497 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2498 if (ret_val != OS_OK) { 2499 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2500 _cpu_brand_string = nullptr; 2501 } 2502 } 2503 return _cpu_brand_string; 2504 } 2505 2506 const char* VM_Version::cpu_brand(void) { 2507 const char* brand = nullptr; 2508 2509 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2510 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2511 brand = _brand_id[0]; 2512 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2513 brand = _brand_id[i]; 2514 } 2515 } 2516 return brand; 2517 } 2518 2519 bool VM_Version::cpu_is_em64t(void) { 2520 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2521 } 2522 2523 bool VM_Version::is_netburst(void) { 2524 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2525 } 2526 2527 bool VM_Version::supports_tscinv_ext(void) { 2528 if (!supports_tscinv_bit()) { 2529 return false; 2530 } 2531 2532 if (is_intel()) { 2533 return true; 2534 } 2535 2536 if (is_amd()) { 2537 return !is_amd_Barcelona(); 2538 } 2539 2540 if (is_hygon()) { 2541 return true; 2542 } 2543 2544 return false; 2545 } 2546 2547 void VM_Version::resolve_cpu_information_details(void) { 2548 2549 // in future we want to base this information on proper cpu 2550 // and cache topology enumeration such as: 2551 // Intel 64 Architecture Processor Topology Enumeration 2552 // which supports system cpu and cache topology enumeration 2553 // either using 2xAPICIDs or initial APICIDs 2554 2555 // currently only rough cpu information estimates 2556 // which will not necessarily reflect the exact configuration of the system 2557 2558 // this is the number of logical hardware threads 2559 // visible to the operating system 2560 _no_of_threads = os::processor_count(); 2561 2562 // find out number of threads per cpu package 2563 int threads_per_package = threads_per_core() * cores_per_cpu(); 2564 2565 // use amount of threads visible to the process in order to guess number of sockets 2566 _no_of_sockets = _no_of_threads / threads_per_package; 2567 2568 // process might only see a subset of the total number of threads 2569 // from a single processor package. Virtualization/resource management for example. 2570 // If so then just write a hard 1 as num of pkgs. 
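// Illustrative arithmetic, assuming a 2-socket machine with 8 cores per
// package and 2 threads per core: threads_per_package = 2 * 8 = 16, so
// 32 visible hardware threads give _no_of_sockets = 32 / 16 = 2, while a
// process restricted to 8 visible threads would compute 8 / 16 == 0 and
// rely on the clamp below.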
2571 if (0 == _no_of_sockets) { 2572 _no_of_sockets = 1; 2573 } 2574 2575 // estimate the number of cores 2576 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2577 } 2578 2579 2580 const char* VM_Version::cpu_family_description(void) { 2581 int cpu_family_id = extended_cpu_family(); 2582 if (is_amd()) { 2583 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2584 return _family_id_amd[cpu_family_id]; 2585 } 2586 } 2587 if (is_intel()) { 2588 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2589 return cpu_model_description(); 2590 } 2591 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2592 return _family_id_intel[cpu_family_id]; 2593 } 2594 } 2595 if (is_hygon()) { 2596 return "Dhyana"; 2597 } 2598 return "Unknown x86"; 2599 } 2600 2601 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2602 assert(buf != nullptr, "buffer is null!"); 2603 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2604 2605 const char* cpu_type = nullptr; 2606 const char* x64 = nullptr; 2607 2608 if (is_intel()) { 2609 cpu_type = "Intel"; 2610 x64 = cpu_is_em64t() ? " Intel64" : ""; 2611 } else if (is_amd()) { 2612 cpu_type = "AMD"; 2613 x64 = cpu_is_em64t() ? " AMD64" : ""; 2614 } else if (is_hygon()) { 2615 cpu_type = "Hygon"; 2616 x64 = cpu_is_em64t() ? " AMD64" : ""; 2617 } else { 2618 cpu_type = "Unknown x86"; 2619 x64 = cpu_is_em64t() ? " x86_64" : ""; 2620 } 2621 2622 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2623 cpu_type, 2624 cpu_family_description(), 2625 supports_ht() ? " (HT)" : "", 2626 supports_sse3() ? " SSE3" : "", 2627 supports_ssse3() ? " SSSE3" : "", 2628 supports_sse4_1() ? " SSE4.1" : "", 2629 supports_sse4_2() ? " SSE4.2" : "", 2630 supports_sse4a() ? " SSE4A" : "", 2631 is_netburst() ? " Netburst" : "", 2632 is_intel_family_core() ? 
" Core" : "", 2633 x64); 2634 2635 return OS_OK; 2636 } 2637 2638 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2639 assert(buf != nullptr, "buffer is null!"); 2640 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2641 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2642 2643 // invoke newly generated asm code to fetch CPU Brand String 2644 getCPUIDBrandString_stub(&_cpuid_info); 2645 2646 // fetch results into buffer 2647 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2648 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2649 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2650 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2651 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2652 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2653 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2654 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2655 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2656 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2657 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2658 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2659 2660 return OS_OK; 2661 } 2662 2663 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2664 guarantee(buf != nullptr, "buffer is null!"); 2665 guarantee(buf_len > 0, "buffer len not enough!"); 2666 2667 unsigned int flag = 0; 2668 unsigned int fi = 0; 2669 size_t written = 0; 2670 const char* prefix = ""; 2671 2672 #define WRITE_TO_BUF(string) \ 2673 { \ 2674 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2675 if (res < 0) { \ 2676 return buf_len - 1; \ 2677 } \ 2678 written += res; \ 2679 if (prefix[0] == '\0') { \ 2680 prefix = ", "; \ 2681 } \ 2682 } 2683 2684 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2685 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2686 continue; /* no hyperthreading */ 2687 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2688 continue; /* no fast system call */ 2689 } 2690 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2691 WRITE_TO_BUF(_feature_edx_id[fi]); 2692 } 2693 } 2694 2695 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2696 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2697 WRITE_TO_BUF(_feature_ecx_id[fi]); 2698 } 2699 } 2700 2701 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2702 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2703 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2704 } 2705 } 2706 2707 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2708 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2709 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2710 } 2711 } 2712 2713 if (supports_tscinv_bit()) { 2714 WRITE_TO_BUF("Invariant TSC"); 2715 } 2716 2717 return written; 2718 } 2719 2720 /** 2721 * Write a detailed description of the cpu to a given buffer, including 2722 * feature set. 
2723 */ 2724 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2725 assert(buf != nullptr, "buffer is null!"); 2726 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2727 2728 static const char* unknown = "<unknown>"; 2729 char vendor_id[VENDOR_LENGTH]; 2730 const char* family = nullptr; 2731 const char* model = nullptr; 2732 const char* brand = nullptr; 2733 int outputLen = 0; 2734 2735 family = cpu_family_description(); 2736 if (family == nullptr) { 2737 family = unknown; 2738 } 2739 2740 model = cpu_model_description(); 2741 if (model == nullptr) { 2742 model = unknown; 2743 } 2744 2745 brand = cpu_brand_string(); 2746 2747 if (brand == nullptr) { 2748 brand = cpu_brand(); 2749 if (brand == nullptr) { 2750 brand = unknown; 2751 } 2752 } 2753 2754 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2755 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2756 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2757 vendor_id[VENDOR_LENGTH-1] = '\0'; 2758 2759 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2760 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2761 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2762 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2763 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2764 "Supports: ", 2765 brand, 2766 vendor_id, 2767 family, 2768 extended_cpu_family(), 2769 model, 2770 extended_cpu_model(), 2771 cpu_stepping(), 2772 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2773 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2774 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2775 _cpuid_info.std_cpuid1_eax.value, 2776 _cpuid_info.std_cpuid1_ebx.value, 2777 _cpuid_info.std_cpuid1_ecx.value, 2778 _cpuid_info.std_cpuid1_edx.value, 2779 _cpuid_info.ext_cpuid1_eax, 2780 _cpuid_info.ext_cpuid1_ebx, 2781 _cpuid_info.ext_cpuid1_ecx, 2782 _cpuid_info.ext_cpuid1_edx); 2783 2784 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2785 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2786 return OS_ERR; 2787 } 2788 2789 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2790 2791 return OS_OK; 2792 } 2793 2794 2795 // Fill in Abstract_VM_Version statics 2796 void VM_Version::initialize_cpu_information() { 2797 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2798 assert(!_initialized, "shouldn't be initialized yet"); 2799 resolve_cpu_information_details(); 2800 2801 // initialize cpu_name and cpu_desc 2802 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2803 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2804 _initialized = true; 2805 } 2806 2807 /** 2808 * For information about extracting the frequency from the cpu brand string, please see: 2809 * 2810 * Intel Processor Identification and the CPUID Instruction 2811 * Application Note 485 2812 * May 2012 2813 * 2814 * The return value is the frequency in Hz. 2815 */ 2816 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2817 const char* const brand_string = cpu_brand_string(); 2818 if (brand_string == nullptr) { 2819 return 0; 2820 } 2821 const int64_t MEGA = 1000000; 2822 int64_t multiplier = 0; 2823 int64_t frequency = 0; 2824 uint8_t idx = 0; 2825 // The brand string buffer is at most 48 bytes. 2826 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
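// Illustrative walk-through, assuming the brand string contains "3.20GHz":
// the scan below stops with idx at 'G', so multiplier = MEGA * 1000 (1e9),
// and since brand_string[idx-3] == '.' the first branch computes
// 3 * 1e9 + 2 * 1e8 + 0 * 1e7 = 3.2e9 Hz.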
2827 for (; idx < 48-2; ++idx) { 2828 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2829 // Search brand string for "yHz" where y is M, G, or T. 2830 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2831 if (brand_string[idx] == 'M') { 2832 multiplier = MEGA; 2833 } else if (brand_string[idx] == 'G') { 2834 multiplier = MEGA * 1000; 2835 } else if (brand_string[idx] == 'T') { 2836 multiplier = MEGA * MEGA; 2837 } 2838 break; 2839 } 2840 } 2841 if (multiplier > 0) { 2842 // Compute frequency (in Hz) from brand string. 2843 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2844 frequency = (brand_string[idx-4] - '0') * multiplier; 2845 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2846 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2847 } else { // format is "xxxx" 2848 frequency = (brand_string[idx-4] - '0') * 1000; 2849 frequency += (brand_string[idx-3] - '0') * 100; 2850 frequency += (brand_string[idx-2] - '0') * 10; 2851 frequency += (brand_string[idx-1] - '0'); 2852 frequency *= multiplier; 2853 } 2854 } 2855 return frequency; 2856 } 2857 2858 2859 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2860 if (_max_qualified_cpu_frequency == 0) { 2861 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2862 } 2863 return _max_qualified_cpu_frequency; 2864 } 2865 2866 uint64_t VM_Version::CpuidInfo::feature_flags() const { 2867 uint64_t result = 0; 2868 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2869 result |= CPU_CX8; 2870 if (std_cpuid1_edx.bits.cmov != 0) 2871 result |= CPU_CMOV; 2872 if (std_cpuid1_edx.bits.clflush != 0) 2873 result |= CPU_FLUSH; 2874 // clflush should always be available on x86_64 2875 // if not we are in real trouble because we rely on it 2876 // to flush the code cache. 2877 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2878 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2879 ext_cpuid1_edx.bits.fxsr != 0)) 2880 result |= CPU_FXSR; 2881 // HT flag is set for multi-core processors also. 
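// (The raw CPUID HTT bit alone would be misleading for this purpose:
// multi-core packages report it even without SMT, so the flag is derived
// from the computed threads-per-core count instead.)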
2882 if (threads_per_core() > 1) 2883 result |= CPU_HT; 2884 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2885 ext_cpuid1_edx.bits.mmx != 0)) 2886 result |= CPU_MMX; 2887 if (std_cpuid1_edx.bits.sse != 0) 2888 result |= CPU_SSE; 2889 if (std_cpuid1_edx.bits.sse2 != 0) 2890 result |= CPU_SSE2; 2891 if (std_cpuid1_ecx.bits.sse3 != 0) 2892 result |= CPU_SSE3; 2893 if (std_cpuid1_ecx.bits.ssse3 != 0) 2894 result |= CPU_SSSE3; 2895 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2896 result |= CPU_SSE4_1; 2897 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2898 result |= CPU_SSE4_2; 2899 if (std_cpuid1_ecx.bits.popcnt != 0) 2900 result |= CPU_POPCNT; 2901 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 2902 xem_xcr0_eax.bits.apx_f != 0) { 2903 result |= CPU_APX_F; 2904 } 2905 if (std_cpuid1_ecx.bits.avx != 0 && 2906 std_cpuid1_ecx.bits.osxsave != 0 && 2907 xem_xcr0_eax.bits.sse != 0 && 2908 xem_xcr0_eax.bits.ymm != 0) { 2909 result |= CPU_AVX; 2910 result |= CPU_VZEROUPPER; 2911 if (sefsl1_cpuid7_eax.bits.sha512 != 0) 2912 result |= CPU_SHA512; 2913 if (std_cpuid1_ecx.bits.f16c != 0) 2914 result |= CPU_F16C; 2915 if (sef_cpuid7_ebx.bits.avx2 != 0) { 2916 result |= CPU_AVX2; 2917 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 2918 result |= CPU_AVX_IFMA; 2919 } 2920 if (sef_cpuid7_ecx.bits.gfni != 0) 2921 result |= CPU_GFNI; 2922 if (sef_cpuid7_ebx.bits.avx512f != 0 && 2923 xem_xcr0_eax.bits.opmask != 0 && 2924 xem_xcr0_eax.bits.zmm512 != 0 && 2925 xem_xcr0_eax.bits.zmm32 != 0) { 2926 result |= CPU_AVX512F; 2927 if (sef_cpuid7_ebx.bits.avx512cd != 0) 2928 result |= CPU_AVX512CD; 2929 if (sef_cpuid7_ebx.bits.avx512dq != 0) 2930 result |= CPU_AVX512DQ; 2931 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 2932 result |= CPU_AVX512_IFMA; 2933 if (sef_cpuid7_ebx.bits.avx512pf != 0) 2934 result |= CPU_AVX512PF; 2935 if (sef_cpuid7_ebx.bits.avx512er != 0) 2936 result |= CPU_AVX512ER; 2937 if (sef_cpuid7_ebx.bits.avx512bw != 0) 2938 result |= CPU_AVX512BW; 2939 if (sef_cpuid7_ebx.bits.avx512vl != 0) 2940 result |= CPU_AVX512VL; 2941 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2942 result |= CPU_AVX512_VPOPCNTDQ; 2943 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2944 result |= CPU_AVX512_VPCLMULQDQ; 2945 if (sef_cpuid7_ecx.bits.vaes != 0) 2946 result |= CPU_AVX512_VAES; 2947 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 2948 result |= CPU_AVX512_VNNI; 2949 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2950 result |= CPU_AVX512_BITALG; 2951 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2952 result |= CPU_AVX512_VBMI; 2953 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2954 result |= CPU_AVX512_VBMI2; 2955 } 2956 } 2957 if (std_cpuid1_ecx.bits.hv != 0) 2958 result |= CPU_HV; 2959 if (sef_cpuid7_ebx.bits.bmi1 != 0) 2960 result |= CPU_BMI1; 2961 if (std_cpuid1_edx.bits.tsc != 0) 2962 result |= CPU_TSC; 2963 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 2964 result |= CPU_TSCINV_BIT; 2965 if (std_cpuid1_ecx.bits.aes != 0) 2966 result |= CPU_AES; 2967 if (ext_cpuid1_ecx.bits.lzcnt != 0) 2968 result |= CPU_LZCNT; 2969 if (ext_cpuid1_ecx.bits.prefetchw != 0) 2970 result |= CPU_3DNOW_PREFETCH; 2971 if (sef_cpuid7_ebx.bits.erms != 0) 2972 result |= CPU_ERMS; 2973 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 2974 result |= CPU_FSRM; 2975 if (std_cpuid1_ecx.bits.clmul != 0) 2976 result |= CPU_CLMUL; 2977 if (sef_cpuid7_ebx.bits.rtm != 0) 2978 result |= CPU_RTM; 2979 if (sef_cpuid7_ebx.bits.adx != 0) 2980 result |= CPU_ADX; 2981 if (sef_cpuid7_ebx.bits.bmi2 != 0) 2982 result |= CPU_BMI2; 2983 if (sef_cpuid7_ebx.bits.sha != 0) 2984 result |= CPU_SHA; 
2985 if (std_cpuid1_ecx.bits.fma != 0) 2986 result |= CPU_FMA; 2987 if (sef_cpuid7_ebx.bits.clflushopt != 0) 2988 result |= CPU_FLUSHOPT; 2989 if (sef_cpuid7_ebx.bits.clwb != 0) 2990 result |= CPU_CLWB; 2991 if (ext_cpuid1_edx.bits.rdtscp != 0) 2992 result |= CPU_RDTSCP; 2993 if (sef_cpuid7_ecx.bits.rdpid != 0) 2994 result |= CPU_RDPID; 2995 2996 // AMD|Hygon additional features. 2997 if (is_amd_family()) { 2998 // PREFETCHW was checked above, check TDNOW here. 2999 if ((ext_cpuid1_edx.bits.tdnow != 0)) 3000 result |= CPU_3DNOW_PREFETCH; 3001 if (ext_cpuid1_ecx.bits.sse4a != 0) 3002 result |= CPU_SSE4A; 3003 } 3004 3005 // Intel additional features. 3006 if (is_intel()) { 3007 if (sef_cpuid7_edx.bits.serialize != 0) 3008 result |= CPU_SERIALIZE; 3009 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0) 3010 result |= CPU_AVX512_FP16; 3011 } 3012 3013 // ZX additional features. 3014 if (is_zx()) { 3015 // We do not know if these are supported by ZX, so we cannot trust 3016 // common CPUID bit for them. 3017 assert((result & CPU_CLWB) == 0, "Check if it is supported?"); 3018 result &= ~CPU_CLWB; 3019 } 3020 3021 // Protection key features. 3022 if (sef_cpuid7_ecx.bits.pku != 0) { 3023 result |= CPU_PKU; 3024 } 3025 if (sef_cpuid7_ecx.bits.ospke != 0) { 3026 result |= CPU_OSPKE; 3027 } 3028 3029 // Control flow enforcement (CET) features. 3030 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3031 result |= CPU_CET_SS; 3032 } 3033 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3034 result |= CPU_CET_IBT; 3035 } 3036 3037 // Composite features. 3038 if (supports_tscinv_bit() && 3039 ((is_amd_family() && !is_amd_Barcelona()) || 3040 is_intel_tsc_synched_at_init())) { 3041 result |= CPU_TSCINV; 3042 } 3043 3044 return result; 3045 } 3046 3047 bool VM_Version::os_supports_avx_vectors() { 3048 bool retVal = false; 3049 int nreg = 4; 3050 if (supports_evex()) { 3051 // Verify that OS save/restore all bits of EVEX registers 3052 // during signal processing. 3053 retVal = true; 3054 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3055 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3056 retVal = false; 3057 break; 3058 } 3059 } 3060 } else if (supports_avx()) { 3061 // Verify that OS save/restore all bits of AVX registers 3062 // during signal processing. 3063 retVal = true; 3064 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3065 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3066 retVal = false; 3067 break; 3068 } 3069 } 3070 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3071 if (retVal == false) { 3072 // Verify that OS save/restore all bits of EVEX registers 3073 // during signal processing. 3074 retVal = true; 3075 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3076 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3077 retVal = false; 3078 break; 3079 } 3080 } 3081 } 3082 } 3083 return retVal; 3084 } 3085 3086 bool VM_Version::os_supports_apx_egprs() { 3087 if (!supports_apx_f()) { 3088 return false; 3089 } 3090 // Enable APX support for product builds after 3091 // completion of planned features listed in JDK-8329030. 
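// The check below mirrors os_supports_avx_vectors() above: the detection
// stub stored a known test pattern (egpr_test_value()) in r16/r31 before
// a signal was taken, and APX is only trusted if the OS restored both
// extended GPRs intact across signal handling.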
3092 #if !defined(PRODUCT) 3093 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3094 _cpuid_info.apx_save[1] != egpr_test_value()) { 3095 return false; 3096 } 3097 return true; 3098 #else 3099 return false; 3100 #endif 3101 } 3102 3103 uint VM_Version::cores_per_cpu() { 3104 uint result = 1; 3105 if (is_intel()) { 3106 bool supports_topology = supports_processor_topology(); 3107 if (supports_topology) { 3108 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3109 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3110 } 3111 if (!supports_topology || result == 0) { 3112 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3113 } 3114 } else if (is_amd_family()) { 3115 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3116 } else if (is_zx()) { 3117 bool supports_topology = supports_processor_topology(); 3118 if (supports_topology) { 3119 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3120 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3121 } 3122 if (!supports_topology || result == 0) { 3123 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3124 } 3125 } 3126 return result; 3127 } 3128 3129 uint VM_Version::threads_per_core() { 3130 uint result = 1; 3131 if (is_intel() && supports_processor_topology()) { 3132 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3133 } else if (is_zx() && supports_processor_topology()) { 3134 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3135 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3136 if (cpu_family() >= 0x17) { 3137 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3138 } else { 3139 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3140 cores_per_cpu(); 3141 } 3142 } 3143 return (result == 0 ? 1 : result); 3144 } 3145 3146 uint VM_Version::L1_line_size() { 3147 uint result = 0; 3148 if (is_intel()) { 3149 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3150 } else if (is_amd_family()) { 3151 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3152 } else if (is_zx()) { 3153 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3154 } 3155 if (result < 32) // not defined ? 3156 result = 32; // 32 bytes by default on x86 and other x64 3157 return result; 3158 } 3159 3160 bool VM_Version::is_intel_tsc_synched_at_init() { 3161 if (is_intel_family_core()) { 3162 uint32_t ext_model = extended_cpu_model(); 3163 if (ext_model == CPU_MODEL_NEHALEM_EP || 3164 ext_model == CPU_MODEL_WESTMERE_EP || 3165 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3166 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3167 // <= 2-socket invariant tsc support. EX versions are usually used 3168 // in > 2-socket systems and likely don't synchronize tscs at 3169 // initialization. 3170 // Code that uses tsc values must be prepared for them to arbitrarily 3171 // jump forward or backward. 3172 return true; 3173 } 3174 } 3175 return false; 3176 } 3177 3178 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3179 // Hardware prefetching (distance/size in bytes): 3180 // Pentium 3 - 64 / 32 3181 // Pentium 4 - 256 / 128 3182 // Athlon - 64 / 32 ???? 
3183 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3184 // Core - 128 / 64 3185 // 3186 // Software prefetching (distance in bytes / instruction with best score): 3187 // Pentium 3 - 128 / prefetchnta 3188 // Pentium 4 - 512 / prefetchnta 3189 // Athlon - 128 / prefetchnta 3190 // Opteron - 256 / prefetchnta 3191 // Core - 256 / prefetchnta 3192 // It will be used only when AllocatePrefetchStyle > 0 3193 3194 if (is_amd_family()) { // AMD | Hygon 3195 if (supports_sse2()) { 3196 return 256; // Opteron 3197 } else { 3198 return 128; // Athlon 3199 } 3200 } else { // Intel 3201 if (supports_sse3() && cpu_family() == 6) { 3202 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3203 return 192; 3204 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3205 return 384; 3206 } 3207 } 3208 if (supports_sse2()) { 3209 if (cpu_family() == 6) { 3210 return 256; // Pentium M, Core, Core2 3211 } else { 3212 return 512; // Pentium 4 3213 } 3214 } else { 3215 return 128; // Pentium 3 (and all other old CPUs) 3216 } 3217 } 3218 } 3219 3220 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3221 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3222 switch (id) { 3223 case vmIntrinsics::_floatToFloat16: 3224 case vmIntrinsics::_float16ToFloat: 3225 if (!supports_float16()) { 3226 return false; 3227 } 3228 break; 3229 default: 3230 break; 3231 } 3232 return true; 3233 }
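// For example, on hardware without the requisite float16 support the two
// Float16 conversion intrinsics above are rejected here, and the JIT
// simply compiles the regular (non-intrinsic) implementations of
// Float.floatToFloat16 / Float.float16ToFloat instead.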