/*
 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif
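
// For reference, CPU_FLUSH above corresponds to CPUID leaf 1, EDX bit 19
// (CLFSH). A minimal user-space sketch of the same probe (illustrative only;
// the VM gathers this bit through the generated stub below instead):
//
//   unsigned eax = 1, ebx, ecx, edx;
//   __asm__("cpuid" : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
//   bool has_clflush = (edx >> 19) & 1;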

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
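
    // Leaf 0 returns the highest supported standard leaf in EAX and the
    // 12-byte vendor string in EBX:EDX:ECX (e.g. "GenuineIntel"); the check
    // below only proceeds to the topology leaf if leaf 0xB is reported.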

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
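
    // XCR0 layout, for the masks used in this file: bit 1 = SSE (XMM) state,
    // bit 2 = AVX (YMM) state, and bits 5..7 = AVX-512 opmask / upper-ZMM /
    // hi16-ZMM state; hence the 0x6 and 0xE0 tests below.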

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if AVX is supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx: avx512f bit
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
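
    // The faulting movl above reads address 0. The platform signal handler
    // recognizes _cpuinfo_segv_addr and resumes execution at
    // _cpuinfo_cont_addr, so the stores below observe whatever vector state
    // the OS actually restored across the signal.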

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };

  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);             // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);             // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
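    // Background for the constants above: EBX == 'uneG' is the first third
    // of the little-endian "GenuineIntel" vendor string; 0x0FFF0FF0 masks
    // the family/model fields of the CPUID signature, and 0x50670/0x80650
    // are the masked signatures of the Knights Landing and Knights Mill
    // parts, on which vzeroupper is reportedly very slow.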
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }

  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
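
  // A sketch of how the stub above is typically driven (hypothetical caller;
  // the real callers live in the virtualization detection code):
  //
  //   uint32_t regs[4];                    // eax, ebx, ecx, edx
  //   detect_virt_stub(0x40000000, regs);  // hypervisor vendor leaf
  //   // regs[1..3] then spell a vendor id such as "KVMKVMKVM".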

  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
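
// Leaves 0x80000002..0x80000004 used by generate_getCPUIDBrandString above
// each return 16 bytes of the 48-byte, NUL-padded processor brand string in
// EAX:EBX:ECX:EDX, e.g. "Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz".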

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = feature_flags();
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  _supports_cx8 = supports_cmpxchg8();
  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub have to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }
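
  // Worked example: -XX:UseSSE=4 on a CPU with SSE3 but no SSE4.1 yields
  // use_sse_limit == 3, so the warning above fires and UseSSE is lowered to 3.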

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }
  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
  }

  if (UseAVX < 2)
    _features &= ~CPU_AVX2;

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);
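
  // The resulting string looks roughly like (values illustrative):
  //   "(8 cores per cpu, 2 threads per core) family 6 model 158
  //    stepping 10 microcode 0xf0, cx8, cmov, fxsr, mmx, sse, sse2, ..."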

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }
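
  // Net dependency chain enforced above: UseAES needs the AES-NI CPUID bit;
  // UseAESIntrinsics additionally needs UseAES and SSE3 or higher; and
  // UseAESCTRIntrinsics additionally needs UseAESIntrinsics and SSE4.1.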

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }
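
  // The CRC32 and GHASH stubs above are both built on carry-less
  // multiplication (PCLMULQDQ), which is why they key off UseCLMUL plus an
  // SSE level rather than a dedicated CPUID bit of their own.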

  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }
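
  // Note: the x86 SHA extension only covers SHA-1 and SHA-256; the 64-bit
  // SHA-512 stub is built from AVX2+BMI2 instead, and no SHA3 stub exists on
  // this platform, hence the unconditional SHA3 warning above.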

  if (!supports_rtm() && UseRTMLocking) {
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (!CompilerConfig::is_c2_enabled()) {
      // Only C2 does RTM locking optimization.
      vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
    }
    if (is_intel_family_core()) {
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
        // currently a collision between SKL and HSW_E3
        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
                                        "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
  } else { // !UseRTMLocking
    if (UseRTMForStackLocks) {
      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
      }
      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
    }
    if (UseRTMDeopt) {
      FLAG_SET_DEFAULT(UseRTMDeopt, false);
    }
    if (PrintPreciseRTMLockingStatistics) {
      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
    }
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif
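
  // The Haswell/Broadwell model+stepping gate above matches parts whose TSX
  // implementation was disabled by microcode updates due to errata, which is
  // why RTM stays behind -XX:+UnlockExperimentalVMOptions there.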

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX3 (EVEX)
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }
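
  // Example: UseAVX=2 yields max_vector_size == 32, so MaxVectorSize defaults
  // to 32; an explicit -XX:MaxVectorSize=24 would be rejected as not a power
  // of two and reset to 32 with a warning.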

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).

  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }
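
    // Family numbers here are hex: 0x15 is the Bulldozer line, 0x17 and
    // 0x19 are Zen-based parts, and Hygon Dhyana reports family 0x18.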

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
      warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#endif // _LP64
1702 if (supports_lzcnt()) { 1703 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1704 UseCountLeadingZerosInstruction = true; 1705 } 1706 } else if (UseCountLeadingZerosInstruction) { 1707 warning("lzcnt instruction is not available on this CPU"); 1708 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1709 } 1710 1711 // Use count trailing zeros instruction if available 1712 if (supports_bmi1()) { 1713 // tzcnt does not require VEX prefix 1714 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1715 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1716 // Don't use tzcnt if BMI1 is switched off on command line. 1717 UseCountTrailingZerosInstruction = false; 1718 } else { 1719 UseCountTrailingZerosInstruction = true; 1720 } 1721 } 1722 } else if (UseCountTrailingZerosInstruction) { 1723 warning("tzcnt instruction is not available on this CPU"); 1724 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1725 } 1726 1727 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1728 // VEX prefix is generated only when AVX > 0. 1729 if (supports_bmi1() && supports_avx()) { 1730 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1731 UseBMI1Instructions = true; 1732 } 1733 } else if (UseBMI1Instructions) { 1734 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1735 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1736 } 1737 1738 if (supports_bmi2() && supports_avx()) { 1739 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1740 UseBMI2Instructions = true; 1741 } 1742 } else if (UseBMI2Instructions) { 1743 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1744 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1745 } 1746 1747 // Use population count instruction if available. 1748 if (supports_popcnt()) { 1749 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1750 UsePopCountInstruction = true; 1751 } 1752 } else if (UsePopCountInstruction) { 1753 warning("POPCNT instruction is not available on this CPU"); 1754 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1755 } 1756 1757 // Use fast-string operations if available. 1758 if (supports_erms()) { 1759 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1760 UseFastStosb = true; 1761 } 1762 } else if (UseFastStosb) { 1763 warning("fast-string operations are not available on this CPU"); 1764 FLAG_SET_DEFAULT(UseFastStosb, false); 1765 } 1766 1767 // For AMD Processors use XMM/YMM MOVDQU instructions 1768 // for Object Initialization as default 1769 if (is_amd() && cpu_family() >= 0x19) { 1770 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1771 UseFastStosb = false; 1772 } 1773 } 1774 1775 #ifdef COMPILER2 1776 if (is_intel() && MaxVectorSize > 16) { 1777 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1778 UseFastStosb = false; 1779 } 1780 } 1781 #endif 1782 1783 // Use XMM/YMM MOVDQU instruction for Object Initialization 1784 if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) { 1785 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1786 UseXMMForObjInit = true; 1787 } 1788 } else if (UseXMMForObjInit) { 1789 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1790 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1791 } 1792 1793 #ifdef COMPILER2 1794 if (FLAG_IS_DEFAULT(AlignVector)) { 1795 // Modern processors allow misaligned memory operations for vectors. 
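    // Hence require explicit vector alignment only when unaligned
    // load/stores are not already in use.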
1796     AlignVector = !UseUnalignedLoadStores;
1797   }
1798 #endif // COMPILER2
1799 
1800   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1801     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1802       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1803     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1804       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1805     }
1806   }
1807 
1808   // Allocation prefetch settings
1809   int cache_line_size = checked_cast<int>(prefetch_data_size());
1810   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1811       (cache_line_size > AllocatePrefetchStepSize)) {
1812     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1813   }
1814 
1815   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1816     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1817     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1818       warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
1819     }
1820     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1821   }
1822 
1823   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1824     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1825     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1826   }
1827 
1828   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1829     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1830         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1831       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1832     }
1833 #ifdef COMPILER2
1834     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1835       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1836     }
1837 #endif
1838   }
1839 
1840   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1841 #ifdef COMPILER2
1842     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1843       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1844     }
1845 #endif
1846   }
1847 
1848 #ifdef _LP64
1849   // Prefetch settings
1850 
1851   // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1852   // 50-warehouse specjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
1853   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1854   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1855 
1856   // gc copy/scan is disabled if prefetchw isn't supported, because
1857   // Prefetch::write emits an inlined prefetchw on Linux.
1858   // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1859   // The prefetcht0 instruction used instead works for both amd64 and em64t.
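  // Worked example: with 64-byte dcache lines, the 576-byte default below
  // corresponds to 576 / 64 = 9 lines ahead of the copy/scan cursor.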
1860 1861 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1862 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1863 } 1864 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1865 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1866 } 1867 #endif 1868 1869 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1870 (cache_line_size > ContendedPaddingWidth)) 1871 ContendedPaddingWidth = cache_line_size; 1872 1873 // This machine allows unaligned memory accesses 1874 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1875 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1876 } 1877 1878 #ifndef PRODUCT 1879 if (log_is_enabled(Info, os, cpu)) { 1880 LogStream ls(Log(os, cpu)::info()); 1881 outputStream* log = &ls; 1882 log->print_cr("Logical CPUs per core: %u", 1883 logical_processors_per_package()); 1884 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1885 log->print("UseSSE=%d", UseSSE); 1886 if (UseAVX > 0) { 1887 log->print(" UseAVX=%d", UseAVX); 1888 } 1889 if (UseAES) { 1890 log->print(" UseAES=1"); 1891 } 1892 #ifdef COMPILER2 1893 if (MaxVectorSize > 0) { 1894 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1895 } 1896 #endif 1897 log->cr(); 1898 log->print("Allocation"); 1899 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { 1900 log->print_cr(": no prefetching"); 1901 } else { 1902 log->print(" prefetching: "); 1903 if (UseSSE == 0 && supports_3dnow_prefetch()) { 1904 log->print("PREFETCHW"); 1905 } else if (UseSSE >= 1) { 1906 if (AllocatePrefetchInstr == 0) { 1907 log->print("PREFETCHNTA"); 1908 } else if (AllocatePrefetchInstr == 1) { 1909 log->print("PREFETCHT0"); 1910 } else if (AllocatePrefetchInstr == 2) { 1911 log->print("PREFETCHT2"); 1912 } else if (AllocatePrefetchInstr == 3) { 1913 log->print("PREFETCHW"); 1914 } 1915 } 1916 if (AllocatePrefetchLines > 1) { 1917 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1918 } else { 1919 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1920 } 1921 } 1922 1923 if (PrefetchCopyIntervalInBytes > 0) { 1924 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1925 } 1926 if (PrefetchScanIntervalInBytes > 0) { 1927 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1928 } 1929 if (ContendedPaddingWidth > 0) { 1930 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1931 } 1932 } 1933 #endif // !PRODUCT 1934 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1935 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1936 } 1937 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1938 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1939 } 1940 } 1941 1942 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1943 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1944 if (vrt == XenHVM) { 1945 st->print_cr("Xen hardware-assisted virtualization detected"); 1946 } else if (vrt == KVM) { 1947 st->print_cr("KVM virtualization detected"); 1948 } else if (vrt == VMWare) { 1949 st->print_cr("VMWare virtualization detected"); 1950 VirtualizationSupport::print_virtualization_info(st); 1951 } else if (vrt == HyperV) { 1952 st->print_cr("Hyper-V virtualization detected"); 1953 } else if (vrt == HyperVRole) { 1954 st->print_cr("Hyper-V role detected"); 1955 } 1956 } 1957 1958 bool VM_Version::compute_has_intel_jcc_erratum() { 1959 if (!is_intel_family_core()) { 1960 
// Only Intel CPUs are affected. 1961 return false; 1962 } 1963 // The following table of affected CPUs is based on the following document released by Intel: 1964 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 1965 switch (_model) { 1966 case 0x8E: 1967 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1968 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 1969 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 1970 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 1971 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 1972 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1973 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1974 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 1975 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1976 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 1977 case 0x4E: 1978 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 1979 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 1980 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 1981 return _stepping == 0x3; 1982 case 0x55: 1983 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1984 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1985 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 1986 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 1987 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 1988 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 1989 return _stepping == 0x4 || _stepping == 0x7; 1990 case 0x5E: 1991 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 1992 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 1993 return _stepping == 0x3; 1994 case 0x9E: 1995 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 1996 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 1997 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 1998 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 1999 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2000 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based 
on microarchitecture code name Coffee Lake H
2001     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2002     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2003     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2004     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2005     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2006     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2007     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
2008     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2009     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2010   case 0xA5:
2011     // Not in Intel documentation.
2012     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2013     return true;
2014   case 0xA6:
2015     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2016     return _stepping == 0x0;
2017   case 0xAE:
2018     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2019     return _stepping == 0xA;
2020   default:
2021     // If we are running on another Intel machine not recognized in the table, we are okay.
2022     return false;
2023   }
2024 }
2025 
2026 // On Xen, the cpuid instruction returns
2027 //  eax / registers[0]: Version of Xen
2028 //  ebx / registers[1]: chars 'XenV'
2029 //  ecx / registers[2]: chars 'MMXe'
2030 //  edx / registers[3]: chars 'nVMM'
2031 //
2032 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2033 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2034 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2035 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2036 //
2037 // more information:
2038 // https://kb.vmware.com/s/article/1009458
2039 //
2040 void VM_Version::check_virtualizations() {
2041   uint32_t registers[4] = {0};
2042   char signature[13] = {0};
2043 
2044   // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
2045   // from 0x40000000 up to 0x40010000.
2046   // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2047   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2048     detect_virt_stub(leaf, registers);
2049     memcpy(signature, &registers[1], 12);
2050 
2051     if (strncmp("VMwareVMware", signature, 12) == 0) {
2052       Abstract_VM_Version::_detected_virtualization = VMWare;
2053       // check for extended metrics from guestlib
2054       VirtualizationSupport::initialize();
2055     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2056       Abstract_VM_Version::_detected_virtualization = HyperV;
2057 #ifdef _WINDOWS
2058       // CPUID leaf 0x40000007 is available to the root partition only.
2059       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2060       // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2061       detect_virt_stub(0x40000007, registers);
2062       if ((registers[0] != 0x0) ||
2063           (registers[1] != 0x0) ||
2064           (registers[2] != 0x0) ||
2065           (registers[3] != 0x0)) {
2066         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2067       }
2068 #endif
2069     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2070       Abstract_VM_Version::_detected_virtualization = KVM;
2071     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2072       Abstract_VM_Version::_detected_virtualization = XenHVM;
2073     }
2074   }
2075 }
2076 
2077 #ifdef COMPILER2
2078 // Determine if it's running on Cascade Lake using default options.
2079 bool VM_Version::is_default_intel_cascade_lake() {
2080   return FLAG_IS_DEFAULT(UseAVX) &&
2081          FLAG_IS_DEFAULT(MaxVectorSize) &&
2082          UseAVX > 2 &&
2083          is_intel_cascade_lake();
2084 }
2085 #endif
2086 
2087 bool VM_Version::is_intel_cascade_lake() {
2088   return is_intel_skylake() && _stepping >= 5;
2089 }
2090 
2091 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2092 // for implementing the array copy and clear operations.
2093 // Intel platforms that support the serialize instruction have an improved
2094 // implementation of 64-byte load/stores, so the default threshold is set
2095 // to 0 for these platforms.
2096 int VM_Version::avx3_threshold() {
2097   return (is_intel_family_core() &&
2098           supports_serialize() &&
2099           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2100 }
2101 
2102 static bool _vm_version_initialized = false;
2103 
2104 void VM_Version::initialize() {
2105   ResourceMark rm;
2106   // Generating this stub must be the FIRST use of the assembler.
2107   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2108   if (stub_blob == nullptr) {
2109     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2110   }
2111   CodeBuffer c(stub_blob);
2112   VM_Version_StubGenerator g(&c);
2113 
2114   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2115                                      g.generate_get_cpu_info());
2116   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2117                                     g.generate_detect_virt());
2118 
2119   get_processor_features();
2120 
2121   LP64_ONLY(Assembler::precompute_instructions();)
2122 
2123   if (VM_Version::supports_hv()) { // Supports hypervisor
2124     check_virtualizations();
2125   }
2126   _vm_version_initialized = true;
2127 }
2128 
2129 typedef enum {
2130    CPU_FAMILY_8086_8088  = 0,
2131    CPU_FAMILY_INTEL_286  = 2,
2132    CPU_FAMILY_INTEL_386  = 3,
2133    CPU_FAMILY_INTEL_486  = 4,
2134    CPU_FAMILY_PENTIUM    = 5,
2135    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2136    CPU_FAMILY_PENTIUM_4  = 0xF
2137 } FamilyFlag;
2138 
2139 typedef enum {
2140   RDTSCP_FLAG  = 0x08000000, // bit 27
2141   INTEL64_FLAG = 0x20000000  // bit 29
2142 } _featureExtendedEdxFlag;
2143 
2144 typedef enum {
2145    FPU_FLAG     = 0x00000001,
2146    VME_FLAG     = 0x00000002,
2147    DE_FLAG      = 0x00000004,
2148    PSE_FLAG     = 0x00000008,
2149    TSC_FLAG     = 0x00000010,
2150    MSR_FLAG     = 0x00000020,
2151    PAE_FLAG     = 0x00000040,
2152    MCE_FLAG     = 0x00000080,
2153    CX8_FLAG     = 0x00000100,
2154    APIC_FLAG    = 0x00000200,
2155    SEP_FLAG     = 0x00000800,
2156    MTRR_FLAG    = 0x00001000,
2157    PGE_FLAG     = 0x00002000,
2158    MCA_FLAG     = 0x00004000,
2159    CMOV_FLAG    = 0x00008000,
2160    PAT_FLAG     = 0x00010000,
2161    PSE36_FLAG   = 0x00020000,
2162    PSNUM_FLAG   = 0x00040000,
2163    CLFLUSH_FLAG = 0x00080000,
2164    DTS_FLAG     = 0x00200000,
2165    ACPI_FLAG    = 0x00400000,
2166    MMX_FLAG     = 0x00800000,
2167
FXSR_FLAG = 0x01000000, 2168 SSE_FLAG = 0x02000000, 2169 SSE2_FLAG = 0x04000000, 2170 SS_FLAG = 0x08000000, 2171 HTT_FLAG = 0x10000000, 2172 TM_FLAG = 0x20000000 2173 } FeatureEdxFlag; 2174 2175 static BufferBlob* cpuid_brand_string_stub_blob; 2176 static const int cpuid_brand_string_stub_size = 550; 2177 2178 extern "C" { 2179 typedef void (*getCPUIDBrandString_stub_t)(void*); 2180 } 2181 2182 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2183 2184 // VM_Version statics 2185 enum { 2186 ExtendedFamilyIdLength_INTEL = 16, 2187 ExtendedFamilyIdLength_AMD = 24 2188 }; 2189 2190 const size_t VENDOR_LENGTH = 13; 2191 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2192 static char* _cpu_brand_string = nullptr; 2193 static int64_t _max_qualified_cpu_frequency = 0; 2194 2195 static int _no_of_threads = 0; 2196 static int _no_of_cores = 0; 2197 2198 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2199 "8086/8088", 2200 "", 2201 "286", 2202 "386", 2203 "486", 2204 "Pentium", 2205 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2206 "", 2207 "", 2208 "", 2209 "", 2210 "", 2211 "", 2212 "", 2213 "", 2214 "Pentium 4" 2215 }; 2216 2217 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2218 "", 2219 "", 2220 "", 2221 "", 2222 "5x86", 2223 "K5/K6", 2224 "Athlon/AthlonXP", 2225 "", 2226 "", 2227 "", 2228 "", 2229 "", 2230 "", 2231 "", 2232 "", 2233 "Opteron/Athlon64", 2234 "Opteron QC/Phenom", // Barcelona et.al. 2235 "", 2236 "", 2237 "", 2238 "", 2239 "", 2240 "", 2241 "Zen" 2242 }; 2243 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2244 // September 2013, Vol 3C Table 35-1 2245 const char* const _model_id_pentium_pro[] = { 2246 "", 2247 "Pentium Pro", 2248 "", 2249 "Pentium II model 3", 2250 "", 2251 "Pentium II model 5/Xeon/Celeron", 2252 "Celeron", 2253 "Pentium III/Pentium III Xeon", 2254 "Pentium III/Pentium III Xeon", 2255 "Pentium M model 9", // Yonah 2256 "Pentium III, model A", 2257 "Pentium III, model B", 2258 "", 2259 "Pentium M model D", // Dothan 2260 "", 2261 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2262 "", 2263 "", 2264 "", 2265 "", 2266 "", 2267 "", 2268 "Celeron", // 0x16 Celeron 65nm 2269 "Core 2", // 0x17 Penryn / Harpertown 2270 "", 2271 "", 2272 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2273 "Atom", // 0x1B Z5xx series Silverthorn 2274 "", 2275 "Core 2", // 0x1D Dunnington (6-core) 2276 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2277 "", 2278 "", 2279 "", 2280 "", 2281 "", 2282 "", 2283 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2284 "", 2285 "", 2286 "", // 0x28 2287 "", 2288 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2289 "", 2290 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2291 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2292 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2293 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2294 "", 2295 "", 2296 "", 2297 "", 2298 "", 2299 "", 2300 "", 2301 "", 2302 "", 2303 "", 2304 "Ivy Bridge", // 0x3a 2305 "", 2306 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2307 "", // 0x3d "Next Generation Intel Core Processor" 2308 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2309 "", // 0x3f "Future Generation Intel Xeon Processor" 2310 "", 2311 "", 2312 "", 2313 "", 2314 "", 2315 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2316 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2317 nullptr 2318 }; 2319 2320 /* Brand ID is for back compatibility 2321 * 
Newer CPUs use the extended brand string. */
2322 const char* const _brand_id[] = {
2323   "",
2324   "Celeron processor",
2325   "Pentium III processor",
2326   "Intel Pentium III Xeon processor",
2327   "",
2328   "",
2329   "",
2330   "",
2331   "Intel Pentium 4 processor",
2332   nullptr
2333 };
2334 
2335 
2336 const char* const _feature_edx_id[] = {
2337   "On-Chip FPU",
2338   "Virtual Mode Extensions",
2339   "Debugging Extensions",
2340   "Page Size Extensions",
2341   "Time Stamp Counter",
2342   "Model Specific Registers",
2343   "Physical Address Extension",
2344   "Machine Check Exceptions",
2345   "CMPXCHG8B Instruction",
2346   "On-Chip APIC",
2347   "",
2348   "Fast System Call",
2349   "Memory Type Range Registers",
2350   "Page Global Enable",
2351   "Machine Check Architecture",
2352   "Conditional Mov Instruction",
2353   "Page Attribute Table",
2354   "36-bit Page Size Extension",
2355   "Processor Serial Number",
2356   "CLFLUSH Instruction",
2357   "",
2358   "Debug Trace Store feature",
2359   "ACPI registers in MSR space",
2360   "Intel Architecture MMX Technology",
2361   "Fast Floating Point Save and Restore",
2362   "Streaming SIMD extensions",
2363   "Streaming SIMD extensions 2",
2364   "Self-Snoop",
2365   "Hyper Threading",
2366   "Thermal Monitor",
2367   "",
2368   "Pending Break Enable"
2369 };
2370 
2371 const char* const _feature_extended_edx_id[] = {
2372   "",
2373   "",
2374   "",
2375   "",
2376   "",
2377   "",
2378   "",
2379   "",
2380   "",
2381   "",
2382   "",
2383   "SYSCALL/SYSRET",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "",
2392   "Execute Disable Bit",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "",
2399   "RDTSCP",
2400   "",
2401   "Intel 64 Architecture",
2402   "",
2403   ""
2404 };
2405 
2406 const char* const _feature_ecx_id[] = {
2407   "Streaming SIMD Extensions 3",
2408   "PCLMULQDQ",
2409   "64-bit DS Area",
2410   "MONITOR/MWAIT instructions",
2411   "CPL Qualified Debug Store",
2412   "Virtual Machine Extensions",
2413   "Safer Mode Extensions",
2414   "Enhanced Intel SpeedStep technology",
2415   "Thermal Monitor 2",
2416   "Supplemental Streaming SIMD Extensions 3",
2417   "L1 Context ID",
2418   "",
2419   "Fused Multiply-Add",
2420   "CMPXCHG16B",
2421   "xTPR Update Control",
2422   "Perfmon and Debug Capability",
2423   "",
2424   "Process-context identifiers",
2425   "Direct Cache Access",
2426   "Streaming SIMD extensions 4.1",
2427   "Streaming SIMD extensions 4.2",
2428   "x2APIC",
2429   "MOVBE",
2430   "Popcount instruction",
2431   "TSC-Deadline",
2432   "AESNI",
2433   "XSAVE",
2434   "OSXSAVE",
2435   "AVX",
2436   "F16C",
2437   "RDRAND",
2438   ""
2439 };
2440 
2441 const char* const _feature_extended_ecx_id[] = {
2442   "LAHF/SAHF instruction support",
2443   "Core multi-processor legacy mode",
2444   "",
2445   "",
2446   "",
2447   "Advanced Bit Manipulations: LZCNT",
2448   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2449   "Misaligned SSE mode",
2450   "",
2451   "",
2452   "",
2453   "",
2454   "",
2455   "",
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   ""
2474 };
2475 
2476 void VM_Version::initialize_tsc(void) {
2477   ResourceMark rm;
2478 
2479   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2480   if (cpuid_brand_string_stub_blob == nullptr) {
2481     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2482   }
2483   CodeBuffer c(cpuid_brand_string_stub_blob);
2484   VM_Version_StubGenerator g(&c);
2485   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2486                                             g.generate_getCPUIDBrandString());
2487 }
2488 
2489 const char* VM_Version::cpu_model_description(void) {
2490   uint32_t cpu_family = extended_cpu_family();
2491   uint32_t cpu_model = extended_cpu_model();
2492   const char* model = nullptr;
2493 
2494   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2495     for (uint32_t i = 0; i <= cpu_model; i++) {
2496       model = _model_id_pentium_pro[i];
2497       if (model == nullptr) {
2498         break;
2499       }
2500     }
2501   }
2502   return model;
2503 }
2504 
2505 const char* VM_Version::cpu_brand_string(void) {
2506   if (_cpu_brand_string == nullptr) {
2507     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2508     if (nullptr == _cpu_brand_string) {
2509       return nullptr;
2510     }
2511     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2512     if (ret_val != OS_OK) {
2513       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2514       _cpu_brand_string = nullptr;
2515     }
2516   }
2517   return _cpu_brand_string;
2518 }
2519 
2520 const char* VM_Version::cpu_brand(void) {
2521   const char* brand = nullptr;
2522 
2523   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2524     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2525     brand = _brand_id[0];
2526     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2527       brand = _brand_id[i];
2528     }
2529   }
2530   return brand;
2531 }
2532 
2533 bool VM_Version::cpu_is_em64t(void) {
2534   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2535 }
2536 
2537 bool VM_Version::is_netburst(void) {
2538   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2539 }
2540 
2541 bool VM_Version::supports_tscinv_ext(void) {
2542   if (!supports_tscinv_bit()) {
2543     return false;
2544   }
2545 
2546   if (is_intel()) {
2547     return true;
2548   }
2549 
2550   if (is_amd()) {
2551     return !is_amd_Barcelona();
2552   }
2553 
2554   if (is_hygon()) {
2555     return true;
2556   }
2557 
2558   return false;
2559 }
2560 
2561 void VM_Version::resolve_cpu_information_details(void) {
2562 
2563   // In the future we want to base this information on proper cpu
2564   // and cache topology enumeration, such as:
2565   // Intel 64 Architecture Processor Topology Enumeration,
2566   // which supports system cpu and cache topology enumeration
2567   // using either x2APIC IDs or initial APIC IDs.
2568 
2569   // Currently we make only rough cpu information estimates
2570   // which will not necessarily reflect the exact configuration of the system.
2571 
2572   // this is the number of logical hardware threads
2573   // visible to the operating system
2574   _no_of_threads = os::processor_count();
2575 
2576   // find out the number of threads per cpu package
2577   int threads_per_package = threads_per_core() * cores_per_cpu();
2578 
2579   // use the number of threads visible to the process to guess the number of sockets
2580   _no_of_sockets = _no_of_threads / threads_per_package;
2581 
2582   // The process might only see a subset of the total number of threads
2583   // from a single processor package, e.g. under virtualization or resource management.
2584   // If so, just report a single package.
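  // (For example, a process restricted to 4 of 32 hardware threads would
  // compute 0 in the division above.)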
2585 if (0 == _no_of_sockets) { 2586 _no_of_sockets = 1; 2587 } 2588 2589 // estimate the number of cores 2590 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2591 } 2592 2593 2594 const char* VM_Version::cpu_family_description(void) { 2595 int cpu_family_id = extended_cpu_family(); 2596 if (is_amd()) { 2597 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2598 return _family_id_amd[cpu_family_id]; 2599 } 2600 } 2601 if (is_intel()) { 2602 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2603 return cpu_model_description(); 2604 } 2605 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2606 return _family_id_intel[cpu_family_id]; 2607 } 2608 } 2609 if (is_hygon()) { 2610 return "Dhyana"; 2611 } 2612 return "Unknown x86"; 2613 } 2614 2615 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2616 assert(buf != nullptr, "buffer is null!"); 2617 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2618 2619 const char* cpu_type = nullptr; 2620 const char* x64 = nullptr; 2621 2622 if (is_intel()) { 2623 cpu_type = "Intel"; 2624 x64 = cpu_is_em64t() ? " Intel64" : ""; 2625 } else if (is_amd()) { 2626 cpu_type = "AMD"; 2627 x64 = cpu_is_em64t() ? " AMD64" : ""; 2628 } else if (is_hygon()) { 2629 cpu_type = "Hygon"; 2630 x64 = cpu_is_em64t() ? " AMD64" : ""; 2631 } else { 2632 cpu_type = "Unknown x86"; 2633 x64 = cpu_is_em64t() ? " x86_64" : ""; 2634 } 2635 2636 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2637 cpu_type, 2638 cpu_family_description(), 2639 supports_ht() ? " (HT)" : "", 2640 supports_sse3() ? " SSE3" : "", 2641 supports_ssse3() ? " SSSE3" : "", 2642 supports_sse4_1() ? " SSE4.1" : "", 2643 supports_sse4_2() ? " SSE4.2" : "", 2644 supports_sse4a() ? " SSE4A" : "", 2645 is_netburst() ? " Netburst" : "", 2646 is_intel_family_core() ? 
" Core" : "", 2647 x64); 2648 2649 return OS_OK; 2650 } 2651 2652 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2653 assert(buf != nullptr, "buffer is null!"); 2654 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2655 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2656 2657 // invoke newly generated asm code to fetch CPU Brand String 2658 getCPUIDBrandString_stub(&_cpuid_info); 2659 2660 // fetch results into buffer 2661 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2662 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2663 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2664 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2665 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2666 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2667 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2668 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2669 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2670 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2671 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2672 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2673 2674 return OS_OK; 2675 } 2676 2677 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2678 guarantee(buf != nullptr, "buffer is null!"); 2679 guarantee(buf_len > 0, "buffer len not enough!"); 2680 2681 unsigned int flag = 0; 2682 unsigned int fi = 0; 2683 size_t written = 0; 2684 const char* prefix = ""; 2685 2686 #define WRITE_TO_BUF(string) \ 2687 { \ 2688 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2689 if (res < 0) { \ 2690 return buf_len - 1; \ 2691 } \ 2692 written += res; \ 2693 if (prefix[0] == '\0') { \ 2694 prefix = ", "; \ 2695 } \ 2696 } 2697 2698 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2699 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2700 continue; /* no hyperthreading */ 2701 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2702 continue; /* no fast system call */ 2703 } 2704 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2705 WRITE_TO_BUF(_feature_edx_id[fi]); 2706 } 2707 } 2708 2709 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2710 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2711 WRITE_TO_BUF(_feature_ecx_id[fi]); 2712 } 2713 } 2714 2715 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2716 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2717 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2718 } 2719 } 2720 2721 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2722 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2723 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2724 } 2725 } 2726 2727 if (supports_tscinv_bit()) { 2728 WRITE_TO_BUF("Invariant TSC"); 2729 } 2730 2731 return written; 2732 } 2733 2734 /** 2735 * Write a detailed description of the cpu to a given buffer, including 2736 * feature set. 
2737 */ 2738 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2739 assert(buf != nullptr, "buffer is null!"); 2740 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2741 2742 static const char* unknown = "<unknown>"; 2743 char vendor_id[VENDOR_LENGTH]; 2744 const char* family = nullptr; 2745 const char* model = nullptr; 2746 const char* brand = nullptr; 2747 int outputLen = 0; 2748 2749 family = cpu_family_description(); 2750 if (family == nullptr) { 2751 family = unknown; 2752 } 2753 2754 model = cpu_model_description(); 2755 if (model == nullptr) { 2756 model = unknown; 2757 } 2758 2759 brand = cpu_brand_string(); 2760 2761 if (brand == nullptr) { 2762 brand = cpu_brand(); 2763 if (brand == nullptr) { 2764 brand = unknown; 2765 } 2766 } 2767 2768 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2769 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2770 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2771 vendor_id[VENDOR_LENGTH-1] = '\0'; 2772 2773 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2774 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2775 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2776 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2777 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2778 "Supports: ", 2779 brand, 2780 vendor_id, 2781 family, 2782 extended_cpu_family(), 2783 model, 2784 extended_cpu_model(), 2785 cpu_stepping(), 2786 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2787 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2788 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2789 _cpuid_info.std_cpuid1_eax.value, 2790 _cpuid_info.std_cpuid1_ebx.value, 2791 _cpuid_info.std_cpuid1_ecx.value, 2792 _cpuid_info.std_cpuid1_edx.value, 2793 _cpuid_info.ext_cpuid1_eax, 2794 _cpuid_info.ext_cpuid1_ebx, 2795 _cpuid_info.ext_cpuid1_ecx, 2796 _cpuid_info.ext_cpuid1_edx); 2797 2798 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2799 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2800 return OS_ERR; 2801 } 2802 2803 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2804 2805 return OS_OK; 2806 } 2807 2808 2809 // Fill in Abstract_VM_Version statics 2810 void VM_Version::initialize_cpu_information() { 2811 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2812 assert(!_initialized, "shouldn't be initialized yet"); 2813 resolve_cpu_information_details(); 2814 2815 // initialize cpu_name and cpu_desc 2816 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2817 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2818 _initialized = true; 2819 } 2820 2821 /** 2822 * For information about extracting the frequency from the cpu brand string, please see: 2823 * 2824 * Intel Processor Identification and the CPUID Instruction 2825 * Application Note 485 2826 * May 2012 2827 * 2828 * The return value is the frequency in Hz. 2829 */ 2830 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2831 const char* const brand_string = cpu_brand_string(); 2832 if (brand_string == nullptr) { 2833 return 0; 2834 } 2835 const int64_t MEGA = 1000000; 2836 int64_t multiplier = 0; 2837 int64_t frequency = 0; 2838 uint8_t idx = 0; 2839 // The brand string buffer is at most 48 bytes. 2840 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
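  // Worked example (hypothetical brand string ending in "... @ 2.40GHz"):
  // the scan below leaves idx at 'G', multiplier becomes 1e9, and the
  // "x.xx" branch computes 2*1e9 + 4*1e8 + 0*1e7 = 2.4 GHz.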
2841 for (; idx < 48-2; ++idx) { 2842 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2843 // Search brand string for "yHz" where y is M, G, or T. 2844 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2845 if (brand_string[idx] == 'M') { 2846 multiplier = MEGA; 2847 } else if (brand_string[idx] == 'G') { 2848 multiplier = MEGA * 1000; 2849 } else if (brand_string[idx] == 'T') { 2850 multiplier = MEGA * MEGA; 2851 } 2852 break; 2853 } 2854 } 2855 if (multiplier > 0) { 2856 // Compute frequency (in Hz) from brand string. 2857 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2858 frequency = (brand_string[idx-4] - '0') * multiplier; 2859 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2860 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2861 } else { // format is "xxxx" 2862 frequency = (brand_string[idx-4] - '0') * 1000; 2863 frequency += (brand_string[idx-3] - '0') * 100; 2864 frequency += (brand_string[idx-2] - '0') * 10; 2865 frequency += (brand_string[idx-1] - '0'); 2866 frequency *= multiplier; 2867 } 2868 } 2869 return frequency; 2870 } 2871 2872 2873 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2874 if (_max_qualified_cpu_frequency == 0) { 2875 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2876 } 2877 return _max_qualified_cpu_frequency; 2878 } 2879 2880 uint64_t VM_Version::feature_flags() { 2881 uint64_t result = 0; 2882 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 2883 result |= CPU_CX8; 2884 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 2885 result |= CPU_CMOV; 2886 if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0) 2887 result |= CPU_FLUSH; 2888 #ifdef _LP64 2889 // clflush should always be available on x86_64 2890 // if not we are in real trouble because we rely on it 2891 // to flush the code cache. 2892 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2893 #endif 2894 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2895 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) 2896 result |= CPU_FXSR; 2897 // HT flag is set for multi-core processors also. 
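  // so CPU_HT is derived from the computed threads-per-core count instead.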
2898 if (threads_per_core() > 1) 2899 result |= CPU_HT; 2900 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2901 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) 2902 result |= CPU_MMX; 2903 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 2904 result |= CPU_SSE; 2905 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 2906 result |= CPU_SSE2; 2907 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 2908 result |= CPU_SSE3; 2909 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 2910 result |= CPU_SSSE3; 2911 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 2912 result |= CPU_SSE4_1; 2913 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 2914 result |= CPU_SSE4_2; 2915 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 2916 result |= CPU_POPCNT; 2917 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && 2918 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && 2919 _cpuid_info.xem_xcr0_eax.bits.sse != 0 && 2920 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { 2921 result |= CPU_AVX; 2922 result |= CPU_VZEROUPPER; 2923 if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0) 2924 result |= CPU_F16C; 2925 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) 2926 result |= CPU_AVX2; 2927 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 && 2928 _cpuid_info.xem_xcr0_eax.bits.opmask != 0 && 2929 _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 && 2930 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) { 2931 result |= CPU_AVX512F; 2932 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0) 2933 result |= CPU_AVX512CD; 2934 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0) 2935 result |= CPU_AVX512DQ; 2936 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0) 2937 result |= CPU_AVX512_IFMA; 2938 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0) 2939 result |= CPU_AVX512PF; 2940 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0) 2941 result |= CPU_AVX512ER; 2942 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0) 2943 result |= CPU_AVX512BW; 2944 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0) 2945 result |= CPU_AVX512VL; 2946 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2947 result |= CPU_AVX512_VPOPCNTDQ; 2948 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2949 result |= CPU_AVX512_VPCLMULQDQ; 2950 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0) 2951 result |= CPU_AVX512_VAES; 2952 if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0) 2953 result |= CPU_GFNI; 2954 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0) 2955 result |= CPU_AVX512_VNNI; 2956 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2957 result |= CPU_AVX512_BITALG; 2958 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2959 result |= CPU_AVX512_VBMI; 2960 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2961 result |= CPU_AVX512_VBMI2; 2962 } 2963 } 2964 if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0) 2965 result |= CPU_HV; 2966 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) 2967 result |= CPU_BMI1; 2968 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) 2969 result |= CPU_TSC; 2970 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) 2971 result |= CPU_TSCINV_BIT; 2972 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) 2973 result |= CPU_AES; 2974 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0) 2975 result |= CPU_ERMS; 2976 if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 2977 result |= CPU_FSRM; 2978 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0) 2979 result |= CPU_CLMUL; 2980 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0) 2981 result |= CPU_RTM; 2982 if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0) 2983 result |= CPU_ADX; 2984 if 
(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) 2985 result |= CPU_BMI2; 2986 if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0) 2987 result |= CPU_SHA; 2988 if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0) 2989 result |= CPU_FMA; 2990 if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0) 2991 result |= CPU_FLUSHOPT; 2992 if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0) 2993 result |= CPU_RDTSCP; 2994 if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0) 2995 result |= CPU_RDPID; 2996 2997 // AMD|Hygon features. 2998 if (is_amd_family()) { 2999 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || 3000 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) 3001 result |= CPU_3DNOW_PREFETCH; 3002 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) 3003 result |= CPU_LZCNT; 3004 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 3005 result |= CPU_SSE4A; 3006 } 3007 3008 // Intel features. 3009 if (is_intel()) { 3010 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3011 result |= CPU_LZCNT; 3012 } 3013 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3014 result |= CPU_3DNOW_PREFETCH; 3015 } 3016 if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) { 3017 result |= CPU_CLWB; 3018 } 3019 if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0) 3020 result |= CPU_SERIALIZE; 3021 } 3022 3023 // ZX features. 3024 if (is_zx()) { 3025 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3026 result |= CPU_LZCNT; 3027 } 3028 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3029 result |= CPU_3DNOW_PREFETCH; 3030 } 3031 } 3032 3033 // Protection key features. 3034 if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) { 3035 result |= CPU_PKU; 3036 } 3037 if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) { 3038 result |= CPU_OSPKE; 3039 } 3040 3041 // Control flow enforcement (CET) features. 3042 if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) { 3043 result |= CPU_CET_SS; 3044 } 3045 if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) { 3046 result |= CPU_CET_IBT; 3047 } 3048 3049 // Composite features. 3050 if (supports_tscinv_bit() && 3051 ((is_amd_family() && !is_amd_Barcelona()) || 3052 is_intel_tsc_synched_at_init())) { 3053 result |= CPU_TSCINV; 3054 } 3055 3056 return result; 3057 } 3058 3059 bool VM_Version::os_supports_avx_vectors() { 3060 bool retVal = false; 3061 int nreg = 2 LP64_ONLY(+2); 3062 if (supports_evex()) { 3063 // Verify that OS save/restore all bits of EVEX registers 3064 // during signal processing. 3065 retVal = true; 3066 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3067 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3068 retVal = false; 3069 break; 3070 } 3071 } 3072 } else if (supports_avx()) { 3073 // Verify that OS save/restore all bits of AVX registers 3074 // during signal processing. 3075 retVal = true; 3076 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3077 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3078 retVal = false; 3079 break; 3080 } 3081 } 3082 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3083 if (retVal == false) { 3084 // Verify that OS save/restore all bits of EVEX registers 3085 // during signal processing. 
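      // (The get_cpu_info stub loaded ymm_test_value() into these registers
      // before taking a forced fault; any bits lost across the OS signal
      // context switch show up as mismatches below.)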
3086 retVal = true; 3087 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3088 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3089 retVal = false; 3090 break; 3091 } 3092 } 3093 } 3094 } 3095 return retVal; 3096 } 3097 3098 uint VM_Version::cores_per_cpu() { 3099 uint result = 1; 3100 if (is_intel()) { 3101 bool supports_topology = supports_processor_topology(); 3102 if (supports_topology) { 3103 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3104 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3105 } 3106 if (!supports_topology || result == 0) { 3107 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3108 } 3109 } else if (is_amd_family()) { 3110 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3111 } else if (is_zx()) { 3112 bool supports_topology = supports_processor_topology(); 3113 if (supports_topology) { 3114 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3115 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3116 } 3117 if (!supports_topology || result == 0) { 3118 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3119 } 3120 } 3121 return result; 3122 } 3123 3124 uint VM_Version::threads_per_core() { 3125 uint result = 1; 3126 if (is_intel() && supports_processor_topology()) { 3127 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3128 } else if (is_zx() && supports_processor_topology()) { 3129 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3130 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3131 if (cpu_family() >= 0x17) { 3132 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3133 } else { 3134 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3135 cores_per_cpu(); 3136 } 3137 } 3138 return (result == 0 ? 1 : result); 3139 } 3140 3141 uint VM_Version::L1_line_size() { 3142 uint result = 0; 3143 if (is_intel()) { 3144 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3145 } else if (is_amd_family()) { 3146 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3147 } else if (is_zx()) { 3148 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3149 } 3150 if (result < 32) // not defined ? 3151 result = 32; // 32 bytes by default on x86 and other x64 3152 return result; 3153 } 3154 3155 bool VM_Version::is_intel_tsc_synched_at_init() { 3156 if (is_intel_family_core()) { 3157 uint32_t ext_model = extended_cpu_model(); 3158 if (ext_model == CPU_MODEL_NEHALEM_EP || 3159 ext_model == CPU_MODEL_WESTMERE_EP || 3160 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3161 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3162 // <= 2-socket invariant tsc support. EX versions are usually used 3163 // in > 2-socket systems and likely don't synchronize tscs at 3164 // initialization. 3165 // Code that uses tsc values must be prepared for them to arbitrarily 3166 // jump forward or backward. 3167 return true; 3168 } 3169 } 3170 return false; 3171 } 3172 3173 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3174 // Hardware prefetching (distance/size in bytes): 3175 // Pentium 3 - 64 / 32 3176 // Pentium 4 - 256 / 128 3177 // Athlon - 64 / 32 ???? 
3178 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3179 // Core - 128 / 64 3180 // 3181 // Software prefetching (distance in bytes / instruction with best score): 3182 // Pentium 3 - 128 / prefetchnta 3183 // Pentium 4 - 512 / prefetchnta 3184 // Athlon - 128 / prefetchnta 3185 // Opteron - 256 / prefetchnta 3186 // Core - 256 / prefetchnta 3187 // It will be used only when AllocatePrefetchStyle > 0 3188 3189 if (is_amd_family()) { // AMD | Hygon 3190 if (supports_sse2()) { 3191 return 256; // Opteron 3192 } else { 3193 return 128; // Athlon 3194 } 3195 } else { // Intel 3196 if (supports_sse3() && cpu_family() == 6) { 3197 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3198 return 192; 3199 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3200 #ifdef _LP64 3201 return 384; 3202 #else 3203 return 320; 3204 #endif 3205 } 3206 } 3207 if (supports_sse2()) { 3208 if (cpu_family() == 6) { 3209 return 256; // Pentium M, Core, Core2 3210 } else { 3211 return 512; // Pentium 4 3212 } 3213 } else { 3214 return 128; // Pentium 3 (and all other old CPUs) 3215 } 3216 } 3217 } 3218 3219 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3220 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3221 switch (id) { 3222 case vmIntrinsics::_floatToFloat16: 3223 case vmIntrinsics::_float16ToFloat: 3224 if (!supports_float16()) { 3225 return false; 3226 } 3227 break; 3228 default: 3229 break; 3230 } 3231 return true; 3232 } 3233
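// Illustrative caller's view (a sketch, not code from this file): before
// intrinsifying Float.floatToFloat16 / Float.float16ToFloat, callers consult
// is_intrinsic_supported(vmIntrinsics::_floatToFloat16); when supports_float16()
// is false the VM falls back to the default, non-intrinsic implementation.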