/*
 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
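    // A quick reference for the CPUID leaf 0xB protocol used below (per the
    // x86 CPUID convention): ecx selects the topology level on input
    // (0 = SMT/thread, 1 = core, 2 = package here), eax[4:0] returns the
    // shift width for that level's APIC sub-id, and ebx[15:0] returns the
    // number of logical processors at that level. A level is invalid when
    // both eax[4:0] and ebx[15:0] are zero, which is exactly what the
    // push/and/or/pop sequences below test for.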
    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
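    // For reference (per the x86 XSAVE convention): in the XCR0 value just
    // saved, bit 1 enables SSE (XMM) state, bit 2 enables AVX (upper YMM)
    // state, and bits 5-7 enable the AVX-512 opmask and upper-ZMM state.
    // The constants 0x6 and 0xE0 tested against this saved value later in
    // the stub are masks over exactly those bits.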
    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if AVX state is OS-enabled
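    // A note on branch widths in this stub: jccb/jmpb emit the short rel8
    // forms (reach of roughly +/-127 bytes), while jcc/jmp emit rel32
    // forms. The short forms are used only where the target is known to
    // bind nearby; debug builds assert that the displacement fits when the
    // label binds.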

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of YMM/ZMM
    // registers are not restored after signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit 16: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
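    // What happens around the fault, in outline: the code above parked a
    // known 32-bit pattern (ymm_test_value()) in every lane of the widest
    // available registers, the null load above raises SIGSEGV, the VM's
    // signal handler recognizes _cpuinfo_segv_addr and resumes execution at
    // _cpuinfo_cont_addr, and the code below stores those registers into
    // _cpuid_info so the C++ side can later verify that the OS restored
    // their upper bits across the signal.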

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
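    // For reference: 0x0FFF0FF0 masks the cpuid(0x1) signature down to its
    // family/model fields (dropping the stepping). The two values matched
    // above correspond to family 6 models 0x57 and 0x85, i.e. the Knights
    // Landing and Knights Mill parts, on which vzeroupper is avoided (see
    // also the is_knights_family() feature stripping later in this file).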
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();
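  // For reference, the extended accessors combine the base and extended
  // CPUID signature fields in the usual x86 way (a sketch of the
  // convention, not a new definition):
  //   family = base_family == 0xf ? base_family + ext_family : base_family
  //   model  = (base_family == 0x6 || base_family == 0xf)
  //            ? (ext_model << 4) + base_model : base_model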

  if (cpu_family() > 4) { // it supports CPUID
    _features = feature_flags();
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  _supports_cx8 = supports_cmpxchg8();
  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }
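  // A worked example of the clamping above: on a CPU that reports SSE3 but
  // not SSE4.1, -XX:UseSSE=4 yields use_sse_limit == 3, so the VM warns
  // "UseSSE=4 is not supported on this CPU, setting it to UseSSE=3" and
  // continues with UseSSE=3. With the flag left at its default, the limit
  // is adopted silently.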
  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }
  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
  }

  if (UseAVX < 2)
    _features &= ~CPU_AVX2;

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);
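  // The resulting string looks roughly like (values here are illustrative
  // only): "(8 cores per cpu, 2 threads per core) family 6 model 158
  // stepping 10 microcode 0xea, cx8, cmov, fxsr, mmx, sse, sse2, ...",
  // with the comma-separated feature-name list appended by
  // insert_features_names().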

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }
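  // The dependency chain enforced above, in short:
  //   UseAES (hardware AES-NI)  <-  UseAESIntrinsics (also needs SSE3)
  //                             <-  UseAESCTRIntrinsics (also needs SSE4.1)
  // Disabling a lower link force-disables everything that depends on it,
  // with a warning only when the user set the dependent flag explicitly.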

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (!supports_rtm() && UseRTMLocking) {
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (!CompilerConfig::is_c2_enabled()) {
      // Only C2 does RTM locking optimization.
      vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
    }
    if (is_intel_family_core()) {
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
        // currently a collision between SKL and HSW_E3
        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
                                        "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
  } else { // !UseRTMLocking
    if (UseRTMForStackLocks) {
      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
      }
      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
    }
    if (UseRTMDeopt) {
      FLAG_SET_DEFAULT(UseRTMDeopt, false);
    }
    if (PrintPreciseRTMLockingStatistics) {
      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
    }
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif
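  // A compact view of the SIMD widths resolved below (standard x86
  // register file sizes, for orientation):
  //   UseSSE <  2           -> no vector support (max_vector_size 0)
  //   UseSSE >= 2, no AVX   -> XMM, 16-byte vectors
  //   UseAVX == 1 or 2      -> YMM, 32-byte vectors
  //   UseAVX == 3 (AVX-512) -> ZMM, 64-byte vectors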

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }
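  // (Poly1305 is gated on AVX512-IFMA above because the stub implementation
  // builds on the 52-bit multiply-accumulate instructions, vpmadd52luq /
  // vpmadd52huq, together with VL/BW masking; hence also the 64-byte
  // MaxVectorSize requirement.)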

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new cpus, instructions which update the whole XMM register should be
  // used to prevent partial register stalls due to dependencies on the high
  // half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }
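    // For orientation (AMD's family numbering, as it applies to the
    // generations this code targets): family 0x15 covers the Bulldozer
    // line, 0x17 the Zen 1/2 line, and 0x19 the Zen 3/4 line; the "Hygon
    // family 18h" mentioned below is the Hygon Dhyana derivative of Zen 1.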

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
      warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#endif // _LP64
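  // Background for the checks below: lzcnt/tzcnt are well-defined for a
  // zero input (they return the operand width), unlike the legacy bsr/bsf
  // instructions whose destination is undefined in that case; that is what
  // makes them attractive as count-leading/trailing-zeros implementations.
  // tzcnt is encoded without a VEX prefix, so unlike the rest of BMI1 it
  // does not depend on AVX being enabled.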
1701 if (supports_lzcnt()) { 1702 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1703 UseCountLeadingZerosInstruction = true; 1704 } 1705 } else if (UseCountLeadingZerosInstruction) { 1706 warning("lzcnt instruction is not available on this CPU"); 1707 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1708 } 1709 1710 // Use count trailing zeros instruction if available 1711 if (supports_bmi1()) { 1712 // tzcnt does not require VEX prefix 1713 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1714 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1715 // Don't use tzcnt if BMI1 is switched off on command line. 1716 UseCountTrailingZerosInstruction = false; 1717 } else { 1718 UseCountTrailingZerosInstruction = true; 1719 } 1720 } 1721 } else if (UseCountTrailingZerosInstruction) { 1722 warning("tzcnt instruction is not available on this CPU"); 1723 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1724 } 1725 1726 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1727 // VEX prefix is generated only when AVX > 0. 1728 if (supports_bmi1() && supports_avx()) { 1729 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1730 UseBMI1Instructions = true; 1731 } 1732 } else if (UseBMI1Instructions) { 1733 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1734 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1735 } 1736 1737 if (supports_bmi2() && supports_avx()) { 1738 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1739 UseBMI2Instructions = true; 1740 } 1741 } else if (UseBMI2Instructions) { 1742 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1743 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1744 } 1745 1746 // Use population count instruction if available. 1747 if (supports_popcnt()) { 1748 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1749 UsePopCountInstruction = true; 1750 } 1751 } else if (UsePopCountInstruction) { 1752 warning("POPCNT instruction is not available on this CPU"); 1753 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1754 } 1755 1756 // Use fast-string operations if available. 1757 if (supports_erms()) { 1758 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1759 UseFastStosb = true; 1760 } 1761 } else if (UseFastStosb) { 1762 warning("fast-string operations are not available on this CPU"); 1763 FLAG_SET_DEFAULT(UseFastStosb, false); 1764 } 1765 1766 // For AMD Processors use XMM/YMM MOVDQU instructions 1767 // for Object Initialization as default 1768 if (is_amd() && cpu_family() >= 0x19) { 1769 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1770 UseFastStosb = false; 1771 } 1772 } 1773 1774 #ifdef COMPILER2 1775 if (is_intel() && MaxVectorSize > 16) { 1776 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1777 UseFastStosb = false; 1778 } 1779 } 1780 #endif 1781 1782 // Use XMM/YMM MOVDQU instruction for Object Initialization 1783 if (UseSSE >= 2 && UseUnalignedLoadStores) { 1784 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1785 UseXMMForObjInit = true; 1786 } 1787 } else if (UseXMMForObjInit) { 1788 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1789 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1790 } 1791 1792 #ifdef COMPILER2 1793 if (FLAG_IS_DEFAULT(AlignVector)) { 1794 // Modern processors allow misaligned memory operations for vectors. 
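    // Spelled out, this is the same logic as the assignment below:
    //   UseUnalignedLoadStores == true  -> AlignVector = false (movdqu is fine)
    //   UseUnalignedLoadStores == false -> AlignVector = true  (keep vectors aligned)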
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
  intx cache_line_size = prefetch_data_size();
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0, which disables prefetching. Ignoring the AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings

  // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // gc copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
  // The used prefetcht0 instruction works for both amd64 and em64t.
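  // For the record, the 576 below is exactly the "9 dcache lines" quoted
  // above: 9 * 64-byte cache lines == 576 bytes.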
1859 1860 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1861 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1862 } 1863 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1864 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1865 } 1866 #endif 1867 1868 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1869 (cache_line_size > ContendedPaddingWidth)) 1870 ContendedPaddingWidth = cache_line_size; 1871 1872 // This machine allows unaligned memory accesses 1873 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1874 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1875 } 1876 1877 #ifndef PRODUCT 1878 if (log_is_enabled(Info, os, cpu)) { 1879 LogStream ls(Log(os, cpu)::info()); 1880 outputStream* log = &ls; 1881 log->print_cr("Logical CPUs per core: %u", 1882 logical_processors_per_package()); 1883 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1884 log->print("UseSSE=%d", UseSSE); 1885 if (UseAVX > 0) { 1886 log->print(" UseAVX=%d", UseAVX); 1887 } 1888 if (UseAES) { 1889 log->print(" UseAES=1"); 1890 } 1891 #ifdef COMPILER2 1892 if (MaxVectorSize > 0) { 1893 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1894 } 1895 #endif 1896 log->cr(); 1897 log->print("Allocation"); 1898 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { 1899 log->print_cr(": no prefetching"); 1900 } else { 1901 log->print(" prefetching: "); 1902 if (UseSSE == 0 && supports_3dnow_prefetch()) { 1903 log->print("PREFETCHW"); 1904 } else if (UseSSE >= 1) { 1905 if (AllocatePrefetchInstr == 0) { 1906 log->print("PREFETCHNTA"); 1907 } else if (AllocatePrefetchInstr == 1) { 1908 log->print("PREFETCHT0"); 1909 } else if (AllocatePrefetchInstr == 2) { 1910 log->print("PREFETCHT2"); 1911 } else if (AllocatePrefetchInstr == 3) { 1912 log->print("PREFETCHW"); 1913 } 1914 } 1915 if (AllocatePrefetchLines > 1) { 1916 log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize); 1917 } else { 1918 log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize); 1919 } 1920 } 1921 1922 if (PrefetchCopyIntervalInBytes > 0) { 1923 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1924 } 1925 if (PrefetchScanIntervalInBytes > 0) { 1926 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1927 } 1928 if (ContendedPaddingWidth > 0) { 1929 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1930 } 1931 } 1932 #endif // !PRODUCT 1933 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1934 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1935 } 1936 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1937 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1938 } 1939 } 1940 1941 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1942 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1943 if (vrt == XenHVM) { 1944 st->print_cr("Xen hardware-assisted virtualization detected"); 1945 } else if (vrt == KVM) { 1946 st->print_cr("KVM virtualization detected"); 1947 } else if (vrt == VMWare) { 1948 st->print_cr("VMWare virtualization detected"); 1949 VirtualizationSupport::print_virtualization_info(st); 1950 } else if (vrt == HyperV) { 1951 st->print_cr("Hyper-V virtualization detected"); 1952 } else if (vrt == HyperVRole) { 1953 st->print_cr("Hyper-V role detected"); 1954 } 1955 } 1956 1957 bool VM_Version::compute_has_intel_jcc_erratum() { 1958 if 
(!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another Intel machine not recognized in the table, we are okay.
    return false;
  }
}

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// More information:
//  https://kb.vmware.com/s/article/1009458
//
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0};

  // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
  // from 0x40000000 up to 0x40010000.
  // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}

#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2 &&
         is_intel_cascade_lake();
}
#endif

bool VM_Version::is_intel_cascade_lake() {
  return is_intel_skylake() && _stepping >= 5;
}

// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// The Intel platforms that support the serialize instruction have an
// improved implementation of 64-byte load/stores, so the default threshold
// is set to 0 for these platforms.
int VM_Version::avx3_threshold() {
  return (is_intel_family_core() &&
          supports_serialize() &&
          FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}

static bool _vm_version_initialized = false;

void VM_Version::initialize() {
  ResourceMark rm;
  // Generating this stub must be the FIRST use of the assembler.
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                    g.generate_detect_virt());

  get_processor_features();

  LP64_ONLY(Assembler::precompute_instructions();)

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}

typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6, // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
FXSR_FLAG = 0x01000000, 2167 SSE_FLAG = 0x02000000, 2168 SSE2_FLAG = 0x04000000, 2169 SS_FLAG = 0x08000000, 2170 HTT_FLAG = 0x10000000, 2171 TM_FLAG = 0x20000000 2172 } FeatureEdxFlag; 2173 2174 static BufferBlob* cpuid_brand_string_stub_blob; 2175 static const int cpuid_brand_string_stub_size = 550; 2176 2177 extern "C" { 2178 typedef void (*getCPUIDBrandString_stub_t)(void*); 2179 } 2180 2181 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2182 2183 // VM_Version statics 2184 enum { 2185 ExtendedFamilyIdLength_INTEL = 16, 2186 ExtendedFamilyIdLength_AMD = 24 2187 }; 2188 2189 const size_t VENDOR_LENGTH = 13; 2190 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2191 static char* _cpu_brand_string = nullptr; 2192 static int64_t _max_qualified_cpu_frequency = 0; 2193 2194 static int _no_of_threads = 0; 2195 static int _no_of_cores = 0; 2196 2197 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2198 "8086/8088", 2199 "", 2200 "286", 2201 "386", 2202 "486", 2203 "Pentium", 2204 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2205 "", 2206 "", 2207 "", 2208 "", 2209 "", 2210 "", 2211 "", 2212 "", 2213 "Pentium 4" 2214 }; 2215 2216 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2217 "", 2218 "", 2219 "", 2220 "", 2221 "5x86", 2222 "K5/K6", 2223 "Athlon/AthlonXP", 2224 "", 2225 "", 2226 "", 2227 "", 2228 "", 2229 "", 2230 "", 2231 "", 2232 "Opteron/Athlon64", 2233 "Opteron QC/Phenom", // Barcelona et.al. 2234 "", 2235 "", 2236 "", 2237 "", 2238 "", 2239 "", 2240 "Zen" 2241 }; 2242 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2243 // September 2013, Vol 3C Table 35-1 2244 const char* const _model_id_pentium_pro[] = { 2245 "", 2246 "Pentium Pro", 2247 "", 2248 "Pentium II model 3", 2249 "", 2250 "Pentium II model 5/Xeon/Celeron", 2251 "Celeron", 2252 "Pentium III/Pentium III Xeon", 2253 "Pentium III/Pentium III Xeon", 2254 "Pentium M model 9", // Yonah 2255 "Pentium III, model A", 2256 "Pentium III, model B", 2257 "", 2258 "Pentium M model D", // Dothan 2259 "", 2260 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2261 "", 2262 "", 2263 "", 2264 "", 2265 "", 2266 "", 2267 "Celeron", // 0x16 Celeron 65nm 2268 "Core 2", // 0x17 Penryn / Harpertown 2269 "", 2270 "", 2271 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2272 "Atom", // 0x1B Z5xx series Silverthorn 2273 "", 2274 "Core 2", // 0x1D Dunnington (6-core) 2275 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2276 "", 2277 "", 2278 "", 2279 "", 2280 "", 2281 "", 2282 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2283 "", 2284 "", 2285 "", // 0x28 2286 "", 2287 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2288 "", 2289 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2290 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2291 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2292 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2293 "", 2294 "", 2295 "", 2296 "", 2297 "", 2298 "", 2299 "", 2300 "", 2301 "", 2302 "", 2303 "Ivy Bridge", // 0x3a 2304 "", 2305 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2306 "", // 0x3d "Next Generation Intel Core Processor" 2307 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2308 "", // 0x3f "Future Generation Intel Xeon Processor" 2309 "", 2310 "", 2311 "", 2312 "", 2313 "", 2314 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2315 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2316 nullptr 2317 }; 2318 2319 /* Brand ID is for back compatibility 2320 * 
   Newer CPUs use the extended brand string */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};


const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Floating Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};

const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};

const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};

void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
g.generate_getCPUIDBrandString()); 2486 } 2487 2488 const char* VM_Version::cpu_model_description(void) { 2489 uint32_t cpu_family = extended_cpu_family(); 2490 uint32_t cpu_model = extended_cpu_model(); 2491 const char* model = nullptr; 2492 2493 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2494 for (uint32_t i = 0; i <= cpu_model; i++) { 2495 model = _model_id_pentium_pro[i]; 2496 if (model == nullptr) { 2497 break; 2498 } 2499 } 2500 } 2501 return model; 2502 } 2503 2504 const char* VM_Version::cpu_brand_string(void) { 2505 if (_cpu_brand_string == nullptr) { 2506 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2507 if (nullptr == _cpu_brand_string) { 2508 return nullptr; 2509 } 2510 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2511 if (ret_val != OS_OK) { 2512 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2513 _cpu_brand_string = nullptr; 2514 } 2515 } 2516 return _cpu_brand_string; 2517 } 2518 2519 const char* VM_Version::cpu_brand(void) { 2520 const char* brand = nullptr; 2521 2522 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2523 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2524 brand = _brand_id[0]; 2525 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2526 brand = _brand_id[i]; 2527 } 2528 } 2529 return brand; 2530 } 2531 2532 bool VM_Version::cpu_is_em64t(void) { 2533 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2534 } 2535 2536 bool VM_Version::is_netburst(void) { 2537 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2538 } 2539 2540 bool VM_Version::supports_tscinv_ext(void) { 2541 if (!supports_tscinv_bit()) { 2542 return false; 2543 } 2544 2545 if (is_intel()) { 2546 return true; 2547 } 2548 2549 if (is_amd()) { 2550 return !is_amd_Barcelona(); 2551 } 2552 2553 if (is_hygon()) { 2554 return true; 2555 } 2556 2557 return false; 2558 } 2559 2560 void VM_Version::resolve_cpu_information_details(void) { 2561 2562 // in future we want to base this information on proper cpu 2563 // and cache topology enumeration such as: 2564 // Intel 64 Architecture Processor Topology Enumeration 2565 // which supports system cpu and cache topology enumeration 2566 // either using 2xAPICIDs or initial APICIDs 2567 2568 // currently only rough cpu information estimates 2569 // which will not necessarily reflect the exact configuration of the system 2570 2571 // this is the number of logical hardware threads 2572 // visible to the operating system 2573 _no_of_threads = os::processor_count(); 2574 2575 // find out number of threads per cpu package 2576 int threads_per_package = threads_per_core() * cores_per_cpu(); 2577 2578 // use amount of threads visible to the process in order to guess number of sockets 2579 _no_of_sockets = _no_of_threads / threads_per_package; 2580 2581 // process might only see a subset of the total number of threads 2582 // from a single processor package. Virtualization/resource management for example. 2583 // If so then just write a hard 1 as num of pkgs. 
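  // Worked example with made-up numbers: a 2-socket machine with 8 cores and
  // 2 threads per core per package has threads_per_package == 16; if all 32
  // hardware threads are visible, _no_of_sockets == 32 / 16 == 2. If a
  // container exposes only 8 of those threads, 8 / 16 == 0, and the clamp
  // below corrects that to 1.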
2584 if (0 == _no_of_sockets) { 2585 _no_of_sockets = 1; 2586 } 2587 2588 // estimate the number of cores 2589 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2590 } 2591 2592 2593 const char* VM_Version::cpu_family_description(void) { 2594 int cpu_family_id = extended_cpu_family(); 2595 if (is_amd()) { 2596 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2597 return _family_id_amd[cpu_family_id]; 2598 } 2599 } 2600 if (is_intel()) { 2601 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2602 return cpu_model_description(); 2603 } 2604 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2605 return _family_id_intel[cpu_family_id]; 2606 } 2607 } 2608 if (is_hygon()) { 2609 return "Dhyana"; 2610 } 2611 return "Unknown x86"; 2612 } 2613 2614 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2615 assert(buf != nullptr, "buffer is null!"); 2616 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2617 2618 const char* cpu_type = nullptr; 2619 const char* x64 = nullptr; 2620 2621 if (is_intel()) { 2622 cpu_type = "Intel"; 2623 x64 = cpu_is_em64t() ? " Intel64" : ""; 2624 } else if (is_amd()) { 2625 cpu_type = "AMD"; 2626 x64 = cpu_is_em64t() ? " AMD64" : ""; 2627 } else if (is_hygon()) { 2628 cpu_type = "Hygon"; 2629 x64 = cpu_is_em64t() ? " AMD64" : ""; 2630 } else { 2631 cpu_type = "Unknown x86"; 2632 x64 = cpu_is_em64t() ? " x86_64" : ""; 2633 } 2634 2635 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2636 cpu_type, 2637 cpu_family_description(), 2638 supports_ht() ? " (HT)" : "", 2639 supports_sse3() ? " SSE3" : "", 2640 supports_ssse3() ? " SSSE3" : "", 2641 supports_sse4_1() ? " SSE4.1" : "", 2642 supports_sse4_2() ? " SSE4.2" : "", 2643 supports_sse4a() ? " SSE4A" : "", 2644 is_netburst() ? " Netburst" : "", 2645 is_intel_family_core() ? 
" Core" : "", 2646 x64); 2647 2648 return OS_OK; 2649 } 2650 2651 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2652 assert(buf != nullptr, "buffer is null!"); 2653 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2654 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2655 2656 // invoke newly generated asm code to fetch CPU Brand String 2657 getCPUIDBrandString_stub(&_cpuid_info); 2658 2659 // fetch results into buffer 2660 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2661 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2662 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2663 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2664 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2665 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2666 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2667 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2668 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2669 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2670 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2671 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2672 2673 return OS_OK; 2674 } 2675 2676 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2677 guarantee(buf != nullptr, "buffer is null!"); 2678 guarantee(buf_len > 0, "buffer len not enough!"); 2679 2680 unsigned int flag = 0; 2681 unsigned int fi = 0; 2682 size_t written = 0; 2683 const char* prefix = ""; 2684 2685 #define WRITE_TO_BUF(string) \ 2686 { \ 2687 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2688 if (res < 0) { \ 2689 return buf_len - 1; \ 2690 } \ 2691 written += res; \ 2692 if (prefix[0] == '\0') { \ 2693 prefix = ", "; \ 2694 } \ 2695 } 2696 2697 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2698 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2699 continue; /* no hyperthreading */ 2700 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2701 continue; /* no fast system call */ 2702 } 2703 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2704 WRITE_TO_BUF(_feature_edx_id[fi]); 2705 } 2706 } 2707 2708 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2709 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2710 WRITE_TO_BUF(_feature_ecx_id[fi]); 2711 } 2712 } 2713 2714 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2715 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2716 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2717 } 2718 } 2719 2720 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2721 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2722 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2723 } 2724 } 2725 2726 if (supports_tscinv_bit()) { 2727 WRITE_TO_BUF("Invariant TSC"); 2728 } 2729 2730 return written; 2731 } 2732 2733 /** 2734 * Write a detailed description of the cpu to a given buffer, including 2735 * feature set. 
2736 */ 2737 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2738 assert(buf != nullptr, "buffer is null!"); 2739 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2740 2741 static const char* unknown = "<unknown>"; 2742 char vendor_id[VENDOR_LENGTH]; 2743 const char* family = nullptr; 2744 const char* model = nullptr; 2745 const char* brand = nullptr; 2746 int outputLen = 0; 2747 2748 family = cpu_family_description(); 2749 if (family == nullptr) { 2750 family = unknown; 2751 } 2752 2753 model = cpu_model_description(); 2754 if (model == nullptr) { 2755 model = unknown; 2756 } 2757 2758 brand = cpu_brand_string(); 2759 2760 if (brand == nullptr) { 2761 brand = cpu_brand(); 2762 if (brand == nullptr) { 2763 brand = unknown; 2764 } 2765 } 2766 2767 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2768 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2769 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2770 vendor_id[VENDOR_LENGTH-1] = '\0'; 2771 2772 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2773 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2774 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2775 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2776 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2777 "Supports: ", 2778 brand, 2779 vendor_id, 2780 family, 2781 extended_cpu_family(), 2782 model, 2783 extended_cpu_model(), 2784 cpu_stepping(), 2785 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2786 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2787 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2788 _cpuid_info.std_cpuid1_eax.value, 2789 _cpuid_info.std_cpuid1_ebx.value, 2790 _cpuid_info.std_cpuid1_ecx.value, 2791 _cpuid_info.std_cpuid1_edx.value, 2792 _cpuid_info.ext_cpuid1_eax, 2793 _cpuid_info.ext_cpuid1_ebx, 2794 _cpuid_info.ext_cpuid1_ecx, 2795 _cpuid_info.ext_cpuid1_edx); 2796 2797 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2798 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2799 return OS_ERR; 2800 } 2801 2802 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2803 2804 return OS_OK; 2805 } 2806 2807 2808 // Fill in Abstract_VM_Version statics 2809 void VM_Version::initialize_cpu_information() { 2810 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2811 assert(!_initialized, "shouldn't be initialized yet"); 2812 resolve_cpu_information_details(); 2813 2814 // initialize cpu_name and cpu_desc 2815 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2816 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2817 _initialized = true; 2818 } 2819 2820 /** 2821 * For information about extracting the frequency from the cpu brand string, please see: 2822 * 2823 * Intel Processor Identification and the CPUID Instruction 2824 * Application Note 485 2825 * May 2012 2826 * 2827 * The return value is the frequency in Hz. 2828 */ 2829 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2830 const char* const brand_string = cpu_brand_string(); 2831 if (brand_string == nullptr) { 2832 return 0; 2833 } 2834 const int64_t MEGA = 1000000; 2835 int64_t multiplier = 0; 2836 int64_t frequency = 0; 2837 uint8_t idx = 0; 2838 // The brand string buffer is at most 48 bytes. 2839 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
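  // Worked example (hypothetical brand string ending in "... CPU @ 3.40GHz"):
  // the scan below stops with idx at 'G', so multiplier == 1000 * MEGA; since
  // brand_string[idx-3] == '.', the "x.xx" branch yields
  // 3 * 10^9 + 4 * 10^8 + 0 * 10^7 == 3400000000 Hz.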
2840 for (; idx < 48-2; ++idx) { 2841 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2842 // Search brand string for "yHz" where y is M, G, or T. 2843 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2844 if (brand_string[idx] == 'M') { 2845 multiplier = MEGA; 2846 } else if (brand_string[idx] == 'G') { 2847 multiplier = MEGA * 1000; 2848 } else if (brand_string[idx] == 'T') { 2849 multiplier = MEGA * MEGA; 2850 } 2851 break; 2852 } 2853 } 2854 if (multiplier > 0) { 2855 // Compute frequency (in Hz) from brand string. 2856 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2857 frequency = (brand_string[idx-4] - '0') * multiplier; 2858 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2859 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2860 } else { // format is "xxxx" 2861 frequency = (brand_string[idx-4] - '0') * 1000; 2862 frequency += (brand_string[idx-3] - '0') * 100; 2863 frequency += (brand_string[idx-2] - '0') * 10; 2864 frequency += (brand_string[idx-1] - '0'); 2865 frequency *= multiplier; 2866 } 2867 } 2868 return frequency; 2869 } 2870 2871 2872 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2873 if (_max_qualified_cpu_frequency == 0) { 2874 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2875 } 2876 return _max_qualified_cpu_frequency; 2877 } 2878 2879 uint64_t VM_Version::feature_flags() { 2880 uint64_t result = 0; 2881 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 2882 result |= CPU_CX8; 2883 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 2884 result |= CPU_CMOV; 2885 if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0) 2886 result |= CPU_FLUSH; 2887 #ifdef _LP64 2888 // clflush should always be available on x86_64 2889 // if not we are in real trouble because we rely on it 2890 // to flush the code cache. 2891 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2892 #endif 2893 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2894 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) 2895 result |= CPU_FXSR; 2896 // HT flag is set for multi-core processors also. 
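  // For instance, a 4-core part without SMT still sets the cpuid HTT bit but
  // reports threads_per_core() == 1, so CPU_HT is derived from the observed
  // topology rather than from that raw bit.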
2897 if (threads_per_core() > 1) 2898 result |= CPU_HT; 2899 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2900 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) 2901 result |= CPU_MMX; 2902 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 2903 result |= CPU_SSE; 2904 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 2905 result |= CPU_SSE2; 2906 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 2907 result |= CPU_SSE3; 2908 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 2909 result |= CPU_SSSE3; 2910 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 2911 result |= CPU_SSE4_1; 2912 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 2913 result |= CPU_SSE4_2; 2914 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 2915 result |= CPU_POPCNT; 2916 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && 2917 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && 2918 _cpuid_info.xem_xcr0_eax.bits.sse != 0 && 2919 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { 2920 result |= CPU_AVX; 2921 result |= CPU_VZEROUPPER; 2922 if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0) 2923 result |= CPU_F16C; 2924 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) 2925 result |= CPU_AVX2; 2926 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 && 2927 _cpuid_info.xem_xcr0_eax.bits.opmask != 0 && 2928 _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 && 2929 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) { 2930 result |= CPU_AVX512F; 2931 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0) 2932 result |= CPU_AVX512CD; 2933 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0) 2934 result |= CPU_AVX512DQ; 2935 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0) 2936 result |= CPU_AVX512_IFMA; 2937 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0) 2938 result |= CPU_AVX512PF; 2939 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0) 2940 result |= CPU_AVX512ER; 2941 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0) 2942 result |= CPU_AVX512BW; 2943 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0) 2944 result |= CPU_AVX512VL; 2945 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2946 result |= CPU_AVX512_VPOPCNTDQ; 2947 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2948 result |= CPU_AVX512_VPCLMULQDQ; 2949 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0) 2950 result |= CPU_AVX512_VAES; 2951 if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0) 2952 result |= CPU_GFNI; 2953 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0) 2954 result |= CPU_AVX512_VNNI; 2955 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2956 result |= CPU_AVX512_BITALG; 2957 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2958 result |= CPU_AVX512_VBMI; 2959 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2960 result |= CPU_AVX512_VBMI2; 2961 } 2962 } 2963 if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0) 2964 result |= CPU_HV; 2965 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) 2966 result |= CPU_BMI1; 2967 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) 2968 result |= CPU_TSC; 2969 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) 2970 result |= CPU_TSCINV_BIT; 2971 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) 2972 result |= CPU_AES; 2973 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0) 2974 result |= CPU_ERMS; 2975 if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 2976 result |= CPU_FSRM; 2977 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0) 2978 result |= CPU_CLMUL; 2979 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0) 2980 result |= CPU_RTM; 2981 if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0) 2982 result |= CPU_ADX; 2983 if 
(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) 2984 result |= CPU_BMI2; 2985 if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0) 2986 result |= CPU_SHA; 2987 if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0) 2988 result |= CPU_FMA; 2989 if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0) 2990 result |= CPU_FLUSHOPT; 2991 if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0) 2992 result |= CPU_RDTSCP; 2993 if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0) 2994 result |= CPU_RDPID; 2995 2996 // AMD|Hygon features. 2997 if (is_amd_family()) { 2998 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || 2999 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) 3000 result |= CPU_3DNOW_PREFETCH; 3001 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) 3002 result |= CPU_LZCNT; 3003 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 3004 result |= CPU_SSE4A; 3005 } 3006 3007 // Intel features. 3008 if (is_intel()) { 3009 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3010 result |= CPU_LZCNT; 3011 } 3012 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3013 result |= CPU_3DNOW_PREFETCH; 3014 } 3015 if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) { 3016 result |= CPU_CLWB; 3017 } 3018 if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0) 3019 result |= CPU_SERIALIZE; 3020 } 3021 3022 // ZX features. 3023 if (is_zx()) { 3024 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3025 result |= CPU_LZCNT; 3026 } 3027 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3028 result |= CPU_3DNOW_PREFETCH; 3029 } 3030 } 3031 3032 // Protection key features. 3033 if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) { 3034 result |= CPU_PKU; 3035 } 3036 if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) { 3037 result |= CPU_OSPKE; 3038 } 3039 3040 // Control flow enforcement (CET) features. 3041 if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) { 3042 result |= CPU_CET_SS; 3043 } 3044 if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) { 3045 result |= CPU_CET_IBT; 3046 } 3047 3048 // Composite features. 3049 if (supports_tscinv_bit() && 3050 ((is_amd_family() && !is_amd_Barcelona()) || 3051 is_intel_tsc_synched_at_init())) { 3052 result |= CPU_TSCINV; 3053 } 3054 3055 return result; 3056 } 3057 3058 bool VM_Version::os_supports_avx_vectors() { 3059 bool retVal = false; 3060 int nreg = 2 LP64_ONLY(+2); 3061 if (supports_evex()) { 3062 // Verify that OS save/restore all bits of EVEX registers 3063 // during signal processing. 3064 retVal = true; 3065 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3066 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3067 retVal = false; 3068 break; 3069 } 3070 } 3071 } else if (supports_avx()) { 3072 // Verify that OS save/restore all bits of AVX registers 3073 // during signal processing. 3074 retVal = true; 3075 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3076 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3077 retVal = false; 3078 break; 3079 } 3080 } 3081 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3082 if (retVal == false) { 3083 // Verify that OS save/restore all bits of EVEX registers 3084 // during signal processing. 
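      // (Layout note: zmm_save is compared in 4-byte words, so 16 words cover
      // the full 64 bytes of each zmm register; nreg is 4 on LP64, 2 otherwise.)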
3085 retVal = true; 3086 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3087 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3088 retVal = false; 3089 break; 3090 } 3091 } 3092 } 3093 } 3094 return retVal; 3095 } 3096 3097 uint VM_Version::cores_per_cpu() { 3098 uint result = 1; 3099 if (is_intel()) { 3100 bool supports_topology = supports_processor_topology(); 3101 if (supports_topology) { 3102 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3103 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3104 } 3105 if (!supports_topology || result == 0) { 3106 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3107 } 3108 } else if (is_amd_family()) { 3109 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3110 } else if (is_zx()) { 3111 bool supports_topology = supports_processor_topology(); 3112 if (supports_topology) { 3113 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3114 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3115 } 3116 if (!supports_topology || result == 0) { 3117 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3118 } 3119 } 3120 return result; 3121 } 3122 3123 uint VM_Version::threads_per_core() { 3124 uint result = 1; 3125 if (is_intel() && supports_processor_topology()) { 3126 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3127 } else if (is_zx() && supports_processor_topology()) { 3128 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3129 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3130 if (cpu_family() >= 0x17) { 3131 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3132 } else { 3133 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3134 cores_per_cpu(); 3135 } 3136 } 3137 return (result == 0 ? 1 : result); 3138 } 3139 3140 intx VM_Version::L1_line_size() { 3141 intx result = 0; 3142 if (is_intel()) { 3143 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3144 } else if (is_amd_family()) { 3145 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3146 } else if (is_zx()) { 3147 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3148 } 3149 if (result < 32) // not defined ? 3150 result = 32; // 32 bytes by default on x86 and other x64 3151 return result; 3152 } 3153 3154 bool VM_Version::is_intel_tsc_synched_at_init() { 3155 if (is_intel_family_core()) { 3156 uint32_t ext_model = extended_cpu_model(); 3157 if (ext_model == CPU_MODEL_NEHALEM_EP || 3158 ext_model == CPU_MODEL_WESTMERE_EP || 3159 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3160 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3161 // <= 2-socket invariant tsc support. EX versions are usually used 3162 // in > 2-socket systems and likely don't synchronize tscs at 3163 // initialization. 3164 // Code that uses tsc values must be prepared for them to arbitrarily 3165 // jump forward or backward. 3166 return true; 3167 } 3168 } 3169 return false; 3170 } 3171 3172 intx VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3173 // Hardware prefetching (distance/size in bytes): 3174 // Pentium 3 - 64 / 32 3175 // Pentium 4 - 256 / 128 3176 // Athlon - 64 / 32 ???? 
3177 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3178 // Core - 128 / 64 3179 // 3180 // Software prefetching (distance in bytes / instruction with best score): 3181 // Pentium 3 - 128 / prefetchnta 3182 // Pentium 4 - 512 / prefetchnta 3183 // Athlon - 128 / prefetchnta 3184 // Opteron - 256 / prefetchnta 3185 // Core - 256 / prefetchnta 3186 // It will be used only when AllocatePrefetchStyle > 0 3187 3188 if (is_amd_family()) { // AMD | Hygon 3189 if (supports_sse2()) { 3190 return 256; // Opteron 3191 } else { 3192 return 128; // Athlon 3193 } 3194 } else { // Intel 3195 if (supports_sse3() && cpu_family() == 6) { 3196 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3197 return 192; 3198 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3199 #ifdef _LP64 3200 return 384; 3201 #else 3202 return 320; 3203 #endif 3204 } 3205 } 3206 if (supports_sse2()) { 3207 if (cpu_family() == 6) { 3208 return 256; // Pentium M, Core, Core2 3209 } else { 3210 return 512; // Pentium 4 3211 } 3212 } else { 3213 return 128; // Pentium 3 (and all other old CPUs) 3214 } 3215 } 3216 } 3217 3218 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3219 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3220 switch (id) { 3221 case vmIntrinsics::_floatToFloat16: 3222 case vmIntrinsics::_float16ToFloat: 3223 if (!supports_float16()) { 3224 return false; 3225 } 3226 break; 3227 default: 3228 break; 3229 } 3230 return true; 3231 } 3232
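// Usage sketch (hypothetical caller, not part of this file): the compilers
// are expected to consult VM_Version::is_intrinsic_supported() before
// committing to an intrinsic, e.g.
//
//   if (VM_Version::is_intrinsic_supported(vmIntrinsics::_float16ToFloat)) {
//     // emit the hardware float16 -> float conversion
//   } else {
//     // keep the shared (non-intrinsic) implementation
//   }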