/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31
    // before the signal-handling test guarantees that any values observed
    // afterwards were re-instated by the operating system during signal
    // handling, not merely left unmodified.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);
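
    // Background for the EFLAGS games above and below: in EFLAGS, AC
    // (alignment check) is bit 18 (0x40000) and ID is bit 21 (0x200000).
    // A CPU on which AC cannot be toggled is a 386; one where AC toggles
    // but ID does not is a CPUID-less 486; a toggleable ID bit implies
    // the CPUID instruction is available.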
    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
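
    // Background: leaf 0xB describes one topology level per ECX value
    // (0 = SMT/thread, 1 = core, 2 = package here). EAX[4:0] is the APIC id
    // shift for the level and EBX[15:0] the number of logical processors at
    // it; both being zero marks an invalid level, which is what the checks
    // above test before storing each record.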
    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
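
    // Background: XGETBV with ECX=0 returns XCR0 in EDX:EAX. The XCR0 bits
    // consulted later are bit 1 (SSE/XMM state), bit 2 (AVX/YMM state),
    // bits 5-7 (opmask and upper ZMM state, mask 0xE0) and bit 19
    // (APX extended GPR state, mask 0x80000).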
    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
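
    // The compare chain above jumps to the block for the highest extended
    // leaf this CPU reports; execution then falls through the blocks in
    // descending leaf order, so every supported leaf from that point down
    // is queried and stored exactly once.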

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.(EAX=7,ECX=1):EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
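
    // Note on the test above: r16/r31 are loaded with a sentinel
    // (egpr_test_value()), a null read then raises SIGSEGV, and execution
    // resumes at the continuation address, where the registers are stored
    // into apx_save. Comparing the saved values against the sentinel later
    // tells us whether the OS signal path preserves extended GPR state;
    // clear_apx_test_state() zeroes r16/r31 beforehand so stale contents
    // cannot masquerade as a successful save/restore.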
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS has enabled SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm6-xmm15 must be preserved across calls on windows,
      // so save the ones we are about to clobber
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
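
    // Note on the 0x50654 comparisons above and below: CPUID leaf 1 returns
    // the processor signature in EAX as stepping[3:0], model[7:4],
    // family[11:8], extended model[19:16] and extended family[27:20];
    // 0x50654 thus decodes to family 6, model 0x55 (Skylake Server),
    // stepping 4. When UseAVX is left at its default, such parts are routed
    // to the legacy (AVX) path rather than the EVEX probe.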

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }

  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
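
  // detect_virt_stub is typically invoked with leaves in the 0x40000000
  // range, which the architecture reserves for hypervisors; when running
  // virtualized, leaf 0x40000000 usually returns a hypervisor vendor
  // signature in EBX/ECX/EDX that callers can match against known strings.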

  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003)  // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004)  // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
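
  // The brand string is 48 bytes total: leaves 0x80000002..0x80000004 each
  // return 16 bytes in EAX, EBX, ECX, EDX, stored above into the twelve
  // proc_name slots in leaf-then-register order.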
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why the ICache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if the processor has Intel E-cores
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
       _model == 0xCC || _model == 0xDD)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }
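
  // For example, -XX:UseSSE=4 on a CPU with SSE3 but without SSE4.1 computes
  // use_sse_limit = 3, prints the warning above, and clamps UseSSE to 3.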

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
    _features &= ~CPU_AVX512_FP16;
  }

  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
  }

  if (!UseAPX) {
    _features &= ~CPU_APX_F;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
    _features &= ~CPU_SHA512;
  }

  if (logical_processors_per_package() == 1) {
    // An HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
      _features &= ~CPU_AVX512_FP16;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  assert(supports_cpuid(), "Always present");
  assert(supports_clflush(), "Always present");
  if (X86ICacheSync == -1) {
    // Auto-detect, choosing the best performant one that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
    if (supports_clwb()) {
      FLAG_SET_ERGO(X86ICacheSync, 3);
    } else if (supports_clflushopt()) {
      FLAG_SET_ERGO(X86ICacheSync, 2);
    } else {
      FLAG_SET_ERGO(X86ICacheSync, 1);
    }
  } else {
    if ((X86ICacheSync == 2) && !supports_clflushopt()) {
      vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
    }
    if ((X86ICacheSync == 3) && !supports_clwb()) {
      vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
    }
    if ((X86ICacheSync == 5) && !supports_serialize()) {
      vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
    }
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }
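
  // Summary of the chain above: UseAES needs hardware AES support;
  // UseAESIntrinsics additionally needs UseAES and SSE3; UseAESCTRIntrinsics
  // additionally needs UseAESIntrinsics and SSE4.1. Disabling any link
  // disables everything downstream of it.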

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 intrinsics require AVX2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Dilithium Intrinsics
  // Currently we only have them for AVX512
#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
      UseDilithiumIntrinsics = true;
    }
  } else
#endif
  if (UseDilithiumIntrinsics) {
    warning("Intrinsics for ML-DSA are not available on this CPU.");
    FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma()) {
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else
#endif
  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX3 (AVX-512)
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }
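
  // Net effect: SSE-only configurations get 16-byte (XMM) vectors, UseAVX=1
  // or 2 gets 32-byte (YMM) vectors, and UseAVX=3 gets 64-byte (ZMM) vectors,
  // subject to the min/max/power-of-2 clamping above.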

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else
#endif
  if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2
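
    // Background: AMD families before 0x17 (pre-Zen) internally split
    // 256-bit AVX operations into two 128-bit halves, so vectors wider than
    // 16 bytes rarely pay off there; hence the clamp above.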

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
Intrinsics will be disabled."); 1705 } 1706 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1707 } 1708 } 1709 if (is_atom_family() || is_knights_family()) { 1710 #ifdef COMPILER2 1711 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1712 OptoScheduling = true; 1713 } 1714 #endif 1715 if (supports_sse4_2()) { // Silvermont 1716 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1717 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1718 } 1719 } 1720 if (FLAG_IS_DEFAULT(UseIncDec)) { 1721 FLAG_SET_DEFAULT(UseIncDec, false); 1722 } 1723 } 1724 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1725 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1726 } 1727 #ifdef COMPILER2 1728 if (UseAVX > 2) { 1729 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1730 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1731 ArrayOperationPartialInlineSize != 0 && 1732 ArrayOperationPartialInlineSize != 16 && 1733 ArrayOperationPartialInlineSize != 32 && 1734 ArrayOperationPartialInlineSize != 64)) { 1735 int inline_size = 0; 1736 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1737 inline_size = 64; 1738 } else if (MaxVectorSize >= 32) { 1739 inline_size = 32; 1740 } else if (MaxVectorSize >= 16) { 1741 inline_size = 16; 1742 } 1743 if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1744 warning("Setting ArrayOperationPartialInlineSize to %d", inline_size); 1745 } 1746 ArrayOperationPartialInlineSize = inline_size; 1747 } 1748 1749 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1750 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; 1751 if (ArrayOperationPartialInlineSize) { 1752 warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize=%zd", MaxVectorSize); 1753 } else { 1754 warning("Setting ArrayOperationPartialInlineSize to %zd", ArrayOperationPartialInlineSize); 1755 } 1756 } 1757 } 1758 #endif 1759 } 1760 1761 #ifdef COMPILER2 1762 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1763 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { 1764 OptimizeFill = false; 1765 } 1766 } 1767 #endif 1768 1769 #ifdef _LP64 1770 if (UseSSE42Intrinsics) { 1771 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1772 UseVectorizedMismatchIntrinsic = true; 1773 } 1774 } else if (UseVectorizedMismatchIntrinsic) { 1775 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1776 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1777 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1778 } 1779 if (UseAVX >= 2) { 1780 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true); 1781 } else if (UseVectorizedHashCodeIntrinsic) { 1782 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) 1783 warning("vectorizedHashCode intrinsics are not available on this CPU"); 1784 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1785 } 1786 #else 1787 if (UseVectorizedMismatchIntrinsic) { 1788 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1789 warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); 1790 } 1791 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1792 } 1793 if (UseVectorizedHashCodeIntrinsic) { 1794 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) { 1795 warning("vectorizedHashCode intrinsic is not available in 32-bit VM"); 1796 } 1797 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1798 } 1799 #endif // _LP64 1800 1801 // Use the count leading zeros instruction if available.
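// lzcnt is well-defined for a zero input (it returns the operand width),
// unlike bsr, whose result is undefined for zero. Illustrative effect on
// Integer.numberOfLeadingZeros (a sketch, not the actual generated code):
//   with lzcnt:    a single "lzcnt dst, src"
//   without lzcnt: bsr plus fix-up code for the zero case and for mapping
//                  the bit index to 31 - index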
1802 if (supports_lzcnt()) { 1803 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1804 UseCountLeadingZerosInstruction = true; 1805 } 1806 } else if (UseCountLeadingZerosInstruction) { 1807 warning("lzcnt instruction is not available on this CPU"); 1808 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1809 } 1810 1811 // Use count trailing zeros instruction if available 1812 if (supports_bmi1()) { 1813 // tzcnt does not require VEX prefix 1814 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1815 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1816 // Don't use tzcnt if BMI1 is switched off on command line. 1817 UseCountTrailingZerosInstruction = false; 1818 } else { 1819 UseCountTrailingZerosInstruction = true; 1820 } 1821 } 1822 } else if (UseCountTrailingZerosInstruction) { 1823 warning("tzcnt instruction is not available on this CPU"); 1824 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1825 } 1826 1827 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1828 // VEX prefix is generated only when AVX > 0. 1829 if (supports_bmi1() && supports_avx()) { 1830 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1831 UseBMI1Instructions = true; 1832 } 1833 } else if (UseBMI1Instructions) { 1834 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1835 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1836 } 1837 1838 if (supports_bmi2() && supports_avx()) { 1839 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1840 UseBMI2Instructions = true; 1841 } 1842 } else if (UseBMI2Instructions) { 1843 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1844 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1845 } 1846 1847 // Use population count instruction if available. 1848 if (supports_popcnt()) { 1849 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1850 UsePopCountInstruction = true; 1851 } 1852 } else if (UsePopCountInstruction) { 1853 warning("POPCNT instruction is not available on this CPU"); 1854 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1855 } 1856 1857 // Use fast-string operations if available. 1858 if (supports_erms()) { 1859 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1860 UseFastStosb = true; 1861 } 1862 } else if (UseFastStosb) { 1863 warning("fast-string operations are not available on this CPU"); 1864 FLAG_SET_DEFAULT(UseFastStosb, false); 1865 } 1866 1867 // For AMD Processors use XMM/YMM MOVDQU instructions 1868 // for Object Initialization as default 1869 if (is_amd() && cpu_family() >= 0x19) { 1870 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1871 UseFastStosb = false; 1872 } 1873 } 1874 1875 #ifdef COMPILER2 1876 if (is_intel() && MaxVectorSize > 16) { 1877 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1878 UseFastStosb = false; 1879 } 1880 } 1881 #endif 1882 1883 // Use XMM/YMM MOVDQU instruction for Object Initialization 1884 if (UseUnalignedLoadStores) { 1885 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1886 UseXMMForObjInit = true; 1887 } 1888 } else if (UseXMMForObjInit) { 1889 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1890 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1891 } 1892 1893 #ifdef COMPILER2 1894 if (FLAG_IS_DEFAULT(AlignVector)) { 1895 // Modern processors allow misaligned memory operations for vectors. 
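// When unaligned vector accesses are cheap (UseUnalignedLoadStores), C2 does
// not need to align vector loads/stores; otherwise AlignVector stays on so
// vectorized loops emit aligned memory operations.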
1896 AlignVector = !UseUnalignedLoadStores; 1897 } 1898 #endif // COMPILER2 1899 1900 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1901 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1902 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1903 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1904 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1905 } 1906 } 1907 1908 // Allocation prefetch settings 1909 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1910 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1911 (cache_line_size > AllocatePrefetchStepSize)) { 1912 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1913 } 1914 1915 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1916 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1917 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1918 warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag."); 1919 } 1920 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1921 } 1922 1923 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1924 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1925 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1926 } 1927 1928 if (is_intel() && cpu_family() == 6 && supports_sse3()) { 1929 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1930 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1931 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1932 } 1933 #ifdef COMPILER2 1934 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1935 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1936 } 1937 #endif 1938 } 1939 1940 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1941 #ifdef COMPILER2 1942 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1943 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1944 } 1945 #endif 1946 } 1947 1948 #ifdef _LP64 1949 // Prefetch settings 1950 1951 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1952 // 50-warehouse specjbb runs on a 2-way 1.8GHz opteron using a 4gb heap. 1953 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1954 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1955 1956 // gc copy/scan is disabled if prefetchw isn't supported, because 1957 // Prefetch::write emits an inlined prefetchw on Linux. 1958 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1959 // The prefetcht0 instruction used instead works on both amd64 and em64t.
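// 576 bytes == 9 * 64-byte dcache lines, per the interval sweep described above.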
1960 1961 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1962 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1963 } 1964 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1965 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1966 } 1967 #endif 1968 1969 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1970 (cache_line_size > ContendedPaddingWidth)) 1971 ContendedPaddingWidth = cache_line_size; 1972 1973 // This machine allows unaligned memory accesses 1974 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1975 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1976 } 1977 1978 #ifndef PRODUCT 1979 if (log_is_enabled(Info, os, cpu)) { 1980 LogStream ls(Log(os, cpu)::info()); 1981 outputStream* log = &ls; 1982 log->print_cr("Logical CPUs per core: %u", 1983 logical_processors_per_package()); 1984 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1985 log->print("UseSSE=%d", UseSSE); 1986 if (UseAVX > 0) { 1987 log->print(" UseAVX=%d", UseAVX); 1988 } 1989 if (UseAES) { 1990 log->print(" UseAES=1"); 1991 } 1992 #ifdef COMPILER2 1993 if (MaxVectorSize > 0) { 1994 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1995 } 1996 #endif 1997 log->cr(); 1998 log->print("Allocation"); 1999 if (AllocatePrefetchStyle <= 0) { 2000 log->print_cr(": no prefetching"); 2001 } else { 2002 log->print(" prefetching: "); 2003 if (AllocatePrefetchInstr == 0) { 2004 log->print("PREFETCHNTA"); 2005 } else if (AllocatePrefetchInstr == 1) { 2006 log->print("PREFETCHT0"); 2007 } else if (AllocatePrefetchInstr == 2) { 2008 log->print("PREFETCHT2"); 2009 } else if (AllocatePrefetchInstr == 3) { 2010 log->print("PREFETCHW"); 2011 } 2012 if (AllocatePrefetchLines > 1) { 2013 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 2014 } else { 2015 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 2016 } 2017 } 2018 2019 if (PrefetchCopyIntervalInBytes > 0) { 2020 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 2021 } 2022 if (PrefetchScanIntervalInBytes > 0) { 2023 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 2024 } 2025 if (ContendedPaddingWidth > 0) { 2026 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 2027 } 2028 } 2029 #endif // !PRODUCT 2030 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 2031 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 2032 } 2033 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 2034 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 2035 } 2036 } 2037 2038 void VM_Version::print_platform_virtualization_info(outputStream* st) { 2039 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 2040 if (vrt == XenHVM) { 2041 st->print_cr("Xen hardware-assisted virtualization detected"); 2042 } else if (vrt == KVM) { 2043 st->print_cr("KVM virtualization detected"); 2044 } else if (vrt == VMWare) { 2045 st->print_cr("VMWare virtualization detected"); 2046 VirtualizationSupport::print_virtualization_info(st); 2047 } else if (vrt == HyperV) { 2048 st->print_cr("Hyper-V virtualization detected"); 2049 } else if (vrt == HyperVRole) { 2050 st->print_cr("Hyper-V role detected"); 2051 } 2052 } 2053 2054 bool VM_Version::compute_has_intel_jcc_erratum() { 2055 if (!is_intel_family_core()) { 2056 // Only Intel CPUs are affected. 
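// (For affected parts, HotSpot's mitigation is to pad generated code so
// that jcc and macro-fused jcc instructions neither cross nor end at a
// 32-byte boundary, sidestepping the penalty introduced by the microcode
// update; see the Intel document cited below.)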
2057 return false; 2058 } 2059 // The following table of affected CPUs is based on the following document released by Intel: 2060 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 2061 switch (_model) { 2062 case 0x8E: 2063 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2064 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 2065 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 2066 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 2067 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 2068 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2069 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2070 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 2071 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2072 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 2073 case 0x4E: 2074 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 2075 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 2076 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 2077 return _stepping == 0x3; 2078 case 0x55: 2079 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 2080 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 2081 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 2082 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 2083 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 2084 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 2085 return _stepping == 0x4 || _stepping == 0x7; 2086 case 0x5E: 2087 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 2088 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 2089 return _stepping == 0x3; 2090 case 0x9E: 2091 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2092 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2093 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2094 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2095 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2096 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name 
Coffee Lake H 2097 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 2098 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP 2099 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 2100 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 2101 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 2102 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 2103 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2) 2104 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 2105 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 2106 case 0xA5: 2107 // Not in Intel documentation. 2108 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 2109 return true; 2110 case 0xA6: 2111 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 2112 return _stepping == 0x0; 2113 case 0xAE: 2114 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2115 return _stepping == 0xA; 2116 default: 2117 // If we are running on another Intel machine not recognized in the table, we are okay. 2118 return false; 2119 } 2120 } 2121 2122 // On Xen, the cpuid instruction returns 2123 // eax / registers[0]: Version of Xen 2124 // ebx / registers[1]: chars 'XenV' 2125 // ecx / registers[2]: chars 'MMXe' 2126 // edx / registers[3]: chars 'nVMM' 2127 // 2128 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2129 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2130 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2131 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2132 // 2133 // more information: 2134 // https://kb.vmware.com/s/article/1009458 2135 // 2136 void VM_Version::check_virtualizations() { 2137 uint32_t registers[4] = {0}; 2138 char signature[13] = {0}; 2139 2140 // Xen cpuid leaves can be found on 0x100-aligned boundaries starting 2141 // from 0x40000000 up to 0x40010000. 2142 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2143 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2144 detect_virt_stub(leaf, registers); 2145 memcpy(signature, &registers[1], 12); 2146 2147 if (strncmp("VMwareVMware", signature, 12) == 0) { 2148 Abstract_VM_Version::_detected_virtualization = VMWare; 2149 // check for extended metrics from guestlib 2150 VirtualizationSupport::initialize(); 2151 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2152 Abstract_VM_Version::_detected_virtualization = HyperV; 2153 #ifdef _WINDOWS 2154 // CPUID leaf 0x40000007 is available to the root partition only. 2155 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2156 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2157 detect_virt_stub(0x40000007, registers); 2158 if ((registers[0] != 0x0) || 2159 (registers[1] != 0x0) || 2160 (registers[2] != 0x0) || 2161 (registers[3] != 0x0)) { 2162 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2163 } 2164 #endif 2165 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2166 Abstract_VM_Version::_detected_virtualization = KVM; 2167 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2168 Abstract_VM_Version::_detected_virtualization = XenHVM; 2169 } 2170 } 2171 } 2172 2173 #ifdef COMPILER2 2174 // Determine if it's running on Cascade Lake using default options. 2175 bool VM_Version::is_default_intel_cascade_lake() { 2176 return FLAG_IS_DEFAULT(UseAVX) && 2177 FLAG_IS_DEFAULT(MaxVectorSize) && 2178 UseAVX > 2 && 2179 is_intel_cascade_lake(); 2180 } 2181 #endif 2182 2183 bool VM_Version::is_intel_cascade_lake() { 2184 return is_intel_skylake() && _stepping >= 5; 2185 } 2186 2187 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2188 // for implementing the array copy and clear operations. 2189 // The Intel platforms that supports the serialize instruction 2190 // has improved implementation of 64-byte load/stores and so the default 2191 // threshold is set to 0 for these platforms. 2192 int VM_Version::avx3_threshold() { 2193 return (is_intel_family_core() && 2194 supports_serialize() && 2195 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2196 } 2197 2198 #if defined(_LP64) 2199 void VM_Version::clear_apx_test_state() { 2200 clear_apx_test_state_stub(); 2201 } 2202 #endif 2203 2204 static bool _vm_version_initialized = false; 2205 2206 void VM_Version::initialize() { 2207 ResourceMark rm; 2208 // Making this stub must be FIRST use of assembler 2209 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2210 if (stub_blob == nullptr) { 2211 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2212 } 2213 CodeBuffer c(stub_blob); 2214 VM_Version_StubGenerator g(&c); 2215 2216 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2217 g.generate_get_cpu_info()); 2218 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2219 g.generate_detect_virt()); 2220 2221 #if defined(_LP64) 2222 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, 2223 g.clear_apx_test_state()); 2224 #endif 2225 get_processor_features(); 2226 2227 LP64_ONLY(Assembler::precompute_instructions();) 2228 2229 if (VM_Version::supports_hv()) { // Supports hypervisor 2230 check_virtualizations(); 2231 } 2232 _vm_version_initialized = true; 2233 } 2234 2235 typedef enum { 2236 CPU_FAMILY_8086_8088 = 0, 2237 CPU_FAMILY_INTEL_286 = 2, 2238 CPU_FAMILY_INTEL_386 = 3, 2239 CPU_FAMILY_INTEL_486 = 4, 2240 CPU_FAMILY_PENTIUM = 5, 2241 CPU_FAMILY_PENTIUMPRO = 6, // Same family several models 2242 CPU_FAMILY_PENTIUM_4 = 0xF 2243 } FamilyFlag; 2244 2245 typedef enum { 2246 RDTSCP_FLAG = 0x08000000, // bit 27 2247 INTEL64_FLAG = 0x20000000 // bit 29 2248 } _featureExtendedEdxFlag; 2249 2250 typedef enum { 2251 FPU_FLAG = 0x00000001, 2252 VME_FLAG = 0x00000002, 2253 DE_FLAG = 0x00000004, 2254 PSE_FLAG = 0x00000008, 2255 TSC_FLAG = 0x00000010, 2256 MSR_FLAG = 0x00000020, 2257 PAE_FLAG = 0x00000040, 2258 MCE_FLAG = 0x00000080, 2259 CX8_FLAG = 0x00000100, 2260 APIC_FLAG = 0x00000200, 2261 SEP_FLAG = 0x00000800, 2262 MTRR_FLAG = 0x00001000, 2263 PGE_FLAG = 0x00002000, 2264 
MCA_FLAG = 0x00004000, 2265 CMOV_FLAG = 0x00008000, 2266 PAT_FLAG = 0x00010000, 2267 PSE36_FLAG = 0x00020000, 2268 PSNUM_FLAG = 0x00040000, 2269 CLFLUSH_FLAG = 0x00080000, 2270 DTS_FLAG = 0x00200000, 2271 ACPI_FLAG = 0x00400000, 2272 MMX_FLAG = 0x00800000, 2273 FXSR_FLAG = 0x01000000, 2274 SSE_FLAG = 0x02000000, 2275 SSE2_FLAG = 0x04000000, 2276 SS_FLAG = 0x08000000, 2277 HTT_FLAG = 0x10000000, 2278 TM_FLAG = 0x20000000 2279 } FeatureEdxFlag; 2280 2281 static BufferBlob* cpuid_brand_string_stub_blob; 2282 static const int cpuid_brand_string_stub_size = 550; 2283 2284 extern "C" { 2285 typedef void (*getCPUIDBrandString_stub_t)(void*); 2286 } 2287 2288 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2289 2290 // VM_Version statics 2291 enum { 2292 ExtendedFamilyIdLength_INTEL = 16, 2293 ExtendedFamilyIdLength_AMD = 24 2294 }; 2295 2296 const size_t VENDOR_LENGTH = 13; 2297 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2298 static char* _cpu_brand_string = nullptr; 2299 static int64_t _max_qualified_cpu_frequency = 0; 2300 2301 static int _no_of_threads = 0; 2302 static int _no_of_cores = 0; 2303 2304 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2305 "8086/8088", 2306 "", 2307 "286", 2308 "386", 2309 "486", 2310 "Pentium", 2311 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2312 "", 2313 "", 2314 "", 2315 "", 2316 "", 2317 "", 2318 "", 2319 "", 2320 "Pentium 4" 2321 }; 2322 2323 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2324 "", 2325 "", 2326 "", 2327 "", 2328 "5x86", 2329 "K5/K6", 2330 "Athlon/AthlonXP", 2331 "", 2332 "", 2333 "", 2334 "", 2335 "", 2336 "", 2337 "", 2338 "", 2339 "Opteron/Athlon64", 2340 "Opteron QC/Phenom", // Barcelona et.al. 2341 "", 2342 "", 2343 "", 2344 "", 2345 "", 2346 "", 2347 "Zen" 2348 }; 2349 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2350 // September 2013, Vol 3C Table 35-1 2351 const char* const _model_id_pentium_pro[] = { 2352 "", 2353 "Pentium Pro", 2354 "", 2355 "Pentium II model 3", 2356 "", 2357 "Pentium II model 5/Xeon/Celeron", 2358 "Celeron", 2359 "Pentium III/Pentium III Xeon", 2360 "Pentium III/Pentium III Xeon", 2361 "Pentium M model 9", // Yonah 2362 "Pentium III, model A", 2363 "Pentium III, model B", 2364 "", 2365 "Pentium M model D", // Dothan 2366 "", 2367 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2368 "", 2369 "", 2370 "", 2371 "", 2372 "", 2373 "", 2374 "Celeron", // 0x16 Celeron 65nm 2375 "Core 2", // 0x17 Penryn / Harpertown 2376 "", 2377 "", 2378 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2379 "Atom", // 0x1B Z5xx series Silverthorn 2380 "", 2381 "Core 2", // 0x1D Dunnington (6-core) 2382 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2383 "", 2384 "", 2385 "", 2386 "", 2387 "", 2388 "", 2389 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2390 "", 2391 "", 2392 "", // 0x28 2393 "", 2394 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2395 "", 2396 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2397 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2398 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2399 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2400 "", 2401 "", 2402 "", 2403 "", 2404 "", 2405 "", 2406 "", 2407 "", 2408 "", 2409 "", 2410 "Ivy Bridge", // 0x3a 2411 "", 2412 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2413 "", // 0x3d "Next Generation Intel Core Processor" 2414 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2415 "", // 0x3f "Future Generation Intel 
Xeon Processor" 2416 "", 2417 "", 2418 "", 2419 "", 2420 "", 2421 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2422 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2423 nullptr 2424 }; 2425 2426 /* Brand ID is for back compatibility 2427 * Newer CPUs uses the extended brand string */ 2428 const char* const _brand_id[] = { 2429 "", 2430 "Celeron processor", 2431 "Pentium III processor", 2432 "Intel Pentium III Xeon processor", 2433 "", 2434 "", 2435 "", 2436 "", 2437 "Intel Pentium 4 processor", 2438 nullptr 2439 }; 2440 2441 2442 const char* const _feature_edx_id[] = { 2443 "On-Chip FPU", 2444 "Virtual Mode Extensions", 2445 "Debugging Extensions", 2446 "Page Size Extensions", 2447 "Time Stamp Counter", 2448 "Model Specific Registers", 2449 "Physical Address Extension", 2450 "Machine Check Exceptions", 2451 "CMPXCHG8B Instruction", 2452 "On-Chip APIC", 2453 "", 2454 "Fast System Call", 2455 "Memory Type Range Registers", 2456 "Page Global Enable", 2457 "Machine Check Architecture", 2458 "Conditional Mov Instruction", 2459 "Page Attribute Table", 2460 "36-bit Page Size Extension", 2461 "Processor Serial Number", 2462 "CLFLUSH Instruction", 2463 "", 2464 "Debug Trace Store feature", 2465 "ACPI registers in MSR space", 2466 "Intel Architecture MMX Technology", 2467 "Fast Float Point Save and Restore", 2468 "Streaming SIMD extensions", 2469 "Streaming SIMD extensions 2", 2470 "Self-Snoop", 2471 "Hyper Threading", 2472 "Thermal Monitor", 2473 "", 2474 "Pending Break Enable" 2475 }; 2476 2477 const char* const _feature_extended_edx_id[] = { 2478 "", 2479 "", 2480 "", 2481 "", 2482 "", 2483 "", 2484 "", 2485 "", 2486 "", 2487 "", 2488 "", 2489 "SYSCALL/SYSRET", 2490 "", 2491 "", 2492 "", 2493 "", 2494 "", 2495 "", 2496 "", 2497 "", 2498 "Execute Disable Bit", 2499 "", 2500 "", 2501 "", 2502 "", 2503 "", 2504 "", 2505 "RDTSCP", 2506 "", 2507 "Intel 64 Architecture", 2508 "", 2509 "" 2510 }; 2511 2512 const char* const _feature_ecx_id[] = { 2513 "Streaming SIMD Extensions 3", 2514 "PCLMULQDQ", 2515 "64-bit DS Area", 2516 "MONITOR/MWAIT instructions", 2517 "CPL Qualified Debug Store", 2518 "Virtual Machine Extensions", 2519 "Safer Mode Extensions", 2520 "Enhanced Intel SpeedStep technology", 2521 "Thermal Monitor 2", 2522 "Supplemental Streaming SIMD Extensions 3", 2523 "L1 Context ID", 2524 "", 2525 "Fused Multiply-Add", 2526 "CMPXCHG16B", 2527 "xTPR Update Control", 2528 "Perfmon and Debug Capability", 2529 "", 2530 "Process-context identifiers", 2531 "Direct Cache Access", 2532 "Streaming SIMD extensions 4.1", 2533 "Streaming SIMD extensions 4.2", 2534 "x2APIC", 2535 "MOVBE", 2536 "Popcount instruction", 2537 "TSC-Deadline", 2538 "AESNI", 2539 "XSAVE", 2540 "OSXSAVE", 2541 "AVX", 2542 "F16C", 2543 "RDRAND", 2544 "" 2545 }; 2546 2547 const char* const _feature_extended_ecx_id[] = { 2548 "LAHF/SAHF instruction support", 2549 "Core multi-processor legacy mode", 2550 "", 2551 "", 2552 "", 2553 "Advanced Bit Manipulations: LZCNT", 2554 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2555 "Misaligned SSE mode", 2556 "", 2557 "", 2558 "", 2559 "", 2560 "", 2561 "", 2562 "", 2563 "", 2564 "", 2565 "", 2566 "", 2567 "", 2568 "", 2569 "", 2570 "", 2571 "", 2572 "", 2573 "", 2574 "", 2575 "", 2576 "", 2577 "", 2578 "", 2579 "" 2580 }; 2581 2582 void VM_Version::initialize_tsc(void) { 2583 ResourceMark rm; 2584 2585 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2586 if (cpuid_brand_string_stub_blob == nullptr) { 2587 
vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2588 } 2589 CodeBuffer c(cpuid_brand_string_stub_blob); 2590 VM_Version_StubGenerator g(&c); 2591 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2592 g.generate_getCPUIDBrandString()); 2593 } 2594 2595 const char* VM_Version::cpu_model_description(void) { 2596 uint32_t cpu_family = extended_cpu_family(); 2597 uint32_t cpu_model = extended_cpu_model(); 2598 const char* model = nullptr; 2599 2600 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2601 for (uint32_t i = 0; i <= cpu_model; i++) { 2602 model = _model_id_pentium_pro[i]; 2603 if (model == nullptr) { 2604 break; 2605 } 2606 } 2607 } 2608 return model; 2609 } 2610 2611 const char* VM_Version::cpu_brand_string(void) { 2612 if (_cpu_brand_string == nullptr) { 2613 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2614 if (nullptr == _cpu_brand_string) { 2615 return nullptr; 2616 } 2617 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2618 if (ret_val != OS_OK) { 2619 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2620 _cpu_brand_string = nullptr; 2621 } 2622 } 2623 return _cpu_brand_string; 2624 } 2625 2626 const char* VM_Version::cpu_brand(void) { 2627 const char* brand = nullptr; 2628 2629 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2630 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2631 brand = _brand_id[0]; 2632 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2633 brand = _brand_id[i]; 2634 } 2635 } 2636 return brand; 2637 } 2638 2639 bool VM_Version::cpu_is_em64t(void) { 2640 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2641 } 2642 2643 bool VM_Version::is_netburst(void) { 2644 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2645 } 2646 2647 bool VM_Version::supports_tscinv_ext(void) { 2648 if (!supports_tscinv_bit()) { 2649 return false; 2650 } 2651 2652 if (is_intel()) { 2653 return true; 2654 } 2655 2656 if (is_amd()) { 2657 return !is_amd_Barcelona(); 2658 } 2659 2660 if (is_hygon()) { 2661 return true; 2662 } 2663 2664 return false; 2665 } 2666 2667 void VM_Version::resolve_cpu_information_details(void) { 2668 2669 // in future we want to base this information on proper cpu 2670 // and cache topology enumeration such as: 2671 // Intel 64 Architecture Processor Topology Enumeration 2672 // which supports system cpu and cache topology enumeration 2673 // either using 2xAPICIDs or initial APICIDs 2674 2675 // currently only rough cpu information estimates 2676 // which will not necessarily reflect the exact configuration of the system 2677 2678 // this is the number of logical hardware threads 2679 // visible to the operating system 2680 _no_of_threads = os::processor_count(); 2681 2682 // find out number of threads per cpu package 2683 int threads_per_package = threads_per_core() * cores_per_cpu(); 2684 2685 // use amount of threads visible to the process in order to guess number of sockets 2686 _no_of_sockets = _no_of_threads / threads_per_package; 2687 2688 // process might only see a subset of the total number of threads 2689 // from a single processor package. Virtualization/resource management for example. 2690 // If so then just write a hard 1 as num of pkgs. 
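// e.g. a process limited to 2 of the 32 hardware threads of a two-socket,
// 8-core, SMT-2 machine computes 2 / (2 * 8) == 0 sockets here, which the
// check below clamps to 1.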
2691 if (0 == _no_of_sockets) { 2692 _no_of_sockets = 1; 2693 } 2694 2695 // estimate the number of cores 2696 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2697 } 2698 2699 2700 const char* VM_Version::cpu_family_description(void) { 2701 int cpu_family_id = extended_cpu_family(); 2702 if (is_amd()) { 2703 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2704 return _family_id_amd[cpu_family_id]; 2705 } 2706 } 2707 if (is_intel()) { 2708 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2709 return cpu_model_description(); 2710 } 2711 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2712 return _family_id_intel[cpu_family_id]; 2713 } 2714 } 2715 if (is_hygon()) { 2716 return "Dhyana"; 2717 } 2718 return "Unknown x86"; 2719 } 2720 2721 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2722 assert(buf != nullptr, "buffer is null!"); 2723 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2724 2725 const char* cpu_type = nullptr; 2726 const char* x64 = nullptr; 2727 2728 if (is_intel()) { 2729 cpu_type = "Intel"; 2730 x64 = cpu_is_em64t() ? " Intel64" : ""; 2731 } else if (is_amd()) { 2732 cpu_type = "AMD"; 2733 x64 = cpu_is_em64t() ? " AMD64" : ""; 2734 } else if (is_hygon()) { 2735 cpu_type = "Hygon"; 2736 x64 = cpu_is_em64t() ? " AMD64" : ""; 2737 } else { 2738 cpu_type = "Unknown x86"; 2739 x64 = cpu_is_em64t() ? " x86_64" : ""; 2740 } 2741 2742 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2743 cpu_type, 2744 cpu_family_description(), 2745 supports_ht() ? " (HT)" : "", 2746 supports_sse3() ? " SSE3" : "", 2747 supports_ssse3() ? " SSSE3" : "", 2748 supports_sse4_1() ? " SSE4.1" : "", 2749 supports_sse4_2() ? " SSE4.2" : "", 2750 supports_sse4a() ? " SSE4A" : "", 2751 is_netburst() ? " Netburst" : "", 2752 is_intel_family_core() ? 
" Core" : "", 2753 x64); 2754 2755 return OS_OK; 2756 } 2757 2758 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2759 assert(buf != nullptr, "buffer is null!"); 2760 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2761 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2762 2763 // invoke newly generated asm code to fetch CPU Brand String 2764 getCPUIDBrandString_stub(&_cpuid_info); 2765 2766 // fetch results into buffer 2767 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2768 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2769 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2770 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2771 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2772 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2773 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2774 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2775 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2776 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2777 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2778 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2779 2780 return OS_OK; 2781 } 2782 2783 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2784 guarantee(buf != nullptr, "buffer is null!"); 2785 guarantee(buf_len > 0, "buffer len not enough!"); 2786 2787 unsigned int flag = 0; 2788 unsigned int fi = 0; 2789 size_t written = 0; 2790 const char* prefix = ""; 2791 2792 #define WRITE_TO_BUF(string) \ 2793 { \ 2794 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2795 if (res < 0) { \ 2796 return buf_len - 1; \ 2797 } \ 2798 written += res; \ 2799 if (prefix[0] == '\0') { \ 2800 prefix = ", "; \ 2801 } \ 2802 } 2803 2804 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2805 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2806 continue; /* no hyperthreading */ 2807 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2808 continue; /* no fast system call */ 2809 } 2810 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2811 WRITE_TO_BUF(_feature_edx_id[fi]); 2812 } 2813 } 2814 2815 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2816 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2817 WRITE_TO_BUF(_feature_ecx_id[fi]); 2818 } 2819 } 2820 2821 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2822 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2823 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2824 } 2825 } 2826 2827 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2828 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2829 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2830 } 2831 } 2832 2833 if (supports_tscinv_bit()) { 2834 WRITE_TO_BUF("Invariant TSC"); 2835 } 2836 2837 return written; 2838 } 2839 2840 /** 2841 * Write a detailed description of the cpu to a given buffer, including 2842 * feature set. 
2843 */ 2844 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2845 assert(buf != nullptr, "buffer is null!"); 2846 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2847 2848 static const char* unknown = "<unknown>"; 2849 char vendor_id[VENDOR_LENGTH]; 2850 const char* family = nullptr; 2851 const char* model = nullptr; 2852 const char* brand = nullptr; 2853 int outputLen = 0; 2854 2855 family = cpu_family_description(); 2856 if (family == nullptr) { 2857 family = unknown; 2858 } 2859 2860 model = cpu_model_description(); 2861 if (model == nullptr) { 2862 model = unknown; 2863 } 2864 2865 brand = cpu_brand_string(); 2866 2867 if (brand == nullptr) { 2868 brand = cpu_brand(); 2869 if (brand == nullptr) { 2870 brand = unknown; 2871 } 2872 } 2873 2874 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2875 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2876 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2877 vendor_id[VENDOR_LENGTH-1] = '\0'; 2878 2879 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2880 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2881 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2882 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2883 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2884 "Supports: ", 2885 brand, 2886 vendor_id, 2887 family, 2888 extended_cpu_family(), 2889 model, 2890 extended_cpu_model(), 2891 cpu_stepping(), 2892 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2893 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2894 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2895 _cpuid_info.std_cpuid1_eax.value, 2896 _cpuid_info.std_cpuid1_ebx.value, 2897 _cpuid_info.std_cpuid1_ecx.value, 2898 _cpuid_info.std_cpuid1_edx.value, 2899 _cpuid_info.ext_cpuid1_eax, 2900 _cpuid_info.ext_cpuid1_ebx, 2901 _cpuid_info.ext_cpuid1_ecx, 2902 _cpuid_info.ext_cpuid1_edx); 2903 2904 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2905 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2906 return OS_ERR; 2907 } 2908 2909 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2910 2911 return OS_OK; 2912 } 2913 2914 2915 // Fill in Abstract_VM_Version statics 2916 void VM_Version::initialize_cpu_information() { 2917 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2918 assert(!_initialized, "shouldn't be initialized yet"); 2919 resolve_cpu_information_details(); 2920 2921 // initialize cpu_name and cpu_desc 2922 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2923 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2924 _initialized = true; 2925 } 2926 2927 /** 2928 * For information about extracting the frequency from the cpu brand string, please see: 2929 * 2930 * Intel Processor Identification and the CPUID Instruction 2931 * Application Note 485 2932 * May 2012 2933 * 2934 * The return value is the frequency in Hz. 2935 */ 2936 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2937 const char* const brand_string = cpu_brand_string(); 2938 if (brand_string == nullptr) { 2939 return 0; 2940 } 2941 const int64_t MEGA = 1000000; 2942 int64_t multiplier = 0; 2943 int64_t frequency = 0; 2944 uint8_t idx = 0; 2945 // The brand string buffer is at most 48 bytes. 2946 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
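// Worked example: a brand string ending in "3.60GHz" stops the scan below
// with idx at 'G', giving multiplier == 10^9; since brand_string[idx-3] is
// '.', the result is 3 * 10^9 + 6 * 10^8 + 0 * 10^7 == 3.6 GHz in Hz.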
2947 for (; idx < 48-2; ++idx) { 2948 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2949 // Search brand string for "yHz" where y is M, G, or T. 2950 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2951 if (brand_string[idx] == 'M') { 2952 multiplier = MEGA; 2953 } else if (brand_string[idx] == 'G') { 2954 multiplier = MEGA * 1000; 2955 } else if (brand_string[idx] == 'T') { 2956 multiplier = MEGA * MEGA; 2957 } 2958 break; 2959 } 2960 } 2961 if (multiplier > 0) { 2962 // Compute frequency (in Hz) from brand string. 2963 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2964 frequency = (brand_string[idx-4] - '0') * multiplier; 2965 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2966 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2967 } else { // format is "xxxx" 2968 frequency = (brand_string[idx-4] - '0') * 1000; 2969 frequency += (brand_string[idx-3] - '0') * 100; 2970 frequency += (brand_string[idx-2] - '0') * 10; 2971 frequency += (brand_string[idx-1] - '0'); 2972 frequency *= multiplier; 2973 } 2974 } 2975 return frequency; 2976 } 2977 2978 2979 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2980 if (_max_qualified_cpu_frequency == 0) { 2981 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2982 } 2983 return _max_qualified_cpu_frequency; 2984 } 2985 2986 uint64_t VM_Version::CpuidInfo::feature_flags() const { 2987 uint64_t result = 0; 2988 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2989 result |= CPU_CX8; 2990 if (std_cpuid1_edx.bits.cmov != 0) 2991 result |= CPU_CMOV; 2992 if (std_cpuid1_edx.bits.clflush != 0) 2993 result |= CPU_FLUSH; 2994 #ifdef _LP64 2995 // clflush should always be available on x86_64 2996 // if not we are in real trouble because we rely on it 2997 // to flush the code cache. 2998 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2999 #endif 3000 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 3001 ext_cpuid1_edx.bits.fxsr != 0)) 3002 result |= CPU_FXSR; 3003 // HT flag is set for multi-core processors also. 
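// (CPUID's HTT bit only says the package may hold more than one logical
// processor, so plain multi-core parts report it too; threads_per_core()
// is the reliable signal for SMT.)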
3004 if (threads_per_core() > 1) 3005 result |= CPU_HT; 3006 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 3007 ext_cpuid1_edx.bits.mmx != 0)) 3008 result |= CPU_MMX; 3009 if (std_cpuid1_edx.bits.sse != 0) 3010 result |= CPU_SSE; 3011 if (std_cpuid1_edx.bits.sse2 != 0) 3012 result |= CPU_SSE2; 3013 if (std_cpuid1_ecx.bits.sse3 != 0) 3014 result |= CPU_SSE3; 3015 if (std_cpuid1_ecx.bits.ssse3 != 0) 3016 result |= CPU_SSSE3; 3017 if (std_cpuid1_ecx.bits.sse4_1 != 0) 3018 result |= CPU_SSE4_1; 3019 if (std_cpuid1_ecx.bits.sse4_2 != 0) 3020 result |= CPU_SSE4_2; 3021 if (std_cpuid1_ecx.bits.popcnt != 0) 3022 result |= CPU_POPCNT; 3023 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 3024 xem_xcr0_eax.bits.apx_f != 0) { 3025 result |= CPU_APX_F; 3026 } 3027 if (std_cpuid1_ecx.bits.avx != 0 && 3028 std_cpuid1_ecx.bits.osxsave != 0 && 3029 xem_xcr0_eax.bits.sse != 0 && 3030 xem_xcr0_eax.bits.ymm != 0) { 3031 result |= CPU_AVX; 3032 result |= CPU_VZEROUPPER; 3033 if (sefsl1_cpuid7_eax.bits.sha512 != 0) 3034 result |= CPU_SHA512; 3035 if (std_cpuid1_ecx.bits.f16c != 0) 3036 result |= CPU_F16C; 3037 if (sef_cpuid7_ebx.bits.avx2 != 0) { 3038 result |= CPU_AVX2; 3039 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 3040 result |= CPU_AVX_IFMA; 3041 } 3042 if (sef_cpuid7_ecx.bits.gfni != 0) 3043 result |= CPU_GFNI; 3044 if (sef_cpuid7_ebx.bits.avx512f != 0 && 3045 xem_xcr0_eax.bits.opmask != 0 && 3046 xem_xcr0_eax.bits.zmm512 != 0 && 3047 xem_xcr0_eax.bits.zmm32 != 0) { 3048 result |= CPU_AVX512F; 3049 if (sef_cpuid7_ebx.bits.avx512cd != 0) 3050 result |= CPU_AVX512CD; 3051 if (sef_cpuid7_ebx.bits.avx512dq != 0) 3052 result |= CPU_AVX512DQ; 3053 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 3054 result |= CPU_AVX512_IFMA; 3055 if (sef_cpuid7_ebx.bits.avx512pf != 0) 3056 result |= CPU_AVX512PF; 3057 if (sef_cpuid7_ebx.bits.avx512er != 0) 3058 result |= CPU_AVX512ER; 3059 if (sef_cpuid7_ebx.bits.avx512bw != 0) 3060 result |= CPU_AVX512BW; 3061 if (sef_cpuid7_ebx.bits.avx512vl != 0) 3062 result |= CPU_AVX512VL; 3063 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 3064 result |= CPU_AVX512_VPOPCNTDQ; 3065 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 3066 result |= CPU_AVX512_VPCLMULQDQ; 3067 if (sef_cpuid7_ecx.bits.vaes != 0) 3068 result |= CPU_AVX512_VAES; 3069 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 3070 result |= CPU_AVX512_VNNI; 3071 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 3072 result |= CPU_AVX512_BITALG; 3073 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 3074 result |= CPU_AVX512_VBMI; 3075 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 3076 result |= CPU_AVX512_VBMI2; 3077 } 3078 } 3079 if (std_cpuid1_ecx.bits.hv != 0) 3080 result |= CPU_HV; 3081 if (sef_cpuid7_ebx.bits.bmi1 != 0) 3082 result |= CPU_BMI1; 3083 if (std_cpuid1_edx.bits.tsc != 0) 3084 result |= CPU_TSC; 3085 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3086 result |= CPU_TSCINV_BIT; 3087 if (std_cpuid1_ecx.bits.aes != 0) 3088 result |= CPU_AES; 3089 if (ext_cpuid1_ecx.bits.lzcnt != 0) 3090 result |= CPU_LZCNT; 3091 if (ext_cpuid1_ecx.bits.prefetchw != 0) 3092 result |= CPU_3DNOW_PREFETCH; 3093 if (sef_cpuid7_ebx.bits.erms != 0) 3094 result |= CPU_ERMS; 3095 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3096 result |= CPU_FSRM; 3097 if (std_cpuid1_ecx.bits.clmul != 0) 3098 result |= CPU_CLMUL; 3099 if (sef_cpuid7_ebx.bits.rtm != 0) 3100 result |= CPU_RTM; 3101 if (sef_cpuid7_ebx.bits.adx != 0) 3102 result |= CPU_ADX; 3103 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3104 result |= CPU_BMI2; 3105 if (sef_cpuid7_ebx.bits.sha != 0) 3106 result |= CPU_SHA; 
3107 if (std_cpuid1_ecx.bits.fma != 0) 3108 result |= CPU_FMA; 3109 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3110 result |= CPU_FLUSHOPT; 3111 if (sef_cpuid7_ebx.bits.clwb != 0) 3112 result |= CPU_CLWB; 3113 if (ext_cpuid1_edx.bits.rdtscp != 0) 3114 result |= CPU_RDTSCP; 3115 if (sef_cpuid7_ecx.bits.rdpid != 0) 3116 result |= CPU_RDPID; 3117 3118 // AMD|Hygon additional features. 3119 if (is_amd_family()) { 3120 // PREFETCHW was checked above; check 3DNow! (the tdnow bit) here. 3121 if (ext_cpuid1_edx.bits.tdnow != 0) 3122 result |= CPU_3DNOW_PREFETCH; 3123 if (ext_cpuid1_ecx.bits.sse4a != 0) 3124 result |= CPU_SSE4A; 3125 } 3126 3127 // Intel additional features. 3128 if (is_intel()) { 3129 if (sef_cpuid7_edx.bits.serialize != 0) 3130 result |= CPU_SERIALIZE; 3131 if (sef_cpuid7_edx.bits.avx512_fp16 != 0) 3132 result |= CPU_AVX512_FP16; 3133 } 3134 3135 // ZX additional features. 3136 if (is_zx()) { 3137 // We do not know if these are supported by ZX, so we cannot trust 3138 // the common CPUID bit for them. 3139 assert((result & CPU_CLWB) == 0, "Check if it is supported?"); 3140 result &= ~CPU_CLWB; 3141 } 3142 3143 // Protection key features. 3144 if (sef_cpuid7_ecx.bits.pku != 0) { 3145 result |= CPU_PKU; 3146 } 3147 if (sef_cpuid7_ecx.bits.ospke != 0) { 3148 result |= CPU_OSPKE; 3149 } 3150 3151 // Control flow enforcement (CET) features. 3152 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3153 result |= CPU_CET_SS; 3154 } 3155 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3156 result |= CPU_CET_IBT; 3157 } 3158 3159 // Composite features. 3160 if (supports_tscinv_bit() && 3161 ((is_amd_family() && !is_amd_Barcelona()) || 3162 is_intel_tsc_synched_at_init())) { 3163 result |= CPU_TSCINV; 3164 } 3165 3166 return result; 3167 } 3168 3169 bool VM_Version::os_supports_avx_vectors() { 3170 bool retVal = false; 3171 int nreg = 2 LP64_ONLY(+2); 3172 if (supports_evex()) { 3173 // Verify that the OS saves/restores all bits of the EVEX registers 3174 // during signal processing. 3175 retVal = true; 3176 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3177 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3178 retVal = false; 3179 break; 3180 } 3181 } 3182 } else if (supports_avx()) { 3183 // Verify that the OS saves/restores all bits of the AVX registers 3184 // during signal processing. 3185 retVal = true; 3186 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3187 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3188 retVal = false; 3189 break; 3190 } 3191 } 3192 // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen 3193 if (retVal == false) { 3194 // Verify that the OS saves/restores all bits of the EVEX registers 3195 // during signal processing. 3196 retVal = true; 3197 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3198 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3199 retVal = false; 3200 break; 3201 } 3202 } 3203 } 3204 } 3205 return retVal; 3206 } 3207 3208 bool VM_Version::os_supports_apx_egprs() { 3209 if (!supports_apx_f()) { 3210 return false; 3211 } 3212 // Enable APX support for product builds only after 3213 // completion of the planned features listed in JDK-8329030.
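// Roughly: the CPU-info stub stores a known pattern (egpr_test_value())
// into r16/r31 around an induced SEGV (see clear_apx_test_state); if the
// pattern does not survive the kernel's signal-context save/restore, the
// extended GPRs are not preserved and APX must stay off.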
3214 #if !defined(PRODUCT) 3215 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3216 _cpuid_info.apx_save[1] != egpr_test_value()) { 3217 return false; 3218 } 3219 return true; 3220 #else 3221 return false; 3222 #endif 3223 } 3224 3225 uint VM_Version::cores_per_cpu() { 3226 uint result = 1; 3227 if (is_intel()) { 3228 bool supports_topology = supports_processor_topology(); 3229 if (supports_topology) { 3230 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3231 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3232 } 3233 if (!supports_topology || result == 0) { 3234 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3235 } 3236 } else if (is_amd_family()) { 3237 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3238 } else if (is_zx()) { 3239 bool supports_topology = supports_processor_topology(); 3240 if (supports_topology) { 3241 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3242 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3243 } 3244 if (!supports_topology || result == 0) { 3245 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3246 } 3247 } 3248 return result; 3249 } 3250 3251 uint VM_Version::threads_per_core() { 3252 uint result = 1; 3253 if (is_intel() && supports_processor_topology()) { 3254 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3255 } else if (is_zx() && supports_processor_topology()) { 3256 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3257 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3258 if (cpu_family() >= 0x17) { 3259 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3260 } else { 3261 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3262 cores_per_cpu(); 3263 } 3264 } 3265 return (result == 0 ? 1 : result); 3266 } 3267 3268 uint VM_Version::L1_line_size() { 3269 uint result = 0; 3270 if (is_intel()) { 3271 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3272 } else if (is_amd_family()) { 3273 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3274 } else if (is_zx()) { 3275 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3276 } 3277 if (result < 32) // not defined ? 3278 result = 32; // 32 bytes by default on x86 and other x64 3279 return result; 3280 } 3281 3282 bool VM_Version::is_intel_tsc_synched_at_init() { 3283 if (is_intel_family_core()) { 3284 uint32_t ext_model = extended_cpu_model(); 3285 if (ext_model == CPU_MODEL_NEHALEM_EP || 3286 ext_model == CPU_MODEL_WESTMERE_EP || 3287 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3288 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3289 // <= 2-socket invariant tsc support. EX versions are usually used 3290 // in > 2-socket systems and likely don't synchronize tscs at 3291 // initialization. 3292 // Code that uses tsc values must be prepared for them to arbitrarily 3293 // jump forward or backward. 3294 return true; 3295 } 3296 } 3297 return false; 3298 } 3299 3300 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3301 // Hardware prefetching (distance/size in bytes): 3302 // Pentium 3 - 64 / 32 3303 // Pentium 4 - 256 / 128 3304 // Athlon - 64 / 32 ???? 
3305 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3306 // Core - 128 / 64 3307 // 3308 // Software prefetching (distance in bytes / instruction with best score): 3309 // Pentium 3 - 128 / prefetchnta 3310 // Pentium 4 - 512 / prefetchnta 3311 // Athlon - 128 / prefetchnta 3312 // Opteron - 256 / prefetchnta 3313 // Core - 256 / prefetchnta 3314 // It will be used only when AllocatePrefetchStyle > 0 3315 3316 if (is_amd_family()) { // AMD | Hygon 3317 if (supports_sse2()) { 3318 return 256; // Opteron 3319 } else { 3320 return 128; // Athlon 3321 } 3322 } else { // Intel 3323 if (supports_sse3() && cpu_family() == 6) { 3324 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3325 return 192; 3326 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3327 #ifdef _LP64 3328 return 384; 3329 #else 3330 return 320; 3331 #endif 3332 } 3333 } 3334 if (supports_sse2()) { 3335 if (cpu_family() == 6) { 3336 return 256; // Pentium M, Core, Core2 3337 } else { 3338 return 512; // Pentium 4 3339 } 3340 } else { 3341 return 128; // Pentium 3 (and all other old CPUs) 3342 } 3343 } 3344 } 3345 3346 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3347 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3348 switch (id) { 3349 case vmIntrinsics::_floatToFloat16: 3350 case vmIntrinsics::_float16ToFloat: 3351 if (!supports_float16()) { 3352 return false; 3353 } 3354 break; 3355 default: 3356 break; 3357 } 3358 return true; 3359 }
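// Note: supports_float16() gates the Float.floatToFloat16 / float16ToFloat
// intrinsics above; without hardware half-precision conversion support
// (F16C or AVX-512 FP16 class hardware) the shared Java implementations
// are used instead.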