/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
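
// The stubs declared above are generated into 'stub_blob' during startup and
// then invoked as plain C function pointers; for example, get_cpu_info_stub
// is called as get_cpu_info_stub(&_cpuid_info) from get_processor_features()
// below, filling in the raw CPUID data before any feature flags are derived.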

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31
    // before the signal-handling test guarantees that register values found
    // preserved after signal handling were re-instantiated by the operating
    // system rather than simply never having been modified.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
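    // The same EFLAGS toggle trick as the AC-bit probe above, in pseudocode
    // (illustrative summary, not generated code):
    //   old = EFLAGS; EFLAGS ^= ID-bit; reread EFLAGS;
    //   if the bit refused to change -> 486-class CPU, no CPUID available.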
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);  // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);    // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);    // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
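
    // Background on the leaf-4 validity test that follows: cpuid(0x4) reports
    // one cache per ECX sub-leaf, and eax[4:0] encodes the cache type, with 0
    // meaning "no more caches"; only sub-leaf 0 (ECX=0) is queried here.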
    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);  // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid cache parameters used
    __ orl(rax, rax);   // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);  // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
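
    // Layout note: the extended-leaf sections below appear in descending
    // order (0x8000001E, 8, 7, 5, 1) so execution falls through from the
    // highest supported leaf into all lower ones; the compare/branch chain
    // above just picks the entry point matching the CPU's maximum extended
    // leaf.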
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
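    //
    // XCR0 bit usage in the checks below (per the Intel SDM): bit 1 = SSE
    // state, bit 2 = AVX/YMM state, bits 7:5 = opmask and upper-ZMM state,
    // bit 19 (mask 0x80000) = APX extended GPR state.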
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS saves SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
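    // The test works as follows: load a recognizable pattern into the vector
    // registers, raise a SEGV by reading address zero, and resume at the
    // recorded continuation address; if the OS context save/restore is sound,
    // the pattern is still intact in the registers afterwards.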
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
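    // The saved register contents are compared against ymm_test_value()
    // later (see os_supports_avx_vectors()) to decide whether the OS really
    // preserves wide vector state across signals.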

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
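    // 0x0FFF0FF0 masks the base and extended family/model fields of
    // cpuid(1).EAX while ignoring the stepping, so the compares below match
    // the Knights-family models at any stepping.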
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();
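
  // extended_cpu_family()/extended_cpu_model() fold CPUID's extended fields
  // into the base ones (family += extended family when the base family is
  // 0xF; model |= extended model << 4 for families 6 and 0xF), the standard
  // decoding of cpuid(1).EAX.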

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features; // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
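  // For example, UseSSE=4 on a CPU with SSE3 but without SSE4.1 yields
  // use_sse_limit=3, so the flag is clamped to 3 with a warning below.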
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
  }

  // Currently APX support is only enabled for targets supporting AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
  }

  if (!UseAPX) {
    _features &= ~CPU_APX_F;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
  }
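
  // Note that only _features is stripped by the flag-driven masking above;
  // _cpu_features, saved earlier, still records the full hardware feature set.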

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }
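
  // GCM's GF(2^128) multiply maps directly onto carry-less multiplication
  // (pclmulqdq), which is why the GHASH intrinsic below keys off CLMUL plus
  // SSE3 or higher for the surrounding vector code.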
  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif
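
  // Vector width selection that follows: SSE2 alone caps vectors at 16-byte
  // XMM, AVX/AVX2 allows 32-byte YMM, and AVX-512 (UseAVX > 2) allows 64-byte
  // ZMM, always provided the OS restores that state (os_supports_avx_vectors()).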
#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 bytes vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 bytes vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else
#endif
  if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }
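
    // Background: AMD cores before family 17h (Zen) split 256-bit AVX
    // operations into two 128-bit halves internally, so vectors wider than
    // 16 bytes bring no benefit there; hence the cap below.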
#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
Intrinsics will be disabled."); 1678 } 1679 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1680 } 1681 } 1682 if (is_atom_family() || is_knights_family()) { 1683 #ifdef COMPILER2 1684 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1685 OptoScheduling = true; 1686 } 1687 #endif 1688 if (supports_sse4_2()) { // Silvermont 1689 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1690 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1691 } 1692 } 1693 if (FLAG_IS_DEFAULT(UseIncDec)) { 1694 FLAG_SET_DEFAULT(UseIncDec, false); 1695 } 1696 } 1697 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1698 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1699 } 1700 #ifdef COMPILER2 1701 if (UseAVX > 2) { 1702 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1703 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1704 ArrayOperationPartialInlineSize != 0 && 1705 ArrayOperationPartialInlineSize != 16 && 1706 ArrayOperationPartialInlineSize != 32 && 1707 ArrayOperationPartialInlineSize != 64)) { 1708 int inline_size = 0; 1709 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1710 inline_size = 64; 1711 } else if (MaxVectorSize >= 32) { 1712 inline_size = 32; 1713 } else if (MaxVectorSize >= 16) { 1714 inline_size = 16; 1715 } 1716 if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1717 warning("Setting ArrayOperationPartialInlineSize to %d", inline_size); 1718 } 1719 ArrayOperationPartialInlineSize = inline_size; 1720 } 1721 1722 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1723 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; 1724 if (ArrayOperationPartialInlineSize) { 1725 warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize); 1726 } else { 1727 warning("Setting ArrayOperationPartialInlineSize to " INTX_FORMAT, ArrayOperationPartialInlineSize); 1728 } 1729 } 1730 } 1731 #endif 1732 } 1733 1734 #ifdef COMPILER2 1735 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1736 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { 1737 OptimizeFill = false; 1738 } 1739 } 1740 #endif 1741 1742 #ifdef _LP64 1743 if (UseSSE42Intrinsics) { 1744 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1745 UseVectorizedMismatchIntrinsic = true; 1746 } 1747 } else if (UseVectorizedMismatchIntrinsic) { 1748 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1749 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1750 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1751 } 1752 if (UseAVX >= 2) { 1753 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true); 1754 } else if (UseVectorizedHashCodeIntrinsic) { 1755 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) 1756 warning("vectorizedHashCode intrinsics are not available on this CPU"); 1757 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1758 } 1759 #else 1760 if (UseVectorizedMismatchIntrinsic) { 1761 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1762 warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); 1763 } 1764 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1765 } 1766 if (UseVectorizedHashCodeIntrinsic) { 1767 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) { 1768 warning("vectorizedHashCode intrinsic is not available in 32-bit VM"); 1769 } 1770 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1771 } 1772 #endif // _LP64 1773 1774 // Use count leading zeros instruction if available.
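// Unlike the legacy bsr instruction, whose destination is undefined for a
// zero input, lzcnt returns the operand width (e.g. lzcnt(0) == 32 for
// 32-bit operands), so no explicit zero check is needed. Roughly:
//   clz(x) == UseCountLeadingZerosInstruction ? lzcnt(x)
//                                             : (x == 0 ? 32 : 31 - bsr(x))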
1775 if (supports_lzcnt()) { 1776 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1777 UseCountLeadingZerosInstruction = true; 1778 } 1779 } else if (UseCountLeadingZerosInstruction) { 1780 warning("lzcnt instruction is not available on this CPU"); 1781 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1782 } 1783 1784 // Use count trailing zeros instruction if available 1785 if (supports_bmi1()) { 1786 // tzcnt does not require VEX prefix 1787 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1788 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1789 // Don't use tzcnt if BMI1 is switched off on command line. 1790 UseCountTrailingZerosInstruction = false; 1791 } else { 1792 UseCountTrailingZerosInstruction = true; 1793 } 1794 } 1795 } else if (UseCountTrailingZerosInstruction) { 1796 warning("tzcnt instruction is not available on this CPU"); 1797 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1798 } 1799 1800 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1801 // VEX prefix is generated only when AVX > 0. 1802 if (supports_bmi1() && supports_avx()) { 1803 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1804 UseBMI1Instructions = true; 1805 } 1806 } else if (UseBMI1Instructions) { 1807 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1808 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1809 } 1810 1811 if (supports_bmi2() && supports_avx()) { 1812 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1813 UseBMI2Instructions = true; 1814 } 1815 } else if (UseBMI2Instructions) { 1816 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1817 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1818 } 1819 1820 // Use population count instruction if available. 1821 if (supports_popcnt()) { 1822 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1823 UsePopCountInstruction = true; 1824 } 1825 } else if (UsePopCountInstruction) { 1826 warning("POPCNT instruction is not available on this CPU"); 1827 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1828 } 1829 1830 // Use fast-string operations if available. 1831 if (supports_erms()) { 1832 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1833 UseFastStosb = true; 1834 } 1835 } else if (UseFastStosb) { 1836 warning("fast-string operations are not available on this CPU"); 1837 FLAG_SET_DEFAULT(UseFastStosb, false); 1838 } 1839 1840 // For AMD Processors use XMM/YMM MOVDQU instructions 1841 // for Object Initialization as default 1842 if (is_amd() && cpu_family() >= 0x19) { 1843 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1844 UseFastStosb = false; 1845 } 1846 } 1847 1848 #ifdef COMPILER2 1849 if (is_intel() && MaxVectorSize > 16) { 1850 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1851 UseFastStosb = false; 1852 } 1853 } 1854 #endif 1855 1856 // Use XMM/YMM MOVDQU instruction for Object Initialization 1857 if (UseSSE >= 2 && UseUnalignedLoadStores) { 1858 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1859 UseXMMForObjInit = true; 1860 } 1861 } else if (UseXMMForObjInit) { 1862 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1863 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1864 } 1865 1866 #ifdef COMPILER2 1867 if (FLAG_IS_DEFAULT(AlignVector)) { 1868 // Modern processors allow misaligned memory operations for vectors. 
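// That is, when unaligned vector loads/stores are known to be fast
// (UseUnalignedLoadStores), the auto-vectorizer can skip aligning vector
// memory accesses; otherwise alignment is still worthwhile.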
1869 AlignVector = !UseUnalignedLoadStores; 1870 } 1871 #endif // COMPILER2 1872 1873 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1874 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1875 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1876 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1877 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1878 } 1879 } 1880 1881 // Allocation prefetch settings 1882 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1883 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1884 (cache_line_size > AllocatePrefetchStepSize)) { 1885 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1886 } 1887 1888 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1889 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1890 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1891 warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag."); 1892 } 1893 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1894 } 1895 1896 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1897 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1898 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1899 } 1900 1901 if (is_intel() && cpu_family() == 6 && supports_sse3()) { 1902 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1903 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1904 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1905 } 1906 #ifdef COMPILER2 1907 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1908 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1909 } 1910 #endif 1911 } 1912 1913 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1914 #ifdef COMPILER2 1915 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1916 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1917 } 1918 #endif 1919 } 1920 1921 #ifdef _LP64 1922 // Prefetch settings 1923 1924 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1925 // 50-warehouse specjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap. 1926 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1927 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1928 1929 // gc copy/scan is disabled if prefetchw isn't supported, because 1930 // Prefetch::write emits an inlined prefetchw on Linux. 1931 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1932 // The prefetcht0 instruction used instead works on both amd64 and em64t.
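// With the common 64-byte dcache line this works out to 576/64 == 9 lines
// for the chosen interval and 256/64 == 4 lines for the runner-up.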
1933 1934 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1935 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1936 } 1937 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1938 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1939 } 1940 #endif 1941 1942 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1943 (cache_line_size > ContendedPaddingWidth)) 1944 ContendedPaddingWidth = cache_line_size; 1945 1946 // This machine allows unaligned memory accesses 1947 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1948 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1949 } 1950 1951 #ifndef PRODUCT 1952 if (log_is_enabled(Info, os, cpu)) { 1953 LogStream ls(Log(os, cpu)::info()); 1954 outputStream* log = &ls; 1955 log->print_cr("Logical CPUs per core: %u", 1956 logical_processors_per_package()); 1957 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1958 log->print("UseSSE=%d", UseSSE); 1959 if (UseAVX > 0) { 1960 log->print(" UseAVX=%d", UseAVX); 1961 } 1962 if (UseAES) { 1963 log->print(" UseAES=1"); 1964 } 1965 #ifdef COMPILER2 1966 if (MaxVectorSize > 0) { 1967 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1968 } 1969 #endif 1970 log->cr(); 1971 log->print("Allocation"); 1972 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { 1973 log->print_cr(": no prefetching"); 1974 } else { 1975 log->print(" prefetching: "); 1976 if (UseSSE == 0 && supports_3dnow_prefetch()) { 1977 log->print("PREFETCHW"); 1978 } else if (UseSSE >= 1) { 1979 if (AllocatePrefetchInstr == 0) { 1980 log->print("PREFETCHNTA"); 1981 } else if (AllocatePrefetchInstr == 1) { 1982 log->print("PREFETCHT0"); 1983 } else if (AllocatePrefetchInstr == 2) { 1984 log->print("PREFETCHT2"); 1985 } else if (AllocatePrefetchInstr == 3) { 1986 log->print("PREFETCHW"); 1987 } 1988 } 1989 if (AllocatePrefetchLines > 1) { 1990 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1991 } else { 1992 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1993 } 1994 } 1995 1996 if (PrefetchCopyIntervalInBytes > 0) { 1997 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1998 } 1999 if (PrefetchScanIntervalInBytes > 0) { 2000 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 2001 } 2002 if (ContendedPaddingWidth > 0) { 2003 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 2004 } 2005 } 2006 #endif // !PRODUCT 2007 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 2008 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 2009 } 2010 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 2011 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 2012 } 2013 } 2014 2015 void VM_Version::print_platform_virtualization_info(outputStream* st) { 2016 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 2017 if (vrt == XenHVM) { 2018 st->print_cr("Xen hardware-assisted virtualization detected"); 2019 } else if (vrt == KVM) { 2020 st->print_cr("KVM virtualization detected"); 2021 } else if (vrt == VMWare) { 2022 st->print_cr("VMWare virtualization detected"); 2023 VirtualizationSupport::print_virtualization_info(st); 2024 } else if (vrt == HyperV) { 2025 st->print_cr("Hyper-V virtualization detected"); 2026 } else if (vrt == HyperVRole) { 2027 st->print_cr("Hyper-V role detected"); 2028 } 2029 } 2030 2031 bool VM_Version::compute_has_intel_jcc_erratum() { 2032 if (!is_intel_family_core()) { 2033 
// Only Intel CPUs are affected. 2034 return false; 2035 } 2036 // The following table of affected CPUs is based on the following document released by Intel: 2037 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 2038 switch (_model) { 2039 case 0x8E: 2040 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2041 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 2042 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 2043 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 2044 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 2045 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2046 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2047 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 2048 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2049 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 2050 case 0x4E: 2051 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 2052 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 2053 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 2054 return _stepping == 0x3; 2055 case 0x55: 2056 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 2057 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 2058 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 2059 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 2060 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 2061 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 2062 return _stepping == 0x4 || _stepping == 0x7; 2063 case 0x5E: 2064 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 2065 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 2066 return _stepping == 0x3; 2067 case 0x9E: 2068 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2069 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2070 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2071 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2072 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2073 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based 
on microarchitecture code name Coffee Lake H 2074 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 2075 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP 2076 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 2077 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 2078 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 2079 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 2080 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2) 2081 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 2082 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 2083 case 0xA5: 2084 // Not in Intel documentation. 2085 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 2086 return true; 2087 case 0xA6: 2088 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 2089 return _stepping == 0x0; 2090 case 0xAE: 2091 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2092 return _stepping == 0xA; 2093 default: 2094 // If we are running on another Intel machine not recognized in the table, we are okay. 2095 return false; 2096 } 2097 } 2098 2099 // On Xen, the cpuid instruction returns 2100 // eax / registers[0]: Version of Xen 2101 // ebx / registers[1]: chars 'XenV' 2102 // ecx / registers[2]: chars 'MMXe' 2103 // edx / registers[3]: chars 'nVMM' 2104 // 2105 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2106 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2107 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2108 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2109 // 2110 // more information: 2111 // https://kb.vmware.com/s/article/1009458 2112 // 2113 void VM_Version::check_virtualizations() { 2114 uint32_t registers[4] = {0}; 2115 char signature[13] = {0}; 2116 2117 // Xen cpuid leaves can be found at 0x100-aligned boundaries starting 2118 // from 0x40000000 up to 0x40010000. 2119 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2120 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2121 detect_virt_stub(leaf, registers); 2122 memcpy(signature, &registers[1], 12); 2123 2124 if (strncmp("VMwareVMware", signature, 12) == 0) { 2125 Abstract_VM_Version::_detected_virtualization = VMWare; 2126 // check for extended metrics from guestlib 2127 VirtualizationSupport::initialize(); 2128 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2129 Abstract_VM_Version::_detected_virtualization = HyperV; 2130 #ifdef _WINDOWS 2131 // CPUID leaf 0x40000007 is available to the root partition only. 2132 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2133 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2134 detect_virt_stub(0x40000007, registers); 2135 if ((registers[0] != 0x0) || 2136 (registers[1] != 0x0) || 2137 (registers[2] != 0x0) || 2138 (registers[3] != 0x0)) { 2139 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2140 } 2141 #endif 2142 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2143 Abstract_VM_Version::_detected_virtualization = KVM; 2144 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2145 Abstract_VM_Version::_detected_virtualization = XenHVM; 2146 } 2147 } 2148 } 2149 2150 #ifdef COMPILER2 2151 // Determine whether we are running on Cascade Lake using default options. 2152 bool VM_Version::is_default_intel_cascade_lake() { 2153 return FLAG_IS_DEFAULT(UseAVX) && 2154 FLAG_IS_DEFAULT(MaxVectorSize) && 2155 UseAVX > 2 && 2156 is_intel_cascade_lake(); 2157 } 2158 #endif 2159 2160 bool VM_Version::is_intel_cascade_lake() { 2161 return is_intel_skylake() && _stepping >= 5; 2162 } 2163 2164 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2165 // for implementing the array copy and clear operations. 2166 // Intel platforms that support the serialize instruction 2167 // have an improved implementation of 64-byte load/stores and so the default 2168 // threshold is set to 0 for these platforms. 2169 int VM_Version::avx3_threshold() { 2170 return (is_intel_family_core() && 2171 supports_serialize() && 2172 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2173 } 2174 2175 #if defined(_LP64) 2176 void VM_Version::clear_apx_test_state() { 2177 clear_apx_test_state_stub(); 2178 } 2179 #endif 2180 2181 static bool _vm_version_initialized = false; 2182 2183 void VM_Version::initialize() { 2184 ResourceMark rm; 2185 // Generating this stub must be the FIRST use of the assembler. 2186 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2187 if (stub_blob == nullptr) { 2188 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2189 } 2190 CodeBuffer c(stub_blob); 2191 VM_Version_StubGenerator g(&c); 2192 2193 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2194 g.generate_get_cpu_info()); 2195 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2196 g.generate_detect_virt()); 2197 2198 #if defined(_LP64) 2199 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, 2200 g.clear_apx_test_state()); 2201 #endif 2202 get_processor_features(); 2203 2204 LP64_ONLY(Assembler::precompute_instructions();) 2205 2206 if (VM_Version::supports_hv()) { // Supports hypervisor 2207 check_virtualizations(); 2208 } 2209 _vm_version_initialized = true; 2210 } 2211 2212 typedef enum { 2213 CPU_FAMILY_8086_8088 = 0, 2214 CPU_FAMILY_INTEL_286 = 2, 2215 CPU_FAMILY_INTEL_386 = 3, 2216 CPU_FAMILY_INTEL_486 = 4, 2217 CPU_FAMILY_PENTIUM = 5, 2218 CPU_FAMILY_PENTIUMPRO = 6, // Same family, several models 2219 CPU_FAMILY_PENTIUM_4 = 0xF 2220 } FamilyFlag; 2221 2222 typedef enum { 2223 RDTSCP_FLAG = 0x08000000, // bit 27 2224 INTEL64_FLAG = 0x20000000 // bit 29 2225 } _featureExtendedEdxFlag; 2226 2227 typedef enum { 2228 FPU_FLAG = 0x00000001, 2229 VME_FLAG = 0x00000002, 2230 DE_FLAG = 0x00000004, 2231 PSE_FLAG = 0x00000008, 2232 TSC_FLAG = 0x00000010, 2233 MSR_FLAG = 0x00000020, 2234 PAE_FLAG = 0x00000040, 2235 MCE_FLAG = 0x00000080, 2236 CX8_FLAG = 0x00000100, 2237 APIC_FLAG = 0x00000200, 2238 SEP_FLAG = 0x00000800, 2239 MTRR_FLAG = 0x00001000, 2240 PGE_FLAG = 0x00002000, 2241
MCA_FLAG = 0x00004000, 2242 CMOV_FLAG = 0x00008000, 2243 PAT_FLAG = 0x00010000, 2244 PSE36_FLAG = 0x00020000, 2245 PSNUM_FLAG = 0x00040000, 2246 CLFLUSH_FLAG = 0x00080000, 2247 DTS_FLAG = 0x00200000, 2248 ACPI_FLAG = 0x00400000, 2249 MMX_FLAG = 0x00800000, 2250 FXSR_FLAG = 0x01000000, 2251 SSE_FLAG = 0x02000000, 2252 SSE2_FLAG = 0x04000000, 2253 SS_FLAG = 0x08000000, 2254 HTT_FLAG = 0x10000000, 2255 TM_FLAG = 0x20000000 2256 } FeatureEdxFlag; 2257 2258 static BufferBlob* cpuid_brand_string_stub_blob; 2259 static const int cpuid_brand_string_stub_size = 550; 2260 2261 extern "C" { 2262 typedef void (*getCPUIDBrandString_stub_t)(void*); 2263 } 2264 2265 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2266 2267 // VM_Version statics 2268 enum { 2269 ExtendedFamilyIdLength_INTEL = 16, 2270 ExtendedFamilyIdLength_AMD = 24 2271 }; 2272 2273 const size_t VENDOR_LENGTH = 13; 2274 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2275 static char* _cpu_brand_string = nullptr; 2276 static int64_t _max_qualified_cpu_frequency = 0; 2277 2278 static int _no_of_threads = 0; 2279 static int _no_of_cores = 0; 2280 2281 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2282 "8086/8088", 2283 "", 2284 "286", 2285 "386", 2286 "486", 2287 "Pentium", 2288 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2289 "", 2290 "", 2291 "", 2292 "", 2293 "", 2294 "", 2295 "", 2296 "", 2297 "Pentium 4" 2298 }; 2299 2300 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2301 "", 2302 "", 2303 "", 2304 "", 2305 "5x86", 2306 "K5/K6", 2307 "Athlon/AthlonXP", 2308 "", 2309 "", 2310 "", 2311 "", 2312 "", 2313 "", 2314 "", 2315 "", 2316 "Opteron/Athlon64", 2317 "Opteron QC/Phenom", // Barcelona et.al. 2318 "", 2319 "", 2320 "", 2321 "", 2322 "", 2323 "", 2324 "Zen" 2325 }; 2326 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2327 // September 2013, Vol 3C Table 35-1 2328 const char* const _model_id_pentium_pro[] = { 2329 "", 2330 "Pentium Pro", 2331 "", 2332 "Pentium II model 3", 2333 "", 2334 "Pentium II model 5/Xeon/Celeron", 2335 "Celeron", 2336 "Pentium III/Pentium III Xeon", 2337 "Pentium III/Pentium III Xeon", 2338 "Pentium M model 9", // Yonah 2339 "Pentium III, model A", 2340 "Pentium III, model B", 2341 "", 2342 "Pentium M model D", // Dothan 2343 "", 2344 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2345 "", 2346 "", 2347 "", 2348 "", 2349 "", 2350 "", 2351 "Celeron", // 0x16 Celeron 65nm 2352 "Core 2", // 0x17 Penryn / Harpertown 2353 "", 2354 "", 2355 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2356 "Atom", // 0x1B Z5xx series Silverthorn 2357 "", 2358 "Core 2", // 0x1D Dunnington (6-core) 2359 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2360 "", 2361 "", 2362 "", 2363 "", 2364 "", 2365 "", 2366 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2367 "", 2368 "", 2369 "", // 0x28 2370 "", 2371 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2372 "", 2373 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2374 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2375 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2376 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2377 "", 2378 "", 2379 "", 2380 "", 2381 "", 2382 "", 2383 "", 2384 "", 2385 "", 2386 "", 2387 "Ivy Bridge", // 0x3a 2388 "", 2389 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2390 "", // 0x3d "Next Generation Intel Core Processor" 2391 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2392 "", // 0x3f "Future Generation Intel 
Xeon Processor" 2393 "", 2394 "", 2395 "", 2396 "", 2397 "", 2398 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2399 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2400 nullptr 2401 }; 2402 2403 /* Brand ID is for back compatibility 2404 * Newer CPUs uses the extended brand string */ 2405 const char* const _brand_id[] = { 2406 "", 2407 "Celeron processor", 2408 "Pentium III processor", 2409 "Intel Pentium III Xeon processor", 2410 "", 2411 "", 2412 "", 2413 "", 2414 "Intel Pentium 4 processor", 2415 nullptr 2416 }; 2417 2418 2419 const char* const _feature_edx_id[] = { 2420 "On-Chip FPU", 2421 "Virtual Mode Extensions", 2422 "Debugging Extensions", 2423 "Page Size Extensions", 2424 "Time Stamp Counter", 2425 "Model Specific Registers", 2426 "Physical Address Extension", 2427 "Machine Check Exceptions", 2428 "CMPXCHG8B Instruction", 2429 "On-Chip APIC", 2430 "", 2431 "Fast System Call", 2432 "Memory Type Range Registers", 2433 "Page Global Enable", 2434 "Machine Check Architecture", 2435 "Conditional Mov Instruction", 2436 "Page Attribute Table", 2437 "36-bit Page Size Extension", 2438 "Processor Serial Number", 2439 "CLFLUSH Instruction", 2440 "", 2441 "Debug Trace Store feature", 2442 "ACPI registers in MSR space", 2443 "Intel Architecture MMX Technology", 2444 "Fast Float Point Save and Restore", 2445 "Streaming SIMD extensions", 2446 "Streaming SIMD extensions 2", 2447 "Self-Snoop", 2448 "Hyper Threading", 2449 "Thermal Monitor", 2450 "", 2451 "Pending Break Enable" 2452 }; 2453 2454 const char* const _feature_extended_edx_id[] = { 2455 "", 2456 "", 2457 "", 2458 "", 2459 "", 2460 "", 2461 "", 2462 "", 2463 "", 2464 "", 2465 "", 2466 "SYSCALL/SYSRET", 2467 "", 2468 "", 2469 "", 2470 "", 2471 "", 2472 "", 2473 "", 2474 "", 2475 "Execute Disable Bit", 2476 "", 2477 "", 2478 "", 2479 "", 2480 "", 2481 "", 2482 "RDTSCP", 2483 "", 2484 "Intel 64 Architecture", 2485 "", 2486 "" 2487 }; 2488 2489 const char* const _feature_ecx_id[] = { 2490 "Streaming SIMD Extensions 3", 2491 "PCLMULQDQ", 2492 "64-bit DS Area", 2493 "MONITOR/MWAIT instructions", 2494 "CPL Qualified Debug Store", 2495 "Virtual Machine Extensions", 2496 "Safer Mode Extensions", 2497 "Enhanced Intel SpeedStep technology", 2498 "Thermal Monitor 2", 2499 "Supplemental Streaming SIMD Extensions 3", 2500 "L1 Context ID", 2501 "", 2502 "Fused Multiply-Add", 2503 "CMPXCHG16B", 2504 "xTPR Update Control", 2505 "Perfmon and Debug Capability", 2506 "", 2507 "Process-context identifiers", 2508 "Direct Cache Access", 2509 "Streaming SIMD extensions 4.1", 2510 "Streaming SIMD extensions 4.2", 2511 "x2APIC", 2512 "MOVBE", 2513 "Popcount instruction", 2514 "TSC-Deadline", 2515 "AESNI", 2516 "XSAVE", 2517 "OSXSAVE", 2518 "AVX", 2519 "F16C", 2520 "RDRAND", 2521 "" 2522 }; 2523 2524 const char* const _feature_extended_ecx_id[] = { 2525 "LAHF/SAHF instruction support", 2526 "Core multi-processor legacy mode", 2527 "", 2528 "", 2529 "", 2530 "Advanced Bit Manipulations: LZCNT", 2531 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2532 "Misaligned SSE mode", 2533 "", 2534 "", 2535 "", 2536 "", 2537 "", 2538 "", 2539 "", 2540 "", 2541 "", 2542 "", 2543 "", 2544 "", 2545 "", 2546 "", 2547 "", 2548 "", 2549 "", 2550 "", 2551 "", 2552 "", 2553 "", 2554 "", 2555 "", 2556 "" 2557 }; 2558 2559 void VM_Version::initialize_tsc(void) { 2560 ResourceMark rm; 2561 2562 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2563 if (cpuid_brand_string_stub_blob == nullptr) { 2564 
vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2565 } 2566 CodeBuffer c(cpuid_brand_string_stub_blob); 2567 VM_Version_StubGenerator g(&c); 2568 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2569 g.generate_getCPUIDBrandString()); 2570 } 2571 2572 const char* VM_Version::cpu_model_description(void) { 2573 uint32_t cpu_family = extended_cpu_family(); 2574 uint32_t cpu_model = extended_cpu_model(); 2575 const char* model = nullptr; 2576 2577 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2578 for (uint32_t i = 0; i <= cpu_model; i++) { 2579 model = _model_id_pentium_pro[i]; 2580 if (model == nullptr) { 2581 break; 2582 } 2583 } 2584 } 2585 return model; 2586 } 2587 2588 const char* VM_Version::cpu_brand_string(void) { 2589 if (_cpu_brand_string == nullptr) { 2590 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2591 if (nullptr == _cpu_brand_string) { 2592 return nullptr; 2593 } 2594 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2595 if (ret_val != OS_OK) { 2596 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2597 _cpu_brand_string = nullptr; 2598 } 2599 } 2600 return _cpu_brand_string; 2601 } 2602 2603 const char* VM_Version::cpu_brand(void) { 2604 const char* brand = nullptr; 2605 2606 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2607 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2608 brand = _brand_id[0]; 2609 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2610 brand = _brand_id[i]; 2611 } 2612 } 2613 return brand; 2614 } 2615 2616 bool VM_Version::cpu_is_em64t(void) { 2617 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2618 } 2619 2620 bool VM_Version::is_netburst(void) { 2621 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2622 } 2623 2624 bool VM_Version::supports_tscinv_ext(void) { 2625 if (!supports_tscinv_bit()) { 2626 return false; 2627 } 2628 2629 if (is_intel()) { 2630 return true; 2631 } 2632 2633 if (is_amd()) { 2634 return !is_amd_Barcelona(); 2635 } 2636 2637 if (is_hygon()) { 2638 return true; 2639 } 2640 2641 return false; 2642 } 2643 2644 void VM_Version::resolve_cpu_information_details(void) { 2645 2646 // in future we want to base this information on proper cpu 2647 // and cache topology enumeration such as: 2648 // Intel 64 Architecture Processor Topology Enumeration 2649 // which supports system cpu and cache topology enumeration 2650 // either using 2xAPICIDs or initial APICIDs 2651 2652 // currently only rough cpu information estimates 2653 // which will not necessarily reflect the exact configuration of the system 2654 2655 // this is the number of logical hardware threads 2656 // visible to the operating system 2657 _no_of_threads = os::processor_count(); 2658 2659 // find out number of threads per cpu package 2660 int threads_per_package = threads_per_core() * cores_per_cpu(); 2661 2662 // use amount of threads visible to the process in order to guess number of sockets 2663 _no_of_sockets = _no_of_threads / threads_per_package; 2664 2665 // process might only see a subset of the total number of threads 2666 // from a single processor package. Virtualization/resource management for example. 2667 // If so then just write a hard 1 as num of pkgs. 
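// Hypothetical example: a process pinned to 8 of the 32 hardware threads of
// a 2-socket machine (8 cores per socket, 2-way SMT) sees _no_of_threads == 8
// while threads_per_package == 2 * 8 == 16, so the division above yields
// 0 sockets, which is clamped to 1 below.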
2668 if (0 == _no_of_sockets) { 2669 _no_of_sockets = 1; 2670 } 2671 2672 // estimate the number of cores 2673 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2674 } 2675 2676 2677 const char* VM_Version::cpu_family_description(void) { 2678 int cpu_family_id = extended_cpu_family(); 2679 if (is_amd()) { 2680 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2681 return _family_id_amd[cpu_family_id]; 2682 } 2683 } 2684 if (is_intel()) { 2685 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2686 return cpu_model_description(); 2687 } 2688 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2689 return _family_id_intel[cpu_family_id]; 2690 } 2691 } 2692 if (is_hygon()) { 2693 return "Dhyana"; 2694 } 2695 return "Unknown x86"; 2696 } 2697 2698 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2699 assert(buf != nullptr, "buffer is null!"); 2700 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2701 2702 const char* cpu_type = nullptr; 2703 const char* x64 = nullptr; 2704 2705 if (is_intel()) { 2706 cpu_type = "Intel"; 2707 x64 = cpu_is_em64t() ? " Intel64" : ""; 2708 } else if (is_amd()) { 2709 cpu_type = "AMD"; 2710 x64 = cpu_is_em64t() ? " AMD64" : ""; 2711 } else if (is_hygon()) { 2712 cpu_type = "Hygon"; 2713 x64 = cpu_is_em64t() ? " AMD64" : ""; 2714 } else { 2715 cpu_type = "Unknown x86"; 2716 x64 = cpu_is_em64t() ? " x86_64" : ""; 2717 } 2718 2719 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2720 cpu_type, 2721 cpu_family_description(), 2722 supports_ht() ? " (HT)" : "", 2723 supports_sse3() ? " SSE3" : "", 2724 supports_ssse3() ? " SSSE3" : "", 2725 supports_sse4_1() ? " SSE4.1" : "", 2726 supports_sse4_2() ? " SSE4.2" : "", 2727 supports_sse4a() ? " SSE4A" : "", 2728 is_netburst() ? " Netburst" : "", 2729 is_intel_family_core() ? 
" Core" : "", 2730 x64); 2731 2732 return OS_OK; 2733 } 2734 2735 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2736 assert(buf != nullptr, "buffer is null!"); 2737 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2738 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2739 2740 // invoke newly generated asm code to fetch CPU Brand String 2741 getCPUIDBrandString_stub(&_cpuid_info); 2742 2743 // fetch results into buffer 2744 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2745 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2746 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2747 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2748 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2749 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2750 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2751 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2752 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2753 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2754 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2755 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2756 2757 return OS_OK; 2758 } 2759 2760 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2761 guarantee(buf != nullptr, "buffer is null!"); 2762 guarantee(buf_len > 0, "buffer len not enough!"); 2763 2764 unsigned int flag = 0; 2765 unsigned int fi = 0; 2766 size_t written = 0; 2767 const char* prefix = ""; 2768 2769 #define WRITE_TO_BUF(string) \ 2770 { \ 2771 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2772 if (res < 0) { \ 2773 return buf_len - 1; \ 2774 } \ 2775 written += res; \ 2776 if (prefix[0] == '\0') { \ 2777 prefix = ", "; \ 2778 } \ 2779 } 2780 2781 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2782 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2783 continue; /* no hyperthreading */ 2784 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2785 continue; /* no fast system call */ 2786 } 2787 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2788 WRITE_TO_BUF(_feature_edx_id[fi]); 2789 } 2790 } 2791 2792 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2793 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2794 WRITE_TO_BUF(_feature_ecx_id[fi]); 2795 } 2796 } 2797 2798 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2799 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2800 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2801 } 2802 } 2803 2804 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2805 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2806 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2807 } 2808 } 2809 2810 if (supports_tscinv_bit()) { 2811 WRITE_TO_BUF("Invariant TSC"); 2812 } 2813 2814 return written; 2815 } 2816 2817 /** 2818 * Write a detailed description of the cpu to a given buffer, including 2819 * feature set. 
2820 */ 2821 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2822 assert(buf != nullptr, "buffer is null!"); 2823 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2824 2825 static const char* unknown = "<unknown>"; 2826 char vendor_id[VENDOR_LENGTH]; 2827 const char* family = nullptr; 2828 const char* model = nullptr; 2829 const char* brand = nullptr; 2830 int outputLen = 0; 2831 2832 family = cpu_family_description(); 2833 if (family == nullptr) { 2834 family = unknown; 2835 } 2836 2837 model = cpu_model_description(); 2838 if (model == nullptr) { 2839 model = unknown; 2840 } 2841 2842 brand = cpu_brand_string(); 2843 2844 if (brand == nullptr) { 2845 brand = cpu_brand(); 2846 if (brand == nullptr) { 2847 brand = unknown; 2848 } 2849 } 2850 2851 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2852 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2853 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2854 vendor_id[VENDOR_LENGTH-1] = '\0'; 2855 2856 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2857 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2858 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2859 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2860 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2861 "Supports: ", 2862 brand, 2863 vendor_id, 2864 family, 2865 extended_cpu_family(), 2866 model, 2867 extended_cpu_model(), 2868 cpu_stepping(), 2869 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2870 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2871 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2872 _cpuid_info.std_cpuid1_eax.value, 2873 _cpuid_info.std_cpuid1_ebx.value, 2874 _cpuid_info.std_cpuid1_ecx.value, 2875 _cpuid_info.std_cpuid1_edx.value, 2876 _cpuid_info.ext_cpuid1_eax, 2877 _cpuid_info.ext_cpuid1_ebx, 2878 _cpuid_info.ext_cpuid1_ecx, 2879 _cpuid_info.ext_cpuid1_edx); 2880 2881 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2882 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2883 return OS_ERR; 2884 } 2885 2886 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2887 2888 return OS_OK; 2889 } 2890 2891 2892 // Fill in Abstract_VM_Version statics 2893 void VM_Version::initialize_cpu_information() { 2894 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2895 assert(!_initialized, "shouldn't be initialized yet"); 2896 resolve_cpu_information_details(); 2897 2898 // initialize cpu_name and cpu_desc 2899 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2900 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2901 _initialized = true; 2902 } 2903 2904 /** 2905 * For information about extracting the frequency from the cpu brand string, please see: 2906 * 2907 * Intel Processor Identification and the CPUID Instruction 2908 * Application Note 485 2909 * May 2012 2910 * 2911 * The return value is the frequency in Hz. 2912 */ 2913 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2914 const char* const brand_string = cpu_brand_string(); 2915 if (brand_string == nullptr) { 2916 return 0; 2917 } 2918 const int64_t MEGA = 1000000; 2919 int64_t multiplier = 0; 2920 int64_t frequency = 0; 2921 uint8_t idx = 0; 2922 // The brand string buffer is at most 48 bytes. 2923 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
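// Worked example (hypothetical brand string "... CPU @ 3.20GHz"): the scan
// below stops with idx at 'G', giving multiplier == 1e9, and the "x.xx"
// branch computes 3*1e9 + 2*1e9/10 + 0*1e9/100 == 3.2 GHz.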
2924 for (; idx < 48-2; ++idx) { 2925 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2926 // Search brand string for "yHz" where y is M, G, or T. 2927 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2928 if (brand_string[idx] == 'M') { 2929 multiplier = MEGA; 2930 } else if (brand_string[idx] == 'G') { 2931 multiplier = MEGA * 1000; 2932 } else if (brand_string[idx] == 'T') { 2933 multiplier = MEGA * MEGA; 2934 } 2935 break; 2936 } 2937 } 2938 if (multiplier > 0) { 2939 // Compute frequency (in Hz) from brand string. 2940 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2941 frequency = (brand_string[idx-4] - '0') * multiplier; 2942 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2943 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2944 } else { // format is "xxxx" 2945 frequency = (brand_string[idx-4] - '0') * 1000; 2946 frequency += (brand_string[idx-3] - '0') * 100; 2947 frequency += (brand_string[idx-2] - '0') * 10; 2948 frequency += (brand_string[idx-1] - '0'); 2949 frequency *= multiplier; 2950 } 2951 } 2952 return frequency; 2953 } 2954 2955 2956 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2957 if (_max_qualified_cpu_frequency == 0) { 2958 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2959 } 2960 return _max_qualified_cpu_frequency; 2961 } 2962 2963 uint64_t VM_Version::CpuidInfo::feature_flags() const { 2964 uint64_t result = 0; 2965 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2966 result |= CPU_CX8; 2967 if (std_cpuid1_edx.bits.cmov != 0) 2968 result |= CPU_CMOV; 2969 if (std_cpuid1_edx.bits.clflush != 0) 2970 result |= CPU_FLUSH; 2971 #ifdef _LP64 2972 // clflush should always be available on x86_64 2973 // if not we are in real trouble because we rely on it 2974 // to flush the code cache. 2975 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2976 #endif 2977 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2978 ext_cpuid1_edx.bits.fxsr != 0)) 2979 result |= CPU_FXSR; 2980 // HT flag is set for multi-core processors also. 
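// (CPUID.1:EDX[28] only indicates that the package may report more than one
// logical processor, so SMT is inferred from the topology-derived
// threads_per_core() rather than from the raw bit.)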
2981 if (threads_per_core() > 1) 2982 result |= CPU_HT; 2983 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2984 ext_cpuid1_edx.bits.mmx != 0)) 2985 result |= CPU_MMX; 2986 if (std_cpuid1_edx.bits.sse != 0) 2987 result |= CPU_SSE; 2988 if (std_cpuid1_edx.bits.sse2 != 0) 2989 result |= CPU_SSE2; 2990 if (std_cpuid1_ecx.bits.sse3 != 0) 2991 result |= CPU_SSE3; 2992 if (std_cpuid1_ecx.bits.ssse3 != 0) 2993 result |= CPU_SSSE3; 2994 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2995 result |= CPU_SSE4_1; 2996 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2997 result |= CPU_SSE4_2; 2998 if (std_cpuid1_ecx.bits.popcnt != 0) 2999 result |= CPU_POPCNT; 3000 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 3001 xem_xcr0_eax.bits.apx_f != 0) { 3002 result |= CPU_APX_F; 3003 } 3004 if (std_cpuid1_ecx.bits.avx != 0 && 3005 std_cpuid1_ecx.bits.osxsave != 0 && 3006 xem_xcr0_eax.bits.sse != 0 && 3007 xem_xcr0_eax.bits.ymm != 0) { 3008 result |= CPU_AVX; 3009 result |= CPU_VZEROUPPER; 3010 if (std_cpuid1_ecx.bits.f16c != 0) 3011 result |= CPU_F16C; 3012 if (sef_cpuid7_ebx.bits.avx2 != 0) { 3013 result |= CPU_AVX2; 3014 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 3015 result |= CPU_AVX_IFMA; 3016 } 3017 if (sef_cpuid7_ecx.bits.gfni != 0) 3018 result |= CPU_GFNI; 3019 if (sef_cpuid7_ebx.bits.avx512f != 0 && 3020 xem_xcr0_eax.bits.opmask != 0 && 3021 xem_xcr0_eax.bits.zmm512 != 0 && 3022 xem_xcr0_eax.bits.zmm32 != 0) { 3023 result |= CPU_AVX512F; 3024 if (sef_cpuid7_ebx.bits.avx512cd != 0) 3025 result |= CPU_AVX512CD; 3026 if (sef_cpuid7_ebx.bits.avx512dq != 0) 3027 result |= CPU_AVX512DQ; 3028 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 3029 result |= CPU_AVX512_IFMA; 3030 if (sef_cpuid7_ebx.bits.avx512pf != 0) 3031 result |= CPU_AVX512PF; 3032 if (sef_cpuid7_ebx.bits.avx512er != 0) 3033 result |= CPU_AVX512ER; 3034 if (sef_cpuid7_ebx.bits.avx512bw != 0) 3035 result |= CPU_AVX512BW; 3036 if (sef_cpuid7_ebx.bits.avx512vl != 0) 3037 result |= CPU_AVX512VL; 3038 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 3039 result |= CPU_AVX512_VPOPCNTDQ; 3040 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 3041 result |= CPU_AVX512_VPCLMULQDQ; 3042 if (sef_cpuid7_ecx.bits.vaes != 0) 3043 result |= CPU_AVX512_VAES; 3044 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 3045 result |= CPU_AVX512_VNNI; 3046 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 3047 result |= CPU_AVX512_BITALG; 3048 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 3049 result |= CPU_AVX512_VBMI; 3050 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 3051 result |= CPU_AVX512_VBMI2; 3052 } 3053 } 3054 if (std_cpuid1_ecx.bits.hv != 0) 3055 result |= CPU_HV; 3056 if (sef_cpuid7_ebx.bits.bmi1 != 0) 3057 result |= CPU_BMI1; 3058 if (std_cpuid1_edx.bits.tsc != 0) 3059 result |= CPU_TSC; 3060 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3061 result |= CPU_TSCINV_BIT; 3062 if (std_cpuid1_ecx.bits.aes != 0) 3063 result |= CPU_AES; 3064 if (sef_cpuid7_ebx.bits.erms != 0) 3065 result |= CPU_ERMS; 3066 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3067 result |= CPU_FSRM; 3068 if (std_cpuid1_ecx.bits.clmul != 0) 3069 result |= CPU_CLMUL; 3070 if (sef_cpuid7_ebx.bits.rtm != 0) 3071 result |= CPU_RTM; 3072 if (sef_cpuid7_ebx.bits.adx != 0) 3073 result |= CPU_ADX; 3074 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3075 result |= CPU_BMI2; 3076 if (sef_cpuid7_ebx.bits.sha != 0) 3077 result |= CPU_SHA; 3078 if (std_cpuid1_ecx.bits.fma != 0) 3079 result |= CPU_FMA; 3080 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3081 result |= CPU_FLUSHOPT; 3082 if (ext_cpuid1_edx.bits.rdtscp != 0) 3083 result |= CPU_RDTSCP; 3084 if 
(sef_cpuid7_ecx.bits.rdpid != 0) 3085 result |= CPU_RDPID; 3086 3087 // AMD|Hygon features. 3088 if (is_amd_family()) { 3089 if ((ext_cpuid1_edx.bits.tdnow != 0) || 3090 (ext_cpuid1_ecx.bits.prefetchw != 0)) 3091 result |= CPU_3DNOW_PREFETCH; 3092 if (ext_cpuid1_ecx.bits.lzcnt != 0) 3093 result |= CPU_LZCNT; 3094 if (ext_cpuid1_ecx.bits.sse4a != 0) 3095 result |= CPU_SSE4A; 3096 } 3097 3098 // Intel features. 3099 if (is_intel()) { 3100 if (ext_cpuid1_ecx.bits.lzcnt != 0) { 3101 result |= CPU_LZCNT; 3102 } 3103 if (ext_cpuid1_ecx.bits.prefetchw != 0) { 3104 result |= CPU_3DNOW_PREFETCH; 3105 } 3106 if (sef_cpuid7_ebx.bits.clwb != 0) { 3107 result |= CPU_CLWB; 3108 } 3109 if (sef_cpuid7_edx.bits.serialize != 0) 3110 result |= CPU_SERIALIZE; 3111 } 3112 3113 // ZX features. 3114 if (is_zx()) { 3115 if (ext_cpuid1_ecx.bits.lzcnt != 0) { 3116 result |= CPU_LZCNT; 3117 } 3118 if (ext_cpuid1_ecx.bits.prefetchw != 0) { 3119 result |= CPU_3DNOW_PREFETCH; 3120 } 3121 } 3122 3123 // Protection key features. 3124 if (sef_cpuid7_ecx.bits.pku != 0) { 3125 result |= CPU_PKU; 3126 } 3127 if (sef_cpuid7_ecx.bits.ospke != 0) { 3128 result |= CPU_OSPKE; 3129 } 3130 3131 // Control flow enforcement (CET) features. 3132 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3133 result |= CPU_CET_SS; 3134 } 3135 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3136 result |= CPU_CET_IBT; 3137 } 3138 3139 // Composite features. 3140 if (supports_tscinv_bit() && 3141 ((is_amd_family() && !is_amd_Barcelona()) || 3142 is_intel_tsc_synched_at_init())) { 3143 result |= CPU_TSCINV; 3144 } 3145 3146 return result; 3147 } 3148 3149 bool VM_Version::os_supports_avx_vectors() { 3150 bool retVal = false; 3151 int nreg = 2 LP64_ONLY(+2); 3152 if (supports_evex()) { 3153 // Verify that the OS saves/restores all bits of the EVEX registers 3154 // during signal processing. 3155 retVal = true; 3156 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3157 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3158 retVal = false; 3159 break; 3160 } 3161 } 3162 } else if (supports_avx()) { 3163 // Verify that the OS saves/restores all bits of the AVX registers 3164 // during signal processing. 3165 retVal = true; 3166 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3167 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3168 retVal = false; 3169 break; 3170 } 3171 } 3172 // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen 3173 if (retVal == false) { 3174 // Verify that the OS saves/restores all bits of the EVEX registers 3175 // during signal processing. 3176 retVal = true; 3177 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3178 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3179 retVal = false; 3180 break; 3181 } 3182 } 3183 } 3184 } 3185 return retVal; 3186 } 3187 3188 bool VM_Version::os_supports_apx_egprs() { 3189 if (!supports_apx_f()) { 3190 return false; 3191 } 3192 // Enable APX support for product builds after 3193 // completion of planned features listed in JDK-8329030.
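// The check below verifies that the r16/r31 sentinel values written before
// the deliberate fault survived signal handling, i.e. that the operating
// system's context save/restore covers the extended GPR (APX) state.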
3194 #if !defined(PRODUCT) 3195 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3196 _cpuid_info.apx_save[1] != egpr_test_value()) { 3197 return false; 3198 } 3199 return true; 3200 #else 3201 return false; 3202 #endif 3203 } 3204 3205 uint VM_Version::cores_per_cpu() { 3206 uint result = 1; 3207 if (is_intel()) { 3208 bool supports_topology = supports_processor_topology(); 3209 if (supports_topology) { 3210 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3211 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3212 } 3213 if (!supports_topology || result == 0) { 3214 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3215 } 3216 } else if (is_amd_family()) { 3217 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3218 } else if (is_zx()) { 3219 bool supports_topology = supports_processor_topology(); 3220 if (supports_topology) { 3221 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3222 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3223 } 3224 if (!supports_topology || result == 0) { 3225 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3226 } 3227 } 3228 return result; 3229 } 3230 3231 uint VM_Version::threads_per_core() { 3232 uint result = 1; 3233 if (is_intel() && supports_processor_topology()) { 3234 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3235 } else if (is_zx() && supports_processor_topology()) { 3236 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3237 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3238 if (cpu_family() >= 0x17) { 3239 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3240 } else { 3241 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3242 cores_per_cpu(); 3243 } 3244 } 3245 return (result == 0 ? 1 : result); 3246 } 3247 3248 uint VM_Version::L1_line_size() { 3249 uint result = 0; 3250 if (is_intel()) { 3251 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3252 } else if (is_amd_family()) { 3253 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3254 } else if (is_zx()) { 3255 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3256 } 3257 if (result < 32) // not defined ? 3258 result = 32; // 32 bytes by default on x86 and other x64 3259 return result; 3260 } 3261 3262 bool VM_Version::is_intel_tsc_synched_at_init() { 3263 if (is_intel_family_core()) { 3264 uint32_t ext_model = extended_cpu_model(); 3265 if (ext_model == CPU_MODEL_NEHALEM_EP || 3266 ext_model == CPU_MODEL_WESTMERE_EP || 3267 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3268 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3269 // <= 2-socket invariant tsc support. EX versions are usually used 3270 // in > 2-socket systems and likely don't synchronize tscs at 3271 // initialization. 3272 // Code that uses tsc values must be prepared for them to arbitrarily 3273 // jump forward or backward. 3274 return true; 3275 } 3276 } 3277 return false; 3278 } 3279 3280 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3281 // Hardware prefetching (distance/size in bytes): 3282 // Pentium 3 - 64 / 32 3283 // Pentium 4 - 256 / 128 3284 // Athlon - 64 / 32 ???? 
3285 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3286 // Core - 128 / 64 3287 // 3288 // Software prefetching (distance in bytes / instruction with best score): 3289 // Pentium 3 - 128 / prefetchnta 3290 // Pentium 4 - 512 / prefetchnta 3291 // Athlon - 128 / prefetchnta 3292 // Opteron - 256 / prefetchnta 3293 // Core - 256 / prefetchnta 3294 // It will be used only when AllocatePrefetchStyle > 0 3295 3296 if (is_amd_family()) { // AMD | Hygon 3297 if (supports_sse2()) { 3298 return 256; // Opteron 3299 } else { 3300 return 128; // Athlon 3301 } 3302 } else { // Intel 3303 if (supports_sse3() && cpu_family() == 6) { 3304 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3305 return 192; 3306 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3307 #ifdef _LP64 3308 return 384; 3309 #else 3310 return 320; 3311 #endif 3312 } 3313 } 3314 if (supports_sse2()) { 3315 if (cpu_family() == 6) { 3316 return 256; // Pentium M, Core, Core2 3317 } else { 3318 return 512; // Pentium 4 3319 } 3320 } else { 3321 return 128; // Pentium 3 (and all other old CPUs) 3322 } 3323 } 3324 } 3325 3326 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3327 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3328 switch (id) { 3329 case vmIntrinsics::_floatToFloat16: 3330 case vmIntrinsics::_float16ToFloat: 3331 if (!supports_float16()) { 3332 return false; 3333 } 3334 break; 3335 default: 3336 break; 3337 } 3338 return true; 3339 }