/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31
    // before the signal-handling test guarantees that any values observed in
    // these registers afterwards were re-instantiated by the operating system,
    // and are not simply stale values that were never modified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
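
    // Layout note (a sketch of an assumption made explicit, not new behavior):
    // each cpuid leaf captured by this stub is stored as four consecutive
    // 32-bit words (eax, ebx, ecx, edx) at its leaf-specific offset inside
    // CpuidInfo; the lea/movl pattern above repeats for every leaf gathered.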

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
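    //
    // Roughly equivalent C sketch of the two checks emitted below (illustrative
    // only; this is not the generated code):
    //   if ((cpuid(7, 1).edx & (1u << 21)) == 0) goto vector_save_restore; // no APX in hardware
    //   if ((xgetbv(0)      & (1u << 19)) == 0) goto vector_save_restore; // XCR0 lacks EGPR state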
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if the OS saves SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs fail to restore the upper 128/256 bits of YMM/ZMM
    // registers after signal processing.
    // Generate a SEGV here (reference through null)
    // and check the upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx: avx512f bit
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
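    // The null dereference above raises SIGSEGV on purpose: the VM's signal
    // handler recognizes _cpuinfo_segv_addr as the faulting PC and resumes
    // execution at _cpuinfo_cont_addr (here). The register values stored below
    // are compared against ymm_test_value() later (see os_supports_avx_vectors())
    // to decide whether the OS preserves upper YMM/ZMM state across signals.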

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };

  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }

  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // the clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero;
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if processor has Intel E-cores
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
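  // For example (illustrative): running with -XX:UseSSE=4 on a CPU that
  // reports only SSE3 is clamped to UseSSE=3 below, with a warning.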
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
  }

  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
  }

  if (!UseAPX) {
    _features &= ~CPU_APX_F;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
    _features &= ~CPU_SHA512;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 intrinsics require AVX2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsics require CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
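  // Illustrative example: on AVX2-only hardware the stub generator selects the
  // AVX2 block function, while an explicit -XX:+UseChaCha20Intrinsics request
  // on hardware without any AVX support is rejected below with a warning.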
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsics require AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsics require AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX-512 (UseAVX > 2)
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else
#endif
  if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new CPUs, instructions which update the whole XMM register should be used
  // to prevent partial register stalls due to dependencies on the high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >= 17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
Intrinsics will be disabled.");
1679 }
1680 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1681 }
1682 }
1683 if (is_atom_family() || is_knights_family()) {
1684 #ifdef COMPILER2
1685 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1686 OptoScheduling = true;
1687 }
1688 #endif
1689 if (supports_sse4_2()) { // Silvermont
1690 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1691 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1692 }
1693 }
1694 if (FLAG_IS_DEFAULT(UseIncDec)) {
1695 FLAG_SET_DEFAULT(UseIncDec, false);
1696 }
1697 }
1698 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1699 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1700 }
1701 #ifdef COMPILER2
1702 if (UseAVX > 2) {
1703 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1704 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1705 ArrayOperationPartialInlineSize != 0 &&
1706 ArrayOperationPartialInlineSize != 16 &&
1707 ArrayOperationPartialInlineSize != 32 &&
1708 ArrayOperationPartialInlineSize != 64)) {
1709 int inline_size = 0;
1710 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1711 inline_size = 64;
1712 } else if (MaxVectorSize >= 32) {
1713 inline_size = 32;
1714 } else if (MaxVectorSize >= 16) {
1715 inline_size = 16;
1716 }
1717 if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1718 warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
1719 }
1720 ArrayOperationPartialInlineSize = inline_size;
1721 }
1722
1723 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1724 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1725 if (ArrayOperationPartialInlineSize) {
1726 warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
1727 } else {
1728 warning("Setting ArrayOperationPartialInlineSize to " INTX_FORMAT, ArrayOperationPartialInlineSize);
1729 }
1730 }
1731 }
1732 #endif
1733 }
1734
1735 #ifdef COMPILER2
1736 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1737 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1738 OptimizeFill = false;
1739 }
1740 }
1741 #endif
1742
1743 #ifdef _LP64
1744 if (UseSSE42Intrinsics) {
1745 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1746 UseVectorizedMismatchIntrinsic = true;
1747 }
1748 } else if (UseVectorizedMismatchIntrinsic) {
1749 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1750 warning("vectorizedMismatch intrinsics are not available on this CPU");
1751 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1752 }
1753 if (UseAVX >= 2) {
1754 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1755 } else if (UseVectorizedHashCodeIntrinsic) {
1756 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1757 warning("vectorizedHashCode intrinsics are not available on this CPU");
1758 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1759 }
1760 #else
1761 if (UseVectorizedMismatchIntrinsic) {
1762 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1763 warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1764 }
1765 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1766 }
1767 if (UseVectorizedHashCodeIntrinsic) {
1768 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1769 warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1770 }
1771 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1772 }
1773 #endif // _LP64
1774
1775 // Use count leading zeros instruction if available.
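// (Assumed context, not stated in the original: UseCountLeadingZerosInstruction
// gates the lzcnt-based code paths; without lzcnt, leading-zero counts fall
// back to a BSR-based instruction sequence.)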
1776 if (supports_lzcnt()) { 1777 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1778 UseCountLeadingZerosInstruction = true; 1779 } 1780 } else if (UseCountLeadingZerosInstruction) { 1781 warning("lzcnt instruction is not available on this CPU"); 1782 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1783 } 1784 1785 // Use count trailing zeros instruction if available 1786 if (supports_bmi1()) { 1787 // tzcnt does not require VEX prefix 1788 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1789 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1790 // Don't use tzcnt if BMI1 is switched off on command line. 1791 UseCountTrailingZerosInstruction = false; 1792 } else { 1793 UseCountTrailingZerosInstruction = true; 1794 } 1795 } 1796 } else if (UseCountTrailingZerosInstruction) { 1797 warning("tzcnt instruction is not available on this CPU"); 1798 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1799 } 1800 1801 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1802 // VEX prefix is generated only when AVX > 0. 1803 if (supports_bmi1() && supports_avx()) { 1804 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1805 UseBMI1Instructions = true; 1806 } 1807 } else if (UseBMI1Instructions) { 1808 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1809 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1810 } 1811 1812 if (supports_bmi2() && supports_avx()) { 1813 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1814 UseBMI2Instructions = true; 1815 } 1816 } else if (UseBMI2Instructions) { 1817 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1818 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1819 } 1820 1821 // Use population count instruction if available. 1822 if (supports_popcnt()) { 1823 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1824 UsePopCountInstruction = true; 1825 } 1826 } else if (UsePopCountInstruction) { 1827 warning("POPCNT instruction is not available on this CPU"); 1828 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1829 } 1830 1831 // Use fast-string operations if available. 1832 if (supports_erms()) { 1833 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1834 UseFastStosb = true; 1835 } 1836 } else if (UseFastStosb) { 1837 warning("fast-string operations are not available on this CPU"); 1838 FLAG_SET_DEFAULT(UseFastStosb, false); 1839 } 1840 1841 // For AMD Processors use XMM/YMM MOVDQU instructions 1842 // for Object Initialization as default 1843 if (is_amd() && cpu_family() >= 0x19) { 1844 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1845 UseFastStosb = false; 1846 } 1847 } 1848 1849 #ifdef COMPILER2 1850 if (is_intel() && MaxVectorSize > 16) { 1851 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1852 UseFastStosb = false; 1853 } 1854 } 1855 #endif 1856 1857 // Use XMM/YMM MOVDQU instruction for Object Initialization 1858 if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) { 1859 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1860 UseXMMForObjInit = true; 1861 } 1862 } else if (UseXMMForObjInit) { 1863 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1864 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1865 } 1866 1867 #ifdef COMPILER2 1868 if (FLAG_IS_DEFAULT(AlignVector)) { 1869 // Modern processors allow misaligned memory operations for vectors. 
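// For example, if UseUnalignedLoadStores was enabled above (movdqu-style
// accesses are cheap), AlignVector defaults to false below.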
1870 AlignVector = !UseUnalignedLoadStores;
1871 }
1872 #endif // COMPILER2
1873
1874 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1875 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1876 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1877 } else if (!supports_sse() && supports_3dnow_prefetch()) {
1878 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1879 }
1880 }
1881
1882 // Allocation prefetch settings
1883 int cache_line_size = checked_cast<int>(prefetch_data_size());
1884 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1885 (cache_line_size > AllocatePrefetchStepSize)) {
1886 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1887 }
1888
1889 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1890 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1891 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1892 warning("AllocatePrefetchDistance is set to 0, which disables prefetching. Ignoring the AllocatePrefetchStyle flag.");
1893 }
1894 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1895 }
1896
1897 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1898 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1899 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1900 }
1901
1902 if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1903 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1904 supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1905 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1906 }
1907 #ifdef COMPILER2
1908 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1909 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1910 }
1911 #endif
1912 }
1913
1914 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1915 #ifdef COMPILER2
1916 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1917 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1918 }
1919 #endif
1920 }
1921
1922 #ifdef _LP64
1923 // Prefetch settings
1924
1925 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1926 // 50-warehouse SPECjbb runs on a 2-way 1.8 GHz Opteron with a 4 GB heap.
1927 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1928 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1929
1930 // gc copy/scan is disabled if prefetchw isn't supported, because
1931 // Prefetch::write emits an inlined prefetchw on Linux.
1932 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1933 // The prefetcht0 instruction used instead works for both amd64 and em64t.
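// With 64-byte dcache lines, 9 lines * 64 bytes = 576 bytes, which is the
// default applied to both intervals below.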
1934
1935 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1936 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1937 }
1938 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1939 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1940 }
1941 #endif
1942
1943 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1944 (cache_line_size > ContendedPaddingWidth))
1945 ContendedPaddingWidth = cache_line_size;
1946
1947 // This machine allows unaligned memory accesses
1948 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1949 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1950 }
1951
1952 #ifndef PRODUCT
1953 if (log_is_enabled(Info, os, cpu)) {
1954 LogStream ls(Log(os, cpu)::info());
1955 outputStream* log = &ls;
1956 log->print_cr("Logical CPUs per package: %u",
1957 logical_processors_per_package());
1958 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1959 log->print("UseSSE=%d", UseSSE);
1960 if (UseAVX > 0) {
1961 log->print(" UseAVX=%d", UseAVX);
1962 }
1963 if (UseAES) {
1964 log->print(" UseAES=1");
1965 }
1966 #ifdef COMPILER2
1967 if (MaxVectorSize > 0) {
1968 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1969 }
1970 #endif
1971 log->cr();
1972 log->print("Allocation");
1973 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1974 log->print_cr(": no prefetching");
1975 } else {
1976 log->print(" prefetching: ");
1977 if (UseSSE == 0 && supports_3dnow_prefetch()) {
1978 log->print("PREFETCHW");
1979 } else if (UseSSE >= 1) {
1980 if (AllocatePrefetchInstr == 0) {
1981 log->print("PREFETCHNTA");
1982 } else if (AllocatePrefetchInstr == 1) {
1983 log->print("PREFETCHT0");
1984 } else if (AllocatePrefetchInstr == 2) {
1985 log->print("PREFETCHT2");
1986 } else if (AllocatePrefetchInstr == 3) {
1987 log->print("PREFETCHW");
1988 }
1989 }
1990 if (AllocatePrefetchLines > 1) {
1991 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1992 } else {
1993 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1994 }
1995 }
1996
1997 if (PrefetchCopyIntervalInBytes > 0) {
1998 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1999 }
2000 if (PrefetchScanIntervalInBytes > 0) {
2001 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
2002 }
2003 if (ContendedPaddingWidth > 0) {
2004 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
2005 }
2006 }
2007 #endif // !PRODUCT
2008 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
2009 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
2010 }
2011 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
2012 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
2013 }
2014 }
2015
2016 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2017 VirtualizationType vrt = VM_Version::get_detected_virtualization();
2018 if (vrt == XenHVM) {
2019 st->print_cr("Xen hardware-assisted virtualization detected");
2020 } else if (vrt == KVM) {
2021 st->print_cr("KVM virtualization detected");
2022 } else if (vrt == VMWare) {
2023 st->print_cr("VMWare virtualization detected");
2024 VirtualizationSupport::print_virtualization_info(st);
2025 } else if (vrt == HyperV) {
2026 st->print_cr("Hyper-V virtualization detected");
2027 } else if (vrt == HyperVRole) {
2028 st->print_cr("Hyper-V role detected");
2029 }
2030 }
2031
2032 bool VM_Version::compute_has_intel_jcc_erratum() {
2033 if (!is_intel_family_core()) {
2034
// Only Intel CPUs are affected. 2035 return false; 2036 } 2037 // The following table of affected CPUs is based on the following document released by Intel: 2038 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 2039 switch (_model) { 2040 case 0x8E: 2041 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2042 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 2043 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 2044 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 2045 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 2046 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2047 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2048 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 2049 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2050 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 2051 case 0x4E: 2052 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 2053 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 2054 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 2055 return _stepping == 0x3; 2056 case 0x55: 2057 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 2058 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 2059 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 2060 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 2061 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 2062 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 2063 return _stepping == 0x4 || _stepping == 0x7; 2064 case 0x5E: 2065 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 2066 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 2067 return _stepping == 0x3; 2068 case 0x9E: 2069 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2070 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2071 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2072 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2073 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2074 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based 
on microarchitecture code name Coffee Lake H
2075 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2076 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2077 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2078 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2079 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2080 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2081 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
2082 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2083 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2084 case 0xA5:
2085 // Not in Intel documentation.
2086 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2087 return true;
2088 case 0xA6:
2089 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2090 return _stepping == 0x0;
2091 case 0xAE:
2092 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2093 return _stepping == 0xA;
2094 default:
2095 // If we are running on another Intel machine not recognized in the table, we are okay.
2096 return false;
2097 }
2098 }
2099
2100 // On Xen, the cpuid instruction returns
2101 // eax / registers[0]: Version of Xen
2102 // ebx / registers[1]: chars 'XenV'
2103 // ecx / registers[2]: chars 'MMXe'
2104 // edx / registers[3]: chars 'nVMM'
2105 //
2106 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2107 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2108 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2109 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
2110 //
2111 // more information:
2112 // https://kb.vmware.com/s/article/1009458
2113 //
2114 void VM_Version::check_virtualizations() {
2115 uint32_t registers[4] = {0};
2116 char signature[13] = {0};
2117
2118 // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
2119 // from 0x40000000 up to 0x40010000.
2120 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2121 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2122 detect_virt_stub(leaf, registers);
2123 memcpy(signature, &registers[1], 12);
2124
2125 if (strncmp("VMwareVMware", signature, 12) == 0) {
2126 Abstract_VM_Version::_detected_virtualization = VMWare;
2127 // check for extended metrics from guestlib
2128 VirtualizationSupport::initialize();
2129 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2130 Abstract_VM_Version::_detected_virtualization = HyperV;
2131 #ifdef _WINDOWS
2132 // CPUID leaf 0x40000007 is available to the root partition only.
2133 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
// https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2135 detect_virt_stub(0x40000007, registers);
2136 if ((registers[0] != 0x0) ||
2137 (registers[1] != 0x0) ||
2138 (registers[2] != 0x0) ||
2139 (registers[3] != 0x0)) {
2140 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2141 }
2142 #endif
2143 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2144 Abstract_VM_Version::_detected_virtualization = KVM;
2145 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2146 Abstract_VM_Version::_detected_virtualization = XenHVM;
2147 }
2148 }
2149 }
2150
2151 #ifdef COMPILER2
2152 // Determine if we are running on Cascade Lake with default options.
2153 bool VM_Version::is_default_intel_cascade_lake() {
2154 return FLAG_IS_DEFAULT(UseAVX) &&
2155 FLAG_IS_DEFAULT(MaxVectorSize) &&
2156 UseAVX > 2 &&
2157 is_intel_cascade_lake();
2158 }
2159 #endif
2160
2161 bool VM_Version::is_intel_cascade_lake() {
2162 return is_intel_skylake() && _stepping >= 5;
2163 }
2164
2165 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2166 // for implementing the array copy and clear operations.
2167 // The Intel platforms that support the serialize instruction
2168 // have an improved implementation of 64-byte load/stores, so the default
2169 // threshold is set to 0 for these platforms.
2170 int VM_Version::avx3_threshold() {
2171 return (is_intel_family_core() &&
2172 supports_serialize() &&
2173 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2174 }
2175
2176 #if defined(_LP64)
2177 void VM_Version::clear_apx_test_state() {
2178 clear_apx_test_state_stub();
2179 }
2180 #endif
2181
2182 static bool _vm_version_initialized = false;
2183
2184 void VM_Version::initialize() {
2185 ResourceMark rm;
2186 // Generating this stub must be the FIRST use of the assembler.
2187 stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2188 if (stub_blob == nullptr) {
2189 vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2190 }
2191 CodeBuffer c(stub_blob);
2192 VM_Version_StubGenerator g(&c);
2193
2194 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2195 g.generate_get_cpu_info());
2196 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2197 g.generate_detect_virt());
2198
2199 #if defined(_LP64)
2200 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2201 g.clear_apx_test_state());
2202 #endif
2203 get_processor_features();
2204
2205 LP64_ONLY(Assembler::precompute_instructions();)
2206
2207 if (VM_Version::supports_hv()) { // Supports hypervisor
2208 check_virtualizations();
2209 }
2210 _vm_version_initialized = true;
2211 }
2212
2213 typedef enum {
2214 CPU_FAMILY_8086_8088 = 0,
2215 CPU_FAMILY_INTEL_286 = 2,
2216 CPU_FAMILY_INTEL_386 = 3,
2217 CPU_FAMILY_INTEL_486 = 4,
2218 CPU_FAMILY_PENTIUM = 5,
2219 CPU_FAMILY_PENTIUMPRO = 6, // Same family, several models
2220 CPU_FAMILY_PENTIUM_4 = 0xF
2221 } FamilyFlag;
2222
2223 typedef enum {
2224 RDTSCP_FLAG = 0x08000000, // bit 27
2225 INTEL64_FLAG = 0x20000000 // bit 29
2226 } _featureExtendedEdxFlag;
2227
2228 typedef enum {
2229 FPU_FLAG = 0x00000001,
2230 VME_FLAG = 0x00000002,
2231 DE_FLAG = 0x00000004,
2232 PSE_FLAG = 0x00000008,
2233 TSC_FLAG = 0x00000010,
2234 MSR_FLAG = 0x00000020,
2235 PAE_FLAG = 0x00000040,
2236 MCE_FLAG = 0x00000080,
2237 CX8_FLAG = 0x00000100,
2238 APIC_FLAG = 0x00000200,
2239 SEP_FLAG = 0x00000800,
2240 MTRR_FLAG = 0x00001000,
2241 PGE_FLAG = 0x00002000,
2242
MCA_FLAG = 0x00004000, 2243 CMOV_FLAG = 0x00008000, 2244 PAT_FLAG = 0x00010000, 2245 PSE36_FLAG = 0x00020000, 2246 PSNUM_FLAG = 0x00040000, 2247 CLFLUSH_FLAG = 0x00080000, 2248 DTS_FLAG = 0x00200000, 2249 ACPI_FLAG = 0x00400000, 2250 MMX_FLAG = 0x00800000, 2251 FXSR_FLAG = 0x01000000, 2252 SSE_FLAG = 0x02000000, 2253 SSE2_FLAG = 0x04000000, 2254 SS_FLAG = 0x08000000, 2255 HTT_FLAG = 0x10000000, 2256 TM_FLAG = 0x20000000 2257 } FeatureEdxFlag; 2258 2259 static BufferBlob* cpuid_brand_string_stub_blob; 2260 static const int cpuid_brand_string_stub_size = 550; 2261 2262 extern "C" { 2263 typedef void (*getCPUIDBrandString_stub_t)(void*); 2264 } 2265 2266 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2267 2268 // VM_Version statics 2269 enum { 2270 ExtendedFamilyIdLength_INTEL = 16, 2271 ExtendedFamilyIdLength_AMD = 24 2272 }; 2273 2274 const size_t VENDOR_LENGTH = 13; 2275 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2276 static char* _cpu_brand_string = nullptr; 2277 static int64_t _max_qualified_cpu_frequency = 0; 2278 2279 static int _no_of_threads = 0; 2280 static int _no_of_cores = 0; 2281 2282 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2283 "8086/8088", 2284 "", 2285 "286", 2286 "386", 2287 "486", 2288 "Pentium", 2289 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2290 "", 2291 "", 2292 "", 2293 "", 2294 "", 2295 "", 2296 "", 2297 "", 2298 "Pentium 4" 2299 }; 2300 2301 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2302 "", 2303 "", 2304 "", 2305 "", 2306 "5x86", 2307 "K5/K6", 2308 "Athlon/AthlonXP", 2309 "", 2310 "", 2311 "", 2312 "", 2313 "", 2314 "", 2315 "", 2316 "", 2317 "Opteron/Athlon64", 2318 "Opteron QC/Phenom", // Barcelona et.al. 2319 "", 2320 "", 2321 "", 2322 "", 2323 "", 2324 "", 2325 "Zen" 2326 }; 2327 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2328 // September 2013, Vol 3C Table 35-1 2329 const char* const _model_id_pentium_pro[] = { 2330 "", 2331 "Pentium Pro", 2332 "", 2333 "Pentium II model 3", 2334 "", 2335 "Pentium II model 5/Xeon/Celeron", 2336 "Celeron", 2337 "Pentium III/Pentium III Xeon", 2338 "Pentium III/Pentium III Xeon", 2339 "Pentium M model 9", // Yonah 2340 "Pentium III, model A", 2341 "Pentium III, model B", 2342 "", 2343 "Pentium M model D", // Dothan 2344 "", 2345 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2346 "", 2347 "", 2348 "", 2349 "", 2350 "", 2351 "", 2352 "Celeron", // 0x16 Celeron 65nm 2353 "Core 2", // 0x17 Penryn / Harpertown 2354 "", 2355 "", 2356 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2357 "Atom", // 0x1B Z5xx series Silverthorn 2358 "", 2359 "Core 2", // 0x1D Dunnington (6-core) 2360 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2361 "", 2362 "", 2363 "", 2364 "", 2365 "", 2366 "", 2367 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2368 "", 2369 "", 2370 "", // 0x28 2371 "", 2372 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2373 "", 2374 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2375 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2376 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2377 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2378 "", 2379 "", 2380 "", 2381 "", 2382 "", 2383 "", 2384 "", 2385 "", 2386 "", 2387 "", 2388 "Ivy Bridge", // 0x3a 2389 "", 2390 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2391 "", // 0x3d "Next Generation Intel Core Processor" 2392 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2393 "", // 0x3f "Future Generation Intel 
Xeon Processor" 2394 "", 2395 "", 2396 "", 2397 "", 2398 "", 2399 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2400 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2401 nullptr 2402 }; 2403 2404 /* Brand ID is for back compatibility 2405 * Newer CPUs uses the extended brand string */ 2406 const char* const _brand_id[] = { 2407 "", 2408 "Celeron processor", 2409 "Pentium III processor", 2410 "Intel Pentium III Xeon processor", 2411 "", 2412 "", 2413 "", 2414 "", 2415 "Intel Pentium 4 processor", 2416 nullptr 2417 }; 2418 2419 2420 const char* const _feature_edx_id[] = { 2421 "On-Chip FPU", 2422 "Virtual Mode Extensions", 2423 "Debugging Extensions", 2424 "Page Size Extensions", 2425 "Time Stamp Counter", 2426 "Model Specific Registers", 2427 "Physical Address Extension", 2428 "Machine Check Exceptions", 2429 "CMPXCHG8B Instruction", 2430 "On-Chip APIC", 2431 "", 2432 "Fast System Call", 2433 "Memory Type Range Registers", 2434 "Page Global Enable", 2435 "Machine Check Architecture", 2436 "Conditional Mov Instruction", 2437 "Page Attribute Table", 2438 "36-bit Page Size Extension", 2439 "Processor Serial Number", 2440 "CLFLUSH Instruction", 2441 "", 2442 "Debug Trace Store feature", 2443 "ACPI registers in MSR space", 2444 "Intel Architecture MMX Technology", 2445 "Fast Float Point Save and Restore", 2446 "Streaming SIMD extensions", 2447 "Streaming SIMD extensions 2", 2448 "Self-Snoop", 2449 "Hyper Threading", 2450 "Thermal Monitor", 2451 "", 2452 "Pending Break Enable" 2453 }; 2454 2455 const char* const _feature_extended_edx_id[] = { 2456 "", 2457 "", 2458 "", 2459 "", 2460 "", 2461 "", 2462 "", 2463 "", 2464 "", 2465 "", 2466 "", 2467 "SYSCALL/SYSRET", 2468 "", 2469 "", 2470 "", 2471 "", 2472 "", 2473 "", 2474 "", 2475 "", 2476 "Execute Disable Bit", 2477 "", 2478 "", 2479 "", 2480 "", 2481 "", 2482 "", 2483 "RDTSCP", 2484 "", 2485 "Intel 64 Architecture", 2486 "", 2487 "" 2488 }; 2489 2490 const char* const _feature_ecx_id[] = { 2491 "Streaming SIMD Extensions 3", 2492 "PCLMULQDQ", 2493 "64-bit DS Area", 2494 "MONITOR/MWAIT instructions", 2495 "CPL Qualified Debug Store", 2496 "Virtual Machine Extensions", 2497 "Safer Mode Extensions", 2498 "Enhanced Intel SpeedStep technology", 2499 "Thermal Monitor 2", 2500 "Supplemental Streaming SIMD Extensions 3", 2501 "L1 Context ID", 2502 "", 2503 "Fused Multiply-Add", 2504 "CMPXCHG16B", 2505 "xTPR Update Control", 2506 "Perfmon and Debug Capability", 2507 "", 2508 "Process-context identifiers", 2509 "Direct Cache Access", 2510 "Streaming SIMD extensions 4.1", 2511 "Streaming SIMD extensions 4.2", 2512 "x2APIC", 2513 "MOVBE", 2514 "Popcount instruction", 2515 "TSC-Deadline", 2516 "AESNI", 2517 "XSAVE", 2518 "OSXSAVE", 2519 "AVX", 2520 "F16C", 2521 "RDRAND", 2522 "" 2523 }; 2524 2525 const char* const _feature_extended_ecx_id[] = { 2526 "LAHF/SAHF instruction support", 2527 "Core multi-processor legacy mode", 2528 "", 2529 "", 2530 "", 2531 "Advanced Bit Manipulations: LZCNT", 2532 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2533 "Misaligned SSE mode", 2534 "", 2535 "", 2536 "", 2537 "", 2538 "", 2539 "", 2540 "", 2541 "", 2542 "", 2543 "", 2544 "", 2545 "", 2546 "", 2547 "", 2548 "", 2549 "", 2550 "", 2551 "", 2552 "", 2553 "", 2554 "", 2555 "", 2556 "", 2557 "" 2558 }; 2559 2560 void VM_Version::initialize_tsc(void) { 2561 ResourceMark rm; 2562 2563 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2564 if (cpuid_brand_string_stub_blob == nullptr) { 2565 
vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2566 }
2567 CodeBuffer c(cpuid_brand_string_stub_blob);
2568 VM_Version_StubGenerator g(&c);
2569 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2570 g.generate_getCPUIDBrandString());
2571 }
2572
2573 const char* VM_Version::cpu_model_description(void) {
2574 uint32_t cpu_family = extended_cpu_family();
2575 uint32_t cpu_model = extended_cpu_model();
2576 const char* model = nullptr;
2577
2578 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2579 for (uint32_t i = 0; i <= cpu_model; i++) {
2580 model = _model_id_pentium_pro[i];
2581 if (model == nullptr) {
2582 break;
2583 }
2584 }
2585 }
2586 return model;
2587 }
2588
2589 const char* VM_Version::cpu_brand_string(void) {
2590 if (_cpu_brand_string == nullptr) {
2591 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2592 if (nullptr == _cpu_brand_string) {
2593 return nullptr;
2594 }
2595 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2596 if (ret_val != OS_OK) {
2597 FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2598 _cpu_brand_string = nullptr;
2599 }
2600 }
2601 return _cpu_brand_string;
2602 }
2603
2604 const char* VM_Version::cpu_brand(void) {
2605 const char* brand = nullptr;
2606
2607 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2608 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2609 brand = _brand_id[0];
2610 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2611 brand = _brand_id[i];
2612 }
2613 }
2614 return brand;
2615 }
2616
2617 bool VM_Version::cpu_is_em64t(void) {
2618 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2619 }
2620
2621 bool VM_Version::is_netburst(void) {
2622 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2623 }
2624
2625 bool VM_Version::supports_tscinv_ext(void) {
2626 if (!supports_tscinv_bit()) {
2627 return false;
2628 }
2629
2630 if (is_intel()) {
2631 return true;
2632 }
2633
2634 if (is_amd()) {
2635 return !is_amd_Barcelona();
2636 }
2637
2638 if (is_hygon()) {
2639 return true;
2640 }
2641
2642 return false;
2643 }
2644
2645 void VM_Version::resolve_cpu_information_details(void) {
2646
2647 // In the future we want to base this information on proper cpu
2648 // and cache topology enumeration such as:
2649 // Intel 64 Architecture Processor Topology Enumeration,
2650 // which supports system cpu and cache topology enumeration
2651 // using either x2APIC IDs or initial APIC IDs.
2652
2653 // Currently we make only rough estimates, which will not
2654 // necessarily reflect the exact configuration of the system.
2655
2656 // This is the number of logical hardware threads
2657 // visible to the operating system.
2658 _no_of_threads = os::processor_count();
2659
2660 // Find out the number of threads per cpu package.
2661 int threads_per_package = threads_per_core() * cores_per_cpu();
2662
2663 // Use the number of threads visible to the process to guess the number of sockets.
2664 _no_of_sockets = _no_of_threads / threads_per_package;
2665
2666 // The process might see only a subset of the total number of threads
2667 // from a single processor package, e.g. under virtualization or
2668 // resource management. If so, just report a single package.
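// Worked example (illustrative numbers only): 16 OS-visible threads on
// packages of 4 cores * 2 threads each give 16 / 8 = 2 sockets; a process
// restricted to fewer threads than one package computes 0, corrected below.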
2669 if (0 == _no_of_sockets) { 2670 _no_of_sockets = 1; 2671 } 2672 2673 // estimate the number of cores 2674 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2675 } 2676 2677 2678 const char* VM_Version::cpu_family_description(void) { 2679 int cpu_family_id = extended_cpu_family(); 2680 if (is_amd()) { 2681 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2682 return _family_id_amd[cpu_family_id]; 2683 } 2684 } 2685 if (is_intel()) { 2686 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2687 return cpu_model_description(); 2688 } 2689 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2690 return _family_id_intel[cpu_family_id]; 2691 } 2692 } 2693 if (is_hygon()) { 2694 return "Dhyana"; 2695 } 2696 return "Unknown x86"; 2697 } 2698 2699 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2700 assert(buf != nullptr, "buffer is null!"); 2701 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2702 2703 const char* cpu_type = nullptr; 2704 const char* x64 = nullptr; 2705 2706 if (is_intel()) { 2707 cpu_type = "Intel"; 2708 x64 = cpu_is_em64t() ? " Intel64" : ""; 2709 } else if (is_amd()) { 2710 cpu_type = "AMD"; 2711 x64 = cpu_is_em64t() ? " AMD64" : ""; 2712 } else if (is_hygon()) { 2713 cpu_type = "Hygon"; 2714 x64 = cpu_is_em64t() ? " AMD64" : ""; 2715 } else { 2716 cpu_type = "Unknown x86"; 2717 x64 = cpu_is_em64t() ? " x86_64" : ""; 2718 } 2719 2720 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2721 cpu_type, 2722 cpu_family_description(), 2723 supports_ht() ? " (HT)" : "", 2724 supports_sse3() ? " SSE3" : "", 2725 supports_ssse3() ? " SSSE3" : "", 2726 supports_sse4_1() ? " SSE4.1" : "", 2727 supports_sse4_2() ? " SSE4.2" : "", 2728 supports_sse4a() ? " SSE4A" : "", 2729 is_netburst() ? " Netburst" : "", 2730 is_intel_family_core() ? 
" Core" : "", 2731 x64); 2732 2733 return OS_OK; 2734 } 2735 2736 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2737 assert(buf != nullptr, "buffer is null!"); 2738 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2739 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2740 2741 // invoke newly generated asm code to fetch CPU Brand String 2742 getCPUIDBrandString_stub(&_cpuid_info); 2743 2744 // fetch results into buffer 2745 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2746 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2747 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2748 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2749 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2750 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2751 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2752 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2753 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2754 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2755 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2756 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2757 2758 return OS_OK; 2759 } 2760 2761 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2762 guarantee(buf != nullptr, "buffer is null!"); 2763 guarantee(buf_len > 0, "buffer len not enough!"); 2764 2765 unsigned int flag = 0; 2766 unsigned int fi = 0; 2767 size_t written = 0; 2768 const char* prefix = ""; 2769 2770 #define WRITE_TO_BUF(string) \ 2771 { \ 2772 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2773 if (res < 0) { \ 2774 return buf_len - 1; \ 2775 } \ 2776 written += res; \ 2777 if (prefix[0] == '\0') { \ 2778 prefix = ", "; \ 2779 } \ 2780 } 2781 2782 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2783 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2784 continue; /* no hyperthreading */ 2785 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2786 continue; /* no fast system call */ 2787 } 2788 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2789 WRITE_TO_BUF(_feature_edx_id[fi]); 2790 } 2791 } 2792 2793 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2794 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2795 WRITE_TO_BUF(_feature_ecx_id[fi]); 2796 } 2797 } 2798 2799 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2800 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2801 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2802 } 2803 } 2804 2805 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2806 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2807 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2808 } 2809 } 2810 2811 if (supports_tscinv_bit()) { 2812 WRITE_TO_BUF("Invariant TSC"); 2813 } 2814 2815 return written; 2816 } 2817 2818 /** 2819 * Write a detailed description of the cpu to a given buffer, including 2820 * feature set. 
2821 */ 2822 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2823 assert(buf != nullptr, "buffer is null!"); 2824 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2825 2826 static const char* unknown = "<unknown>"; 2827 char vendor_id[VENDOR_LENGTH]; 2828 const char* family = nullptr; 2829 const char* model = nullptr; 2830 const char* brand = nullptr; 2831 int outputLen = 0; 2832 2833 family = cpu_family_description(); 2834 if (family == nullptr) { 2835 family = unknown; 2836 } 2837 2838 model = cpu_model_description(); 2839 if (model == nullptr) { 2840 model = unknown; 2841 } 2842 2843 brand = cpu_brand_string(); 2844 2845 if (brand == nullptr) { 2846 brand = cpu_brand(); 2847 if (brand == nullptr) { 2848 brand = unknown; 2849 } 2850 } 2851 2852 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2853 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2854 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2855 vendor_id[VENDOR_LENGTH-1] = '\0'; 2856 2857 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2858 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2859 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2860 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2861 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2862 "Supports: ", 2863 brand, 2864 vendor_id, 2865 family, 2866 extended_cpu_family(), 2867 model, 2868 extended_cpu_model(), 2869 cpu_stepping(), 2870 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2871 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2872 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2873 _cpuid_info.std_cpuid1_eax.value, 2874 _cpuid_info.std_cpuid1_ebx.value, 2875 _cpuid_info.std_cpuid1_ecx.value, 2876 _cpuid_info.std_cpuid1_edx.value, 2877 _cpuid_info.ext_cpuid1_eax, 2878 _cpuid_info.ext_cpuid1_ebx, 2879 _cpuid_info.ext_cpuid1_ecx, 2880 _cpuid_info.ext_cpuid1_edx); 2881 2882 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2883 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2884 return OS_ERR; 2885 } 2886 2887 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2888 2889 return OS_OK; 2890 } 2891 2892 2893 // Fill in Abstract_VM_Version statics 2894 void VM_Version::initialize_cpu_information() { 2895 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2896 assert(!_initialized, "shouldn't be initialized yet"); 2897 resolve_cpu_information_details(); 2898 2899 // initialize cpu_name and cpu_desc 2900 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2901 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2902 _initialized = true; 2903 } 2904 2905 /** 2906 * For information about extracting the frequency from the cpu brand string, please see: 2907 * 2908 * Intel Processor Identification and the CPUID Instruction 2909 * Application Note 485 2910 * May 2012 2911 * 2912 * The return value is the frequency in Hz. 2913 */ 2914 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2915 const char* const brand_string = cpu_brand_string(); 2916 if (brand_string == nullptr) { 2917 return 0; 2918 } 2919 const int64_t MEGA = 1000000; 2920 int64_t multiplier = 0; 2921 int64_t frequency = 0; 2922 uint8_t idx = 0; 2923 // The brand string buffer is at most 48 bytes. 2924 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
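// Worked example (illustrative brand string): for a string ending "3.60GHz",
// the scan below stops with idx at 'G', so multiplier = 1e9. Since
// brand_string[idx-3] == '.', the dotted branch computes
// 3*1e9 + 6*1e8 + 0*1e7 = 3600000000 Hz.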
2925 for (; idx < 48-2; ++idx) { 2926 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2927 // Search brand string for "yHz" where y is M, G, or T. 2928 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2929 if (brand_string[idx] == 'M') { 2930 multiplier = MEGA; 2931 } else if (brand_string[idx] == 'G') { 2932 multiplier = MEGA * 1000; 2933 } else if (brand_string[idx] == 'T') { 2934 multiplier = MEGA * MEGA; 2935 } 2936 break; 2937 } 2938 } 2939 if (multiplier > 0) { 2940 // Compute frequency (in Hz) from brand string. 2941 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2942 frequency = (brand_string[idx-4] - '0') * multiplier; 2943 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2944 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2945 } else { // format is "xxxx" 2946 frequency = (brand_string[idx-4] - '0') * 1000; 2947 frequency += (brand_string[idx-3] - '0') * 100; 2948 frequency += (brand_string[idx-2] - '0') * 10; 2949 frequency += (brand_string[idx-1] - '0'); 2950 frequency *= multiplier; 2951 } 2952 } 2953 return frequency; 2954 } 2955 2956 2957 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2958 if (_max_qualified_cpu_frequency == 0) { 2959 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2960 } 2961 return _max_qualified_cpu_frequency; 2962 } 2963 2964 uint64_t VM_Version::CpuidInfo::feature_flags() const { 2965 uint64_t result = 0; 2966 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2967 result |= CPU_CX8; 2968 if (std_cpuid1_edx.bits.cmov != 0) 2969 result |= CPU_CMOV; 2970 if (std_cpuid1_edx.bits.clflush != 0) 2971 result |= CPU_FLUSH; 2972 #ifdef _LP64 2973 // clflush should always be available on x86_64 2974 // if not we are in real trouble because we rely on it 2975 // to flush the code cache. 2976 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2977 #endif 2978 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2979 ext_cpuid1_edx.bits.fxsr != 0)) 2980 result |= CPU_FXSR; 2981 // HT flag is set for multi-core processors also. 
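// Hence CPU_HT is derived from the detected topology (more than one thread
// per core) rather than from the raw CPUID HT bit alone.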
2982 if (threads_per_core() > 1) 2983 result |= CPU_HT; 2984 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2985 ext_cpuid1_edx.bits.mmx != 0)) 2986 result |= CPU_MMX; 2987 if (std_cpuid1_edx.bits.sse != 0) 2988 result |= CPU_SSE; 2989 if (std_cpuid1_edx.bits.sse2 != 0) 2990 result |= CPU_SSE2; 2991 if (std_cpuid1_ecx.bits.sse3 != 0) 2992 result |= CPU_SSE3; 2993 if (std_cpuid1_ecx.bits.ssse3 != 0) 2994 result |= CPU_SSSE3; 2995 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2996 result |= CPU_SSE4_1; 2997 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2998 result |= CPU_SSE4_2; 2999 if (std_cpuid1_ecx.bits.popcnt != 0) 3000 result |= CPU_POPCNT; 3001 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 3002 xem_xcr0_eax.bits.apx_f != 0) { 3003 result |= CPU_APX_F; 3004 } 3005 if (std_cpuid1_ecx.bits.avx != 0 && 3006 std_cpuid1_ecx.bits.osxsave != 0 && 3007 xem_xcr0_eax.bits.sse != 0 && 3008 xem_xcr0_eax.bits.ymm != 0) { 3009 result |= CPU_AVX; 3010 result |= CPU_VZEROUPPER; 3011 if (sefsl1_cpuid7_eax.bits.sha512 != 0) 3012 result |= CPU_SHA512; 3013 if (std_cpuid1_ecx.bits.f16c != 0) 3014 result |= CPU_F16C; 3015 if (sef_cpuid7_ebx.bits.avx2 != 0) { 3016 result |= CPU_AVX2; 3017 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 3018 result |= CPU_AVX_IFMA; 3019 } 3020 if (sef_cpuid7_ecx.bits.gfni != 0) 3021 result |= CPU_GFNI; 3022 if (sef_cpuid7_ebx.bits.avx512f != 0 && 3023 xem_xcr0_eax.bits.opmask != 0 && 3024 xem_xcr0_eax.bits.zmm512 != 0 && 3025 xem_xcr0_eax.bits.zmm32 != 0) { 3026 result |= CPU_AVX512F; 3027 if (sef_cpuid7_ebx.bits.avx512cd != 0) 3028 result |= CPU_AVX512CD; 3029 if (sef_cpuid7_ebx.bits.avx512dq != 0) 3030 result |= CPU_AVX512DQ; 3031 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 3032 result |= CPU_AVX512_IFMA; 3033 if (sef_cpuid7_ebx.bits.avx512pf != 0) 3034 result |= CPU_AVX512PF; 3035 if (sef_cpuid7_ebx.bits.avx512er != 0) 3036 result |= CPU_AVX512ER; 3037 if (sef_cpuid7_ebx.bits.avx512bw != 0) 3038 result |= CPU_AVX512BW; 3039 if (sef_cpuid7_ebx.bits.avx512vl != 0) 3040 result |= CPU_AVX512VL; 3041 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 3042 result |= CPU_AVX512_VPOPCNTDQ; 3043 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 3044 result |= CPU_AVX512_VPCLMULQDQ; 3045 if (sef_cpuid7_ecx.bits.vaes != 0) 3046 result |= CPU_AVX512_VAES; 3047 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 3048 result |= CPU_AVX512_VNNI; 3049 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 3050 result |= CPU_AVX512_BITALG; 3051 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 3052 result |= CPU_AVX512_VBMI; 3053 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 3054 result |= CPU_AVX512_VBMI2; 3055 } 3056 } 3057 if (std_cpuid1_ecx.bits.hv != 0) 3058 result |= CPU_HV; 3059 if (sef_cpuid7_ebx.bits.bmi1 != 0) 3060 result |= CPU_BMI1; 3061 if (std_cpuid1_edx.bits.tsc != 0) 3062 result |= CPU_TSC; 3063 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3064 result |= CPU_TSCINV_BIT; 3065 if (std_cpuid1_ecx.bits.aes != 0) 3066 result |= CPU_AES; 3067 if (sef_cpuid7_ebx.bits.erms != 0) 3068 result |= CPU_ERMS; 3069 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3070 result |= CPU_FSRM; 3071 if (std_cpuid1_ecx.bits.clmul != 0) 3072 result |= CPU_CLMUL; 3073 if (sef_cpuid7_ebx.bits.rtm != 0) 3074 result |= CPU_RTM; 3075 if (sef_cpuid7_ebx.bits.adx != 0) 3076 result |= CPU_ADX; 3077 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3078 result |= CPU_BMI2; 3079 if (sef_cpuid7_ebx.bits.sha != 0) 3080 result |= CPU_SHA; 3081 if (std_cpuid1_ecx.bits.fma != 0) 3082 result |= CPU_FMA; 3083 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3084 result |= CPU_FLUSHOPT; 3085 if 
(ext_cpuid1_edx.bits.rdtscp != 0)
3086 result |= CPU_RDTSCP;
3087 if (sef_cpuid7_ecx.bits.rdpid != 0)
3088 result |= CPU_RDPID;
3089
3090 // AMD|Hygon features.
3091 if (is_amd_family()) {
3092 if ((ext_cpuid1_edx.bits.tdnow != 0) ||
3093 (ext_cpuid1_ecx.bits.prefetchw != 0))
3094 result |= CPU_3DNOW_PREFETCH;
3095 if (ext_cpuid1_ecx.bits.lzcnt != 0)
3096 result |= CPU_LZCNT;
3097 if (ext_cpuid1_ecx.bits.sse4a != 0)
3098 result |= CPU_SSE4A;
3099 }
3100
3101 // Intel features.
3102 if (is_intel()) {
3103 if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3104 result |= CPU_LZCNT;
3105 }
3106 if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3107 result |= CPU_3DNOW_PREFETCH;
3108 }
3109 if (sef_cpuid7_ebx.bits.clwb != 0) {
3110 result |= CPU_CLWB;
3111 }
3112 if (sef_cpuid7_edx.bits.serialize != 0)
3113 result |= CPU_SERIALIZE;
3114 }
3115
3116 // ZX features.
3117 if (is_zx()) {
3118 if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3119 result |= CPU_LZCNT;
3120 }
3121 if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3122 result |= CPU_3DNOW_PREFETCH;
3123 }
3124 }
3125
3126 // Protection key features.
3127 if (sef_cpuid7_ecx.bits.pku != 0) {
3128 result |= CPU_PKU;
3129 }
3130 if (sef_cpuid7_ecx.bits.ospke != 0) {
3131 result |= CPU_OSPKE;
3132 }
3133
3134 // Control flow enforcement (CET) features.
3135 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3136 result |= CPU_CET_SS;
3137 }
3138 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3139 result |= CPU_CET_IBT;
3140 }
3141
3142 // Composite features.
3143 if (supports_tscinv_bit() &&
3144 ((is_amd_family() && !is_amd_Barcelona()) ||
3145 is_intel_tsc_synched_at_init())) {
3146 result |= CPU_TSCINV;
3147 }
3148
3149 return result;
3150 }
3151
3152 bool VM_Version::os_supports_avx_vectors() {
3153 bool retVal = false;
3154 int nreg = 2 LP64_ONLY(+2);
3155 if (supports_evex()) {
3156 // Verify that the OS saves/restores all bits of the EVEX registers
3157 // during signal processing.
3158 retVal = true;
3159 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3160 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3161 retVal = false;
3162 break;
3163 }
3164 }
3165 } else if (supports_avx()) {
3166 // Verify that the OS saves/restores all bits of the AVX registers
3167 // during signal processing.
3168 retVal = true;
3169 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3170 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3171 retVal = false;
3172 break;
3173 }
3174 }
3175 // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen
3176 if (retVal == false) {
3177 // Verify that the OS saves/restores all bits of the EVEX registers
3178 // during signal processing.
3179 retVal = true;
3180 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3181 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3182 retVal = false;
3183 break;
3184 }
3185 }
3186 }
3187 }
3188 return retVal;
3189 }
3190
3191 bool VM_Version::os_supports_apx_egprs() {
3192 if (!supports_apx_f()) {
3193 return false;
3194 }
3195 // Enable APX support for product builds after
3196 // completion of planned features listed in JDK-8329030.
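// (Assumed context for the check below: in debug builds the saved r16/r31
// values in apx_save must still equal the test pattern written before
// signal handling, confirming the OS preserves EGPR state across a context
// switch.)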
3197 #if !defined(PRODUCT) 3198 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3199 _cpuid_info.apx_save[1] != egpr_test_value()) { 3200 return false; 3201 } 3202 return true; 3203 #else 3204 return false; 3205 #endif 3206 } 3207 3208 uint VM_Version::cores_per_cpu() { 3209 uint result = 1; 3210 if (is_intel()) { 3211 bool supports_topology = supports_processor_topology(); 3212 if (supports_topology) { 3213 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3214 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3215 } 3216 if (!supports_topology || result == 0) { 3217 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3218 } 3219 } else if (is_amd_family()) { 3220 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3221 } else if (is_zx()) { 3222 bool supports_topology = supports_processor_topology(); 3223 if (supports_topology) { 3224 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3225 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3226 } 3227 if (!supports_topology || result == 0) { 3228 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3229 } 3230 } 3231 return result; 3232 } 3233 3234 uint VM_Version::threads_per_core() { 3235 uint result = 1; 3236 if (is_intel() && supports_processor_topology()) { 3237 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3238 } else if (is_zx() && supports_processor_topology()) { 3239 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3240 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3241 if (cpu_family() >= 0x17) { 3242 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3243 } else { 3244 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3245 cores_per_cpu(); 3246 } 3247 } 3248 return (result == 0 ? 1 : result); 3249 } 3250 3251 uint VM_Version::L1_line_size() { 3252 uint result = 0; 3253 if (is_intel()) { 3254 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3255 } else if (is_amd_family()) { 3256 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3257 } else if (is_zx()) { 3258 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3259 } 3260 if (result < 32) // not defined ? 3261 result = 32; // 32 bytes by default on x86 and other x64 3262 return result; 3263 } 3264 3265 bool VM_Version::is_intel_tsc_synched_at_init() { 3266 if (is_intel_family_core()) { 3267 uint32_t ext_model = extended_cpu_model(); 3268 if (ext_model == CPU_MODEL_NEHALEM_EP || 3269 ext_model == CPU_MODEL_WESTMERE_EP || 3270 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3271 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3272 // <= 2-socket invariant tsc support. EX versions are usually used 3273 // in > 2-socket systems and likely don't synchronize tscs at 3274 // initialization. 3275 // Code that uses tsc values must be prepared for them to arbitrarily 3276 // jump forward or backward. 3277 return true; 3278 } 3279 } 3280 return false; 3281 } 3282 3283 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3284 // Hardware prefetching (distance/size in bytes): 3285 // Pentium 3 - 64 / 32 3286 // Pentium 4 - 256 / 128 3287 // Athlon - 64 / 32 ???? 
3288 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3289 // Core - 128 / 64 3290 // 3291 // Software prefetching (distance in bytes / instruction with best score): 3292 // Pentium 3 - 128 / prefetchnta 3293 // Pentium 4 - 512 / prefetchnta 3294 // Athlon - 128 / prefetchnta 3295 // Opteron - 256 / prefetchnta 3296 // Core - 256 / prefetchnta 3297 // It will be used only when AllocatePrefetchStyle > 0 3298 3299 if (is_amd_family()) { // AMD | Hygon 3300 if (supports_sse2()) { 3301 return 256; // Opteron 3302 } else { 3303 return 128; // Athlon 3304 } 3305 } else { // Intel 3306 if (supports_sse3() && cpu_family() == 6) { 3307 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3308 return 192; 3309 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3310 #ifdef _LP64 3311 return 384; 3312 #else 3313 return 320; 3314 #endif 3315 } 3316 } 3317 if (supports_sse2()) { 3318 if (cpu_family() == 6) { 3319 return 256; // Pentium M, Core, Core2 3320 } else { 3321 return 512; // Pentium 4 3322 } 3323 } else { 3324 return 128; // Pentium 3 (and all other old CPUs) 3325 } 3326 } 3327 } 3328 3329 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3330 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3331 switch (id) { 3332 case vmIntrinsics::_floatToFloat16: 3333 case vmIntrinsics::_float16ToFloat: 3334 if (!supports_float16()) { 3335 return false; 3336 } 3337 break; 3338 default: 3339 break; 3340 } 3341 return true; 3342 }