/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif
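// For orientation only: the code-cache flush that the assert above protects
// boils down to a clflush loop over the affected range, conceptually
// (illustrative sketch, not the actual ICache implementation):
//
//   for (address p = align_down(start, ICache::line_size);
//        p < start + nbytes; p += ICache::line_size) {
//     __asm__ volatile("clflush (%0)" : : "r"(p) : "memory");
//   }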

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are caller-saved registers. Explicitly clearing r16 and r31 before
    // the signal test guarantees that any values observed in them after signal
    // handling were re-instantiated by the operating system, rather than
    // simply never having been modified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);
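    // How the pre-CPUID detection above and below works, in C-like pseudocode
    // (conceptual sketch only):
    //
    //   eflags = read_flags();
    //   write_flags(eflags ^ BIT);   // try to toggle AC (bit 18) or ID (bit 21)
    //   if (read_flags() == eflags)  // bit stuck => older CPU
    //     ...
    //
    // A 386 cannot toggle EFLAGS.AC; a 486 can, but cannot toggle EFLAGS.ID,
    // which is the architectural indicator that the CPUID instruction exists.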
    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);
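    // Background on leaf 0xB (summarizing the Intel SDM description of CPUID
    // leaf 0BH): each sub-leaf, selected via ECX, describes one topology
    // level -- ECX=0 the SMT/thread level, ECX=1 the core level, ECX=2 the
    // package level. An unsupported sub-leaf returns EAX=EBX=0, which is what
    // the eax[4:0]/ebx[15:0] validity checks below detect.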
    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
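    // XCR0 bit assignments used by the checks in this stub (per the x86
    // XSAVE architecture):
    //   bit 1  - SSE (XMM) state        bit 2  - AVX (YMM) state
    //   bit 5  - AVX-512 opmask state   bit 6  - ZMM0-15 upper halves
    //   bit 7  - ZMM16-31 state         bit 19 - APX extended GPR state
    // Hence 0x6 below means "OS saves XMM+YMM", 0xE0 means "OS saves the
    // three AVX-512 state components", and 0x80000 means "OS saves EGPRs".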
    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
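    // In C-like pseudocode, the two-part APX gate implemented below is
    // (conceptual sketch):
    //
    //   if ((cpuid(7, 1).edx & (1 << 21)) &&   // CPU implements APX_F
    //       (xgetbv(0)      & (1 << 19))) {    // OS saves/restores EGPRs
    //     probe_egpr_save_restore_across_signal();
    //   }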
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
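    // Rationale for the deliberate SEGV above: the signal round trip exercises
    // the kernel's state save/restore path for this thread. If the OS context
    // switch does not preserve the extended GPRs, r16/r31 will not hold
    // egpr_test_value() when execution resumes, and the values stored to
    // apx_save_offset() will expose that. The same trick is used below for
    // the YMM/ZMM registers.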
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS saves SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
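    // Note on register choice (explanatory, not from the original sources):
    // xmm0 is volatile in every calling convention, xmm7/xmm8/xmm15 fall in
    // the Windows callee-saved range, and xmm31 only exists with EVEX, so
    // together they give broad coverage of the OS's save/restore behavior.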

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };
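  // Why vzeroupper on the way out: mixing legacy SSE encodings with dirty
  // upper YMM/ZMM state incurs large transition penalties on many Intel
  // parts, so the detection stub clears the upper state before returning to
  // ordinary VM code (see Intel's optimization guidance on AVX-SSE
  // transitions).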
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
# undef __
  }

  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0);   // CPUID leaf
    __ mov(rsi, c_rarg1);   // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };
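  // Typical use of the stub above (illustrative sketch): hypervisors publish
  // their signature in the 0x40000000 leaf, so a caller might do
  //
  //   uint32_t regs[4];
  //   detect_virt_stub(0x40000000, regs); // regs[1..3] then spell the vendor
  //                                       // id, e.g. "KVMKVMKVM" or
  //                                       // "VMwareVMware"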

  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };
};
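// The brand string captured by generate_getCPUIDBrandString above is the
// 48-byte ASCII processor name from leaves 0x80000002-0x80000004: twelve
// 4-byte register values laid out in eax, ebx, ecx, edx order, e.g.
// "Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz" (illustrative value).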

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why ICache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // the clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
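  // Worked example (hypothetical command line): on a CPU that stops at SSE3,
  // -XX:UseSSE=4 yields use_sse_limit == 3, so the code below warns
  // "UseSSE=4 is not supported on this CPU, setting it to UseSSE=3" and
  // clamps the flag; with the flag left at its default it is simply set to 3.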
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
  }

  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
  }

  if (!UseAPX) {
    _features &= ~CPU_APX_F;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
    _features &= ~CPU_SHA512;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }
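  // Summary of the dependency chain enforced above (explanatory note):
  // UseAES gates UseAESIntrinsics (which additionally needs SSE3+), and
  // UseAESIntrinsics in turn gates UseAESCTRIntrinsics (which additionally
  // needs SSE4.1+); disabling any link disables everything downstream.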

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 intrinsics require AVX2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsics require CLMUL and SSE2 instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else
#endif
  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif
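  // The vector width negotiated below feeds C2's auto-vectorization and the
  // Vector API: 16 bytes (XMM) with plain SSE2, 32 bytes (YMM) with
  // AVX/AVX2, and 64 bytes (ZMM) with AVX-512.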
#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX3 (AVX-512)
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }
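  // Worked example (hypothetical flags): on an AVX2 machine max_vector_size
  // is 32, so -XX:MaxVectorSize=64 warns and clamps to 32, while
  // -XX:MaxVectorSize=24 is rejected as not a power of 2 and also reverts
  // to the default of 32.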

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else
#endif
  if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize to " INTX_FORMAT, ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
      warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#endif // _LP64

  // Use count leading zeros instruction if available.
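  // For illustration: on a 64-bit operand, lzcnt counts zero bits from the most
  // significant end, e.g. lzcnt of 0x0000000000000010 yields 59. Unlike bsr,
  // lzcnt is also defined for a zero input (it returns the operand width, 64).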
  if (supports_lzcnt()) {
    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
      UseCountLeadingZerosInstruction = true;
    }
  } else if (UseCountLeadingZerosInstruction) {
    warning("lzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
  }

  // Use count trailing zeros instruction if available
  if (supports_bmi1()) {
    // tzcnt does not require VEX prefix
    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
      if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
        // Don't use tzcnt if BMI1 is switched off on command line.
        UseCountTrailingZerosInstruction = false;
      } else {
        UseCountTrailingZerosInstruction = true;
      }
    }
  } else if (UseCountTrailingZerosInstruction) {
    warning("tzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
  }

  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
  // VEX prefix is generated only when AVX > 0.
  if (supports_bmi1() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
      UseBMI1Instructions = true;
    }
  } else if (UseBMI1Instructions) {
    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
  }

  if (supports_bmi2() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
      UseBMI2Instructions = true;
    }
  } else if (UseBMI2Instructions) {
    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
  }

  // Use population count instruction if available.
  if (supports_popcnt()) {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
      UsePopCountInstruction = true;
    }
  } else if (UsePopCountInstruction) {
    warning("POPCNT instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

  // Use fast-string operations if available.
  if (supports_erms()) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = true;
    }
  } else if (UseFastStosb) {
    warning("fast-string operations are not available on this CPU");
    FLAG_SET_DEFAULT(UseFastStosb, false);
  }

  // For AMD Processors use XMM/YMM MOVDQU instructions
  // for Object Initialization as default
  if (is_amd() && cpu_family() >= 0x19) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = false;
    }
  }

#ifdef COMPILER2
  if (is_intel() && MaxVectorSize > 16) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = false;
    }
  }
#endif

  // Use XMM/YMM MOVDQU instruction for Object Initialization
  if (UseSSE >= 2 && UseUnalignedLoadStores) {
    if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
      UseXMMForObjInit = true;
    }
  } else if (UseXMMForObjInit) {
    warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
    FLAG_SET_DEFAULT(UseXMMForObjInit, false);
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
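    // Hence only require vector alignment when unaligned loads/stores are not
    // being used anyway, which is what the negation below expresses.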
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
  int cache_line_size = checked_cast<int>(prefetch_data_size());
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings

  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // gc copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
  // The used prefetcht0 instruction works for both amd64 and em64t.
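  // For the record: the 576-byte default below is exactly the 9 dcache lines
  // mentioned above at 64 bytes per line (9 * 64 = 576).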
  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
  }
#endif

  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
      (cache_line_size > ContendedPaddingWidth))
    ContendedPaddingWidth = cache_line_size;

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

#ifndef PRODUCT
  if (log_is_enabled(Info, os, cpu)) {
    LogStream ls(Log(os, cpu)::info());
    outputStream* log = &ls;
    log->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    log->print("UseSSE=%d", UseSSE);
    if (UseAVX > 0) {
      log->print(" UseAVX=%d", UseAVX);
    }
    if (UseAES) {
      log->print(" UseAES=1");
    }
#ifdef COMPILER2
    if (MaxVectorSize > 0) {
      log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
    }
#endif
    log->cr();
    log->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
      log->print_cr(": no prefetching");
    } else {
      log->print(" prefetching: ");
      if (UseSSE == 0 && supports_3dnow_prefetch()) {
        log->print("PREFETCHW");
      } else if (UseSSE >= 1) {
        if (AllocatePrefetchInstr == 0) {
          log->print("PREFETCHNTA");
        } else if (AllocatePrefetchInstr == 1) {
          log->print("PREFETCHT0");
        } else if (AllocatePrefetchInstr == 2) {
          log->print("PREFETCHT2");
        } else if (AllocatePrefetchInstr == 3) {
          log->print("PREFETCHW");
        }
      }
      if (AllocatePrefetchLines > 1) {
        log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
      } else {
        log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
      }
    }

    if (PrefetchCopyIntervalInBytes > 0) {
      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (ContendedPaddingWidth > 0) {
      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
    FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
    FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
  VirtualizationType vrt = VM_Version::get_detected_virtualization();
  if (vrt == XenHVM) {
    st->print_cr("Xen hardware-assisted virtualization detected");
  } else if (vrt == KVM) {
    st->print_cr("KVM virtualization detected");
  } else if (vrt == VMWare) {
    st->print_cr("VMWare virtualization detected");
    VirtualizationSupport::print_virtualization_info(st);
  } else if (vrt == HyperV) {
    st->print_cr("Hyper-V virtualization detected");
  } else if (vrt == HyperVRole) {
    st->print_cr("Hyper-V role detected");
  }
}

bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another Intel machine not recognized in the table, we are okay.
    return false;
  }
}

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// more information :
// https://kb.vmware.com/s/article/1009458
//
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0};

  // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
  // from 0x40000000 until 0x40010000.
  // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}

#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2 &&
         is_intel_cascade_lake();
}
#endif

bool VM_Version::is_intel_cascade_lake() {
  return is_intel_skylake() && _stepping >= 5;
}

// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// Intel platforms that support the serialize instruction have an improved
// implementation of 64-byte load/stores, so the default threshold is set
// to 0 for these platforms.
int VM_Version::avx3_threshold() {
  return (is_intel_family_core() &&
          supports_serialize() &&
          FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}

#if defined(_LP64)
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
#endif

static bool _vm_version_initialized = false;

void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                    g.generate_detect_virt());

#if defined(_LP64)
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                             g.clear_apx_test_state());
#endif
  get_processor_features();

  LP64_ONLY(Assembler::precompute_instructions();)

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}

typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6, // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;

static BufferBlob* cpuid_brand_string_stub_blob;
static const int cpuid_brand_string_stub_size = 550;

extern "C" {
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;

const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   // or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom", // Barcelona et al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};

/* Brand ID is for back compatibility.
 * Newer CPUs use the extended brand string. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};


const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Floating Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};

const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};

const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};

void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                            g.generate_getCPUIDBrandString());
}

const char* VM_Version::cpu_model_description(void) {
  uint32_t cpu_family = extended_cpu_family();
  uint32_t cpu_model = extended_cpu_model();
  const char* model = nullptr;

  if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
    for (uint32_t i = 0; i <= cpu_model; i++) {
      model = _model_id_pentium_pro[i];
      if (model == nullptr) {
        break;
      }
    }
  }
  return model;
}

const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr;
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}

const char* VM_Version::cpu_brand(void) {
  const char* brand = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}

bool VM_Version::cpu_is_em64t(void) {
  return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
}

bool VM_Version::is_netburst(void) {
  return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
}

bool VM_Version::supports_tscinv_ext(void) {
  if (!supports_tscinv_bit()) {
    return false;
  }

  if (is_intel()) {
    return true;
  }

  if (is_amd()) {
    return !is_amd_Barcelona();
  }

  if (is_hygon()) {
    return true;
  }

  return false;
}

void VM_Version::resolve_cpu_information_details(void) {

  // in future we want to base this information on proper cpu
  // and cache topology enumeration such as:
  // Intel 64 Architecture Processor Topology Enumeration
  // which supports system cpu and cache topology enumeration
  // either using 2xAPICIDs or initial APICIDs

  // currently only rough cpu information estimates
  // which will not necessarily reflect the exact configuration of the system

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out number of threads per cpu package
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // use amount of threads visible to the process in order to guess number of sockets
  _no_of_sockets = _no_of_threads / threads_per_package;

  // process might only see a subset of the total number of threads
  // from a single processor package. Virtualization/resource management for example.
  // If so then just write a hard 1 as num of pkgs.
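  // Illustrative example: a VM pinned to 4 of the 32 hardware threads of a
  // 2-socket machine with threads_per_package == 16 computes 4 / 16 == 0
  // sockets above, which is normalized to 1 below.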
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}


const char* VM_Version::cpu_family_description(void) {
  int cpu_family_id = extended_cpu_family();
  if (is_amd()) {
    if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
      return _family_id_amd[cpu_family_id];
    }
  }
  if (is_intel()) {
    if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
      return cpu_model_description();
    }
    if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
      return _family_id_intel[cpu_family_id];
    }
  }
  if (is_hygon()) {
    return "Dhyana";
  }
  return "Unknown x86";
}

int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");

  const char* cpu_type = nullptr;
  const char* x64 = nullptr;

  if (is_intel()) {
    cpu_type = "Intel";
    x64 = cpu_is_em64t() ? " Intel64" : "";
  } else if (is_amd()) {
    cpu_type = "AMD";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else if (is_hygon()) {
    cpu_type = "Hygon";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else {
    cpu_type = "Unknown x86";
    x64 = cpu_is_em64t() ? " x86_64" : "";
  }

  jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
               cpu_type,
               cpu_family_description(),
               supports_ht() ? " (HT)" : "",
               supports_sse3() ? " SSE3" : "",
               supports_ssse3() ? " SSSE3" : "",
               supports_sse4_1() ? " SSE4.1" : "",
               supports_sse4_2() ? " SSE4.2" : "",
               supports_sse4a() ? " SSE4A" : "",
               is_netburst() ? " Netburst" : "",
               is_intel_family_core() ? " Core" : "",
" Core" : "", 2738 x64); 2739 2740 return OS_OK; 2741 } 2742 2743 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2744 assert(buf != nullptr, "buffer is null!"); 2745 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2746 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2747 2748 // invoke newly generated asm code to fetch CPU Brand String 2749 getCPUIDBrandString_stub(&_cpuid_info); 2750 2751 // fetch results into buffer 2752 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2753 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2754 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2755 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2756 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2757 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2758 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2759 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2760 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2761 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2762 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2763 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2764 2765 return OS_OK; 2766 } 2767 2768 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2769 guarantee(buf != nullptr, "buffer is null!"); 2770 guarantee(buf_len > 0, "buffer len not enough!"); 2771 2772 unsigned int flag = 0; 2773 unsigned int fi = 0; 2774 size_t written = 0; 2775 const char* prefix = ""; 2776 2777 #define WRITE_TO_BUF(string) \ 2778 { \ 2779 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2780 if (res < 0) { \ 2781 return buf_len - 1; \ 2782 } \ 2783 written += res; \ 2784 if (prefix[0] == '\0') { \ 2785 prefix = ", "; \ 2786 } \ 2787 } 2788 2789 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2790 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2791 continue; /* no hyperthreading */ 2792 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2793 continue; /* no fast system call */ 2794 } 2795 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2796 WRITE_TO_BUF(_feature_edx_id[fi]); 2797 } 2798 } 2799 2800 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2801 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2802 WRITE_TO_BUF(_feature_ecx_id[fi]); 2803 } 2804 } 2805 2806 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2807 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2808 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2809 } 2810 } 2811 2812 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2813 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2814 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2815 } 2816 } 2817 2818 if (supports_tscinv_bit()) { 2819 WRITE_TO_BUF("Invariant TSC"); 2820 } 2821 2822 return written; 2823 } 2824 2825 /** 2826 * Write a detailed description of the cpu to a given buffer, including 2827 * feature set. 
 */
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char vendor_id[VENDOR_LENGTH];
  const char* family = nullptr;
  const char* model = nullptr;
  const char* brand = nullptr;
  int outputLen = 0;

  family = cpu_family_description();
  if (family == nullptr) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == nullptr) {
    model = unknown;
  }

  brand = cpu_brand_string();

  if (brand == nullptr) {
    brand = cpu_brand();
    if (brand == nullptr) {
      brand = unknown;
    }
  }

  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}


// Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}

/**
 * For information about extracting the frequency from the cpu brand string, please see:
 *
 *   Intel Processor Identification and the CPUID Instruction
 *   Application Note 485
 *   May 2012
 *
 * The return value is the frequency in Hz.
 */
int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
  const char* const brand_string = cpu_brand_string();
  if (brand_string == nullptr) {
    return 0;
  }
  const int64_t MEGA = 1000000;
  int64_t multiplier = 0;
  int64_t frequency = 0;
  uint8_t idx = 0;
  // The brand string buffer is at most 48 bytes.
  // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
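  // Worked example (hypothetical brand string ending in "... @ 3.40GHz"):
  // the scan below stops with idx at 'G', the multiplier becomes 10^9, and
  // since brand_string[idx-3] == '.' the frequency is assembled digit by
  // digit as 3 * 10^9 + 4 * 10^8 + 0 * 10^7 = 3.4 GHz.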
  for (; idx < 48-2; ++idx) {
    // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
    // Search brand string for "yHz" where y is M, G, or T.
    if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
      if (brand_string[idx] == 'M') {
        multiplier = MEGA;
      } else if (brand_string[idx] == 'G') {
        multiplier = MEGA * 1000;
      } else if (brand_string[idx] == 'T') {
        multiplier = MEGA * MEGA;
      }
      break;
    }
  }
  if (multiplier > 0) {
    // Compute frequency (in Hz) from brand string.
    if (brand_string[idx-3] == '.') { // if format is "x.xx"
      frequency =  (brand_string[idx-4] - '0') * multiplier;
      frequency += (brand_string[idx-2] - '0') * multiplier / 10;
      frequency += (brand_string[idx-1] - '0') * multiplier / 100;
    } else { // format is "xxxx"
      frequency =  (brand_string[idx-4] - '0') * 1000;
      frequency += (brand_string[idx-3] - '0') * 100;
      frequency += (brand_string[idx-2] - '0') * 10;
      frequency += (brand_string[idx-1] - '0');
      frequency *= multiplier;
    }
  }
  return frequency;
}


int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
  if (_max_qualified_cpu_frequency == 0) {
    _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
  }
  return _max_qualified_cpu_frequency;
}

uint64_t VM_Version::CpuidInfo::feature_flags() const {
  uint64_t result = 0;
  if (std_cpuid1_edx.bits.cmpxchg8 != 0)
    result |= CPU_CX8;
  if (std_cpuid1_edx.bits.cmov != 0)
    result |= CPU_CMOV;
  if (std_cpuid1_edx.bits.clflush != 0)
    result |= CPU_FLUSH;
#ifdef _LP64
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  assert ((result & CPU_FLUSH) != 0, "clflush should be available");
#endif
  if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.fxsr != 0))
    result |= CPU_FXSR;
  // HT flag is set for multi-core processors also.
  if (threads_per_core() > 1)
    result |= CPU_HT;
  if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.mmx != 0))
    result |= CPU_MMX;
  if (std_cpuid1_edx.bits.sse != 0)
    result |= CPU_SSE;
  if (std_cpuid1_edx.bits.sse2 != 0)
    result |= CPU_SSE2;
  if (std_cpuid1_ecx.bits.sse3 != 0)
    result |= CPU_SSE3;
  if (std_cpuid1_ecx.bits.ssse3 != 0)
    result |= CPU_SSSE3;
  if (std_cpuid1_ecx.bits.sse4_1 != 0)
    result |= CPU_SSE4_1;
  if (std_cpuid1_ecx.bits.sse4_2 != 0)
    result |= CPU_SSE4_2;
  if (std_cpuid1_ecx.bits.popcnt != 0)
    result |= CPU_POPCNT;
  if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
      xem_xcr0_eax.bits.apx_f != 0) {
    result |= CPU_APX_F;
  }
  if (std_cpuid1_ecx.bits.avx != 0 &&
      std_cpuid1_ecx.bits.osxsave != 0 &&
      xem_xcr0_eax.bits.sse != 0 &&
      xem_xcr0_eax.bits.ymm != 0) {
    result |= CPU_AVX;
    result |= CPU_VZEROUPPER;
    if (sefsl1_cpuid7_eax.bits.sha512 != 0)
      result |= CPU_SHA512;
    if (std_cpuid1_ecx.bits.f16c != 0)
      result |= CPU_F16C;
    if (sef_cpuid7_ebx.bits.avx2 != 0) {
      result |= CPU_AVX2;
      if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
        result |= CPU_AVX_IFMA;
    }
    if (sef_cpuid7_ecx.bits.gfni != 0)
      result |= CPU_GFNI;
    if (sef_cpuid7_ebx.bits.avx512f != 0 &&
        xem_xcr0_eax.bits.opmask != 0 &&
        xem_xcr0_eax.bits.zmm512 != 0 &&
        xem_xcr0_eax.bits.zmm32 != 0) {
      result |= CPU_AVX512F;
      if (sef_cpuid7_ebx.bits.avx512cd != 0)
        result |= CPU_AVX512CD;
      if (sef_cpuid7_ebx.bits.avx512dq != 0)
        result |= CPU_AVX512DQ;
      if (sef_cpuid7_ebx.bits.avx512ifma != 0)
        result |= CPU_AVX512_IFMA;
      if (sef_cpuid7_ebx.bits.avx512pf != 0)
        result |= CPU_AVX512PF;
      if (sef_cpuid7_ebx.bits.avx512er != 0)
        result |= CPU_AVX512ER;
      if (sef_cpuid7_ebx.bits.avx512bw != 0)
        result |= CPU_AVX512BW;
      if (sef_cpuid7_ebx.bits.avx512vl != 0)
        result |= CPU_AVX512VL;
      if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        result |= CPU_AVX512_VPOPCNTDQ;
      if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        result |= CPU_AVX512_VPCLMULQDQ;
      if (sef_cpuid7_ecx.bits.vaes != 0)
        result |= CPU_AVX512_VAES;
      if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
        result |= CPU_AVX512_VNNI;
      if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        result |= CPU_AVX512_BITALG;
      if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        result |= CPU_AVX512_VBMI;
      if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        result |= CPU_AVX512_VBMI2;
    }
  }
  if (std_cpuid1_ecx.bits.hv != 0)
    result |= CPU_HV;
  if (sef_cpuid7_ebx.bits.bmi1 != 0)
    result |= CPU_BMI1;
  if (std_cpuid1_edx.bits.tsc != 0)
    result |= CPU_TSC;
  if (ext_cpuid7_edx.bits.tsc_invariance != 0)
    result |= CPU_TSCINV_BIT;
  if (std_cpuid1_ecx.bits.aes != 0)
    result |= CPU_AES;
  if (sef_cpuid7_ebx.bits.erms != 0)
    result |= CPU_ERMS;
  if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    result |= CPU_FSRM;
  if (std_cpuid1_ecx.bits.clmul != 0)
    result |= CPU_CLMUL;
  if (sef_cpuid7_ebx.bits.rtm != 0)
    result |= CPU_RTM;
  if (sef_cpuid7_ebx.bits.adx != 0)
    result |= CPU_ADX;
  if (sef_cpuid7_ebx.bits.bmi2 != 0)
    result |= CPU_BMI2;
  if (sef_cpuid7_ebx.bits.sha != 0)
    result |= CPU_SHA;
  if (std_cpuid1_ecx.bits.fma != 0)
    result |= CPU_FMA;
  if (sef_cpuid7_ebx.bits.clflushopt != 0)
    result |= CPU_FLUSHOPT;
  if (ext_cpuid1_edx.bits.rdtscp != 0)
    result |= CPU_RDTSCP;
  if (sef_cpuid7_ecx.bits.rdpid != 0)
    result |= CPU_RDPID;

  // AMD|Hygon features.
  if (is_amd_family()) {
    if ((ext_cpuid1_edx.bits.tdnow != 0) ||
        (ext_cpuid1_ecx.bits.prefetchw != 0))
      result |= CPU_3DNOW_PREFETCH;
    if (ext_cpuid1_ecx.bits.lzcnt != 0)
      result |= CPU_LZCNT;
    if (ext_cpuid1_ecx.bits.sse4a != 0)
      result |= CPU_SSE4A;
  }

  // Intel features.
  if (is_intel()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
    if (sef_cpuid7_ebx.bits.clwb != 0) {
      result |= CPU_CLWB;
    }
    if (sef_cpuid7_edx.bits.serialize != 0)
      result |= CPU_SERIALIZE;
  }

  // ZX features.
  if (is_zx()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
  }

  // Protection key features.
  if (sef_cpuid7_ecx.bits.pku != 0) {
    result |= CPU_PKU;
  }
  if (sef_cpuid7_ecx.bits.ospke != 0) {
    result |= CPU_OSPKE;
  }

  // Control flow enforcement (CET) features.
  if (sef_cpuid7_ecx.bits.cet_ss != 0) {
    result |= CPU_CET_SS;
  }
  if (sef_cpuid7_edx.bits.cet_ibt != 0) {
    result |= CPU_CET_IBT;
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    result |= CPU_TSCINV;
  }

  return result;
}

bool VM_Version::os_supports_avx_vectors() {
  bool retVal = false;
  int nreg = 2 LP64_ONLY(+2);
  if (supports_evex()) {
    // Verify that the OS saves/restores all bits of EVEX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
      if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
  } else if (supports_avx()) {
    // Verify that the OS saves/restores all bits of AVX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
    // zmm_save will be set on an EVEX enabled machine even if we choose AVX code gen
    if (retVal == false) {
      // Verify that the OS saves/restores all bits of EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    }
  }
  return retVal;
}

bool VM_Version::os_supports_apx_egprs() {
  if (!supports_apx_f()) {
    return false;
  }
  // Enable APX support for product builds after
  // completion of planned features listed in JDK-8329030.
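  // Until then APX is reported as unsupported in product builds; non-product
  // builds additionally check that the OS preserved the EGPR test values
  // written by the test stub across signal handling.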
#if !defined(PRODUCT)
  if (_cpuid_info.apx_save[0] != egpr_test_value() ||
      _cpuid_info.apx_save[1] != egpr_test_value()) {
    return false;
  }
  return true;
#else
  return false;
#endif
}

uint VM_Version::cores_per_cpu() {
  uint result = 1;
  if (is_intel()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  } else if (is_amd_family()) {
    result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
  } else if (is_zx()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  }
  return result;
}

uint VM_Version::threads_per_core() {
  uint result = 1;
  if (is_intel() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (is_zx() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    if (cpu_family() >= 0x17) {
      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    } else {
      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
               cores_per_cpu();
    }
  }
  return (result == 0 ? 1 : result);
}

uint VM_Version::L1_line_size() {
  uint result = 0;
  if (is_intel()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  } else if (is_amd_family()) {
    result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
  } else if (is_zx()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  }
  if (result < 32) // not defined ?
    result = 32;   // 32 bytes by default on x86 and other x64
  return result;
}

bool VM_Version::is_intel_tsc_synched_at_init() {
  if (is_intel_family_core()) {
    uint32_t ext_model = extended_cpu_model();
    if (ext_model == CPU_MODEL_NEHALEM_EP     ||
        ext_model == CPU_MODEL_WESTMERE_EP    ||
        ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
        ext_model == CPU_MODEL_IVYBRIDGE_EP) {
      // <= 2-socket invariant tsc support. EX versions are usually used
      // in > 2-socket systems and likely don't synchronize tscs at
      // initialization.
      // Code that uses tsc values must be prepared for them to arbitrarily
      // jump forward or backward.
      return true;
    }
  }
  return false;
}

int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
  // Hardware prefetching (distance/size in bytes):
  // Pentium 3 -  64 /  32
  // Pentium 4 - 256 / 128
  // Athlon    -  64 /  32 ????
  // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
  // Core      - 128 /  64
  //
  // Software prefetching (distance in bytes / instruction with best score):
  // Pentium 3 - 128 / prefetchnta
  // Pentium 4 - 512 / prefetchnta
  // Athlon    - 128 / prefetchnta
  // Opteron   - 256 / prefetchnta
  // Core      - 256 / prefetchnta
  // It will be used only when AllocatePrefetchStyle > 0

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    if (supports_sse3() && cpu_family() == 6) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
#ifdef _LP64
        return 384;
#else
        return 320;
#endif
      }
    }
    if (supports_sse2()) {
      if (cpu_family() == 6) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}

bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
  switch (id) {
  case vmIntrinsics::_floatToFloat16:
  case vmIntrinsics::_float16ToFloat:
    if (!supports_float16()) {
      return false;
    }
    break;
  default:
    break;
  }
  return true;
}