1 /* 2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "asm/macroAssembler.hpp" 26 #include "asm/macroAssembler.inline.hpp" 27 #include "classfile/vmIntrinsics.hpp" 28 #include "code/codeBlob.hpp" 29 #include "compiler/compilerDefinitions.inline.hpp" 30 #include "jvm.h" 31 #include "logging/log.hpp" 32 #include "logging/logStream.hpp" 33 #include "memory/resourceArea.hpp" 34 #include "memory/universe.hpp" 35 #include "runtime/globals_extension.hpp" 36 #include "runtime/java.hpp" 37 #include "runtime/os.inline.hpp" 38 #include "runtime/stubCodeGenerator.hpp" 39 #include "runtime/vm_version.hpp" 40 #include "utilities/checkedCast.hpp" 41 #include "utilities/ostream.hpp" 42 #include "utilities/powerOfTwo.hpp" 43 #include "utilities/virtualizationSupport.hpp" 44 45 int VM_Version::_cpu; 46 int VM_Version::_model; 47 int VM_Version::_stepping; 48 bool VM_Version::_has_intel_jcc_erratum; 49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 50 51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name, 52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)}; 53 #undef DECLARE_CPU_FEATURE_NAME 54 55 // Address of instruction which causes SEGV 56 address VM_Version::_cpuinfo_segv_addr = nullptr; 57 // Address of instruction after the one which causes SEGV 58 address VM_Version::_cpuinfo_cont_addr = nullptr; 59 // Address of instruction which causes APX specific SEGV 60 address VM_Version::_cpuinfo_segv_addr_apx = nullptr; 61 // Address of instruction after the one which causes APX specific SEGV 62 address VM_Version::_cpuinfo_cont_addr_apx = nullptr; 63 64 static BufferBlob* stub_blob; 65 static const int stub_size = 2000; 66 67 int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong; 68 69 VM_Version::VM_Features VM_Version::_features; 70 VM_Version::VM_Features VM_Version::_cpu_features; 71 72 extern "C" { 73 typedef void (*get_cpu_info_stub_t)(void*); 74 typedef void (*detect_virt_stub_t)(uint32_t, 
uint32_t*); 75 typedef void (*clear_apx_test_state_t)(void); 76 } 77 static get_cpu_info_stub_t get_cpu_info_stub = nullptr; 78 static detect_virt_stub_t detect_virt_stub = nullptr; 79 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr; 80 81 bool VM_Version::supports_clflush() { 82 // clflush should always be available on x86_64 83 // if not we are in real trouble because we rely on it 84 // to flush the code cache. 85 // Unfortunately, Assembler::clflush is currently called as part 86 // of generation of the code cache flush routine. This happens 87 // under Universe::init before the processor features are set 88 // up. Assembler::flush calls this routine to check that clflush 89 // is allowed. So, we give the caller a free pass if Universe init 90 // is still in progress. 91 assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available"); 92 return true; 93 } 94 95 #define CPUID_STANDARD_FN 0x0 96 #define CPUID_STANDARD_FN_1 0x1 97 #define CPUID_STANDARD_FN_4 0x4 98 #define CPUID_STANDARD_FN_B 0xb 99 100 #define CPUID_EXTENDED_FN 0x80000000 101 #define CPUID_EXTENDED_FN_1 0x80000001 102 #define CPUID_EXTENDED_FN_2 0x80000002 103 #define CPUID_EXTENDED_FN_3 0x80000003 104 #define CPUID_EXTENDED_FN_4 0x80000004 105 #define CPUID_EXTENDED_FN_7 0x80000007 106 #define CPUID_EXTENDED_FN_8 0x80000008 107 108 class VM_Version_StubGenerator: public StubCodeGenerator { 109 public: 110 111 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 112 113 address clear_apx_test_state() { 114 # define __ _masm-> 115 address start = __ pc(); 116 // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal 117 // handling guarantees that preserved register values post signal handling were 118 // re-instantiated by operating system and not because they were not modified externally. 
119 120 bool save_apx = UseAPX; 121 VM_Version::set_apx_cpuFeatures(); 122 UseAPX = true; 123 // EGPR state save/restoration. 124 __ mov64(r16, 0L); 125 __ mov64(r31, 0L); 126 UseAPX = save_apx; 127 VM_Version::clean_cpuFeatures(); 128 __ ret(0); 129 return start; 130 } 131 132 address generate_get_cpu_info() { 133 // Flags to test CPU type. 134 const uint32_t HS_EFL_AC = 0x40000; 135 const uint32_t HS_EFL_ID = 0x200000; 136 // Values for when we don't have a CPUID instruction. 137 const int CPU_FAMILY_SHIFT = 8; 138 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 139 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 140 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2); 141 142 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29; 143 Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7; 144 Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning; 145 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; 146 147 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); 148 # define __ _masm-> 149 150 address start = __ pc(); 151 152 // 153 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); 154 // 155 // rcx and rdx are first and second argument registers on windows 156 157 __ push(rbp); 158 __ mov(rbp, c_rarg0); // cpuid_info address 159 __ push(rbx); 160 __ push(rsi); 161 __ pushf(); // preserve rbx, and flags 162 __ pop(rax); 163 __ push(rax); 164 __ mov(rcx, rax); 165 // 166 // if we are unable to change the AC flag, we have a 386 167 // 168 __ xorl(rax, HS_EFL_AC); 169 __ push(rax); 170 __ popf(); 171 __ pushf(); 172 __ pop(rax); 173 __ cmpptr(rax, rcx); 174 __ jccb(Assembler::notEqual, detect_486); 175 176 __ movl(rax, CPU_FAMILY_386); 177 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 178 __ jmp(done); 179 180 // 181 // If we are unable to change the ID flag, we have a 486 which does 182 // not 
support the "cpuid" instruction. 183 // 184 __ bind(detect_486); 185 __ mov(rax, rcx); 186 __ xorl(rax, HS_EFL_ID); 187 __ push(rax); 188 __ popf(); 189 __ pushf(); 190 __ pop(rax); 191 __ cmpptr(rcx, rax); 192 __ jccb(Assembler::notEqual, detect_586); 193 194 __ bind(cpu486); 195 __ movl(rax, CPU_FAMILY_486); 196 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 197 __ jmp(done); 198 199 // 200 // At this point, we have a chip which supports the "cpuid" instruction 201 // 202 __ bind(detect_586); 203 __ xorl(rax, rax); 204 __ cpuid(); 205 __ orl(rax, rax); 206 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 207 // value of at least 1, we give up and 208 // assume a 486 209 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 210 __ movl(Address(rsi, 0), rax); 211 __ movl(Address(rsi, 4), rbx); 212 __ movl(Address(rsi, 8), rcx); 213 __ movl(Address(rsi,12), rdx); 214 215 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? 216 __ jccb(Assembler::belowEqual, std_cpuid4); 217 218 // 219 // cpuid(0xB) Processor Topology 220 // 221 __ movl(rax, 0xb); 222 __ xorl(rcx, rcx); // Threads level 223 __ cpuid(); 224 225 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); 226 __ movl(Address(rsi, 0), rax); 227 __ movl(Address(rsi, 4), rbx); 228 __ movl(Address(rsi, 8), rcx); 229 __ movl(Address(rsi,12), rdx); 230 231 __ movl(rax, 0xb); 232 __ movl(rcx, 1); // Cores level 233 __ cpuid(); 234 __ push(rax); 235 __ andl(rax, 0x1f); // Determine if valid topology level 236 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 237 __ andl(rax, 0xffff); 238 __ pop(rax); 239 __ jccb(Assembler::equal, std_cpuid4); 240 241 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); 242 __ movl(Address(rsi, 0), rax); 243 __ movl(Address(rsi, 4), rbx); 244 __ movl(Address(rsi, 8), rcx); 245 __ movl(Address(rsi,12), rdx); 246 247 __ movl(rax, 0xb); 248 __ movl(rcx, 2); // Packages level 
249 __ cpuid(); 250 __ push(rax); 251 __ andl(rax, 0x1f); // Determine if valid topology level 252 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 253 __ andl(rax, 0xffff); 254 __ pop(rax); 255 __ jccb(Assembler::equal, std_cpuid4); 256 257 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); 258 __ movl(Address(rsi, 0), rax); 259 __ movl(Address(rsi, 4), rbx); 260 __ movl(Address(rsi, 8), rcx); 261 __ movl(Address(rsi,12), rdx); 262 263 // 264 // cpuid(0x4) Deterministic cache params 265 // 266 __ bind(std_cpuid4); 267 __ movl(rax, 4); 268 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? 269 __ jccb(Assembler::greater, std_cpuid1); 270 271 __ xorl(rcx, rcx); // L1 cache 272 __ cpuid(); 273 __ push(rax); 274 __ andl(rax, 0x1f); // Determine if valid cache parameters used 275 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 276 __ pop(rax); 277 __ jccb(Assembler::equal, std_cpuid1); 278 279 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 280 __ movl(Address(rsi, 0), rax); 281 __ movl(Address(rsi, 4), rbx); 282 __ movl(Address(rsi, 8), rcx); 283 __ movl(Address(rsi,12), rdx); 284 285 // 286 // Standard cpuid(0x1) 287 // 288 __ bind(std_cpuid1); 289 __ movl(rax, 1); 290 __ cpuid(); 291 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 292 __ movl(Address(rsi, 0), rax); 293 __ movl(Address(rsi, 4), rbx); 294 __ movl(Address(rsi, 8), rcx); 295 __ movl(Address(rsi,12), rdx); 296 297 // 298 // Check if OS has enabled XGETBV instruction to access XCR0 299 // (OSXSAVE feature flag) and CPU supports AVX 300 // 301 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 302 __ cmpl(rcx, 0x18000000); 303 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported 304 305 // 306 // XCR0, XFEATURE_ENABLED_MASK register 307 // 308 __ xorl(rcx, rcx); // zero for XCR0 register 309 __ xgetbv(); 310 __ lea(rsi, Address(rbp, 
in_bytes(VM_Version::xem_xcr0_offset()))); 311 __ movl(Address(rsi, 0), rax); 312 __ movl(Address(rsi, 4), rdx); 313 314 // 315 // cpuid(0x7) Structured Extended Features Enumeration Leaf. 316 // 317 __ bind(sef_cpuid); 318 __ movl(rax, 7); 319 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? 320 __ jccb(Assembler::greater, ext_cpuid); 321 // ECX = 0 322 __ xorl(rcx, rcx); 323 __ cpuid(); 324 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 325 __ movl(Address(rsi, 0), rax); 326 __ movl(Address(rsi, 4), rbx); 327 __ movl(Address(rsi, 8), rcx); 328 __ movl(Address(rsi, 12), rdx); 329 330 // 331 // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1. 332 // 333 __ bind(sefsl1_cpuid); 334 __ movl(rax, 7); 335 __ movl(rcx, 1); 336 __ cpuid(); 337 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 338 __ movl(Address(rsi, 0), rax); 339 __ movl(Address(rsi, 4), rdx); 340 341 // 342 // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0). 343 // 344 __ bind(std_cpuid29); 345 __ movl(rax, 0x29); 346 __ movl(rcx, 0); 347 __ cpuid(); 348 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset()))); 349 __ movl(Address(rsi, 0), rbx); 350 351 // 352 // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0). 353 // 354 __ bind(std_cpuid24); 355 __ movl(rax, 0x24); 356 __ movl(rcx, 0); 357 __ cpuid(); 358 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset()))); 359 __ movl(Address(rsi, 0), rax); 360 __ movl(Address(rsi, 4), rbx); 361 362 // 363 // Extended cpuid(0x80000000) 364 // 365 __ bind(ext_cpuid); 366 __ movl(rax, 0x80000000); 367 __ cpuid(); 368 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 369 __ jcc(Assembler::belowEqual, done); 370 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 371 __ jcc(Assembler::belowEqual, ext_cpuid1); 372 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? 
373 __ jccb(Assembler::belowEqual, ext_cpuid5); 374 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 375 __ jccb(Assembler::belowEqual, ext_cpuid7); 376 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported? 377 __ jccb(Assembler::belowEqual, ext_cpuid8); 378 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported? 379 __ jccb(Assembler::below, ext_cpuid8); 380 // 381 // Extended cpuid(0x8000001E) 382 // 383 __ movl(rax, 0x8000001E); 384 __ cpuid(); 385 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset()))); 386 __ movl(Address(rsi, 0), rax); 387 __ movl(Address(rsi, 4), rbx); 388 __ movl(Address(rsi, 8), rcx); 389 __ movl(Address(rsi,12), rdx); 390 391 // 392 // Extended cpuid(0x80000008) 393 // 394 __ bind(ext_cpuid8); 395 __ movl(rax, 0x80000008); 396 __ cpuid(); 397 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 398 __ movl(Address(rsi, 0), rax); 399 __ movl(Address(rsi, 4), rbx); 400 __ movl(Address(rsi, 8), rcx); 401 __ movl(Address(rsi,12), rdx); 402 403 // 404 // Extended cpuid(0x80000007) 405 // 406 __ bind(ext_cpuid7); 407 __ movl(rax, 0x80000007); 408 __ cpuid(); 409 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); 410 __ movl(Address(rsi, 0), rax); 411 __ movl(Address(rsi, 4), rbx); 412 __ movl(Address(rsi, 8), rcx); 413 __ movl(Address(rsi,12), rdx); 414 415 // 416 // Extended cpuid(0x80000005) 417 // 418 __ bind(ext_cpuid5); 419 __ movl(rax, 0x80000005); 420 __ cpuid(); 421 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 422 __ movl(Address(rsi, 0), rax); 423 __ movl(Address(rsi, 4), rbx); 424 __ movl(Address(rsi, 8), rcx); 425 __ movl(Address(rsi,12), rdx); 426 427 // 428 // Extended cpuid(0x80000001) 429 // 430 __ bind(ext_cpuid1); 431 __ movl(rax, 0x80000001); 432 __ cpuid(); 433 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 434 __ movl(Address(rsi, 0), rax); 435 __ movl(Address(rsi, 4), rbx); 436 __ 
movl(Address(rsi, 8), rcx); 437 __ movl(Address(rsi,12), rdx); 438 439 // 440 // Check if OS has enabled XGETBV instruction to access XCR0 441 // (OSXSAVE feature flag) and CPU supports APX 442 // 443 // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support 444 // and XCRO[19] bit for OS support to save/restore extended GPR state. 445 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 446 __ movl(rax, 0x200000); 447 __ andl(rax, Address(rsi, 4)); 448 __ jcc(Assembler::equal, vector_save_restore); 449 // check _cpuid_info.xem_xcr0_eax.bits.apx_f 450 __ movl(rax, 0x80000); 451 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f 452 __ jcc(Assembler::equal, vector_save_restore); 453 454 bool save_apx = UseAPX; 455 VM_Version::set_apx_cpuFeatures(); 456 UseAPX = true; 457 __ mov64(r16, VM_Version::egpr_test_value()); 458 __ mov64(r31, VM_Version::egpr_test_value()); 459 __ xorl(rsi, rsi); 460 VM_Version::set_cpuinfo_segv_addr_apx(__ pc()); 461 // Generate SEGV 462 __ movl(rax, Address(rsi, 0)); 463 464 VM_Version::set_cpuinfo_cont_addr_apx(__ pc()); 465 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset()))); 466 __ movq(Address(rsi, 0), r16); 467 __ movq(Address(rsi, 8), r31); 468 469 UseAPX = save_apx; 470 __ bind(vector_save_restore); 471 // 472 // Check if OS has enabled XGETBV instruction to access XCR0 473 // (OSXSAVE feature flag) and CPU supports AVX 474 // 475 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 476 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 477 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx 478 __ cmpl(rcx, 0x18000000); 479 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported 480 481 __ movl(rax, 0x6); 482 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 483 __ cmpl(rax, 0x6); 484 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported 485 
486 // we need to bridge farther than imm8, so we use this island as a thunk 487 __ bind(done); 488 __ jmp(wrapup); 489 490 __ bind(start_simd_check); 491 // 492 // Some OSs have a bug when upper 128/256bits of YMM/ZMM 493 // registers are not restored after a signal processing. 494 // Generate SEGV here (reference through null) 495 // and check upper YMM/ZMM bits after it. 496 // 497 int saved_useavx = UseAVX; 498 int saved_usesse = UseSSE; 499 500 // If UseAVX is uninitialized or is set by the user to include EVEX 501 if (use_evex) { 502 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 503 // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10 504 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 505 __ movl(rax, 0x10000); 506 __ andl(rax, Address(rsi, 4)); 507 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 508 __ movl(rbx, 0x80000); 509 __ andl(rbx, Address(rsi, 4)); 510 __ orl(rax, rbx); 511 __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported 512 // check _cpuid_info.xem_xcr0_eax.bits.opmask 513 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 514 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 515 __ movl(rax, 0xE0); 516 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 517 __ cmpl(rax, 0xE0); 518 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 519 520 if (FLAG_IS_DEFAULT(UseAVX)) { 521 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 522 __ movl(rax, Address(rsi, 0)); 523 __ cmpl(rax, 0x50654); // If it is Skylake 524 __ jcc(Assembler::equal, legacy_setup); 525 } 526 // EVEX setup: run in lowest evex mode 527 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 528 UseAVX = 3; 529 UseSSE = 2; 530 #ifdef _WINDOWS 531 // xmm5-xmm15 are not preserved by caller on windows 532 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx 533 __ subptr(rsp, 64); 534 __ evmovdqul(Address(rsp, 0), xmm7, 
Assembler::AVX_512bit); 535 __ subptr(rsp, 64); 536 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); 537 __ subptr(rsp, 64); 538 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); 539 #endif // _WINDOWS 540 541 // load value into all 64 bytes of zmm7 register 542 __ movl(rcx, VM_Version::ymm_test_value()); 543 __ movdl(xmm0, rcx); 544 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); 545 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); 546 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); 547 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); 548 VM_Version::clean_cpuFeatures(); 549 __ jmp(save_restore_except); 550 } 551 552 __ bind(legacy_setup); 553 // AVX setup 554 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 555 UseAVX = 1; 556 UseSSE = 2; 557 #ifdef _WINDOWS 558 __ subptr(rsp, 32); 559 __ vmovdqu(Address(rsp, 0), xmm7); 560 __ subptr(rsp, 32); 561 __ vmovdqu(Address(rsp, 0), xmm8); 562 __ subptr(rsp, 32); 563 __ vmovdqu(Address(rsp, 0), xmm15); 564 #endif // _WINDOWS 565 566 // load value into all 32 bytes of ymm7 register 567 __ movl(rcx, VM_Version::ymm_test_value()); 568 569 __ movdl(xmm0, rcx); 570 __ pshufd(xmm0, xmm0, 0x00); 571 __ vinsertf128_high(xmm0, xmm0); 572 __ vmovdqu(xmm7, xmm0); 573 __ vmovdqu(xmm8, xmm0); 574 __ vmovdqu(xmm15, xmm0); 575 VM_Version::clean_cpuFeatures(); 576 577 __ bind(save_restore_except); 578 __ xorl(rsi, rsi); 579 VM_Version::set_cpuinfo_segv_addr(__ pc()); 580 // Generate SEGV 581 __ movl(rax, Address(rsi, 0)); 582 583 VM_Version::set_cpuinfo_cont_addr(__ pc()); 584 // Returns here after signal. Save xmm0 to check it later. 
585 586 // If UseAVX is uninitialized or is set by the user to include EVEX 587 if (use_evex) { 588 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 589 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 590 __ movl(rax, 0x10000); 591 __ andl(rax, Address(rsi, 4)); 592 __ jcc(Assembler::equal, legacy_save_restore); 593 // check _cpuid_info.xem_xcr0_eax.bits.opmask 594 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 595 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 596 __ movl(rax, 0xE0); 597 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 598 __ cmpl(rax, 0xE0); 599 __ jcc(Assembler::notEqual, legacy_save_restore); 600 601 if (FLAG_IS_DEFAULT(UseAVX)) { 602 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 603 __ movl(rax, Address(rsi, 0)); 604 __ cmpl(rax, 0x50654); // If it is Skylake 605 __ jcc(Assembler::equal, legacy_save_restore); 606 } 607 // EVEX check: run in lowest evex mode 608 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 609 UseAVX = 3; 610 UseSSE = 2; 611 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); 612 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); 613 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); 614 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); 615 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); 616 617 #ifdef _WINDOWS 618 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); 619 __ addptr(rsp, 64); 620 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); 621 __ addptr(rsp, 64); 622 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); 623 __ addptr(rsp, 64); 624 #endif // _WINDOWS 625 generate_vzeroupper(wrapup); 626 VM_Version::clean_cpuFeatures(); 627 UseAVX = saved_useavx; 628 UseSSE = saved_usesse; 629 __ jmp(wrapup); 630 } 631 632 __ bind(legacy_save_restore); 633 // AVX check 634 VM_Version::set_avx_cpuFeatures(); // Enable temporary to 
pass asserts 635 UseAVX = 1; 636 UseSSE = 2; 637 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); 638 __ vmovdqu(Address(rsi, 0), xmm0); 639 __ vmovdqu(Address(rsi, 32), xmm7); 640 __ vmovdqu(Address(rsi, 64), xmm8); 641 __ vmovdqu(Address(rsi, 96), xmm15); 642 643 #ifdef _WINDOWS 644 __ vmovdqu(xmm15, Address(rsp, 0)); 645 __ addptr(rsp, 32); 646 __ vmovdqu(xmm8, Address(rsp, 0)); 647 __ addptr(rsp, 32); 648 __ vmovdqu(xmm7, Address(rsp, 0)); 649 __ addptr(rsp, 32); 650 #endif // _WINDOWS 651 652 generate_vzeroupper(wrapup); 653 VM_Version::clean_cpuFeatures(); 654 UseAVX = saved_useavx; 655 UseSSE = saved_usesse; 656 657 __ bind(wrapup); 658 __ popf(); 659 __ pop(rsi); 660 __ pop(rbx); 661 __ pop(rbp); 662 __ ret(0); 663 664 # undef __ 665 666 return start; 667 }; 668 void generate_vzeroupper(Label& L_wrapup) { 669 # define __ _masm-> 670 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 671 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' 672 __ jcc(Assembler::notEqual, L_wrapup); 673 __ movl(rcx, 0x0FFF0FF0); 674 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 675 __ andl(rcx, Address(rsi, 0)); 676 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200 677 __ jcc(Assembler::equal, L_wrapup); 678 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi 679 __ jcc(Assembler::equal, L_wrapup); 680 // vzeroupper() will use a pre-computed instruction sequence that we 681 // can't compute until after we've determined CPU capabilities. 
Use 682 // uncached variant here directly to be able to bootstrap correctly 683 __ vzeroupper_uncached(); 684 # undef __ 685 } 686 address generate_detect_virt() { 687 StubCodeMark mark(this, "VM_Version", "detect_virt_stub"); 688 # define __ _masm-> 689 690 address start = __ pc(); 691 692 // Evacuate callee-saved registers 693 __ push(rbp); 694 __ push(rbx); 695 __ push(rsi); // for Windows 696 697 __ mov(rax, c_rarg0); // CPUID leaf 698 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) 699 700 __ cpuid(); 701 702 // Store result to register array 703 __ movl(Address(rsi, 0), rax); 704 __ movl(Address(rsi, 4), rbx); 705 __ movl(Address(rsi, 8), rcx); 706 __ movl(Address(rsi, 12), rdx); 707 708 // Epilogue 709 __ pop(rsi); 710 __ pop(rbx); 711 __ pop(rbp); 712 __ ret(0); 713 714 # undef __ 715 716 return start; 717 }; 718 719 720 address generate_getCPUIDBrandString(void) { 721 // Flags to test CPU type. 722 const uint32_t HS_EFL_AC = 0x40000; 723 const uint32_t HS_EFL_ID = 0x200000; 724 // Values for when we don't have a CPUID instruction. 
725 const int CPU_FAMILY_SHIFT = 8; 726 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 727 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 728 729 Label detect_486, cpu486, detect_586, done, ext_cpuid; 730 731 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub"); 732 # define __ _masm-> 733 734 address start = __ pc(); 735 736 // 737 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); 738 // 739 // rcx and rdx are first and second argument registers on windows 740 741 __ push(rbp); 742 __ mov(rbp, c_rarg0); // cpuid_info address 743 __ push(rbx); 744 __ push(rsi); 745 __ pushf(); // preserve rbx, and flags 746 __ pop(rax); 747 __ push(rax); 748 __ mov(rcx, rax); 749 // 750 // if we are unable to change the AC flag, we have a 386 751 // 752 __ xorl(rax, HS_EFL_AC); 753 __ push(rax); 754 __ popf(); 755 __ pushf(); 756 __ pop(rax); 757 __ cmpptr(rax, rcx); 758 __ jccb(Assembler::notEqual, detect_486); 759 760 __ movl(rax, CPU_FAMILY_386); 761 __ jmp(done); 762 763 // 764 // If we are unable to change the ID flag, we have a 486 which does 765 // not support the "cpuid" instruction. 
766 // 767 __ bind(detect_486); 768 __ mov(rax, rcx); 769 __ xorl(rax, HS_EFL_ID); 770 __ push(rax); 771 __ popf(); 772 __ pushf(); 773 __ pop(rax); 774 __ cmpptr(rcx, rax); 775 __ jccb(Assembler::notEqual, detect_586); 776 777 __ bind(cpu486); 778 __ movl(rax, CPU_FAMILY_486); 779 __ jmp(done); 780 781 // 782 // At this point, we have a chip which supports the "cpuid" instruction 783 // 784 __ bind(detect_586); 785 __ xorl(rax, rax); 786 __ cpuid(); 787 __ orl(rax, rax); 788 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 789 // value of at least 1, we give up and 790 // assume a 486 791 792 // 793 // Extended cpuid(0x80000000) for processor brand string detection 794 // 795 __ bind(ext_cpuid); 796 __ movl(rax, CPUID_EXTENDED_FN); 797 __ cpuid(); 798 __ cmpl(rax, CPUID_EXTENDED_FN_4); 799 __ jcc(Assembler::below, done); 800 801 // 802 // Extended cpuid(0x80000002) // first 16 bytes in brand string 803 // 804 __ movl(rax, CPUID_EXTENDED_FN_2); 805 __ cpuid(); 806 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); 807 __ movl(Address(rsi, 0), rax); 808 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); 809 __ movl(Address(rsi, 0), rbx); 810 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); 811 __ movl(Address(rsi, 0), rcx); 812 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); 813 __ movl(Address(rsi,0), rdx); 814 815 // 816 // Extended cpuid(0x80000003) // next 16 bytes in brand string 817 // 818 __ movl(rax, CPUID_EXTENDED_FN_3); 819 __ cpuid(); 820 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); 821 __ movl(Address(rsi, 0), rax); 822 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); 823 __ movl(Address(rsi, 0), rbx); 824 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); 825 __ movl(Address(rsi, 0), rcx); 826 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); 827 __ 
movl(Address(rsi,0), rdx); 828 829 // 830 // Extended cpuid(0x80000004) // last 16 bytes in brand string 831 // 832 __ movl(rax, CPUID_EXTENDED_FN_4); 833 __ cpuid(); 834 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); 835 __ movl(Address(rsi, 0), rax); 836 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); 837 __ movl(Address(rsi, 0), rbx); 838 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); 839 __ movl(Address(rsi, 0), rcx); 840 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); 841 __ movl(Address(rsi,0), rdx); 842 843 // 844 // return 845 // 846 __ bind(done); 847 __ popf(); 848 __ pop(rsi); 849 __ pop(rbx); 850 __ pop(rbp); 851 __ ret(0); 852 853 # undef __ 854 855 return start; 856 }; 857 }; 858 859 void VM_Version::get_processor_features() { 860 861 _cpu = 4; // 486 by default 862 _model = 0; 863 _stepping = 0; 864 _logical_processors_per_package = 1; 865 // i486 internal cache is both I&D and has a 16-byte line size 866 _L1_data_cache_line_size = 16; 867 868 // Get raw processor info 869 870 get_cpu_info_stub(&_cpuid_info); 871 872 assert_is_initialized(); 873 _cpu = extended_cpu_family(); 874 _model = extended_cpu_model(); 875 _stepping = cpu_stepping(); 876 877 if (cpu_family() > 4) { // it supports CPUID 878 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings 879 _cpu_features = _features; // Preserve features 880 // Logical processors are only available on P4s and above, 881 // and only if hyperthreading is available. 882 _logical_processors_per_package = logical_processor_count(); 883 _L1_data_cache_line_size = L1_line_size(); 884 } 885 886 // xchg and xadd instructions 887 _supports_atomic_getset4 = true; 888 _supports_atomic_getadd4 = true; 889 _supports_atomic_getset8 = true; 890 _supports_atomic_getadd8 = true; 891 892 // OS should support SSE for x64 and hardware should support at least SSE2. 
893 if (!VM_Version::supports_sse2()) { 894 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 895 } 896 // in 64 bit the use of SSE2 is the minimum 897 if (UseSSE < 2) UseSSE = 2; 898 899 // flush_icache_stub have to be generated first. 900 // That is why Icache line size is hard coded in ICache class, 901 // see icache_x86.hpp. It is also the reason why we can't use 902 // clflush instruction in 32-bit VM since it could be running 903 // on CPU which does not support it. 904 // 905 // The only thing we can do is to verify that flushed 906 // ICache::line_size has correct value. 907 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); 908 // clflush_size is size in quadwords (8 bytes). 909 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); 910 911 // assigning this field effectively enables Unsafe.writebackMemory() 912 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero 913 // that is only implemented on x86_64 and only if the OS plays ball 914 if (os::supports_map_sync()) { 915 // publish data cache line flush size to generic field, otherwise 916 // let if default to zero thereby disabling writeback 917 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; 918 } 919 920 // Check if processor has Intel Ecore 921 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() && 922 (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF || 923 _model == 0xCC || _model == 0xDD)) { 924 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true); 925 } 926 927 if (UseSSE < 4) { 928 _features.clear_feature(CPU_SSE4_1); 929 _features.clear_feature(CPU_SSE4_2); 930 } 931 932 if (UseSSE < 3) { 933 _features.clear_feature(CPU_SSE3); 934 _features.clear_feature(CPU_SSSE3); 935 _features.clear_feature(CPU_SSE4A); 936 } 937 938 if (UseSSE < 2) 939 _features.clear_feature(CPU_SSE2); 940 941 if (UseSSE < 1) 942 
_features.clear_feature(CPU_SSE); 943 944 //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. 945 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { 946 UseAVX = 0; 947 } 948 949 // UseSSE is set to the smaller of what hardware supports and what 950 // the command line requires. I.e., you cannot set UseSSE to 2 on 951 // older Pentiums which do not support it. 952 int use_sse_limit = 0; 953 if (UseSSE > 0) { 954 if (UseSSE > 3 && supports_sse4_1()) { 955 use_sse_limit = 4; 956 } else if (UseSSE > 2 && supports_sse3()) { 957 use_sse_limit = 3; 958 } else if (UseSSE > 1 && supports_sse2()) { 959 use_sse_limit = 2; 960 } else if (UseSSE > 0 && supports_sse()) { 961 use_sse_limit = 1; 962 } else { 963 use_sse_limit = 0; 964 } 965 } 966 if (FLAG_IS_DEFAULT(UseSSE)) { 967 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 968 } else if (UseSSE > use_sse_limit) { 969 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit); 970 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 971 } 972 973 // first try initial setting and detect what we can support 974 int use_avx_limit = 0; 975 if (UseAVX > 0) { 976 if (UseSSE < 4) { 977 // Don't use AVX if SSE is unavailable or has been disabled. 978 use_avx_limit = 0; 979 } else if (UseAVX > 2 && supports_evex()) { 980 use_avx_limit = 3; 981 } else if (UseAVX > 1 && supports_avx2()) { 982 use_avx_limit = 2; 983 } else if (UseAVX > 0 && supports_avx()) { 984 use_avx_limit = 1; 985 } else { 986 use_avx_limit = 0; 987 } 988 } 989 if (FLAG_IS_DEFAULT(UseAVX)) { 990 // Don't use AVX-512 on older Skylakes unless explicitly requested. 
991 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { 992 FLAG_SET_DEFAULT(UseAVX, 2); 993 } else { 994 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 995 } 996 } 997 998 if (UseAVX > use_avx_limit) { 999 if (UseSSE < 4) { 1000 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX); 1001 } else { 1002 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit); 1003 } 1004 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 1005 } 1006 1007 if (UseAVX < 3) { 1008 _features.clear_feature(CPU_AVX512F); 1009 _features.clear_feature(CPU_AVX512DQ); 1010 _features.clear_feature(CPU_AVX512CD); 1011 _features.clear_feature(CPU_AVX512BW); 1012 _features.clear_feature(CPU_AVX512ER); 1013 _features.clear_feature(CPU_AVX512PF); 1014 _features.clear_feature(CPU_AVX512VL); 1015 _features.clear_feature(CPU_AVX512_VPOPCNTDQ); 1016 _features.clear_feature(CPU_AVX512_VPCLMULQDQ); 1017 _features.clear_feature(CPU_AVX512_VAES); 1018 _features.clear_feature(CPU_AVX512_VNNI); 1019 _features.clear_feature(CPU_AVX512_VBMI); 1020 _features.clear_feature(CPU_AVX512_VBMI2); 1021 _features.clear_feature(CPU_AVX512_BITALG); 1022 _features.clear_feature(CPU_AVX512_IFMA); 1023 _features.clear_feature(CPU_APX_F); 1024 _features.clear_feature(CPU_AVX512_FP16); 1025 _features.clear_feature(CPU_AVX10_1); 1026 _features.clear_feature(CPU_AVX10_2); 1027 } 1028 1029 1030 if (UseAVX < 2) { 1031 _features.clear_feature(CPU_AVX2); 1032 _features.clear_feature(CPU_AVX_IFMA); 1033 } 1034 1035 if (UseAVX < 1) { 1036 _features.clear_feature(CPU_AVX); 1037 _features.clear_feature(CPU_VZEROUPPER); 1038 _features.clear_feature(CPU_F16C); 1039 _features.clear_feature(CPU_SHA512); 1040 } 1041 1042 if (logical_processors_per_package() == 1) { 1043 // HT processor could be installed on a system which doesn't support HT. 
1044 _features.clear_feature(CPU_HT); 1045 } 1046 1047 if (is_intel()) { // Intel cpus specific settings 1048 if (is_knights_family()) { 1049 _features.clear_feature(CPU_VZEROUPPER); 1050 _features.clear_feature(CPU_AVX512BW); 1051 _features.clear_feature(CPU_AVX512VL); 1052 _features.clear_feature(CPU_APX_F); 1053 _features.clear_feature(CPU_AVX512DQ); 1054 _features.clear_feature(CPU_AVX512_VNNI); 1055 _features.clear_feature(CPU_AVX512_VAES); 1056 _features.clear_feature(CPU_AVX512_VPOPCNTDQ); 1057 _features.clear_feature(CPU_AVX512_VPCLMULQDQ); 1058 _features.clear_feature(CPU_AVX512_VBMI); 1059 _features.clear_feature(CPU_AVX512_VBMI2); 1060 _features.clear_feature(CPU_CLWB); 1061 _features.clear_feature(CPU_FLUSHOPT); 1062 _features.clear_feature(CPU_GFNI); 1063 _features.clear_feature(CPU_AVX512_BITALG); 1064 _features.clear_feature(CPU_AVX512_IFMA); 1065 _features.clear_feature(CPU_AVX_IFMA); 1066 _features.clear_feature(CPU_AVX512_FP16); 1067 _features.clear_feature(CPU_AVX10_1); 1068 _features.clear_feature(CPU_AVX10_2); 1069 } 1070 } 1071 1072 // Currently APX support is only enabled for targets supporting AVX512VL feature. 1073 bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl(); 1074 if (UseAPX && !apx_supported) { 1075 warning("UseAPX is not supported on this CPU, setting it to false"); 1076 FLAG_SET_DEFAULT(UseAPX, false); 1077 } 1078 1079 if (!UseAPX) { 1080 _features.clear_feature(CPU_APX_F); 1081 } 1082 1083 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) { 1084 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum(); 1085 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum); 1086 } else { 1087 _has_intel_jcc_erratum = IntelJccErratumMitigation; 1088 } 1089 1090 assert(supports_clflush(), "Always present"); 1091 if (X86ICacheSync == -1) { 1092 // Auto-detect, choosing the best performant one that still flushes 1093 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward. 
1094 if (supports_clwb()) { 1095 FLAG_SET_ERGO(X86ICacheSync, 3); 1096 } else if (supports_clflushopt()) { 1097 FLAG_SET_ERGO(X86ICacheSync, 2); 1098 } else { 1099 FLAG_SET_ERGO(X86ICacheSync, 1); 1100 } 1101 } else { 1102 if ((X86ICacheSync == 2) && !supports_clflushopt()) { 1103 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2"); 1104 } 1105 if ((X86ICacheSync == 3) && !supports_clwb()) { 1106 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3"); 1107 } 1108 if ((X86ICacheSync == 5) && !supports_serialize()) { 1109 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5"); 1110 } 1111 } 1112 1113 stringStream ss(2048); 1114 if (supports_hybrid()) { 1115 ss.print("(hybrid)"); 1116 } else { 1117 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core()); 1118 } 1119 ss.print(" family %d model %d stepping %d microcode 0x%x", 1120 cpu_family(), _model, _stepping, os::cpu_microcode_revision()); 1121 ss.print(", "); 1122 int features_offset = (int)ss.size(); 1123 insert_features_names(_features, ss); 1124 1125 _cpu_info_string = ss.as_string(true); 1126 _features_string = _cpu_info_string + features_offset; 1127 1128 // Use AES instructions if available. 1129 if (supports_aes()) { 1130 if (FLAG_IS_DEFAULT(UseAES)) { 1131 FLAG_SET_DEFAULT(UseAES, true); 1132 } 1133 if (!UseAES) { 1134 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1135 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); 1136 } 1137 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1138 } else { 1139 if (UseSSE > 2) { 1140 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1141 FLAG_SET_DEFAULT(UseAESIntrinsics, true); 1142 } 1143 } else { 1144 // The AES intrinsic stubs require AES instruction support (of course) 1145 // but also require sse3 mode or higher for instructions it use. 
1146 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1147 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); 1148 } 1149 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1150 } 1151 1152 // --AES-CTR begins-- 1153 if (!UseAESIntrinsics) { 1154 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1155 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled."); 1156 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1157 } 1158 } else { 1159 if (supports_sse4_1()) { 1160 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1161 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true); 1162 } 1163 } else { 1164 // The AES-CTR intrinsic stubs require AES instruction support (of course) 1165 // but also require sse4.1 mode or higher for instructions it use. 1166 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1167 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled."); 1168 } 1169 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1170 } 1171 } 1172 // --AES-CTR ends-- 1173 } 1174 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { 1175 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { 1176 warning("AES instructions are not available on this CPU"); 1177 FLAG_SET_DEFAULT(UseAES, false); 1178 } 1179 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1180 warning("AES intrinsics are not available on this CPU"); 1181 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1182 } 1183 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1184 warning("AES-CTR intrinsics are not available on this CPU"); 1185 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1186 } 1187 } 1188 1189 // Use CLMUL instructions if available. 
1190 if (supports_clmul()) { 1191 if (FLAG_IS_DEFAULT(UseCLMUL)) { 1192 UseCLMUL = true; 1193 } 1194 } else if (UseCLMUL) { 1195 if (!FLAG_IS_DEFAULT(UseCLMUL)) 1196 warning("CLMUL instructions not available on this CPU (AVX may also be required)"); 1197 FLAG_SET_DEFAULT(UseCLMUL, false); 1198 } 1199 1200 if (UseCLMUL && (UseSSE > 2)) { 1201 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { 1202 UseCRC32Intrinsics = true; 1203 } 1204 } else if (UseCRC32Intrinsics) { 1205 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) 1206 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); 1207 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); 1208 } 1209 1210 if (supports_avx2()) { 1211 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1212 UseAdler32Intrinsics = true; 1213 } 1214 } else if (UseAdler32Intrinsics) { 1215 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1216 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)"); 1217 } 1218 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1219 } 1220 1221 if (supports_sse4_2() && supports_clmul()) { 1222 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1223 UseCRC32CIntrinsics = true; 1224 } 1225 } else if (UseCRC32CIntrinsics) { 1226 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1227 warning("CRC32C intrinsics are not available on this CPU"); 1228 } 1229 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); 1230 } 1231 1232 // GHASH/GCM intrinsics 1233 if (UseCLMUL && (UseSSE > 2)) { 1234 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { 1235 UseGHASHIntrinsics = true; 1236 } 1237 } else if (UseGHASHIntrinsics) { 1238 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) 1239 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); 1240 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 1241 } 1242 1243 // ChaCha20 Intrinsics 1244 // As long as the system supports AVX as a baseline we can do a 1245 // SIMD-enabled block function. 
StubGenerator makes the determination 1246 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled 1247 // version. 1248 if (UseAVX >= 1) { 1249 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1250 UseChaCha20Intrinsics = true; 1251 } 1252 } else if (UseChaCha20Intrinsics) { 1253 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1254 warning("ChaCha20 intrinsic requires AVX instructions"); 1255 } 1256 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); 1257 } 1258 1259 // Kyber Intrinsics 1260 // Currently we only have them for AVX512 1261 #ifdef _LP64 1262 if (supports_evex() && supports_avx512bw()) { 1263 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) { 1264 UseKyberIntrinsics = true; 1265 } 1266 } else 1267 #endif 1268 if (UseKyberIntrinsics) { 1269 warning("Intrinsics for ML-KEM are not available on this CPU."); 1270 FLAG_SET_DEFAULT(UseKyberIntrinsics, false); 1271 } 1272 1273 // Dilithium Intrinsics 1274 // Currently we only have them for AVX512 1275 if (supports_evex() && supports_avx512bw()) { 1276 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) { 1277 UseDilithiumIntrinsics = true; 1278 } 1279 } else if (UseDilithiumIntrinsics) { 1280 warning("Intrinsics for ML-DSA are not available on this CPU."); 1281 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false); 1282 } 1283 1284 // Base64 Intrinsics (Check the condition for which the intrinsic will be active) 1285 if (UseAVX >= 2) { 1286 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { 1287 UseBASE64Intrinsics = true; 1288 } 1289 } else if (UseBASE64Intrinsics) { 1290 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) 1291 warning("Base64 intrinsic requires EVEX instructions on this CPU"); 1292 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); 1293 } 1294 1295 if (supports_fma()) { 1296 if (FLAG_IS_DEFAULT(UseFMA)) { 1297 UseFMA = true; 1298 } 1299 } else if (UseFMA) { 1300 warning("FMA instructions are not available on this CPU"); 1301 FLAG_SET_DEFAULT(UseFMA, false); 1302 } 1303 1304 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { 1305 
UseMD5Intrinsics = true; 1306 } 1307 1308 if (supports_sha() || (supports_avx2() && supports_bmi2())) { 1309 if (FLAG_IS_DEFAULT(UseSHA)) { 1310 UseSHA = true; 1311 } 1312 } else if (UseSHA) { 1313 warning("SHA instructions are not available on this CPU"); 1314 FLAG_SET_DEFAULT(UseSHA, false); 1315 } 1316 1317 if (supports_sha() && supports_sse4_1() && UseSHA) { 1318 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { 1319 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); 1320 } 1321 } else if (UseSHA1Intrinsics) { 1322 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); 1323 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); 1324 } 1325 1326 if (supports_sse4_1() && UseSHA) { 1327 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { 1328 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); 1329 } 1330 } else if (UseSHA256Intrinsics) { 1331 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); 1332 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); 1333 } 1334 1335 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) { 1336 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { 1337 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); 1338 } 1339 } else if (UseSHA512Intrinsics) { 1340 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); 1341 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); 1342 } 1343 1344 if (supports_evex() && supports_avx512bw()) { 1345 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) { 1346 UseSHA3Intrinsics = true; 1347 } 1348 } else if (UseSHA3Intrinsics) { 1349 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); 1350 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); 1351 } 1352 1353 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { 1354 FLAG_SET_DEFAULT(UseSHA, false); 1355 } 1356 1357 #if COMPILER2_OR_JVMCI 1358 int max_vector_size = 0; 1359 if (UseAVX == 0 || !os_supports_avx_vectors()) { 1360 // 16 
byte vectors (in XMM) are supported with SSE2+ 1361 max_vector_size = 16; 1362 } else if (UseAVX == 1 || UseAVX == 2) { 1363 // 32 bytes vectors (in YMM) are only supported with AVX+ 1364 max_vector_size = 32; 1365 } else if (UseAVX > 2) { 1366 // 64 bytes vectors (in ZMM) are only supported with AVX 3 1367 max_vector_size = 64; 1368 } 1369 1370 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit 1371 1372 if (!FLAG_IS_DEFAULT(MaxVectorSize)) { 1373 if (MaxVectorSize < min_vector_size) { 1374 warning("MaxVectorSize must be at least %i on this platform", min_vector_size); 1375 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); 1376 } 1377 if (MaxVectorSize > max_vector_size) { 1378 warning("MaxVectorSize must be at most %i on this platform", max_vector_size); 1379 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1380 } 1381 if (!is_power_of_2(MaxVectorSize)) { 1382 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); 1383 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1384 } 1385 } else { 1386 // If default, use highest supported configuration 1387 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1388 } 1389 1390 #if defined(COMPILER2) && defined(ASSERT) 1391 if (MaxVectorSize > 0) { 1392 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { 1393 tty->print_cr("State of YMM registers after signal handle:"); 1394 int nreg = 4; 1395 const char* ymm_name[4] = {"0", "7", "8", "15"}; 1396 for (int i = 0; i < nreg; i++) { 1397 tty->print("YMM%s:", ymm_name[i]); 1398 for (int j = 7; j >=0; j--) { 1399 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); 1400 } 1401 tty->cr(); 1402 } 1403 } 1404 } 1405 #endif // COMPILER2 && ASSERT 1406 1407 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { 1408 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { 1409 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); 1410 } 1411 } else if (UsePoly1305Intrinsics) { 1412 warning("Intrinsics 
for Poly1305 crypto hash functions not available on this CPU."); 1413 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); 1414 } 1415 1416 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { 1417 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) { 1418 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true); 1419 } 1420 } else if (UseIntPolyIntrinsics) { 1421 warning("Intrinsics for Polynomial crypto functions not available on this CPU."); 1422 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false); 1423 } 1424 1425 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1426 UseMultiplyToLenIntrinsic = true; 1427 } 1428 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1429 UseSquareToLenIntrinsic = true; 1430 } 1431 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1432 UseMulAddIntrinsic = true; 1433 } 1434 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1435 UseMontgomeryMultiplyIntrinsic = true; 1436 } 1437 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1438 UseMontgomerySquareIntrinsic = true; 1439 } 1440 #endif // COMPILER2_OR_JVMCI 1441 1442 // On new cpus instructions which update whole XMM register should be used 1443 // to prevent partial register stall due to dependencies on high half. 1444 // 1445 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1446 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1447 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1448 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 
1449 1450 1451 if (is_zx()) { // ZX cpus specific settings 1452 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1453 UseStoreImmI16 = false; // don't use it on ZX cpus 1454 } 1455 if ((cpu_family() == 6) || (cpu_family() == 7)) { 1456 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1457 // Use it on all ZX cpus 1458 UseAddressNop = true; 1459 } 1460 } 1461 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1462 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus 1463 } 1464 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1465 if (supports_sse3()) { 1466 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus 1467 } else { 1468 UseXmmRegToRegMoveAll = false; 1469 } 1470 } 1471 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus 1472 #ifdef COMPILER2 1473 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1474 // For new ZX cpus do the next optimization: 1475 // don't align the beginning of a loop if there are enough instructions 1476 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1477 // in current fetch line (OptoLoopAlignment) or the padding 1478 // is big (> MaxLoopPad). 1479 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of 1480 // generated NOP instructions. 11 is the largest size of one 1481 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1482 MaxLoopPad = 11; 1483 } 1484 #endif // COMPILER2 1485 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1486 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus 1487 } 1488 if (supports_sse4_2()) { // new ZX cpus 1489 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1490 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus 1491 } 1492 } 1493 } 1494 1495 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1496 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1497 } 1498 } 1499 1500 if (is_amd_family()) { // AMD cpus specific settings 1501 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { 1502 // Use it on new AMD cpus starting from Opteron. 
1503 UseAddressNop = true; 1504 } 1505 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { 1506 // Use it on new AMD cpus starting from Opteron. 1507 UseNewLongLShift = true; 1508 } 1509 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1510 if (supports_sse4a()) { 1511 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1512 } else { 1513 UseXmmLoadAndClearUpper = false; 1514 } 1515 } 1516 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1517 if (supports_sse4a()) { 1518 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1519 } else { 1520 UseXmmRegToRegMoveAll = false; 1521 } 1522 } 1523 if (FLAG_IS_DEFAULT(UseXmmI2F)) { 1524 if (supports_sse4a()) { 1525 UseXmmI2F = true; 1526 } else { 1527 UseXmmI2F = false; 1528 } 1529 } 1530 if (FLAG_IS_DEFAULT(UseXmmI2D)) { 1531 if (supports_sse4a()) { 1532 UseXmmI2D = true; 1533 } else { 1534 UseXmmI2D = false; 1535 } 1536 } 1537 1538 // some defaults for AMD family 15h 1539 if (cpu_family() == 0x15) { 1540 // On family 15h processors default is no sw prefetch 1541 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1542 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1543 } 1544 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 1545 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1546 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1547 } 1548 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 1549 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1550 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1551 } 1552 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1553 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1554 } 1555 } 1556 1557 #ifdef COMPILER2 1558 if (cpu_family() < 0x17 && MaxVectorSize > 16) { 1559 // Limit vectors size to 16 bytes on AMD cpus < 17h. 
1560 FLAG_SET_DEFAULT(MaxVectorSize, 16); 1561 } 1562 #endif // COMPILER2 1563 1564 // Some defaults for AMD family >= 17h && Hygon family 18h 1565 if (cpu_family() >= 0x17) { 1566 // On family >=17h processors use XMM and UnalignedLoadStores 1567 // for Array Copy 1568 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1569 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1570 } 1571 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1572 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1573 } 1574 #ifdef COMPILER2 1575 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1576 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1577 } 1578 #endif 1579 } 1580 } 1581 1582 if (is_intel()) { // Intel cpus specific settings 1583 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1584 UseStoreImmI16 = false; // don't use it on Intel cpus 1585 } 1586 if (is_intel_server_family() || cpu_family() == 15) { 1587 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1588 // Use it on all Intel cpus starting from PentiumPro 1589 UseAddressNop = true; 1590 } 1591 } 1592 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1593 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1594 } 1595 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1596 if (supports_sse3()) { 1597 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1598 } else { 1599 UseXmmRegToRegMoveAll = false; 1600 } 1601 } 1602 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus 1603 #ifdef COMPILER2 1604 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1605 // For new Intel cpus do the next optimization: 1606 // don't align the beginning of a loop if there are enough instructions 1607 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1608 // in current fetch line (OptoLoopAlignment) or the padding 1609 // is big (> MaxLoopPad). 1610 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1611 // generated NOP instructions. 
11 is the largest size of one 1612 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1613 MaxLoopPad = 11; 1614 } 1615 #endif // COMPILER2 1616 1617 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1618 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 1619 } 1620 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus 1621 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1622 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1623 } 1624 } 1625 } 1626 if (is_atom_family() || is_knights_family()) { 1627 #ifdef COMPILER2 1628 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1629 OptoScheduling = true; 1630 } 1631 #endif 1632 if (supports_sse4_2()) { // Silvermont 1633 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1634 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1635 } 1636 } 1637 if (FLAG_IS_DEFAULT(UseIncDec)) { 1638 FLAG_SET_DEFAULT(UseIncDec, false); 1639 } 1640 } 1641 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1642 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1643 } 1644 #ifdef COMPILER2 1645 if (UseAVX > 2) { 1646 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1647 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1648 ArrayOperationPartialInlineSize != 0 && 1649 ArrayOperationPartialInlineSize != 16 && 1650 ArrayOperationPartialInlineSize != 32 && 1651 ArrayOperationPartialInlineSize != 64)) { 1652 int inline_size = 0; 1653 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1654 inline_size = 64; 1655 } else if (MaxVectorSize >= 32) { 1656 inline_size = 32; 1657 } else if (MaxVectorSize >= 16) { 1658 inline_size = 16; 1659 } 1660 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1661 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); 1662 } 1663 ArrayOperationPartialInlineSize = inline_size; 1664 } 1665 1666 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1667 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? 
MaxVectorSize : 0; 1668 if (ArrayOperationPartialInlineSize) { 1669 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize); 1670 } else { 1671 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize); 1672 } 1673 } 1674 } 1675 #endif 1676 } 1677 1678 #ifdef COMPILER2 1679 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1680 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) { 1681 OptimizeFill = false; 1682 } 1683 } 1684 #endif 1685 if (supports_sse4_2()) { 1686 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1687 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1688 } 1689 } else { 1690 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1691 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1692 } 1693 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1694 } 1695 if (UseSSE42Intrinsics) { 1696 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1697 UseVectorizedMismatchIntrinsic = true; 1698 } 1699 } else if (UseVectorizedMismatchIntrinsic) { 1700 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1701 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1702 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1703 } 1704 if (UseAVX >= 2) { 1705 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true); 1706 } else if (UseVectorizedHashCodeIntrinsic) { 1707 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) 1708 warning("vectorizedHashCode intrinsics are not available on this CPU"); 1709 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1710 } 1711 1712 // Use count leading zeros count instruction if available. 
1713 if (supports_lzcnt()) { 1714 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1715 UseCountLeadingZerosInstruction = true; 1716 } 1717 } else if (UseCountLeadingZerosInstruction) { 1718 warning("lzcnt instruction is not available on this CPU"); 1719 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1720 } 1721 1722 // Use count trailing zeros instruction if available 1723 if (supports_bmi1()) { 1724 // tzcnt does not require VEX prefix 1725 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1726 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1727 // Don't use tzcnt if BMI1 is switched off on command line. 1728 UseCountTrailingZerosInstruction = false; 1729 } else { 1730 UseCountTrailingZerosInstruction = true; 1731 } 1732 } 1733 } else if (UseCountTrailingZerosInstruction) { 1734 warning("tzcnt instruction is not available on this CPU"); 1735 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1736 } 1737 1738 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1739 // VEX prefix is generated only when AVX > 0. 1740 if (supports_bmi1() && supports_avx()) { 1741 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1742 UseBMI1Instructions = true; 1743 } 1744 } else if (UseBMI1Instructions) { 1745 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1746 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1747 } 1748 1749 if (supports_bmi2() && supports_avx()) { 1750 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1751 UseBMI2Instructions = true; 1752 } 1753 } else if (UseBMI2Instructions) { 1754 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1755 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1756 } 1757 1758 // Use population count instruction if available. 
1759 if (supports_popcnt()) { 1760 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1761 UsePopCountInstruction = true; 1762 } 1763 } else if (UsePopCountInstruction) { 1764 warning("POPCNT instruction is not available on this CPU"); 1765 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1766 } 1767 1768 // Use fast-string operations if available. 1769 if (supports_erms()) { 1770 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1771 UseFastStosb = true; 1772 } 1773 } else if (UseFastStosb) { 1774 warning("fast-string operations are not available on this CPU"); 1775 FLAG_SET_DEFAULT(UseFastStosb, false); 1776 } 1777 1778 // For AMD Processors use XMM/YMM MOVDQU instructions 1779 // for Object Initialization as default 1780 if (is_amd() && cpu_family() >= 0x19) { 1781 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1782 UseFastStosb = false; 1783 } 1784 } 1785 1786 #ifdef COMPILER2 1787 if (is_intel() && MaxVectorSize > 16) { 1788 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1789 UseFastStosb = false; 1790 } 1791 } 1792 #endif 1793 1794 // Use XMM/YMM MOVDQU instruction for Object Initialization 1795 if (!UseFastStosb && UseUnalignedLoadStores) { 1796 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1797 UseXMMForObjInit = true; 1798 } 1799 } else if (UseXMMForObjInit) { 1800 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1801 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1802 } 1803 1804 #ifdef COMPILER2 1805 if (FLAG_IS_DEFAULT(AlignVector)) { 1806 // Modern processors allow misaligned memory operations for vectors. 
1807 AlignVector = !UseUnalignedLoadStores; 1808 } 1809 #endif // COMPILER2 1810 1811 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1812 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1813 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1814 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1815 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1816 } 1817 } 1818 1819 // Allocation prefetch settings 1820 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1821 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1822 (cache_line_size > AllocatePrefetchStepSize)) { 1823 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1824 } 1825 1826 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1827 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1828 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1829 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); 1830 } 1831 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1832 } 1833 1834 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1835 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1836 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1837 } 1838 1839 if (is_intel() && is_intel_server_family() && supports_sse3()) { 1840 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1841 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1842 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1843 } 1844 #ifdef COMPILER2 1845 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1846 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1847 } 1848 #endif 1849 } 1850 1851 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1852 #ifdef COMPILER2 1853 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1854 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1855 } 1856 #endif 1857 } 1858 1859 // Prefetch settings 1860 1861 // 
Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1862 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 1863 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1864 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1865 1866 // gc copy/scan is disabled if prefetchw isn't supported, because 1867 // Prefetch::write emits an inlined prefetchw on Linux. 1868 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1869 // The used prefetcht0 instruction works for both amd64 and em64t. 1870 1871 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1872 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1873 } 1874 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1875 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1876 } 1877 1878 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1879 (cache_line_size > ContendedPaddingWidth)) 1880 ContendedPaddingWidth = cache_line_size; 1881 1882 // This machine allows unaligned memory accesses 1883 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1884 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1885 } 1886 1887 #ifndef PRODUCT 1888 if (log_is_enabled(Info, os, cpu)) { 1889 LogStream ls(Log(os, cpu)::info()); 1890 outputStream* log = &ls; 1891 log->print_cr("Logical CPUs per core: %u", 1892 logical_processors_per_package()); 1893 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1894 log->print("UseSSE=%d", UseSSE); 1895 if (UseAVX > 0) { 1896 log->print(" UseAVX=%d", UseAVX); 1897 } 1898 if (UseAES) { 1899 log->print(" UseAES=1"); 1900 } 1901 #ifdef COMPILER2 1902 if (MaxVectorSize > 0) { 1903 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1904 } 1905 #endif 1906 log->cr(); 1907 log->print("Allocation"); 1908 if (AllocatePrefetchStyle <= 0) { 1909 log->print_cr(": no prefetching"); 1910 } else { 1911 log->print(" prefetching: "); 1912 if (AllocatePrefetchInstr == 0) { 1913 log->print("PREFETCHNTA"); 1914 } 
else if (AllocatePrefetchInstr == 1) {
      log->print("PREFETCHT0");
    } else if (AllocatePrefetchInstr == 2) {
      log->print("PREFETCHT2");
    } else if (AllocatePrefetchInstr == 3) {
      log->print("PREFETCHW");
    }
    if (AllocatePrefetchLines > 1) {
      log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
    } else {
      log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
    }
  }

  if (PrefetchCopyIntervalInBytes > 0) {
    log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
  }
  if (PrefetchScanIntervalInBytes > 0) {
    log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
  }
  if (ContendedPaddingWidth > 0) {
    log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
  }
  }
#endif // !PRODUCT

  // Math intrinsics with no CPU requirement: enabled unless the user
  // explicitly turned them off on the command line.
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
    FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
    FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
}

// Report which virtualization environment (if any) was detected by
// check_virtualizations() at startup.
void VM_Version::print_platform_virtualization_info(outputStream* st) {
  VirtualizationType vrt = VM_Version::get_detected_virtualization();
  if (vrt == XenHVM) {
    st->print_cr("Xen hardware-assisted virtualization detected");
  } else if (vrt == KVM) {
    st->print_cr("KVM virtualization detected");
  } else if (vrt == VMWare) {
    st->print_cr("VMWare virtualization detected");
    // VMware guestlib may supply extended metrics beyond the cpuid signature.
    VirtualizationSupport::print_virtualization_info(st);
  } else if (vrt == HyperV) {
    st->print_cr("Hyper-V virtualization detected");
  } else if (vrt == HyperVRole) {
    st->print_cr("Hyper-V role detected");
  }
}

// Returns true when the CPU's model/stepping (from cpuid) matches a part listed
// in Intel's Jump Conditional Code (JCC) erratum document referenced below.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// more information :
// https://kb.vmware.com/s/article/1009458
//
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0};  // 12 signature chars + NUL

  // Xen cpuid leaves can be found 0x100 aligned boundary starting
  // from 0x40000000 until 0x40010000.
  // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    // The 12-byte vendor signature lives in ebx:ecx:edx (registers[1..3]).
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}

#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2 &&
         is_intel_cascade_lake();
}
#endif

// Cascade Lake is the Skylake microarchitecture at stepping >= 5.
bool VM_Version::is_intel_cascade_lake() {
  return is_intel_skylake() && _stepping >= 5;
}

// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// The Intel platforms that support the serialize instruction
// have an improved implementation of 64-byte load/stores and so the default
// threshold is set to 0 for these platforms.
int VM_Version::avx3_threshold() {
  return (is_intel_server_family() &&
          supports_serialize() &&
          FLAG_IS_DEFAULT(AVX3Threshold)) ?
0 : AVX3Threshold;
}

// Invoke the generated stub that resets the APX-probe state.
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}

static bool _vm_version_initialized = false;

// One-time VM_Version setup: generate the CPUID / virtualization-detection /
// APX-state stubs, run CPU feature detection, and, when a hypervisor bit is
// reported, identify the virtualization environment.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                    g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                             g.clear_apx_test_state());
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}

// x86 family ids as reported in cpuid leaf 1 EAX.
typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Selected bits of _cpuid_info.ext_cpuid1_edx (extended cpuid EDX).
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// Bit masks for _cpuid_info.std_cpuid1_edx (cpuid leaf 1 EDX feature bits).
typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   =
0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;

// Stub used to fetch the 48-byte CPU brand string via cpuid.
static BufferBlob* cpuid_brand_string_stub_blob;
static const int cpuid_brand_string_stub_size = 550;

extern "C" {
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;                 // 12 vendor chars + NUL
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); // 48-byte brand string + NUL
static char* _cpu_brand_string = nullptr;        // lazily filled C-heap copy
static int64_t _max_qualified_cpu_frequency = 0; // cached brand-string frequency (Hz)

static int _no_of_threads = 0;
static int _no_of_cores = 0;

// Intel family id -> name, indexed by extended family id
// (used by cpu_family_description()). "" == no name for that id.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

// AMD family id -> name, indexed by extended family id.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
"",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Model names for family 6 (Pentium Pro line), indexed by extended model id;
// "" == unknown model, nullptr terminates the table.
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};

/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};


// Names for cpuid leaf 1 EDX feature bits; index == bit position,
// "" == bit intentionally not reported (see cpu_write_support_string()).
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

// Names for extended cpuid EDX feature bits; index == bit position.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};

// Names for cpuid leaf 1 ECX feature bits; index == bit position.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
"Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};

// Names for extended cpuid ECX feature bits; index == bit position.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};

// Generate the brand-string fetch stub. Must run before the first call to
// cpu_extended_brand_string() (which asserts the stub is initialized).
void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                            g.generate_getCPUIDBrandString());
}

// For family 6 (Pentium Pro line) map the extended model id to a name via
// _model_id_pentium_pro; returns nullptr for other families or when the
// model id lies past the table's nullptr sentinel (avoids reading out of bounds).
const char* VM_Version::cpu_model_description(void) {
  uint32_t cpu_family = extended_cpu_family();
  uint32_t cpu_model = extended_cpu_model();
  const char* model = nullptr;

  if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
    // Walk up to cpu_model, stopping at the nullptr terminator.
    for (uint32_t i = 0; i <= cpu_model; i++) {
      model = _model_id_pentium_pro[i];
      if (model == nullptr) {
break;
      }
    }
  }
  return model;
}

// Returns the cpuid extended brand string, lazily fetched into the C-heap
// cache _cpu_brand_string; nullptr if allocation or the cpuid query fails.
const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr;
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}

// Legacy brand-id lookup from cpuid leaf 1 EBX[7:0]; _brand_id is
// nullptr-terminated, so the walk stops early for out-of-table ids.
const char* VM_Version::cpu_brand(void) {
  const char* brand = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}

// True when the extended cpuid EDX reports 64-bit (Intel 64) support.
bool VM_Version::cpu_is_em64t(void) {
  return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
}

// Intel NetBurst == family 0xF (Pentium 4).
bool VM_Version::is_netburst(void) {
  return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
}

// Invariant-TSC support beyond the raw cpuid bit: AMD Barcelona parts
// report the bit but are excluded.
bool VM_Version::supports_tscinv_ext(void) {
  if (!supports_tscinv_bit()) {
    return false;
  }

  if (is_intel()) {
    return true;
  }

  if (is_amd()) {
    return !is_amd_Barcelona();
  }

  if (is_hygon()) {
    return true;
  }

  return false;
}

void VM_Version::resolve_cpu_information_details(void) {

  // in future we want to base this information on proper cpu
  // and cache topology enumeration such as:
  // Intel 64 Architecture Processor Topology Enumeration
  // which supports system cpu and cache topology enumeration
  // either using 2xAPICIDs or initial APICIDs

  // currently only rough cpu information estimates
  // which will not necessarily reflect the exact configuration of the system

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out number of threads per cpu package
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // use amount of threads visible to the process in order to guess number of sockets
  _no_of_sockets = _no_of_threads / threads_per_package;

  // process might only see a subset of the total number of threads
  // from a single processor package. Virtualization/resource management for example.
  // If so then just write a hard 1 as num of pkgs.
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}


// Map the extended family id to a human-readable family name; for Intel
// family 6 defer to the per-model table.
const char* VM_Version::cpu_family_description(void) {
  int cpu_family_id = extended_cpu_family();
  if (is_amd()) {
    if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
      return _family_id_amd[cpu_family_id];
    }
  }
  if (is_intel()) {
    if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
      return cpu_model_description();
    }
    if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
      return _family_id_intel[cpu_family_id];
    }
  }
  if (is_hygon()) {
    return "Dhyana";
  }
  return "Unknown x86";
}

// Write a one-line CPU type summary (vendor, family, SSE level, etc.) into
// buf; buf_len must be at least CPU_TYPE_DESC_BUF_SIZE. Returns OS_OK.
int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");

  const char* cpu_type = nullptr;
  const char* x64 = nullptr;

  if (is_intel()) {
    cpu_type = "Intel";
    x64 = cpu_is_em64t() ? " Intel64" : "";
  } else if (is_amd()) {
    cpu_type = "AMD";
    x64 = cpu_is_em64t() ?
" AMD64" : "";
  } else if (is_hygon()) {
    cpu_type = "Hygon";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else {
    cpu_type = "Unknown x86";
    x64 = cpu_is_em64t() ? " x86_64" : "";
  }

  jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
               cpu_type,
               cpu_family_description(),
               supports_ht() ? " (HT)" : "",
               supports_sse3() ? " SSE3" : "",
               supports_ssse3() ? " SSSE3" : "",
               supports_sse4_1() ? " SSE4.1" : "",
               supports_sse4_2() ? " SSE4.2" : "",
               supports_sse4a() ? " SSE4A" : "",
               is_netburst() ? " Netburst" : "",
               is_intel_family_core() ? " Core" : "",
               x64);

  return OS_OK;
}

// Copy the 48-byte cpuid brand string into buf (buf_len must be at least
// CPU_EBS_MAX_LENGTH = 48 + 1). Requires initialize_tsc() to have generated
// the fetch stub. Returns OS_OK.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer (12 x 4 raw bytes from the cpuid registers)
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}

// Append a comma-separated list of supported feature names to buf.
// Returns the number of characters written.
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

// Appends 'string' to buf, comma-separating after the first item; on a
// jio_snprintf error returns with the buffer considered full.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // cpuid leaf 1 EDX features (bits 0..29; named entries only).
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // cpuid leaf 1 ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // extended cpuid ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // extended cpuid EDX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}

/**
 * Write a detailed description of the cpu to a given buffer, including
 * feature set.
 */
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char  vendor_id[VENDOR_LENGTH];
  const char* family = nullptr;
  const char* model = nullptr;
  const char* brand = nullptr;
  int outputLen = 0;

  family = cpu_family_description();
  if (family == nullptr) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == nullptr) {
    model = unknown;
  }

  // Prefer the extended brand string; fall back to the legacy brand id.
  brand = cpu_brand_string();

  if (brand == nullptr) {
    brand = cpu_brand();
    if (brand == nullptr) {
      brand = unknown;
    }
  }

  // Vendor string is stored in EBX:EDX:ECX order, hence name_0/name_2/name_1.
  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    // Output was truncated or snprintf failed; NUL-terminate and report error.
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  // Append the feature-name list after the fixed header.
  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}


// Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}

/**
 * For information about extracting the frequency from the cpu brand string, please see:
 *
 * Intel Processor Identification and the CPUID Instruction
 * Application Note 485
 * May 2012
 *
 * The return value is the frequency in Hz.
2840 */ 2841 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2842 const char* const brand_string = cpu_brand_string(); 2843 if (brand_string == nullptr) { 2844 return 0; 2845 } 2846 const int64_t MEGA = 1000000; 2847 int64_t multiplier = 0; 2848 int64_t frequency = 0; 2849 uint8_t idx = 0; 2850 // The brand string buffer is at most 48 bytes. 2851 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 2852 for (; idx < 48-2; ++idx) { 2853 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2854 // Search brand string for "yHz" where y is M, G, or T. 2855 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2856 if (brand_string[idx] == 'M') { 2857 multiplier = MEGA; 2858 } else if (brand_string[idx] == 'G') { 2859 multiplier = MEGA * 1000; 2860 } else if (brand_string[idx] == 'T') { 2861 multiplier = MEGA * MEGA; 2862 } 2863 break; 2864 } 2865 } 2866 if (multiplier > 0) { 2867 // Compute frequency (in Hz) from brand string. 
2868 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2869 frequency = (brand_string[idx-4] - '0') * multiplier; 2870 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2871 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2872 } else { // format is "xxxx" 2873 frequency = (brand_string[idx-4] - '0') * 1000; 2874 frequency += (brand_string[idx-3] - '0') * 100; 2875 frequency += (brand_string[idx-2] - '0') * 10; 2876 frequency += (brand_string[idx-1] - '0'); 2877 frequency *= multiplier; 2878 } 2879 } 2880 return frequency; 2881 } 2882 2883 2884 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2885 if (_max_qualified_cpu_frequency == 0) { 2886 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2887 } 2888 return _max_qualified_cpu_frequency; 2889 } 2890 2891 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const { 2892 VM_Features vm_features; 2893 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2894 vm_features.set_feature(CPU_CX8); 2895 if (std_cpuid1_edx.bits.cmov != 0) 2896 vm_features.set_feature(CPU_CMOV); 2897 if (std_cpuid1_edx.bits.clflush != 0) 2898 vm_features.set_feature(CPU_FLUSH); 2899 // clflush should always be available on x86_64 2900 // if not we are in real trouble because we rely on it 2901 // to flush the code cache. 2902 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available"); 2903 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2904 ext_cpuid1_edx.bits.fxsr != 0)) 2905 vm_features.set_feature(CPU_FXSR); 2906 // HT flag is set for multi-core processors also. 
  if (threads_per_core() > 1)
    vm_features.set_feature(CPU_HT);
  // MMX: standard bit, or the AMD-family extended mirror of it.
  if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.mmx != 0))
    vm_features.set_feature(CPU_MMX);
  // Scalar SIMD generations from CPUID leaf 1.
  if (std_cpuid1_edx.bits.sse != 0)
    vm_features.set_feature(CPU_SSE);
  if (std_cpuid1_edx.bits.sse2 != 0)
    vm_features.set_feature(CPU_SSE2);
  if (std_cpuid1_ecx.bits.sse3 != 0)
    vm_features.set_feature(CPU_SSE3);
  if (std_cpuid1_ecx.bits.ssse3 != 0)
    vm_features.set_feature(CPU_SSSE3);
  if (std_cpuid1_ecx.bits.sse4_1 != 0)
    vm_features.set_feature(CPU_SSE4_1);
  if (std_cpuid1_ecx.bits.sse4_2 != 0)
    vm_features.set_feature(CPU_SSE4_2);
  if (std_cpuid1_ecx.bits.popcnt != 0)
    vm_features.set_feature(CPU_POPCNT);
  // APX: requires the CPU feature bit, the XCR0 state-enable bit (OS has
  // enabled extended GPR state), and the leaf-0x29 NCI/NDD/NF capability.
  if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
      xem_xcr0_eax.bits.apx_f != 0 &&
      std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
    vm_features.set_feature(CPU_APX_F);
  }
  // AVX family: gated on the CPU bit plus OSXSAVE and the XCR0 sse/ymm bits,
  // i.e. the OS must save/restore the wider register state.
  if (std_cpuid1_ecx.bits.avx != 0 &&
      std_cpuid1_ecx.bits.osxsave != 0 &&
      xem_xcr0_eax.bits.sse != 0 &&
      xem_xcr0_eax.bits.ymm != 0) {
    vm_features.set_feature(CPU_AVX);
    vm_features.set_feature(CPU_VZEROUPPER);
    if (sefsl1_cpuid7_eax.bits.sha512 != 0)
      vm_features.set_feature(CPU_SHA512);
    if (std_cpuid1_ecx.bits.f16c != 0)
      vm_features.set_feature(CPU_F16C);
    if (sef_cpuid7_ebx.bits.avx2 != 0) {
      vm_features.set_feature(CPU_AVX2);
      if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
        vm_features.set_feature(CPU_AVX_IFMA);
    }
    if (sef_cpuid7_ecx.bits.gfni != 0)
      vm_features.set_feature(CPU_GFNI);
    // AVX-512: additionally requires the XCR0 opmask/zmm state-enable bits.
    if (sef_cpuid7_ebx.bits.avx512f != 0 &&
        xem_xcr0_eax.bits.opmask != 0 &&
        xem_xcr0_eax.bits.zmm512 != 0 &&
        xem_xcr0_eax.bits.zmm32 != 0) {
      vm_features.set_feature(CPU_AVX512F);
      if (sef_cpuid7_ebx.bits.avx512cd != 0)
        vm_features.set_feature(CPU_AVX512CD);
      if (sef_cpuid7_ebx.bits.avx512dq != 0)
        vm_features.set_feature(CPU_AVX512DQ);
      if (sef_cpuid7_ebx.bits.avx512ifma != 0)
        vm_features.set_feature(CPU_AVX512_IFMA);
      if (sef_cpuid7_ebx.bits.avx512pf != 0)
        vm_features.set_feature(CPU_AVX512PF);
      if (sef_cpuid7_ebx.bits.avx512er != 0)
        vm_features.set_feature(CPU_AVX512ER);
      if (sef_cpuid7_ebx.bits.avx512bw != 0)
        vm_features.set_feature(CPU_AVX512BW);
      if (sef_cpuid7_ebx.bits.avx512vl != 0)
        vm_features.set_feature(CPU_AVX512VL);
      if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
      if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
      if (sef_cpuid7_ecx.bits.vaes != 0)
        vm_features.set_feature(CPU_AVX512_VAES);
      if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
        vm_features.set_feature(CPU_AVX512_VNNI);
      if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        vm_features.set_feature(CPU_AVX512_BITALG);
      if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        vm_features.set_feature(CPU_AVX512_VBMI);
      if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        vm_features.set_feature(CPU_AVX512_VBMI2);
    }
    // AVX10 (Intel only): a 512-bit-capable converged ISA version >= 1
    // implies the whole AVX-512 set above, so set those bits wholesale.
    if (is_intel()) {
      if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
          std_cpuid24_ebx.bits.avx10_vlen_512 != 0 &&
          std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
          xem_xcr0_eax.bits.opmask != 0 &&
          xem_xcr0_eax.bits.zmm512 != 0 &&
          xem_xcr0_eax.bits.zmm32 != 0) {
        vm_features.set_feature(CPU_AVX10_1);
        vm_features.set_feature(CPU_AVX512F);
        vm_features.set_feature(CPU_AVX512CD);
        vm_features.set_feature(CPU_AVX512DQ);
        vm_features.set_feature(CPU_AVX512PF);
        vm_features.set_feature(CPU_AVX512ER);
        vm_features.set_feature(CPU_AVX512BW);
        vm_features.set_feature(CPU_AVX512VL);
        vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
        vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
        vm_features.set_feature(CPU_AVX512_VAES);
        vm_features.set_feature(CPU_AVX512_VNNI);
        vm_features.set_feature(CPU_AVX512_BITALG);
        vm_features.set_feature(CPU_AVX512_VBMI);
        vm_features.set_feature(CPU_AVX512_VBMI2);
        if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
          vm_features.set_feature(CPU_AVX10_2);
        }
      }
    }
  }

  // Remaining scalar / miscellaneous feature bits (no OS-state gating).
  if (std_cpuid1_ecx.bits.hv != 0)
    vm_features.set_feature(CPU_HV);
  if (sef_cpuid7_ebx.bits.bmi1 != 0)
    vm_features.set_feature(CPU_BMI1);
  if (std_cpuid1_edx.bits.tsc != 0)
    vm_features.set_feature(CPU_TSC);
  if (ext_cpuid7_edx.bits.tsc_invariance != 0)
    vm_features.set_feature(CPU_TSCINV_BIT);
  if (std_cpuid1_ecx.bits.aes != 0)
    vm_features.set_feature(CPU_AES);
  if (ext_cpuid1_ecx.bits.lzcnt != 0)
    vm_features.set_feature(CPU_LZCNT);
  if (ext_cpuid1_ecx.bits.prefetchw != 0)
    vm_features.set_feature(CPU_3DNOW_PREFETCH);
  if (sef_cpuid7_ebx.bits.erms != 0)
    vm_features.set_feature(CPU_ERMS);
  if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    vm_features.set_feature(CPU_FSRM);
  if (std_cpuid1_ecx.bits.clmul != 0)
    vm_features.set_feature(CPU_CLMUL);
  if (sef_cpuid7_ebx.bits.rtm != 0)
    vm_features.set_feature(CPU_RTM);
  if (sef_cpuid7_ebx.bits.adx != 0)
    vm_features.set_feature(CPU_ADX);
  if (sef_cpuid7_ebx.bits.bmi2 != 0)
    vm_features.set_feature(CPU_BMI2);
  if (sef_cpuid7_ebx.bits.sha != 0)
    vm_features.set_feature(CPU_SHA);
  if (std_cpuid1_ecx.bits.fma != 0)
    vm_features.set_feature(CPU_FMA);
  if (sef_cpuid7_ebx.bits.clflushopt != 0)
    vm_features.set_feature(CPU_FLUSHOPT);
  if (sef_cpuid7_ebx.bits.clwb != 0)
    vm_features.set_feature(CPU_CLWB);
  if (ext_cpuid1_edx.bits.rdtscp != 0)
    vm_features.set_feature(CPU_RDTSCP);
  if (sef_cpuid7_ecx.bits.rdpid != 0)
    vm_features.set_feature(CPU_RDPID);

  // AMD|Hygon additional features.
  if (is_amd_family()) {
    // PREFETCHW was checked above, check TDNOW here.
3053 if ((ext_cpuid1_edx.bits.tdnow != 0)) 3054 vm_features.set_feature(CPU_3DNOW_PREFETCH); 3055 if (ext_cpuid1_ecx.bits.sse4a != 0) 3056 vm_features.set_feature(CPU_SSE4A); 3057 } 3058 3059 // Intel additional features. 3060 if (is_intel()) { 3061 if (sef_cpuid7_edx.bits.serialize != 0) 3062 vm_features.set_feature(CPU_SERIALIZE); 3063 if (sef_cpuid7_edx.bits.hybrid != 0) 3064 vm_features.set_feature(CPU_HYBRID); 3065 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0) 3066 vm_features.set_feature(CPU_AVX512_FP16); 3067 } 3068 3069 // ZX additional features. 3070 if (is_zx()) { 3071 // We do not know if these are supported by ZX, so we cannot trust 3072 // common CPUID bit for them. 3073 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?"); 3074 vm_features.clear_feature(CPU_CLWB); 3075 } 3076 3077 // Protection key features. 3078 if (sef_cpuid7_ecx.bits.pku != 0) { 3079 vm_features.set_feature(CPU_PKU); 3080 } 3081 if (sef_cpuid7_ecx.bits.ospke != 0) { 3082 vm_features.set_feature(CPU_OSPKE); 3083 } 3084 3085 // Control flow enforcement (CET) features. 3086 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3087 vm_features.set_feature(CPU_CET_SS); 3088 } 3089 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3090 vm_features.set_feature(CPU_CET_IBT); 3091 } 3092 3093 // Composite features. 3094 if (supports_tscinv_bit() && 3095 ((is_amd_family() && !is_amd_Barcelona()) || 3096 is_intel_tsc_synched_at_init())) { 3097 vm_features.set_feature(CPU_TSCINV); 3098 } 3099 return vm_features; 3100 } 3101 3102 bool VM_Version::os_supports_avx_vectors() { 3103 bool retVal = false; 3104 int nreg = 4; 3105 if (supports_evex()) { 3106 // Verify that OS save/restore all bits of EVEX registers 3107 // during signal processing. 
3108 retVal = true; 3109 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3110 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3111 retVal = false; 3112 break; 3113 } 3114 } 3115 } else if (supports_avx()) { 3116 // Verify that OS save/restore all bits of AVX registers 3117 // during signal processing. 3118 retVal = true; 3119 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3120 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3121 retVal = false; 3122 break; 3123 } 3124 } 3125 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3126 if (retVal == false) { 3127 // Verify that OS save/restore all bits of EVEX registers 3128 // during signal processing. 3129 retVal = true; 3130 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3131 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3132 retVal = false; 3133 break; 3134 } 3135 } 3136 } 3137 } 3138 return retVal; 3139 } 3140 3141 bool VM_Version::os_supports_apx_egprs() { 3142 if (!supports_apx_f()) { 3143 return false; 3144 } 3145 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3146 _cpuid_info.apx_save[1] != egpr_test_value()) { 3147 return false; 3148 } 3149 return true; 3150 } 3151 3152 uint VM_Version::cores_per_cpu() { 3153 uint result = 1; 3154 if (is_intel()) { 3155 bool supports_topology = supports_processor_topology(); 3156 if (supports_topology) { 3157 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3158 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3159 } 3160 if (!supports_topology || result == 0) { 3161 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3162 } 3163 } else if (is_amd_family()) { 3164 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1; 3165 if (cpu_family() >= 0x17) { // Zen or later 3166 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3167 } 3168 } else if (is_zx()) { 3169 bool supports_topology = supports_processor_topology(); 3170 if 
(supports_topology) { 3171 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3172 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3173 } 3174 if (!supports_topology || result == 0) { 3175 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3176 } 3177 } 3178 return result; 3179 } 3180 3181 uint VM_Version::threads_per_core() { 3182 uint result = 1; 3183 if (is_intel() && supports_processor_topology()) { 3184 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3185 } else if (is_zx() && supports_processor_topology()) { 3186 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3187 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3188 if (cpu_family() >= 0x17) { 3189 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3190 } else { 3191 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3192 cores_per_cpu(); 3193 } 3194 } 3195 return (result == 0 ? 1 : result); 3196 } 3197 3198 uint VM_Version::L1_line_size() { 3199 uint result = 0; 3200 if (is_intel()) { 3201 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3202 } else if (is_amd_family()) { 3203 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3204 } else if (is_zx()) { 3205 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3206 } 3207 if (result < 32) // not defined ? 3208 result = 32; // 32 bytes by default on x86 and other x64 3209 return result; 3210 } 3211 3212 bool VM_Version::is_intel_tsc_synched_at_init() { 3213 if (is_intel_family_core()) { 3214 uint32_t ext_model = extended_cpu_model(); 3215 if (ext_model == CPU_MODEL_NEHALEM_EP || 3216 ext_model == CPU_MODEL_WESTMERE_EP || 3217 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3218 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3219 // <= 2-socket invariant tsc support. EX versions are usually used 3220 // in > 2-socket systems and likely don't synchronize tscs at 3221 // initialization. 3222 // Code that uses tsc values must be prepared for them to arbitrarily 3223 // jump forward or backward. 
3224 return true; 3225 } 3226 } 3227 return false; 3228 } 3229 3230 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3231 // Hardware prefetching (distance/size in bytes): 3232 // Pentium 3 - 64 / 32 3233 // Pentium 4 - 256 / 128 3234 // Athlon - 64 / 32 ???? 3235 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3236 // Core - 128 / 64 3237 // 3238 // Software prefetching (distance in bytes / instruction with best score): 3239 // Pentium 3 - 128 / prefetchnta 3240 // Pentium 4 - 512 / prefetchnta 3241 // Athlon - 128 / prefetchnta 3242 // Opteron - 256 / prefetchnta 3243 // Core - 256 / prefetchnta 3244 // It will be used only when AllocatePrefetchStyle > 0 3245 3246 if (is_amd_family()) { // AMD | Hygon 3247 if (supports_sse2()) { 3248 return 256; // Opteron 3249 } else { 3250 return 128; // Athlon 3251 } 3252 } else { // Intel 3253 if (supports_sse3() && is_intel_server_family()) { 3254 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3255 return 192; 3256 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3257 return 384; 3258 } 3259 } 3260 if (supports_sse2()) { 3261 if (is_intel_server_family()) { 3262 return 256; // Pentium M, Core, Core2 3263 } else { 3264 return 512; // Pentium 4 3265 } 3266 } else { 3267 return 128; // Pentium 3 (and all other old CPUs) 3268 } 3269 } 3270 } 3271 3272 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3273 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3274 switch (id) { 3275 case vmIntrinsics::_floatToFloat16: 3276 case vmIntrinsics::_float16ToFloat: 3277 if (!supports_float16()) { 3278 return false; 3279 } 3280 break; 3281 default: 3282 break; 3283 } 3284 return true; 3285 } 3286 3287 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) { 3288 int i = 0; 3289 ss.join([&]() { 3290 while (i < MAX_CPU_FEATURES) { 3291 if (_features.supports_feature((VM_Version::Feature_Flag)i)) { 3292 
return _features_names[i++]; 3293 } 3294 i += 1; 3295 } 3296 return (const char*)nullptr; 3297 }, ", "); 3298 }