1 /* 2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "asm/macroAssembler.hpp" 26 #include "asm/macroAssembler.inline.hpp" 27 #include "classfile/vmIntrinsics.hpp" 28 #include "code/codeBlob.hpp" 29 #include "compiler/compilerDefinitions.inline.hpp" 30 #include "jvm.h" 31 #include "logging/log.hpp" 32 #include "logging/logStream.hpp" 33 #include "memory/resourceArea.hpp" 34 #include "memory/universe.hpp" 35 #include "runtime/globals_extension.hpp" 36 #include "runtime/java.hpp" 37 #include "runtime/os.inline.hpp" 38 #include "runtime/stubCodeGenerator.hpp" 39 #include "runtime/vm_version.hpp" 40 #include "utilities/checkedCast.hpp" 41 #include "utilities/ostream.hpp" 42 #include "utilities/powerOfTwo.hpp" 43 #include "utilities/virtualizationSupport.hpp" 44 45 int VM_Version::_cpu; 46 int VM_Version::_model; 47 int VM_Version::_stepping; 48 bool VM_Version::_has_intel_jcc_erratum; 49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 50 51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name), 52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)}; 53 #undef DECLARE_CPU_FEATURE_NAME 54 55 // Address of instruction which causes SEGV 56 address VM_Version::_cpuinfo_segv_addr = nullptr; 57 // Address of instruction after the one which causes SEGV 58 address VM_Version::_cpuinfo_cont_addr = nullptr; 59 // Address of instruction which causes APX specific SEGV 60 address VM_Version::_cpuinfo_segv_addr_apx = nullptr; 61 // Address of instruction after the one which causes APX specific SEGV 62 address VM_Version::_cpuinfo_cont_addr_apx = nullptr; 63 64 static BufferBlob* stub_blob; 65 static const int stub_size = 2000; 66 67 int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong; 68 69 VM_Version::VM_Features VM_Version::_features; 70 VM_Version::VM_Features VM_Version::_cpu_features; 71 72 extern "C" { 73 typedef void (*get_cpu_info_stub_t)(void*); 74 typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*); 75 typedef void (*clear_apx_test_state_t)(void); 76 } 77 static get_cpu_info_stub_t get_cpu_info_stub = nullptr; 78 static detect_virt_stub_t detect_virt_stub = nullptr; 79 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr; 80 81 bool VM_Version::supports_clflush() { 82 // clflush should always be available on x86_64 83 // if not we are in real trouble because we rely on it 84 // to flush the code cache. 85 // Unfortunately, Assembler::clflush is currently called as part 86 // of generation of the code cache flush routine. This happens 87 // under Universe::init before the processor features are set 88 // up. Assembler::flush calls this routine to check that clflush 89 // is allowed. So, we give the caller a free pass if Universe init 90 // is still in progress. 91 assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available"); 92 return true; 93 } 94 95 #define CPUID_STANDARD_FN 0x0 96 #define CPUID_STANDARD_FN_1 0x1 97 #define CPUID_STANDARD_FN_4 0x4 98 #define CPUID_STANDARD_FN_B 0xb 99 100 #define CPUID_EXTENDED_FN 0x80000000 101 #define CPUID_EXTENDED_FN_1 0x80000001 102 #define CPUID_EXTENDED_FN_2 0x80000002 103 #define CPUID_EXTENDED_FN_3 0x80000003 104 #define CPUID_EXTENDED_FN_4 0x80000004 105 #define CPUID_EXTENDED_FN_7 0x80000007 106 #define CPUID_EXTENDED_FN_8 0x80000008 107 108 class VM_Version_StubGenerator: public StubCodeGenerator { 109 public: 110 111 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 112 113 address clear_apx_test_state() { 114 # define __ _masm-> 115 address start = __ pc(); 116 // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal 117 // handling guarantees that preserved register values post signal handling were 118 // re-instantiated by operating system and not because they were not modified externally. 119 120 bool save_apx = UseAPX; 121 VM_Version::set_apx_cpuFeatures(); 122 UseAPX = true; 123 // EGPR state save/restoration. 124 __ mov64(r16, 0L); 125 __ mov64(r31, 0L); 126 UseAPX = save_apx; 127 VM_Version::clean_cpuFeatures(); 128 __ ret(0); 129 return start; 130 } 131 132 address generate_get_cpu_info() { 133 // Flags to test CPU type. 134 const uint32_t HS_EFL_AC = 0x40000; 135 const uint32_t HS_EFL_ID = 0x200000; 136 // Values for when we don't have a CPUID instruction. 137 const int CPU_FAMILY_SHIFT = 8; 138 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 139 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 140 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2); 141 142 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24; 143 Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7; 144 Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning; 145 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; 146 147 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); 148 # define __ _masm-> 149 150 address start = __ pc(); 151 152 // 153 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); 154 // 155 // rcx and rdx are first and second argument registers on windows 156 157 __ push(rbp); 158 __ mov(rbp, c_rarg0); // cpuid_info address 159 __ push(rbx); 160 __ push(rsi); 161 __ pushf(); // preserve rbx, and flags 162 __ pop(rax); 163 __ push(rax); 164 __ mov(rcx, rax); 165 // 166 // if we are unable to change the AC flag, we have a 386 167 // 168 __ xorl(rax, HS_EFL_AC); 169 __ push(rax); 170 __ popf(); 171 __ pushf(); 172 __ pop(rax); 173 __ cmpptr(rax, rcx); 174 __ jccb(Assembler::notEqual, detect_486); 175 176 __ movl(rax, CPU_FAMILY_386); 177 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 178 __ jmp(done); 179 180 // 181 // If we are unable to change the ID flag, we have a 486 which does 182 // not support the "cpuid" instruction. 183 // 184 __ bind(detect_486); 185 __ mov(rax, rcx); 186 __ xorl(rax, HS_EFL_ID); 187 __ push(rax); 188 __ popf(); 189 __ pushf(); 190 __ pop(rax); 191 __ cmpptr(rcx, rax); 192 __ jccb(Assembler::notEqual, detect_586); 193 194 __ bind(cpu486); 195 __ movl(rax, CPU_FAMILY_486); 196 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 197 __ jmp(done); 198 199 // 200 // At this point, we have a chip which supports the "cpuid" instruction 201 // 202 __ bind(detect_586); 203 __ xorl(rax, rax); 204 __ cpuid(); 205 __ orl(rax, rax); 206 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 207 // value of at least 1, we give up and 208 // assume a 486 209 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 210 __ movl(Address(rsi, 0), rax); 211 __ movl(Address(rsi, 4), rbx); 212 __ movl(Address(rsi, 8), rcx); 213 __ movl(Address(rsi,12), rdx); 214 215 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? 216 __ jccb(Assembler::belowEqual, std_cpuid4); 217 218 // 219 // cpuid(0xB) Processor Topology 220 // 221 __ movl(rax, 0xb); 222 __ xorl(rcx, rcx); // Threads level 223 __ cpuid(); 224 225 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); 226 __ movl(Address(rsi, 0), rax); 227 __ movl(Address(rsi, 4), rbx); 228 __ movl(Address(rsi, 8), rcx); 229 __ movl(Address(rsi,12), rdx); 230 231 __ movl(rax, 0xb); 232 __ movl(rcx, 1); // Cores level 233 __ cpuid(); 234 __ push(rax); 235 __ andl(rax, 0x1f); // Determine if valid topology level 236 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 237 __ andl(rax, 0xffff); 238 __ pop(rax); 239 __ jccb(Assembler::equal, std_cpuid4); 240 241 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); 242 __ movl(Address(rsi, 0), rax); 243 __ movl(Address(rsi, 4), rbx); 244 __ movl(Address(rsi, 8), rcx); 245 __ movl(Address(rsi,12), rdx); 246 247 __ movl(rax, 0xb); 248 __ movl(rcx, 2); // Packages level 249 __ cpuid(); 250 __ push(rax); 251 __ andl(rax, 0x1f); // Determine if valid topology level 252 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 253 __ andl(rax, 0xffff); 254 __ pop(rax); 255 __ jccb(Assembler::equal, std_cpuid4); 256 257 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); 258 __ movl(Address(rsi, 0), rax); 259 __ movl(Address(rsi, 4), rbx); 260 __ movl(Address(rsi, 8), rcx); 261 __ movl(Address(rsi,12), rdx); 262 263 // 264 // cpuid(0x4) Deterministic cache params 265 // 266 __ bind(std_cpuid4); 267 __ movl(rax, 4); 268 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? 269 __ jccb(Assembler::greater, std_cpuid1); 270 271 __ xorl(rcx, rcx); // L1 cache 272 __ cpuid(); 273 __ push(rax); 274 __ andl(rax, 0x1f); // Determine if valid cache parameters used 275 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 276 __ pop(rax); 277 __ jccb(Assembler::equal, std_cpuid1); 278 279 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 280 __ movl(Address(rsi, 0), rax); 281 __ movl(Address(rsi, 4), rbx); 282 __ movl(Address(rsi, 8), rcx); 283 __ movl(Address(rsi,12), rdx); 284 285 // 286 // Standard cpuid(0x1) 287 // 288 __ bind(std_cpuid1); 289 __ movl(rax, 1); 290 __ cpuid(); 291 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 292 __ movl(Address(rsi, 0), rax); 293 __ movl(Address(rsi, 4), rbx); 294 __ movl(Address(rsi, 8), rcx); 295 __ movl(Address(rsi,12), rdx); 296 297 // 298 // Check if OS has enabled XGETBV instruction to access XCR0 299 // (OSXSAVE feature flag) and CPU supports AVX 300 // 301 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 302 __ cmpl(rcx, 0x18000000); 303 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported 304 305 // 306 // XCR0, XFEATURE_ENABLED_MASK register 307 // 308 __ xorl(rcx, rcx); // zero for XCR0 register 309 __ xgetbv(); 310 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); 311 __ movl(Address(rsi, 0), rax); 312 __ movl(Address(rsi, 4), rdx); 313 314 // 315 // cpuid(0x7) Structured Extended Features Enumeration Leaf. 316 // 317 __ bind(sef_cpuid); 318 __ movl(rax, 7); 319 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? 320 __ jccb(Assembler::greater, ext_cpuid); 321 // ECX = 0 322 __ xorl(rcx, rcx); 323 __ cpuid(); 324 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 325 __ movl(Address(rsi, 0), rax); 326 __ movl(Address(rsi, 4), rbx); 327 __ movl(Address(rsi, 8), rcx); 328 __ movl(Address(rsi, 12), rdx); 329 330 // 331 // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1. 332 // 333 __ bind(sefsl1_cpuid); 334 __ movl(rax, 7); 335 __ movl(rcx, 1); 336 __ cpuid(); 337 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 338 __ movl(Address(rsi, 0), rax); 339 __ movl(Address(rsi, 4), rdx); 340 341 // 342 // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0). 343 // 344 __ bind(std_cpuid24); 345 __ movl(rax, 0x24); 346 __ movl(rcx, 0); 347 __ cpuid(); 348 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset()))); 349 __ movl(Address(rsi, 0), rax); 350 __ movl(Address(rsi, 4), rbx); 351 352 // 353 // Extended cpuid(0x80000000) 354 // 355 __ bind(ext_cpuid); 356 __ movl(rax, 0x80000000); 357 __ cpuid(); 358 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 359 __ jcc(Assembler::belowEqual, done); 360 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 361 __ jcc(Assembler::belowEqual, ext_cpuid1); 362 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? 363 __ jccb(Assembler::belowEqual, ext_cpuid5); 364 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 365 __ jccb(Assembler::belowEqual, ext_cpuid7); 366 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported? 367 __ jccb(Assembler::belowEqual, ext_cpuid8); 368 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported? 369 __ jccb(Assembler::below, ext_cpuid8); 370 // 371 // Extended cpuid(0x8000001E) 372 // 373 __ movl(rax, 0x8000001E); 374 __ cpuid(); 375 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset()))); 376 __ movl(Address(rsi, 0), rax); 377 __ movl(Address(rsi, 4), rbx); 378 __ movl(Address(rsi, 8), rcx); 379 __ movl(Address(rsi,12), rdx); 380 381 // 382 // Extended cpuid(0x80000008) 383 // 384 __ bind(ext_cpuid8); 385 __ movl(rax, 0x80000008); 386 __ cpuid(); 387 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 388 __ movl(Address(rsi, 0), rax); 389 __ movl(Address(rsi, 4), rbx); 390 __ movl(Address(rsi, 8), rcx); 391 __ movl(Address(rsi,12), rdx); 392 393 // 394 // Extended cpuid(0x80000007) 395 // 396 __ bind(ext_cpuid7); 397 __ movl(rax, 0x80000007); 398 __ cpuid(); 399 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); 400 __ movl(Address(rsi, 0), rax); 401 __ movl(Address(rsi, 4), rbx); 402 __ movl(Address(rsi, 8), rcx); 403 __ movl(Address(rsi,12), rdx); 404 405 // 406 // Extended cpuid(0x80000005) 407 // 408 __ bind(ext_cpuid5); 409 __ movl(rax, 0x80000005); 410 __ cpuid(); 411 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 412 __ movl(Address(rsi, 0), rax); 413 __ movl(Address(rsi, 4), rbx); 414 __ movl(Address(rsi, 8), rcx); 415 __ movl(Address(rsi,12), rdx); 416 417 // 418 // Extended cpuid(0x80000001) 419 // 420 __ bind(ext_cpuid1); 421 __ movl(rax, 0x80000001); 422 __ cpuid(); 423 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 424 __ movl(Address(rsi, 0), rax); 425 __ movl(Address(rsi, 4), rbx); 426 __ movl(Address(rsi, 8), rcx); 427 __ movl(Address(rsi,12), rdx); 428 429 // 430 // Check if OS has enabled XGETBV instruction to access XCR0 431 // (OSXSAVE feature flag) and CPU supports APX 432 // 433 // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support 434 // and XCRO[19] bit for OS support to save/restore extended GPR state. 435 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 436 __ movl(rax, 0x200000); 437 __ andl(rax, Address(rsi, 4)); 438 __ jcc(Assembler::equal, vector_save_restore); 439 // check _cpuid_info.xem_xcr0_eax.bits.apx_f 440 __ movl(rax, 0x80000); 441 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f 442 __ jcc(Assembler::equal, vector_save_restore); 443 444 bool save_apx = UseAPX; 445 VM_Version::set_apx_cpuFeatures(); 446 UseAPX = true; 447 __ mov64(r16, VM_Version::egpr_test_value()); 448 __ mov64(r31, VM_Version::egpr_test_value()); 449 __ xorl(rsi, rsi); 450 VM_Version::set_cpuinfo_segv_addr_apx(__ pc()); 451 // Generate SEGV 452 __ movl(rax, Address(rsi, 0)); 453 454 VM_Version::set_cpuinfo_cont_addr_apx(__ pc()); 455 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset()))); 456 __ movq(Address(rsi, 0), r16); 457 __ movq(Address(rsi, 8), r31); 458 459 UseAPX = save_apx; 460 __ bind(vector_save_restore); 461 // 462 // Check if OS has enabled XGETBV instruction to access XCR0 463 // (OSXSAVE feature flag) and CPU supports AVX 464 // 465 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 466 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 467 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx 468 __ cmpl(rcx, 0x18000000); 469 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported 470 471 __ movl(rax, 0x6); 472 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 473 __ cmpl(rax, 0x6); 474 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported 475 476 // we need to bridge farther than imm8, so we use this island as a thunk 477 __ bind(done); 478 __ jmp(wrapup); 479 480 __ bind(start_simd_check); 481 // 482 // Some OSs have a bug when upper 128/256bits of YMM/ZMM 483 // registers are not restored after a signal processing. 484 // Generate SEGV here (reference through null) 485 // and check upper YMM/ZMM bits after it. 486 // 487 int saved_useavx = UseAVX; 488 int saved_usesse = UseSSE; 489 490 // If UseAVX is uninitialized or is set by the user to include EVEX 491 if (use_evex) { 492 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 493 // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10 494 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 495 __ movl(rax, 0x10000); 496 __ andl(rax, Address(rsi, 4)); 497 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 498 __ movl(rbx, 0x80000); 499 __ andl(rbx, Address(rsi, 4)); 500 __ orl(rax, rbx); 501 __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported 502 // check _cpuid_info.xem_xcr0_eax.bits.opmask 503 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 504 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 505 __ movl(rax, 0xE0); 506 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 507 __ cmpl(rax, 0xE0); 508 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 509 510 if (FLAG_IS_DEFAULT(UseAVX)) { 511 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 512 __ movl(rax, Address(rsi, 0)); 513 __ cmpl(rax, 0x50654); // If it is Skylake 514 __ jcc(Assembler::equal, legacy_setup); 515 } 516 // EVEX setup: run in lowest evex mode 517 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 518 UseAVX = 3; 519 UseSSE = 2; 520 #ifdef _WINDOWS 521 // xmm5-xmm15 are not preserved by caller on windows 522 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx 523 __ subptr(rsp, 64); 524 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); 525 __ subptr(rsp, 64); 526 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); 527 __ subptr(rsp, 64); 528 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); 529 #endif // _WINDOWS 530 531 // load value into all 64 bytes of zmm7 register 532 __ movl(rcx, VM_Version::ymm_test_value()); 533 __ movdl(xmm0, rcx); 534 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); 535 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); 536 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); 537 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); 538 VM_Version::clean_cpuFeatures(); 539 __ jmp(save_restore_except); 540 } 541 542 __ bind(legacy_setup); 543 // AVX setup 544 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 545 UseAVX = 1; 546 UseSSE = 2; 547 #ifdef _WINDOWS 548 __ subptr(rsp, 32); 549 __ vmovdqu(Address(rsp, 0), xmm7); 550 __ subptr(rsp, 32); 551 __ vmovdqu(Address(rsp, 0), xmm8); 552 __ subptr(rsp, 32); 553 __ vmovdqu(Address(rsp, 0), xmm15); 554 #endif // _WINDOWS 555 556 // load value into all 32 bytes of ymm7 register 557 __ movl(rcx, VM_Version::ymm_test_value()); 558 559 __ movdl(xmm0, rcx); 560 __ pshufd(xmm0, xmm0, 0x00); 561 __ vinsertf128_high(xmm0, xmm0); 562 __ vmovdqu(xmm7, xmm0); 563 __ vmovdqu(xmm8, xmm0); 564 __ vmovdqu(xmm15, xmm0); 565 VM_Version::clean_cpuFeatures(); 566 567 __ bind(save_restore_except); 568 __ xorl(rsi, rsi); 569 VM_Version::set_cpuinfo_segv_addr(__ pc()); 570 // Generate SEGV 571 __ movl(rax, Address(rsi, 0)); 572 573 VM_Version::set_cpuinfo_cont_addr(__ pc()); 574 // Returns here after signal. Save xmm0 to check it later. 575 576 // If UseAVX is uninitialized or is set by the user to include EVEX 577 if (use_evex) { 578 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 579 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 580 __ movl(rax, 0x10000); 581 __ andl(rax, Address(rsi, 4)); 582 __ jcc(Assembler::equal, legacy_save_restore); 583 // check _cpuid_info.xem_xcr0_eax.bits.opmask 584 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 585 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 586 __ movl(rax, 0xE0); 587 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 588 __ cmpl(rax, 0xE0); 589 __ jcc(Assembler::notEqual, legacy_save_restore); 590 591 if (FLAG_IS_DEFAULT(UseAVX)) { 592 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 593 __ movl(rax, Address(rsi, 0)); 594 __ cmpl(rax, 0x50654); // If it is Skylake 595 __ jcc(Assembler::equal, legacy_save_restore); 596 } 597 // EVEX check: run in lowest evex mode 598 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 599 UseAVX = 3; 600 UseSSE = 2; 601 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); 602 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); 603 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); 604 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); 605 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); 606 607 #ifdef _WINDOWS 608 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); 609 __ addptr(rsp, 64); 610 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); 611 __ addptr(rsp, 64); 612 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); 613 __ addptr(rsp, 64); 614 #endif // _WINDOWS 615 generate_vzeroupper(wrapup); 616 VM_Version::clean_cpuFeatures(); 617 UseAVX = saved_useavx; 618 UseSSE = saved_usesse; 619 __ jmp(wrapup); 620 } 621 622 __ bind(legacy_save_restore); 623 // AVX check 624 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 625 UseAVX = 1; 626 UseSSE = 2; 627 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); 628 __ vmovdqu(Address(rsi, 0), xmm0); 629 __ vmovdqu(Address(rsi, 32), xmm7); 630 __ vmovdqu(Address(rsi, 64), xmm8); 631 __ vmovdqu(Address(rsi, 96), xmm15); 632 633 #ifdef _WINDOWS 634 __ vmovdqu(xmm15, Address(rsp, 0)); 635 __ addptr(rsp, 32); 636 __ vmovdqu(xmm8, Address(rsp, 0)); 637 __ addptr(rsp, 32); 638 __ vmovdqu(xmm7, Address(rsp, 0)); 639 __ addptr(rsp, 32); 640 #endif // _WINDOWS 641 642 generate_vzeroupper(wrapup); 643 VM_Version::clean_cpuFeatures(); 644 UseAVX = saved_useavx; 645 UseSSE = saved_usesse; 646 647 __ bind(wrapup); 648 __ popf(); 649 __ pop(rsi); 650 __ pop(rbx); 651 __ pop(rbp); 652 __ ret(0); 653 654 # undef __ 655 656 return start; 657 }; 658 void generate_vzeroupper(Label& L_wrapup) { 659 # define __ _masm-> 660 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 661 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' 662 __ jcc(Assembler::notEqual, L_wrapup); 663 __ movl(rcx, 0x0FFF0FF0); 664 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 665 __ andl(rcx, Address(rsi, 0)); 666 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200 667 __ jcc(Assembler::equal, L_wrapup); 668 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi 669 __ jcc(Assembler::equal, L_wrapup); 670 // vzeroupper() will use a pre-computed instruction sequence that we 671 // can't compute until after we've determined CPU capabilities. Use 672 // uncached variant here directly to be able to bootstrap correctly 673 __ vzeroupper_uncached(); 674 # undef __ 675 } 676 address generate_detect_virt() { 677 StubCodeMark mark(this, "VM_Version", "detect_virt_stub"); 678 # define __ _masm-> 679 680 address start = __ pc(); 681 682 // Evacuate callee-saved registers 683 __ push(rbp); 684 __ push(rbx); 685 __ push(rsi); // for Windows 686 687 __ mov(rax, c_rarg0); // CPUID leaf 688 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) 689 690 __ cpuid(); 691 692 // Store result to register array 693 __ movl(Address(rsi, 0), rax); 694 __ movl(Address(rsi, 4), rbx); 695 __ movl(Address(rsi, 8), rcx); 696 __ movl(Address(rsi, 12), rdx); 697 698 // Epilogue 699 __ pop(rsi); 700 __ pop(rbx); 701 __ pop(rbp); 702 __ ret(0); 703 704 # undef __ 705 706 return start; 707 }; 708 709 710 address generate_getCPUIDBrandString(void) { 711 // Flags to test CPU type. 712 const uint32_t HS_EFL_AC = 0x40000; 713 const uint32_t HS_EFL_ID = 0x200000; 714 // Values for when we don't have a CPUID instruction. 715 const int CPU_FAMILY_SHIFT = 8; 716 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 717 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 718 719 Label detect_486, cpu486, detect_586, done, ext_cpuid; 720 721 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub"); 722 # define __ _masm-> 723 724 address start = __ pc(); 725 726 // 727 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); 728 // 729 // rcx and rdx are first and second argument registers on windows 730 731 __ push(rbp); 732 __ mov(rbp, c_rarg0); // cpuid_info address 733 __ push(rbx); 734 __ push(rsi); 735 __ pushf(); // preserve rbx, and flags 736 __ pop(rax); 737 __ push(rax); 738 __ mov(rcx, rax); 739 // 740 // if we are unable to change the AC flag, we have a 386 741 // 742 __ xorl(rax, HS_EFL_AC); 743 __ push(rax); 744 __ popf(); 745 __ pushf(); 746 __ pop(rax); 747 __ cmpptr(rax, rcx); 748 __ jccb(Assembler::notEqual, detect_486); 749 750 __ movl(rax, CPU_FAMILY_386); 751 __ jmp(done); 752 753 // 754 // If we are unable to change the ID flag, we have a 486 which does 755 // not support the "cpuid" instruction. 756 // 757 __ bind(detect_486); 758 __ mov(rax, rcx); 759 __ xorl(rax, HS_EFL_ID); 760 __ push(rax); 761 __ popf(); 762 __ pushf(); 763 __ pop(rax); 764 __ cmpptr(rcx, rax); 765 __ jccb(Assembler::notEqual, detect_586); 766 767 __ bind(cpu486); 768 __ movl(rax, CPU_FAMILY_486); 769 __ jmp(done); 770 771 // 772 // At this point, we have a chip which supports the "cpuid" instruction 773 // 774 __ bind(detect_586); 775 __ xorl(rax, rax); 776 __ cpuid(); 777 __ orl(rax, rax); 778 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 779 // value of at least 1, we give up and 780 // assume a 486 781 782 // 783 // Extended cpuid(0x80000000) for processor brand string detection 784 // 785 __ bind(ext_cpuid); 786 __ movl(rax, CPUID_EXTENDED_FN); 787 __ cpuid(); 788 __ cmpl(rax, CPUID_EXTENDED_FN_4); 789 __ jcc(Assembler::below, done); 790 791 // 792 // Extended cpuid(0x80000002) // first 16 bytes in brand string 793 // 794 __ movl(rax, CPUID_EXTENDED_FN_2); 795 __ cpuid(); 796 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); 797 __ movl(Address(rsi, 0), rax); 798 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); 799 __ movl(Address(rsi, 0), rbx); 800 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); 801 __ movl(Address(rsi, 0), rcx); 802 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); 803 __ movl(Address(rsi,0), rdx); 804 805 // 806 // Extended cpuid(0x80000003) // next 16 bytes in brand string 807 // 808 __ movl(rax, CPUID_EXTENDED_FN_3); 809 __ cpuid(); 810 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); 811 __ movl(Address(rsi, 0), rax); 812 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); 813 __ movl(Address(rsi, 0), rbx); 814 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); 815 __ movl(Address(rsi, 0), rcx); 816 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); 817 __ movl(Address(rsi,0), rdx); 818 819 // 820 // Extended cpuid(0x80000004) // last 16 bytes in brand string 821 // 822 __ movl(rax, CPUID_EXTENDED_FN_4); 823 __ cpuid(); 824 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); 825 __ movl(Address(rsi, 0), rax); 826 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); 827 __ movl(Address(rsi, 0), rbx); 828 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); 829 __ movl(Address(rsi, 0), rcx); 830 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); 831 __ movl(Address(rsi,0), rdx); 832 833 // 834 // return 835 // 836 __ bind(done); 837 __ popf(); 838 __ pop(rsi); 839 __ pop(rbx); 840 __ pop(rbp); 841 __ ret(0); 842 843 # undef __ 844 845 return start; 846 }; 847 }; 848 849 void VM_Version::get_processor_features() { 850 851 _cpu = 4; // 486 by default 852 _model = 0; 853 _stepping = 0; 854 _logical_processors_per_package = 1; 855 // i486 internal cache is both I&D and has a 16-byte line size 856 _L1_data_cache_line_size = 16; 857 858 // Get raw processor info 859 860 get_cpu_info_stub(&_cpuid_info); 861 862 assert_is_initialized(); 863 _cpu = extended_cpu_family(); 864 _model = extended_cpu_model(); 865 _stepping = cpu_stepping(); 866 867 if (cpu_family() > 4) { // it supports CPUID 868 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings 869 _cpu_features = _features; // Preserve features 870 // Logical processors are only available on P4s and above, 871 // and only if hyperthreading is available. 872 _logical_processors_per_package = logical_processor_count(); 873 _L1_data_cache_line_size = L1_line_size(); 874 } 875 876 // xchg and xadd instructions 877 _supports_atomic_getset4 = true; 878 _supports_atomic_getadd4 = true; 879 _supports_atomic_getset8 = true; 880 _supports_atomic_getadd8 = true; 881 882 // OS should support SSE for x64 and hardware should support at least SSE2. 883 if (!VM_Version::supports_sse2()) { 884 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 885 } 886 // in 64 bit the use of SSE2 is the minimum 887 if (UseSSE < 2) UseSSE = 2; 888 889 // flush_icache_stub have to be generated first. 890 // That is why Icache line size is hard coded in ICache class, 891 // see icache_x86.hpp. It is also the reason why we can't use 892 // clflush instruction in 32-bit VM since it could be running 893 // on CPU which does not support it. 894 // 895 // The only thing we can do is to verify that flushed 896 // ICache::line_size has correct value. 897 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); 898 // clflush_size is size in quadwords (8 bytes). 899 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); 900 901 // assigning this field effectively enables Unsafe.writebackMemory() 902 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero 903 // that is only implemented on x86_64 and only if the OS plays ball 904 if (os::supports_map_sync()) { 905 // publish data cache line flush size to generic field, otherwise 906 // let if default to zero thereby disabling writeback 907 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; 908 } 909 910 // Check if processor has Intel Ecore 911 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() && 912 (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF || 913 _model == 0xCC || _model == 0xDD)) { 914 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true); 915 } 916 917 if (UseSSE < 4) { 918 _features.clear_feature(CPU_SSE4_1); 919 _features.clear_feature(CPU_SSE4_2); 920 } 921 922 if (UseSSE < 3) { 923 _features.clear_feature(CPU_SSE3); 924 _features.clear_feature(CPU_SSSE3); 925 _features.clear_feature(CPU_SSE4A); 926 } 927 928 if (UseSSE < 2) 929 _features.clear_feature(CPU_SSE2); 930 931 if (UseSSE < 1) 932 _features.clear_feature(CPU_SSE); 933 934 //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. 935 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { 936 UseAVX = 0; 937 } 938 939 // UseSSE is set to the smaller of what hardware supports and what 940 // the command line requires. I.e., you cannot set UseSSE to 2 on 941 // older Pentiums which do not support it. 942 int use_sse_limit = 0; 943 if (UseSSE > 0) { 944 if (UseSSE > 3 && supports_sse4_1()) { 945 use_sse_limit = 4; 946 } else if (UseSSE > 2 && supports_sse3()) { 947 use_sse_limit = 3; 948 } else if (UseSSE > 1 && supports_sse2()) { 949 use_sse_limit = 2; 950 } else if (UseSSE > 0 && supports_sse()) { 951 use_sse_limit = 1; 952 } else { 953 use_sse_limit = 0; 954 } 955 } 956 if (FLAG_IS_DEFAULT(UseSSE)) { 957 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 958 } else if (UseSSE > use_sse_limit) { 959 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit); 960 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 961 } 962 963 // first try initial setting and detect what we can support 964 int use_avx_limit = 0; 965 if (UseAVX > 0) { 966 if (UseSSE < 4) { 967 // Don't use AVX if SSE is unavailable or has been disabled. 968 use_avx_limit = 0; 969 } else if (UseAVX > 2 && supports_evex()) { 970 use_avx_limit = 3; 971 } else if (UseAVX > 1 && supports_avx2()) { 972 use_avx_limit = 2; 973 } else if (UseAVX > 0 && supports_avx()) { 974 use_avx_limit = 1; 975 } else { 976 use_avx_limit = 0; 977 } 978 } 979 if (FLAG_IS_DEFAULT(UseAVX)) { 980 // Don't use AVX-512 on older Skylakes unless explicitly requested. 981 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { 982 FLAG_SET_DEFAULT(UseAVX, 2); 983 } else { 984 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 985 } 986 } 987 988 if (UseAVX > use_avx_limit) { 989 if (UseSSE < 4) { 990 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX); 991 } else { 992 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit); 993 } 994 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 995 } 996 997 if (UseAVX < 3) { 998 _features.clear_feature(CPU_AVX512F); 999 _features.clear_feature(CPU_AVX512DQ); 1000 _features.clear_feature(CPU_AVX512CD); 1001 _features.clear_feature(CPU_AVX512BW); 1002 _features.clear_feature(CPU_AVX512ER); 1003 _features.clear_feature(CPU_AVX512PF); 1004 _features.clear_feature(CPU_AVX512VL); 1005 _features.clear_feature(CPU_AVX512_VPOPCNTDQ); 1006 _features.clear_feature(CPU_AVX512_VPCLMULQDQ); 1007 _features.clear_feature(CPU_AVX512_VAES); 1008 _features.clear_feature(CPU_AVX512_VNNI); 1009 _features.clear_feature(CPU_AVX512_VBMI); 1010 _features.clear_feature(CPU_AVX512_VBMI2); 1011 _features.clear_feature(CPU_AVX512_BITALG); 1012 _features.clear_feature(CPU_AVX512_IFMA); 1013 _features.clear_feature(CPU_APX_F); 1014 _features.clear_feature(CPU_AVX512_FP16); 1015 _features.clear_feature(CPU_AVX10_1); 1016 _features.clear_feature(CPU_AVX10_2); 1017 } 1018 1019 // Currently APX support is only enabled for targets supporting AVX512VL feature. 1020 bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl(); 1021 if (UseAPX && !apx_supported) { 1022 warning("UseAPX is not supported on this CPU, setting it to false"); 1023 FLAG_SET_DEFAULT(UseAPX, false); 1024 } 1025 1026 if (!UseAPX) { 1027 _features.clear_feature(CPU_APX_F); 1028 } 1029 1030 if (UseAVX < 2) { 1031 _features.clear_feature(CPU_AVX2); 1032 _features.clear_feature(CPU_AVX_IFMA); 1033 } 1034 1035 if (UseAVX < 1) { 1036 _features.clear_feature(CPU_AVX); 1037 _features.clear_feature(CPU_VZEROUPPER); 1038 _features.clear_feature(CPU_F16C); 1039 _features.clear_feature(CPU_SHA512); 1040 } 1041 1042 if (logical_processors_per_package() == 1) { 1043 // HT processor could be installed on a system which doesn't support HT. 1044 _features.clear_feature(CPU_HT); 1045 } 1046 1047 if (is_intel()) { // Intel cpus specific settings 1048 if (is_knights_family()) { 1049 _features.clear_feature(CPU_VZEROUPPER); 1050 _features.clear_feature(CPU_AVX512BW); 1051 _features.clear_feature(CPU_AVX512VL); 1052 _features.clear_feature(CPU_AVX512DQ); 1053 _features.clear_feature(CPU_AVX512_VNNI); 1054 _features.clear_feature(CPU_AVX512_VAES); 1055 _features.clear_feature(CPU_AVX512_VPOPCNTDQ); 1056 _features.clear_feature(CPU_AVX512_VPCLMULQDQ); 1057 _features.clear_feature(CPU_AVX512_VBMI); 1058 _features.clear_feature(CPU_AVX512_VBMI2); 1059 _features.clear_feature(CPU_CLWB); 1060 _features.clear_feature(CPU_FLUSHOPT); 1061 _features.clear_feature(CPU_GFNI); 1062 _features.clear_feature(CPU_AVX512_BITALG); 1063 _features.clear_feature(CPU_AVX512_IFMA); 1064 _features.clear_feature(CPU_AVX_IFMA); 1065 _features.clear_feature(CPU_AVX512_FP16); 1066 _features.clear_feature(CPU_AVX10_1); 1067 _features.clear_feature(CPU_AVX10_2); 1068 } 1069 } 1070 1071 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) { 1072 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum(); 1073 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum); 1074 } else { 1075 _has_intel_jcc_erratum = IntelJccErratumMitigation; 1076 } 1077 1078 assert(supports_clflush(), "Always present"); 1079 if (X86ICacheSync == -1) { 1080 // Auto-detect, choosing the best performant one that still flushes 1081 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward. 1082 if (supports_clwb()) { 1083 FLAG_SET_ERGO(X86ICacheSync, 3); 1084 } else if (supports_clflushopt()) { 1085 FLAG_SET_ERGO(X86ICacheSync, 2); 1086 } else { 1087 FLAG_SET_ERGO(X86ICacheSync, 1); 1088 } 1089 } else { 1090 if ((X86ICacheSync == 2) && !supports_clflushopt()) { 1091 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2"); 1092 } 1093 if ((X86ICacheSync == 3) && !supports_clwb()) { 1094 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3"); 1095 } 1096 if ((X86ICacheSync == 5) && !supports_serialize()) { 1097 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5"); 1098 } 1099 } 1100 1101 stringStream ss(2048); 1102 if (supports_hybrid()) { 1103 ss.print("(hybrid)"); 1104 } else { 1105 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core()); 1106 } 1107 ss.print(" family %d model %d stepping %d microcode 0x%x", 1108 cpu_family(), _model, _stepping, os::cpu_microcode_revision()); 1109 ss.print(", "); 1110 int features_offset = (int)ss.size(); 1111 insert_features_names(_features, ss); 1112 1113 _cpu_info_string = ss.as_string(true); 1114 _features_string = _cpu_info_string + features_offset; 1115 1116 // Use AES instructions if available. 1117 if (supports_aes()) { 1118 if (FLAG_IS_DEFAULT(UseAES)) { 1119 FLAG_SET_DEFAULT(UseAES, true); 1120 } 1121 if (!UseAES) { 1122 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1123 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); 1124 } 1125 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1126 } else { 1127 if (UseSSE > 2) { 1128 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1129 FLAG_SET_DEFAULT(UseAESIntrinsics, true); 1130 } 1131 } else { 1132 // The AES intrinsic stubs require AES instruction support (of course) 1133 // but also require sse3 mode or higher for instructions it use. 1134 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1135 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); 1136 } 1137 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1138 } 1139 1140 // --AES-CTR begins-- 1141 if (!UseAESIntrinsics) { 1142 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1143 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled."); 1144 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1145 } 1146 } else { 1147 if (supports_sse4_1()) { 1148 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1149 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true); 1150 } 1151 } else { 1152 // The AES-CTR intrinsic stubs require AES instruction support (of course) 1153 // but also require sse4.1 mode or higher for instructions it use. 1154 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1155 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled."); 1156 } 1157 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1158 } 1159 } 1160 // --AES-CTR ends-- 1161 } 1162 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { 1163 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { 1164 warning("AES instructions are not available on this CPU"); 1165 FLAG_SET_DEFAULT(UseAES, false); 1166 } 1167 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1168 warning("AES intrinsics are not available on this CPU"); 1169 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1170 } 1171 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1172 warning("AES-CTR intrinsics are not available on this CPU"); 1173 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1174 } 1175 } 1176 1177 // Use CLMUL instructions if available. 1178 if (supports_clmul()) { 1179 if (FLAG_IS_DEFAULT(UseCLMUL)) { 1180 UseCLMUL = true; 1181 } 1182 } else if (UseCLMUL) { 1183 if (!FLAG_IS_DEFAULT(UseCLMUL)) 1184 warning("CLMUL instructions not available on this CPU (AVX may also be required)"); 1185 FLAG_SET_DEFAULT(UseCLMUL, false); 1186 } 1187 1188 if (UseCLMUL && (UseSSE > 2)) { 1189 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { 1190 UseCRC32Intrinsics = true; 1191 } 1192 } else if (UseCRC32Intrinsics) { 1193 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) 1194 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); 1195 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); 1196 } 1197 1198 if (supports_avx2()) { 1199 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1200 UseAdler32Intrinsics = true; 1201 } 1202 } else if (UseAdler32Intrinsics) { 1203 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1204 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)"); 1205 } 1206 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1207 } 1208 1209 if (supports_sse4_2() && supports_clmul()) { 1210 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1211 UseCRC32CIntrinsics = true; 1212 } 1213 } else if (UseCRC32CIntrinsics) { 1214 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1215 warning("CRC32C intrinsics are not available on this CPU"); 1216 } 1217 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); 1218 } 1219 1220 // GHASH/GCM intrinsics 1221 if (UseCLMUL && (UseSSE > 2)) { 1222 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { 1223 UseGHASHIntrinsics = true; 1224 } 1225 } else if (UseGHASHIntrinsics) { 1226 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) 1227 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); 1228 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 1229 } 1230 1231 // ChaCha20 Intrinsics 1232 // As long as the system supports AVX as a baseline we can do a 1233 // SIMD-enabled block function. StubGenerator makes the determination 1234 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled 1235 // version. 1236 if (UseAVX >= 1) { 1237 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1238 UseChaCha20Intrinsics = true; 1239 } 1240 } else if (UseChaCha20Intrinsics) { 1241 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1242 warning("ChaCha20 intrinsic requires AVX instructions"); 1243 } 1244 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); 1245 } 1246 1247 // Kyber Intrinsics 1248 // Currently we only have them for AVX512 1249 #ifdef _LP64 1250 if (supports_evex() && supports_avx512bw()) { 1251 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) { 1252 UseKyberIntrinsics = true; 1253 } 1254 } else 1255 #endif 1256 if (UseKyberIntrinsics) { 1257 warning("Intrinsics for ML-KEM are not available on this CPU."); 1258 FLAG_SET_DEFAULT(UseKyberIntrinsics, false); 1259 } 1260 1261 // Dilithium Intrinsics 1262 // Currently we only have them for AVX512 1263 if (supports_evex() && supports_avx512bw()) { 1264 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) { 1265 UseDilithiumIntrinsics = true; 1266 } 1267 } else if (UseDilithiumIntrinsics) { 1268 warning("Intrinsics for ML-DSA are not available on this CPU."); 1269 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false); 1270 } 1271 1272 // Base64 Intrinsics (Check the condition for which the intrinsic will be active) 1273 if (UseAVX >= 2) { 1274 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { 1275 UseBASE64Intrinsics = true; 1276 } 1277 } else if (UseBASE64Intrinsics) { 1278 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) 1279 warning("Base64 intrinsic requires EVEX instructions on this CPU"); 1280 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); 1281 } 1282 1283 if (supports_fma()) { 1284 if (FLAG_IS_DEFAULT(UseFMA)) { 1285 UseFMA = true; 1286 } 1287 } else if (UseFMA) { 1288 warning("FMA instructions are not available on this CPU"); 1289 FLAG_SET_DEFAULT(UseFMA, false); 1290 } 1291 1292 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { 1293 UseMD5Intrinsics = true; 1294 } 1295 1296 if (supports_sha() || (supports_avx2() && supports_bmi2())) { 1297 if (FLAG_IS_DEFAULT(UseSHA)) { 1298 UseSHA = true; 1299 } 1300 } else if (UseSHA) { 1301 warning("SHA instructions are not available on this CPU"); 1302 FLAG_SET_DEFAULT(UseSHA, false); 1303 } 1304 1305 if (supports_sha() && supports_sse4_1() && UseSHA) { 1306 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { 1307 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); 1308 } 1309 } else if (UseSHA1Intrinsics) { 1310 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); 1311 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); 1312 } 1313 1314 if (supports_sse4_1() && UseSHA) { 1315 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { 1316 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); 1317 } 1318 } else if (UseSHA256Intrinsics) { 1319 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); 1320 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); 1321 } 1322 1323 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) { 1324 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { 1325 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); 1326 } 1327 } else if (UseSHA512Intrinsics) { 1328 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); 1329 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); 1330 } 1331 1332 if (supports_evex() && supports_avx512bw()) { 1333 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) { 1334 UseSHA3Intrinsics = true; 1335 } 1336 } else if (UseSHA3Intrinsics) { 1337 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); 1338 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); 1339 } 1340 1341 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { 1342 FLAG_SET_DEFAULT(UseSHA, false); 1343 } 1344 1345 #if COMPILER2_OR_JVMCI 1346 int max_vector_size = 0; 1347 if (UseAVX == 0 || !os_supports_avx_vectors()) { 1348 // 16 byte vectors (in XMM) are supported with SSE2+ 1349 max_vector_size = 16; 1350 } else if (UseAVX == 1 || UseAVX == 2) { 1351 // 32 bytes vectors (in YMM) are only supported with AVX+ 1352 max_vector_size = 32; 1353 } else if (UseAVX > 2) { 1354 // 64 bytes vectors (in ZMM) are only supported with AVX 3 1355 max_vector_size = 64; 1356 } 1357 1358 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit 1359 1360 if (!FLAG_IS_DEFAULT(MaxVectorSize)) { 1361 if (MaxVectorSize < min_vector_size) { 1362 warning("MaxVectorSize must be at least %i on this platform", min_vector_size); 1363 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); 1364 } 1365 if (MaxVectorSize > max_vector_size) { 1366 warning("MaxVectorSize must be at most %i on this platform", max_vector_size); 1367 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1368 } 1369 if (!is_power_of_2(MaxVectorSize)) { 1370 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); 1371 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1372 } 1373 } else { 1374 // If default, use highest supported configuration 1375 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1376 } 1377 1378 #if defined(COMPILER2) && defined(ASSERT) 1379 if (MaxVectorSize > 0) { 1380 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { 1381 tty->print_cr("State of YMM registers after signal handle:"); 1382 int nreg = 4; 1383 const char* ymm_name[4] = {"0", "7", "8", "15"}; 1384 for (int i = 0; i < nreg; i++) { 1385 tty->print("YMM%s:", ymm_name[i]); 1386 for (int j = 7; j >=0; j--) { 1387 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); 1388 } 1389 tty->cr(); 1390 } 1391 } 1392 } 1393 #endif // COMPILER2 && ASSERT 1394 1395 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { 1396 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { 1397 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); 1398 } 1399 } else if (UsePoly1305Intrinsics) { 1400 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU."); 1401 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); 1402 } 1403 1404 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { 1405 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) { 1406 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true); 1407 } 1408 } else if (UseIntPolyIntrinsics) { 1409 warning("Intrinsics for Polynomial crypto functions not available on this CPU."); 1410 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false); 1411 } 1412 1413 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1414 UseMultiplyToLenIntrinsic = true; 1415 } 1416 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1417 UseSquareToLenIntrinsic = true; 1418 } 1419 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1420 UseMulAddIntrinsic = true; 1421 } 1422 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1423 UseMontgomeryMultiplyIntrinsic = true; 1424 } 1425 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1426 UseMontgomerySquareIntrinsic = true; 1427 } 1428 #endif // COMPILER2_OR_JVMCI 1429 1430 // On new cpus instructions which update whole XMM register should be used 1431 // to prevent partial register stall due to dependencies on high half. 1432 // 1433 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1434 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1435 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1436 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1437 1438 1439 if (is_zx()) { // ZX cpus specific settings 1440 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1441 UseStoreImmI16 = false; // don't use it on ZX cpus 1442 } 1443 if ((cpu_family() == 6) || (cpu_family() == 7)) { 1444 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1445 // Use it on all ZX cpus 1446 UseAddressNop = true; 1447 } 1448 } 1449 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1450 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus 1451 } 1452 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1453 if (supports_sse3()) { 1454 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus 1455 } else { 1456 UseXmmRegToRegMoveAll = false; 1457 } 1458 } 1459 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus 1460 #ifdef COMPILER2 1461 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1462 // For new ZX cpus do the next optimization: 1463 // don't align the beginning of a loop if there are enough instructions 1464 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1465 // in current fetch line (OptoLoopAlignment) or the padding 1466 // is big (> MaxLoopPad). 1467 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of 1468 // generated NOP instructions. 11 is the largest size of one 1469 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1470 MaxLoopPad = 11; 1471 } 1472 #endif // COMPILER2 1473 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1474 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus 1475 } 1476 if (supports_sse4_2()) { // new ZX cpus 1477 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1478 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus 1479 } 1480 } 1481 } 1482 1483 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1484 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1485 } 1486 } 1487 1488 if (is_amd_family()) { // AMD cpus specific settings 1489 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { 1490 // Use it on new AMD cpus starting from Opteron. 1491 UseAddressNop = true; 1492 } 1493 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { 1494 // Use it on new AMD cpus starting from Opteron. 1495 UseNewLongLShift = true; 1496 } 1497 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1498 if (supports_sse4a()) { 1499 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1500 } else { 1501 UseXmmLoadAndClearUpper = false; 1502 } 1503 } 1504 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1505 if (supports_sse4a()) { 1506 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1507 } else { 1508 UseXmmRegToRegMoveAll = false; 1509 } 1510 } 1511 if (FLAG_IS_DEFAULT(UseXmmI2F)) { 1512 if (supports_sse4a()) { 1513 UseXmmI2F = true; 1514 } else { 1515 UseXmmI2F = false; 1516 } 1517 } 1518 if (FLAG_IS_DEFAULT(UseXmmI2D)) { 1519 if (supports_sse4a()) { 1520 UseXmmI2D = true; 1521 } else { 1522 UseXmmI2D = false; 1523 } 1524 } 1525 1526 // some defaults for AMD family 15h 1527 if (cpu_family() == 0x15) { 1528 // On family 15h processors default is no sw prefetch 1529 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1530 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1531 } 1532 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 1533 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1534 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1535 } 1536 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 1537 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1538 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1539 } 1540 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1541 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1542 } 1543 } 1544 1545 #ifdef COMPILER2 1546 if (cpu_family() < 0x17 && MaxVectorSize > 16) { 1547 // Limit vectors size to 16 bytes on AMD cpus < 17h. 1548 FLAG_SET_DEFAULT(MaxVectorSize, 16); 1549 } 1550 #endif // COMPILER2 1551 1552 // Some defaults for AMD family >= 17h && Hygon family 18h 1553 if (cpu_family() >= 0x17) { 1554 // On family >=17h processors use XMM and UnalignedLoadStores 1555 // for Array Copy 1556 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1557 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1558 } 1559 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1560 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1561 } 1562 #ifdef COMPILER2 1563 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1564 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1565 } 1566 #endif 1567 } 1568 } 1569 1570 if (is_intel()) { // Intel cpus specific settings 1571 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1572 UseStoreImmI16 = false; // don't use it on Intel cpus 1573 } 1574 if (is_intel_server_family() || cpu_family() == 15) { 1575 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1576 // Use it on all Intel cpus starting from PentiumPro 1577 UseAddressNop = true; 1578 } 1579 } 1580 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1581 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1582 } 1583 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1584 if (supports_sse3()) { 1585 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1586 } else { 1587 UseXmmRegToRegMoveAll = false; 1588 } 1589 } 1590 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus 1591 #ifdef COMPILER2 1592 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1593 // For new Intel cpus do the next optimization: 1594 // don't align the beginning of a loop if there are enough instructions 1595 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1596 // in current fetch line (OptoLoopAlignment) or the padding 1597 // is big (> MaxLoopPad). 1598 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1599 // generated NOP instructions. 11 is the largest size of one 1600 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1601 MaxLoopPad = 11; 1602 } 1603 #endif // COMPILER2 1604 1605 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1606 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 1607 } 1608 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus 1609 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1610 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1611 } 1612 } 1613 } 1614 if (is_atom_family() || is_knights_family()) { 1615 #ifdef COMPILER2 1616 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1617 OptoScheduling = true; 1618 } 1619 #endif 1620 if (supports_sse4_2()) { // Silvermont 1621 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1622 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1623 } 1624 } 1625 if (FLAG_IS_DEFAULT(UseIncDec)) { 1626 FLAG_SET_DEFAULT(UseIncDec, false); 1627 } 1628 } 1629 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1630 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1631 } 1632 #ifdef COMPILER2 1633 if (UseAVX > 2) { 1634 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1635 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1636 ArrayOperationPartialInlineSize != 0 && 1637 ArrayOperationPartialInlineSize != 16 && 1638 ArrayOperationPartialInlineSize != 32 && 1639 ArrayOperationPartialInlineSize != 64)) { 1640 int inline_size = 0; 1641 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1642 inline_size = 64; 1643 } else if (MaxVectorSize >= 32) { 1644 inline_size = 32; 1645 } else if (MaxVectorSize >= 16) { 1646 inline_size = 16; 1647 } 1648 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1649 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); 1650 } 1651 ArrayOperationPartialInlineSize = inline_size; 1652 } 1653 1654 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1655 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; 1656 if (ArrayOperationPartialInlineSize) { 1657 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize); 1658 } else { 1659 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize); 1660 } 1661 } 1662 } 1663 #endif 1664 } 1665 1666 #ifdef COMPILER2 1667 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1668 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) { 1669 OptimizeFill = false; 1670 } 1671 } 1672 #endif 1673 if (supports_sse4_2()) { 1674 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1675 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1676 } 1677 } else { 1678 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1679 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1680 } 1681 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1682 } 1683 if (UseSSE42Intrinsics) { 1684 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1685 UseVectorizedMismatchIntrinsic = true; 1686 } 1687 } else if (UseVectorizedMismatchIntrinsic) { 1688 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1689 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1690 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1691 } 1692 if (UseAVX >= 2) { 1693 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true); 1694 } else if (UseVectorizedHashCodeIntrinsic) { 1695 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) 1696 warning("vectorizedHashCode intrinsics are not available on this CPU"); 1697 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1698 } 1699 1700 // Use count leading zeros count instruction if available. 1701 if (supports_lzcnt()) { 1702 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1703 UseCountLeadingZerosInstruction = true; 1704 } 1705 } else if (UseCountLeadingZerosInstruction) { 1706 warning("lzcnt instruction is not available on this CPU"); 1707 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1708 } 1709 1710 // Use count trailing zeros instruction if available 1711 if (supports_bmi1()) { 1712 // tzcnt does not require VEX prefix 1713 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1714 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1715 // Don't use tzcnt if BMI1 is switched off on command line. 1716 UseCountTrailingZerosInstruction = false; 1717 } else { 1718 UseCountTrailingZerosInstruction = true; 1719 } 1720 } 1721 } else if (UseCountTrailingZerosInstruction) { 1722 warning("tzcnt instruction is not available on this CPU"); 1723 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1724 } 1725 1726 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1727 // VEX prefix is generated only when AVX > 0. 1728 if (supports_bmi1() && supports_avx()) { 1729 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1730 UseBMI1Instructions = true; 1731 } 1732 } else if (UseBMI1Instructions) { 1733 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1734 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1735 } 1736 1737 if (supports_bmi2() && supports_avx()) { 1738 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1739 UseBMI2Instructions = true; 1740 } 1741 } else if (UseBMI2Instructions) { 1742 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1743 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1744 } 1745 1746 // Use population count instruction if available. 1747 if (supports_popcnt()) { 1748 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1749 UsePopCountInstruction = true; 1750 } 1751 } else if (UsePopCountInstruction) { 1752 warning("POPCNT instruction is not available on this CPU"); 1753 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1754 } 1755 1756 // Use fast-string operations if available. 1757 if (supports_erms()) { 1758 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1759 UseFastStosb = true; 1760 } 1761 } else if (UseFastStosb) { 1762 warning("fast-string operations are not available on this CPU"); 1763 FLAG_SET_DEFAULT(UseFastStosb, false); 1764 } 1765 1766 // For AMD Processors use XMM/YMM MOVDQU instructions 1767 // for Object Initialization as default 1768 if (is_amd() && cpu_family() >= 0x19) { 1769 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1770 UseFastStosb = false; 1771 } 1772 } 1773 1774 #ifdef COMPILER2 1775 if (is_intel() && MaxVectorSize > 16) { 1776 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1777 UseFastStosb = false; 1778 } 1779 } 1780 #endif 1781 1782 // Use XMM/YMM MOVDQU instruction for Object Initialization 1783 if (!UseFastStosb && UseUnalignedLoadStores) { 1784 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1785 UseXMMForObjInit = true; 1786 } 1787 } else if (UseXMMForObjInit) { 1788 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1789 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1790 } 1791 1792 #ifdef COMPILER2 1793 if (FLAG_IS_DEFAULT(AlignVector)) { 1794 // Modern processors allow misaligned memory operations for vectors. 1795 AlignVector = !UseUnalignedLoadStores; 1796 } 1797 #endif // COMPILER2 1798 1799 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1800 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1801 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1802 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1803 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1804 } 1805 } 1806 1807 // Allocation prefetch settings 1808 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1809 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1810 (cache_line_size > AllocatePrefetchStepSize)) { 1811 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1812 } 1813 1814 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1815 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1816 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1817 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); 1818 } 1819 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1820 } 1821 1822 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1823 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1824 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1825 } 1826 1827 if (is_intel() && is_intel_server_family() && supports_sse3()) { 1828 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1829 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1830 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1831 } 1832 #ifdef COMPILER2 1833 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1834 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1835 } 1836 #endif 1837 } 1838 1839 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1840 #ifdef COMPILER2 1841 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1842 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1843 } 1844 #endif 1845 } 1846 1847 // Prefetch settings 1848 1849 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1850 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 1851 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1852 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1853 1854 // gc copy/scan is disabled if prefetchw isn't supported, because 1855 // Prefetch::write emits an inlined prefetchw on Linux. 1856 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1857 // The used prefetcht0 instruction works for both amd64 and em64t. 1858 1859 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1860 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1861 } 1862 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1863 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1864 } 1865 1866 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1867 (cache_line_size > ContendedPaddingWidth)) 1868 ContendedPaddingWidth = cache_line_size; 1869 1870 // This machine allows unaligned memory accesses 1871 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1872 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1873 } 1874 1875 #ifndef PRODUCT 1876 if (log_is_enabled(Info, os, cpu)) { 1877 LogStream ls(Log(os, cpu)::info()); 1878 outputStream* log = &ls; 1879 log->print_cr("Logical CPUs per core: %u", 1880 logical_processors_per_package()); 1881 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1882 log->print("UseSSE=%d", UseSSE); 1883 if (UseAVX > 0) { 1884 log->print(" UseAVX=%d", UseAVX); 1885 } 1886 if (UseAES) { 1887 log->print(" UseAES=1"); 1888 } 1889 #ifdef COMPILER2 1890 if (MaxVectorSize > 0) { 1891 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1892 } 1893 #endif 1894 log->cr(); 1895 log->print("Allocation"); 1896 if (AllocatePrefetchStyle <= 0) { 1897 log->print_cr(": no prefetching"); 1898 } else { 1899 log->print(" prefetching: "); 1900 if (AllocatePrefetchInstr == 0) { 1901 log->print("PREFETCHNTA"); 1902 } else if (AllocatePrefetchInstr == 1) { 1903 log->print("PREFETCHT0"); 1904 } else if (AllocatePrefetchInstr == 2) { 1905 log->print("PREFETCHT2"); 1906 } else if (AllocatePrefetchInstr == 3) { 1907 log->print("PREFETCHW"); 1908 } 1909 if (AllocatePrefetchLines > 1) { 1910 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1911 } else { 1912 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1913 } 1914 } 1915 1916 if (PrefetchCopyIntervalInBytes > 0) { 1917 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1918 } 1919 if (PrefetchScanIntervalInBytes > 0) { 1920 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1921 } 1922 if (ContendedPaddingWidth > 0) { 1923 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1924 } 1925 } 1926 #endif // !PRODUCT 1927 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1928 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1929 } 1930 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1931 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1932 } 1933 } 1934 1935 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1936 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1937 if (vrt == XenHVM) { 1938 st->print_cr("Xen hardware-assisted virtualization detected"); 1939 } else if (vrt == KVM) { 1940 st->print_cr("KVM virtualization detected"); 1941 } else if (vrt == VMWare) { 1942 st->print_cr("VMWare virtualization detected"); 1943 VirtualizationSupport::print_virtualization_info(st); 1944 } else if (vrt == HyperV) { 1945 st->print_cr("Hyper-V virtualization detected"); 1946 } else if (vrt == HyperVRole) { 1947 st->print_cr("Hyper-V role detected"); 1948 } 1949 } 1950 1951 bool VM_Version::compute_has_intel_jcc_erratum() { 1952 if (!is_intel_family_core()) { 1953 // Only Intel CPUs are affected. 1954 return false; 1955 } 1956 // The following table of affected CPUs is based on the following document released by Intel: 1957 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 1958 switch (_model) { 1959 case 0x8E: 1960 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1961 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 1962 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 1963 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 1964 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 1965 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1966 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1967 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 1968 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1969 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 1970 case 0x4E: 1971 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 1972 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 1973 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 1974 return _stepping == 0x3; 1975 case 0x55: 1976 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1977 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1978 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 1979 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 1980 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 1981 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 1982 return _stepping == 0x4 || _stepping == 0x7; 1983 case 0x5E: 1984 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 1985 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 1986 return _stepping == 0x3; 1987 case 0x9E: 1988 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 1989 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 1990 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 1991 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 1992 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 1993 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H 1994 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 1995 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP 1996 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 1997 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 1998 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 1999 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 2000 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2) 2001 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 2002 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 2003 case 0xA5: 2004 // Not in Intel documentation. 2005 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 2006 return true; 2007 case 0xA6: 2008 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 2009 return _stepping == 0x0; 2010 case 0xAE: 2011 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2012 return _stepping == 0xA; 2013 default: 2014 // If we are running on another intel machine not recognized in the table, we are okay. 2015 return false; 2016 } 2017 } 2018 2019 // On Xen, the cpuid instruction returns 2020 // eax / registers[0]: Version of Xen 2021 // ebx / registers[1]: chars 'XenV' 2022 // ecx / registers[2]: chars 'MMXe' 2023 // edx / registers[3]: chars 'nVMM' 2024 // 2025 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2026 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2027 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2028 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2029 // 2030 // more information : 2031 // https://kb.vmware.com/s/article/1009458 2032 // 2033 void VM_Version::check_virtualizations() { 2034 uint32_t registers[4] = {0}; 2035 char signature[13] = {0}; 2036 2037 // Xen cpuid leaves can be found 0x100 aligned boundary starting 2038 // from 0x40000000 until 0x40010000. 2039 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2040 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2041 detect_virt_stub(leaf, registers); 2042 memcpy(signature, ®isters[1], 12); 2043 2044 if (strncmp("VMwareVMware", signature, 12) == 0) { 2045 Abstract_VM_Version::_detected_virtualization = VMWare; 2046 // check for extended metrics from guestlib 2047 VirtualizationSupport::initialize(); 2048 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2049 Abstract_VM_Version::_detected_virtualization = HyperV; 2050 #ifdef _WINDOWS 2051 // CPUID leaf 0x40000007 is available to the root partition only. 2052 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details. 2053 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2054 detect_virt_stub(0x40000007, registers); 2055 if ((registers[0] != 0x0) || 2056 (registers[1] != 0x0) || 2057 (registers[2] != 0x0) || 2058 (registers[3] != 0x0)) { 2059 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2060 } 2061 #endif 2062 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2063 Abstract_VM_Version::_detected_virtualization = KVM; 2064 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2065 Abstract_VM_Version::_detected_virtualization = XenHVM; 2066 } 2067 } 2068 } 2069 2070 #ifdef COMPILER2 2071 // Determine if it's running on Cascade Lake using default options. 2072 bool VM_Version::is_default_intel_cascade_lake() { 2073 return FLAG_IS_DEFAULT(UseAVX) && 2074 FLAG_IS_DEFAULT(MaxVectorSize) && 2075 UseAVX > 2 && 2076 is_intel_cascade_lake(); 2077 } 2078 #endif 2079 2080 bool VM_Version::is_intel_cascade_lake() { 2081 return is_intel_skylake() && _stepping >= 5; 2082 } 2083 2084 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2085 // for implementing the array copy and clear operations. 2086 // The Intel platforms that supports the serialize instruction 2087 // has improved implementation of 64-byte load/stores and so the default 2088 // threshold is set to 0 for these platforms. 2089 int VM_Version::avx3_threshold() { 2090 return (is_intel_server_family() && 2091 supports_serialize() && 2092 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2093 } 2094 2095 void VM_Version::clear_apx_test_state() { 2096 clear_apx_test_state_stub(); 2097 } 2098 2099 static bool _vm_version_initialized = false; 2100 2101 void VM_Version::initialize() { 2102 ResourceMark rm; 2103 2104 // Making this stub must be FIRST use of assembler 2105 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2106 if (stub_blob == nullptr) { 2107 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2108 } 2109 CodeBuffer c(stub_blob); 2110 VM_Version_StubGenerator g(&c); 2111 2112 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2113 g.generate_get_cpu_info()); 2114 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2115 g.generate_detect_virt()); 2116 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, 2117 g.clear_apx_test_state()); 2118 get_processor_features(); 2119 2120 Assembler::precompute_instructions(); 2121 2122 if (VM_Version::supports_hv()) { // Supports hypervisor 2123 check_virtualizations(); 2124 } 2125 _vm_version_initialized = true; 2126 } 2127 2128 typedef enum { 2129 CPU_FAMILY_8086_8088 = 0, 2130 CPU_FAMILY_INTEL_286 = 2, 2131 CPU_FAMILY_INTEL_386 = 3, 2132 CPU_FAMILY_INTEL_486 = 4, 2133 CPU_FAMILY_PENTIUM = 5, 2134 CPU_FAMILY_PENTIUMPRO = 6, // Same family several models 2135 CPU_FAMILY_PENTIUM_4 = 0xF 2136 } FamilyFlag; 2137 2138 typedef enum { 2139 RDTSCP_FLAG = 0x08000000, // bit 27 2140 INTEL64_FLAG = 0x20000000 // bit 29 2141 } _featureExtendedEdxFlag; 2142 2143 typedef enum { 2144 FPU_FLAG = 0x00000001, 2145 VME_FLAG = 0x00000002, 2146 DE_FLAG = 0x00000004, 2147 PSE_FLAG = 0x00000008, 2148 TSC_FLAG = 0x00000010, 2149 MSR_FLAG = 0x00000020, 2150 PAE_FLAG = 0x00000040, 2151 MCE_FLAG = 0x00000080, 2152 CX8_FLAG = 0x00000100, 2153 APIC_FLAG = 0x00000200, 2154 SEP_FLAG = 0x00000800, 2155 MTRR_FLAG = 0x00001000, 2156 PGE_FLAG = 0x00002000, 2157 MCA_FLAG = 0x00004000, 2158 CMOV_FLAG = 0x00008000, 2159 PAT_FLAG = 0x00010000, 2160 PSE36_FLAG = 0x00020000, 2161 PSNUM_FLAG = 0x00040000, 2162 CLFLUSH_FLAG = 0x00080000, 2163 DTS_FLAG = 0x00200000, 2164 ACPI_FLAG = 0x00400000, 2165 MMX_FLAG = 0x00800000, 2166 FXSR_FLAG = 0x01000000, 2167 SSE_FLAG = 0x02000000, 2168 SSE2_FLAG = 0x04000000, 2169 SS_FLAG = 0x08000000, 2170 HTT_FLAG = 0x10000000, 2171 TM_FLAG = 0x20000000 2172 } FeatureEdxFlag; 2173 2174 static BufferBlob* cpuid_brand_string_stub_blob; 2175 static const int cpuid_brand_string_stub_size = 550; 2176 2177 extern "C" { 2178 typedef void (*getCPUIDBrandString_stub_t)(void*); 2179 } 2180 2181 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2182 2183 // VM_Version statics 2184 enum { 2185 ExtendedFamilyIdLength_INTEL = 16, 2186 ExtendedFamilyIdLength_AMD = 24 2187 }; 2188 2189 const size_t VENDOR_LENGTH = 13; 2190 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2191 static char* _cpu_brand_string = nullptr; 2192 static int64_t _max_qualified_cpu_frequency = 0; 2193 2194 static int _no_of_threads = 0; 2195 static int _no_of_cores = 0; 2196 2197 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2198 "8086/8088", 2199 "", 2200 "286", 2201 "386", 2202 "486", 2203 "Pentium", 2204 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2205 "", 2206 "", 2207 "", 2208 "", 2209 "", 2210 "", 2211 "", 2212 "", 2213 "Pentium 4" 2214 }; 2215 2216 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2217 "", 2218 "", 2219 "", 2220 "", 2221 "5x86", 2222 "K5/K6", 2223 "Athlon/AthlonXP", 2224 "", 2225 "", 2226 "", 2227 "", 2228 "", 2229 "", 2230 "", 2231 "", 2232 "Opteron/Athlon64", 2233 "Opteron QC/Phenom", // Barcelona et.al. 2234 "", 2235 "", 2236 "", 2237 "", 2238 "", 2239 "", 2240 "Zen" 2241 }; 2242 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2243 // September 2013, Vol 3C Table 35-1 2244 const char* const _model_id_pentium_pro[] = { 2245 "", 2246 "Pentium Pro", 2247 "", 2248 "Pentium II model 3", 2249 "", 2250 "Pentium II model 5/Xeon/Celeron", 2251 "Celeron", 2252 "Pentium III/Pentium III Xeon", 2253 "Pentium III/Pentium III Xeon", 2254 "Pentium M model 9", // Yonah 2255 "Pentium III, model A", 2256 "Pentium III, model B", 2257 "", 2258 "Pentium M model D", // Dothan 2259 "", 2260 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2261 "", 2262 "", 2263 "", 2264 "", 2265 "", 2266 "", 2267 "Celeron", // 0x16 Celeron 65nm 2268 "Core 2", // 0x17 Penryn / Harpertown 2269 "", 2270 "", 2271 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2272 "Atom", // 0x1B Z5xx series Silverthorn 2273 "", 2274 "Core 2", // 0x1D Dunnington (6-core) 2275 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2276 "", 2277 "", 2278 "", 2279 "", 2280 "", 2281 "", 2282 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2283 "", 2284 "", 2285 "", // 0x28 2286 "", 2287 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2288 "", 2289 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2290 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2291 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2292 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2293 "", 2294 "", 2295 "", 2296 "", 2297 "", 2298 "", 2299 "", 2300 "", 2301 "", 2302 "", 2303 "Ivy Bridge", // 0x3a 2304 "", 2305 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2306 "", // 0x3d "Next Generation Intel Core Processor" 2307 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2308 "", // 0x3f "Future Generation Intel Xeon Processor" 2309 "", 2310 "", 2311 "", 2312 "", 2313 "", 2314 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2315 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2316 nullptr 2317 }; 2318 2319 /* Brand ID is for back compatibility 2320 * Newer CPUs uses the extended brand string */ 2321 const char* const _brand_id[] = { 2322 "", 2323 "Celeron processor", 2324 "Pentium III processor", 2325 "Intel Pentium III Xeon processor", 2326 "", 2327 "", 2328 "", 2329 "", 2330 "Intel Pentium 4 processor", 2331 nullptr 2332 }; 2333 2334 2335 const char* const _feature_edx_id[] = { 2336 "On-Chip FPU", 2337 "Virtual Mode Extensions", 2338 "Debugging Extensions", 2339 "Page Size Extensions", 2340 "Time Stamp Counter", 2341 "Model Specific Registers", 2342 "Physical Address Extension", 2343 "Machine Check Exceptions", 2344 "CMPXCHG8B Instruction", 2345 "On-Chip APIC", 2346 "", 2347 "Fast System Call", 2348 "Memory Type Range Registers", 2349 "Page Global Enable", 2350 "Machine Check Architecture", 2351 "Conditional Mov Instruction", 2352 "Page Attribute Table", 2353 "36-bit Page Size Extension", 2354 "Processor Serial Number", 2355 "CLFLUSH Instruction", 2356 "", 2357 "Debug Trace Store feature", 2358 "ACPI registers in MSR space", 2359 "Intel Architecture MMX Technology", 2360 "Fast Float Point Save and Restore", 2361 "Streaming SIMD extensions", 2362 "Streaming SIMD extensions 2", 2363 "Self-Snoop", 2364 "Hyper Threading", 2365 "Thermal Monitor", 2366 "", 2367 "Pending Break Enable" 2368 }; 2369 2370 const char* const _feature_extended_edx_id[] = { 2371 "", 2372 "", 2373 "", 2374 "", 2375 "", 2376 "", 2377 "", 2378 "", 2379 "", 2380 "", 2381 "", 2382 "SYSCALL/SYSRET", 2383 "", 2384 "", 2385 "", 2386 "", 2387 "", 2388 "", 2389 "", 2390 "", 2391 "Execute Disable Bit", 2392 "", 2393 "", 2394 "", 2395 "", 2396 "", 2397 "", 2398 "RDTSCP", 2399 "", 2400 "Intel 64 Architecture", 2401 "", 2402 "" 2403 }; 2404 2405 const char* const _feature_ecx_id[] = { 2406 "Streaming SIMD Extensions 3", 2407 "PCLMULQDQ", 2408 "64-bit DS Area", 2409 "MONITOR/MWAIT instructions", 2410 "CPL Qualified Debug Store", 2411 "Virtual Machine Extensions", 2412 "Safer Mode Extensions", 2413 "Enhanced Intel SpeedStep technology", 2414 "Thermal Monitor 2", 2415 "Supplemental Streaming SIMD Extensions 3", 2416 "L1 Context ID", 2417 "", 2418 "Fused Multiply-Add", 2419 "CMPXCHG16B", 2420 "xTPR Update Control", 2421 "Perfmon and Debug Capability", 2422 "", 2423 "Process-context identifiers", 2424 "Direct Cache Access", 2425 "Streaming SIMD extensions 4.1", 2426 "Streaming SIMD extensions 4.2", 2427 "x2APIC", 2428 "MOVBE", 2429 "Popcount instruction", 2430 "TSC-Deadline", 2431 "AESNI", 2432 "XSAVE", 2433 "OSXSAVE", 2434 "AVX", 2435 "F16C", 2436 "RDRAND", 2437 "" 2438 }; 2439 2440 const char* const _feature_extended_ecx_id[] = { 2441 "LAHF/SAHF instruction support", 2442 "Core multi-processor legacy mode", 2443 "", 2444 "", 2445 "", 2446 "Advanced Bit Manipulations: LZCNT", 2447 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2448 "Misaligned SSE mode", 2449 "", 2450 "", 2451 "", 2452 "", 2453 "", 2454 "", 2455 "", 2456 "", 2457 "", 2458 "", 2459 "", 2460 "", 2461 "", 2462 "", 2463 "", 2464 "", 2465 "", 2466 "", 2467 "", 2468 "", 2469 "", 2470 "", 2471 "", 2472 "" 2473 }; 2474 2475 void VM_Version::initialize_tsc(void) { 2476 ResourceMark rm; 2477 2478 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2479 if (cpuid_brand_string_stub_blob == nullptr) { 2480 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2481 } 2482 CodeBuffer c(cpuid_brand_string_stub_blob); 2483 VM_Version_StubGenerator g(&c); 2484 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2485 g.generate_getCPUIDBrandString()); 2486 } 2487 2488 const char* VM_Version::cpu_model_description(void) { 2489 uint32_t cpu_family = extended_cpu_family(); 2490 uint32_t cpu_model = extended_cpu_model(); 2491 const char* model = nullptr; 2492 2493 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2494 for (uint32_t i = 0; i <= cpu_model; i++) { 2495 model = _model_id_pentium_pro[i]; 2496 if (model == nullptr) { 2497 break; 2498 } 2499 } 2500 } 2501 return model; 2502 } 2503 2504 const char* VM_Version::cpu_brand_string(void) { 2505 if (_cpu_brand_string == nullptr) { 2506 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2507 if (nullptr == _cpu_brand_string) { 2508 return nullptr; 2509 } 2510 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2511 if (ret_val != OS_OK) { 2512 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2513 _cpu_brand_string = nullptr; 2514 } 2515 } 2516 return _cpu_brand_string; 2517 } 2518 2519 const char* VM_Version::cpu_brand(void) { 2520 const char* brand = nullptr; 2521 2522 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2523 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2524 brand = _brand_id[0]; 2525 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2526 brand = _brand_id[i]; 2527 } 2528 } 2529 return brand; 2530 } 2531 2532 bool VM_Version::cpu_is_em64t(void) { 2533 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2534 } 2535 2536 bool VM_Version::is_netburst(void) { 2537 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2538 } 2539 2540 bool VM_Version::supports_tscinv_ext(void) { 2541 if (!supports_tscinv_bit()) { 2542 return false; 2543 } 2544 2545 if (is_intel()) { 2546 return true; 2547 } 2548 2549 if (is_amd()) { 2550 return !is_amd_Barcelona(); 2551 } 2552 2553 if (is_hygon()) { 2554 return true; 2555 } 2556 2557 return false; 2558 } 2559 2560 void VM_Version::resolve_cpu_information_details(void) { 2561 2562 // in future we want to base this information on proper cpu 2563 // and cache topology enumeration such as: 2564 // Intel 64 Architecture Processor Topology Enumeration 2565 // which supports system cpu and cache topology enumeration 2566 // either using 2xAPICIDs or initial APICIDs 2567 2568 // currently only rough cpu information estimates 2569 // which will not necessarily reflect the exact configuration of the system 2570 2571 // this is the number of logical hardware threads 2572 // visible to the operating system 2573 _no_of_threads = os::processor_count(); 2574 2575 // find out number of threads per cpu package 2576 int threads_per_package = threads_per_core() * cores_per_cpu(); 2577 2578 // use amount of threads visible to the process in order to guess number of sockets 2579 _no_of_sockets = _no_of_threads / threads_per_package; 2580 2581 // process might only see a subset of the total number of threads 2582 // from a single processor package. Virtualization/resource management for example. 2583 // If so then just write a hard 1 as num of pkgs. 2584 if (0 == _no_of_sockets) { 2585 _no_of_sockets = 1; 2586 } 2587 2588 // estimate the number of cores 2589 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2590 } 2591 2592 2593 const char* VM_Version::cpu_family_description(void) { 2594 int cpu_family_id = extended_cpu_family(); 2595 if (is_amd()) { 2596 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2597 return _family_id_amd[cpu_family_id]; 2598 } 2599 } 2600 if (is_intel()) { 2601 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2602 return cpu_model_description(); 2603 } 2604 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2605 return _family_id_intel[cpu_family_id]; 2606 } 2607 } 2608 if (is_hygon()) { 2609 return "Dhyana"; 2610 } 2611 return "Unknown x86"; 2612 } 2613 2614 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2615 assert(buf != nullptr, "buffer is null!"); 2616 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2617 2618 const char* cpu_type = nullptr; 2619 const char* x64 = nullptr; 2620 2621 if (is_intel()) { 2622 cpu_type = "Intel"; 2623 x64 = cpu_is_em64t() ? " Intel64" : ""; 2624 } else if (is_amd()) { 2625 cpu_type = "AMD"; 2626 x64 = cpu_is_em64t() ? " AMD64" : ""; 2627 } else if (is_hygon()) { 2628 cpu_type = "Hygon"; 2629 x64 = cpu_is_em64t() ? " AMD64" : ""; 2630 } else { 2631 cpu_type = "Unknown x86"; 2632 x64 = cpu_is_em64t() ? " x86_64" : ""; 2633 } 2634 2635 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2636 cpu_type, 2637 cpu_family_description(), 2638 supports_ht() ? " (HT)" : "", 2639 supports_sse3() ? " SSE3" : "", 2640 supports_ssse3() ? " SSSE3" : "", 2641 supports_sse4_1() ? " SSE4.1" : "", 2642 supports_sse4_2() ? " SSE4.2" : "", 2643 supports_sse4a() ? " SSE4A" : "", 2644 is_netburst() ? " Netburst" : "", 2645 is_intel_family_core() ? " Core" : "", 2646 x64); 2647 2648 return OS_OK; 2649 } 2650 2651 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2652 assert(buf != nullptr, "buffer is null!"); 2653 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2654 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2655 2656 // invoke newly generated asm code to fetch CPU Brand String 2657 getCPUIDBrandString_stub(&_cpuid_info); 2658 2659 // fetch results into buffer 2660 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2661 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2662 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2663 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2664 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2665 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2666 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2667 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2668 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2669 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2670 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2671 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2672 2673 return OS_OK; 2674 } 2675 2676 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2677 guarantee(buf != nullptr, "buffer is null!"); 2678 guarantee(buf_len > 0, "buffer len not enough!"); 2679 2680 unsigned int flag = 0; 2681 unsigned int fi = 0; 2682 size_t written = 0; 2683 const char* prefix = ""; 2684 2685 #define WRITE_TO_BUF(string) \ 2686 { \ 2687 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2688 if (res < 0) { \ 2689 return buf_len - 1; \ 2690 } \ 2691 written += res; \ 2692 if (prefix[0] == '\0') { \ 2693 prefix = ", "; \ 2694 } \ 2695 } 2696 2697 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2698 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2699 continue; /* no hyperthreading */ 2700 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2701 continue; /* no fast system call */ 2702 } 2703 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2704 WRITE_TO_BUF(_feature_edx_id[fi]); 2705 } 2706 } 2707 2708 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2709 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2710 WRITE_TO_BUF(_feature_ecx_id[fi]); 2711 } 2712 } 2713 2714 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2715 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2716 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2717 } 2718 } 2719 2720 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2721 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2722 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2723 } 2724 } 2725 2726 if (supports_tscinv_bit()) { 2727 WRITE_TO_BUF("Invariant TSC"); 2728 } 2729 2730 return written; 2731 } 2732 2733 /** 2734 * Write a detailed description of the cpu to a given buffer, including 2735 * feature set. 2736 */ 2737 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2738 assert(buf != nullptr, "buffer is null!"); 2739 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2740 2741 static const char* unknown = "<unknown>"; 2742 char vendor_id[VENDOR_LENGTH]; 2743 const char* family = nullptr; 2744 const char* model = nullptr; 2745 const char* brand = nullptr; 2746 int outputLen = 0; 2747 2748 family = cpu_family_description(); 2749 if (family == nullptr) { 2750 family = unknown; 2751 } 2752 2753 model = cpu_model_description(); 2754 if (model == nullptr) { 2755 model = unknown; 2756 } 2757 2758 brand = cpu_brand_string(); 2759 2760 if (brand == nullptr) { 2761 brand = cpu_brand(); 2762 if (brand == nullptr) { 2763 brand = unknown; 2764 } 2765 } 2766 2767 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2768 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2769 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2770 vendor_id[VENDOR_LENGTH-1] = '\0'; 2771 2772 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2773 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2774 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2775 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2776 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2777 "Supports: ", 2778 brand, 2779 vendor_id, 2780 family, 2781 extended_cpu_family(), 2782 model, 2783 extended_cpu_model(), 2784 cpu_stepping(), 2785 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2786 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2787 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2788 _cpuid_info.std_cpuid1_eax.value, 2789 _cpuid_info.std_cpuid1_ebx.value, 2790 _cpuid_info.std_cpuid1_ecx.value, 2791 _cpuid_info.std_cpuid1_edx.value, 2792 _cpuid_info.ext_cpuid1_eax, 2793 _cpuid_info.ext_cpuid1_ebx, 2794 _cpuid_info.ext_cpuid1_ecx, 2795 _cpuid_info.ext_cpuid1_edx); 2796 2797 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2798 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2799 return OS_ERR; 2800 } 2801 2802 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2803 2804 return OS_OK; 2805 } 2806 2807 2808 // Fill in Abstract_VM_Version statics 2809 void VM_Version::initialize_cpu_information() { 2810 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2811 assert(!_initialized, "shouldn't be initialized yet"); 2812 resolve_cpu_information_details(); 2813 2814 // initialize cpu_name and cpu_desc 2815 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2816 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2817 _initialized = true; 2818 } 2819 2820 /** 2821 * For information about extracting the frequency from the cpu brand string, please see: 2822 * 2823 * Intel Processor Identification and the CPUID Instruction 2824 * Application Note 485 2825 * May 2012 2826 * 2827 * The return value is the frequency in Hz. 2828 */ 2829 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2830 const char* const brand_string = cpu_brand_string(); 2831 if (brand_string == nullptr) { 2832 return 0; 2833 } 2834 const int64_t MEGA = 1000000; 2835 int64_t multiplier = 0; 2836 int64_t frequency = 0; 2837 uint8_t idx = 0; 2838 // The brand string buffer is at most 48 bytes. 2839 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 2840 for (; idx < 48-2; ++idx) { 2841 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2842 // Search brand string for "yHz" where y is M, G, or T. 2843 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2844 if (brand_string[idx] == 'M') { 2845 multiplier = MEGA; 2846 } else if (brand_string[idx] == 'G') { 2847 multiplier = MEGA * 1000; 2848 } else if (brand_string[idx] == 'T') { 2849 multiplier = MEGA * MEGA; 2850 } 2851 break; 2852 } 2853 } 2854 if (multiplier > 0) { 2855 // Compute frequency (in Hz) from brand string. 2856 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2857 frequency = (brand_string[idx-4] - '0') * multiplier; 2858 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2859 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2860 } else { // format is "xxxx" 2861 frequency = (brand_string[idx-4] - '0') * 1000; 2862 frequency += (brand_string[idx-3] - '0') * 100; 2863 frequency += (brand_string[idx-2] - '0') * 10; 2864 frequency += (brand_string[idx-1] - '0'); 2865 frequency *= multiplier; 2866 } 2867 } 2868 return frequency; 2869 } 2870 2871 2872 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2873 if (_max_qualified_cpu_frequency == 0) { 2874 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2875 } 2876 return _max_qualified_cpu_frequency; 2877 } 2878 2879 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const { 2880 VM_Features vm_features; 2881 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2882 vm_features.set_feature(CPU_CX8); 2883 if (std_cpuid1_edx.bits.cmov != 0) 2884 vm_features.set_feature(CPU_CMOV); 2885 if (std_cpuid1_edx.bits.clflush != 0) 2886 vm_features.set_feature(CPU_FLUSH); 2887 // clflush should always be available on x86_64 2888 // if not we are in real trouble because we rely on it 2889 // to flush the code cache. 2890 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available"); 2891 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2892 ext_cpuid1_edx.bits.fxsr != 0)) 2893 vm_features.set_feature(CPU_FXSR); 2894 // HT flag is set for multi-core processors also. 2895 if (threads_per_core() > 1) 2896 vm_features.set_feature(CPU_HT); 2897 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2898 ext_cpuid1_edx.bits.mmx != 0)) 2899 vm_features.set_feature(CPU_MMX); 2900 if (std_cpuid1_edx.bits.sse != 0) 2901 vm_features.set_feature(CPU_SSE); 2902 if (std_cpuid1_edx.bits.sse2 != 0) 2903 vm_features.set_feature(CPU_SSE2); 2904 if (std_cpuid1_ecx.bits.sse3 != 0) 2905 vm_features.set_feature(CPU_SSE3); 2906 if (std_cpuid1_ecx.bits.ssse3 != 0) 2907 vm_features.set_feature(CPU_SSSE3); 2908 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2909 vm_features.set_feature(CPU_SSE4_1); 2910 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2911 vm_features.set_feature(CPU_SSE4_2); 2912 if (std_cpuid1_ecx.bits.popcnt != 0) 2913 vm_features.set_feature(CPU_POPCNT); 2914 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 2915 xem_xcr0_eax.bits.apx_f != 0) { 2916 vm_features.set_feature(CPU_APX_F); 2917 } 2918 if (std_cpuid1_ecx.bits.avx != 0 && 2919 std_cpuid1_ecx.bits.osxsave != 0 && 2920 xem_xcr0_eax.bits.sse != 0 && 2921 xem_xcr0_eax.bits.ymm != 0) { 2922 vm_features.set_feature(CPU_AVX); 2923 vm_features.set_feature(CPU_VZEROUPPER); 2924 if (sefsl1_cpuid7_eax.bits.sha512 != 0) 2925 vm_features.set_feature(CPU_SHA512); 2926 if (std_cpuid1_ecx.bits.f16c != 0) 2927 vm_features.set_feature(CPU_F16C); 2928 if (sef_cpuid7_ebx.bits.avx2 != 0) { 2929 vm_features.set_feature(CPU_AVX2); 2930 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 2931 vm_features.set_feature(CPU_AVX_IFMA); 2932 } 2933 if (sef_cpuid7_ecx.bits.gfni != 0) 2934 vm_features.set_feature(CPU_GFNI); 2935 if (sef_cpuid7_ebx.bits.avx512f != 0 && 2936 xem_xcr0_eax.bits.opmask != 0 && 2937 xem_xcr0_eax.bits.zmm512 != 0 && 2938 xem_xcr0_eax.bits.zmm32 != 0) { 2939 vm_features.set_feature(CPU_AVX512F); 2940 if (sef_cpuid7_ebx.bits.avx512cd != 0) 2941 vm_features.set_feature(CPU_AVX512CD); 2942 if (sef_cpuid7_ebx.bits.avx512dq != 0) 2943 vm_features.set_feature(CPU_AVX512DQ); 2944 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 2945 vm_features.set_feature(CPU_AVX512_IFMA); 2946 if (sef_cpuid7_ebx.bits.avx512pf != 0) 2947 vm_features.set_feature(CPU_AVX512PF); 2948 if (sef_cpuid7_ebx.bits.avx512er != 0) 2949 vm_features.set_feature(CPU_AVX512ER); 2950 if (sef_cpuid7_ebx.bits.avx512bw != 0) 2951 vm_features.set_feature(CPU_AVX512BW); 2952 if (sef_cpuid7_ebx.bits.avx512vl != 0) 2953 vm_features.set_feature(CPU_AVX512VL); 2954 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2955 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); 2956 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2957 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); 2958 if (sef_cpuid7_ecx.bits.vaes != 0) 2959 vm_features.set_feature(CPU_AVX512_VAES); 2960 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 2961 vm_features.set_feature(CPU_AVX512_VNNI); 2962 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2963 vm_features.set_feature(CPU_AVX512_BITALG); 2964 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2965 vm_features.set_feature(CPU_AVX512_VBMI); 2966 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2967 vm_features.set_feature(CPU_AVX512_VBMI2); 2968 } 2969 if (is_intel()) { 2970 if (sefsl1_cpuid7_edx.bits.avx10 != 0 && 2971 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 && 2972 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 && 2973 xem_xcr0_eax.bits.opmask != 0 && 2974 xem_xcr0_eax.bits.zmm512 != 0 && 2975 xem_xcr0_eax.bits.zmm32 != 0) { 2976 vm_features.set_feature(CPU_AVX10_1); 2977 vm_features.set_feature(CPU_AVX512F); 2978 vm_features.set_feature(CPU_AVX512CD); 2979 vm_features.set_feature(CPU_AVX512DQ); 2980 vm_features.set_feature(CPU_AVX512PF); 2981 vm_features.set_feature(CPU_AVX512ER); 2982 vm_features.set_feature(CPU_AVX512BW); 2983 vm_features.set_feature(CPU_AVX512VL); 2984 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); 2985 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); 2986 vm_features.set_feature(CPU_AVX512_VAES); 2987 vm_features.set_feature(CPU_AVX512_VNNI); 2988 vm_features.set_feature(CPU_AVX512_BITALG); 2989 vm_features.set_feature(CPU_AVX512_VBMI); 2990 vm_features.set_feature(CPU_AVX512_VBMI2); 2991 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) { 2992 vm_features.set_feature(CPU_AVX10_2); 2993 } 2994 } 2995 } 2996 } 2997 2998 if (std_cpuid1_ecx.bits.hv != 0) 2999 vm_features.set_feature(CPU_HV); 3000 if (sef_cpuid7_ebx.bits.bmi1 != 0) 3001 vm_features.set_feature(CPU_BMI1); 3002 if (std_cpuid1_edx.bits.tsc != 0) 3003 vm_features.set_feature(CPU_TSC); 3004 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3005 vm_features.set_feature(CPU_TSCINV_BIT); 3006 if (std_cpuid1_ecx.bits.aes != 0) 3007 vm_features.set_feature(CPU_AES); 3008 if (ext_cpuid1_ecx.bits.lzcnt != 0) 3009 vm_features.set_feature(CPU_LZCNT); 3010 if (ext_cpuid1_ecx.bits.prefetchw != 0) 3011 vm_features.set_feature(CPU_3DNOW_PREFETCH); 3012 if (sef_cpuid7_ebx.bits.erms != 0) 3013 vm_features.set_feature(CPU_ERMS); 3014 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3015 vm_features.set_feature(CPU_FSRM); 3016 if (std_cpuid1_ecx.bits.clmul != 0) 3017 vm_features.set_feature(CPU_CLMUL); 3018 if (sef_cpuid7_ebx.bits.rtm != 0) 3019 vm_features.set_feature(CPU_RTM); 3020 if (sef_cpuid7_ebx.bits.adx != 0) 3021 vm_features.set_feature(CPU_ADX); 3022 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3023 vm_features.set_feature(CPU_BMI2); 3024 if (sef_cpuid7_ebx.bits.sha != 0) 3025 vm_features.set_feature(CPU_SHA); 3026 if (std_cpuid1_ecx.bits.fma != 0) 3027 vm_features.set_feature(CPU_FMA); 3028 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3029 vm_features.set_feature(CPU_FLUSHOPT); 3030 if (sef_cpuid7_ebx.bits.clwb != 0) 3031 vm_features.set_feature(CPU_CLWB); 3032 if (ext_cpuid1_edx.bits.rdtscp != 0) 3033 vm_features.set_feature(CPU_RDTSCP); 3034 if (sef_cpuid7_ecx.bits.rdpid != 0) 3035 vm_features.set_feature(CPU_RDPID); 3036 3037 // AMD|Hygon additional features. 3038 if (is_amd_family()) { 3039 // PREFETCHW was checked above, check TDNOW here. 3040 if ((ext_cpuid1_edx.bits.tdnow != 0)) 3041 vm_features.set_feature(CPU_3DNOW_PREFETCH); 3042 if (ext_cpuid1_ecx.bits.sse4a != 0) 3043 vm_features.set_feature(CPU_SSE4A); 3044 } 3045 3046 // Intel additional features. 3047 if (is_intel()) { 3048 if (sef_cpuid7_edx.bits.serialize != 0) 3049 vm_features.set_feature(CPU_SERIALIZE); 3050 if (sef_cpuid7_edx.bits.hybrid != 0) 3051 vm_features.set_feature(CPU_HYBRID); 3052 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0) 3053 vm_features.set_feature(CPU_AVX512_FP16); 3054 } 3055 3056 // ZX additional features. 3057 if (is_zx()) { 3058 // We do not know if these are supported by ZX, so we cannot trust 3059 // common CPUID bit for them. 3060 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?"); 3061 vm_features.clear_feature(CPU_CLWB); 3062 } 3063 3064 // Protection key features. 3065 if (sef_cpuid7_ecx.bits.pku != 0) { 3066 vm_features.set_feature(CPU_PKU); 3067 } 3068 if (sef_cpuid7_ecx.bits.ospke != 0) { 3069 vm_features.set_feature(CPU_OSPKE); 3070 } 3071 3072 // Control flow enforcement (CET) features. 3073 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3074 vm_features.set_feature(CPU_CET_SS); 3075 } 3076 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3077 vm_features.set_feature(CPU_CET_IBT); 3078 } 3079 3080 // Composite features. 3081 if (supports_tscinv_bit() && 3082 ((is_amd_family() && !is_amd_Barcelona()) || 3083 is_intel_tsc_synched_at_init())) { 3084 vm_features.set_feature(CPU_TSCINV); 3085 } 3086 return vm_features; 3087 } 3088 3089 bool VM_Version::os_supports_avx_vectors() { 3090 bool retVal = false; 3091 int nreg = 4; 3092 if (supports_evex()) { 3093 // Verify that OS save/restore all bits of EVEX registers 3094 // during signal processing. 3095 retVal = true; 3096 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3097 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3098 retVal = false; 3099 break; 3100 } 3101 } 3102 } else if (supports_avx()) { 3103 // Verify that OS save/restore all bits of AVX registers 3104 // during signal processing. 3105 retVal = true; 3106 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3107 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3108 retVal = false; 3109 break; 3110 } 3111 } 3112 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3113 if (retVal == false) { 3114 // Verify that OS save/restore all bits of EVEX registers 3115 // during signal processing. 3116 retVal = true; 3117 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3118 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3119 retVal = false; 3120 break; 3121 } 3122 } 3123 } 3124 } 3125 return retVal; 3126 } 3127 3128 bool VM_Version::os_supports_apx_egprs() { 3129 if (!supports_apx_f()) { 3130 return false; 3131 } 3132 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3133 _cpuid_info.apx_save[1] != egpr_test_value()) { 3134 return false; 3135 } 3136 return true; 3137 } 3138 3139 uint VM_Version::cores_per_cpu() { 3140 uint result = 1; 3141 if (is_intel()) { 3142 bool supports_topology = supports_processor_topology(); 3143 if (supports_topology) { 3144 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3145 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3146 } 3147 if (!supports_topology || result == 0) { 3148 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3149 } 3150 } else if (is_amd_family()) { 3151 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1; 3152 if (cpu_family() >= 0x17) { // Zen or later 3153 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3154 } 3155 } else if (is_zx()) { 3156 bool supports_topology = supports_processor_topology(); 3157 if (supports_topology) { 3158 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3159 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3160 } 3161 if (!supports_topology || result == 0) { 3162 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3163 } 3164 } 3165 return result; 3166 } 3167 3168 uint VM_Version::threads_per_core() { 3169 uint result = 1; 3170 if (is_intel() && supports_processor_topology()) { 3171 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3172 } else if (is_zx() && supports_processor_topology()) { 3173 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3174 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3175 if (cpu_family() >= 0x17) { 3176 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3177 } else { 3178 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3179 cores_per_cpu(); 3180 } 3181 } 3182 return (result == 0 ? 1 : result); 3183 } 3184 3185 uint VM_Version::L1_line_size() { 3186 uint result = 0; 3187 if (is_intel()) { 3188 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3189 } else if (is_amd_family()) { 3190 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3191 } else if (is_zx()) { 3192 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3193 } 3194 if (result < 32) // not defined ? 3195 result = 32; // 32 bytes by default on x86 and other x64 3196 return result; 3197 } 3198 3199 bool VM_Version::is_intel_tsc_synched_at_init() { 3200 if (is_intel_family_core()) { 3201 uint32_t ext_model = extended_cpu_model(); 3202 if (ext_model == CPU_MODEL_NEHALEM_EP || 3203 ext_model == CPU_MODEL_WESTMERE_EP || 3204 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3205 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3206 // <= 2-socket invariant tsc support. EX versions are usually used 3207 // in > 2-socket systems and likely don't synchronize tscs at 3208 // initialization. 3209 // Code that uses tsc values must be prepared for them to arbitrarily 3210 // jump forward or backward. 3211 return true; 3212 } 3213 } 3214 return false; 3215 } 3216 3217 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3218 // Hardware prefetching (distance/size in bytes): 3219 // Pentium 3 - 64 / 32 3220 // Pentium 4 - 256 / 128 3221 // Athlon - 64 / 32 ???? 3222 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3223 // Core - 128 / 64 3224 // 3225 // Software prefetching (distance in bytes / instruction with best score): 3226 // Pentium 3 - 128 / prefetchnta 3227 // Pentium 4 - 512 / prefetchnta 3228 // Athlon - 128 / prefetchnta 3229 // Opteron - 256 / prefetchnta 3230 // Core - 256 / prefetchnta 3231 // It will be used only when AllocatePrefetchStyle > 0 3232 3233 if (is_amd_family()) { // AMD | Hygon 3234 if (supports_sse2()) { 3235 return 256; // Opteron 3236 } else { 3237 return 128; // Athlon 3238 } 3239 } else { // Intel 3240 if (supports_sse3() && is_intel_server_family()) { 3241 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3242 return 192; 3243 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3244 return 384; 3245 } 3246 } 3247 if (supports_sse2()) { 3248 if (is_intel_server_family()) { 3249 return 256; // Pentium M, Core, Core2 3250 } else { 3251 return 512; // Pentium 4 3252 } 3253 } else { 3254 return 128; // Pentium 3 (and all other old CPUs) 3255 } 3256 } 3257 } 3258 3259 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3260 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3261 switch (id) { 3262 case vmIntrinsics::_floatToFloat16: 3263 case vmIntrinsics::_float16ToFloat: 3264 if (!supports_float16()) { 3265 return false; 3266 } 3267 break; 3268 default: 3269 break; 3270 } 3271 return true; 3272 } 3273 3274 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) { 3275 int i = 0; 3276 ss.join([&]() { 3277 const char* str = nullptr; 3278 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) { 3279 if (features.supports_feature((VM_Version::Feature_Flag)i)) { 3280 str = _features_names[i]; 3281 } 3282 i += 1; 3283 } 3284 return str; 3285 }, ", "); 3286 } 3287 3288 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) { 3289 VM_Features* features = (VM_Features*)features_buffer; 3290 insert_features_names(*features, ss); 3291 } 3292 3293 void VM_Version::get_missing_features_name(void* features_buffer, stringStream& ss) { 3294 VM_Features* features_to_test = (VM_Features*)features_buffer; 3295 int i = 0; 3296 ss.join([&]() { 3297 const char* str = nullptr; 3298 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) { 3299 Feature_Flag flag = (Feature_Flag)i; 3300 if (features_to_test->supports_feature(flag) && !_features.supports_feature(flag)) { 3301 str = _features_names[i]; 3302 } 3303 i += 1; 3304 } 3305 return str; 3306 }, ", "); 3307 } 3308 3309 int VM_Version::cpu_features_size() { 3310 return sizeof(VM_Features); 3311 } 3312 3313 void VM_Version::store_cpu_features(void* buf) { 3314 VM_Features copy = _features; 3315 copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache 3316 memcpy(buf, ©, sizeof(VM_Features)); 3317 } 3318 3319 bool VM_Version::supports_features(void* features_buffer) { 3320 VM_Features* features_to_test = (VM_Features*)features_buffer; 3321 return _features.supports_features(features_to_test); 3322 }