/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/ostream.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  return true;
}

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address clear_apx_test_state() {
# define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers; explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by the operating system and not because they were never modified externally.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
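    // (Both this probe and the AC probe above use the classic EFLAGS
    // round-trip: pushf/pop reads the flags, one bit is xor-toggled,
    // push/popf attempts to write it back, and a second pushf/pop reveals
    // whether the bit actually stuck. A bit that cannot be toggled
    // identifies the older CPU generation.)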
183 // 184 __ bind(detect_486); 185 __ mov(rax, rcx); 186 __ xorl(rax, HS_EFL_ID); 187 __ push(rax); 188 __ popf(); 189 __ pushf(); 190 __ pop(rax); 191 __ cmpptr(rcx, rax); 192 __ jccb(Assembler::notEqual, detect_586); 193 194 __ bind(cpu486); 195 __ movl(rax, CPU_FAMILY_486); 196 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 197 __ jmp(done); 198 199 // 200 // At this point, we have a chip which supports the "cpuid" instruction 201 // 202 __ bind(detect_586); 203 __ xorl(rax, rax); 204 __ cpuid(); 205 __ orl(rax, rax); 206 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 207 // value of at least 1, we give up and 208 // assume a 486 209 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 210 __ movl(Address(rsi, 0), rax); 211 __ movl(Address(rsi, 4), rbx); 212 __ movl(Address(rsi, 8), rcx); 213 __ movl(Address(rsi,12), rdx); 214 215 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? 216 __ jccb(Assembler::belowEqual, std_cpuid4); 217 218 // 219 // cpuid(0xB) Processor Topology 220 // 221 __ movl(rax, 0xb); 222 __ xorl(rcx, rcx); // Threads level 223 __ cpuid(); 224 225 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); 226 __ movl(Address(rsi, 0), rax); 227 __ movl(Address(rsi, 4), rbx); 228 __ movl(Address(rsi, 8), rcx); 229 __ movl(Address(rsi,12), rdx); 230 231 __ movl(rax, 0xb); 232 __ movl(rcx, 1); // Cores level 233 __ cpuid(); 234 __ push(rax); 235 __ andl(rax, 0x1f); // Determine if valid topology level 236 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 237 __ andl(rax, 0xffff); 238 __ pop(rax); 239 __ jccb(Assembler::equal, std_cpuid4); 240 241 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); 242 __ movl(Address(rsi, 0), rax); 243 __ movl(Address(rsi, 4), rbx); 244 __ movl(Address(rsi, 8), rcx); 245 __ movl(Address(rsi,12), rdx); 246 247 __ movl(rax, 0xb); 248 __ movl(rcx, 2); // Packages level 249 __ cpuid(); 250 __ push(rax); 251 __ andl(rax, 0x1f); // Determine if valid topology level 252 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 253 __ andl(rax, 0xffff); 254 __ pop(rax); 255 __ jccb(Assembler::equal, std_cpuid4); 256 257 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); 258 __ movl(Address(rsi, 0), rax); 259 __ movl(Address(rsi, 4), rbx); 260 __ movl(Address(rsi, 8), rcx); 261 __ movl(Address(rsi,12), rdx); 262 263 // 264 // cpuid(0x4) Deterministic cache params 265 // 266 __ bind(std_cpuid4); 267 __ movl(rax, 4); 268 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? 
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);   // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);   // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);   // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);   // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);   // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);   // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
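    // Note: the compares above dispatch on the maximum extended leaf in
    // ascending order, while the handlers below are laid out in descending
    // leaf order, so each handler falls through into the queries for every
    // lower leaf that is also supported.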
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
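    // Both conditions must hold before the EGPR probe below is attempted:
    // the CPU must report APX_F in CPUID, and the OS must have set XCR0
    // bit 19, which indicates that XSAVE/XRSTOR actually preserves the
    // extended GPRs (r16-r31) across context switches and signals.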
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS preserves SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
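    // While this test sequence is being generated, UseAVX/UseSSE and the
    // advertised CPU features are temporarily overridden (see the
    // set_evex_cpuFeatures/set_avx_cpuFeatures calls below), since the
    // Assembler asserts that emitted instructions are legal under the
    // currently advertised feature set; the saved values are restored
    // once generation is done.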
488 // 489 int saved_useavx = UseAVX; 490 int saved_usesse = UseSSE; 491 492 // If UseAVX is uninitialized or is set by the user to include EVEX 493 if (use_evex) { 494 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 495 // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10 496 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 497 __ movl(rax, 0x10000); 498 __ andl(rax, Address(rsi, 4)); 499 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 500 __ movl(rbx, 0x80000); 501 __ andl(rbx, Address(rsi, 4)); 502 __ orl(rax, rbx); 503 __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported 504 // check _cpuid_info.xem_xcr0_eax.bits.opmask 505 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 506 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 507 __ movl(rax, 0xE0); 508 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 509 __ cmpl(rax, 0xE0); 510 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 511 512 if (FLAG_IS_DEFAULT(UseAVX)) { 513 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 514 __ movl(rax, Address(rsi, 0)); 515 __ cmpl(rax, 0x50654); // If it is Skylake 516 __ jcc(Assembler::equal, legacy_setup); 517 } 518 // EVEX setup: run in lowest evex mode 519 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 520 UseAVX = 3; 521 UseSSE = 2; 522 #ifdef _WINDOWS 523 // xmm5-xmm15 are not preserved by caller on windows 524 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx 525 __ subptr(rsp, 64); 526 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); 527 __ subptr(rsp, 64); 528 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); 529 __ subptr(rsp, 64); 530 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); 531 #endif // _WINDOWS 532 533 // load value into all 64 bytes of zmm7 register 534 __ movl(rcx, VM_Version::ymm_test_value()); 535 __ movdl(xmm0, rcx); 536 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); 537 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); 538 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); 539 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); 540 VM_Version::clean_cpuFeatures(); 541 __ jmp(save_restore_except); 542 } 543 544 __ bind(legacy_setup); 545 // AVX setup 546 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 547 UseAVX = 1; 548 UseSSE = 2; 549 #ifdef _WINDOWS 550 __ subptr(rsp, 32); 551 __ vmovdqu(Address(rsp, 0), xmm7); 552 __ subptr(rsp, 32); 553 __ vmovdqu(Address(rsp, 0), xmm8); 554 __ subptr(rsp, 32); 555 __ vmovdqu(Address(rsp, 0), xmm15); 556 #endif // _WINDOWS 557 558 // load value into all 32 bytes of ymm7 register 559 __ movl(rcx, VM_Version::ymm_test_value()); 560 561 __ movdl(xmm0, rcx); 562 __ pshufd(xmm0, xmm0, 0x00); 563 __ vinsertf128_high(xmm0, xmm0); 564 __ vmovdqu(xmm7, xmm0); 565 __ vmovdqu(xmm8, xmm0); 566 __ vmovdqu(xmm15, xmm0); 567 VM_Version::clean_cpuFeatures(); 568 569 __ bind(save_restore_except); 570 __ xorl(rsi, rsi); 571 VM_Version::set_cpuinfo_segv_addr(__ pc()); 572 // Generate SEGV 573 __ movl(rax, Address(rsi, 0)); 574 575 VM_Version::set_cpuinfo_cont_addr(__ pc()); 576 // Returns here after signal. Save xmm0 to check it later. 
    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };

  void generate_vzeroupper(Label& L_wrapup) {
# define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
# undef __
  }

  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
# define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
758 // 759 __ bind(detect_486); 760 __ mov(rax, rcx); 761 __ xorl(rax, HS_EFL_ID); 762 __ push(rax); 763 __ popf(); 764 __ pushf(); 765 __ pop(rax); 766 __ cmpptr(rcx, rax); 767 __ jccb(Assembler::notEqual, detect_586); 768 769 __ bind(cpu486); 770 __ movl(rax, CPU_FAMILY_486); 771 __ jmp(done); 772 773 // 774 // At this point, we have a chip which supports the "cpuid" instruction 775 // 776 __ bind(detect_586); 777 __ xorl(rax, rax); 778 __ cpuid(); 779 __ orl(rax, rax); 780 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 781 // value of at least 1, we give up and 782 // assume a 486 783 784 // 785 // Extended cpuid(0x80000000) for processor brand string detection 786 // 787 __ bind(ext_cpuid); 788 __ movl(rax, CPUID_EXTENDED_FN); 789 __ cpuid(); 790 __ cmpl(rax, CPUID_EXTENDED_FN_4); 791 __ jcc(Assembler::below, done); 792 793 // 794 // Extended cpuid(0x80000002) // first 16 bytes in brand string 795 // 796 __ movl(rax, CPUID_EXTENDED_FN_2); 797 __ cpuid(); 798 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); 799 __ movl(Address(rsi, 0), rax); 800 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); 801 __ movl(Address(rsi, 0), rbx); 802 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); 803 __ movl(Address(rsi, 0), rcx); 804 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); 805 __ movl(Address(rsi,0), rdx); 806 807 // 808 // Extended cpuid(0x80000003) // next 16 bytes in brand string 809 // 810 __ movl(rax, CPUID_EXTENDED_FN_3); 811 __ cpuid(); 812 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); 813 __ movl(Address(rsi, 0), rax); 814 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); 815 __ movl(Address(rsi, 0), rbx); 816 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); 817 __ movl(Address(rsi, 0), rcx); 818 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); 819 __ movl(Address(rsi,0), rdx); 820 821 // 822 // Extended cpuid(0x80000004) // last 16 bytes in brand string 823 // 824 __ movl(rax, CPUID_EXTENDED_FN_4); 825 __ cpuid(); 826 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); 827 __ movl(Address(rsi, 0), rax); 828 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); 829 __ movl(Address(rsi, 0), rbx); 830 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); 831 __ movl(Address(rsi, 0), rcx); 832 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); 833 __ movl(Address(rsi,0), rdx); 834 835 // 836 // return 837 // 838 __ bind(done); 839 __ popf(); 840 __ pop(rsi); 841 __ pop(rbx); 842 __ pop(rbp); 843 __ ret(0); 844 845 # undef __ 846 847 return start; 848 }; 849 }; 850 851 void VM_Version::get_processor_features() { 852 853 _cpu = 4; // 486 by default 854 _model = 0; 855 _stepping = 0; 856 _logical_processors_per_package = 1; 857 // i486 internal cache is both I&D and has a 16-byte line size 858 _L1_data_cache_line_size = 16; 859 860 // Get raw processor info 861 862 get_cpu_info_stub(&_cpuid_info); 863 864 assert_is_initialized(); 865 _cpu = extended_cpu_family(); 866 _model = extended_cpu_model(); 867 _stepping = cpu_stepping(); 868 869 if (cpu_family() > 4) { // it supports CPUID 870 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings 871 _cpu_features = _features; // Preserve features 872 // Logical processors are only available on P4s and above, 873 // and only if hyperthreading is 
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;

  // flush_icache_stub has to be generated first.
  // That is why the Icache line size is hard coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in a 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "unsupported clflush size");

  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
       _model == 0xCC || _model == 0xDD)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features.clear_feature(CPU_SSE4_1);
    _features.clear_feature(CPU_SSE4_2);
  }

  if (UseSSE < 3) {
    _features.clear_feature(CPU_SSE3);
    _features.clear_feature(CPU_SSSE3);
    _features.clear_feature(CPU_SSE4A);
  }

  if (UseSSE < 2)
    _features.clear_feature(CPU_SSE2);

  if (UseSSE < 1)
    _features.clear_feature(CPU_SSE);

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
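  // For example, -XX:UseSSE=4 on a CPU that only reports SSE3 is clamped to
  // UseSSE=3 by the cascade below, with a warning if the value was set
  // explicitly on the command line.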
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features.clear_feature(CPU_AVX512F);
    _features.clear_feature(CPU_AVX512DQ);
    _features.clear_feature(CPU_AVX512CD);
    _features.clear_feature(CPU_AVX512BW);
    _features.clear_feature(CPU_AVX512ER);
    _features.clear_feature(CPU_AVX512PF);
    _features.clear_feature(CPU_AVX512VL);
    _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
    _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
    _features.clear_feature(CPU_AVX512_VAES);
    _features.clear_feature(CPU_AVX512_VNNI);
    _features.clear_feature(CPU_AVX512_VBMI);
    _features.clear_feature(CPU_AVX512_VBMI2);
    _features.clear_feature(CPU_AVX512_BITALG);
    _features.clear_feature(CPU_AVX512_IFMA);
    _features.clear_feature(CPU_APX_F);
    _features.clear_feature(CPU_AVX512_FP16);
    _features.clear_feature(CPU_AVX10_1);
    _features.clear_feature(CPU_AVX10_2);
  }

  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported);
  }

  if (!UseAPX) {
    _features.clear_feature(CPU_APX_F);
  }

  if (UseAVX < 2) {
    _features.clear_feature(CPU_AVX2);
    _features.clear_feature(CPU_AVX_IFMA);
  }

  if (UseAVX < 1) {
    _features.clear_feature(CPU_AVX);
    _features.clear_feature(CPU_VZEROUPPER);
    _features.clear_feature(CPU_F16C);
    _features.clear_feature(CPU_SHA512);
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features.clear_feature(CPU_HT);
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features.clear_feature(CPU_VZEROUPPER);
      _features.clear_feature(CPU_AVX512BW);
      _features.clear_feature(CPU_AVX512VL);
      _features.clear_feature(CPU_AVX512DQ);
      _features.clear_feature(CPU_AVX512_VNNI);
      _features.clear_feature(CPU_AVX512_VAES);
      _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
      _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
      _features.clear_feature(CPU_AVX512_VBMI);
      _features.clear_feature(CPU_AVX512_VBMI2);
      _features.clear_feature(CPU_CLWB);
      _features.clear_feature(CPU_FLUSHOPT);
      _features.clear_feature(CPU_GFNI);
      _features.clear_feature(CPU_AVX512_BITALG);
      _features.clear_feature(CPU_AVX512_IFMA);
      _features.clear_feature(CPU_AVX_IFMA);
      _features.clear_feature(CPU_AVX512_FP16);
      _features.clear_feature(CPU_AVX10_1);
      _features.clear_feature(CPU_AVX10_2);
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  assert(supports_clflush(), "Always present");
  if (X86ICacheSync == -1) {
    // Auto-detect, choosing the best performant one that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
    if (supports_clwb()) {
      FLAG_SET_ERGO(X86ICacheSync, 3);
    } else if (supports_clflushopt()) {
      FLAG_SET_ERGO(X86ICacheSync, 2);
    } else {
      FLAG_SET_ERGO(X86ICacheSync, 1);
    }
  } else {
    if ((X86ICacheSync == 2) && !supports_clflushopt()) {
      vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
    }
    if ((X86ICacheSync == 3) && !supports_clwb()) {
      vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
    }
    if ((X86ICacheSync == 5) && !supports_serialize()) {
      vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
    }
  }

  stringStream ss(2048);
  ss.print("(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
           cores_per_cpu(), threads_per_core(),
           cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  ss.print(", ");
  int features_offset = (int)ss.size();
  insert_features_names(_features, ss);

  _cpu_info_string = ss.as_string(true);
  _features_string = _cpu_info_string + features_offset;

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
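  // PCLMULQDQ (carry-less multiplication) is the building block for both the
  // CRC32 and GHASH intrinsics configured below, which is why those checks
  // key off UseCLMUL in addition to the SSE level.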
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }

  // Kyber Intrinsics
  // Currently we only have them for AVX512
#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
      UseKyberIntrinsics = true;
    }
  } else
#endif
  if (UseKyberIntrinsics) {
    warning("Intrinsics for ML-KEM are not available on this CPU.");
    FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
  }

  // Dilithium Intrinsics
  // Currently we only have them for AVX512
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
      UseDilithiumIntrinsics = true;
    }
  } else if (UseDilithiumIntrinsics) {
    warning("Intrinsics for ML-DSA are not available on this CPU.");
    FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma()) {
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() || (supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 byte vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 4;
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
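  //
  // Illustration: movlpd writes only the low 64 bits of the XMM register, so
  // the result depends on the previous upper half and cannot issue until that
  // older value is available; movsd-from-memory zeroes the upper half, which
  // breaks the false dependency at the cost of a full-width write.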
1431 // 1432 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1433 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1434 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1435 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1436 1437 1438 if (is_zx()) { // ZX cpus specific settings 1439 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1440 UseStoreImmI16 = false; // don't use it on ZX cpus 1441 } 1442 if ((cpu_family() == 6) || (cpu_family() == 7)) { 1443 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1444 // Use it on all ZX cpus 1445 UseAddressNop = true; 1446 } 1447 } 1448 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1449 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus 1450 } 1451 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1452 if (supports_sse3()) { 1453 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus 1454 } else { 1455 UseXmmRegToRegMoveAll = false; 1456 } 1457 } 1458 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus 1459 #ifdef COMPILER2 1460 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1461 // For new ZX cpus do the next optimization: 1462 // don't align the beginning of a loop if there are enough instructions 1463 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1464 // in current fetch line (OptoLoopAlignment) or the padding 1465 // is big (> MaxLoopPad). 1466 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of 1467 // generated NOP instructions. 11 is the largest size of one 1468 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1469 MaxLoopPad = 11; 1470 } 1471 #endif // COMPILER2 1472 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1473 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus 1474 } 1475 if (supports_sse4_2()) { // new ZX cpus 1476 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1477 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus 1478 } 1479 } 1480 if (supports_sse4_2()) { 1481 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1482 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1483 } 1484 } else { 1485 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1486 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1487 } 1488 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1489 } 1490 } 1491 1492 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1493 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1494 } 1495 } 1496 1497 if (is_amd_family()) { // AMD cpus specific settings 1498 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { 1499 // Use it on new AMD cpus starting from Opteron. 1500 UseAddressNop = true; 1501 } 1502 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { 1503 // Use it on new AMD cpus starting from Opteron. 
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize=%zd", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize to %zd", ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }

  // Use count leading zeros instruction if available.
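  // For example, with a 64-bit operand lzcnt returns 63 for an input of 1
  // and a well-defined 64 for an input of 0, whereas the legacy bsr
  // instruction leaves its destination undefined for a zero input.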
1721 if (supports_lzcnt()) { 1722 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1723 UseCountLeadingZerosInstruction = true; 1724 } 1725 } else if (UseCountLeadingZerosInstruction) { 1726 warning("lzcnt instruction is not available on this CPU"); 1727 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1728 } 1729 1730 // Use count trailing zeros instruction if available 1731 if (supports_bmi1()) { 1732 // tzcnt does not require VEX prefix 1733 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1734 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1735 // Don't use tzcnt if BMI1 is switched off on command line. 1736 UseCountTrailingZerosInstruction = false; 1737 } else { 1738 UseCountTrailingZerosInstruction = true; 1739 } 1740 } 1741 } else if (UseCountTrailingZerosInstruction) { 1742 warning("tzcnt instruction is not available on this CPU"); 1743 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1744 } 1745 1746 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1747 // VEX prefix is generated only when AVX > 0. 1748 if (supports_bmi1() && supports_avx()) { 1749 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1750 UseBMI1Instructions = true; 1751 } 1752 } else if (UseBMI1Instructions) { 1753 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1754 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1755 } 1756 1757 if (supports_bmi2() && supports_avx()) { 1758 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1759 UseBMI2Instructions = true; 1760 } 1761 } else if (UseBMI2Instructions) { 1762 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1763 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1764 } 1765 1766 // Use population count instruction if available. 1767 if (supports_popcnt()) { 1768 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1769 UsePopCountInstruction = true; 1770 } 1771 } else if (UsePopCountInstruction) { 1772 warning("POPCNT instruction is not available on this CPU"); 1773 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1774 } 1775 1776 // Use fast-string operations if available. 1777 if (supports_erms()) { 1778 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1779 UseFastStosb = true; 1780 } 1781 } else if (UseFastStosb) { 1782 warning("fast-string operations are not available on this CPU"); 1783 FLAG_SET_DEFAULT(UseFastStosb, false); 1784 } 1785 1786 // For AMD Processors use XMM/YMM MOVDQU instructions 1787 // for Object Initialization as default 1788 if (is_amd() && cpu_family() >= 0x19) { 1789 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1790 UseFastStosb = false; 1791 } 1792 } 1793 1794 #ifdef COMPILER2 1795 if (is_intel() && MaxVectorSize > 16) { 1796 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1797 UseFastStosb = false; 1798 } 1799 } 1800 #endif 1801 1802 // Use XMM/YMM MOVDQU instruction for Object Initialization 1803 if (!UseFastStosb && UseUnalignedLoadStores) { 1804 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1805 UseXMMForObjInit = true; 1806 } 1807 } else if (UseXMMForObjInit) { 1808 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1809 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1810 } 1811 1812 #ifdef COMPILER2 1813 if (FLAG_IS_DEFAULT(AlignVector)) { 1814 // Modern processors allow misaligned memory operations for vectors. 
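    // That is, AlignVector remains enabled only when unaligned vector
    // loads/stores were left disabled above.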
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
  int cache_line_size = checked_cast<int>(prefetch_data_size());
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0, which disables prefetching. Ignoring the AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  // Prefetch settings

  // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
  // 50-warehouse SPECjbb runs on a 2-way 1.8GHz Opteron using a 4GB heap.
  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // gc copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
  // The used prefetcht0 instruction works for both amd64 and em64t.
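  // Worked example (assuming the common 64-byte dcache line): the 576-byte
  // interval chosen below is 9 lines ahead (9 * 64 = 576), and the 256-byte
  // runner-up mentioned above is 4 lines ahead (4 * 64 = 256).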
1878 1879 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1880 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1881 } 1882 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1883 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1884 } 1885 1886 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1887 (cache_line_size > ContendedPaddingWidth)) 1888 ContendedPaddingWidth = cache_line_size; 1889 1890 // This machine allows unaligned memory accesses 1891 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1892 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1893 } 1894 1895 #ifndef PRODUCT 1896 if (log_is_enabled(Info, os, cpu)) { 1897 LogStream ls(Log(os, cpu)::info()); 1898 outputStream* log = &ls; 1899 log->print_cr("Logical CPUs per core: %u", 1900 logical_processors_per_package()); 1901 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1902 log->print("UseSSE=%d", UseSSE); 1903 if (UseAVX > 0) { 1904 log->print(" UseAVX=%d", UseAVX); 1905 } 1906 if (UseAES) { 1907 log->print(" UseAES=1"); 1908 } 1909 #ifdef COMPILER2 1910 if (MaxVectorSize > 0) { 1911 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1912 } 1913 #endif 1914 log->cr(); 1915 log->print("Allocation"); 1916 if (AllocatePrefetchStyle <= 0) { 1917 log->print_cr(": no prefetching"); 1918 } else { 1919 log->print(" prefetching: "); 1920 if (AllocatePrefetchInstr == 0) { 1921 log->print("PREFETCHNTA"); 1922 } else if (AllocatePrefetchInstr == 1) { 1923 log->print("PREFETCHT0"); 1924 } else if (AllocatePrefetchInstr == 2) { 1925 log->print("PREFETCHT2"); 1926 } else if (AllocatePrefetchInstr == 3) { 1927 log->print("PREFETCHW"); 1928 } 1929 if (AllocatePrefetchLines > 1) { 1930 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1931 } else { 1932 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1933 } 1934 } 1935 1936 if (PrefetchCopyIntervalInBytes > 0) { 1937 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1938 } 1939 if (PrefetchScanIntervalInBytes > 0) { 1940 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1941 } 1942 if (ContendedPaddingWidth > 0) { 1943 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1944 } 1945 } 1946 #endif // !PRODUCT 1947 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1948 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1949 } 1950 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1951 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1952 } 1953 } 1954 1955 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1956 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1957 if (vrt == XenHVM) { 1958 st->print_cr("Xen hardware-assisted virtualization detected"); 1959 } else if (vrt == KVM) { 1960 st->print_cr("KVM virtualization detected"); 1961 } else if (vrt == VMWare) { 1962 st->print_cr("VMWare virtualization detected"); 1963 VirtualizationSupport::print_virtualization_info(st); 1964 } else if (vrt == HyperV) { 1965 st->print_cr("Hyper-V virtualization detected"); 1966 } else if (vrt == HyperVRole) { 1967 st->print_cr("Hyper-V role detected"); 1968 } 1969 } 1970 1971 bool VM_Version::compute_has_intel_jcc_erratum() { 1972 if (!is_intel_family_core()) { 1973 // Only Intel CPUs are affected. 
1974 return false; 1975 } 1976 // The following table of affected CPUs is based on the following document released by Intel: 1977 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 1978 switch (_model) { 1979 case 0x8E: 1980 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1981 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 1982 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 1983 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 1984 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 1985 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1986 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1987 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 1988 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1989 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 1990 case 0x4E: 1991 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 1992 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 1993 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 1994 return _stepping == 0x3; 1995 case 0x55: 1996 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1997 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1998 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 1999 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 2000 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 2001 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 2002 return _stepping == 0x4 || _stepping == 0x7; 2003 case 0x5E: 2004 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 2005 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 2006 return _stepping == 0x3; 2007 case 0x9E: 2008 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2009 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2010 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2011 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2012 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2013 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name 
Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another Intel machine not recognized in the table, we are okay.
    return false;
  }
}

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// More information:
// https://kb.vmware.com/s/article/1009458
//
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0};

  // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
  // from 0x40000000 up to 0x40010000.
  // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2073 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2074 detect_virt_stub(0x40000007, registers); 2075 if ((registers[0] != 0x0) || 2076 (registers[1] != 0x0) || 2077 (registers[2] != 0x0) || 2078 (registers[3] != 0x0)) { 2079 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2080 } 2081 #endif 2082 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2083 Abstract_VM_Version::_detected_virtualization = KVM; 2084 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2085 Abstract_VM_Version::_detected_virtualization = XenHVM; 2086 } 2087 } 2088 } 2089 2090 #ifdef COMPILER2 2091 // Determine if it's running on Cascade Lake using default options. 2092 bool VM_Version::is_default_intel_cascade_lake() { 2093 return FLAG_IS_DEFAULT(UseAVX) && 2094 FLAG_IS_DEFAULT(MaxVectorSize) && 2095 UseAVX > 2 && 2096 is_intel_cascade_lake(); 2097 } 2098 #endif 2099 2100 bool VM_Version::is_intel_cascade_lake() { 2101 return is_intel_skylake() && _stepping >= 5; 2102 } 2103 2104 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2105 // for implementing the array copy and clear operations. 2106 // The Intel platforms that supports the serialize instruction 2107 // has improved implementation of 64-byte load/stores and so the default 2108 // threshold is set to 0 for these platforms. 2109 int VM_Version::avx3_threshold() { 2110 return (is_intel_family_core() && 2111 supports_serialize() && 2112 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2113 } 2114 2115 void VM_Version::clear_apx_test_state() { 2116 clear_apx_test_state_stub(); 2117 } 2118 2119 static bool _vm_version_initialized = false; 2120 2121 void VM_Version::initialize() { 2122 ResourceMark rm; 2123 2124 // Making this stub must be FIRST use of assembler 2125 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2126 if (stub_blob == nullptr) { 2127 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2128 } 2129 CodeBuffer c(stub_blob); 2130 VM_Version_StubGenerator g(&c); 2131 2132 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2133 g.generate_get_cpu_info()); 2134 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2135 g.generate_detect_virt()); 2136 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, 2137 g.clear_apx_test_state()); 2138 get_processor_features(); 2139 2140 Assembler::precompute_instructions(); 2141 2142 if (VM_Version::supports_hv()) { // Supports hypervisor 2143 check_virtualizations(); 2144 } 2145 _vm_version_initialized = true; 2146 } 2147 2148 typedef enum { 2149 CPU_FAMILY_8086_8088 = 0, 2150 CPU_FAMILY_INTEL_286 = 2, 2151 CPU_FAMILY_INTEL_386 = 3, 2152 CPU_FAMILY_INTEL_486 = 4, 2153 CPU_FAMILY_PENTIUM = 5, 2154 CPU_FAMILY_PENTIUMPRO = 6, // Same family several models 2155 CPU_FAMILY_PENTIUM_4 = 0xF 2156 } FamilyFlag; 2157 2158 typedef enum { 2159 RDTSCP_FLAG = 0x08000000, // bit 27 2160 INTEL64_FLAG = 0x20000000 // bit 29 2161 } _featureExtendedEdxFlag; 2162 2163 typedef enum { 2164 FPU_FLAG = 0x00000001, 2165 VME_FLAG = 0x00000002, 2166 DE_FLAG = 0x00000004, 2167 PSE_FLAG = 0x00000008, 2168 TSC_FLAG = 0x00000010, 2169 MSR_FLAG = 0x00000020, 2170 PAE_FLAG = 0x00000040, 2171 MCE_FLAG = 0x00000080, 2172 CX8_FLAG = 0x00000100, 2173 APIC_FLAG = 0x00000200, 2174 SEP_FLAG = 0x00000800, 2175 MTRR_FLAG = 0x00001000, 2176 PGE_FLAG = 0x00002000, 2177 MCA_FLAG = 0x00004000, 2178 CMOV_FLAG = 0x00008000, 2179 PAT_FLAG = 0x00010000, 2180 
PSE36_FLAG = 0x00020000, 2181 PSNUM_FLAG = 0x00040000, 2182 CLFLUSH_FLAG = 0x00080000, 2183 DTS_FLAG = 0x00200000, 2184 ACPI_FLAG = 0x00400000, 2185 MMX_FLAG = 0x00800000, 2186 FXSR_FLAG = 0x01000000, 2187 SSE_FLAG = 0x02000000, 2188 SSE2_FLAG = 0x04000000, 2189 SS_FLAG = 0x08000000, 2190 HTT_FLAG = 0x10000000, 2191 TM_FLAG = 0x20000000 2192 } FeatureEdxFlag; 2193 2194 static BufferBlob* cpuid_brand_string_stub_blob; 2195 static const int cpuid_brand_string_stub_size = 550; 2196 2197 extern "C" { 2198 typedef void (*getCPUIDBrandString_stub_t)(void*); 2199 } 2200 2201 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2202 2203 // VM_Version statics 2204 enum { 2205 ExtendedFamilyIdLength_INTEL = 16, 2206 ExtendedFamilyIdLength_AMD = 24 2207 }; 2208 2209 const size_t VENDOR_LENGTH = 13; 2210 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2211 static char* _cpu_brand_string = nullptr; 2212 static int64_t _max_qualified_cpu_frequency = 0; 2213 2214 static int _no_of_threads = 0; 2215 static int _no_of_cores = 0; 2216 2217 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2218 "8086/8088", 2219 "", 2220 "286", 2221 "386", 2222 "486", 2223 "Pentium", 2224 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2225 "", 2226 "", 2227 "", 2228 "", 2229 "", 2230 "", 2231 "", 2232 "", 2233 "Pentium 4" 2234 }; 2235 2236 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2237 "", 2238 "", 2239 "", 2240 "", 2241 "5x86", 2242 "K5/K6", 2243 "Athlon/AthlonXP", 2244 "", 2245 "", 2246 "", 2247 "", 2248 "", 2249 "", 2250 "", 2251 "", 2252 "Opteron/Athlon64", 2253 "Opteron QC/Phenom", // Barcelona et.al. 2254 "", 2255 "", 2256 "", 2257 "", 2258 "", 2259 "", 2260 "Zen" 2261 }; 2262 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2263 // September 2013, Vol 3C Table 35-1 2264 const char* const _model_id_pentium_pro[] = { 2265 "", 2266 "Pentium Pro", 2267 "", 2268 "Pentium II model 3", 2269 "", 2270 "Pentium II model 5/Xeon/Celeron", 2271 "Celeron", 2272 "Pentium III/Pentium III Xeon", 2273 "Pentium III/Pentium III Xeon", 2274 "Pentium M model 9", // Yonah 2275 "Pentium III, model A", 2276 "Pentium III, model B", 2277 "", 2278 "Pentium M model D", // Dothan 2279 "", 2280 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2281 "", 2282 "", 2283 "", 2284 "", 2285 "", 2286 "", 2287 "Celeron", // 0x16 Celeron 65nm 2288 "Core 2", // 0x17 Penryn / Harpertown 2289 "", 2290 "", 2291 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2292 "Atom", // 0x1B Z5xx series Silverthorn 2293 "", 2294 "Core 2", // 0x1D Dunnington (6-core) 2295 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2296 "", 2297 "", 2298 "", 2299 "", 2300 "", 2301 "", 2302 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2303 "", 2304 "", 2305 "", // 0x28 2306 "", 2307 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2308 "", 2309 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2310 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2311 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2312 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2313 "", 2314 "", 2315 "", 2316 "", 2317 "", 2318 "", 2319 "", 2320 "", 2321 "", 2322 "", 2323 "Ivy Bridge", // 0x3a 2324 "", 2325 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2326 "", // 0x3d "Next Generation Intel Core Processor" 2327 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2328 "", // 0x3f "Future Generation Intel Xeon Processor" 2329 "", 2330 "", 2331 "", 2332 "", 2333 "", 2334 "Haswell", // 0x45 
"4th Generation Intel Core Processor" 2335 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2336 nullptr 2337 }; 2338 2339 /* Brand ID is for back compatibility 2340 * Newer CPUs uses the extended brand string */ 2341 const char* const _brand_id[] = { 2342 "", 2343 "Celeron processor", 2344 "Pentium III processor", 2345 "Intel Pentium III Xeon processor", 2346 "", 2347 "", 2348 "", 2349 "", 2350 "Intel Pentium 4 processor", 2351 nullptr 2352 }; 2353 2354 2355 const char* const _feature_edx_id[] = { 2356 "On-Chip FPU", 2357 "Virtual Mode Extensions", 2358 "Debugging Extensions", 2359 "Page Size Extensions", 2360 "Time Stamp Counter", 2361 "Model Specific Registers", 2362 "Physical Address Extension", 2363 "Machine Check Exceptions", 2364 "CMPXCHG8B Instruction", 2365 "On-Chip APIC", 2366 "", 2367 "Fast System Call", 2368 "Memory Type Range Registers", 2369 "Page Global Enable", 2370 "Machine Check Architecture", 2371 "Conditional Mov Instruction", 2372 "Page Attribute Table", 2373 "36-bit Page Size Extension", 2374 "Processor Serial Number", 2375 "CLFLUSH Instruction", 2376 "", 2377 "Debug Trace Store feature", 2378 "ACPI registers in MSR space", 2379 "Intel Architecture MMX Technology", 2380 "Fast Float Point Save and Restore", 2381 "Streaming SIMD extensions", 2382 "Streaming SIMD extensions 2", 2383 "Self-Snoop", 2384 "Hyper Threading", 2385 "Thermal Monitor", 2386 "", 2387 "Pending Break Enable" 2388 }; 2389 2390 const char* const _feature_extended_edx_id[] = { 2391 "", 2392 "", 2393 "", 2394 "", 2395 "", 2396 "", 2397 "", 2398 "", 2399 "", 2400 "", 2401 "", 2402 "SYSCALL/SYSRET", 2403 "", 2404 "", 2405 "", 2406 "", 2407 "", 2408 "", 2409 "", 2410 "", 2411 "Execute Disable Bit", 2412 "", 2413 "", 2414 "", 2415 "", 2416 "", 2417 "", 2418 "RDTSCP", 2419 "", 2420 "Intel 64 Architecture", 2421 "", 2422 "" 2423 }; 2424 2425 const char* const _feature_ecx_id[] = { 2426 "Streaming SIMD Extensions 3", 2427 "PCLMULQDQ", 2428 "64-bit DS Area", 2429 "MONITOR/MWAIT instructions", 2430 "CPL Qualified Debug Store", 2431 "Virtual Machine Extensions", 2432 "Safer Mode Extensions", 2433 "Enhanced Intel SpeedStep technology", 2434 "Thermal Monitor 2", 2435 "Supplemental Streaming SIMD Extensions 3", 2436 "L1 Context ID", 2437 "", 2438 "Fused Multiply-Add", 2439 "CMPXCHG16B", 2440 "xTPR Update Control", 2441 "Perfmon and Debug Capability", 2442 "", 2443 "Process-context identifiers", 2444 "Direct Cache Access", 2445 "Streaming SIMD extensions 4.1", 2446 "Streaming SIMD extensions 4.2", 2447 "x2APIC", 2448 "MOVBE", 2449 "Popcount instruction", 2450 "TSC-Deadline", 2451 "AESNI", 2452 "XSAVE", 2453 "OSXSAVE", 2454 "AVX", 2455 "F16C", 2456 "RDRAND", 2457 "" 2458 }; 2459 2460 const char* const _feature_extended_ecx_id[] = { 2461 "LAHF/SAHF instruction support", 2462 "Core multi-processor legacy mode", 2463 "", 2464 "", 2465 "", 2466 "Advanced Bit Manipulations: LZCNT", 2467 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2468 "Misaligned SSE mode", 2469 "", 2470 "", 2471 "", 2472 "", 2473 "", 2474 "", 2475 "", 2476 "", 2477 "", 2478 "", 2479 "", 2480 "", 2481 "", 2482 "", 2483 "", 2484 "", 2485 "", 2486 "", 2487 "", 2488 "", 2489 "", 2490 "", 2491 "", 2492 "" 2493 }; 2494 2495 void VM_Version::initialize_tsc(void) { 2496 ResourceMark rm; 2497 2498 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2499 if (cpuid_brand_string_stub_blob == nullptr) { 2500 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2501 } 2502 
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                            g.generate_getCPUIDBrandString());
}

const char* VM_Version::cpu_model_description(void) {
  uint32_t cpu_family = extended_cpu_family();
  uint32_t cpu_model = extended_cpu_model();
  const char* model = nullptr;

  if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
    for (uint32_t i = 0; i <= cpu_model; i++) {
      model = _model_id_pentium_pro[i];
      if (model == nullptr) {
        break;
      }
    }
  }
  return model;
}

const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr;
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}

const char* VM_Version::cpu_brand(void) {
  const char* brand = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}

bool VM_Version::cpu_is_em64t(void) {
  return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
}

bool VM_Version::is_netburst(void) {
  return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
}

bool VM_Version::supports_tscinv_ext(void) {
  if (!supports_tscinv_bit()) {
    return false;
  }

  if (is_intel()) {
    return true;
  }

  if (is_amd()) {
    return !is_amd_Barcelona();
  }

  if (is_hygon()) {
    return true;
  }

  return false;
}

void VM_Version::resolve_cpu_information_details(void) {

  // In the future we want to base this information on proper CPU and cache
  // topology enumeration, such as Intel 64 Architecture Processor Topology
  // Enumeration, which supports system CPU and cache topology enumeration
  // using either x2APIC IDs or initial APIC IDs.

  // Currently we only make rough estimates, which will not necessarily
  // reflect the exact configuration of the system.

  // This is the number of logical hardware threads
  // visible to the operating system.
  _no_of_threads = os::processor_count();

  // Find out the number of threads per CPU package.
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // Use the number of threads visible to the process to guess the number of sockets.
  _no_of_sockets = _no_of_threads / threads_per_package;

  // The process might only see a subset of the total number of threads from
  // a single processor package, e.g. under virtualization or resource
  // management. If so, just report a single package.
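  // Worked example (hypothetical numbers, for illustration only): with
  // _no_of_threads = 32 OS-visible hardware threads, threads_per_core() = 2
  // and cores_per_cpu() = 8, threads_per_package = 2 * 8 = 16 and
  // _no_of_sockets = 32 / 16 = 2. If the process were restricted to
  // 8 threads, 8 / 16 == 0 and the fallback below reports one package.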
2604 if (0 == _no_of_sockets) { 2605 _no_of_sockets = 1; 2606 } 2607 2608 // estimate the number of cores 2609 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2610 } 2611 2612 2613 const char* VM_Version::cpu_family_description(void) { 2614 int cpu_family_id = extended_cpu_family(); 2615 if (is_amd()) { 2616 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2617 return _family_id_amd[cpu_family_id]; 2618 } 2619 } 2620 if (is_intel()) { 2621 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2622 return cpu_model_description(); 2623 } 2624 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2625 return _family_id_intel[cpu_family_id]; 2626 } 2627 } 2628 if (is_hygon()) { 2629 return "Dhyana"; 2630 } 2631 return "Unknown x86"; 2632 } 2633 2634 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2635 assert(buf != nullptr, "buffer is null!"); 2636 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2637 2638 const char* cpu_type = nullptr; 2639 const char* x64 = nullptr; 2640 2641 if (is_intel()) { 2642 cpu_type = "Intel"; 2643 x64 = cpu_is_em64t() ? " Intel64" : ""; 2644 } else if (is_amd()) { 2645 cpu_type = "AMD"; 2646 x64 = cpu_is_em64t() ? " AMD64" : ""; 2647 } else if (is_hygon()) { 2648 cpu_type = "Hygon"; 2649 x64 = cpu_is_em64t() ? " AMD64" : ""; 2650 } else { 2651 cpu_type = "Unknown x86"; 2652 x64 = cpu_is_em64t() ? " x86_64" : ""; 2653 } 2654 2655 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2656 cpu_type, 2657 cpu_family_description(), 2658 supports_ht() ? " (HT)" : "", 2659 supports_sse3() ? " SSE3" : "", 2660 supports_ssse3() ? " SSSE3" : "", 2661 supports_sse4_1() ? " SSE4.1" : "", 2662 supports_sse4_2() ? " SSE4.2" : "", 2663 supports_sse4a() ? " SSE4A" : "", 2664 is_netburst() ? " Netburst" : "", 2665 is_intel_family_core() ? 
" Core" : "", 2666 x64); 2667 2668 return OS_OK; 2669 } 2670 2671 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2672 assert(buf != nullptr, "buffer is null!"); 2673 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2674 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2675 2676 // invoke newly generated asm code to fetch CPU Brand String 2677 getCPUIDBrandString_stub(&_cpuid_info); 2678 2679 // fetch results into buffer 2680 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2681 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2682 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2683 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2684 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2685 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2686 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2687 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2688 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2689 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2690 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2691 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2692 2693 return OS_OK; 2694 } 2695 2696 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2697 guarantee(buf != nullptr, "buffer is null!"); 2698 guarantee(buf_len > 0, "buffer len not enough!"); 2699 2700 unsigned int flag = 0; 2701 unsigned int fi = 0; 2702 size_t written = 0; 2703 const char* prefix = ""; 2704 2705 #define WRITE_TO_BUF(string) \ 2706 { \ 2707 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2708 if (res < 0) { \ 2709 return buf_len - 1; \ 2710 } \ 2711 written += res; \ 2712 if (prefix[0] == '\0') { \ 2713 prefix = ", "; \ 2714 } \ 2715 } 2716 2717 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2718 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2719 continue; /* no hyperthreading */ 2720 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2721 continue; /* no fast system call */ 2722 } 2723 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2724 WRITE_TO_BUF(_feature_edx_id[fi]); 2725 } 2726 } 2727 2728 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2729 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2730 WRITE_TO_BUF(_feature_ecx_id[fi]); 2731 } 2732 } 2733 2734 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2735 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2736 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2737 } 2738 } 2739 2740 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2741 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2742 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2743 } 2744 } 2745 2746 if (supports_tscinv_bit()) { 2747 WRITE_TO_BUF("Invariant TSC"); 2748 } 2749 2750 return written; 2751 } 2752 2753 /** 2754 * Write a detailed description of the cpu to a given buffer, including 2755 * feature set. 
2756 */ 2757 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2758 assert(buf != nullptr, "buffer is null!"); 2759 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2760 2761 static const char* unknown = "<unknown>"; 2762 char vendor_id[VENDOR_LENGTH]; 2763 const char* family = nullptr; 2764 const char* model = nullptr; 2765 const char* brand = nullptr; 2766 int outputLen = 0; 2767 2768 family = cpu_family_description(); 2769 if (family == nullptr) { 2770 family = unknown; 2771 } 2772 2773 model = cpu_model_description(); 2774 if (model == nullptr) { 2775 model = unknown; 2776 } 2777 2778 brand = cpu_brand_string(); 2779 2780 if (brand == nullptr) { 2781 brand = cpu_brand(); 2782 if (brand == nullptr) { 2783 brand = unknown; 2784 } 2785 } 2786 2787 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2788 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2789 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2790 vendor_id[VENDOR_LENGTH-1] = '\0'; 2791 2792 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2793 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2794 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2795 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2796 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2797 "Supports: ", 2798 brand, 2799 vendor_id, 2800 family, 2801 extended_cpu_family(), 2802 model, 2803 extended_cpu_model(), 2804 cpu_stepping(), 2805 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2806 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2807 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2808 _cpuid_info.std_cpuid1_eax.value, 2809 _cpuid_info.std_cpuid1_ebx.value, 2810 _cpuid_info.std_cpuid1_ecx.value, 2811 _cpuid_info.std_cpuid1_edx.value, 2812 _cpuid_info.ext_cpuid1_eax, 2813 _cpuid_info.ext_cpuid1_ebx, 2814 _cpuid_info.ext_cpuid1_ecx, 2815 _cpuid_info.ext_cpuid1_edx); 2816 2817 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2818 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2819 return OS_ERR; 2820 } 2821 2822 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2823 2824 return OS_OK; 2825 } 2826 2827 2828 // Fill in Abstract_VM_Version statics 2829 void VM_Version::initialize_cpu_information() { 2830 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2831 assert(!_initialized, "shouldn't be initialized yet"); 2832 resolve_cpu_information_details(); 2833 2834 // initialize cpu_name and cpu_desc 2835 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2836 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2837 _initialized = true; 2838 } 2839 2840 /** 2841 * For information about extracting the frequency from the cpu brand string, please see: 2842 * 2843 * Intel Processor Identification and the CPUID Instruction 2844 * Application Note 485 2845 * May 2012 2846 * 2847 * The return value is the frequency in Hz. 2848 */ 2849 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2850 const char* const brand_string = cpu_brand_string(); 2851 if (brand_string == nullptr) { 2852 return 0; 2853 } 2854 const int64_t MEGA = 1000000; 2855 int64_t multiplier = 0; 2856 int64_t frequency = 0; 2857 uint8_t idx = 0; 2858 // The brand string buffer is at most 48 bytes. 2859 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
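  // Worked example (hypothetical brand string ending in "@ 3.40GHz"): the
  // scan stops with brand_string[idx] == 'G', so multiplier = 1e9;
  // brand_string[idx-3] == '.' then selects the "x.xx" branch below, giving
  // 3 * 1e9 + 4 * 1e9 / 10 + 0 * 1e9 / 100 = 3.4e9 Hz.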
2860 for (; idx < 48-2; ++idx) { 2861 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2862 // Search brand string for "yHz" where y is M, G, or T. 2863 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2864 if (brand_string[idx] == 'M') { 2865 multiplier = MEGA; 2866 } else if (brand_string[idx] == 'G') { 2867 multiplier = MEGA * 1000; 2868 } else if (brand_string[idx] == 'T') { 2869 multiplier = MEGA * MEGA; 2870 } 2871 break; 2872 } 2873 } 2874 if (multiplier > 0) { 2875 // Compute frequency (in Hz) from brand string. 2876 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2877 frequency = (brand_string[idx-4] - '0') * multiplier; 2878 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2879 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2880 } else { // format is "xxxx" 2881 frequency = (brand_string[idx-4] - '0') * 1000; 2882 frequency += (brand_string[idx-3] - '0') * 100; 2883 frequency += (brand_string[idx-2] - '0') * 10; 2884 frequency += (brand_string[idx-1] - '0'); 2885 frequency *= multiplier; 2886 } 2887 } 2888 return frequency; 2889 } 2890 2891 2892 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2893 if (_max_qualified_cpu_frequency == 0) { 2894 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2895 } 2896 return _max_qualified_cpu_frequency; 2897 } 2898 2899 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const { 2900 VM_Features vm_features; 2901 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2902 vm_features.set_feature(CPU_CX8); 2903 if (std_cpuid1_edx.bits.cmov != 0) 2904 vm_features.set_feature(CPU_CMOV); 2905 if (std_cpuid1_edx.bits.clflush != 0) 2906 vm_features.set_feature(CPU_FLUSH); 2907 // clflush should always be available on x86_64 2908 // if not we are in real trouble because we rely on it 2909 // to flush the code cache. 2910 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available"); 2911 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2912 ext_cpuid1_edx.bits.fxsr != 0)) 2913 vm_features.set_feature(CPU_FXSR); 2914 // HT flag is set for multi-core processors also. 
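  // (The raw cpuid HTT bit is also set on multi-core parts, which is why
  // the computed threads_per_core() value is consulted here instead.)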
2915 if (threads_per_core() > 1) 2916 vm_features.set_feature(CPU_HT); 2917 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2918 ext_cpuid1_edx.bits.mmx != 0)) 2919 vm_features.set_feature(CPU_MMX); 2920 if (std_cpuid1_edx.bits.sse != 0) 2921 vm_features.set_feature(CPU_SSE); 2922 if (std_cpuid1_edx.bits.sse2 != 0) 2923 vm_features.set_feature(CPU_SSE2); 2924 if (std_cpuid1_ecx.bits.sse3 != 0) 2925 vm_features.set_feature(CPU_SSE3); 2926 if (std_cpuid1_ecx.bits.ssse3 != 0) 2927 vm_features.set_feature(CPU_SSSE3); 2928 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2929 vm_features.set_feature(CPU_SSE4_1); 2930 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2931 vm_features.set_feature(CPU_SSE4_2); 2932 if (std_cpuid1_ecx.bits.popcnt != 0) 2933 vm_features.set_feature(CPU_POPCNT); 2934 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 2935 xem_xcr0_eax.bits.apx_f != 0) { 2936 vm_features.set_feature(CPU_APX_F); 2937 } 2938 if (std_cpuid1_ecx.bits.avx != 0 && 2939 std_cpuid1_ecx.bits.osxsave != 0 && 2940 xem_xcr0_eax.bits.sse != 0 && 2941 xem_xcr0_eax.bits.ymm != 0) { 2942 vm_features.set_feature(CPU_AVX); 2943 vm_features.set_feature(CPU_VZEROUPPER); 2944 if (sefsl1_cpuid7_eax.bits.sha512 != 0) 2945 vm_features.set_feature(CPU_SHA512); 2946 if (std_cpuid1_ecx.bits.f16c != 0) 2947 vm_features.set_feature(CPU_F16C); 2948 if (sef_cpuid7_ebx.bits.avx2 != 0) { 2949 vm_features.set_feature(CPU_AVX2); 2950 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 2951 vm_features.set_feature(CPU_AVX_IFMA); 2952 } 2953 if (sef_cpuid7_ecx.bits.gfni != 0) 2954 vm_features.set_feature(CPU_GFNI); 2955 if (sef_cpuid7_ebx.bits.avx512f != 0 && 2956 xem_xcr0_eax.bits.opmask != 0 && 2957 xem_xcr0_eax.bits.zmm512 != 0 && 2958 xem_xcr0_eax.bits.zmm32 != 0) { 2959 vm_features.set_feature(CPU_AVX512F); 2960 if (sef_cpuid7_ebx.bits.avx512cd != 0) 2961 vm_features.set_feature(CPU_AVX512CD); 2962 if (sef_cpuid7_ebx.bits.avx512dq != 0) 2963 vm_features.set_feature(CPU_AVX512DQ); 2964 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 2965 vm_features.set_feature(CPU_AVX512_IFMA); 2966 if (sef_cpuid7_ebx.bits.avx512pf != 0) 2967 vm_features.set_feature(CPU_AVX512PF); 2968 if (sef_cpuid7_ebx.bits.avx512er != 0) 2969 vm_features.set_feature(CPU_AVX512ER); 2970 if (sef_cpuid7_ebx.bits.avx512bw != 0) 2971 vm_features.set_feature(CPU_AVX512BW); 2972 if (sef_cpuid7_ebx.bits.avx512vl != 0) 2973 vm_features.set_feature(CPU_AVX512VL); 2974 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2975 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); 2976 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2977 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); 2978 if (sef_cpuid7_ecx.bits.vaes != 0) 2979 vm_features.set_feature(CPU_AVX512_VAES); 2980 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 2981 vm_features.set_feature(CPU_AVX512_VNNI); 2982 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2983 vm_features.set_feature(CPU_AVX512_BITALG); 2984 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2985 vm_features.set_feature(CPU_AVX512_VBMI); 2986 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2987 vm_features.set_feature(CPU_AVX512_VBMI2); 2988 } 2989 if (is_intel()) { 2990 if (sefsl1_cpuid7_edx.bits.avx10 != 0 && 2991 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 && 2992 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 && 2993 xem_xcr0_eax.bits.opmask != 0 && 2994 xem_xcr0_eax.bits.zmm512 != 0 && 2995 xem_xcr0_eax.bits.zmm32 != 0) { 2996 vm_features.set_feature(CPU_AVX10_1); 2997 vm_features.set_feature(CPU_AVX512F); 2998 vm_features.set_feature(CPU_AVX512CD); 2999 vm_features.set_feature(CPU_AVX512DQ); 3000 
vm_features.set_feature(CPU_AVX512PF); 3001 vm_features.set_feature(CPU_AVX512ER); 3002 vm_features.set_feature(CPU_AVX512BW); 3003 vm_features.set_feature(CPU_AVX512VL); 3004 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); 3005 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); 3006 vm_features.set_feature(CPU_AVX512_VAES); 3007 vm_features.set_feature(CPU_AVX512_VNNI); 3008 vm_features.set_feature(CPU_AVX512_BITALG); 3009 vm_features.set_feature(CPU_AVX512_VBMI); 3010 vm_features.set_feature(CPU_AVX512_VBMI2); 3011 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) { 3012 vm_features.set_feature(CPU_AVX10_2); 3013 } 3014 } 3015 } 3016 } 3017 3018 if (std_cpuid1_ecx.bits.hv != 0) 3019 vm_features.set_feature(CPU_HV); 3020 if (sef_cpuid7_ebx.bits.bmi1 != 0) 3021 vm_features.set_feature(CPU_BMI1); 3022 if (std_cpuid1_edx.bits.tsc != 0) 3023 vm_features.set_feature(CPU_TSC); 3024 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3025 vm_features.set_feature(CPU_TSCINV_BIT); 3026 if (std_cpuid1_ecx.bits.aes != 0) 3027 vm_features.set_feature(CPU_AES); 3028 if (ext_cpuid1_ecx.bits.lzcnt != 0) 3029 vm_features.set_feature(CPU_LZCNT); 3030 if (ext_cpuid1_ecx.bits.prefetchw != 0) 3031 vm_features.set_feature(CPU_3DNOW_PREFETCH); 3032 if (sef_cpuid7_ebx.bits.erms != 0) 3033 vm_features.set_feature(CPU_ERMS); 3034 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3035 vm_features.set_feature(CPU_FSRM); 3036 if (std_cpuid1_ecx.bits.clmul != 0) 3037 vm_features.set_feature(CPU_CLMUL); 3038 if (sef_cpuid7_ebx.bits.rtm != 0) 3039 vm_features.set_feature(CPU_RTM); 3040 if (sef_cpuid7_ebx.bits.adx != 0) 3041 vm_features.set_feature(CPU_ADX); 3042 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3043 vm_features.set_feature(CPU_BMI2); 3044 if (sef_cpuid7_ebx.bits.sha != 0) 3045 vm_features.set_feature(CPU_SHA); 3046 if (std_cpuid1_ecx.bits.fma != 0) 3047 vm_features.set_feature(CPU_FMA); 3048 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3049 vm_features.set_feature(CPU_FLUSHOPT); 3050 if (sef_cpuid7_ebx.bits.clwb != 0) 3051 vm_features.set_feature(CPU_CLWB); 3052 if (ext_cpuid1_edx.bits.rdtscp != 0) 3053 vm_features.set_feature(CPU_RDTSCP); 3054 if (sef_cpuid7_ecx.bits.rdpid != 0) 3055 vm_features.set_feature(CPU_RDPID); 3056 3057 // AMD|Hygon additional features. 3058 if (is_amd_family()) { 3059 // PREFETCHW was checked above, check TDNOW here. 3060 if ((ext_cpuid1_edx.bits.tdnow != 0)) 3061 vm_features.set_feature(CPU_3DNOW_PREFETCH); 3062 if (ext_cpuid1_ecx.bits.sse4a != 0) 3063 vm_features.set_feature(CPU_SSE4A); 3064 } 3065 3066 // Intel additional features. 3067 if (is_intel()) { 3068 if (sef_cpuid7_edx.bits.serialize != 0) 3069 vm_features.set_feature(CPU_SERIALIZE); 3070 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0) 3071 vm_features.set_feature(CPU_AVX512_FP16); 3072 } 3073 3074 // ZX additional features. 3075 if (is_zx()) { 3076 // We do not know if these are supported by ZX, so we cannot trust 3077 // common CPUID bit for them. 3078 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?"); 3079 vm_features.clear_feature(CPU_CLWB); 3080 } 3081 3082 // Protection key features. 3083 if (sef_cpuid7_ecx.bits.pku != 0) { 3084 vm_features.set_feature(CPU_PKU); 3085 } 3086 if (sef_cpuid7_ecx.bits.ospke != 0) { 3087 vm_features.set_feature(CPU_OSPKE); 3088 } 3089 3090 // Control flow enforcement (CET) features. 
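  // For reference: cet_ss reports shadow-stack support and cet_ibt reports
  // indirect-branch tracking; both bits come from the structured extended
  // feature leaf (CPUID leaf 7).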
3091 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3092 vm_features.set_feature(CPU_CET_SS); 3093 } 3094 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3095 vm_features.set_feature(CPU_CET_IBT); 3096 } 3097 3098 // Composite features. 3099 if (supports_tscinv_bit() && 3100 ((is_amd_family() && !is_amd_Barcelona()) || 3101 is_intel_tsc_synched_at_init())) { 3102 vm_features.set_feature(CPU_TSCINV); 3103 } 3104 return vm_features; 3105 } 3106 3107 bool VM_Version::os_supports_avx_vectors() { 3108 bool retVal = false; 3109 int nreg = 4; 3110 if (supports_evex()) { 3111 // Verify that OS save/restore all bits of EVEX registers 3112 // during signal processing. 3113 retVal = true; 3114 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3115 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3116 retVal = false; 3117 break; 3118 } 3119 } 3120 } else if (supports_avx()) { 3121 // Verify that OS save/restore all bits of AVX registers 3122 // during signal processing. 3123 retVal = true; 3124 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3125 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3126 retVal = false; 3127 break; 3128 } 3129 } 3130 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3131 if (retVal == false) { 3132 // Verify that OS save/restore all bits of EVEX registers 3133 // during signal processing. 3134 retVal = true; 3135 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3136 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3137 retVal = false; 3138 break; 3139 } 3140 } 3141 } 3142 } 3143 return retVal; 3144 } 3145 3146 bool VM_Version::os_supports_apx_egprs() { 3147 if (!supports_apx_f()) { 3148 return false; 3149 } 3150 // Enable APX support for product builds after 3151 // completion of planned features listed in JDK-8329030. 
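  // Until then, non-product builds additionally verify below that the OS
  // preserved the saved EGPR test values across signal handling.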
3152 #if !defined(PRODUCT) 3153 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3154 _cpuid_info.apx_save[1] != egpr_test_value()) { 3155 return false; 3156 } 3157 return true; 3158 #else 3159 return false; 3160 #endif 3161 } 3162 3163 uint VM_Version::cores_per_cpu() { 3164 uint result = 1; 3165 if (is_intel()) { 3166 bool supports_topology = supports_processor_topology(); 3167 if (supports_topology) { 3168 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3169 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3170 } 3171 if (!supports_topology || result == 0) { 3172 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3173 } 3174 } else if (is_amd_family()) { 3175 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3176 } else if (is_zx()) { 3177 bool supports_topology = supports_processor_topology(); 3178 if (supports_topology) { 3179 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3180 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3181 } 3182 if (!supports_topology || result == 0) { 3183 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3184 } 3185 } 3186 return result; 3187 } 3188 3189 uint VM_Version::threads_per_core() { 3190 uint result = 1; 3191 if (is_intel() && supports_processor_topology()) { 3192 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3193 } else if (is_zx() && supports_processor_topology()) { 3194 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3195 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3196 if (cpu_family() >= 0x17) { 3197 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3198 } else { 3199 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3200 cores_per_cpu(); 3201 } 3202 } 3203 return (result == 0 ? 1 : result); 3204 } 3205 3206 uint VM_Version::L1_line_size() { 3207 uint result = 0; 3208 if (is_intel()) { 3209 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3210 } else if (is_amd_family()) { 3211 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3212 } else if (is_zx()) { 3213 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3214 } 3215 if (result < 32) // not defined ? 3216 result = 32; // 32 bytes by default on x86 and other x64 3217 return result; 3218 } 3219 3220 bool VM_Version::is_intel_tsc_synched_at_init() { 3221 if (is_intel_family_core()) { 3222 uint32_t ext_model = extended_cpu_model(); 3223 if (ext_model == CPU_MODEL_NEHALEM_EP || 3224 ext_model == CPU_MODEL_WESTMERE_EP || 3225 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3226 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3227 // <= 2-socket invariant tsc support. EX versions are usually used 3228 // in > 2-socket systems and likely don't synchronize tscs at 3229 // initialization. 3230 // Code that uses tsc values must be prepared for them to arbitrarily 3231 // jump forward or backward. 3232 return true; 3233 } 3234 } 3235 return false; 3236 } 3237 3238 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3239 // Hardware prefetching (distance/size in bytes): 3240 // Pentium 3 - 64 / 32 3241 // Pentium 4 - 256 / 128 3242 // Athlon - 64 / 32 ???? 
//   Opteron   - 128 / 64 only when 2 sequential cache lines accessed
//   Core      - 128 / 64
//
// Software prefetching (distance in bytes / instruction with best score):
//   Pentium 3 - 128 / prefetchnta
//   Pentium 4 - 512 / prefetchnta
//   Athlon    - 128 / prefetchnta
//   Opteron   - 256 / prefetchnta
//   Core      - 256 / prefetchnta
// It will be used only when AllocatePrefetchStyle > 0.

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    if (supports_sse3() && cpu_family() == 6) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
        return 384;
      }
    }
    if (supports_sse2()) {
      if (cpu_family() == 6) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}

bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
  switch (id) {
  case vmIntrinsics::_floatToFloat16:
  case vmIntrinsics::_float16ToFloat:
    if (!supports_float16()) {
      return false;
    }
    break;
  default:
    break;
  }
  return true;
}

void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
  int i = 0;
  ss.join([&]() {
    const char* str = nullptr;
    while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
      if (features.supports_feature((VM_Version::Feature_Flag)i)) {
        str = _features_names[i];
      }
      i += 1;
    }
    return str;
  }, ", ");
}

void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
  VM_Features* features = (VM_Features*)features_buffer;
  insert_features_names(*features, ss);
}

void VM_Version::get_missing_features_name(void* features_buffer, stringStream& ss) {
  VM_Features* features_to_test = (VM_Features*)features_buffer;
  int i = 0;
  ss.join([&]() {
    const char* str = nullptr;
    while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
      Feature_Flag flag = (Feature_Flag)i;
      if (features_to_test->supports_feature(flag) && !_features.supports_feature(flag)) {
        str = _features_names[i];
      }
      i += 1;
    }
    return str;
  }, ", ");
}

int VM_Version::cpu_features_size() {
  return sizeof(VM_Features);
}

void VM_Version::store_cpu_features(void* buf) {
  VM_Features copy = _features;
  copy.clear_feature(CPU_HT); // HT does not affect AOT code cache compatibility
  memcpy(buf, &copy, sizeof(VM_Features));
}

bool VM_Version::supports_features(void* features_buffer) {
  VM_Features* features_to_test = (VM_Features*)features_buffer;
  return _features.supports_features(features_to_test);
}