/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
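  // (CLFLUSH availability is reported by CPUID leaf 1, EDX bit 19;
  // get_processor_features() below guarantees that bit is set.)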
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  return true;
}

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by the operating system and not because they were never modified externally.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
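    // (EFLAGS bit 21, the ID flag, is writable only on processors that
    // implement CPUID; toggling it is the standard pre-CPUID detection test.)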
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);  // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);    // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);    // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
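    // (std_cpuid0 EAX, captured above, holds the highest standard CPUID leaf
    // this processor supports.)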
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);  // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid cache parameters used
    __ orl(rax, rax);   // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);  // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
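    // (The compare chain dispatches to the highest supported extended leaf;
    // each leaf block below stores its registers and then falls through to
    // the next lower leaf, so all supported leaves get collected.)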
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
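    // (In C-like pseudocode, the test below is:
    //    if ((sefsl1_cpuid7_edx & (1 << 21)) && (xem_xcr0_eax & (1 << 19)))
    //      probe EGPR save/restore across a signal;
    //  0x200000 == 1 << 21 and 0x80000 == 1 << 19.)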
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if SSE and YMM states are OS-enabled

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
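    // (The register values saved below are compared against ymm_test_value()
    // later, in os_supports_avx_vectors(), to verify the OS restored the
    // full vector state across the signal.)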

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features; // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // In 64-bit mode, SSE2 is the minimum.
  if (UseSSE < 2) UseSSE = 2;

  // flush_icache_stub has to be generated first.
  // That is why the Icache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in a 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");

  // Assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to a non-zero
  // value. This is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }

  // Check if processor has Intel E-cores
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
       _model == 0xCC || _model == 0xDD)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features.clear_feature(CPU_SSE4_1);
    _features.clear_feature(CPU_SSE4_2);
  }

  if (UseSSE < 3) {
    _features.clear_feature(CPU_SSE3);
    _features.clear_feature(CPU_SSSE3);
    _features.clear_feature(CPU_SSE4A);
  }

  if (UseSSE < 2)
    _features.clear_feature(CPU_SSE2);

  if (UseSSE < 1)
    _features.clear_feature(CPU_SSE);

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
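  // (For example, -XX:UseSSE=4 on a CPU without SSE4.1 is clamped below to
  // the highest supported level, with a warning.)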
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features.clear_feature(CPU_AVX512F);
    _features.clear_feature(CPU_AVX512DQ);
    _features.clear_feature(CPU_AVX512CD);
    _features.clear_feature(CPU_AVX512BW);
    _features.clear_feature(CPU_AVX512ER);
    _features.clear_feature(CPU_AVX512PF);
    _features.clear_feature(CPU_AVX512VL);
    _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
    _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
    _features.clear_feature(CPU_AVX512_VAES);
    _features.clear_feature(CPU_AVX512_VNNI);
    _features.clear_feature(CPU_AVX512_VBMI);
    _features.clear_feature(CPU_AVX512_VBMI2);
    _features.clear_feature(CPU_AVX512_BITALG);
    _features.clear_feature(CPU_AVX512_IFMA);
    _features.clear_feature(CPU_APX_F);
    _features.clear_feature(CPU_AVX512_FP16);
    _features.clear_feature(CPU_AVX10_1);
    _features.clear_feature(CPU_AVX10_2);
  }

  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
  }

  if (!UseAPX) {
    _features.clear_feature(CPU_APX_F);
  }

  if (UseAVX < 2) {
    _features.clear_feature(CPU_AVX2);
    _features.clear_feature(CPU_AVX_IFMA);
  }

  if (UseAVX < 1) {
    _features.clear_feature(CPU_AVX);
    _features.clear_feature(CPU_VZEROUPPER);
    _features.clear_feature(CPU_F16C);
    _features.clear_feature(CPU_SHA512);
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features.clear_feature(CPU_HT);
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features.clear_feature(CPU_VZEROUPPER);
      _features.clear_feature(CPU_AVX512BW);
      _features.clear_feature(CPU_AVX512VL);
      _features.clear_feature(CPU_AVX512DQ);
      _features.clear_feature(CPU_AVX512_VNNI);
      _features.clear_feature(CPU_AVX512_VAES);
      _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
      _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
      _features.clear_feature(CPU_AVX512_VBMI);
      _features.clear_feature(CPU_AVX512_VBMI2);
      _features.clear_feature(CPU_CLWB);
      _features.clear_feature(CPU_FLUSHOPT);
      _features.clear_feature(CPU_GFNI);
      _features.clear_feature(CPU_AVX512_BITALG);
      _features.clear_feature(CPU_AVX512_IFMA);
      _features.clear_feature(CPU_AVX_IFMA);
      _features.clear_feature(CPU_AVX512_FP16);
      _features.clear_feature(CPU_AVX10_1);
      _features.clear_feature(CPU_AVX10_2);
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  assert(supports_clflush(), "Always present");
  if (X86ICacheSync == -1) {
    // Auto-detect, choosing the best-performing option that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
    if (supports_clwb()) {
      FLAG_SET_ERGO(X86ICacheSync, 3);
    } else if (supports_clflushopt()) {
      FLAG_SET_ERGO(X86ICacheSync, 2);
    } else {
      FLAG_SET_ERGO(X86ICacheSync, 1);
    }
  } else {
    if ((X86ICacheSync == 2) && !supports_clflushopt()) {
      vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
    }
    if ((X86ICacheSync == 3) && !supports_clwb()) {
      vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
    }
    if ((X86ICacheSync == 5) && !supports_serialize()) {
      vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
    }
  }

  char buf[2048];
  size_t cpu_info_size = jio_snprintf(
    buf, sizeof(buf),
    "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
    cores_per_cpu(), threads_per_core(),
    cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(cpu_info_size > 0, "not enough temporary space allocated");

  insert_features_names(_features, buf + cpu_info_size, sizeof(buf) - cpu_info_size);

  _cpu_info_string = os::strdup(buf);

  _features_string = extract_features_string(_cpu_info_string,
                                             strnlen(_cpu_info_string, sizeof(buf)),
                                             cpu_info_size);

  // Use AES instructions if available.
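  // (The cascade below enforces a dependency chain: UseAESIntrinsics
  // requires UseAES plus SSE3 or higher, and UseAESCTRIntrinsics
  // additionally requires SSE4.1.)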
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
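  // (CLMUL is the carry-less multiply extension, PCLMULQDQ; both the CRC32
  // and GHASH intrinsics configured below build on it.)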
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
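  // (ChaCha20's quarter-round is just 32-bit adds, xors and rotates, which
  // map directly onto packed SIMD operations, so plain AVX is a sufficient
  // baseline here.)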
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }

  // Kyber Intrinsics
  // Currently we only have them for AVX512
#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
      UseKyberIntrinsics = true;
    }
  } else
#endif
  if (UseKyberIntrinsics) {
    warning("Intrinsics for ML-KEM are not available on this CPU.");
    FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
  }

  // Dilithium Intrinsics
  // Currently we only have them for AVX512
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
      UseDilithiumIntrinsics = true;
    }
  } else if (UseDilithiumIntrinsics) {
    warning("Intrinsics for ML-DSA are not available on this CPU.");
    FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma()) {
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() || (supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 byte vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 4;
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
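  // (For example, movlpd xmm, mem merges into the low 64 bits and keeps a
  // dependency on the register's previous contents, while movsd xmm, mem
  // rewrites the whole register and breaks the dependency.)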
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on AMD cpus < 17h.
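      // (Pre-Zen AMD parts execute 256-bit AVX operations as two 128-bit
      // halves internally, so vectors wider than 16 bytes rarely pay off
      // there.)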
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize=%zd", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize to %zd", ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }

  // Use count leading zeros instruction if available.
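  // Illustrative note (added): for a non-zero 32-bit value x,
  //   lzcnt(x) == 31 - floor(log2(x)),  e.g. lzcnt(0x00010000) == 15,
  // and lzcnt(0) is defined as the operand width, whereas the legacy BSR
  // instruction leaves its destination undefined for a zero input.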
  if (supports_lzcnt()) {
    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
      UseCountLeadingZerosInstruction = true;
    }
  } else if (UseCountLeadingZerosInstruction) {
    warning("lzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
  }

  // Use count trailing zeros instruction if available
  if (supports_bmi1()) {
    // tzcnt does not require VEX prefix
    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
      if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
        // Don't use tzcnt if BMI1 is switched off on command line.
        UseCountTrailingZerosInstruction = false;
      } else {
        UseCountTrailingZerosInstruction = true;
      }
    }
  } else if (UseCountTrailingZerosInstruction) {
    warning("tzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
  }

  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
  // VEX prefix is generated only when AVX > 0.
  if (supports_bmi1() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
      UseBMI1Instructions = true;
    }
  } else if (UseBMI1Instructions) {
    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
  }

  if (supports_bmi2() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
      UseBMI2Instructions = true;
    }
  } else if (UseBMI2Instructions) {
    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
  }

  // Use population count instruction if available.
  if (supports_popcnt()) {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
      UsePopCountInstruction = true;
    }
  } else if (UsePopCountInstruction) {
    warning("POPCNT instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

  // Use fast-string operations if available.
  if (supports_erms()) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = true;
    }
  } else if (UseFastStosb) {
    warning("fast-string operations are not available on this CPU");
    FLAG_SET_DEFAULT(UseFastStosb, false);
  }

  // For AMD processors use XMM/YMM MOVDQU instructions
  // for Object Initialization by default
  if (is_amd() && cpu_family() >= 0x19) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = false;
    }
  }

#ifdef COMPILER2
  if (is_intel() && MaxVectorSize > 16) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = false;
    }
  }
#endif

  // Use XMM/YMM MOVDQU instruction for Object Initialization
  if (!UseFastStosb && UseUnalignedLoadStores) {
    if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
      UseXMMForObjInit = true;
    }
  } else if (UseXMMForObjInit) {
    warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
    FLAG_SET_DEFAULT(UseXMMForObjInit, false);
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
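    // (Added note: with UseUnalignedLoadStores the generated
    // movdqu/vmovdqu forms tolerate misaligned addresses cheaply, so C2
    // does not have to force vector alignment; on hardware where
    // unaligned accesses are slow, AlignVector remains true.)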
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
  int cache_line_size = checked_cast<int>(prefetch_data_size());
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  // Prefetch settings

  // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // gc copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
  // The used prefetcht0 instruction works for both amd64 and em64t.
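  // Worked example (added): with 64-byte dcache lines, 9 lines * 64 bytes
  // = 576 bytes, which is the default distance applied below to both the
  // copy and scan intervals.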

  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
  }

  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
      (cache_line_size > ContendedPaddingWidth))
    ContendedPaddingWidth = cache_line_size;

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

#ifndef PRODUCT
  if (log_is_enabled(Info, os, cpu)) {
    LogStream ls(Log(os, cpu)::info());
    outputStream* log = &ls;
    log->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    log->print("UseSSE=%d", UseSSE);
    if (UseAVX > 0) {
      log->print(" UseAVX=%d", UseAVX);
    }
    if (UseAES) {
      log->print(" UseAES=1");
    }
#ifdef COMPILER2
    if (MaxVectorSize > 0) {
      log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
    }
#endif
    log->cr();
    log->print("Allocation");
    if (AllocatePrefetchStyle <= 0) {
      log->print_cr(": no prefetching");
    } else {
      log->print(" prefetching: ");
      if (AllocatePrefetchInstr == 0) {
        log->print("PREFETCHNTA");
      } else if (AllocatePrefetchInstr == 1) {
        log->print("PREFETCHT0");
      } else if (AllocatePrefetchInstr == 2) {
        log->print("PREFETCHT2");
      } else if (AllocatePrefetchInstr == 3) {
        log->print("PREFETCHW");
      }
      if (AllocatePrefetchLines > 1) {
        log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
      } else {
        log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
      }
    }

    if (PrefetchCopyIntervalInBytes > 0) {
      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (ContendedPaddingWidth > 0) {
      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
    FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
    FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
  VirtualizationType vrt = VM_Version::get_detected_virtualization();
  if (vrt == XenHVM) {
    st->print_cr("Xen hardware-assisted virtualization detected");
  } else if (vrt == KVM) {
    st->print_cr("KVM virtualization detected");
  } else if (vrt == VMWare) {
    st->print_cr("VMWare virtualization detected");
    VirtualizationSupport::print_virtualization_info(st);
  } else if (vrt == HyperV) {
    st->print_cr("Hyper-V virtualization detected");
  } else if (vrt == HyperVRole) {
    st->print_cr("Hyper-V role detected");
  }
}

bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another Intel machine not recognized in the table, we are okay.
    return false;
  }
}

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// More information:
//  https://kb.vmware.com/s/article/1009458
//
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0};

  // Xen cpuid leaves can be found at 0x100 aligned boundaries starting
  // from 0x40000000 until 0x40010000.
  //  https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      //  https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}

#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2 &&
         is_intel_cascade_lake();
}
#endif

bool VM_Version::is_intel_cascade_lake() {
  return is_intel_skylake() && _stepping >= 5;
}

// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// The Intel platforms that support the serialize instruction have an
// improved implementation of 64-byte load/stores and so the default
// threshold is set to 0 for these platforms.
int VM_Version::avx3_threshold() {
  return (is_intel_family_core() &&
          supports_serialize() &&
          FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}

void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}

static bool _vm_version_initialized = false;

void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be the FIRST use of the assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                    g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                             g.clear_apx_test_state());
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}

typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6, // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;

static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;

const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro", // or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom", // Barcelona et al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
"4th Generation Intel Core Processor" 2339 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2340 nullptr 2341 }; 2342 2343 /* Brand ID is for back compatibility 2344 * Newer CPUs uses the extended brand string */ 2345 const char* const _brand_id[] = { 2346 "", 2347 "Celeron processor", 2348 "Pentium III processor", 2349 "Intel Pentium III Xeon processor", 2350 "", 2351 "", 2352 "", 2353 "", 2354 "Intel Pentium 4 processor", 2355 nullptr 2356 }; 2357 2358 2359 const char* const _feature_edx_id[] = { 2360 "On-Chip FPU", 2361 "Virtual Mode Extensions", 2362 "Debugging Extensions", 2363 "Page Size Extensions", 2364 "Time Stamp Counter", 2365 "Model Specific Registers", 2366 "Physical Address Extension", 2367 "Machine Check Exceptions", 2368 "CMPXCHG8B Instruction", 2369 "On-Chip APIC", 2370 "", 2371 "Fast System Call", 2372 "Memory Type Range Registers", 2373 "Page Global Enable", 2374 "Machine Check Architecture", 2375 "Conditional Mov Instruction", 2376 "Page Attribute Table", 2377 "36-bit Page Size Extension", 2378 "Processor Serial Number", 2379 "CLFLUSH Instruction", 2380 "", 2381 "Debug Trace Store feature", 2382 "ACPI registers in MSR space", 2383 "Intel Architecture MMX Technology", 2384 "Fast Float Point Save and Restore", 2385 "Streaming SIMD extensions", 2386 "Streaming SIMD extensions 2", 2387 "Self-Snoop", 2388 "Hyper Threading", 2389 "Thermal Monitor", 2390 "", 2391 "Pending Break Enable" 2392 }; 2393 2394 const char* const _feature_extended_edx_id[] = { 2395 "", 2396 "", 2397 "", 2398 "", 2399 "", 2400 "", 2401 "", 2402 "", 2403 "", 2404 "", 2405 "", 2406 "SYSCALL/SYSRET", 2407 "", 2408 "", 2409 "", 2410 "", 2411 "", 2412 "", 2413 "", 2414 "", 2415 "Execute Disable Bit", 2416 "", 2417 "", 2418 "", 2419 "", 2420 "", 2421 "", 2422 "RDTSCP", 2423 "", 2424 "Intel 64 Architecture", 2425 "", 2426 "" 2427 }; 2428 2429 const char* const _feature_ecx_id[] = { 2430 "Streaming SIMD Extensions 3", 2431 "PCLMULQDQ", 2432 "64-bit DS Area", 2433 "MONITOR/MWAIT instructions", 2434 "CPL Qualified Debug Store", 2435 "Virtual Machine Extensions", 2436 "Safer Mode Extensions", 2437 "Enhanced Intel SpeedStep technology", 2438 "Thermal Monitor 2", 2439 "Supplemental Streaming SIMD Extensions 3", 2440 "L1 Context ID", 2441 "", 2442 "Fused Multiply-Add", 2443 "CMPXCHG16B", 2444 "xTPR Update Control", 2445 "Perfmon and Debug Capability", 2446 "", 2447 "Process-context identifiers", 2448 "Direct Cache Access", 2449 "Streaming SIMD extensions 4.1", 2450 "Streaming SIMD extensions 4.2", 2451 "x2APIC", 2452 "MOVBE", 2453 "Popcount instruction", 2454 "TSC-Deadline", 2455 "AESNI", 2456 "XSAVE", 2457 "OSXSAVE", 2458 "AVX", 2459 "F16C", 2460 "RDRAND", 2461 "" 2462 }; 2463 2464 const char* const _feature_extended_ecx_id[] = { 2465 "LAHF/SAHF instruction support", 2466 "Core multi-processor legacy mode", 2467 "", 2468 "", 2469 "", 2470 "Advanced Bit Manipulations: LZCNT", 2471 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2472 "Misaligned SSE mode", 2473 "", 2474 "", 2475 "", 2476 "", 2477 "", 2478 "", 2479 "", 2480 "", 2481 "", 2482 "", 2483 "", 2484 "", 2485 "", 2486 "", 2487 "", 2488 "", 2489 "", 2490 "", 2491 "", 2492 "", 2493 "", 2494 "", 2495 "", 2496 "" 2497 }; 2498 2499 void VM_Version::initialize_tsc(void) { 2500 ResourceMark rm; 2501 2502 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2503 if (cpuid_brand_string_stub_blob == nullptr) { 2504 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2505 } 2506 
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                            g.generate_getCPUIDBrandString());
}

const char* VM_Version::cpu_model_description(void) {
  uint32_t cpu_family = extended_cpu_family();
  uint32_t cpu_model = extended_cpu_model();
  const char* model = nullptr;

  if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
    for (uint32_t i = 0; i <= cpu_model; i++) {
      model = _model_id_pentium_pro[i];
      if (model == nullptr) {
        break;
      }
    }
  }
  return model;
}

const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr;
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}

const char* VM_Version::cpu_brand(void) {
  const char* brand = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}

bool VM_Version::cpu_is_em64t(void) {
  return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
}

bool VM_Version::is_netburst(void) {
  return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
}

bool VM_Version::supports_tscinv_ext(void) {
  if (!supports_tscinv_bit()) {
    return false;
  }

  if (is_intel()) {
    return true;
  }

  if (is_amd()) {
    return !is_amd_Barcelona();
  }

  if (is_hygon()) {
    return true;
  }

  return false;
}

void VM_Version::resolve_cpu_information_details(void) {

  // in future we want to base this information on proper cpu
  // and cache topology enumeration such as:
  // Intel 64 Architecture Processor Topology Enumeration
  // which supports system cpu and cache topology enumeration
  // either using x2APIC IDs or initial APIC IDs

  // currently only rough cpu information estimates
  // which will not necessarily reflect the exact configuration of the system

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out the number of threads per cpu package
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // use the number of threads visible to the process in order to guess the number of sockets
  _no_of_sockets = _no_of_threads / threads_per_package;

  // the process might only see a subset of the total number of threads
  // from a single processor package, e.g. under virtualization/resource management.
  // If so then just write a hard 1 as the number of packages.
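  // Worked example (added, hypothetical numbers): with 2 threads per core
  // and 8 cores per package, threads_per_package = 16; a machine showing
  // 32 logical processors then yields _no_of_sockets = 32 / 16 = 2. A
  // process restricted to fewer threads than one package provides would
  // compute 0 here, which is clamped to 1 below.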
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}


const char* VM_Version::cpu_family_description(void) {
  int cpu_family_id = extended_cpu_family();
  if (is_amd()) {
    if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
      return _family_id_amd[cpu_family_id];
    }
  }
  if (is_intel()) {
    if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
      return cpu_model_description();
    }
    if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
      return _family_id_intel[cpu_family_id];
    }
  }
  if (is_hygon()) {
    return "Dhyana";
  }
  return "Unknown x86";
}

int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");

  const char* cpu_type = nullptr;
  const char* x64 = nullptr;

  if (is_intel()) {
    cpu_type = "Intel";
    x64 = cpu_is_em64t() ? " Intel64" : "";
  } else if (is_amd()) {
    cpu_type = "AMD";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else if (is_hygon()) {
    cpu_type = "Hygon";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else {
    cpu_type = "Unknown x86";
    x64 = cpu_is_em64t() ? " x86_64" : "";
  }

  jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
               cpu_type,
               cpu_family_description(),
               supports_ht() ? " (HT)" : "",
               supports_sse3() ? " SSE3" : "",
               supports_ssse3() ? " SSSE3" : "",
               supports_sse4_1() ? " SSE4.1" : "",
               supports_sse4_2() ? " SSE4.2" : "",
               supports_sse4a() ? " SSE4A" : "",
               is_netburst() ? " Netburst" : "",
               is_intel_family_core() ? " Core" : "",
               x64);

  return OS_OK;
}
" Core" : "", 2670 x64); 2671 2672 return OS_OK; 2673 } 2674 2675 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2676 assert(buf != nullptr, "buffer is null!"); 2677 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2678 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2679 2680 // invoke newly generated asm code to fetch CPU Brand String 2681 getCPUIDBrandString_stub(&_cpuid_info); 2682 2683 // fetch results into buffer 2684 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2685 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2686 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2687 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2688 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2689 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2690 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2691 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2692 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2693 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2694 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2695 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2696 2697 return OS_OK; 2698 } 2699 2700 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2701 guarantee(buf != nullptr, "buffer is null!"); 2702 guarantee(buf_len > 0, "buffer len not enough!"); 2703 2704 unsigned int flag = 0; 2705 unsigned int fi = 0; 2706 size_t written = 0; 2707 const char* prefix = ""; 2708 2709 #define WRITE_TO_BUF(string) \ 2710 { \ 2711 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2712 if (res < 0) { \ 2713 return buf_len - 1; \ 2714 } \ 2715 written += res; \ 2716 if (prefix[0] == '\0') { \ 2717 prefix = ", "; \ 2718 } \ 2719 } 2720 2721 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2722 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2723 continue; /* no hyperthreading */ 2724 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2725 continue; /* no fast system call */ 2726 } 2727 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2728 WRITE_TO_BUF(_feature_edx_id[fi]); 2729 } 2730 } 2731 2732 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2733 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2734 WRITE_TO_BUF(_feature_ecx_id[fi]); 2735 } 2736 } 2737 2738 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2739 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2740 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2741 } 2742 } 2743 2744 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2745 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2746 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2747 } 2748 } 2749 2750 if (supports_tscinv_bit()) { 2751 WRITE_TO_BUF("Invariant TSC"); 2752 } 2753 2754 return written; 2755 } 2756 2757 /** 2758 * Write a detailed description of the cpu to a given buffer, including 2759 * feature set. 
 */
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char vendor_id[VENDOR_LENGTH];
  const char* family = nullptr;
  const char* model = nullptr;
  const char* brand = nullptr;
  int outputLen = 0;

  family = cpu_family_description();
  if (family == nullptr) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == nullptr) {
    model = unknown;
  }

  brand = cpu_brand_string();

  if (brand == nullptr) {
    brand = cpu_brand();
    if (brand == nullptr) {
      brand = unknown;
    }
  }

  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}


// Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}

/**
 * For information about extracting the frequency from the cpu brand string, please see:
 *
 *   Intel Processor Identification and the CPUID Instruction
 *   Application Note 485
 *   May 2012
 *
 * The return value is the frequency in Hz.
 */
int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
  const char* const brand_string = cpu_brand_string();
  if (brand_string == nullptr) {
    return 0;
  }
  const int64_t MEGA = 1000000;
  int64_t multiplier = 0;
  int64_t frequency = 0;
  uint8_t idx = 0;
  // The brand string buffer is at most 48 bytes.
  // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
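  // Worked example (added, hypothetical brand string): for
  // "Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz" the loop below stops with
  // brand_string[idx] == 'G'; since brand_string[idx-3] == '.', the "x.xx"
  // branch computes 2 * 1e9 + 5 * 1e8 + 0 * 1e7 = 2,500,000,000 Hz.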
  for (; idx < 48-2; ++idx) {
    // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
    // Search brand string for "yHz" where y is M, G, or T.
    if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
      if (brand_string[idx] == 'M') {
        multiplier = MEGA;
      } else if (brand_string[idx] == 'G') {
        multiplier = MEGA * 1000;
      } else if (brand_string[idx] == 'T') {
        multiplier = MEGA * MEGA;
      }
      break;
    }
  }
  if (multiplier > 0) {
    // Compute frequency (in Hz) from brand string.
    if (brand_string[idx-3] == '.') { // if format is "x.xx"
      frequency =  (brand_string[idx-4] - '0') * multiplier;
      frequency += (brand_string[idx-2] - '0') * multiplier / 10;
      frequency += (brand_string[idx-1] - '0') * multiplier / 100;
    } else { // format is "xxxx"
      frequency =  (brand_string[idx-4] - '0') * 1000;
      frequency += (brand_string[idx-3] - '0') * 100;
      frequency += (brand_string[idx-2] - '0') * 10;
      frequency += (brand_string[idx-1] - '0');
      frequency *= multiplier;
    }
  }
  return frequency;
}


int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
  if (_max_qualified_cpu_frequency == 0) {
    _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
  }
  return _max_qualified_cpu_frequency;
}

VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
  VM_Features vm_features;
  if (std_cpuid1_edx.bits.cmpxchg8 != 0)
    vm_features.set_feature(CPU_CX8);
  if (std_cpuid1_edx.bits.cmov != 0)
    vm_features.set_feature(CPU_CMOV);
  if (std_cpuid1_edx.bits.clflush != 0)
    vm_features.set_feature(CPU_FLUSH);
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  assert(vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
  if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.fxsr != 0))
    vm_features.set_feature(CPU_FXSR);
  // HT flag is set for multi-core processors also.
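  // (Added note: CPU_HT is therefore derived from the computed
  // threads-per-core value rather than from the raw CPUID HTT bit, since
  // that bit is also set on multi-core packages without SMT.)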
  if (threads_per_core() > 1)
    vm_features.set_feature(CPU_HT);
  if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.mmx != 0))
    vm_features.set_feature(CPU_MMX);
  if (std_cpuid1_edx.bits.sse != 0)
    vm_features.set_feature(CPU_SSE);
  if (std_cpuid1_edx.bits.sse2 != 0)
    vm_features.set_feature(CPU_SSE2);
  if (std_cpuid1_ecx.bits.sse3 != 0)
    vm_features.set_feature(CPU_SSE3);
  if (std_cpuid1_ecx.bits.ssse3 != 0)
    vm_features.set_feature(CPU_SSSE3);
  if (std_cpuid1_ecx.bits.sse4_1 != 0)
    vm_features.set_feature(CPU_SSE4_1);
  if (std_cpuid1_ecx.bits.sse4_2 != 0)
    vm_features.set_feature(CPU_SSE4_2);
  if (std_cpuid1_ecx.bits.popcnt != 0)
    vm_features.set_feature(CPU_POPCNT);
  if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
      xem_xcr0_eax.bits.apx_f != 0) {
    vm_features.set_feature(CPU_APX_F);
  }
  if (std_cpuid1_ecx.bits.avx != 0 &&
      std_cpuid1_ecx.bits.osxsave != 0 &&
      xem_xcr0_eax.bits.sse != 0 &&
      xem_xcr0_eax.bits.ymm != 0) {
    vm_features.set_feature(CPU_AVX);
    vm_features.set_feature(CPU_VZEROUPPER);
    if (sefsl1_cpuid7_eax.bits.sha512 != 0)
      vm_features.set_feature(CPU_SHA512);
    if (std_cpuid1_ecx.bits.f16c != 0)
      vm_features.set_feature(CPU_F16C);
    if (sef_cpuid7_ebx.bits.avx2 != 0) {
      vm_features.set_feature(CPU_AVX2);
      if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
        vm_features.set_feature(CPU_AVX_IFMA);
    }
    if (sef_cpuid7_ecx.bits.gfni != 0)
      vm_features.set_feature(CPU_GFNI);
    if (sef_cpuid7_ebx.bits.avx512f != 0 &&
        xem_xcr0_eax.bits.opmask != 0 &&
        xem_xcr0_eax.bits.zmm512 != 0 &&
        xem_xcr0_eax.bits.zmm32 != 0) {
      vm_features.set_feature(CPU_AVX512F);
      if (sef_cpuid7_ebx.bits.avx512cd != 0)
        vm_features.set_feature(CPU_AVX512CD);
      if (sef_cpuid7_ebx.bits.avx512dq != 0)
        vm_features.set_feature(CPU_AVX512DQ);
      if (sef_cpuid7_ebx.bits.avx512ifma != 0)
        vm_features.set_feature(CPU_AVX512_IFMA);
      if (sef_cpuid7_ebx.bits.avx512pf != 0)
        vm_features.set_feature(CPU_AVX512PF);
      if (sef_cpuid7_ebx.bits.avx512er != 0)
        vm_features.set_feature(CPU_AVX512ER);
      if (sef_cpuid7_ebx.bits.avx512bw != 0)
        vm_features.set_feature(CPU_AVX512BW);
      if (sef_cpuid7_ebx.bits.avx512vl != 0)
        vm_features.set_feature(CPU_AVX512VL);
      if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
      if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
      if (sef_cpuid7_ecx.bits.vaes != 0)
        vm_features.set_feature(CPU_AVX512_VAES);
      if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
        vm_features.set_feature(CPU_AVX512_VNNI);
      if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        vm_features.set_feature(CPU_AVX512_BITALG);
      if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        vm_features.set_feature(CPU_AVX512_VBMI);
      if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        vm_features.set_feature(CPU_AVX512_VBMI2);
    }
    if (is_intel()) {
      if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
          std_cpuid24_ebx.bits.avx10_vlen_512 != 0 &&
          std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
          xem_xcr0_eax.bits.opmask != 0 &&
          xem_xcr0_eax.bits.zmm512 != 0 &&
          xem_xcr0_eax.bits.zmm32 != 0) {
        vm_features.set_feature(CPU_AVX10_1);
        vm_features.set_feature(CPU_AVX512F);
        vm_features.set_feature(CPU_AVX512CD);
        vm_features.set_feature(CPU_AVX512DQ);
        vm_features.set_feature(CPU_AVX512PF);
        vm_features.set_feature(CPU_AVX512ER);
        vm_features.set_feature(CPU_AVX512BW);
        vm_features.set_feature(CPU_AVX512VL);
        vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
        vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
        vm_features.set_feature(CPU_AVX512_VAES);
        vm_features.set_feature(CPU_AVX512_VNNI);
        vm_features.set_feature(CPU_AVX512_BITALG);
        vm_features.set_feature(CPU_AVX512_VBMI);
        vm_features.set_feature(CPU_AVX512_VBMI2);
        if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
          vm_features.set_feature(CPU_AVX10_2);
        }
      }
    }
  }

  if (std_cpuid1_ecx.bits.hv != 0)
    vm_features.set_feature(CPU_HV);
  if (sef_cpuid7_ebx.bits.bmi1 != 0)
    vm_features.set_feature(CPU_BMI1);
  if (std_cpuid1_edx.bits.tsc != 0)
    vm_features.set_feature(CPU_TSC);
  if (ext_cpuid7_edx.bits.tsc_invariance != 0)
    vm_features.set_feature(CPU_TSCINV_BIT);
  if (std_cpuid1_ecx.bits.aes != 0)
    vm_features.set_feature(CPU_AES);
  if (ext_cpuid1_ecx.bits.lzcnt != 0)
    vm_features.set_feature(CPU_LZCNT);
  if (ext_cpuid1_ecx.bits.prefetchw != 0)
    vm_features.set_feature(CPU_3DNOW_PREFETCH);
  if (sef_cpuid7_ebx.bits.erms != 0)
    vm_features.set_feature(CPU_ERMS);
  if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    vm_features.set_feature(CPU_FSRM);
  if (std_cpuid1_ecx.bits.clmul != 0)
    vm_features.set_feature(CPU_CLMUL);
  if (sef_cpuid7_ebx.bits.rtm != 0)
    vm_features.set_feature(CPU_RTM);
  if (sef_cpuid7_ebx.bits.adx != 0)
    vm_features.set_feature(CPU_ADX);
  if (sef_cpuid7_ebx.bits.bmi2 != 0)
    vm_features.set_feature(CPU_BMI2);
  if (sef_cpuid7_ebx.bits.sha != 0)
    vm_features.set_feature(CPU_SHA);
  if (std_cpuid1_ecx.bits.fma != 0)
    vm_features.set_feature(CPU_FMA);
  if (sef_cpuid7_ebx.bits.clflushopt != 0)
    vm_features.set_feature(CPU_FLUSHOPT);
  if (sef_cpuid7_ebx.bits.clwb != 0)
    vm_features.set_feature(CPU_CLWB);
  if (ext_cpuid1_edx.bits.rdtscp != 0)
    vm_features.set_feature(CPU_RDTSCP);
  if (sef_cpuid7_ecx.bits.rdpid != 0)
    vm_features.set_feature(CPU_RDPID);

  // AMD|Hygon additional features.
  if (is_amd_family()) {
    // PREFETCHW was checked above, check 3DNow! (tdnow) here.
    if (ext_cpuid1_edx.bits.tdnow != 0)
      vm_features.set_feature(CPU_3DNOW_PREFETCH);
    if (ext_cpuid1_ecx.bits.sse4a != 0)
      vm_features.set_feature(CPU_SSE4A);
  }

  // Intel additional features.
  if (is_intel()) {
    if (sef_cpuid7_edx.bits.serialize != 0)
      vm_features.set_feature(CPU_SERIALIZE);
    if (sef_cpuid7_edx.bits.avx512_fp16 != 0)
      vm_features.set_feature(CPU_AVX512_FP16);
  }

  // ZX additional features.
  if (is_zx()) {
    // We do not know if these are supported by ZX, so we cannot trust
    // the common CPUID bit for them.
    assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
    vm_features.clear_feature(CPU_CLWB);
  }

  // Protection key features.
  if (sef_cpuid7_ecx.bits.pku != 0) {
    vm_features.set_feature(CPU_PKU);
  }
  if (sef_cpuid7_ecx.bits.ospke != 0) {
    vm_features.set_feature(CPU_OSPKE);
  }

  // Control flow enforcement (CET) features.
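  // (Added note: CPU_CET_SS advertises shadow-stack support and
  // CPU_CET_IBT indirect branch tracking, where indirect branch targets
  // are expected to start with an ENDBRANCH (endbr64) landing pad.)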
  if (sef_cpuid7_ecx.bits.cet_ss != 0) {
    vm_features.set_feature(CPU_CET_SS);
  }
  if (sef_cpuid7_edx.bits.cet_ibt != 0) {
    vm_features.set_feature(CPU_CET_IBT);
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    vm_features.set_feature(CPU_TSCINV);
  }
  return vm_features;
}

bool VM_Version::os_supports_avx_vectors() {
  bool retVal = false;
  int nreg = 4;
  if (supports_evex()) {
    // Verify that the OS saves/restores all bits of EVEX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
      if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
  } else if (supports_avx()) {
    // Verify that the OS saves/restores all bits of AVX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
    // zmm_save will be set on an EVEX enabled machine even if we choose AVX code gen
    if (retVal == false) {
      // Verify that the OS saves/restores all bits of EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    }
  }
  return retVal;
}

bool VM_Version::os_supports_apx_egprs() {
  if (!supports_apx_f()) {
    return false;
  }
  // Enable APX support for product builds after
  // completion of planned features listed in JDK-8329030.
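  // (Added note: the non-product check below assumes the CPU-info stub
  // stored egpr_test_value() into the extended GPRs r16/r31 beforehand;
  // if apx_save[] no longer holds those values after signal handling,
  // the OS did not preserve EGPR state, so APX is kept disabled.)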
#if !defined(PRODUCT)
  if (_cpuid_info.apx_save[0] != egpr_test_value() ||
      _cpuid_info.apx_save[1] != egpr_test_value()) {
    return false;
  }
  return true;
#else
  return false;
#endif
}

uint VM_Version::cores_per_cpu() {
  uint result = 1;
  if (is_intel()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  } else if (is_amd_family()) {
    result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
  } else if (is_zx()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  }
  return result;
}

uint VM_Version::threads_per_core() {
  uint result = 1;
  if (is_intel() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (is_zx() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    if (cpu_family() >= 0x17) {
      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    } else {
      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
               cores_per_cpu();
    }
  }
  return (result == 0 ? 1 : result);
}

uint VM_Version::L1_line_size() {
  uint result = 0;
  if (is_intel()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  } else if (is_amd_family()) {
    result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
  } else if (is_zx()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  }
  if (result < 32) // not defined ?
    result = 32;   // 32 bytes by default on x86 and other x64
  return result;
}

bool VM_Version::is_intel_tsc_synched_at_init() {
  if (is_intel_family_core()) {
    uint32_t ext_model = extended_cpu_model();
    if (ext_model == CPU_MODEL_NEHALEM_EP     ||
        ext_model == CPU_MODEL_WESTMERE_EP    ||
        ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
        ext_model == CPU_MODEL_IVYBRIDGE_EP) {
      // <= 2-socket invariant tsc support. EX versions are usually used
      // in > 2-socket systems and likely don't synchronize tscs at
      // initialization.
      // Code that uses tsc values must be prepared for them to arbitrarily
      // jump forward or backward.
      return true;
    }
  }
  return false;
}

int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
  // Hardware prefetching (distance/size in bytes):
  // Pentium 3 -  64 /  32
  // Pentium 4 - 256 / 128
  // Athlon    -  64 /  32 ????
  // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
  // Core      - 128 /  64
  //
  // Software prefetching (distance in bytes / instruction with best score):
  // Pentium 3 - 128 / prefetchnta
  // Pentium 4 - 512 / prefetchnta
  // Athlon    - 128 / prefetchnta
  // Opteron   - 256 / prefetchnta
  // Core      - 256 / prefetchnta
  // It will be used only when AllocatePrefetchStyle > 0

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    if (supports_sse3() && cpu_family() == 6) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
        return 384;
      }
    }
    if (supports_sse2()) {
      if (cpu_family() == 6) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}

bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
  switch (id) {
  case vmIntrinsics::_floatToFloat16:
  case vmIntrinsics::_float16ToFloat:
    if (!supports_float16()) {
      return false;
    }
    break;
  default:
    break;
  }
  return true;
}

void VM_Version::insert_features_names(VM_Version::VM_Features features, char* buf, size_t buflen) {
  for (int i = 0; i < MAX_CPU_FEATURES; i++) {
    if (features.supports_feature((VM_Version::Feature_Flag)i)) {
      int res = jio_snprintf(buf, buflen, ", %s", _features_names[i]);
      assert(res > 0, "not enough temporary space allocated");
      buf += res;
      buflen -= res;
    }
  }
}
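
// Illustrative usage sketch (added; the buffer and the features value are
// hypothetical, and this snippet is not part of the build):
//
//   char buf[512] = {0};
//   VM_Version::insert_features_names(features, buf, sizeof(buf));
//   // buf now holds something like ", cx8, cmov, fxsr, mmx, sse, sse2"
//
// Note that every entry, including the first, is written with a leading
// ", " separator.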