/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/ostream.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
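// Entry points into the stubs generated below; they stay null until the stub
// code has been emitted into stub_blob and the pointers have been installed.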
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  return true;
}

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address clear_apx_test_state() {
# define __ _masm->
    address start = __ pc();
    // EGPRs are caller-clobbered registers. Explicitly clearing r16 and r31
    // before the signal-raising access guarantees that any values observed in
    // them after signal handling were re-instantiated by the operating
    // system, not merely left unmodified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();   // push EFLAGS ...
    __ pop(rax);  // ... and read them into rax
    __ push(rax); // keep a copy on the stack for the final popf
    __ mov(rcx, rax);
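    // Classic pre-CPUID detection: EFLAGS.AC (bit 18, 0x40000) cannot be
    // toggled on a 386, and EFLAGS.ID (bit 21, 0x200000) cannot be toggled
    // on CPUs that lack the cpuid instruction.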
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);
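    // The next three blocks walk the cpuid(0xB) sub-leaves: ECX selects the
    // topology level (0 = SMT/thread, 1 = core, 2 = package), and a level is
    // invalid when EAX[4:0] and EBX[15:0] are both zero, which is what the
    // checks below test for.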
    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
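    // XCR0 bit layout used below: bit 1 = SSE/XMM state, bit 2 = AVX/YMM
    // state (mask 0x6); bits 5-7 = opmask and upper-ZMM state (mask 0xE0);
    // bit 19 = APX extended GPR state (mask 0x80000).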
    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);
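    // APX probe: seed two extended GPRs with a known test value, take a SEGV,
    // and store r16/r31 afterwards so the C++ code can verify that the OS
    // restored the extended register state across the signal.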
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS saves/restores XMM and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256 bits of YMM/ZMM
    // registers are not restored after signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
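      // 0x50654 is the raw cpuid(1).EAX signature (family 6, model 0x55,
      // stepping 4), i.e. a Skylake server part; when UseAVX is default those
      // parts are probed in legacy (non-EVEX) mode, matching the AVX-512
      // opt-out applied later in get_processor_features().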
      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
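    // The EVEX/AVX branch below must mirror the setup path above so that the
    // saves read back the same register widths that were written before the
    // signal was raised.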
    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };

  void generate_vzeroupper(Label& L_wrapup) {
# define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' -- "Genu" of "GenuineIntel", little-endian
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // the uncached variant here directly to be able to bootstrap correctly.
    __ vzeroupper_uncached();
# undef __
  }
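  // The stub generated below executes cpuid with the caller-supplied leaf in
  // EAX and stores EAX/EBX/ECX/EDX into a caller-supplied four-dword array;
  // it is used to query leaves such as the hypervisor identification range
  // (0x40000000 and up) for virtualization detection.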
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
# define __ _masm->

    address start = __ pc();

    // Save callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();   // push EFLAGS ...
    __ pop(rax);  // ... and read them into rax
    __ push(rax); // keep a copy on the stack for the final popf
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };
};
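// Runs the freshly generated get_cpu_info stub to populate _cpuid_info,
// derives the CPU feature set from it, and then applies the per-vendor and
// per-flag ergonomics below.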
void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features; // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;

  // flush_icache_stub has to be generated first.
  // That is why the ICache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in a 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");

  // Assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero.
  // It is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // Publish data cache line flush size to the generic field; otherwise
    // let it default to zero, thereby disabling writeback.
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
       _model == 0xCC || _model == 0xDD)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features.clear_feature(CPU_SSE4_1);
    _features.clear_feature(CPU_SSE4_2);
  }

  if (UseSSE < 3) {
    _features.clear_feature(CPU_SSE3);
    _features.clear_feature(CPU_SSSE3);
    _features.clear_feature(CPU_SSE4A);
  }

  if (UseSSE < 2)
    _features.clear_feature(CPU_SSE2);

  if (UseSSE < 1)
    _features.clear_feature(CPU_SSE);

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
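  // For example, UseSSE=4 on a CPU that lacks SSE4.1 but supports SSE3 is
  // clamped to UseSSE=3 below, with a warning if the flag was set explicitly.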
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features.clear_feature(CPU_AVX512F);
    _features.clear_feature(CPU_AVX512DQ);
    _features.clear_feature(CPU_AVX512CD);
    _features.clear_feature(CPU_AVX512BW);
    _features.clear_feature(CPU_AVX512ER);
    _features.clear_feature(CPU_AVX512PF);
    _features.clear_feature(CPU_AVX512VL);
    _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
    _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
    _features.clear_feature(CPU_AVX512_VAES);
    _features.clear_feature(CPU_AVX512_VNNI);
    _features.clear_feature(CPU_AVX512_VBMI);
    _features.clear_feature(CPU_AVX512_VBMI2);
    _features.clear_feature(CPU_AVX512_BITALG);
    _features.clear_feature(CPU_AVX512_IFMA);
    _features.clear_feature(CPU_APX_F);
    _features.clear_feature(CPU_AVX512_FP16);
    _features.clear_feature(CPU_AVX10_1);
    _features.clear_feature(CPU_AVX10_2);
  }

  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  }

  if (!UseAPX) {
    _features.clear_feature(CPU_APX_F);
  }

  if (UseAVX < 2) {
    _features.clear_feature(CPU_AVX2);
    _features.clear_feature(CPU_AVX_IFMA);
  }

  if (UseAVX < 1) {
    _features.clear_feature(CPU_AVX);
    _features.clear_feature(CPU_VZEROUPPER);
    _features.clear_feature(CPU_F16C);
    _features.clear_feature(CPU_SHA512);
  }

  if (logical_processors_per_package() == 1) {
    // An HT-capable processor may be installed on a system which doesn't enable HT.
    _features.clear_feature(CPU_HT);
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features.clear_feature(CPU_VZEROUPPER);
      _features.clear_feature(CPU_AVX512BW);
      _features.clear_feature(CPU_AVX512VL);
      _features.clear_feature(CPU_AVX512DQ);
      _features.clear_feature(CPU_AVX512_VNNI);
      _features.clear_feature(CPU_AVX512_VAES);
      _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
      _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
      _features.clear_feature(CPU_AVX512_VBMI);
      _features.clear_feature(CPU_AVX512_VBMI2);
      _features.clear_feature(CPU_CLWB);
      _features.clear_feature(CPU_FLUSHOPT);
      _features.clear_feature(CPU_GFNI);
      _features.clear_feature(CPU_AVX512_BITALG);
      _features.clear_feature(CPU_AVX512_IFMA);
      _features.clear_feature(CPU_AVX_IFMA);
      _features.clear_feature(CPU_AVX512_FP16);
      _features.clear_feature(CPU_AVX10_1);
      _features.clear_feature(CPU_AVX10_2);
    }
  }
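  // Intel's "Jump Conditional Code" (JCC) erratum: on affected Skylake-based
  // parts the microcode fix penalizes jumps that cross or end on a 32-byte
  // boundary, so when the mitigation is enabled the JIT pads code to avoid
  // emitting jumps at those positions.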
  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
    FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  assert(supports_clflush(), "Always present");
  if (X86ICacheSync == -1) {
    // Auto-detect, choosing the best performant one that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
    if (supports_clwb()) {
      FLAG_SET_ERGO(X86ICacheSync, 3);
    } else if (supports_clflushopt()) {
      FLAG_SET_ERGO(X86ICacheSync, 2);
    } else {
      FLAG_SET_ERGO(X86ICacheSync, 1);
    }
  } else {
    if ((X86ICacheSync == 2) && !supports_clflushopt()) {
      vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
    }
    if ((X86ICacheSync == 3) && !supports_clwb()) {
      vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
    }
    if ((X86ICacheSync == 5) && !supports_serialize()) {
      vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
    }
  }

  stringStream ss(2048);
  ss.print("(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
           cores_per_cpu(), threads_per_core(),
           cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  ss.print(", ");
  int features_offset = (int)ss.size();
  insert_features_names(_features, ss);

  _cpu_info_string = ss.as_string(true);
  _features_string = _cpu_info_string + features_offset;

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
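  // Kyber and Dilithium below are the NIST-standardized lattice schemes
  // ML-KEM (FIPS 203) and ML-DSA (FIPS 204); both stub sets currently
  // require AVX-512 (EVEX plus AVX512BW), as the checks below encode.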

  // Kyber Intrinsics
  // Currently we only have them for AVX512
#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
      UseKyberIntrinsics = true;
    }
  } else
#endif
  if (UseKyberIntrinsics) {
    warning("Intrinsics for ML-KEM are not available on this CPU.");
    FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
  }

  // Dilithium Intrinsics
  // Currently we only have them for AVX512
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
      UseDilithiumIntrinsics = true;
    }
  } else if (UseDilithiumIntrinsics) {
    warning("Intrinsics for ML-DSA are not available on this CPU.");
    FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires EVEX instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma()) {
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() || (supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }
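  // Vector-size ergonomics: the widest usable vector is 16 bytes (XMM) when
  // AVX is off or the OS does not preserve AVX state, 32 bytes (YMM) for
  // AVX/AVX2, and 64 bytes (ZMM) for AVX-512; MaxVectorSize is clamped to
  // that limit below.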
#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 bytes vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 bytes vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 4;
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#endif // COMPILER2_OR_JVMCI

  // On new cpus, instructions which update the whole XMM register should be
  // used to prevent a partial register stall due to dependencies on the high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (is_intel_server_family() || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif
  if (supports_sse4_2()) {
    if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
    }
  } else {
    if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
      warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
    }
    FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
  }
Intrinsics will be disabled.");
1676 }
1677 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1678 }
1679 if (UseSSE42Intrinsics) {
1680 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1681 UseVectorizedMismatchIntrinsic = true;
1682 }
1683 } else if (UseVectorizedMismatchIntrinsic) {
1684 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1685 warning("vectorizedMismatch intrinsics are not available on this CPU");
1686 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1687 }
1688 if (UseAVX >= 2) {
1689 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1690 } else if (UseVectorizedHashCodeIntrinsic) {
1691 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1692 warning("vectorizedHashCode intrinsics are not available on this CPU");
1693 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1694 }
1695
1696 // Use count leading zeros instruction if available.
1697 if (supports_lzcnt()) {
1698 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1699 UseCountLeadingZerosInstruction = true;
1700 }
1701 } else if (UseCountLeadingZerosInstruction) {
1702 warning("lzcnt instruction is not available on this CPU");
1703 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1704 }
1705
1706 // Use count trailing zeros instruction if available.
1707 if (supports_bmi1()) {
1708 // tzcnt does not require VEX prefix
1709 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1710 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1711 // Don't use tzcnt if BMI1 is switched off on command line.
1712 UseCountTrailingZerosInstruction = false;
1713 } else {
1714 UseCountTrailingZerosInstruction = true;
1715 }
1716 }
1717 } else if (UseCountTrailingZerosInstruction) {
1718 warning("tzcnt instruction is not available on this CPU");
1719 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1720 }
1721
1722 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1723 // VEX prefix is generated only when AVX > 0.
1724 if (supports_bmi1() && supports_avx()) {
1725 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1726 UseBMI1Instructions = true;
1727 }
1728 } else if (UseBMI1Instructions) {
1729 warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1730 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1731 }
1732
1733 if (supports_bmi2() && supports_avx()) {
1734 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1735 UseBMI2Instructions = true;
1736 }
1737 } else if (UseBMI2Instructions) {
1738 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1739 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1740 }
1741
1742 // Use population count instruction if available.
1743 if (supports_popcnt()) {
1744 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1745 UsePopCountInstruction = true;
1746 }
1747 } else if (UsePopCountInstruction) {
1748 warning("POPCNT instruction is not available on this CPU");
1749 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1750 }
1751
1752 // Use fast-string operations if available.
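// "Fast-string" refers to the ERMS CPUID feature (Enhanced REP MOVSB/STOSB):
// the microcoded string instructions are fast enough to use for bulk copy and
// fill, and UseFastStosb lets block-zeroing code use a single rep stosb.
// A minimal sketch of the operation that instruction performs, assuming GCC
// inline-asm syntax (illustrative only, not a HotSpot helper):
//
//   void rep_stosb_zero(void* dst, size_t cnt) {
//     __asm__ volatile("rep stosb"            // store AL into [RDI], RCX times
//                      : "+D"(dst), "+c"(cnt) // RDI = destination, RCX = count
//                      : "a"(0)               // AL = 0 (the fill byte)
//                      : "memory");
//   }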
1753 if (supports_erms()) {
1754 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1755 UseFastStosb = true;
1756 }
1757 } else if (UseFastStosb) {
1758 warning("fast-string operations are not available on this CPU");
1759 FLAG_SET_DEFAULT(UseFastStosb, false);
1760 }
1761
1762 // For AMD processors use XMM/YMM MOVDQU instructions
1763 // for Object Initialization by default
1764 if (is_amd() && cpu_family() >= 0x19) {
1765 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1766 UseFastStosb = false;
1767 }
1768 }
1769
1770 #ifdef COMPILER2
1771 if (is_intel() && MaxVectorSize > 16) {
1772 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1773 UseFastStosb = false;
1774 }
1775 }
1776 #endif
1777
1778 // Use XMM/YMM MOVDQU instruction for Object Initialization
1779 if (UseUnalignedLoadStores) {
1780 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1781 UseXMMForObjInit = true;
1782 }
1783 } else if (UseXMMForObjInit) {
1784 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1785 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1786 }
1787
1788 #ifdef COMPILER2
1789 if (FLAG_IS_DEFAULT(AlignVector)) {
1790 // Modern processors allow misaligned memory operations for vectors.
1791 AlignVector = !UseUnalignedLoadStores;
1792 }
1793 #endif // COMPILER2
1794
1795 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1796 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1797 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1798 } else if (!supports_sse() && supports_3dnow_prefetch()) {
1799 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1800 }
1801 }
1802
1803 // Allocation prefetch settings
1804 int cache_line_size = checked_cast<int>(prefetch_data_size());
1805 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1806 (cache_line_size > AllocatePrefetchStepSize)) {
1807 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1808 }
1809
1810 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1811 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1812 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1813 warning("AllocatePrefetchDistance is set to 0, which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
1814 }
1815 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1816 }
1817
1818 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1819 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1820 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1821 }
1822
1823 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1824 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1825 supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1826 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1827 }
1828 #ifdef COMPILER2
1829 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1830 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1831 }
1832 #endif
1833 }
1834
1835 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1836 #ifdef COMPILER2
1837 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1838 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1839 }
1840 #endif
1841 }
1842
1843 // Prefetch settings
1844
1845 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1846 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1847 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1848 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
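// (With the 64-byte cache lines assumed above: 9 lines * 64 bytes == 576
// bytes, which is exactly the PrefetchCopyIntervalInBytes and
// PrefetchScanIntervalInBytes default chosen below; 256 bytes == 4 lines.)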
1849 1850 // gc copy/scan is disabled if prefetchw isn't supported, because 1851 // Prefetch::write emits an inlined prefetchw on Linux. 1852 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1853 // The used prefetcht0 instruction works for both amd64 and em64t. 1854 1855 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1856 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1857 } 1858 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1859 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1860 } 1861 1862 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1863 (cache_line_size > ContendedPaddingWidth)) 1864 ContendedPaddingWidth = cache_line_size; 1865 1866 // This machine allows unaligned memory accesses 1867 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1868 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1869 } 1870 1871 #ifndef PRODUCT 1872 if (log_is_enabled(Info, os, cpu)) { 1873 LogStream ls(Log(os, cpu)::info()); 1874 outputStream* log = &ls; 1875 log->print_cr("Logical CPUs per core: %u", 1876 logical_processors_per_package()); 1877 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1878 log->print("UseSSE=%d", UseSSE); 1879 if (UseAVX > 0) { 1880 log->print(" UseAVX=%d", UseAVX); 1881 } 1882 if (UseAES) { 1883 log->print(" UseAES=1"); 1884 } 1885 #ifdef COMPILER2 1886 if (MaxVectorSize > 0) { 1887 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1888 } 1889 #endif 1890 log->cr(); 1891 log->print("Allocation"); 1892 if (AllocatePrefetchStyle <= 0) { 1893 log->print_cr(": no prefetching"); 1894 } else { 1895 log->print(" prefetching: "); 1896 if (AllocatePrefetchInstr == 0) { 1897 log->print("PREFETCHNTA"); 1898 } else if (AllocatePrefetchInstr == 1) { 1899 log->print("PREFETCHT0"); 1900 } else if (AllocatePrefetchInstr == 2) { 1901 log->print("PREFETCHT2"); 1902 } else if (AllocatePrefetchInstr == 3) { 1903 log->print("PREFETCHW"); 1904 } 1905 if (AllocatePrefetchLines > 1) { 1906 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1907 } else { 1908 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1909 } 1910 } 1911 1912 if (PrefetchCopyIntervalInBytes > 0) { 1913 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1914 } 1915 if (PrefetchScanIntervalInBytes > 0) { 1916 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1917 } 1918 if (ContendedPaddingWidth > 0) { 1919 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1920 } 1921 } 1922 #endif // !PRODUCT 1923 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1924 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1925 } 1926 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1927 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1928 } 1929 } 1930 1931 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1932 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1933 if (vrt == XenHVM) { 1934 st->print_cr("Xen hardware-assisted virtualization detected"); 1935 } else if (vrt == KVM) { 1936 st->print_cr("KVM virtualization detected"); 1937 } else if (vrt == VMWare) { 1938 st->print_cr("VMWare virtualization detected"); 1939 VirtualizationSupport::print_virtualization_info(st); 1940 } else if (vrt == HyperV) { 1941 st->print_cr("Hyper-V virtualization detected"); 1942 } else if (vrt == HyperVRole) { 1943 st->print_cr("Hyper-V role detected"); 1944 
} 1945 } 1946 1947 bool VM_Version::compute_has_intel_jcc_erratum() { 1948 if (!is_intel_family_core()) { 1949 // Only Intel CPUs are affected. 1950 return false; 1951 } 1952 // The following table of affected CPUs is based on the following document released by Intel: 1953 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 1954 switch (_model) { 1955 case 0x8E: 1956 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1957 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 1958 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 1959 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 1960 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 1961 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1962 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1963 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 1964 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1965 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 1966 case 0x4E: 1967 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 1968 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 1969 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 1970 return _stepping == 0x3; 1971 case 0x55: 1972 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1973 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1974 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 1975 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 1976 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 1977 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 1978 return _stepping == 0x4 || _stepping == 0x7; 1979 case 0x5E: 1980 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 1981 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 1982 return _stepping == 0x3; 1983 case 0x9E: 1984 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 1985 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 1986 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 1987 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 1988 // 06_9EH | 9 | Intel(R) Xeon(R) 
Processor E3 v6 Family Kaby Lake Xeon E3
1989 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1990 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1991 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1992 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1993 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1994 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1995 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1996 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
1997 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1998 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1999 case 0xA5:
2000 // Not in Intel documentation.
2001 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2002 return true;
2003 case 0xA6:
2004 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2005 return _stepping == 0x0;
2006 case 0xAE:
2007 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2008 return _stepping == 0xA;
2009 default:
2010 // If we are running on another Intel machine not recognized in the table, we are okay.
2011 return false;
2012 }
2013 }
2014
2015 // On Xen, the cpuid instruction returns
2016 // eax / registers[0]: Version of Xen
2017 // ebx / registers[1]: chars 'XenV'
2018 // ecx / registers[2]: chars 'MMXe'
2019 // edx / registers[3]: chars 'nVMM'
2020 //
2021 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2022 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2023 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2024 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
2025 //
2026 // more information :
2027 // https://kb.vmware.com/s/article/1009458
2028 //
2029 void VM_Version::check_virtualizations() {
2030 uint32_t registers[4] = {0};
2031 char signature[13] = {0};
2032
2033 // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
2034 // from 0x40000000 up to 0x40010000.
2035 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2036 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2037 detect_virt_stub(leaf, registers);
2038 memcpy(signature, &registers[1], 12);
2039
2040 if (strncmp("VMwareVMware", signature, 12) == 0) {
2041 Abstract_VM_Version::_detected_virtualization = VMWare;
2042 // check for extended metrics from guestlib
2043 VirtualizationSupport::initialize();
2044 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2045 Abstract_VM_Version::_detected_virtualization = HyperV;
2046 #ifdef _WINDOWS
2047 // CPUID leaf 0x40000007 is available to the root partition only.
2048 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2049 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2050 detect_virt_stub(0x40000007, registers);
2051 if ((registers[0] != 0x0) ||
2052 (registers[1] != 0x0) ||
2053 (registers[2] != 0x0) ||
2054 (registers[3] != 0x0)) {
2055 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2056 }
2057 #endif
2058 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2059 Abstract_VM_Version::_detected_virtualization = KVM;
2060 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2061 Abstract_VM_Version::_detected_virtualization = XenHVM;
2062 }
2063 }
2064 }
2065
2066 #ifdef COMPILER2
2067 // Determine if it's running on Cascade Lake using default options.
2068 bool VM_Version::is_default_intel_cascade_lake() {
2069 return FLAG_IS_DEFAULT(UseAVX) &&
2070 FLAG_IS_DEFAULT(MaxVectorSize) &&
2071 UseAVX > 2 &&
2072 is_intel_cascade_lake();
2073 }
2074 #endif
2075
2076 bool VM_Version::is_intel_cascade_lake() {
2077 return is_intel_skylake() && _stepping >= 5;
2078 }
2079
2080 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2081 // for implementing the array copy and clear operations.
2082 // Intel platforms that support the serialize instruction have an
2083 // improved implementation of 64-byte load/stores, so the default
2084 // threshold is set to 0 for these platforms.
2085 int VM_Version::avx3_threshold() {
2086 return (is_intel_server_family() &&
2087 supports_serialize() &&
2088 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2089 }
2090
2091 void VM_Version::clear_apx_test_state() {
2092 clear_apx_test_state_stub();
2093 }
2094
2095 static bool _vm_version_initialized = false;
2096
2097 void VM_Version::initialize() {
2098 ResourceMark rm;
2099
2100 // Making this stub must be the FIRST use of the assembler.
2101 stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2102 if (stub_blob == nullptr) {
2103 vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2104 }
2105 CodeBuffer c(stub_blob);
2106 VM_Version_StubGenerator g(&c);
2107
2108 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2109 g.generate_get_cpu_info());
2110 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2111 g.generate_detect_virt());
2112 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2113 g.clear_apx_test_state());
2114 get_processor_features();
2115
2116 Assembler::precompute_instructions();
2117
2118 if (VM_Version::supports_hv()) { // Supports hypervisor
2119 check_virtualizations();
2120 }
2121 _vm_version_initialized = true;
2122 }
2123
2124 typedef enum {
2125 CPU_FAMILY_8086_8088 = 0,
2126 CPU_FAMILY_INTEL_286 = 2,
2127 CPU_FAMILY_INTEL_386 = 3,
2128 CPU_FAMILY_INTEL_486 = 4,
2129 CPU_FAMILY_PENTIUM = 5,
2130 CPU_FAMILY_PENTIUMPRO = 6, // Same family, several models
2131 CPU_FAMILY_PENTIUM_4 = 0xF
2132 } FamilyFlag;
2133
2134 typedef enum {
2135 RDTSCP_FLAG = 0x08000000, // bit 27
2136 INTEL64_FLAG = 0x20000000 // bit 29
2137 } _featureExtendedEdxFlag;
2138
2139 typedef enum {
2140 FPU_FLAG = 0x00000001,
2141 VME_FLAG = 0x00000002,
2142 DE_FLAG = 0x00000004,
2143 PSE_FLAG = 0x00000008,
2144 TSC_FLAG = 0x00000010,
2145 MSR_FLAG = 0x00000020,
2146 PAE_FLAG = 0x00000040,
2147 MCE_FLAG = 0x00000080,
2148 CX8_FLAG = 0x00000100,
2149 APIC_FLAG = 0x00000200,
2150 SEP_FLAG = 0x00000800,
2151 MTRR_FLAG = 0x00001000,
2152 PGE_FLAG = 0x00002000,
2153 MCA_FLAG = 0x00004000,
2154 CMOV_FLAG = 0x00008000,
2155 PAT_FLAG = 0x00010000,
2156 PSE36_FLAG = 0x00020000, 2157 PSNUM_FLAG = 0x00040000, 2158 CLFLUSH_FLAG = 0x00080000, 2159 DTS_FLAG = 0x00200000, 2160 ACPI_FLAG = 0x00400000, 2161 MMX_FLAG = 0x00800000, 2162 FXSR_FLAG = 0x01000000, 2163 SSE_FLAG = 0x02000000, 2164 SSE2_FLAG = 0x04000000, 2165 SS_FLAG = 0x08000000, 2166 HTT_FLAG = 0x10000000, 2167 TM_FLAG = 0x20000000 2168 } FeatureEdxFlag; 2169 2170 static BufferBlob* cpuid_brand_string_stub_blob; 2171 static const int cpuid_brand_string_stub_size = 550; 2172 2173 extern "C" { 2174 typedef void (*getCPUIDBrandString_stub_t)(void*); 2175 } 2176 2177 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2178 2179 // VM_Version statics 2180 enum { 2181 ExtendedFamilyIdLength_INTEL = 16, 2182 ExtendedFamilyIdLength_AMD = 24 2183 }; 2184 2185 const size_t VENDOR_LENGTH = 13; 2186 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2187 static char* _cpu_brand_string = nullptr; 2188 static int64_t _max_qualified_cpu_frequency = 0; 2189 2190 static int _no_of_threads = 0; 2191 static int _no_of_cores = 0; 2192 2193 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2194 "8086/8088", 2195 "", 2196 "286", 2197 "386", 2198 "486", 2199 "Pentium", 2200 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2201 "", 2202 "", 2203 "", 2204 "", 2205 "", 2206 "", 2207 "", 2208 "", 2209 "Pentium 4" 2210 }; 2211 2212 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2213 "", 2214 "", 2215 "", 2216 "", 2217 "5x86", 2218 "K5/K6", 2219 "Athlon/AthlonXP", 2220 "", 2221 "", 2222 "", 2223 "", 2224 "", 2225 "", 2226 "", 2227 "", 2228 "Opteron/Athlon64", 2229 "Opteron QC/Phenom", // Barcelona et.al. 2230 "", 2231 "", 2232 "", 2233 "", 2234 "", 2235 "", 2236 "Zen" 2237 }; 2238 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2239 // September 2013, Vol 3C Table 35-1 2240 const char* const _model_id_pentium_pro[] = { 2241 "", 2242 "Pentium Pro", 2243 "", 2244 "Pentium II model 3", 2245 "", 2246 "Pentium II model 5/Xeon/Celeron", 2247 "Celeron", 2248 "Pentium III/Pentium III Xeon", 2249 "Pentium III/Pentium III Xeon", 2250 "Pentium M model 9", // Yonah 2251 "Pentium III, model A", 2252 "Pentium III, model B", 2253 "", 2254 "Pentium M model D", // Dothan 2255 "", 2256 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2257 "", 2258 "", 2259 "", 2260 "", 2261 "", 2262 "", 2263 "Celeron", // 0x16 Celeron 65nm 2264 "Core 2", // 0x17 Penryn / Harpertown 2265 "", 2266 "", 2267 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2268 "Atom", // 0x1B Z5xx series Silverthorn 2269 "", 2270 "Core 2", // 0x1D Dunnington (6-core) 2271 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2272 "", 2273 "", 2274 "", 2275 "", 2276 "", 2277 "", 2278 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2279 "", 2280 "", 2281 "", // 0x28 2282 "", 2283 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2284 "", 2285 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2286 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2287 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2288 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2289 "", 2290 "", 2291 "", 2292 "", 2293 "", 2294 "", 2295 "", 2296 "", 2297 "", 2298 "", 2299 "Ivy Bridge", // 0x3a 2300 "", 2301 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2302 "", // 0x3d "Next Generation Intel Core Processor" 2303 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2304 "", // 0x3f "Future Generation Intel Xeon Processor" 2305 "", 2306 "", 2307 "", 2308 "", 2309 "", 2310 "Haswell", // 
0x45 "4th Generation Intel Core Processor" 2311 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2312 nullptr 2313 }; 2314 2315 /* Brand ID is for back compatibility 2316 * Newer CPUs uses the extended brand string */ 2317 const char* const _brand_id[] = { 2318 "", 2319 "Celeron processor", 2320 "Pentium III processor", 2321 "Intel Pentium III Xeon processor", 2322 "", 2323 "", 2324 "", 2325 "", 2326 "Intel Pentium 4 processor", 2327 nullptr 2328 }; 2329 2330 2331 const char* const _feature_edx_id[] = { 2332 "On-Chip FPU", 2333 "Virtual Mode Extensions", 2334 "Debugging Extensions", 2335 "Page Size Extensions", 2336 "Time Stamp Counter", 2337 "Model Specific Registers", 2338 "Physical Address Extension", 2339 "Machine Check Exceptions", 2340 "CMPXCHG8B Instruction", 2341 "On-Chip APIC", 2342 "", 2343 "Fast System Call", 2344 "Memory Type Range Registers", 2345 "Page Global Enable", 2346 "Machine Check Architecture", 2347 "Conditional Mov Instruction", 2348 "Page Attribute Table", 2349 "36-bit Page Size Extension", 2350 "Processor Serial Number", 2351 "CLFLUSH Instruction", 2352 "", 2353 "Debug Trace Store feature", 2354 "ACPI registers in MSR space", 2355 "Intel Architecture MMX Technology", 2356 "Fast Float Point Save and Restore", 2357 "Streaming SIMD extensions", 2358 "Streaming SIMD extensions 2", 2359 "Self-Snoop", 2360 "Hyper Threading", 2361 "Thermal Monitor", 2362 "", 2363 "Pending Break Enable" 2364 }; 2365 2366 const char* const _feature_extended_edx_id[] = { 2367 "", 2368 "", 2369 "", 2370 "", 2371 "", 2372 "", 2373 "", 2374 "", 2375 "", 2376 "", 2377 "", 2378 "SYSCALL/SYSRET", 2379 "", 2380 "", 2381 "", 2382 "", 2383 "", 2384 "", 2385 "", 2386 "", 2387 "Execute Disable Bit", 2388 "", 2389 "", 2390 "", 2391 "", 2392 "", 2393 "", 2394 "RDTSCP", 2395 "", 2396 "Intel 64 Architecture", 2397 "", 2398 "" 2399 }; 2400 2401 const char* const _feature_ecx_id[] = { 2402 "Streaming SIMD Extensions 3", 2403 "PCLMULQDQ", 2404 "64-bit DS Area", 2405 "MONITOR/MWAIT instructions", 2406 "CPL Qualified Debug Store", 2407 "Virtual Machine Extensions", 2408 "Safer Mode Extensions", 2409 "Enhanced Intel SpeedStep technology", 2410 "Thermal Monitor 2", 2411 "Supplemental Streaming SIMD Extensions 3", 2412 "L1 Context ID", 2413 "", 2414 "Fused Multiply-Add", 2415 "CMPXCHG16B", 2416 "xTPR Update Control", 2417 "Perfmon and Debug Capability", 2418 "", 2419 "Process-context identifiers", 2420 "Direct Cache Access", 2421 "Streaming SIMD extensions 4.1", 2422 "Streaming SIMD extensions 4.2", 2423 "x2APIC", 2424 "MOVBE", 2425 "Popcount instruction", 2426 "TSC-Deadline", 2427 "AESNI", 2428 "XSAVE", 2429 "OSXSAVE", 2430 "AVX", 2431 "F16C", 2432 "RDRAND", 2433 "" 2434 }; 2435 2436 const char* const _feature_extended_ecx_id[] = { 2437 "LAHF/SAHF instruction support", 2438 "Core multi-processor legacy mode", 2439 "", 2440 "", 2441 "", 2442 "Advanced Bit Manipulations: LZCNT", 2443 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2444 "Misaligned SSE mode", 2445 "", 2446 "", 2447 "", 2448 "", 2449 "", 2450 "", 2451 "", 2452 "", 2453 "", 2454 "", 2455 "", 2456 "", 2457 "", 2458 "", 2459 "", 2460 "", 2461 "", 2462 "", 2463 "", 2464 "", 2465 "", 2466 "", 2467 "", 2468 "" 2469 }; 2470 2471 void VM_Version::initialize_tsc(void) { 2472 ResourceMark rm; 2473 2474 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2475 if (cpuid_brand_string_stub_blob == nullptr) { 2476 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2477 } 2478 
CodeBuffer c(cpuid_brand_string_stub_blob); 2479 VM_Version_StubGenerator g(&c); 2480 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2481 g.generate_getCPUIDBrandString()); 2482 } 2483 2484 const char* VM_Version::cpu_model_description(void) { 2485 uint32_t cpu_family = extended_cpu_family(); 2486 uint32_t cpu_model = extended_cpu_model(); 2487 const char* model = nullptr; 2488 2489 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2490 for (uint32_t i = 0; i <= cpu_model; i++) { 2491 model = _model_id_pentium_pro[i]; 2492 if (model == nullptr) { 2493 break; 2494 } 2495 } 2496 } 2497 return model; 2498 } 2499 2500 const char* VM_Version::cpu_brand_string(void) { 2501 if (_cpu_brand_string == nullptr) { 2502 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2503 if (nullptr == _cpu_brand_string) { 2504 return nullptr; 2505 } 2506 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2507 if (ret_val != OS_OK) { 2508 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2509 _cpu_brand_string = nullptr; 2510 } 2511 } 2512 return _cpu_brand_string; 2513 } 2514 2515 const char* VM_Version::cpu_brand(void) { 2516 const char* brand = nullptr; 2517 2518 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2519 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2520 brand = _brand_id[0]; 2521 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2522 brand = _brand_id[i]; 2523 } 2524 } 2525 return brand; 2526 } 2527 2528 bool VM_Version::cpu_is_em64t(void) { 2529 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2530 } 2531 2532 bool VM_Version::is_netburst(void) { 2533 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2534 } 2535 2536 bool VM_Version::supports_tscinv_ext(void) { 2537 if (!supports_tscinv_bit()) { 2538 return false; 2539 } 2540 2541 if (is_intel()) { 2542 return true; 2543 } 2544 2545 if (is_amd()) { 2546 return !is_amd_Barcelona(); 2547 } 2548 2549 if (is_hygon()) { 2550 return true; 2551 } 2552 2553 return false; 2554 } 2555 2556 void VM_Version::resolve_cpu_information_details(void) { 2557 2558 // in future we want to base this information on proper cpu 2559 // and cache topology enumeration such as: 2560 // Intel 64 Architecture Processor Topology Enumeration 2561 // which supports system cpu and cache topology enumeration 2562 // either using 2xAPICIDs or initial APICIDs 2563 2564 // currently only rough cpu information estimates 2565 // which will not necessarily reflect the exact configuration of the system 2566 2567 // this is the number of logical hardware threads 2568 // visible to the operating system 2569 _no_of_threads = os::processor_count(); 2570 2571 // find out number of threads per cpu package 2572 int threads_per_package = threads_per_core() * cores_per_cpu(); 2573 2574 // use amount of threads visible to the process in order to guess number of sockets 2575 _no_of_sockets = _no_of_threads / threads_per_package; 2576 2577 // process might only see a subset of the total number of threads 2578 // from a single processor package. Virtualization/resource management for example. 2579 // If so then just write a hard 1 as num of pkgs. 
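// (Illustrative numbers: with 2 threads per core and 8 cores per package,
// threads_per_package == 16; a process limited to 8 visible threads computes
// 8 / 16 == 0 sockets, which the clamp below corrects to a hard 1.)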
2580 if (0 == _no_of_sockets) { 2581 _no_of_sockets = 1; 2582 } 2583 2584 // estimate the number of cores 2585 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2586 } 2587 2588 2589 const char* VM_Version::cpu_family_description(void) { 2590 int cpu_family_id = extended_cpu_family(); 2591 if (is_amd()) { 2592 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2593 return _family_id_amd[cpu_family_id]; 2594 } 2595 } 2596 if (is_intel()) { 2597 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2598 return cpu_model_description(); 2599 } 2600 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2601 return _family_id_intel[cpu_family_id]; 2602 } 2603 } 2604 if (is_hygon()) { 2605 return "Dhyana"; 2606 } 2607 return "Unknown x86"; 2608 } 2609 2610 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2611 assert(buf != nullptr, "buffer is null!"); 2612 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2613 2614 const char* cpu_type = nullptr; 2615 const char* x64 = nullptr; 2616 2617 if (is_intel()) { 2618 cpu_type = "Intel"; 2619 x64 = cpu_is_em64t() ? " Intel64" : ""; 2620 } else if (is_amd()) { 2621 cpu_type = "AMD"; 2622 x64 = cpu_is_em64t() ? " AMD64" : ""; 2623 } else if (is_hygon()) { 2624 cpu_type = "Hygon"; 2625 x64 = cpu_is_em64t() ? " AMD64" : ""; 2626 } else { 2627 cpu_type = "Unknown x86"; 2628 x64 = cpu_is_em64t() ? " x86_64" : ""; 2629 } 2630 2631 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2632 cpu_type, 2633 cpu_family_description(), 2634 supports_ht() ? " (HT)" : "", 2635 supports_sse3() ? " SSE3" : "", 2636 supports_ssse3() ? " SSSE3" : "", 2637 supports_sse4_1() ? " SSE4.1" : "", 2638 supports_sse4_2() ? " SSE4.2" : "", 2639 supports_sse4a() ? " SSE4A" : "", 2640 is_netburst() ? " Netburst" : "", 2641 is_intel_family_core() ? 
" Core" : "", 2642 x64); 2643 2644 return OS_OK; 2645 } 2646 2647 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2648 assert(buf != nullptr, "buffer is null!"); 2649 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2650 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2651 2652 // invoke newly generated asm code to fetch CPU Brand String 2653 getCPUIDBrandString_stub(&_cpuid_info); 2654 2655 // fetch results into buffer 2656 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2657 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2658 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2659 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2660 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2661 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2662 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2663 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2664 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2665 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2666 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2667 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2668 2669 return OS_OK; 2670 } 2671 2672 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2673 guarantee(buf != nullptr, "buffer is null!"); 2674 guarantee(buf_len > 0, "buffer len not enough!"); 2675 2676 unsigned int flag = 0; 2677 unsigned int fi = 0; 2678 size_t written = 0; 2679 const char* prefix = ""; 2680 2681 #define WRITE_TO_BUF(string) \ 2682 { \ 2683 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2684 if (res < 0) { \ 2685 return buf_len - 1; \ 2686 } \ 2687 written += res; \ 2688 if (prefix[0] == '\0') { \ 2689 prefix = ", "; \ 2690 } \ 2691 } 2692 2693 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2694 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2695 continue; /* no hyperthreading */ 2696 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2697 continue; /* no fast system call */ 2698 } 2699 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2700 WRITE_TO_BUF(_feature_edx_id[fi]); 2701 } 2702 } 2703 2704 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2705 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2706 WRITE_TO_BUF(_feature_ecx_id[fi]); 2707 } 2708 } 2709 2710 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2711 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2712 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2713 } 2714 } 2715 2716 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2717 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2718 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2719 } 2720 } 2721 2722 if (supports_tscinv_bit()) { 2723 WRITE_TO_BUF("Invariant TSC"); 2724 } 2725 2726 return written; 2727 } 2728 2729 /** 2730 * Write a detailed description of the cpu to a given buffer, including 2731 * feature set. 
2732 */ 2733 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2734 assert(buf != nullptr, "buffer is null!"); 2735 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2736 2737 static const char* unknown = "<unknown>"; 2738 char vendor_id[VENDOR_LENGTH]; 2739 const char* family = nullptr; 2740 const char* model = nullptr; 2741 const char* brand = nullptr; 2742 int outputLen = 0; 2743 2744 family = cpu_family_description(); 2745 if (family == nullptr) { 2746 family = unknown; 2747 } 2748 2749 model = cpu_model_description(); 2750 if (model == nullptr) { 2751 model = unknown; 2752 } 2753 2754 brand = cpu_brand_string(); 2755 2756 if (brand == nullptr) { 2757 brand = cpu_brand(); 2758 if (brand == nullptr) { 2759 brand = unknown; 2760 } 2761 } 2762 2763 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2764 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2765 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2766 vendor_id[VENDOR_LENGTH-1] = '\0'; 2767 2768 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2769 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2770 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2771 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2772 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2773 "Supports: ", 2774 brand, 2775 vendor_id, 2776 family, 2777 extended_cpu_family(), 2778 model, 2779 extended_cpu_model(), 2780 cpu_stepping(), 2781 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2782 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2783 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2784 _cpuid_info.std_cpuid1_eax.value, 2785 _cpuid_info.std_cpuid1_ebx.value, 2786 _cpuid_info.std_cpuid1_ecx.value, 2787 _cpuid_info.std_cpuid1_edx.value, 2788 _cpuid_info.ext_cpuid1_eax, 2789 _cpuid_info.ext_cpuid1_ebx, 2790 _cpuid_info.ext_cpuid1_ecx, 2791 _cpuid_info.ext_cpuid1_edx); 2792 2793 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2794 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2795 return OS_ERR; 2796 } 2797 2798 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2799 2800 return OS_OK; 2801 } 2802 2803 2804 // Fill in Abstract_VM_Version statics 2805 void VM_Version::initialize_cpu_information() { 2806 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2807 assert(!_initialized, "shouldn't be initialized yet"); 2808 resolve_cpu_information_details(); 2809 2810 // initialize cpu_name and cpu_desc 2811 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2812 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2813 _initialized = true; 2814 } 2815 2816 /** 2817 * For information about extracting the frequency from the cpu brand string, please see: 2818 * 2819 * Intel Processor Identification and the CPUID Instruction 2820 * Application Note 485 2821 * May 2012 2822 * 2823 * The return value is the frequency in Hz. 2824 */ 2825 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2826 const char* const brand_string = cpu_brand_string(); 2827 if (brand_string == nullptr) { 2828 return 0; 2829 } 2830 const int64_t MEGA = 1000000; 2831 int64_t multiplier = 0; 2832 int64_t frequency = 0; 2833 uint8_t idx = 0; 2834 // The brand string buffer is at most 48 bytes. 2835 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
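// (Worked example, assuming a typical brand string ending in "@ 3.40GHz":
// the loop below stops with idx at the 'G', so multiplier == MEGA * 1000;
// since brand_string[idx-3] == '.', the frequency computed below is
// 3*10^9 + 4*10^8 + 0*10^7 == 3.40 GHz.)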
2836 for (; idx < 48-2; ++idx) { 2837 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2838 // Search brand string for "yHz" where y is M, G, or T. 2839 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2840 if (brand_string[idx] == 'M') { 2841 multiplier = MEGA; 2842 } else if (brand_string[idx] == 'G') { 2843 multiplier = MEGA * 1000; 2844 } else if (brand_string[idx] == 'T') { 2845 multiplier = MEGA * MEGA; 2846 } 2847 break; 2848 } 2849 } 2850 if (multiplier > 0) { 2851 // Compute frequency (in Hz) from brand string. 2852 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2853 frequency = (brand_string[idx-4] - '0') * multiplier; 2854 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2855 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2856 } else { // format is "xxxx" 2857 frequency = (brand_string[idx-4] - '0') * 1000; 2858 frequency += (brand_string[idx-3] - '0') * 100; 2859 frequency += (brand_string[idx-2] - '0') * 10; 2860 frequency += (brand_string[idx-1] - '0'); 2861 frequency *= multiplier; 2862 } 2863 } 2864 return frequency; 2865 } 2866 2867 2868 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2869 if (_max_qualified_cpu_frequency == 0) { 2870 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2871 } 2872 return _max_qualified_cpu_frequency; 2873 } 2874 2875 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const { 2876 VM_Features vm_features; 2877 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2878 vm_features.set_feature(CPU_CX8); 2879 if (std_cpuid1_edx.bits.cmov != 0) 2880 vm_features.set_feature(CPU_CMOV); 2881 if (std_cpuid1_edx.bits.clflush != 0) 2882 vm_features.set_feature(CPU_FLUSH); 2883 // clflush should always be available on x86_64 2884 // if not we are in real trouble because we rely on it 2885 // to flush the code cache. 2886 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available"); 2887 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2888 ext_cpuid1_edx.bits.fxsr != 0)) 2889 vm_features.set_feature(CPU_FXSR); 2890 // HT flag is set for multi-core processors also. 
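// (The raw CPUID HTT bit alone cannot distinguish SMT from multiple cores,
// so CPU_HT is derived from the computed topology instead: it is set whenever
// more than one logical processor shares a core.)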
2891 if (threads_per_core() > 1) 2892 vm_features.set_feature(CPU_HT); 2893 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2894 ext_cpuid1_edx.bits.mmx != 0)) 2895 vm_features.set_feature(CPU_MMX); 2896 if (std_cpuid1_edx.bits.sse != 0) 2897 vm_features.set_feature(CPU_SSE); 2898 if (std_cpuid1_edx.bits.sse2 != 0) 2899 vm_features.set_feature(CPU_SSE2); 2900 if (std_cpuid1_ecx.bits.sse3 != 0) 2901 vm_features.set_feature(CPU_SSE3); 2902 if (std_cpuid1_ecx.bits.ssse3 != 0) 2903 vm_features.set_feature(CPU_SSSE3); 2904 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2905 vm_features.set_feature(CPU_SSE4_1); 2906 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2907 vm_features.set_feature(CPU_SSE4_2); 2908 if (std_cpuid1_ecx.bits.popcnt != 0) 2909 vm_features.set_feature(CPU_POPCNT); 2910 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 2911 xem_xcr0_eax.bits.apx_f != 0) { 2912 vm_features.set_feature(CPU_APX_F); 2913 } 2914 if (std_cpuid1_ecx.bits.avx != 0 && 2915 std_cpuid1_ecx.bits.osxsave != 0 && 2916 xem_xcr0_eax.bits.sse != 0 && 2917 xem_xcr0_eax.bits.ymm != 0) { 2918 vm_features.set_feature(CPU_AVX); 2919 vm_features.set_feature(CPU_VZEROUPPER); 2920 if (sefsl1_cpuid7_eax.bits.sha512 != 0) 2921 vm_features.set_feature(CPU_SHA512); 2922 if (std_cpuid1_ecx.bits.f16c != 0) 2923 vm_features.set_feature(CPU_F16C); 2924 if (sef_cpuid7_ebx.bits.avx2 != 0) { 2925 vm_features.set_feature(CPU_AVX2); 2926 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 2927 vm_features.set_feature(CPU_AVX_IFMA); 2928 } 2929 if (sef_cpuid7_ecx.bits.gfni != 0) 2930 vm_features.set_feature(CPU_GFNI); 2931 if (sef_cpuid7_ebx.bits.avx512f != 0 && 2932 xem_xcr0_eax.bits.opmask != 0 && 2933 xem_xcr0_eax.bits.zmm512 != 0 && 2934 xem_xcr0_eax.bits.zmm32 != 0) { 2935 vm_features.set_feature(CPU_AVX512F); 2936 if (sef_cpuid7_ebx.bits.avx512cd != 0) 2937 vm_features.set_feature(CPU_AVX512CD); 2938 if (sef_cpuid7_ebx.bits.avx512dq != 0) 2939 vm_features.set_feature(CPU_AVX512DQ); 2940 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 2941 vm_features.set_feature(CPU_AVX512_IFMA); 2942 if (sef_cpuid7_ebx.bits.avx512pf != 0) 2943 vm_features.set_feature(CPU_AVX512PF); 2944 if (sef_cpuid7_ebx.bits.avx512er != 0) 2945 vm_features.set_feature(CPU_AVX512ER); 2946 if (sef_cpuid7_ebx.bits.avx512bw != 0) 2947 vm_features.set_feature(CPU_AVX512BW); 2948 if (sef_cpuid7_ebx.bits.avx512vl != 0) 2949 vm_features.set_feature(CPU_AVX512VL); 2950 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2951 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); 2952 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2953 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); 2954 if (sef_cpuid7_ecx.bits.vaes != 0) 2955 vm_features.set_feature(CPU_AVX512_VAES); 2956 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 2957 vm_features.set_feature(CPU_AVX512_VNNI); 2958 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2959 vm_features.set_feature(CPU_AVX512_BITALG); 2960 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2961 vm_features.set_feature(CPU_AVX512_VBMI); 2962 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2963 vm_features.set_feature(CPU_AVX512_VBMI2); 2964 } 2965 if (is_intel()) { 2966 if (sefsl1_cpuid7_edx.bits.avx10 != 0 && 2967 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 && 2968 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 && 2969 xem_xcr0_eax.bits.opmask != 0 && 2970 xem_xcr0_eax.bits.zmm512 != 0 && 2971 xem_xcr0_eax.bits.zmm32 != 0) { 2972 vm_features.set_feature(CPU_AVX10_1); 2973 vm_features.set_feature(CPU_AVX512F); 2974 vm_features.set_feature(CPU_AVX512CD); 2975 vm_features.set_feature(CPU_AVX512DQ); 2976 
vm_features.set_feature(CPU_AVX512PF); 2977 vm_features.set_feature(CPU_AVX512ER); 2978 vm_features.set_feature(CPU_AVX512BW); 2979 vm_features.set_feature(CPU_AVX512VL); 2980 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); 2981 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); 2982 vm_features.set_feature(CPU_AVX512_VAES); 2983 vm_features.set_feature(CPU_AVX512_VNNI); 2984 vm_features.set_feature(CPU_AVX512_BITALG); 2985 vm_features.set_feature(CPU_AVX512_VBMI); 2986 vm_features.set_feature(CPU_AVX512_VBMI2); 2987 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) { 2988 vm_features.set_feature(CPU_AVX10_2); 2989 } 2990 } 2991 } 2992 } 2993 2994 if (std_cpuid1_ecx.bits.hv != 0) 2995 vm_features.set_feature(CPU_HV); 2996 if (sef_cpuid7_ebx.bits.bmi1 != 0) 2997 vm_features.set_feature(CPU_BMI1); 2998 if (std_cpuid1_edx.bits.tsc != 0) 2999 vm_features.set_feature(CPU_TSC); 3000 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3001 vm_features.set_feature(CPU_TSCINV_BIT); 3002 if (std_cpuid1_ecx.bits.aes != 0) 3003 vm_features.set_feature(CPU_AES); 3004 if (ext_cpuid1_ecx.bits.lzcnt != 0) 3005 vm_features.set_feature(CPU_LZCNT); 3006 if (ext_cpuid1_ecx.bits.prefetchw != 0) 3007 vm_features.set_feature(CPU_3DNOW_PREFETCH); 3008 if (sef_cpuid7_ebx.bits.erms != 0) 3009 vm_features.set_feature(CPU_ERMS); 3010 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3011 vm_features.set_feature(CPU_FSRM); 3012 if (std_cpuid1_ecx.bits.clmul != 0) 3013 vm_features.set_feature(CPU_CLMUL); 3014 if (sef_cpuid7_ebx.bits.rtm != 0) 3015 vm_features.set_feature(CPU_RTM); 3016 if (sef_cpuid7_ebx.bits.adx != 0) 3017 vm_features.set_feature(CPU_ADX); 3018 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3019 vm_features.set_feature(CPU_BMI2); 3020 if (sef_cpuid7_ebx.bits.sha != 0) 3021 vm_features.set_feature(CPU_SHA); 3022 if (std_cpuid1_ecx.bits.fma != 0) 3023 vm_features.set_feature(CPU_FMA); 3024 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3025 vm_features.set_feature(CPU_FLUSHOPT); 3026 if (sef_cpuid7_ebx.bits.clwb != 0) 3027 vm_features.set_feature(CPU_CLWB); 3028 if (ext_cpuid1_edx.bits.rdtscp != 0) 3029 vm_features.set_feature(CPU_RDTSCP); 3030 if (sef_cpuid7_ecx.bits.rdpid != 0) 3031 vm_features.set_feature(CPU_RDPID); 3032 3033 // AMD|Hygon additional features. 3034 if (is_amd_family()) { 3035 // PREFETCHW was checked above, check TDNOW here. 3036 if ((ext_cpuid1_edx.bits.tdnow != 0)) 3037 vm_features.set_feature(CPU_3DNOW_PREFETCH); 3038 if (ext_cpuid1_ecx.bits.sse4a != 0) 3039 vm_features.set_feature(CPU_SSE4A); 3040 } 3041 3042 // Intel additional features. 3043 if (is_intel()) { 3044 if (sef_cpuid7_edx.bits.serialize != 0) 3045 vm_features.set_feature(CPU_SERIALIZE); 3046 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0) 3047 vm_features.set_feature(CPU_AVX512_FP16); 3048 } 3049 3050 // ZX additional features. 3051 if (is_zx()) { 3052 // We do not know if these are supported by ZX, so we cannot trust 3053 // common CPUID bit for them. 3054 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?"); 3055 vm_features.clear_feature(CPU_CLWB); 3056 } 3057 3058 // Protection key features. 3059 if (sef_cpuid7_ecx.bits.pku != 0) { 3060 vm_features.set_feature(CPU_PKU); 3061 } 3062 if (sef_cpuid7_ecx.bits.ospke != 0) { 3063 vm_features.set_feature(CPU_OSPKE); 3064 } 3065 3066 // Control flow enforcement (CET) features. 
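// (CET_SS is the shadow-stack part of CET and CET_IBT the indirect-branch
// tracking part; both come from the structured extended feature leaf,
// CPUID(EAX=7, ECX=0), matching the sef_cpuid7_* fields tested below.)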
3067 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3068 vm_features.set_feature(CPU_CET_SS); 3069 } 3070 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3071 vm_features.set_feature(CPU_CET_IBT); 3072 } 3073 3074 // Composite features. 3075 if (supports_tscinv_bit() && 3076 ((is_amd_family() && !is_amd_Barcelona()) || 3077 is_intel_tsc_synched_at_init())) { 3078 vm_features.set_feature(CPU_TSCINV); 3079 } 3080 return vm_features; 3081 } 3082 3083 bool VM_Version::os_supports_avx_vectors() { 3084 bool retVal = false; 3085 int nreg = 4; 3086 if (supports_evex()) { 3087 // Verify that OS save/restore all bits of EVEX registers 3088 // during signal processing. 3089 retVal = true; 3090 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3091 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3092 retVal = false; 3093 break; 3094 } 3095 } 3096 } else if (supports_avx()) { 3097 // Verify that OS save/restore all bits of AVX registers 3098 // during signal processing. 3099 retVal = true; 3100 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3101 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3102 retVal = false; 3103 break; 3104 } 3105 } 3106 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3107 if (retVal == false) { 3108 // Verify that OS save/restore all bits of EVEX registers 3109 // during signal processing. 3110 retVal = true; 3111 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3112 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3113 retVal = false; 3114 break; 3115 } 3116 } 3117 } 3118 } 3119 return retVal; 3120 } 3121 3122 bool VM_Version::os_supports_apx_egprs() { 3123 if (!supports_apx_f()) { 3124 return false; 3125 } 3126 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3127 _cpuid_info.apx_save[1] != egpr_test_value()) { 3128 return false; 3129 } 3130 return true; 3131 } 3132 3133 uint VM_Version::cores_per_cpu() { 3134 uint result = 1; 3135 if (is_intel()) { 3136 bool supports_topology = supports_processor_topology(); 3137 if (supports_topology) { 3138 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3139 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3140 } 3141 if (!supports_topology || result == 0) { 3142 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3143 } 3144 } else if (is_amd_family()) { 3145 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3146 } else if (is_zx()) { 3147 bool supports_topology = supports_processor_topology(); 3148 if (supports_topology) { 3149 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3150 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3151 } 3152 if (!supports_topology || result == 0) { 3153 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3154 } 3155 } 3156 return result; 3157 } 3158 3159 uint VM_Version::threads_per_core() { 3160 uint result = 1; 3161 if (is_intel() && supports_processor_topology()) { 3162 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3163 } else if (is_zx() && supports_processor_topology()) { 3164 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3165 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3166 if (cpu_family() >= 0x17) { 3167 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3168 } else { 3169 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3170 cores_per_cpu(); 3171 } 3172 } 3173 return (result == 0 ? 
1 : result);
3174 }
3175
3176 uint VM_Version::L1_line_size() {
3177 uint result = 0;
3178 if (is_intel()) {
3179 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3180 } else if (is_amd_family()) {
3181 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3182 } else if (is_zx()) {
3183 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3184 }
3185 if (result < 32) // not defined ?
3186 result = 32; // 32 bytes by default on x86 and other x64
3187 return result;
3188 }
3189
3190 bool VM_Version::is_intel_tsc_synched_at_init() {
3191 if (is_intel_family_core()) {
3192 uint32_t ext_model = extended_cpu_model();
3193 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3194 ext_model == CPU_MODEL_WESTMERE_EP ||
3195 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3196 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3197 // <= 2-socket invariant tsc support. EX versions are usually used
3198 // in > 2-socket systems and likely don't synchronize tscs at
3199 // initialization.
3200 // Code that uses tsc values must be prepared for them to arbitrarily
3201 // jump forward or backward.
3202 return true;
3203 }
3204 }
3205 return false;
3206 }
3207
3208 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3209 // Hardware prefetching (distance/size in bytes):
3210 // Pentium 3 - 64 / 32
3211 // Pentium 4 - 256 / 128
3212 // Athlon - 64 / 32 ????
3213 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3214 // Core - 128 / 64
3215 //
3216 // Software prefetching (distance in bytes / instruction with best score):
3217 // Pentium 3 - 128 / prefetchnta
3218 // Pentium 4 - 512 / prefetchnta
3219 // Athlon - 128 / prefetchnta
3220 // Opteron - 256 / prefetchnta
3221 // Core - 256 / prefetchnta
3222 // It will be used only when AllocatePrefetchStyle > 0
3223
3224 if (is_amd_family()) { // AMD | Hygon
3225 if (supports_sse2()) {
3226 return 256; // Opteron
3227 } else {
3228 return 128; // Athlon
3229 }
3230 } else { // Intel
3231 if (supports_sse3() && is_intel_server_family()) {
3232 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3233 return 192;
3234 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3235 return 384;
3236 }
3237 }
3238 if (supports_sse2()) {
3239 if (is_intel_server_family()) {
3240 return 256; // Pentium M, Core, Core2
3241 } else {
3242 return 512; // Pentium 4
3243 }
3244 } else {
3245 return 128; // Pentium 3 (and all other old CPUs)
3246 }
3247 }
3248 }
3249
3250 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3251 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3252 switch (id) {
3253 case vmIntrinsics::_floatToFloat16:
3254 case vmIntrinsics::_float16ToFloat:
3255 if (!supports_float16()) {
3256 return false;
3257 }
3258 break;
3259 default:
3260 break;
3261 }
3262 return true;
3263 }
3264
3265 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3266 int i = 0;
3267 ss.join([&]() {
3268 while (i < MAX_CPU_FEATURES) {
3269 if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3270 return _features_names[i++];
3271 }
3272 i += 1;
3273 }
3274 return (const char*)nullptr;
3275 }, ", ");
3276 }
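// A minimal standalone sketch of the supplier/join protocol the lambda above
// appears to follow: the callback is invoked repeatedly, returns the next
// supported feature name, and signals exhaustion with nullptr. Plain C++,
// independent of HotSpot's stringStream; join_sketch and demo_names are
// illustrative placeholders, not HotSpot APIs:
//
//   #include <cstdio>
//
//   template <typename Supplier>
//   void join_sketch(Supplier next, const char* sep) {
//     for (const char* s = next(); s != nullptr; ) {
//       fputs(s, stdout);                    // emit the current item
//       if ((s = next()) != nullptr) {
//         fputs(sep, stdout);                // separator only between items
//       }
//     }
//   }
//
//   int main() {
//     const char* demo_names[] = { "cx8", "cmov", "sse4.2", nullptr };
//     int i = 0;
//     join_sketch([&]() { return demo_names[i++]; }, ", ");
//     // prints: cx8, cmov, sse4.2
//     return 0;
//   }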