1 /* 2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "asm/macroAssembler.hpp" 26 #include "asm/macroAssembler.inline.hpp" 27 #include "classfile/vmIntrinsics.hpp" 28 #include "code/codeBlob.hpp" 29 #include "compiler/compilerDefinitions.inline.hpp" 30 #include "jvm.h" 31 #include "logging/log.hpp" 32 #include "logging/logStream.hpp" 33 #include "memory/resourceArea.hpp" 34 #include "memory/universe.hpp" 35 #include "runtime/globals_extension.hpp" 36 #include "runtime/java.hpp" 37 #include "runtime/os.inline.hpp" 38 #include "runtime/stubCodeGenerator.hpp" 39 #include "runtime/vm_version.hpp" 40 #include "utilities/checkedCast.hpp" 41 #include "utilities/powerOfTwo.hpp" 42 #include "utilities/virtualizationSupport.hpp" 43 44 int VM_Version::_cpu; 45 int VM_Version::_model; 46 int VM_Version::_stepping; 47 bool VM_Version::_has_intel_jcc_erratum; 48 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 49 50 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name, 51 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)}; 52 #undef DECLARE_CPU_FEATURE_FLAG 53 54 // Address of instruction which causes SEGV 55 address VM_Version::_cpuinfo_segv_addr = nullptr; 56 // Address of instruction after the one which causes SEGV 57 address VM_Version::_cpuinfo_cont_addr = nullptr; 58 // Address of instruction which causes APX specific SEGV 59 address VM_Version::_cpuinfo_segv_addr_apx = nullptr; 60 // Address of instruction after the one which causes APX specific SEGV 61 address VM_Version::_cpuinfo_cont_addr_apx = nullptr; 62 63 static BufferBlob* stub_blob; 64 static const int stub_size = 2000; 65 66 extern "C" { 67 typedef void (*get_cpu_info_stub_t)(void*); 68 typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*); 69 typedef void (*clear_apx_test_state_t)(void); 70 } 71 static get_cpu_info_stub_t get_cpu_info_stub = nullptr; 72 static detect_virt_stub_t detect_virt_stub = nullptr; 73 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr; 74 75 #ifdef _LP64 76 77 bool VM_Version::supports_clflush() { 78 // clflush should always be available on x86_64 79 // if not we are in real trouble because we rely on it 80 // to flush the code cache. 81 // Unfortunately, Assembler::clflush is currently called as part 82 // of generation of the code cache flush routine. This happens 83 // under Universe::init before the processor features are set 84 // up. Assembler::flush calls this routine to check that clflush 85 // is allowed. So, we give the caller a free pass if Universe init 86 // is still in progress. 87 assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available"); 88 return true; 89 } 90 #endif 91 92 #define CPUID_STANDARD_FN 0x0 93 #define CPUID_STANDARD_FN_1 0x1 94 #define CPUID_STANDARD_FN_4 0x4 95 #define CPUID_STANDARD_FN_B 0xb 96 97 #define CPUID_EXTENDED_FN 0x80000000 98 #define CPUID_EXTENDED_FN_1 0x80000001 99 #define CPUID_EXTENDED_FN_2 0x80000002 100 #define CPUID_EXTENDED_FN_3 0x80000003 101 #define CPUID_EXTENDED_FN_4 0x80000004 102 #define CPUID_EXTENDED_FN_7 0x80000007 103 #define CPUID_EXTENDED_FN_8 0x80000008 104 105 class VM_Version_StubGenerator: public StubCodeGenerator { 106 public: 107 108 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 109 110 #if defined(_LP64) 111 address clear_apx_test_state() { 112 # define __ _masm-> 113 address start = __ pc(); 114 // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal 115 // handling guarantees that preserved register values post signal handling were 116 // re-instantiated by operating system and not because they were not modified externally. 117 118 bool save_apx = UseAPX; 119 VM_Version::set_apx_cpuFeatures(); 120 UseAPX = true; 121 // EGPR state save/restoration. 122 __ mov64(r16, 0L); 123 __ mov64(r31, 0L); 124 UseAPX = save_apx; 125 VM_Version::clean_cpuFeatures(); 126 __ ret(0); 127 return start; 128 } 129 #endif 130 131 address generate_get_cpu_info() { 132 // Flags to test CPU type. 133 const uint32_t HS_EFL_AC = 0x40000; 134 const uint32_t HS_EFL_ID = 0x200000; 135 // Values for when we don't have a CPUID instruction. 136 const int CPU_FAMILY_SHIFT = 8; 137 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 138 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 139 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2); 140 141 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; 142 Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7; 143 Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning; 144 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; 145 146 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); 147 # define __ _masm-> 148 149 address start = __ pc(); 150 151 // 152 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); 153 // 154 // LP64: rcx and rdx are first and second argument registers on windows 155 156 __ push(rbp); 157 #ifdef _LP64 158 __ mov(rbp, c_rarg0); // cpuid_info address 159 #else 160 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 161 #endif 162 __ push(rbx); 163 __ push(rsi); 164 __ pushf(); // preserve rbx, and flags 165 __ pop(rax); 166 __ push(rax); 167 __ mov(rcx, rax); 168 // 169 // if we are unable to change the AC flag, we have a 386 170 // 171 __ xorl(rax, HS_EFL_AC); 172 __ push(rax); 173 __ popf(); 174 __ pushf(); 175 __ pop(rax); 176 __ cmpptr(rax, rcx); 177 __ jccb(Assembler::notEqual, detect_486); 178 179 __ movl(rax, CPU_FAMILY_386); 180 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 181 __ jmp(done); 182 183 // 184 // If we are unable to change the ID flag, we have a 486 which does 185 // not support the "cpuid" instruction. 186 // 187 __ bind(detect_486); 188 __ mov(rax, rcx); 189 __ xorl(rax, HS_EFL_ID); 190 __ push(rax); 191 __ popf(); 192 __ pushf(); 193 __ pop(rax); 194 __ cmpptr(rcx, rax); 195 __ jccb(Assembler::notEqual, detect_586); 196 197 __ bind(cpu486); 198 __ movl(rax, CPU_FAMILY_486); 199 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 200 __ jmp(done); 201 202 // 203 // At this point, we have a chip which supports the "cpuid" instruction 204 // 205 __ bind(detect_586); 206 __ xorl(rax, rax); 207 __ cpuid(); 208 __ orl(rax, rax); 209 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 210 // value of at least 1, we give up and 211 // assume a 486 212 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 213 __ movl(Address(rsi, 0), rax); 214 __ movl(Address(rsi, 4), rbx); 215 __ movl(Address(rsi, 8), rcx); 216 __ movl(Address(rsi,12), rdx); 217 218 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? 219 __ jccb(Assembler::belowEqual, std_cpuid4); 220 221 // 222 // cpuid(0xB) Processor Topology 223 // 224 __ movl(rax, 0xb); 225 __ xorl(rcx, rcx); // Threads level 226 __ cpuid(); 227 228 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); 229 __ movl(Address(rsi, 0), rax); 230 __ movl(Address(rsi, 4), rbx); 231 __ movl(Address(rsi, 8), rcx); 232 __ movl(Address(rsi,12), rdx); 233 234 __ movl(rax, 0xb); 235 __ movl(rcx, 1); // Cores level 236 __ cpuid(); 237 __ push(rax); 238 __ andl(rax, 0x1f); // Determine if valid topology level 239 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 240 __ andl(rax, 0xffff); 241 __ pop(rax); 242 __ jccb(Assembler::equal, std_cpuid4); 243 244 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); 245 __ movl(Address(rsi, 0), rax); 246 __ movl(Address(rsi, 4), rbx); 247 __ movl(Address(rsi, 8), rcx); 248 __ movl(Address(rsi,12), rdx); 249 250 __ movl(rax, 0xb); 251 __ movl(rcx, 2); // Packages level 252 __ cpuid(); 253 __ push(rax); 254 __ andl(rax, 0x1f); // Determine if valid topology level 255 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 256 __ andl(rax, 0xffff); 257 __ pop(rax); 258 __ jccb(Assembler::equal, std_cpuid4); 259 260 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); 261 __ movl(Address(rsi, 0), rax); 262 __ movl(Address(rsi, 4), rbx); 263 __ movl(Address(rsi, 8), rcx); 264 __ movl(Address(rsi,12), rdx); 265 266 // 267 // cpuid(0x4) Deterministic cache params 268 // 269 __ bind(std_cpuid4); 270 __ movl(rax, 4); 271 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? 272 __ jccb(Assembler::greater, std_cpuid1); 273 274 __ xorl(rcx, rcx); // L1 cache 275 __ cpuid(); 276 __ push(rax); 277 __ andl(rax, 0x1f); // Determine if valid cache parameters used 278 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 279 __ pop(rax); 280 __ jccb(Assembler::equal, std_cpuid1); 281 282 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 283 __ movl(Address(rsi, 0), rax); 284 __ movl(Address(rsi, 4), rbx); 285 __ movl(Address(rsi, 8), rcx); 286 __ movl(Address(rsi,12), rdx); 287 288 // 289 // Standard cpuid(0x1) 290 // 291 __ bind(std_cpuid1); 292 __ movl(rax, 1); 293 __ cpuid(); 294 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 295 __ movl(Address(rsi, 0), rax); 296 __ movl(Address(rsi, 4), rbx); 297 __ movl(Address(rsi, 8), rcx); 298 __ movl(Address(rsi,12), rdx); 299 300 // 301 // Check if OS has enabled XGETBV instruction to access XCR0 302 // (OSXSAVE feature flag) and CPU supports AVX 303 // 304 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 305 __ cmpl(rcx, 0x18000000); 306 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported 307 308 // 309 // XCR0, XFEATURE_ENABLED_MASK register 310 // 311 __ xorl(rcx, rcx); // zero for XCR0 register 312 __ xgetbv(); 313 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); 314 __ movl(Address(rsi, 0), rax); 315 __ movl(Address(rsi, 4), rdx); 316 317 // 318 // cpuid(0x7) Structured Extended Features Enumeration Leaf. 319 // 320 __ bind(sef_cpuid); 321 __ movl(rax, 7); 322 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? 323 __ jccb(Assembler::greater, ext_cpuid); 324 // ECX = 0 325 __ xorl(rcx, rcx); 326 __ cpuid(); 327 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 328 __ movl(Address(rsi, 0), rax); 329 __ movl(Address(rsi, 4), rbx); 330 __ movl(Address(rsi, 8), rcx); 331 __ movl(Address(rsi, 12), rdx); 332 333 // 334 // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1. 335 // 336 __ bind(sefsl1_cpuid); 337 __ movl(rax, 7); 338 __ movl(rcx, 1); 339 __ cpuid(); 340 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 341 __ movl(Address(rsi, 0), rax); 342 __ movl(Address(rsi, 4), rdx); 343 344 // 345 // Extended cpuid(0x80000000) 346 // 347 __ bind(ext_cpuid); 348 __ movl(rax, 0x80000000); 349 __ cpuid(); 350 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 351 __ jcc(Assembler::belowEqual, done); 352 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 353 __ jcc(Assembler::belowEqual, ext_cpuid1); 354 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? 355 __ jccb(Assembler::belowEqual, ext_cpuid5); 356 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 357 __ jccb(Assembler::belowEqual, ext_cpuid7); 358 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported? 359 __ jccb(Assembler::belowEqual, ext_cpuid8); 360 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported? 361 __ jccb(Assembler::below, ext_cpuid8); 362 // 363 // Extended cpuid(0x8000001E) 364 // 365 __ movl(rax, 0x8000001E); 366 __ cpuid(); 367 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset()))); 368 __ movl(Address(rsi, 0), rax); 369 __ movl(Address(rsi, 4), rbx); 370 __ movl(Address(rsi, 8), rcx); 371 __ movl(Address(rsi,12), rdx); 372 373 // 374 // Extended cpuid(0x80000008) 375 // 376 __ bind(ext_cpuid8); 377 __ movl(rax, 0x80000008); 378 __ cpuid(); 379 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 380 __ movl(Address(rsi, 0), rax); 381 __ movl(Address(rsi, 4), rbx); 382 __ movl(Address(rsi, 8), rcx); 383 __ movl(Address(rsi,12), rdx); 384 385 // 386 // Extended cpuid(0x80000007) 387 // 388 __ bind(ext_cpuid7); 389 __ movl(rax, 0x80000007); 390 __ cpuid(); 391 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); 392 __ movl(Address(rsi, 0), rax); 393 __ movl(Address(rsi, 4), rbx); 394 __ movl(Address(rsi, 8), rcx); 395 __ movl(Address(rsi,12), rdx); 396 397 // 398 // Extended cpuid(0x80000005) 399 // 400 __ bind(ext_cpuid5); 401 __ movl(rax, 0x80000005); 402 __ cpuid(); 403 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 404 __ movl(Address(rsi, 0), rax); 405 __ movl(Address(rsi, 4), rbx); 406 __ movl(Address(rsi, 8), rcx); 407 __ movl(Address(rsi,12), rdx); 408 409 // 410 // Extended cpuid(0x80000001) 411 // 412 __ bind(ext_cpuid1); 413 __ movl(rax, 0x80000001); 414 __ cpuid(); 415 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 416 __ movl(Address(rsi, 0), rax); 417 __ movl(Address(rsi, 4), rbx); 418 __ movl(Address(rsi, 8), rcx); 419 __ movl(Address(rsi,12), rdx); 420 421 #if defined(_LP64) 422 // 423 // Check if OS has enabled XGETBV instruction to access XCR0 424 // (OSXSAVE feature flag) and CPU supports APX 425 // 426 // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support 427 // and XCRO[19] bit for OS support to save/restore extended GPR state. 428 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); 429 __ movl(rax, 0x200000); 430 __ andl(rax, Address(rsi, 4)); 431 __ cmpl(rax, 0x200000); 432 __ jcc(Assembler::notEqual, vector_save_restore); 433 // check _cpuid_info.xem_xcr0_eax.bits.apx_f 434 __ movl(rax, 0x80000); 435 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f 436 __ cmpl(rax, 0x80000); 437 __ jcc(Assembler::notEqual, vector_save_restore); 438 439 #ifndef PRODUCT 440 bool save_apx = UseAPX; 441 VM_Version::set_apx_cpuFeatures(); 442 UseAPX = true; 443 __ mov64(r16, VM_Version::egpr_test_value()); 444 __ mov64(r31, VM_Version::egpr_test_value()); 445 __ xorl(rsi, rsi); 446 VM_Version::set_cpuinfo_segv_addr_apx(__ pc()); 447 // Generate SEGV 448 __ movl(rax, Address(rsi, 0)); 449 450 VM_Version::set_cpuinfo_cont_addr_apx(__ pc()); 451 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset()))); 452 __ movq(Address(rsi, 0), r16); 453 __ movq(Address(rsi, 8), r31); 454 455 UseAPX = save_apx; 456 #endif 457 #endif 458 __ bind(vector_save_restore); 459 // 460 // Check if OS has enabled XGETBV instruction to access XCR0 461 // (OSXSAVE feature flag) and CPU supports AVX 462 // 463 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 464 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 465 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx 466 __ cmpl(rcx, 0x18000000); 467 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported 468 469 __ movl(rax, 0x6); 470 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 471 __ cmpl(rax, 0x6); 472 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported 473 474 // we need to bridge farther than imm8, so we use this island as a thunk 475 __ bind(done); 476 __ jmp(wrapup); 477 478 __ bind(start_simd_check); 479 // 480 // Some OSs have a bug when upper 128/256bits of YMM/ZMM 481 // registers are not restored after a signal processing. 482 // Generate SEGV here (reference through null) 483 // and check upper YMM/ZMM bits after it. 484 // 485 int saved_useavx = UseAVX; 486 int saved_usesse = UseSSE; 487 488 // If UseAVX is uninitialized or is set by the user to include EVEX 489 if (use_evex) { 490 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 491 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 492 __ movl(rax, 0x10000); 493 __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm 494 __ cmpl(rax, 0x10000); 495 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 496 // check _cpuid_info.xem_xcr0_eax.bits.opmask 497 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 498 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 499 __ movl(rax, 0xE0); 500 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 501 __ cmpl(rax, 0xE0); 502 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 503 504 if (FLAG_IS_DEFAULT(UseAVX)) { 505 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 506 __ movl(rax, Address(rsi, 0)); 507 __ cmpl(rax, 0x50654); // If it is Skylake 508 __ jcc(Assembler::equal, legacy_setup); 509 } 510 // EVEX setup: run in lowest evex mode 511 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 512 UseAVX = 3; 513 UseSSE = 2; 514 #ifdef _WINDOWS 515 // xmm5-xmm15 are not preserved by caller on windows 516 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx 517 __ subptr(rsp, 64); 518 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); 519 __ subptr(rsp, 64); 520 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); 521 __ subptr(rsp, 64); 522 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); 523 #endif // _WINDOWS 524 525 // load value into all 64 bytes of zmm7 register 526 __ movl(rcx, VM_Version::ymm_test_value()); 527 __ movdl(xmm0, rcx); 528 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); 529 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); 530 #ifdef _LP64 531 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); 532 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); 533 #endif 534 VM_Version::clean_cpuFeatures(); 535 __ jmp(save_restore_except); 536 } 537 538 __ bind(legacy_setup); 539 // AVX setup 540 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 541 UseAVX = 1; 542 UseSSE = 2; 543 #ifdef _WINDOWS 544 __ subptr(rsp, 32); 545 __ vmovdqu(Address(rsp, 0), xmm7); 546 __ subptr(rsp, 32); 547 __ vmovdqu(Address(rsp, 0), xmm8); 548 __ subptr(rsp, 32); 549 __ vmovdqu(Address(rsp, 0), xmm15); 550 #endif // _WINDOWS 551 552 // load value into all 32 bytes of ymm7 register 553 __ movl(rcx, VM_Version::ymm_test_value()); 554 555 __ movdl(xmm0, rcx); 556 __ pshufd(xmm0, xmm0, 0x00); 557 __ vinsertf128_high(xmm0, xmm0); 558 __ vmovdqu(xmm7, xmm0); 559 #ifdef _LP64 560 __ vmovdqu(xmm8, xmm0); 561 __ vmovdqu(xmm15, xmm0); 562 #endif 563 VM_Version::clean_cpuFeatures(); 564 565 __ bind(save_restore_except); 566 __ xorl(rsi, rsi); 567 VM_Version::set_cpuinfo_segv_addr(__ pc()); 568 // Generate SEGV 569 __ movl(rax, Address(rsi, 0)); 570 571 VM_Version::set_cpuinfo_cont_addr(__ pc()); 572 // Returns here after signal. Save xmm0 to check it later. 573 574 // If UseAVX is uninitialized or is set by the user to include EVEX 575 if (use_evex) { 576 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 577 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 578 __ movl(rax, 0x10000); 579 __ andl(rax, Address(rsi, 4)); 580 __ cmpl(rax, 0x10000); 581 __ jcc(Assembler::notEqual, legacy_save_restore); 582 // check _cpuid_info.xem_xcr0_eax.bits.opmask 583 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 584 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 585 __ movl(rax, 0xE0); 586 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 587 __ cmpl(rax, 0xE0); 588 __ jcc(Assembler::notEqual, legacy_save_restore); 589 590 if (FLAG_IS_DEFAULT(UseAVX)) { 591 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 592 __ movl(rax, Address(rsi, 0)); 593 __ cmpl(rax, 0x50654); // If it is Skylake 594 __ jcc(Assembler::equal, legacy_save_restore); 595 } 596 // EVEX check: run in lowest evex mode 597 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 598 UseAVX = 3; 599 UseSSE = 2; 600 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); 601 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); 602 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); 603 #ifdef _LP64 604 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); 605 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); 606 #endif 607 608 #ifdef _WINDOWS 609 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); 610 __ addptr(rsp, 64); 611 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); 612 __ addptr(rsp, 64); 613 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); 614 __ addptr(rsp, 64); 615 #endif // _WINDOWS 616 generate_vzeroupper(wrapup); 617 VM_Version::clean_cpuFeatures(); 618 UseAVX = saved_useavx; 619 UseSSE = saved_usesse; 620 __ jmp(wrapup); 621 } 622 623 __ bind(legacy_save_restore); 624 // AVX check 625 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 626 UseAVX = 1; 627 UseSSE = 2; 628 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); 629 __ vmovdqu(Address(rsi, 0), xmm0); 630 __ vmovdqu(Address(rsi, 32), xmm7); 631 #ifdef _LP64 632 __ vmovdqu(Address(rsi, 64), xmm8); 633 __ vmovdqu(Address(rsi, 96), xmm15); 634 #endif 635 636 #ifdef _WINDOWS 637 __ vmovdqu(xmm15, Address(rsp, 0)); 638 __ addptr(rsp, 32); 639 __ vmovdqu(xmm8, Address(rsp, 0)); 640 __ addptr(rsp, 32); 641 __ vmovdqu(xmm7, Address(rsp, 0)); 642 __ addptr(rsp, 32); 643 #endif // _WINDOWS 644 645 generate_vzeroupper(wrapup); 646 VM_Version::clean_cpuFeatures(); 647 UseAVX = saved_useavx; 648 UseSSE = saved_usesse; 649 650 __ bind(wrapup); 651 __ popf(); 652 __ pop(rsi); 653 __ pop(rbx); 654 __ pop(rbp); 655 __ ret(0); 656 657 # undef __ 658 659 return start; 660 }; 661 void generate_vzeroupper(Label& L_wrapup) { 662 # define __ _masm-> 663 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 664 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' 665 __ jcc(Assembler::notEqual, L_wrapup); 666 __ movl(rcx, 0x0FFF0FF0); 667 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 668 __ andl(rcx, Address(rsi, 0)); 669 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200 670 __ jcc(Assembler::equal, L_wrapup); 671 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi 672 __ jcc(Assembler::equal, L_wrapup); 673 // vzeroupper() will use a pre-computed instruction sequence that we 674 // can't compute until after we've determined CPU capabilities. Use 675 // uncached variant here directly to be able to bootstrap correctly 676 __ vzeroupper_uncached(); 677 # undef __ 678 } 679 address generate_detect_virt() { 680 StubCodeMark mark(this, "VM_Version", "detect_virt_stub"); 681 # define __ _masm-> 682 683 address start = __ pc(); 684 685 // Evacuate callee-saved registers 686 __ push(rbp); 687 __ push(rbx); 688 __ push(rsi); // for Windows 689 690 #ifdef _LP64 691 __ mov(rax, c_rarg0); // CPUID leaf 692 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) 693 #else 694 __ movptr(rax, Address(rsp, 16)); // CPUID leaf 695 __ movptr(rsi, Address(rsp, 20)); // register array address 696 #endif 697 698 __ cpuid(); 699 700 // Store result to register array 701 __ movl(Address(rsi, 0), rax); 702 __ movl(Address(rsi, 4), rbx); 703 __ movl(Address(rsi, 8), rcx); 704 __ movl(Address(rsi, 12), rdx); 705 706 // Epilogue 707 __ pop(rsi); 708 __ pop(rbx); 709 __ pop(rbp); 710 __ ret(0); 711 712 # undef __ 713 714 return start; 715 }; 716 717 718 address generate_getCPUIDBrandString(void) { 719 // Flags to test CPU type. 720 const uint32_t HS_EFL_AC = 0x40000; 721 const uint32_t HS_EFL_ID = 0x200000; 722 // Values for when we don't have a CPUID instruction. 723 const int CPU_FAMILY_SHIFT = 8; 724 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 725 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 726 727 Label detect_486, cpu486, detect_586, done, ext_cpuid; 728 729 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub"); 730 # define __ _masm-> 731 732 address start = __ pc(); 733 734 // 735 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); 736 // 737 // LP64: rcx and rdx are first and second argument registers on windows 738 739 __ push(rbp); 740 #ifdef _LP64 741 __ mov(rbp, c_rarg0); // cpuid_info address 742 #else 743 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 744 #endif 745 __ push(rbx); 746 __ push(rsi); 747 __ pushf(); // preserve rbx, and flags 748 __ pop(rax); 749 __ push(rax); 750 __ mov(rcx, rax); 751 // 752 // if we are unable to change the AC flag, we have a 386 753 // 754 __ xorl(rax, HS_EFL_AC); 755 __ push(rax); 756 __ popf(); 757 __ pushf(); 758 __ pop(rax); 759 __ cmpptr(rax, rcx); 760 __ jccb(Assembler::notEqual, detect_486); 761 762 __ movl(rax, CPU_FAMILY_386); 763 __ jmp(done); 764 765 // 766 // If we are unable to change the ID flag, we have a 486 which does 767 // not support the "cpuid" instruction. 768 // 769 __ bind(detect_486); 770 __ mov(rax, rcx); 771 __ xorl(rax, HS_EFL_ID); 772 __ push(rax); 773 __ popf(); 774 __ pushf(); 775 __ pop(rax); 776 __ cmpptr(rcx, rax); 777 __ jccb(Assembler::notEqual, detect_586); 778 779 __ bind(cpu486); 780 __ movl(rax, CPU_FAMILY_486); 781 __ jmp(done); 782 783 // 784 // At this point, we have a chip which supports the "cpuid" instruction 785 // 786 __ bind(detect_586); 787 __ xorl(rax, rax); 788 __ cpuid(); 789 __ orl(rax, rax); 790 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 791 // value of at least 1, we give up and 792 // assume a 486 793 794 // 795 // Extended cpuid(0x80000000) for processor brand string detection 796 // 797 __ bind(ext_cpuid); 798 __ movl(rax, CPUID_EXTENDED_FN); 799 __ cpuid(); 800 __ cmpl(rax, CPUID_EXTENDED_FN_4); 801 __ jcc(Assembler::below, done); 802 803 // 804 // Extended cpuid(0x80000002) // first 16 bytes in brand string 805 // 806 __ movl(rax, CPUID_EXTENDED_FN_2); 807 __ cpuid(); 808 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); 809 __ movl(Address(rsi, 0), rax); 810 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); 811 __ movl(Address(rsi, 0), rbx); 812 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); 813 __ movl(Address(rsi, 0), rcx); 814 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); 815 __ movl(Address(rsi,0), rdx); 816 817 // 818 // Extended cpuid(0x80000003) // next 16 bytes in brand string 819 // 820 __ movl(rax, CPUID_EXTENDED_FN_3); 821 __ cpuid(); 822 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); 823 __ movl(Address(rsi, 0), rax); 824 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); 825 __ movl(Address(rsi, 0), rbx); 826 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); 827 __ movl(Address(rsi, 0), rcx); 828 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); 829 __ movl(Address(rsi,0), rdx); 830 831 // 832 // Extended cpuid(0x80000004) // last 16 bytes in brand string 833 // 834 __ movl(rax, CPUID_EXTENDED_FN_4); 835 __ cpuid(); 836 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); 837 __ movl(Address(rsi, 0), rax); 838 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); 839 __ movl(Address(rsi, 0), rbx); 840 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); 841 __ movl(Address(rsi, 0), rcx); 842 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); 843 __ movl(Address(rsi,0), rdx); 844 845 // 846 // return 847 // 848 __ bind(done); 849 __ popf(); 850 __ pop(rsi); 851 __ pop(rbx); 852 __ pop(rbp); 853 __ ret(0); 854 855 # undef __ 856 857 return start; 858 }; 859 }; 860 861 void VM_Version::get_processor_features() { 862 863 _cpu = 4; // 486 by default 864 _model = 0; 865 _stepping = 0; 866 _features = 0; 867 _logical_processors_per_package = 1; 868 // i486 internal cache is both I&D and has a 16-byte line size 869 _L1_data_cache_line_size = 16; 870 871 // Get raw processor info 872 873 get_cpu_info_stub(&_cpuid_info); 874 875 assert_is_initialized(); 876 _cpu = extended_cpu_family(); 877 _model = extended_cpu_model(); 878 _stepping = cpu_stepping(); 879 880 if (cpu_family() > 4) { // it supports CPUID 881 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings 882 _cpu_features = _features; // Preserve features 883 // Logical processors are only available on P4s and above, 884 // and only if hyperthreading is available. 885 _logical_processors_per_package = logical_processor_count(); 886 _L1_data_cache_line_size = L1_line_size(); 887 } 888 889 // xchg and xadd instructions 890 _supports_atomic_getset4 = true; 891 _supports_atomic_getadd4 = true; 892 LP64_ONLY(_supports_atomic_getset8 = true); 893 LP64_ONLY(_supports_atomic_getadd8 = true); 894 895 #ifdef _LP64 896 // OS should support SSE for x64 and hardware should support at least SSE2. 897 if (!VM_Version::supports_sse2()) { 898 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 899 } 900 // in 64 bit the use of SSE2 is the minimum 901 if (UseSSE < 2) UseSSE = 2; 902 #endif 903 904 #ifdef AMD64 905 // flush_icache_stub have to be generated first. 906 // That is why Icache line size is hard coded in ICache class, 907 // see icache_x86.hpp. It is also the reason why we can't use 908 // clflush instruction in 32-bit VM since it could be running 909 // on CPU which does not support it. 910 // 911 // The only thing we can do is to verify that flushed 912 // ICache::line_size has correct value. 913 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); 914 // clflush_size is size in quadwords (8 bytes). 915 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); 916 #endif 917 918 #ifdef _LP64 919 // assigning this field effectively enables Unsafe.writebackMemory() 920 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero 921 // that is only implemented on x86_64 and only if the OS plays ball 922 if (os::supports_map_sync()) { 923 // publish data cache line flush size to generic field, otherwise 924 // let if default to zero thereby disabling writeback 925 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; 926 } 927 #endif 928 929 // Check if processor has Intel Ecore 930 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 && 931 (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF || 932 _model == 0xCC || _model == 0xDD)) { 933 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true); 934 } 935 936 if (UseSSE < 4) { 937 _features &= ~CPU_SSE4_1; 938 _features &= ~CPU_SSE4_2; 939 } 940 941 if (UseSSE < 3) { 942 _features &= ~CPU_SSE3; 943 _features &= ~CPU_SSSE3; 944 _features &= ~CPU_SSE4A; 945 } 946 947 if (UseSSE < 2) 948 _features &= ~CPU_SSE2; 949 950 if (UseSSE < 1) 951 _features &= ~CPU_SSE; 952 953 //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. 954 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { 955 UseAVX = 0; 956 } 957 958 // UseSSE is set to the smaller of what hardware supports and what 959 // the command line requires. I.e., you cannot set UseSSE to 2 on 960 // older Pentiums which do not support it. 961 int use_sse_limit = 0; 962 if (UseSSE > 0) { 963 if (UseSSE > 3 && supports_sse4_1()) { 964 use_sse_limit = 4; 965 } else if (UseSSE > 2 && supports_sse3()) { 966 use_sse_limit = 3; 967 } else if (UseSSE > 1 && supports_sse2()) { 968 use_sse_limit = 2; 969 } else if (UseSSE > 0 && supports_sse()) { 970 use_sse_limit = 1; 971 } else { 972 use_sse_limit = 0; 973 } 974 } 975 if (FLAG_IS_DEFAULT(UseSSE)) { 976 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 977 } else if (UseSSE > use_sse_limit) { 978 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit); 979 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 980 } 981 982 // first try initial setting and detect what we can support 983 int use_avx_limit = 0; 984 if (UseAVX > 0) { 985 if (UseSSE < 4) { 986 // Don't use AVX if SSE is unavailable or has been disabled. 987 use_avx_limit = 0; 988 } else if (UseAVX > 2 && supports_evex()) { 989 use_avx_limit = 3; 990 } else if (UseAVX > 1 && supports_avx2()) { 991 use_avx_limit = 2; 992 } else if (UseAVX > 0 && supports_avx()) { 993 use_avx_limit = 1; 994 } else { 995 use_avx_limit = 0; 996 } 997 } 998 if (FLAG_IS_DEFAULT(UseAVX)) { 999 // Don't use AVX-512 on older Skylakes unless explicitly requested. 1000 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { 1001 FLAG_SET_DEFAULT(UseAVX, 2); 1002 } else { 1003 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 1004 } 1005 } 1006 1007 if (UseAVX > use_avx_limit) { 1008 if (UseSSE < 4) { 1009 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX); 1010 } else { 1011 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit); 1012 } 1013 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 1014 } 1015 1016 if (UseAVX < 3) { 1017 _features &= ~CPU_AVX512F; 1018 _features &= ~CPU_AVX512DQ; 1019 _features &= ~CPU_AVX512CD; 1020 _features &= ~CPU_AVX512BW; 1021 _features &= ~CPU_AVX512VL; 1022 _features &= ~CPU_AVX512_VPOPCNTDQ; 1023 _features &= ~CPU_AVX512_VPCLMULQDQ; 1024 _features &= ~CPU_AVX512_VAES; 1025 _features &= ~CPU_AVX512_VNNI; 1026 _features &= ~CPU_AVX512_VBMI; 1027 _features &= ~CPU_AVX512_VBMI2; 1028 _features &= ~CPU_AVX512_BITALG; 1029 _features &= ~CPU_AVX512_IFMA; 1030 _features &= ~CPU_APX_F; 1031 _features &= ~CPU_AVX512_FP16; 1032 } 1033 1034 // Currently APX support is only enabled for targets supporting AVX512VL feature. 1035 bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl(); 1036 if (UseAPX && !apx_supported) { 1037 warning("UseAPX is not supported on this CPU, setting it to false"); 1038 FLAG_SET_DEFAULT(UseAPX, false); 1039 } else if (FLAG_IS_DEFAULT(UseAPX)) { 1040 FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false); 1041 } 1042 1043 if (!UseAPX) { 1044 _features &= ~CPU_APX_F; 1045 } 1046 1047 if (UseAVX < 2) { 1048 _features &= ~CPU_AVX2; 1049 _features &= ~CPU_AVX_IFMA; 1050 } 1051 1052 if (UseAVX < 1) { 1053 _features &= ~CPU_AVX; 1054 _features &= ~CPU_VZEROUPPER; 1055 _features &= ~CPU_F16C; 1056 _features &= ~CPU_SHA512; 1057 } 1058 1059 if (logical_processors_per_package() == 1) { 1060 // HT processor could be installed on a system which doesn't support HT. 1061 _features &= ~CPU_HT; 1062 } 1063 1064 if (is_intel()) { // Intel cpus specific settings 1065 if (is_knights_family()) { 1066 _features &= ~CPU_VZEROUPPER; 1067 _features &= ~CPU_AVX512BW; 1068 _features &= ~CPU_AVX512VL; 1069 _features &= ~CPU_AVX512DQ; 1070 _features &= ~CPU_AVX512_VNNI; 1071 _features &= ~CPU_AVX512_VAES; 1072 _features &= ~CPU_AVX512_VPOPCNTDQ; 1073 _features &= ~CPU_AVX512_VPCLMULQDQ; 1074 _features &= ~CPU_AVX512_VBMI; 1075 _features &= ~CPU_AVX512_VBMI2; 1076 _features &= ~CPU_CLWB; 1077 _features &= ~CPU_FLUSHOPT; 1078 _features &= ~CPU_GFNI; 1079 _features &= ~CPU_AVX512_BITALG; 1080 _features &= ~CPU_AVX512_IFMA; 1081 _features &= ~CPU_AVX_IFMA; 1082 _features &= ~CPU_AVX512_FP16; 1083 } 1084 } 1085 1086 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) { 1087 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum(); 1088 } else { 1089 _has_intel_jcc_erratum = IntelJccErratumMitigation; 1090 } 1091 1092 char buf[1024]; 1093 int res = jio_snprintf( 1094 buf, sizeof(buf), 1095 "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x", 1096 cores_per_cpu(), threads_per_core(), 1097 cpu_family(), _model, _stepping, os::cpu_microcode_revision()); 1098 assert(res > 0, "not enough temporary space allocated"); 1099 insert_features_names(buf + res, sizeof(buf) - res, _features_names); 1100 1101 _features_string = os::strdup(buf); 1102 1103 // Use AES instructions if available. 1104 if (supports_aes()) { 1105 if (FLAG_IS_DEFAULT(UseAES)) { 1106 FLAG_SET_DEFAULT(UseAES, true); 1107 } 1108 if (!UseAES) { 1109 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1110 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); 1111 } 1112 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1113 } else { 1114 if (UseSSE > 2) { 1115 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1116 FLAG_SET_DEFAULT(UseAESIntrinsics, true); 1117 } 1118 } else { 1119 // The AES intrinsic stubs require AES instruction support (of course) 1120 // but also require sse3 mode or higher for instructions it use. 1121 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1122 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); 1123 } 1124 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1125 } 1126 1127 // --AES-CTR begins-- 1128 if (!UseAESIntrinsics) { 1129 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1130 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled."); 1131 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1132 } 1133 } else { 1134 if (supports_sse4_1()) { 1135 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1136 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true); 1137 } 1138 } else { 1139 // The AES-CTR intrinsic stubs require AES instruction support (of course) 1140 // but also require sse4.1 mode or higher for instructions it use. 1141 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1142 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled."); 1143 } 1144 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1145 } 1146 } 1147 // --AES-CTR ends-- 1148 } 1149 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { 1150 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { 1151 warning("AES instructions are not available on this CPU"); 1152 FLAG_SET_DEFAULT(UseAES, false); 1153 } 1154 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1155 warning("AES intrinsics are not available on this CPU"); 1156 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1157 } 1158 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1159 warning("AES-CTR intrinsics are not available on this CPU"); 1160 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1161 } 1162 } 1163 1164 // Use CLMUL instructions if available. 1165 if (supports_clmul()) { 1166 if (FLAG_IS_DEFAULT(UseCLMUL)) { 1167 UseCLMUL = true; 1168 } 1169 } else if (UseCLMUL) { 1170 if (!FLAG_IS_DEFAULT(UseCLMUL)) 1171 warning("CLMUL instructions not available on this CPU (AVX may also be required)"); 1172 FLAG_SET_DEFAULT(UseCLMUL, false); 1173 } 1174 1175 if (UseCLMUL && (UseSSE > 2)) { 1176 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { 1177 UseCRC32Intrinsics = true; 1178 } 1179 } else if (UseCRC32Intrinsics) { 1180 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) 1181 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); 1182 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); 1183 } 1184 1185 #ifdef _LP64 1186 if (supports_avx2()) { 1187 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1188 UseAdler32Intrinsics = true; 1189 } 1190 } else if (UseAdler32Intrinsics) { 1191 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1192 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)"); 1193 } 1194 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1195 } 1196 #else 1197 if (UseAdler32Intrinsics) { 1198 warning("Adler32Intrinsics not available on this CPU."); 1199 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1200 } 1201 #endif 1202 1203 if (supports_sse4_2() && supports_clmul()) { 1204 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1205 UseCRC32CIntrinsics = true; 1206 } 1207 } else if (UseCRC32CIntrinsics) { 1208 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1209 warning("CRC32C intrinsics are not available on this CPU"); 1210 } 1211 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); 1212 } 1213 1214 // GHASH/GCM intrinsics 1215 if (UseCLMUL && (UseSSE > 2)) { 1216 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { 1217 UseGHASHIntrinsics = true; 1218 } 1219 } else if (UseGHASHIntrinsics) { 1220 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) 1221 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); 1222 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 1223 } 1224 1225 #ifdef _LP64 1226 // ChaCha20 Intrinsics 1227 // As long as the system supports AVX as a baseline we can do a 1228 // SIMD-enabled block function. StubGenerator makes the determination 1229 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled 1230 // version. 1231 if (UseAVX >= 1) { 1232 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1233 UseChaCha20Intrinsics = true; 1234 } 1235 } else if (UseChaCha20Intrinsics) { 1236 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1237 warning("ChaCha20 intrinsic requires AVX instructions"); 1238 } 1239 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); 1240 } 1241 #else 1242 // No support currently for ChaCha20 intrinsics on 32-bit platforms 1243 if (UseChaCha20Intrinsics) { 1244 warning("ChaCha20 intrinsics are not available on this CPU."); 1245 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); 1246 } 1247 #endif // _LP64 1248 1249 // Base64 Intrinsics (Check the condition for which the intrinsic will be active) 1250 if (UseAVX >= 2) { 1251 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { 1252 UseBASE64Intrinsics = true; 1253 } 1254 } else if (UseBASE64Intrinsics) { 1255 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) 1256 warning("Base64 intrinsic requires EVEX instructions on this CPU"); 1257 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); 1258 } 1259 1260 if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions 1261 if (FLAG_IS_DEFAULT(UseFMA)) { 1262 UseFMA = true; 1263 } 1264 } else if (UseFMA) { 1265 warning("FMA instructions are not available on this CPU"); 1266 FLAG_SET_DEFAULT(UseFMA, false); 1267 } 1268 1269 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { 1270 UseMD5Intrinsics = true; 1271 } 1272 1273 if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) { 1274 if (FLAG_IS_DEFAULT(UseSHA)) { 1275 UseSHA = true; 1276 } 1277 } else if (UseSHA) { 1278 warning("SHA instructions are not available on this CPU"); 1279 FLAG_SET_DEFAULT(UseSHA, false); 1280 } 1281 1282 if (supports_sha() && supports_sse4_1() && UseSHA) { 1283 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { 1284 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); 1285 } 1286 } else if (UseSHA1Intrinsics) { 1287 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); 1288 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); 1289 } 1290 1291 if (supports_sse4_1() && UseSHA) { 1292 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { 1293 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); 1294 } 1295 } else if (UseSHA256Intrinsics) { 1296 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); 1297 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); 1298 } 1299 1300 #ifdef _LP64 1301 // These are only supported on 64-bit 1302 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) { 1303 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { 1304 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); 1305 } 1306 } else 1307 #endif 1308 if (UseSHA512Intrinsics) { 1309 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); 1310 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); 1311 } 1312 1313 #ifdef _LP64 1314 if (supports_evex() && supports_avx512bw()) { 1315 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) { 1316 UseSHA3Intrinsics = true; 1317 } 1318 } else 1319 #endif 1320 if (UseSHA3Intrinsics) { 1321 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); 1322 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); 1323 } 1324 1325 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { 1326 FLAG_SET_DEFAULT(UseSHA, false); 1327 } 1328 1329 #ifdef COMPILER2 1330 if (UseFPUForSpilling) { 1331 if (UseSSE < 2) { 1332 // Only supported with SSE2+ 1333 FLAG_SET_DEFAULT(UseFPUForSpilling, false); 1334 } 1335 } 1336 #endif 1337 1338 #if COMPILER2_OR_JVMCI 1339 int max_vector_size = 0; 1340 if (UseSSE < 2) { 1341 // Vectors (in XMM) are only supported with SSE2+ 1342 // SSE is always 2 on x64. 1343 max_vector_size = 0; 1344 } else if (UseAVX == 0 || !os_supports_avx_vectors()) { 1345 // 16 byte vectors (in XMM) are supported with SSE2+ 1346 max_vector_size = 16; 1347 } else if (UseAVX == 1 || UseAVX == 2) { 1348 // 32 bytes vectors (in YMM) are only supported with AVX+ 1349 max_vector_size = 32; 1350 } else if (UseAVX > 2) { 1351 // 64 bytes vectors (in ZMM) are only supported with AVX 3 1352 max_vector_size = 64; 1353 } 1354 1355 #ifdef _LP64 1356 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit 1357 #else 1358 int min_vector_size = 0; 1359 #endif 1360 1361 if (!FLAG_IS_DEFAULT(MaxVectorSize)) { 1362 if (MaxVectorSize < min_vector_size) { 1363 warning("MaxVectorSize must be at least %i on this platform", min_vector_size); 1364 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); 1365 } 1366 if (MaxVectorSize > max_vector_size) { 1367 warning("MaxVectorSize must be at most %i on this platform", max_vector_size); 1368 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1369 } 1370 if (!is_power_of_2(MaxVectorSize)) { 1371 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); 1372 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1373 } 1374 } else { 1375 // If default, use highest supported configuration 1376 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1377 } 1378 1379 #if defined(COMPILER2) && defined(ASSERT) 1380 if (MaxVectorSize > 0) { 1381 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { 1382 tty->print_cr("State of YMM registers after signal handle:"); 1383 int nreg = 2 LP64_ONLY(+2); 1384 const char* ymm_name[4] = {"0", "7", "8", "15"}; 1385 for (int i = 0; i < nreg; i++) { 1386 tty->print("YMM%s:", ymm_name[i]); 1387 for (int j = 7; j >=0; j--) { 1388 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); 1389 } 1390 tty->cr(); 1391 } 1392 } 1393 } 1394 #endif // COMPILER2 && ASSERT 1395 1396 #ifdef _LP64 1397 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { 1398 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { 1399 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); 1400 } 1401 } else 1402 #endif 1403 if (UsePoly1305Intrinsics) { 1404 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU."); 1405 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); 1406 } 1407 1408 #ifdef _LP64 1409 if (supports_avx512ifma() && supports_avx512vlbw()) { 1410 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) { 1411 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true); 1412 } 1413 } else 1414 #endif 1415 if (UseIntPolyIntrinsics) { 1416 warning("Intrinsics for Polynomial crypto functions not available on this CPU."); 1417 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false); 1418 } 1419 1420 #ifdef _LP64 1421 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1422 UseMultiplyToLenIntrinsic = true; 1423 } 1424 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1425 UseSquareToLenIntrinsic = true; 1426 } 1427 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1428 UseMulAddIntrinsic = true; 1429 } 1430 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1431 UseMontgomeryMultiplyIntrinsic = true; 1432 } 1433 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1434 UseMontgomerySquareIntrinsic = true; 1435 } 1436 #else 1437 if (UseMultiplyToLenIntrinsic) { 1438 if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1439 warning("multiplyToLen intrinsic is not available in 32-bit VM"); 1440 } 1441 FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); 1442 } 1443 if (UseMontgomeryMultiplyIntrinsic) { 1444 if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1445 warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); 1446 } 1447 FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); 1448 } 1449 if (UseMontgomerySquareIntrinsic) { 1450 if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1451 warning("montgomerySquare intrinsic is not available in 32-bit VM"); 1452 } 1453 FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); 1454 } 1455 if (UseSquareToLenIntrinsic) { 1456 if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1457 warning("squareToLen intrinsic is not available in 32-bit VM"); 1458 } 1459 FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false); 1460 } 1461 if (UseMulAddIntrinsic) { 1462 if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1463 warning("mulAdd intrinsic is not available in 32-bit VM"); 1464 } 1465 FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); 1466 } 1467 #endif // _LP64 1468 #endif // COMPILER2_OR_JVMCI 1469 1470 // On new cpus instructions which update whole XMM register should be used 1471 // to prevent partial register stall due to dependencies on high half. 1472 // 1473 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1474 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1475 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1476 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1477 1478 1479 if (is_zx()) { // ZX cpus specific settings 1480 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1481 UseStoreImmI16 = false; // don't use it on ZX cpus 1482 } 1483 if ((cpu_family() == 6) || (cpu_family() == 7)) { 1484 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1485 // Use it on all ZX cpus 1486 UseAddressNop = true; 1487 } 1488 } 1489 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1490 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus 1491 } 1492 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1493 if (supports_sse3()) { 1494 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus 1495 } else { 1496 UseXmmRegToRegMoveAll = false; 1497 } 1498 } 1499 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus 1500 #ifdef COMPILER2 1501 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1502 // For new ZX cpus do the next optimization: 1503 // don't align the beginning of a loop if there are enough instructions 1504 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1505 // in current fetch line (OptoLoopAlignment) or the padding 1506 // is big (> MaxLoopPad). 1507 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of 1508 // generated NOP instructions. 11 is the largest size of one 1509 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1510 MaxLoopPad = 11; 1511 } 1512 #endif // COMPILER2 1513 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1514 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus 1515 } 1516 if (supports_sse4_2()) { // new ZX cpus 1517 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1518 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus 1519 } 1520 } 1521 if (supports_sse4_2()) { 1522 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1523 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1524 } 1525 } else { 1526 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1527 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1528 } 1529 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1530 } 1531 } 1532 1533 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1534 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1535 } 1536 } 1537 1538 if (is_amd_family()) { // AMD cpus specific settings 1539 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { 1540 // Use it on new AMD cpus starting from Opteron. 1541 UseAddressNop = true; 1542 } 1543 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { 1544 // Use it on new AMD cpus starting from Opteron. 1545 UseNewLongLShift = true; 1546 } 1547 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1548 if (supports_sse4a()) { 1549 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1550 } else { 1551 UseXmmLoadAndClearUpper = false; 1552 } 1553 } 1554 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1555 if (supports_sse4a()) { 1556 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1557 } else { 1558 UseXmmRegToRegMoveAll = false; 1559 } 1560 } 1561 if (FLAG_IS_DEFAULT(UseXmmI2F)) { 1562 if (supports_sse4a()) { 1563 UseXmmI2F = true; 1564 } else { 1565 UseXmmI2F = false; 1566 } 1567 } 1568 if (FLAG_IS_DEFAULT(UseXmmI2D)) { 1569 if (supports_sse4a()) { 1570 UseXmmI2D = true; 1571 } else { 1572 UseXmmI2D = false; 1573 } 1574 } 1575 if (supports_sse4_2()) { 1576 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1577 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1578 } 1579 } else { 1580 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1581 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1582 } 1583 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1584 } 1585 1586 // some defaults for AMD family 15h 1587 if (cpu_family() == 0x15) { 1588 // On family 15h processors default is no sw prefetch 1589 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1590 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1591 } 1592 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 1593 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1594 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1595 } 1596 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 1597 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1598 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1599 } 1600 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1601 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1602 } 1603 } 1604 1605 #ifdef COMPILER2 1606 if (cpu_family() < 0x17 && MaxVectorSize > 16) { 1607 // Limit vectors size to 16 bytes on AMD cpus < 17h. 1608 FLAG_SET_DEFAULT(MaxVectorSize, 16); 1609 } 1610 #endif // COMPILER2 1611 1612 // Some defaults for AMD family >= 17h && Hygon family 18h 1613 if (cpu_family() >= 0x17) { 1614 // On family >=17h processors use XMM and UnalignedLoadStores 1615 // for Array Copy 1616 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1617 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1618 } 1619 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1620 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1621 } 1622 #ifdef COMPILER2 1623 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1624 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1625 } 1626 #endif 1627 } 1628 } 1629 1630 if (is_intel()) { // Intel cpus specific settings 1631 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1632 UseStoreImmI16 = false; // don't use it on Intel cpus 1633 } 1634 if (cpu_family() == 6 || cpu_family() == 15) { 1635 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1636 // Use it on all Intel cpus starting from PentiumPro 1637 UseAddressNop = true; 1638 } 1639 } 1640 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1641 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1642 } 1643 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1644 if (supports_sse3()) { 1645 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1646 } else { 1647 UseXmmRegToRegMoveAll = false; 1648 } 1649 } 1650 if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus 1651 #ifdef COMPILER2 1652 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1653 // For new Intel cpus do the next optimization: 1654 // don't align the beginning of a loop if there are enough instructions 1655 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1656 // in current fetch line (OptoLoopAlignment) or the padding 1657 // is big (> MaxLoopPad). 1658 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1659 // generated NOP instructions. 11 is the largest size of one 1660 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1661 MaxLoopPad = 11; 1662 } 1663 #endif // COMPILER2 1664 1665 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1666 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 1667 } 1668 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus 1669 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1670 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1671 } 1672 } 1673 if (supports_sse4_2()) { 1674 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1675 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1676 } 1677 } else { 1678 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1679 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1680 } 1681 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1682 } 1683 } 1684 if (is_atom_family() || is_knights_family()) { 1685 #ifdef COMPILER2 1686 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1687 OptoScheduling = true; 1688 } 1689 #endif 1690 if (supports_sse4_2()) { // Silvermont 1691 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1692 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1693 } 1694 } 1695 if (FLAG_IS_DEFAULT(UseIncDec)) { 1696 FLAG_SET_DEFAULT(UseIncDec, false); 1697 } 1698 } 1699 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1700 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1701 } 1702 #ifdef COMPILER2 1703 if (UseAVX > 2) { 1704 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1705 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1706 ArrayOperationPartialInlineSize != 0 && 1707 ArrayOperationPartialInlineSize != 16 && 1708 ArrayOperationPartialInlineSize != 32 && 1709 ArrayOperationPartialInlineSize != 64)) { 1710 int inline_size = 0; 1711 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1712 inline_size = 64; 1713 } else if (MaxVectorSize >= 32) { 1714 inline_size = 32; 1715 } else if (MaxVectorSize >= 16) { 1716 inline_size = 16; 1717 } 1718 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1719 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); 1720 } 1721 ArrayOperationPartialInlineSize = inline_size; 1722 } 1723 1724 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1725 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; 1726 if (ArrayOperationPartialInlineSize) { 1727 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize); 1728 } else { 1729 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize); 1730 } 1731 } 1732 } 1733 #endif 1734 } 1735 1736 #ifdef COMPILER2 1737 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1738 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { 1739 OptimizeFill = false; 1740 } 1741 } 1742 #endif 1743 1744 #ifdef _LP64 1745 if (UseSSE42Intrinsics) { 1746 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1747 UseVectorizedMismatchIntrinsic = true; 1748 } 1749 } else if (UseVectorizedMismatchIntrinsic) { 1750 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1751 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1752 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1753 } 1754 if (UseAVX >= 2) { 1755 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true); 1756 } else if (UseVectorizedHashCodeIntrinsic) { 1757 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) 1758 warning("vectorizedHashCode intrinsics are not available on this CPU"); 1759 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1760 } 1761 #else 1762 if (UseVectorizedMismatchIntrinsic) { 1763 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1764 warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); 1765 } 1766 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1767 } 1768 if (UseVectorizedHashCodeIntrinsic) { 1769 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) { 1770 warning("vectorizedHashCode intrinsic is not available in 32-bit VM"); 1771 } 1772 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1773 } 1774 #endif // _LP64 1775 1776 // Use count leading zeros count instruction if available. 1777 if (supports_lzcnt()) { 1778 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1779 UseCountLeadingZerosInstruction = true; 1780 } 1781 } else if (UseCountLeadingZerosInstruction) { 1782 warning("lzcnt instruction is not available on this CPU"); 1783 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1784 } 1785 1786 // Use count trailing zeros instruction if available 1787 if (supports_bmi1()) { 1788 // tzcnt does not require VEX prefix 1789 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1790 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1791 // Don't use tzcnt if BMI1 is switched off on command line. 1792 UseCountTrailingZerosInstruction = false; 1793 } else { 1794 UseCountTrailingZerosInstruction = true; 1795 } 1796 } 1797 } else if (UseCountTrailingZerosInstruction) { 1798 warning("tzcnt instruction is not available on this CPU"); 1799 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1800 } 1801 1802 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1803 // VEX prefix is generated only when AVX > 0. 1804 if (supports_bmi1() && supports_avx()) { 1805 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1806 UseBMI1Instructions = true; 1807 } 1808 } else if (UseBMI1Instructions) { 1809 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1810 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1811 } 1812 1813 if (supports_bmi2() && supports_avx()) { 1814 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1815 UseBMI2Instructions = true; 1816 } 1817 } else if (UseBMI2Instructions) { 1818 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1819 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1820 } 1821 1822 // Use population count instruction if available. 1823 if (supports_popcnt()) { 1824 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1825 UsePopCountInstruction = true; 1826 } 1827 } else if (UsePopCountInstruction) { 1828 warning("POPCNT instruction is not available on this CPU"); 1829 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1830 } 1831 1832 // Use fast-string operations if available. 1833 if (supports_erms()) { 1834 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1835 UseFastStosb = true; 1836 } 1837 } else if (UseFastStosb) { 1838 warning("fast-string operations are not available on this CPU"); 1839 FLAG_SET_DEFAULT(UseFastStosb, false); 1840 } 1841 1842 // For AMD Processors use XMM/YMM MOVDQU instructions 1843 // for Object Initialization as default 1844 if (is_amd() && cpu_family() >= 0x19) { 1845 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1846 UseFastStosb = false; 1847 } 1848 } 1849 1850 #ifdef COMPILER2 1851 if (is_intel() && MaxVectorSize > 16) { 1852 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1853 UseFastStosb = false; 1854 } 1855 } 1856 #endif 1857 1858 // Use XMM/YMM MOVDQU instruction for Object Initialization 1859 if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) { 1860 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1861 UseXMMForObjInit = true; 1862 } 1863 } else if (UseXMMForObjInit) { 1864 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1865 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1866 } 1867 1868 #ifdef COMPILER2 1869 if (FLAG_IS_DEFAULT(AlignVector)) { 1870 // Modern processors allow misaligned memory operations for vectors. 1871 AlignVector = !UseUnalignedLoadStores; 1872 } 1873 #endif // COMPILER2 1874 1875 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1876 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1877 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1878 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1879 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1880 } 1881 } 1882 1883 // Allocation prefetch settings 1884 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1885 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1886 (cache_line_size > AllocatePrefetchStepSize)) { 1887 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1888 } 1889 1890 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1891 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1892 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1893 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); 1894 } 1895 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1896 } 1897 1898 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1899 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1900 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1901 } 1902 1903 if (is_intel() && cpu_family() == 6 && supports_sse3()) { 1904 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1905 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1906 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1907 } 1908 #ifdef COMPILER2 1909 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1910 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1911 } 1912 #endif 1913 } 1914 1915 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1916 #ifdef COMPILER2 1917 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1918 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1919 } 1920 #endif 1921 } 1922 1923 #ifdef _LP64 1924 // Prefetch settings 1925 1926 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1927 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 1928 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1929 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1930 1931 // gc copy/scan is disabled if prefetchw isn't supported, because 1932 // Prefetch::write emits an inlined prefetchw on Linux. 1933 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1934 // The used prefetcht0 instruction works for both amd64 and em64t. 1935 1936 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1937 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1938 } 1939 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1940 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1941 } 1942 #endif 1943 1944 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1945 (cache_line_size > ContendedPaddingWidth)) 1946 ContendedPaddingWidth = cache_line_size; 1947 1948 // This machine allows unaligned memory accesses 1949 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1950 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1951 } 1952 1953 #ifndef PRODUCT 1954 if (log_is_enabled(Info, os, cpu)) { 1955 LogStream ls(Log(os, cpu)::info()); 1956 outputStream* log = &ls; 1957 log->print_cr("Logical CPUs per core: %u", 1958 logical_processors_per_package()); 1959 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1960 log->print("UseSSE=%d", UseSSE); 1961 if (UseAVX > 0) { 1962 log->print(" UseAVX=%d", UseAVX); 1963 } 1964 if (UseAES) { 1965 log->print(" UseAES=1"); 1966 } 1967 #ifdef COMPILER2 1968 if (MaxVectorSize > 0) { 1969 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1970 } 1971 #endif 1972 log->cr(); 1973 log->print("Allocation"); 1974 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { 1975 log->print_cr(": no prefetching"); 1976 } else { 1977 log->print(" prefetching: "); 1978 if (UseSSE == 0 && supports_3dnow_prefetch()) { 1979 log->print("PREFETCHW"); 1980 } else if (UseSSE >= 1) { 1981 if (AllocatePrefetchInstr == 0) { 1982 log->print("PREFETCHNTA"); 1983 } else if (AllocatePrefetchInstr == 1) { 1984 log->print("PREFETCHT0"); 1985 } else if (AllocatePrefetchInstr == 2) { 1986 log->print("PREFETCHT2"); 1987 } else if (AllocatePrefetchInstr == 3) { 1988 log->print("PREFETCHW"); 1989 } 1990 } 1991 if (AllocatePrefetchLines > 1) { 1992 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1993 } else { 1994 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1995 } 1996 } 1997 1998 if (PrefetchCopyIntervalInBytes > 0) { 1999 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 2000 } 2001 if (PrefetchScanIntervalInBytes > 0) { 2002 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 2003 } 2004 if (ContendedPaddingWidth > 0) { 2005 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 2006 } 2007 } 2008 #endif // !PRODUCT 2009 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 2010 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 2011 } 2012 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 2013 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 2014 } 2015 } 2016 2017 void VM_Version::print_platform_virtualization_info(outputStream* st) { 2018 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 2019 if (vrt == XenHVM) { 2020 st->print_cr("Xen hardware-assisted virtualization detected"); 2021 } else if (vrt == KVM) { 2022 st->print_cr("KVM virtualization detected"); 2023 } else if (vrt == VMWare) { 2024 st->print_cr("VMWare virtualization detected"); 2025 VirtualizationSupport::print_virtualization_info(st); 2026 } else if (vrt == HyperV) { 2027 st->print_cr("Hyper-V virtualization detected"); 2028 } else if (vrt == HyperVRole) { 2029 st->print_cr("Hyper-V role detected"); 2030 } 2031 } 2032 2033 bool VM_Version::compute_has_intel_jcc_erratum() { 2034 if (!is_intel_family_core()) { 2035 // Only Intel CPUs are affected. 2036 return false; 2037 } 2038 // The following table of affected CPUs is based on the following document released by Intel: 2039 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 2040 switch (_model) { 2041 case 0x8E: 2042 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2043 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 2044 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 2045 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 2046 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 2047 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2048 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2049 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 2050 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2051 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 2052 case 0x4E: 2053 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 2054 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 2055 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 2056 return _stepping == 0x3; 2057 case 0x55: 2058 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 2059 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 2060 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 2061 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 2062 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 2063 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 2064 return _stepping == 0x4 || _stepping == 0x7; 2065 case 0x5E: 2066 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 2067 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 2068 return _stepping == 0x3; 2069 case 0x9E: 2070 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2071 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2072 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2073 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2074 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2075 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H 2076 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 2077 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP 2078 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 2079 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 2080 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 2081 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 2082 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2) 2083 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 2084 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 2085 case 0xA5: 2086 // Not in Intel documentation. 2087 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 2088 return true; 2089 case 0xA6: 2090 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 2091 return _stepping == 0x0; 2092 case 0xAE: 2093 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2094 return _stepping == 0xA; 2095 default: 2096 // If we are running on another intel machine not recognized in the table, we are okay. 2097 return false; 2098 } 2099 } 2100 2101 // On Xen, the cpuid instruction returns 2102 // eax / registers[0]: Version of Xen 2103 // ebx / registers[1]: chars 'XenV' 2104 // ecx / registers[2]: chars 'MMXe' 2105 // edx / registers[3]: chars 'nVMM' 2106 // 2107 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2108 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2109 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2110 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2111 // 2112 // more information : 2113 // https://kb.vmware.com/s/article/1009458 2114 // 2115 void VM_Version::check_virtualizations() { 2116 uint32_t registers[4] = {0}; 2117 char signature[13] = {0}; 2118 2119 // Xen cpuid leaves can be found 0x100 aligned boundary starting 2120 // from 0x40000000 until 0x40010000. 2121 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2122 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2123 detect_virt_stub(leaf, registers); 2124 memcpy(signature, ®isters[1], 12); 2125 2126 if (strncmp("VMwareVMware", signature, 12) == 0) { 2127 Abstract_VM_Version::_detected_virtualization = VMWare; 2128 // check for extended metrics from guestlib 2129 VirtualizationSupport::initialize(); 2130 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2131 Abstract_VM_Version::_detected_virtualization = HyperV; 2132 #ifdef _WINDOWS 2133 // CPUID leaf 0x40000007 is available to the root partition only. 2134 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details. 2135 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2136 detect_virt_stub(0x40000007, registers); 2137 if ((registers[0] != 0x0) || 2138 (registers[1] != 0x0) || 2139 (registers[2] != 0x0) || 2140 (registers[3] != 0x0)) { 2141 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2142 } 2143 #endif 2144 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2145 Abstract_VM_Version::_detected_virtualization = KVM; 2146 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2147 Abstract_VM_Version::_detected_virtualization = XenHVM; 2148 } 2149 } 2150 } 2151 2152 #ifdef COMPILER2 2153 // Determine if it's running on Cascade Lake using default options. 2154 bool VM_Version::is_default_intel_cascade_lake() { 2155 return FLAG_IS_DEFAULT(UseAVX) && 2156 FLAG_IS_DEFAULT(MaxVectorSize) && 2157 UseAVX > 2 && 2158 is_intel_cascade_lake(); 2159 } 2160 #endif 2161 2162 bool VM_Version::is_intel_cascade_lake() { 2163 return is_intel_skylake() && _stepping >= 5; 2164 } 2165 2166 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2167 // for implementing the array copy and clear operations. 2168 // The Intel platforms that supports the serialize instruction 2169 // has improved implementation of 64-byte load/stores and so the default 2170 // threshold is set to 0 for these platforms. 2171 int VM_Version::avx3_threshold() { 2172 return (is_intel_family_core() && 2173 supports_serialize() && 2174 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2175 } 2176 2177 #if defined(_LP64) 2178 void VM_Version::clear_apx_test_state() { 2179 clear_apx_test_state_stub(); 2180 } 2181 #endif 2182 2183 static bool _vm_version_initialized = false; 2184 2185 void VM_Version::initialize() { 2186 ResourceMark rm; 2187 // Making this stub must be FIRST use of assembler 2188 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2189 if (stub_blob == nullptr) { 2190 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2191 } 2192 CodeBuffer c(stub_blob); 2193 VM_Version_StubGenerator g(&c); 2194 2195 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2196 g.generate_get_cpu_info()); 2197 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2198 g.generate_detect_virt()); 2199 2200 #if defined(_LP64) 2201 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, 2202 g.clear_apx_test_state()); 2203 #endif 2204 get_processor_features(); 2205 2206 LP64_ONLY(Assembler::precompute_instructions();) 2207 2208 if (VM_Version::supports_hv()) { // Supports hypervisor 2209 check_virtualizations(); 2210 } 2211 _vm_version_initialized = true; 2212 } 2213 2214 typedef enum { 2215 CPU_FAMILY_8086_8088 = 0, 2216 CPU_FAMILY_INTEL_286 = 2, 2217 CPU_FAMILY_INTEL_386 = 3, 2218 CPU_FAMILY_INTEL_486 = 4, 2219 CPU_FAMILY_PENTIUM = 5, 2220 CPU_FAMILY_PENTIUMPRO = 6, // Same family several models 2221 CPU_FAMILY_PENTIUM_4 = 0xF 2222 } FamilyFlag; 2223 2224 typedef enum { 2225 RDTSCP_FLAG = 0x08000000, // bit 27 2226 INTEL64_FLAG = 0x20000000 // bit 29 2227 } _featureExtendedEdxFlag; 2228 2229 typedef enum { 2230 FPU_FLAG = 0x00000001, 2231 VME_FLAG = 0x00000002, 2232 DE_FLAG = 0x00000004, 2233 PSE_FLAG = 0x00000008, 2234 TSC_FLAG = 0x00000010, 2235 MSR_FLAG = 0x00000020, 2236 PAE_FLAG = 0x00000040, 2237 MCE_FLAG = 0x00000080, 2238 CX8_FLAG = 0x00000100, 2239 APIC_FLAG = 0x00000200, 2240 SEP_FLAG = 0x00000800, 2241 MTRR_FLAG = 0x00001000, 2242 PGE_FLAG = 0x00002000, 2243 MCA_FLAG = 0x00004000, 2244 CMOV_FLAG = 0x00008000, 2245 PAT_FLAG = 0x00010000, 2246 PSE36_FLAG = 0x00020000, 2247 PSNUM_FLAG = 0x00040000, 2248 CLFLUSH_FLAG = 0x00080000, 2249 DTS_FLAG = 0x00200000, 2250 ACPI_FLAG = 0x00400000, 2251 MMX_FLAG = 0x00800000, 2252 FXSR_FLAG = 0x01000000, 2253 SSE_FLAG = 0x02000000, 2254 SSE2_FLAG = 0x04000000, 2255 SS_FLAG = 0x08000000, 2256 HTT_FLAG = 0x10000000, 2257 TM_FLAG = 0x20000000 2258 } FeatureEdxFlag; 2259 2260 static BufferBlob* cpuid_brand_string_stub_blob; 2261 static const int cpuid_brand_string_stub_size = 550; 2262 2263 extern "C" { 2264 typedef void (*getCPUIDBrandString_stub_t)(void*); 2265 } 2266 2267 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2268 2269 // VM_Version statics 2270 enum { 2271 ExtendedFamilyIdLength_INTEL = 16, 2272 ExtendedFamilyIdLength_AMD = 24 2273 }; 2274 2275 const size_t VENDOR_LENGTH = 13; 2276 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2277 static char* _cpu_brand_string = nullptr; 2278 static int64_t _max_qualified_cpu_frequency = 0; 2279 2280 static int _no_of_threads = 0; 2281 static int _no_of_cores = 0; 2282 2283 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2284 "8086/8088", 2285 "", 2286 "286", 2287 "386", 2288 "486", 2289 "Pentium", 2290 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2291 "", 2292 "", 2293 "", 2294 "", 2295 "", 2296 "", 2297 "", 2298 "", 2299 "Pentium 4" 2300 }; 2301 2302 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2303 "", 2304 "", 2305 "", 2306 "", 2307 "5x86", 2308 "K5/K6", 2309 "Athlon/AthlonXP", 2310 "", 2311 "", 2312 "", 2313 "", 2314 "", 2315 "", 2316 "", 2317 "", 2318 "Opteron/Athlon64", 2319 "Opteron QC/Phenom", // Barcelona et.al. 2320 "", 2321 "", 2322 "", 2323 "", 2324 "", 2325 "", 2326 "Zen" 2327 }; 2328 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2329 // September 2013, Vol 3C Table 35-1 2330 const char* const _model_id_pentium_pro[] = { 2331 "", 2332 "Pentium Pro", 2333 "", 2334 "Pentium II model 3", 2335 "", 2336 "Pentium II model 5/Xeon/Celeron", 2337 "Celeron", 2338 "Pentium III/Pentium III Xeon", 2339 "Pentium III/Pentium III Xeon", 2340 "Pentium M model 9", // Yonah 2341 "Pentium III, model A", 2342 "Pentium III, model B", 2343 "", 2344 "Pentium M model D", // Dothan 2345 "", 2346 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2347 "", 2348 "", 2349 "", 2350 "", 2351 "", 2352 "", 2353 "Celeron", // 0x16 Celeron 65nm 2354 "Core 2", // 0x17 Penryn / Harpertown 2355 "", 2356 "", 2357 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2358 "Atom", // 0x1B Z5xx series Silverthorn 2359 "", 2360 "Core 2", // 0x1D Dunnington (6-core) 2361 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2362 "", 2363 "", 2364 "", 2365 "", 2366 "", 2367 "", 2368 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2369 "", 2370 "", 2371 "", // 0x28 2372 "", 2373 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2374 "", 2375 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2376 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2377 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2378 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2379 "", 2380 "", 2381 "", 2382 "", 2383 "", 2384 "", 2385 "", 2386 "", 2387 "", 2388 "", 2389 "Ivy Bridge", // 0x3a 2390 "", 2391 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2392 "", // 0x3d "Next Generation Intel Core Processor" 2393 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2394 "", // 0x3f "Future Generation Intel Xeon Processor" 2395 "", 2396 "", 2397 "", 2398 "", 2399 "", 2400 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2401 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2402 nullptr 2403 }; 2404 2405 /* Brand ID is for back compatibility 2406 * Newer CPUs uses the extended brand string */ 2407 const char* const _brand_id[] = { 2408 "", 2409 "Celeron processor", 2410 "Pentium III processor", 2411 "Intel Pentium III Xeon processor", 2412 "", 2413 "", 2414 "", 2415 "", 2416 "Intel Pentium 4 processor", 2417 nullptr 2418 }; 2419 2420 2421 const char* const _feature_edx_id[] = { 2422 "On-Chip FPU", 2423 "Virtual Mode Extensions", 2424 "Debugging Extensions", 2425 "Page Size Extensions", 2426 "Time Stamp Counter", 2427 "Model Specific Registers", 2428 "Physical Address Extension", 2429 "Machine Check Exceptions", 2430 "CMPXCHG8B Instruction", 2431 "On-Chip APIC", 2432 "", 2433 "Fast System Call", 2434 "Memory Type Range Registers", 2435 "Page Global Enable", 2436 "Machine Check Architecture", 2437 "Conditional Mov Instruction", 2438 "Page Attribute Table", 2439 "36-bit Page Size Extension", 2440 "Processor Serial Number", 2441 "CLFLUSH Instruction", 2442 "", 2443 "Debug Trace Store feature", 2444 "ACPI registers in MSR space", 2445 "Intel Architecture MMX Technology", 2446 "Fast Float Point Save and Restore", 2447 "Streaming SIMD extensions", 2448 "Streaming SIMD extensions 2", 2449 "Self-Snoop", 2450 "Hyper Threading", 2451 "Thermal Monitor", 2452 "", 2453 "Pending Break Enable" 2454 }; 2455 2456 const char* const _feature_extended_edx_id[] = { 2457 "", 2458 "", 2459 "", 2460 "", 2461 "", 2462 "", 2463 "", 2464 "", 2465 "", 2466 "", 2467 "", 2468 "SYSCALL/SYSRET", 2469 "", 2470 "", 2471 "", 2472 "", 2473 "", 2474 "", 2475 "", 2476 "", 2477 "Execute Disable Bit", 2478 "", 2479 "", 2480 "", 2481 "", 2482 "", 2483 "", 2484 "RDTSCP", 2485 "", 2486 "Intel 64 Architecture", 2487 "", 2488 "" 2489 }; 2490 2491 const char* const _feature_ecx_id[] = { 2492 "Streaming SIMD Extensions 3", 2493 "PCLMULQDQ", 2494 "64-bit DS Area", 2495 "MONITOR/MWAIT instructions", 2496 "CPL Qualified Debug Store", 2497 "Virtual Machine Extensions", 2498 "Safer Mode Extensions", 2499 "Enhanced Intel SpeedStep technology", 2500 "Thermal Monitor 2", 2501 "Supplemental Streaming SIMD Extensions 3", 2502 "L1 Context ID", 2503 "", 2504 "Fused Multiply-Add", 2505 "CMPXCHG16B", 2506 "xTPR Update Control", 2507 "Perfmon and Debug Capability", 2508 "", 2509 "Process-context identifiers", 2510 "Direct Cache Access", 2511 "Streaming SIMD extensions 4.1", 2512 "Streaming SIMD extensions 4.2", 2513 "x2APIC", 2514 "MOVBE", 2515 "Popcount instruction", 2516 "TSC-Deadline", 2517 "AESNI", 2518 "XSAVE", 2519 "OSXSAVE", 2520 "AVX", 2521 "F16C", 2522 "RDRAND", 2523 "" 2524 }; 2525 2526 const char* const _feature_extended_ecx_id[] = { 2527 "LAHF/SAHF instruction support", 2528 "Core multi-processor legacy mode", 2529 "", 2530 "", 2531 "", 2532 "Advanced Bit Manipulations: LZCNT", 2533 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2534 "Misaligned SSE mode", 2535 "", 2536 "", 2537 "", 2538 "", 2539 "", 2540 "", 2541 "", 2542 "", 2543 "", 2544 "", 2545 "", 2546 "", 2547 "", 2548 "", 2549 "", 2550 "", 2551 "", 2552 "", 2553 "", 2554 "", 2555 "", 2556 "", 2557 "", 2558 "" 2559 }; 2560 2561 void VM_Version::initialize_tsc(void) { 2562 ResourceMark rm; 2563 2564 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2565 if (cpuid_brand_string_stub_blob == nullptr) { 2566 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2567 } 2568 CodeBuffer c(cpuid_brand_string_stub_blob); 2569 VM_Version_StubGenerator g(&c); 2570 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2571 g.generate_getCPUIDBrandString()); 2572 } 2573 2574 const char* VM_Version::cpu_model_description(void) { 2575 uint32_t cpu_family = extended_cpu_family(); 2576 uint32_t cpu_model = extended_cpu_model(); 2577 const char* model = nullptr; 2578 2579 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2580 for (uint32_t i = 0; i <= cpu_model; i++) { 2581 model = _model_id_pentium_pro[i]; 2582 if (model == nullptr) { 2583 break; 2584 } 2585 } 2586 } 2587 return model; 2588 } 2589 2590 const char* VM_Version::cpu_brand_string(void) { 2591 if (_cpu_brand_string == nullptr) { 2592 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2593 if (nullptr == _cpu_brand_string) { 2594 return nullptr; 2595 } 2596 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2597 if (ret_val != OS_OK) { 2598 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2599 _cpu_brand_string = nullptr; 2600 } 2601 } 2602 return _cpu_brand_string; 2603 } 2604 2605 const char* VM_Version::cpu_brand(void) { 2606 const char* brand = nullptr; 2607 2608 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2609 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2610 brand = _brand_id[0]; 2611 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2612 brand = _brand_id[i]; 2613 } 2614 } 2615 return brand; 2616 } 2617 2618 bool VM_Version::cpu_is_em64t(void) { 2619 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2620 } 2621 2622 bool VM_Version::is_netburst(void) { 2623 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2624 } 2625 2626 bool VM_Version::supports_tscinv_ext(void) { 2627 if (!supports_tscinv_bit()) { 2628 return false; 2629 } 2630 2631 if (is_intel()) { 2632 return true; 2633 } 2634 2635 if (is_amd()) { 2636 return !is_amd_Barcelona(); 2637 } 2638 2639 if (is_hygon()) { 2640 return true; 2641 } 2642 2643 return false; 2644 } 2645 2646 void VM_Version::resolve_cpu_information_details(void) { 2647 2648 // in future we want to base this information on proper cpu 2649 // and cache topology enumeration such as: 2650 // Intel 64 Architecture Processor Topology Enumeration 2651 // which supports system cpu and cache topology enumeration 2652 // either using 2xAPICIDs or initial APICIDs 2653 2654 // currently only rough cpu information estimates 2655 // which will not necessarily reflect the exact configuration of the system 2656 2657 // this is the number of logical hardware threads 2658 // visible to the operating system 2659 _no_of_threads = os::processor_count(); 2660 2661 // find out number of threads per cpu package 2662 int threads_per_package = threads_per_core() * cores_per_cpu(); 2663 2664 // use amount of threads visible to the process in order to guess number of sockets 2665 _no_of_sockets = _no_of_threads / threads_per_package; 2666 2667 // process might only see a subset of the total number of threads 2668 // from a single processor package. Virtualization/resource management for example. 2669 // If so then just write a hard 1 as num of pkgs. 2670 if (0 == _no_of_sockets) { 2671 _no_of_sockets = 1; 2672 } 2673 2674 // estimate the number of cores 2675 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2676 } 2677 2678 2679 const char* VM_Version::cpu_family_description(void) { 2680 int cpu_family_id = extended_cpu_family(); 2681 if (is_amd()) { 2682 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2683 return _family_id_amd[cpu_family_id]; 2684 } 2685 } 2686 if (is_intel()) { 2687 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2688 return cpu_model_description(); 2689 } 2690 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2691 return _family_id_intel[cpu_family_id]; 2692 } 2693 } 2694 if (is_hygon()) { 2695 return "Dhyana"; 2696 } 2697 return "Unknown x86"; 2698 } 2699 2700 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2701 assert(buf != nullptr, "buffer is null!"); 2702 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2703 2704 const char* cpu_type = nullptr; 2705 const char* x64 = nullptr; 2706 2707 if (is_intel()) { 2708 cpu_type = "Intel"; 2709 x64 = cpu_is_em64t() ? " Intel64" : ""; 2710 } else if (is_amd()) { 2711 cpu_type = "AMD"; 2712 x64 = cpu_is_em64t() ? " AMD64" : ""; 2713 } else if (is_hygon()) { 2714 cpu_type = "Hygon"; 2715 x64 = cpu_is_em64t() ? " AMD64" : ""; 2716 } else { 2717 cpu_type = "Unknown x86"; 2718 x64 = cpu_is_em64t() ? " x86_64" : ""; 2719 } 2720 2721 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2722 cpu_type, 2723 cpu_family_description(), 2724 supports_ht() ? " (HT)" : "", 2725 supports_sse3() ? " SSE3" : "", 2726 supports_ssse3() ? " SSSE3" : "", 2727 supports_sse4_1() ? " SSE4.1" : "", 2728 supports_sse4_2() ? " SSE4.2" : "", 2729 supports_sse4a() ? " SSE4A" : "", 2730 is_netburst() ? " Netburst" : "", 2731 is_intel_family_core() ? " Core" : "", 2732 x64); 2733 2734 return OS_OK; 2735 } 2736 2737 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2738 assert(buf != nullptr, "buffer is null!"); 2739 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2740 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2741 2742 // invoke newly generated asm code to fetch CPU Brand String 2743 getCPUIDBrandString_stub(&_cpuid_info); 2744 2745 // fetch results into buffer 2746 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2747 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2748 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2749 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2750 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2751 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2752 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2753 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2754 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2755 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2756 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2757 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2758 2759 return OS_OK; 2760 } 2761 2762 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2763 guarantee(buf != nullptr, "buffer is null!"); 2764 guarantee(buf_len > 0, "buffer len not enough!"); 2765 2766 unsigned int flag = 0; 2767 unsigned int fi = 0; 2768 size_t written = 0; 2769 const char* prefix = ""; 2770 2771 #define WRITE_TO_BUF(string) \ 2772 { \ 2773 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2774 if (res < 0) { \ 2775 return buf_len - 1; \ 2776 } \ 2777 written += res; \ 2778 if (prefix[0] == '\0') { \ 2779 prefix = ", "; \ 2780 } \ 2781 } 2782 2783 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2784 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2785 continue; /* no hyperthreading */ 2786 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2787 continue; /* no fast system call */ 2788 } 2789 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2790 WRITE_TO_BUF(_feature_edx_id[fi]); 2791 } 2792 } 2793 2794 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2795 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2796 WRITE_TO_BUF(_feature_ecx_id[fi]); 2797 } 2798 } 2799 2800 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2801 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2802 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2803 } 2804 } 2805 2806 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2807 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2808 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2809 } 2810 } 2811 2812 if (supports_tscinv_bit()) { 2813 WRITE_TO_BUF("Invariant TSC"); 2814 } 2815 2816 return written; 2817 } 2818 2819 /** 2820 * Write a detailed description of the cpu to a given buffer, including 2821 * feature set. 2822 */ 2823 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2824 assert(buf != nullptr, "buffer is null!"); 2825 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2826 2827 static const char* unknown = "<unknown>"; 2828 char vendor_id[VENDOR_LENGTH]; 2829 const char* family = nullptr; 2830 const char* model = nullptr; 2831 const char* brand = nullptr; 2832 int outputLen = 0; 2833 2834 family = cpu_family_description(); 2835 if (family == nullptr) { 2836 family = unknown; 2837 } 2838 2839 model = cpu_model_description(); 2840 if (model == nullptr) { 2841 model = unknown; 2842 } 2843 2844 brand = cpu_brand_string(); 2845 2846 if (brand == nullptr) { 2847 brand = cpu_brand(); 2848 if (brand == nullptr) { 2849 brand = unknown; 2850 } 2851 } 2852 2853 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2854 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2855 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2856 vendor_id[VENDOR_LENGTH-1] = '\0'; 2857 2858 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2859 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2860 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2861 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2862 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2863 "Supports: ", 2864 brand, 2865 vendor_id, 2866 family, 2867 extended_cpu_family(), 2868 model, 2869 extended_cpu_model(), 2870 cpu_stepping(), 2871 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2872 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2873 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2874 _cpuid_info.std_cpuid1_eax.value, 2875 _cpuid_info.std_cpuid1_ebx.value, 2876 _cpuid_info.std_cpuid1_ecx.value, 2877 _cpuid_info.std_cpuid1_edx.value, 2878 _cpuid_info.ext_cpuid1_eax, 2879 _cpuid_info.ext_cpuid1_ebx, 2880 _cpuid_info.ext_cpuid1_ecx, 2881 _cpuid_info.ext_cpuid1_edx); 2882 2883 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2884 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2885 return OS_ERR; 2886 } 2887 2888 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2889 2890 return OS_OK; 2891 } 2892 2893 2894 // Fill in Abstract_VM_Version statics 2895 void VM_Version::initialize_cpu_information() { 2896 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2897 assert(!_initialized, "shouldn't be initialized yet"); 2898 resolve_cpu_information_details(); 2899 2900 // initialize cpu_name and cpu_desc 2901 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2902 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2903 _initialized = true; 2904 } 2905 2906 /** 2907 * For information about extracting the frequency from the cpu brand string, please see: 2908 * 2909 * Intel Processor Identification and the CPUID Instruction 2910 * Application Note 485 2911 * May 2012 2912 * 2913 * The return value is the frequency in Hz. 2914 */ 2915 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2916 const char* const brand_string = cpu_brand_string(); 2917 if (brand_string == nullptr) { 2918 return 0; 2919 } 2920 const int64_t MEGA = 1000000; 2921 int64_t multiplier = 0; 2922 int64_t frequency = 0; 2923 uint8_t idx = 0; 2924 // The brand string buffer is at most 48 bytes. 2925 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 2926 for (; idx < 48-2; ++idx) { 2927 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2928 // Search brand string for "yHz" where y is M, G, or T. 2929 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2930 if (brand_string[idx] == 'M') { 2931 multiplier = MEGA; 2932 } else if (brand_string[idx] == 'G') { 2933 multiplier = MEGA * 1000; 2934 } else if (brand_string[idx] == 'T') { 2935 multiplier = MEGA * MEGA; 2936 } 2937 break; 2938 } 2939 } 2940 if (multiplier > 0) { 2941 // Compute frequency (in Hz) from brand string. 2942 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2943 frequency = (brand_string[idx-4] - '0') * multiplier; 2944 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2945 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2946 } else { // format is "xxxx" 2947 frequency = (brand_string[idx-4] - '0') * 1000; 2948 frequency += (brand_string[idx-3] - '0') * 100; 2949 frequency += (brand_string[idx-2] - '0') * 10; 2950 frequency += (brand_string[idx-1] - '0'); 2951 frequency *= multiplier; 2952 } 2953 } 2954 return frequency; 2955 } 2956 2957 2958 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2959 if (_max_qualified_cpu_frequency == 0) { 2960 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2961 } 2962 return _max_qualified_cpu_frequency; 2963 } 2964 2965 uint64_t VM_Version::CpuidInfo::feature_flags() const { 2966 uint64_t result = 0; 2967 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2968 result |= CPU_CX8; 2969 if (std_cpuid1_edx.bits.cmov != 0) 2970 result |= CPU_CMOV; 2971 if (std_cpuid1_edx.bits.clflush != 0) 2972 result |= CPU_FLUSH; 2973 #ifdef _LP64 2974 // clflush should always be available on x86_64 2975 // if not we are in real trouble because we rely on it 2976 // to flush the code cache. 2977 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2978 #endif 2979 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2980 ext_cpuid1_edx.bits.fxsr != 0)) 2981 result |= CPU_FXSR; 2982 // HT flag is set for multi-core processors also. 2983 if (threads_per_core() > 1) 2984 result |= CPU_HT; 2985 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2986 ext_cpuid1_edx.bits.mmx != 0)) 2987 result |= CPU_MMX; 2988 if (std_cpuid1_edx.bits.sse != 0) 2989 result |= CPU_SSE; 2990 if (std_cpuid1_edx.bits.sse2 != 0) 2991 result |= CPU_SSE2; 2992 if (std_cpuid1_ecx.bits.sse3 != 0) 2993 result |= CPU_SSE3; 2994 if (std_cpuid1_ecx.bits.ssse3 != 0) 2995 result |= CPU_SSSE3; 2996 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2997 result |= CPU_SSE4_1; 2998 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2999 result |= CPU_SSE4_2; 3000 if (std_cpuid1_ecx.bits.popcnt != 0) 3001 result |= CPU_POPCNT; 3002 if (sefsl1_cpuid7_edx.bits.apx_f != 0 && 3003 xem_xcr0_eax.bits.apx_f != 0) { 3004 result |= CPU_APX_F; 3005 } 3006 if (std_cpuid1_ecx.bits.avx != 0 && 3007 std_cpuid1_ecx.bits.osxsave != 0 && 3008 xem_xcr0_eax.bits.sse != 0 && 3009 xem_xcr0_eax.bits.ymm != 0) { 3010 result |= CPU_AVX; 3011 result |= CPU_VZEROUPPER; 3012 if (sefsl1_cpuid7_eax.bits.sha512 != 0) 3013 result |= CPU_SHA512; 3014 if (std_cpuid1_ecx.bits.f16c != 0) 3015 result |= CPU_F16C; 3016 if (sef_cpuid7_ebx.bits.avx2 != 0) { 3017 result |= CPU_AVX2; 3018 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) 3019 result |= CPU_AVX_IFMA; 3020 } 3021 if (sef_cpuid7_ecx.bits.gfni != 0) 3022 result |= CPU_GFNI; 3023 if (sef_cpuid7_ebx.bits.avx512f != 0 && 3024 xem_xcr0_eax.bits.opmask != 0 && 3025 xem_xcr0_eax.bits.zmm512 != 0 && 3026 xem_xcr0_eax.bits.zmm32 != 0) { 3027 result |= CPU_AVX512F; 3028 if (sef_cpuid7_ebx.bits.avx512cd != 0) 3029 result |= CPU_AVX512CD; 3030 if (sef_cpuid7_ebx.bits.avx512dq != 0) 3031 result |= CPU_AVX512DQ; 3032 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 3033 result |= CPU_AVX512_IFMA; 3034 if (sef_cpuid7_ebx.bits.avx512pf != 0) 3035 result |= CPU_AVX512PF; 3036 if (sef_cpuid7_ebx.bits.avx512er != 0) 3037 result |= CPU_AVX512ER; 3038 if (sef_cpuid7_ebx.bits.avx512bw != 0) 3039 result |= CPU_AVX512BW; 3040 if (sef_cpuid7_ebx.bits.avx512vl != 0) 3041 result |= CPU_AVX512VL; 3042 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 3043 result |= CPU_AVX512_VPOPCNTDQ; 3044 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 3045 result |= CPU_AVX512_VPCLMULQDQ; 3046 if (sef_cpuid7_ecx.bits.vaes != 0) 3047 result |= CPU_AVX512_VAES; 3048 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 3049 result |= CPU_AVX512_VNNI; 3050 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 3051 result |= CPU_AVX512_BITALG; 3052 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 3053 result |= CPU_AVX512_VBMI; 3054 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 3055 result |= CPU_AVX512_VBMI2; 3056 } 3057 } 3058 if (std_cpuid1_ecx.bits.hv != 0) 3059 result |= CPU_HV; 3060 if (sef_cpuid7_ebx.bits.bmi1 != 0) 3061 result |= CPU_BMI1; 3062 if (std_cpuid1_edx.bits.tsc != 0) 3063 result |= CPU_TSC; 3064 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3065 result |= CPU_TSCINV_BIT; 3066 if (std_cpuid1_ecx.bits.aes != 0) 3067 result |= CPU_AES; 3068 if (sef_cpuid7_ebx.bits.erms != 0) 3069 result |= CPU_ERMS; 3070 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3071 result |= CPU_FSRM; 3072 if (std_cpuid1_ecx.bits.clmul != 0) 3073 result |= CPU_CLMUL; 3074 if (sef_cpuid7_ebx.bits.rtm != 0) 3075 result |= CPU_RTM; 3076 if (sef_cpuid7_ebx.bits.adx != 0) 3077 result |= CPU_ADX; 3078 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3079 result |= CPU_BMI2; 3080 if (sef_cpuid7_ebx.bits.sha != 0) 3081 result |= CPU_SHA; 3082 if (std_cpuid1_ecx.bits.fma != 0) 3083 result |= CPU_FMA; 3084 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3085 result |= CPU_FLUSHOPT; 3086 if (ext_cpuid1_edx.bits.rdtscp != 0) 3087 result |= CPU_RDTSCP; 3088 if (sef_cpuid7_ecx.bits.rdpid != 0) 3089 result |= CPU_RDPID; 3090 3091 // AMD|Hygon features. 3092 if (is_amd_family()) { 3093 if ((ext_cpuid1_edx.bits.tdnow != 0) || 3094 (ext_cpuid1_ecx.bits.prefetchw != 0)) 3095 result |= CPU_3DNOW_PREFETCH; 3096 if (ext_cpuid1_ecx.bits.lzcnt != 0) 3097 result |= CPU_LZCNT; 3098 if (ext_cpuid1_ecx.bits.sse4a != 0) 3099 result |= CPU_SSE4A; 3100 } 3101 3102 // Intel features. 3103 if (is_intel()) { 3104 if (ext_cpuid1_ecx.bits.lzcnt != 0) { 3105 result |= CPU_LZCNT; 3106 } 3107 if (ext_cpuid1_ecx.bits.prefetchw != 0) { 3108 result |= CPU_3DNOW_PREFETCH; 3109 } 3110 if (sef_cpuid7_ebx.bits.clwb != 0) { 3111 result |= CPU_CLWB; 3112 } 3113 if (sef_cpuid7_edx.bits.serialize != 0) 3114 result |= CPU_SERIALIZE; 3115 3116 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0) 3117 result |= CPU_AVX512_FP16; 3118 } 3119 3120 // ZX features. 3121 if (is_zx()) { 3122 if (ext_cpuid1_ecx.bits.lzcnt != 0) { 3123 result |= CPU_LZCNT; 3124 } 3125 if (ext_cpuid1_ecx.bits.prefetchw != 0) { 3126 result |= CPU_3DNOW_PREFETCH; 3127 } 3128 } 3129 3130 // Protection key features. 3131 if (sef_cpuid7_ecx.bits.pku != 0) { 3132 result |= CPU_PKU; 3133 } 3134 if (sef_cpuid7_ecx.bits.ospke != 0) { 3135 result |= CPU_OSPKE; 3136 } 3137 3138 // Control flow enforcement (CET) features. 3139 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3140 result |= CPU_CET_SS; 3141 } 3142 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3143 result |= CPU_CET_IBT; 3144 } 3145 3146 // Composite features. 3147 if (supports_tscinv_bit() && 3148 ((is_amd_family() && !is_amd_Barcelona()) || 3149 is_intel_tsc_synched_at_init())) { 3150 result |= CPU_TSCINV; 3151 } 3152 3153 return result; 3154 } 3155 3156 bool VM_Version::os_supports_avx_vectors() { 3157 bool retVal = false; 3158 int nreg = 2 LP64_ONLY(+2); 3159 if (supports_evex()) { 3160 // Verify that OS save/restore all bits of EVEX registers 3161 // during signal processing. 3162 retVal = true; 3163 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3164 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3165 retVal = false; 3166 break; 3167 } 3168 } 3169 } else if (supports_avx()) { 3170 // Verify that OS save/restore all bits of AVX registers 3171 // during signal processing. 3172 retVal = true; 3173 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3174 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3175 retVal = false; 3176 break; 3177 } 3178 } 3179 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3180 if (retVal == false) { 3181 // Verify that OS save/restore all bits of EVEX registers 3182 // during signal processing. 3183 retVal = true; 3184 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3185 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3186 retVal = false; 3187 break; 3188 } 3189 } 3190 } 3191 } 3192 return retVal; 3193 } 3194 3195 bool VM_Version::os_supports_apx_egprs() { 3196 if (!supports_apx_f()) { 3197 return false; 3198 } 3199 // Enable APX support for product builds after 3200 // completion of planned features listed in JDK-8329030. 3201 #if !defined(PRODUCT) 3202 if (_cpuid_info.apx_save[0] != egpr_test_value() || 3203 _cpuid_info.apx_save[1] != egpr_test_value()) { 3204 return false; 3205 } 3206 return true; 3207 #else 3208 return false; 3209 #endif 3210 } 3211 3212 uint VM_Version::cores_per_cpu() { 3213 uint result = 1; 3214 if (is_intel()) { 3215 bool supports_topology = supports_processor_topology(); 3216 if (supports_topology) { 3217 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3218 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3219 } 3220 if (!supports_topology || result == 0) { 3221 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3222 } 3223 } else if (is_amd_family()) { 3224 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3225 } else if (is_zx()) { 3226 bool supports_topology = supports_processor_topology(); 3227 if (supports_topology) { 3228 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3229 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3230 } 3231 if (!supports_topology || result == 0) { 3232 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3233 } 3234 } 3235 return result; 3236 } 3237 3238 uint VM_Version::threads_per_core() { 3239 uint result = 1; 3240 if (is_intel() && supports_processor_topology()) { 3241 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3242 } else if (is_zx() && supports_processor_topology()) { 3243 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3244 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3245 if (cpu_family() >= 0x17) { 3246 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3247 } else { 3248 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3249 cores_per_cpu(); 3250 } 3251 } 3252 return (result == 0 ? 1 : result); 3253 } 3254 3255 uint VM_Version::L1_line_size() { 3256 uint result = 0; 3257 if (is_intel()) { 3258 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3259 } else if (is_amd_family()) { 3260 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3261 } else if (is_zx()) { 3262 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3263 } 3264 if (result < 32) // not defined ? 3265 result = 32; // 32 bytes by default on x86 and other x64 3266 return result; 3267 } 3268 3269 bool VM_Version::is_intel_tsc_synched_at_init() { 3270 if (is_intel_family_core()) { 3271 uint32_t ext_model = extended_cpu_model(); 3272 if (ext_model == CPU_MODEL_NEHALEM_EP || 3273 ext_model == CPU_MODEL_WESTMERE_EP || 3274 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3275 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3276 // <= 2-socket invariant tsc support. EX versions are usually used 3277 // in > 2-socket systems and likely don't synchronize tscs at 3278 // initialization. 3279 // Code that uses tsc values must be prepared for them to arbitrarily 3280 // jump forward or backward. 3281 return true; 3282 } 3283 } 3284 return false; 3285 } 3286 3287 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3288 // Hardware prefetching (distance/size in bytes): 3289 // Pentium 3 - 64 / 32 3290 // Pentium 4 - 256 / 128 3291 // Athlon - 64 / 32 ???? 3292 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3293 // Core - 128 / 64 3294 // 3295 // Software prefetching (distance in bytes / instruction with best score): 3296 // Pentium 3 - 128 / prefetchnta 3297 // Pentium 4 - 512 / prefetchnta 3298 // Athlon - 128 / prefetchnta 3299 // Opteron - 256 / prefetchnta 3300 // Core - 256 / prefetchnta 3301 // It will be used only when AllocatePrefetchStyle > 0 3302 3303 if (is_amd_family()) { // AMD | Hygon 3304 if (supports_sse2()) { 3305 return 256; // Opteron 3306 } else { 3307 return 128; // Athlon 3308 } 3309 } else { // Intel 3310 if (supports_sse3() && cpu_family() == 6) { 3311 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3312 return 192; 3313 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3314 #ifdef _LP64 3315 return 384; 3316 #else 3317 return 320; 3318 #endif 3319 } 3320 } 3321 if (supports_sse2()) { 3322 if (cpu_family() == 6) { 3323 return 256; // Pentium M, Core, Core2 3324 } else { 3325 return 512; // Pentium 4 3326 } 3327 } else { 3328 return 128; // Pentium 3 (and all other old CPUs) 3329 } 3330 } 3331 } 3332 3333 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3334 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3335 switch (id) { 3336 case vmIntrinsics::_floatToFloat16: 3337 case vmIntrinsics::_float16ToFloat: 3338 if (!supports_float16()) { 3339 return false; 3340 } 3341 break; 3342 default: 3343 break; 3344 } 3345 return true; 3346 }