/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
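
// These stubs are materialized during VM startup, roughly as sketched
// below (a simplified view of the initialization code that creates them,
// not a verbatim copy of it):
//
//   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
//   CodeBuffer c(stub_blob);
//   VM_Version_StubGenerator g(&c);
//   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
//                                      g.generate_get_cpu_info());
//   get_cpu_info_stub(&_cpuid_info);  // fills in the raw CPUID data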

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
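
// For illustration only: a direct query of one of these leaves using the
// GCC/Clang builtin from <cpuid.h> (an assumption for this sketch; the VM
// instead generates its own stub below, since it must also handle
// pre-CPUID processors and capture many leaves into CpuidInfo):
//
//   #include <cpuid.h>
//   uint32_t a, b, c, d;
//   if (__get_cpuid_count(CPUID_STANDARD_FN_1, 0, &a, &b, &c, &d)) {
//     bool osxsave = (c & (1u << 27)) != 0;  // ECX bit 27
//     bool avx     = (c & (1u << 28)) != 0;  // ECX bit 28
//     // together these form the 0x18000000 mask tested in the stub
//   }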

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
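
    // XCR0 bit layout relevant to the masks used in this generator
    // (per the Intel SDM; noted here for reference):
    //   bit 0: x87    bit 1: SSE (XMM)    bit 2: AVX (upper YMM)
    //   bit 5: opmask (k0-k7)   bit 6: upper 256 of ZMM0-15   bit 7: ZMM16-31
    // So mask 0x6 tests the SSE|YMM state components and mask 0xE0 tests
    // the three AVX-512 state components checked further below.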

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // ECX = 1
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_ecx1_offset())));
    __ movl(Address(rsi, 0), rax);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if the OS has enabled SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit 16: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));
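
    // Sketch of how the deliberate SEGV above is survived: the platform
    // signal handler recognizes the faulting PC and resumes execution at
    // the continuation address recorded just below (see the handler in the
    // platform's os_*_x86 code; shown here for orientation only):
    //
    //   if (VM_Version::is_cpuinfo_segv_addr(pc)) {
    //     // Verify that OS saves/restores AVX registers correctly
    //     stub = VM_Version::cpuinfo_cont_addr();
    //   }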

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
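
  // After the stub has run, the saved vector state can be checked from C++
  // by comparing the stored words against the test pattern; a simplified
  // sketch of that verification (the actual check is os_supports_avx_vectors(),
  // used later in get_processor_features()):
  //
  //   bool upper_bits_ok = true;
  //   for (int i = 0; i < 8 && upper_bits_ok; i++) {  // one YMM register = 8 dwords
  //     upper_bits_ok = (_cpuid_info.ymm_save[i] == VM_Version::ymm_test_value());
  //   }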

  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }

  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
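
  // Typical use of the stub above when probing for a hypervisor: query the
  // hypervisor CPUID range at leaf 0x40000000 and read the vendor signature
  // from ebx/ecx/edx. A sketch (the real probing lives in the
  // virtualization-detection code that consumes detect_virt_stub):
  //
  //   uint32_t regs[4];
  //   detect_virt_stub(0x40000000, regs);
  //   char signature[13];
  //   memcpy(signature + 0, &regs[1], 4);  // ebx
  //   memcpy(signature + 4, &regs[2], 4);  // ecx
  //   memcpy(signature + 8, &regs[3], 4);  // edx
  //   signature[12] = '\0';                // e.g. "KVMKVMKVM" or "VMwareVMware"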

  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003)  // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004)  // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
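
// The three extended leaves above return 48 ASCII bytes of brand string in
// register order. Reassembling it is a straight copy of the twelve stored
// dwords; a sketch, assuming the proc_name_* fields are laid out
// contiguously in CpuidInfo (which the consecutive offsets above rely on):
//
//   char brand[49];
//   memcpy(brand, &_cpuid_info.proc_name_0, 48);
//   brand[48] = '\0';  // CPUID pads the string with NUL bytes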

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }
  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }
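
  // Worked example of the clamping above: -XX:UseAVX=3 on a CPU that
  // reports AVX2 but not AVX-512 gives use_avx_limit == 2, so the user
  // sees the "not supported" warning and the flag is lowered to 2; the
  // CPU_AVX512* feature bits are then stripped below so that later
  // supports_avx512*() queries stay consistent with the effective flag.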

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
    }
  }

  // APX support not enabled yet
  if (UseAPX) {
    if (!FLAG_IS_DEFAULT(UseAPX)) {
      warning("APX is not supported on this CPU.");
    }
    FLAG_SET_DEFAULT(UseAPX, false);
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE3 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (!supports_rtm() && UseRTMLocking) {
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }
1257 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); 1258 } 1259 if (is_intel_family_core()) { 1260 if ((_model == CPU_MODEL_HASWELL_E3) || 1261 (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) || 1262 (_model == CPU_MODEL_BROADWELL && _stepping < 4)) { 1263 // currently a collision between SKL and HSW_E3 1264 if (!UnlockExperimentalVMOptions && UseAVX < 3) { 1265 vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this " 1266 "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag."); 1267 } else { 1268 warning("UseRTMLocking is only available as experimental option on this platform."); 1269 } 1270 } 1271 } 1272 if (!FLAG_IS_CMDLINE(UseRTMLocking)) { 1273 // RTM locking should be used only for applications with 1274 // high lock contention. For now we do not use it by default. 1275 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); 1276 } 1277 } else { // !UseRTMLocking 1278 if (UseRTMForStackLocks) { 1279 if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { 1280 warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); 1281 } 1282 FLAG_SET_DEFAULT(UseRTMForStackLocks, false); 1283 } 1284 if (UseRTMDeopt) { 1285 FLAG_SET_DEFAULT(UseRTMDeopt, false); 1286 } 1287 if (PrintPreciseRTMLockingStatistics) { 1288 FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); 1289 } 1290 } 1291 #else 1292 if (UseRTMLocking) { 1293 // Only C2 does RTM locking optimization. 1294 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); 1295 } 1296 #endif 1297 1298 #ifdef COMPILER2 1299 if (UseFPUForSpilling) { 1300 if (UseSSE < 2) { 1301 // Only supported with SSE2+ 1302 FLAG_SET_DEFAULT(UseFPUForSpilling, false); 1303 } 1304 } 1305 #endif 1306 1307 #if COMPILER2_OR_JVMCI 1308 int max_vector_size = 0; 1309 if (UseSSE < 2) { 1310 // Vectors (in XMM) are only supported with SSE2+ 1311 // SSE is always 2 on x64. 

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 bytes vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 bytes vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else
#endif
  if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On newer CPUs, instructions that update the whole XMM register should
  // be used to prevent partial-register stalls due to dependencies on the
  // high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).

  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
      warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#endif // _LP64
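
  // Note on the count-zero instructions gated below: on CPUs without
  // LZCNT/TZCNT support the same encodings execute as plain bsr/bsf (the
  // rep prefix is ignored), with different semantics for a zero input;
  // hence the explicit CPUID feature gates rather than a try-and-see
  // probe. For example:
  //
  //   lzcnt eax, ecx   ; ecx == 0  ->  eax = 32 (operand size), CF = 1
  //   bsr   eax, ecx   ; ecx == 0  ->  eax undefined, ZF = 1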
1746 if (supports_lzcnt()) { 1747 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1748 UseCountLeadingZerosInstruction = true; 1749 } 1750 } else if (UseCountLeadingZerosInstruction) { 1751 warning("lzcnt instruction is not available on this CPU"); 1752 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1753 } 1754 1755 // Use count trailing zeros instruction if available 1756 if (supports_bmi1()) { 1757 // tzcnt does not require VEX prefix 1758 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1759 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1760 // Don't use tzcnt if BMI1 is switched off on command line. 1761 UseCountTrailingZerosInstruction = false; 1762 } else { 1763 UseCountTrailingZerosInstruction = true; 1764 } 1765 } 1766 } else if (UseCountTrailingZerosInstruction) { 1767 warning("tzcnt instruction is not available on this CPU"); 1768 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1769 } 1770 1771 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1772 // VEX prefix is generated only when AVX > 0. 1773 if (supports_bmi1() && supports_avx()) { 1774 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1775 UseBMI1Instructions = true; 1776 } 1777 } else if (UseBMI1Instructions) { 1778 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1779 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1780 } 1781 1782 if (supports_bmi2() && supports_avx()) { 1783 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1784 UseBMI2Instructions = true; 1785 } 1786 } else if (UseBMI2Instructions) { 1787 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1788 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1789 } 1790 1791 // Use population count instruction if available. 1792 if (supports_popcnt()) { 1793 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1794 UsePopCountInstruction = true; 1795 } 1796 } else if (UsePopCountInstruction) { 1797 warning("POPCNT instruction is not available on this CPU"); 1798 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1799 } 1800 1801 // Use fast-string operations if available. 1802 if (supports_erms()) { 1803 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1804 UseFastStosb = true; 1805 } 1806 } else if (UseFastStosb) { 1807 warning("fast-string operations are not available on this CPU"); 1808 FLAG_SET_DEFAULT(UseFastStosb, false); 1809 } 1810 1811 // For AMD Processors use XMM/YMM MOVDQU instructions 1812 // for Object Initialization as default 1813 if (is_amd() && cpu_family() >= 0x19) { 1814 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1815 UseFastStosb = false; 1816 } 1817 } 1818 1819 #ifdef COMPILER2 1820 if (is_intel() && MaxVectorSize > 16) { 1821 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1822 UseFastStosb = false; 1823 } 1824 } 1825 #endif 1826 1827 // Use XMM/YMM MOVDQU instruction for Object Initialization 1828 if (UseSSE >= 2 && UseUnalignedLoadStores) { 1829 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1830 UseXMMForObjInit = true; 1831 } 1832 } else if (UseXMMForObjInit) { 1833 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1834 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1835 } 1836 1837 #ifdef COMPILER2 1838 if (FLAG_IS_DEFAULT(AlignVector)) { 1839 // Modern processors allow misaligned memory operations for vectors. 
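    // AlignVector therefore only stays on for older cores where movdqu-style
    // unaligned load/stores were not enabled above.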
1840     AlignVector = !UseUnalignedLoadStores;
1841   }
1842 #endif // COMPILER2
1843
1844   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1845     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1846       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1847     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1848       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1849     }
1850   }
1851
1852   // Allocation prefetch settings
1853   int cache_line_size = checked_cast<int>(prefetch_data_size());
1854   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1855       (cache_line_size > AllocatePrefetchStepSize)) {
1856     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1857   }
1858
1859   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1860     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1861     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1862       warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
1863     }
1864     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1865   }
1866
1867   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1868     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1869     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1870   }
1871
1872   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1873     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1874         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1875       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1876     }
1877 #ifdef COMPILER2
1878     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1879       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1880     }
1881 #endif
1882   }
1883
1884   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1885 #ifdef COMPILER2
1886     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1887       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1888     }
1889 #endif
1890   }
1891
1892 #ifdef _LP64
1893   // Prefetch settings
1894
1895   // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1896   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1897   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1898   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1899
1900   // gc copy/scan is disabled if prefetchw isn't supported, because
1901   // Prefetch::write emits an inlined prefetchw on Linux.
1902   // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1903   // The prefetcht0 instruction used here works on both amd64 and em64t.
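  // With 64-byte dcache lines, the 576-byte interval set below keeps the
  // prefetch nine lines ahead of the copy/scan cursor.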
1904
1905   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1906     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1907   }
1908   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1909     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1910   }
1911 #endif
1912
1913   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1914       (cache_line_size > ContendedPaddingWidth))
1915     ContendedPaddingWidth = cache_line_size;
1916
1917   // This machine allows unaligned memory accesses
1918   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1919     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1920   }
1921
1922 #ifndef PRODUCT
1923   if (log_is_enabled(Info, os, cpu)) {
1924     LogStream ls(Log(os, cpu)::info());
1925     outputStream* log = &ls;
1926     log->print_cr("Logical CPUs per core: %u",
1927                   logical_processors_per_package());
1928     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1929     log->print("UseSSE=%d", UseSSE);
1930     if (UseAVX > 0) {
1931       log->print(" UseAVX=%d", UseAVX);
1932     }
1933     if (UseAES) {
1934       log->print(" UseAES=1");
1935     }
1936 #ifdef COMPILER2
1937     if (MaxVectorSize > 0) {
1938       log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1939     }
1940 #endif
1941     log->cr();
1942     log->print("Allocation");
1943     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1944       log->print_cr(": no prefetching");
1945     } else {
1946       log->print(" prefetching: ");
1947       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1948         log->print("PREFETCHW");
1949       } else if (UseSSE >= 1) {
1950         if (AllocatePrefetchInstr == 0) {
1951           log->print("PREFETCHNTA");
1952         } else if (AllocatePrefetchInstr == 1) {
1953           log->print("PREFETCHT0");
1954         } else if (AllocatePrefetchInstr == 2) {
1955           log->print("PREFETCHT2");
1956         } else if (AllocatePrefetchInstr == 3) {
1957           log->print("PREFETCHW");
1958         }
1959       }
1960       if (AllocatePrefetchLines > 1) {
1961         log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1962       } else {
1963         log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1964       }
1965     }
1966
1967     if (PrefetchCopyIntervalInBytes > 0) {
1968       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1969     }
1970     if (PrefetchScanIntervalInBytes > 0) {
1971       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1972     }
1973     if (ContendedPaddingWidth > 0) {
1974       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1975     }
1976   }
1977 #endif // !PRODUCT
1978   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1979     FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1980   }
1981   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1982     FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1983   }
1984 }
1985
1986 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1987   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1988   if (vrt == XenHVM) {
1989     st->print_cr("Xen hardware-assisted virtualization detected");
1990   } else if (vrt == KVM) {
1991     st->print_cr("KVM virtualization detected");
1992   } else if (vrt == VMWare) {
1993     st->print_cr("VMWare virtualization detected");
1994     VirtualizationSupport::print_virtualization_info(st);
1995   } else if (vrt == HyperV) {
1996     st->print_cr("Hyper-V virtualization detected");
1997   } else if (vrt == HyperVRole) {
1998     st->print_cr("Hyper-V role detected");
1999   }
2000 }
2001
2002 bool VM_Version::compute_has_intel_jcc_erratum() {
2003   if (!is_intel_family_core()) {
2004
// Only Intel CPUs are affected. 2005 return false; 2006 } 2007 // The following table of affected CPUs is based on the following document released by Intel: 2008 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 2009 switch (_model) { 2010 case 0x8E: 2011 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2012 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 2013 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 2014 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 2015 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 2016 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2017 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 2018 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 2019 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 2020 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 2021 case 0x4E: 2022 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 2023 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 2024 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 2025 return _stepping == 0x3; 2026 case 0x55: 2027 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 2028 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 2029 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 2030 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 2031 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 2032 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 2033 return _stepping == 0x4 || _stepping == 0x7; 2034 case 0x5E: 2035 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 2036 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 2037 return _stepping == 0x3; 2038 case 0x9E: 2039 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2040 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2041 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2042 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2043 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2044 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based 
on microarchitecture code name Coffee Lake H
2045     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2046     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2047     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2048     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2049     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2050     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2051     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
2052     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2053     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2054   case 0xA5:
2055     // Not in Intel documentation.
2056     // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2057     return true;
2058   case 0xA6:
2059     // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2060     return _stepping == 0x0;
2061   case 0xAE:
2062     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2063     return _stepping == 0xA;
2064   default:
2065     // If we are running on another Intel machine not recognized in the table, we are okay.
2066     return false;
2067   }
2068 }
2069
2070 // On Xen, the cpuid instruction returns
2071 //  eax / registers[0]: Version of Xen
2072 //  ebx / registers[1]: chars 'XenV'
2073 //  ecx / registers[2]: chars 'MMXe'
2074 //  edx / registers[3]: chars 'nVMM'
2075 //
2076 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2077 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2078 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2079 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2080 //
2081 // more information:
2082 // https://kb.vmware.com/s/article/1009458
2083 //
2084 void VM_Version::check_virtualizations() {
2085   uint32_t registers[4] = {0};
2086   char signature[13] = {0};
2087
2088   // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
2089   // from 0x40000000 until 0x40010000.
2090   // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2091   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2092     detect_virt_stub(leaf, registers);
2093     memcpy(signature, &registers[1], 12);
2094
2095     if (strncmp("VMwareVMware", signature, 12) == 0) {
2096       Abstract_VM_Version::_detected_virtualization = VMWare;
2097       // check for extended metrics from guestlib
2098       VirtualizationSupport::initialize();
2099     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2100       Abstract_VM_Version::_detected_virtualization = HyperV;
2101 #ifdef _WINDOWS
2102       // CPUID leaf 0x40000007 is available to the root partition only.
2103       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2104       // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2105       detect_virt_stub(0x40000007, registers);
2106       if ((registers[0] != 0x0) ||
2107           (registers[1] != 0x0) ||
2108           (registers[2] != 0x0) ||
2109           (registers[3] != 0x0)) {
2110         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2111       }
2112 #endif
2113     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2114       Abstract_VM_Version::_detected_virtualization = KVM;
2115     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2116       Abstract_VM_Version::_detected_virtualization = XenHVM;
2117     }
2118   }
2119 }
2120
2121 #ifdef COMPILER2
2122 // Determine if it's running on Cascade Lake using default options.
2123 bool VM_Version::is_default_intel_cascade_lake() {
2124   return FLAG_IS_DEFAULT(UseAVX) &&
2125          FLAG_IS_DEFAULT(MaxVectorSize) &&
2126          UseAVX > 2 &&
2127          is_intel_cascade_lake();
2128 }
2129 #endif
2130
2131 bool VM_Version::is_intel_cascade_lake() {
2132   return is_intel_skylake() && _stepping >= 5;
2133 }
2134
2135 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2136 // for implementing the array copy and clear operations.
2137 // The Intel platforms that support the serialize instruction
2138 // have an improved implementation of 64-byte load/stores, so the default
2139 // threshold is set to 0 for these platforms.
2140 int VM_Version::avx3_threshold() {
2141   return (is_intel_family_core() &&
2142           supports_serialize() &&
2143           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2144 }
2145
2146 static bool _vm_version_initialized = false;
2147
2148 void VM_Version::initialize() {
2149   ResourceMark rm;
2150   // Making this stub must be the FIRST use of the assembler.
2151   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2152   if (stub_blob == nullptr) {
2153     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2154   }
2155   CodeBuffer c(stub_blob);
2156   VM_Version_StubGenerator g(&c);
2157
2158   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2159                                      g.generate_get_cpu_info());
2160   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2161                                     g.generate_detect_virt());
2162
2163   get_processor_features();
2164
2165   LP64_ONLY(Assembler::precompute_instructions();)
2166
2167   if (VM_Version::supports_hv()) { // Supports hypervisor
2168     check_virtualizations();
2169   }
2170   _vm_version_initialized = true;
2171 }
2172
2173 typedef enum {
2174   CPU_FAMILY_8086_8088  = 0,
2175   CPU_FAMILY_INTEL_286  = 2,
2176   CPU_FAMILY_INTEL_386  = 3,
2177   CPU_FAMILY_INTEL_486  = 4,
2178   CPU_FAMILY_PENTIUM    = 5,
2179   CPU_FAMILY_PENTIUMPRO = 6, // Same family several models
2180   CPU_FAMILY_PENTIUM_4  = 0xF
2181 } FamilyFlag;
2182
2183 typedef enum {
2184   RDTSCP_FLAG  = 0x08000000, // bit 27
2185   INTEL64_FLAG = 0x20000000  // bit 29
2186 } _featureExtendedEdxFlag;
2187
2188 typedef enum {
2189   FPU_FLAG     = 0x00000001,
2190   VME_FLAG     = 0x00000002,
2191   DE_FLAG      = 0x00000004,
2192   PSE_FLAG     = 0x00000008,
2193   TSC_FLAG     = 0x00000010,
2194   MSR_FLAG     = 0x00000020,
2195   PAE_FLAG     = 0x00000040,
2196   MCE_FLAG     = 0x00000080,
2197   CX8_FLAG     = 0x00000100,
2198   APIC_FLAG    = 0x00000200,
2199   SEP_FLAG     = 0x00000800,
2200   MTRR_FLAG    = 0x00001000,
2201   PGE_FLAG     = 0x00002000,
2202   MCA_FLAG     = 0x00004000,
2203   CMOV_FLAG    = 0x00008000,
2204   PAT_FLAG     = 0x00010000,
2205   PSE36_FLAG   = 0x00020000,
2206   PSNUM_FLAG   = 0x00040000,
2207   CLFLUSH_FLAG = 0x00080000,
2208   DTS_FLAG     = 0x00200000,
2209   ACPI_FLAG    = 0x00400000,
2210   MMX_FLAG     = 0x00800000,
2211
FXSR_FLAG = 0x01000000, 2212 SSE_FLAG = 0x02000000, 2213 SSE2_FLAG = 0x04000000, 2214 SS_FLAG = 0x08000000, 2215 HTT_FLAG = 0x10000000, 2216 TM_FLAG = 0x20000000 2217 } FeatureEdxFlag; 2218 2219 static BufferBlob* cpuid_brand_string_stub_blob; 2220 static const int cpuid_brand_string_stub_size = 550; 2221 2222 extern "C" { 2223 typedef void (*getCPUIDBrandString_stub_t)(void*); 2224 } 2225 2226 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2227 2228 // VM_Version statics 2229 enum { 2230 ExtendedFamilyIdLength_INTEL = 16, 2231 ExtendedFamilyIdLength_AMD = 24 2232 }; 2233 2234 const size_t VENDOR_LENGTH = 13; 2235 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2236 static char* _cpu_brand_string = nullptr; 2237 static int64_t _max_qualified_cpu_frequency = 0; 2238 2239 static int _no_of_threads = 0; 2240 static int _no_of_cores = 0; 2241 2242 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2243 "8086/8088", 2244 "", 2245 "286", 2246 "386", 2247 "486", 2248 "Pentium", 2249 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2250 "", 2251 "", 2252 "", 2253 "", 2254 "", 2255 "", 2256 "", 2257 "", 2258 "Pentium 4" 2259 }; 2260 2261 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2262 "", 2263 "", 2264 "", 2265 "", 2266 "5x86", 2267 "K5/K6", 2268 "Athlon/AthlonXP", 2269 "", 2270 "", 2271 "", 2272 "", 2273 "", 2274 "", 2275 "", 2276 "", 2277 "Opteron/Athlon64", 2278 "Opteron QC/Phenom", // Barcelona et.al. 2279 "", 2280 "", 2281 "", 2282 "", 2283 "", 2284 "", 2285 "Zen" 2286 }; 2287 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2288 // September 2013, Vol 3C Table 35-1 2289 const char* const _model_id_pentium_pro[] = { 2290 "", 2291 "Pentium Pro", 2292 "", 2293 "Pentium II model 3", 2294 "", 2295 "Pentium II model 5/Xeon/Celeron", 2296 "Celeron", 2297 "Pentium III/Pentium III Xeon", 2298 "Pentium III/Pentium III Xeon", 2299 "Pentium M model 9", // Yonah 2300 "Pentium III, model A", 2301 "Pentium III, model B", 2302 "", 2303 "Pentium M model D", // Dothan 2304 "", 2305 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2306 "", 2307 "", 2308 "", 2309 "", 2310 "", 2311 "", 2312 "Celeron", // 0x16 Celeron 65nm 2313 "Core 2", // 0x17 Penryn / Harpertown 2314 "", 2315 "", 2316 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2317 "Atom", // 0x1B Z5xx series Silverthorn 2318 "", 2319 "Core 2", // 0x1D Dunnington (6-core) 2320 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2321 "", 2322 "", 2323 "", 2324 "", 2325 "", 2326 "", 2327 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2328 "", 2329 "", 2330 "", // 0x28 2331 "", 2332 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2333 "", 2334 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2335 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2336 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2337 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2338 "", 2339 "", 2340 "", 2341 "", 2342 "", 2343 "", 2344 "", 2345 "", 2346 "", 2347 "", 2348 "Ivy Bridge", // 0x3a 2349 "", 2350 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2351 "", // 0x3d "Next Generation Intel Core Processor" 2352 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2353 "", // 0x3f "Future Generation Intel Xeon Processor" 2354 "", 2355 "", 2356 "", 2357 "", 2358 "", 2359 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2360 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2361 nullptr 2362 }; 2363 2364 /* Brand ID is for back compatibility 2365 * 
Newer CPUs use the extended brand string. */
2366 const char* const _brand_id[] = {
2367   "",
2368   "Celeron processor",
2369   "Pentium III processor",
2370   "Intel Pentium III Xeon processor",
2371   "",
2372   "",
2373   "",
2374   "",
2375   "Intel Pentium 4 processor",
2376   nullptr
2377 };
2378
2379
2380 const char* const _feature_edx_id[] = {
2381   "On-Chip FPU",
2382   "Virtual Mode Extensions",
2383   "Debugging Extensions",
2384   "Page Size Extensions",
2385   "Time Stamp Counter",
2386   "Model Specific Registers",
2387   "Physical Address Extension",
2388   "Machine Check Exceptions",
2389   "CMPXCHG8B Instruction",
2390   "On-Chip APIC",
2391   "",
2392   "Fast System Call",
2393   "Memory Type Range Registers",
2394   "Page Global Enable",
2395   "Machine Check Architecture",
2396   "Conditional Mov Instruction",
2397   "Page Attribute Table",
2398   "36-bit Page Size Extension",
2399   "Processor Serial Number",
2400   "CLFLUSH Instruction",
2401   "",
2402   "Debug Trace Store feature",
2403   "ACPI registers in MSR space",
2404   "Intel Architecture MMX Technology",
2405   "Fast Floating Point Save and Restore",
2406   "Streaming SIMD extensions",
2407   "Streaming SIMD extensions 2",
2408   "Self-Snoop",
2409   "Hyper Threading",
2410   "Thermal Monitor",
2411   "",
2412   "Pending Break Enable"
2413 };
2414
2415 const char* const _feature_extended_edx_id[] = {
2416   "",
2417   "",
2418   "",
2419   "",
2420   "",
2421   "",
2422   "",
2423   "",
2424   "",
2425   "",
2426   "",
2427   "SYSCALL/SYSRET",
2428   "",
2429   "",
2430   "",
2431   "",
2432   "",
2433   "",
2434   "",
2435   "",
2436   "Execute Disable Bit",
2437   "",
2438   "",
2439   "",
2440   "",
2441   "",
2442   "",
2443   "RDTSCP",
2444   "",
2445   "Intel 64 Architecture",
2446   "",
2447   ""
2448 };
2449
2450 const char* const _feature_ecx_id[] = {
2451   "Streaming SIMD Extensions 3",
2452   "PCLMULQDQ",
2453   "64-bit DS Area",
2454   "MONITOR/MWAIT instructions",
2455   "CPL Qualified Debug Store",
2456   "Virtual Machine Extensions",
2457   "Safer Mode Extensions",
2458   "Enhanced Intel SpeedStep technology",
2459   "Thermal Monitor 2",
2460   "Supplemental Streaming SIMD Extensions 3",
2461   "L1 Context ID",
2462   "",
2463   "Fused Multiply-Add",
2464   "CMPXCHG16B",
2465   "xTPR Update Control",
2466   "Perfmon and Debug Capability",
2467   "",
2468   "Process-context identifiers",
2469   "Direct Cache Access",
2470   "Streaming SIMD extensions 4.1",
2471   "Streaming SIMD extensions 4.2",
2472   "x2APIC",
2473   "MOVBE",
2474   "Popcount instruction",
2475   "TSC-Deadline",
2476   "AESNI",
2477   "XSAVE",
2478   "OSXSAVE",
2479   "AVX",
2480   "F16C",
2481   "RDRAND",
2482   ""
2483 };
2484
2485 const char* const _feature_extended_ecx_id[] = {
2486   "LAHF/SAHF instruction support",
2487   "Core multi-processor legacy mode",
2488   "",
2489   "",
2490   "",
2491   "Advanced Bit Manipulations: LZCNT",
2492   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2493   "Misaligned SSE mode",
2494   "",
2495   "",
2496   "",
2497   "",
2498   "",
2499   "",
2500   "",
2501   "",
2502   "",
2503   "",
2504   "",
2505   "",
2506   "",
2507   "",
2508   "",
2509   "",
2510   "",
2511   "",
2512   "",
2513   "",
2514   "",
2515   "",
2516   "",
2517   ""
2518 };
2519
2520 void VM_Version::initialize_tsc(void) {
2521   ResourceMark rm;
2522
2523   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2524   if (cpuid_brand_string_stub_blob == nullptr) {
2525     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2526   }
2527   CodeBuffer c(cpuid_brand_string_stub_blob);
2528   VM_Version_StubGenerator g(&c);
2529   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2530
g.generate_getCPUIDBrandString()); 2531 } 2532 2533 const char* VM_Version::cpu_model_description(void) { 2534 uint32_t cpu_family = extended_cpu_family(); 2535 uint32_t cpu_model = extended_cpu_model(); 2536 const char* model = nullptr; 2537 2538 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2539 for (uint32_t i = 0; i <= cpu_model; i++) { 2540 model = _model_id_pentium_pro[i]; 2541 if (model == nullptr) { 2542 break; 2543 } 2544 } 2545 } 2546 return model; 2547 } 2548 2549 const char* VM_Version::cpu_brand_string(void) { 2550 if (_cpu_brand_string == nullptr) { 2551 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2552 if (nullptr == _cpu_brand_string) { 2553 return nullptr; 2554 } 2555 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2556 if (ret_val != OS_OK) { 2557 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2558 _cpu_brand_string = nullptr; 2559 } 2560 } 2561 return _cpu_brand_string; 2562 } 2563 2564 const char* VM_Version::cpu_brand(void) { 2565 const char* brand = nullptr; 2566 2567 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2568 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2569 brand = _brand_id[0]; 2570 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2571 brand = _brand_id[i]; 2572 } 2573 } 2574 return brand; 2575 } 2576 2577 bool VM_Version::cpu_is_em64t(void) { 2578 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2579 } 2580 2581 bool VM_Version::is_netburst(void) { 2582 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2583 } 2584 2585 bool VM_Version::supports_tscinv_ext(void) { 2586 if (!supports_tscinv_bit()) { 2587 return false; 2588 } 2589 2590 if (is_intel()) { 2591 return true; 2592 } 2593 2594 if (is_amd()) { 2595 return !is_amd_Barcelona(); 2596 } 2597 2598 if (is_hygon()) { 2599 return true; 2600 } 2601 2602 return false; 2603 } 2604 2605 void VM_Version::resolve_cpu_information_details(void) { 2606 2607 // in future we want to base this information on proper cpu 2608 // and cache topology enumeration such as: 2609 // Intel 64 Architecture Processor Topology Enumeration 2610 // which supports system cpu and cache topology enumeration 2611 // either using 2xAPICIDs or initial APICIDs 2612 2613 // currently only rough cpu information estimates 2614 // which will not necessarily reflect the exact configuration of the system 2615 2616 // this is the number of logical hardware threads 2617 // visible to the operating system 2618 _no_of_threads = os::processor_count(); 2619 2620 // find out number of threads per cpu package 2621 int threads_per_package = threads_per_core() * cores_per_cpu(); 2622 2623 // use amount of threads visible to the process in order to guess number of sockets 2624 _no_of_sockets = _no_of_threads / threads_per_package; 2625 2626 // process might only see a subset of the total number of threads 2627 // from a single processor package. Virtualization/resource management for example. 2628 // If so then just write a hard 1 as num of pkgs. 
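  // (e.g. a VM limited to 4 of the 32 hardware threads of one package would
  // compute 4 / 32 == 0 sockets by integer division, hence the clamp below)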
2629 if (0 == _no_of_sockets) { 2630 _no_of_sockets = 1; 2631 } 2632 2633 // estimate the number of cores 2634 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2635 } 2636 2637 2638 const char* VM_Version::cpu_family_description(void) { 2639 int cpu_family_id = extended_cpu_family(); 2640 if (is_amd()) { 2641 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2642 return _family_id_amd[cpu_family_id]; 2643 } 2644 } 2645 if (is_intel()) { 2646 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2647 return cpu_model_description(); 2648 } 2649 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2650 return _family_id_intel[cpu_family_id]; 2651 } 2652 } 2653 if (is_hygon()) { 2654 return "Dhyana"; 2655 } 2656 return "Unknown x86"; 2657 } 2658 2659 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2660 assert(buf != nullptr, "buffer is null!"); 2661 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2662 2663 const char* cpu_type = nullptr; 2664 const char* x64 = nullptr; 2665 2666 if (is_intel()) { 2667 cpu_type = "Intel"; 2668 x64 = cpu_is_em64t() ? " Intel64" : ""; 2669 } else if (is_amd()) { 2670 cpu_type = "AMD"; 2671 x64 = cpu_is_em64t() ? " AMD64" : ""; 2672 } else if (is_hygon()) { 2673 cpu_type = "Hygon"; 2674 x64 = cpu_is_em64t() ? " AMD64" : ""; 2675 } else { 2676 cpu_type = "Unknown x86"; 2677 x64 = cpu_is_em64t() ? " x86_64" : ""; 2678 } 2679 2680 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2681 cpu_type, 2682 cpu_family_description(), 2683 supports_ht() ? " (HT)" : "", 2684 supports_sse3() ? " SSE3" : "", 2685 supports_ssse3() ? " SSSE3" : "", 2686 supports_sse4_1() ? " SSE4.1" : "", 2687 supports_sse4_2() ? " SSE4.2" : "", 2688 supports_sse4a() ? " SSE4A" : "", 2689 is_netburst() ? " Netburst" : "", 2690 is_intel_family_core() ? 
" Core" : "", 2691 x64); 2692 2693 return OS_OK; 2694 } 2695 2696 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2697 assert(buf != nullptr, "buffer is null!"); 2698 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2699 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2700 2701 // invoke newly generated asm code to fetch CPU Brand String 2702 getCPUIDBrandString_stub(&_cpuid_info); 2703 2704 // fetch results into buffer 2705 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2706 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2707 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2708 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2709 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2710 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2711 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2712 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2713 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2714 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2715 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2716 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2717 2718 return OS_OK; 2719 } 2720 2721 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2722 guarantee(buf != nullptr, "buffer is null!"); 2723 guarantee(buf_len > 0, "buffer len not enough!"); 2724 2725 unsigned int flag = 0; 2726 unsigned int fi = 0; 2727 size_t written = 0; 2728 const char* prefix = ""; 2729 2730 #define WRITE_TO_BUF(string) \ 2731 { \ 2732 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2733 if (res < 0) { \ 2734 return buf_len - 1; \ 2735 } \ 2736 written += res; \ 2737 if (prefix[0] == '\0') { \ 2738 prefix = ", "; \ 2739 } \ 2740 } 2741 2742 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2743 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2744 continue; /* no hyperthreading */ 2745 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2746 continue; /* no fast system call */ 2747 } 2748 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2749 WRITE_TO_BUF(_feature_edx_id[fi]); 2750 } 2751 } 2752 2753 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2754 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2755 WRITE_TO_BUF(_feature_ecx_id[fi]); 2756 } 2757 } 2758 2759 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2760 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2761 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2762 } 2763 } 2764 2765 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2766 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2767 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2768 } 2769 } 2770 2771 if (supports_tscinv_bit()) { 2772 WRITE_TO_BUF("Invariant TSC"); 2773 } 2774 2775 return written; 2776 } 2777 2778 /** 2779 * Write a detailed description of the cpu to a given buffer, including 2780 * feature set. 
2781 */ 2782 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2783 assert(buf != nullptr, "buffer is null!"); 2784 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2785 2786 static const char* unknown = "<unknown>"; 2787 char vendor_id[VENDOR_LENGTH]; 2788 const char* family = nullptr; 2789 const char* model = nullptr; 2790 const char* brand = nullptr; 2791 int outputLen = 0; 2792 2793 family = cpu_family_description(); 2794 if (family == nullptr) { 2795 family = unknown; 2796 } 2797 2798 model = cpu_model_description(); 2799 if (model == nullptr) { 2800 model = unknown; 2801 } 2802 2803 brand = cpu_brand_string(); 2804 2805 if (brand == nullptr) { 2806 brand = cpu_brand(); 2807 if (brand == nullptr) { 2808 brand = unknown; 2809 } 2810 } 2811 2812 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2813 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2814 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2815 vendor_id[VENDOR_LENGTH-1] = '\0'; 2816 2817 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2818 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2819 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2820 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2821 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2822 "Supports: ", 2823 brand, 2824 vendor_id, 2825 family, 2826 extended_cpu_family(), 2827 model, 2828 extended_cpu_model(), 2829 cpu_stepping(), 2830 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2831 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2832 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2833 _cpuid_info.std_cpuid1_eax.value, 2834 _cpuid_info.std_cpuid1_ebx.value, 2835 _cpuid_info.std_cpuid1_ecx.value, 2836 _cpuid_info.std_cpuid1_edx.value, 2837 _cpuid_info.ext_cpuid1_eax, 2838 _cpuid_info.ext_cpuid1_ebx, 2839 _cpuid_info.ext_cpuid1_ecx, 2840 _cpuid_info.ext_cpuid1_edx); 2841 2842 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2843 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2844 return OS_ERR; 2845 } 2846 2847 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2848 2849 return OS_OK; 2850 } 2851 2852 2853 // Fill in Abstract_VM_Version statics 2854 void VM_Version::initialize_cpu_information() { 2855 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2856 assert(!_initialized, "shouldn't be initialized yet"); 2857 resolve_cpu_information_details(); 2858 2859 // initialize cpu_name and cpu_desc 2860 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2861 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2862 _initialized = true; 2863 } 2864 2865 /** 2866 * For information about extracting the frequency from the cpu brand string, please see: 2867 * 2868 * Intel Processor Identification and the CPUID Instruction 2869 * Application Note 485 2870 * May 2012 2871 * 2872 * The return value is the frequency in Hz. 2873 */ 2874 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2875 const char* const brand_string = cpu_brand_string(); 2876 if (brand_string == nullptr) { 2877 return 0; 2878 } 2879 const int64_t MEGA = 1000000; 2880 int64_t multiplier = 0; 2881 int64_t frequency = 0; 2882 uint8_t idx = 0; 2883 // The brand string buffer is at most 48 bytes. 2884 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
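  // Illustration: for a brand string ending in "... @ 2.60GHz" the scan below
  // stops with idx at 'G'; brand_string[idx-3] == '.' selects the "x.xx" path,
  // yielding 2 * 10^9 + 6 * 10^8 + 0 * 10^7 == 2.6 GHz.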
2885 for (; idx < 48-2; ++idx) { 2886 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2887 // Search brand string for "yHz" where y is M, G, or T. 2888 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2889 if (brand_string[idx] == 'M') { 2890 multiplier = MEGA; 2891 } else if (brand_string[idx] == 'G') { 2892 multiplier = MEGA * 1000; 2893 } else if (brand_string[idx] == 'T') { 2894 multiplier = MEGA * MEGA; 2895 } 2896 break; 2897 } 2898 } 2899 if (multiplier > 0) { 2900 // Compute frequency (in Hz) from brand string. 2901 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2902 frequency = (brand_string[idx-4] - '0') * multiplier; 2903 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2904 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2905 } else { // format is "xxxx" 2906 frequency = (brand_string[idx-4] - '0') * 1000; 2907 frequency += (brand_string[idx-3] - '0') * 100; 2908 frequency += (brand_string[idx-2] - '0') * 10; 2909 frequency += (brand_string[idx-1] - '0'); 2910 frequency *= multiplier; 2911 } 2912 } 2913 return frequency; 2914 } 2915 2916 2917 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2918 if (_max_qualified_cpu_frequency == 0) { 2919 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2920 } 2921 return _max_qualified_cpu_frequency; 2922 } 2923 2924 uint64_t VM_Version::CpuidInfo::feature_flags() const { 2925 uint64_t result = 0; 2926 if (std_cpuid1_edx.bits.cmpxchg8 != 0) 2927 result |= CPU_CX8; 2928 if (std_cpuid1_edx.bits.cmov != 0) 2929 result |= CPU_CMOV; 2930 if (std_cpuid1_edx.bits.clflush != 0) 2931 result |= CPU_FLUSH; 2932 #ifdef _LP64 2933 // clflush should always be available on x86_64 2934 // if not we are in real trouble because we rely on it 2935 // to flush the code cache. 2936 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2937 #endif 2938 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2939 ext_cpuid1_edx.bits.fxsr != 0)) 2940 result |= CPU_FXSR; 2941 // HT flag is set for multi-core processors also. 
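  // (so CPU_HT is derived from the detected topology instead of the raw CPUID
  // HTT bit, which multi-core parts report even without SMT)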
2942 if (threads_per_core() > 1) 2943 result |= CPU_HT; 2944 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2945 ext_cpuid1_edx.bits.mmx != 0)) 2946 result |= CPU_MMX; 2947 if (std_cpuid1_edx.bits.sse != 0) 2948 result |= CPU_SSE; 2949 if (std_cpuid1_edx.bits.sse2 != 0) 2950 result |= CPU_SSE2; 2951 if (std_cpuid1_ecx.bits.sse3 != 0) 2952 result |= CPU_SSE3; 2953 if (std_cpuid1_ecx.bits.ssse3 != 0) 2954 result |= CPU_SSSE3; 2955 if (std_cpuid1_ecx.bits.sse4_1 != 0) 2956 result |= CPU_SSE4_1; 2957 if (std_cpuid1_ecx.bits.sse4_2 != 0) 2958 result |= CPU_SSE4_2; 2959 if (std_cpuid1_ecx.bits.popcnt != 0) 2960 result |= CPU_POPCNT; 2961 if (std_cpuid1_ecx.bits.avx != 0 && 2962 std_cpuid1_ecx.bits.osxsave != 0 && 2963 xem_xcr0_eax.bits.sse != 0 && 2964 xem_xcr0_eax.bits.ymm != 0) { 2965 result |= CPU_AVX; 2966 result |= CPU_VZEROUPPER; 2967 if (std_cpuid1_ecx.bits.f16c != 0) 2968 result |= CPU_F16C; 2969 if (sef_cpuid7_ebx.bits.avx2 != 0) { 2970 result |= CPU_AVX2; 2971 if (sef_cpuid7_ecx1_eax.bits.avx_ifma != 0) 2972 result |= CPU_AVX_IFMA; 2973 } 2974 if (sef_cpuid7_ecx.bits.gfni != 0) 2975 result |= CPU_GFNI; 2976 if (sef_cpuid7_ebx.bits.avx512f != 0 && 2977 xem_xcr0_eax.bits.opmask != 0 && 2978 xem_xcr0_eax.bits.zmm512 != 0 && 2979 xem_xcr0_eax.bits.zmm32 != 0) { 2980 result |= CPU_AVX512F; 2981 if (sef_cpuid7_ebx.bits.avx512cd != 0) 2982 result |= CPU_AVX512CD; 2983 if (sef_cpuid7_ebx.bits.avx512dq != 0) 2984 result |= CPU_AVX512DQ; 2985 if (sef_cpuid7_ebx.bits.avx512ifma != 0) 2986 result |= CPU_AVX512_IFMA; 2987 if (sef_cpuid7_ebx.bits.avx512pf != 0) 2988 result |= CPU_AVX512PF; 2989 if (sef_cpuid7_ebx.bits.avx512er != 0) 2990 result |= CPU_AVX512ER; 2991 if (sef_cpuid7_ebx.bits.avx512bw != 0) 2992 result |= CPU_AVX512BW; 2993 if (sef_cpuid7_ebx.bits.avx512vl != 0) 2994 result |= CPU_AVX512VL; 2995 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2996 result |= CPU_AVX512_VPOPCNTDQ; 2997 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2998 result |= CPU_AVX512_VPCLMULQDQ; 2999 if (sef_cpuid7_ecx.bits.vaes != 0) 3000 result |= CPU_AVX512_VAES; 3001 if (sef_cpuid7_ecx.bits.avx512_vnni != 0) 3002 result |= CPU_AVX512_VNNI; 3003 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) 3004 result |= CPU_AVX512_BITALG; 3005 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) 3006 result |= CPU_AVX512_VBMI; 3007 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 3008 result |= CPU_AVX512_VBMI2; 3009 } 3010 } 3011 if (std_cpuid1_ecx.bits.hv != 0) 3012 result |= CPU_HV; 3013 if (sef_cpuid7_ebx.bits.bmi1 != 0) 3014 result |= CPU_BMI1; 3015 if (std_cpuid1_edx.bits.tsc != 0) 3016 result |= CPU_TSC; 3017 if (ext_cpuid7_edx.bits.tsc_invariance != 0) 3018 result |= CPU_TSCINV_BIT; 3019 if (std_cpuid1_ecx.bits.aes != 0) 3020 result |= CPU_AES; 3021 if (sef_cpuid7_ebx.bits.erms != 0) 3022 result |= CPU_ERMS; 3023 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 3024 result |= CPU_FSRM; 3025 if (std_cpuid1_ecx.bits.clmul != 0) 3026 result |= CPU_CLMUL; 3027 if (sef_cpuid7_ebx.bits.rtm != 0) 3028 result |= CPU_RTM; 3029 if (sef_cpuid7_ebx.bits.adx != 0) 3030 result |= CPU_ADX; 3031 if (sef_cpuid7_ebx.bits.bmi2 != 0) 3032 result |= CPU_BMI2; 3033 if (sef_cpuid7_ebx.bits.sha != 0) 3034 result |= CPU_SHA; 3035 if (std_cpuid1_ecx.bits.fma != 0) 3036 result |= CPU_FMA; 3037 if (sef_cpuid7_ebx.bits.clflushopt != 0) 3038 result |= CPU_FLUSHOPT; 3039 if (ext_cpuid1_edx.bits.rdtscp != 0) 3040 result |= CPU_RDTSCP; 3041 if (sef_cpuid7_ecx.bits.rdpid != 0) 3042 result |= CPU_RDPID; 3043 3044 // AMD|Hygon features. 
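  // These bits come from the 0x80000001 extended CPUID leaf; Intel and ZX
  // report LZCNT and PREFETCHW there as well and are handled separately below.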
3045 if (is_amd_family()) { 3046 if ((ext_cpuid1_edx.bits.tdnow != 0) || 3047 (ext_cpuid1_ecx.bits.prefetchw != 0)) 3048 result |= CPU_3DNOW_PREFETCH; 3049 if (ext_cpuid1_ecx.bits.lzcnt != 0) 3050 result |= CPU_LZCNT; 3051 if (ext_cpuid1_ecx.bits.sse4a != 0) 3052 result |= CPU_SSE4A; 3053 } 3054 3055 // Intel features. 3056 if (is_intel()) { 3057 if (ext_cpuid1_ecx.bits.lzcnt != 0) { 3058 result |= CPU_LZCNT; 3059 } 3060 if (ext_cpuid1_ecx.bits.prefetchw != 0) { 3061 result |= CPU_3DNOW_PREFETCH; 3062 } 3063 if (sef_cpuid7_ebx.bits.clwb != 0) { 3064 result |= CPU_CLWB; 3065 } 3066 if (sef_cpuid7_edx.bits.serialize != 0) 3067 result |= CPU_SERIALIZE; 3068 } 3069 3070 // ZX features. 3071 if (is_zx()) { 3072 if (ext_cpuid1_ecx.bits.lzcnt != 0) { 3073 result |= CPU_LZCNT; 3074 } 3075 if (ext_cpuid1_ecx.bits.prefetchw != 0) { 3076 result |= CPU_3DNOW_PREFETCH; 3077 } 3078 } 3079 3080 // Protection key features. 3081 if (sef_cpuid7_ecx.bits.pku != 0) { 3082 result |= CPU_PKU; 3083 } 3084 if (sef_cpuid7_ecx.bits.ospke != 0) { 3085 result |= CPU_OSPKE; 3086 } 3087 3088 // Control flow enforcement (CET) features. 3089 if (sef_cpuid7_ecx.bits.cet_ss != 0) { 3090 result |= CPU_CET_SS; 3091 } 3092 if (sef_cpuid7_edx.bits.cet_ibt != 0) { 3093 result |= CPU_CET_IBT; 3094 } 3095 3096 // Composite features. 3097 if (supports_tscinv_bit() && 3098 ((is_amd_family() && !is_amd_Barcelona()) || 3099 is_intel_tsc_synched_at_init())) { 3100 result |= CPU_TSCINV; 3101 } 3102 3103 return result; 3104 } 3105 3106 bool VM_Version::os_supports_avx_vectors() { 3107 bool retVal = false; 3108 int nreg = 2 LP64_ONLY(+2); 3109 if (supports_evex()) { 3110 // Verify that OS save/restore all bits of EVEX registers 3111 // during signal processing. 3112 retVal = true; 3113 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3114 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3115 retVal = false; 3116 break; 3117 } 3118 } 3119 } else if (supports_avx()) { 3120 // Verify that OS save/restore all bits of AVX registers 3121 // during signal processing. 3122 retVal = true; 3123 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3124 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3125 retVal = false; 3126 break; 3127 } 3128 } 3129 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3130 if (retVal == false) { 3131 // Verify that OS save/restore all bits of EVEX registers 3132 // during signal processing. 
3133 retVal = true; 3134 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3135 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3136 retVal = false; 3137 break; 3138 } 3139 } 3140 } 3141 } 3142 return retVal; 3143 } 3144 3145 uint VM_Version::cores_per_cpu() { 3146 uint result = 1; 3147 if (is_intel()) { 3148 bool supports_topology = supports_processor_topology(); 3149 if (supports_topology) { 3150 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3151 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3152 } 3153 if (!supports_topology || result == 0) { 3154 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3155 } 3156 } else if (is_amd_family()) { 3157 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3158 } else if (is_zx()) { 3159 bool supports_topology = supports_processor_topology(); 3160 if (supports_topology) { 3161 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3162 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3163 } 3164 if (!supports_topology || result == 0) { 3165 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3166 } 3167 } 3168 return result; 3169 } 3170 3171 uint VM_Version::threads_per_core() { 3172 uint result = 1; 3173 if (is_intel() && supports_processor_topology()) { 3174 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3175 } else if (is_zx() && supports_processor_topology()) { 3176 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3177 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3178 if (cpu_family() >= 0x17) { 3179 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3180 } else { 3181 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3182 cores_per_cpu(); 3183 } 3184 } 3185 return (result == 0 ? 1 : result); 3186 } 3187 3188 uint VM_Version::L1_line_size() { 3189 uint result = 0; 3190 if (is_intel()) { 3191 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3192 } else if (is_amd_family()) { 3193 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3194 } else if (is_zx()) { 3195 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3196 } 3197 if (result < 32) // not defined ? 3198 result = 32; // 32 bytes by default on x86 and other x64 3199 return result; 3200 } 3201 3202 bool VM_Version::is_intel_tsc_synched_at_init() { 3203 if (is_intel_family_core()) { 3204 uint32_t ext_model = extended_cpu_model(); 3205 if (ext_model == CPU_MODEL_NEHALEM_EP || 3206 ext_model == CPU_MODEL_WESTMERE_EP || 3207 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3208 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3209 // <= 2-socket invariant tsc support. EX versions are usually used 3210 // in > 2-socket systems and likely don't synchronize tscs at 3211 // initialization. 3212 // Code that uses tsc values must be prepared for them to arbitrarily 3213 // jump forward or backward. 3214 return true; 3215 } 3216 } 3217 return false; 3218 } 3219 3220 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3221 // Hardware prefetching (distance/size in bytes): 3222 // Pentium 3 - 64 / 32 3223 // Pentium 4 - 256 / 128 3224 // Athlon - 64 / 32 ???? 
3225 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3226 // Core - 128 / 64 3227 // 3228 // Software prefetching (distance in bytes / instruction with best score): 3229 // Pentium 3 - 128 / prefetchnta 3230 // Pentium 4 - 512 / prefetchnta 3231 // Athlon - 128 / prefetchnta 3232 // Opteron - 256 / prefetchnta 3233 // Core - 256 / prefetchnta 3234 // It will be used only when AllocatePrefetchStyle > 0 3235 3236 if (is_amd_family()) { // AMD | Hygon 3237 if (supports_sse2()) { 3238 return 256; // Opteron 3239 } else { 3240 return 128; // Athlon 3241 } 3242 } else { // Intel 3243 if (supports_sse3() && cpu_family() == 6) { 3244 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3245 return 192; 3246 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3247 #ifdef _LP64 3248 return 384; 3249 #else 3250 return 320; 3251 #endif 3252 } 3253 } 3254 if (supports_sse2()) { 3255 if (cpu_family() == 6) { 3256 return 256; // Pentium M, Core, Core2 3257 } else { 3258 return 512; // Pentium 4 3259 } 3260 } else { 3261 return 128; // Pentium 3 (and all other old CPUs) 3262 } 3263 } 3264 } 3265 3266 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3267 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3268 switch (id) { 3269 case vmIntrinsics::_floatToFloat16: 3270 case vmIntrinsics::_float16ToFloat: 3271 if (!supports_float16()) { 3272 return false; 3273 } 3274 break; 3275 default: 3276 break; 3277 } 3278 return true; 3279 }