1 /* 2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/macroAssembler.hpp" 27 #include "asm/macroAssembler.inline.hpp" 28 #include "classfile/vmIntrinsics.hpp" 29 #include "code/codeBlob.hpp" 30 #include "compiler/compilerDefinitions.inline.hpp" 31 #include "jvm.h" 32 #include "logging/log.hpp" 33 #include "logging/logStream.hpp" 34 #include "memory/resourceArea.hpp" 35 #include "memory/universe.hpp" 36 #include "runtime/globals_extension.hpp" 37 #include "runtime/java.hpp" 38 #include "runtime/os.inline.hpp" 39 #include "runtime/stubCodeGenerator.hpp" 40 #include "runtime/vm_version.hpp" 41 #include "utilities/checkedCast.hpp" 42 #include "utilities/powerOfTwo.hpp" 43 #include "utilities/virtualizationSupport.hpp" 44 45 int VM_Version::_cpu; 46 int VM_Version::_model; 47 int VM_Version::_stepping; 48 bool VM_Version::_has_intel_jcc_erratum; 49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 50 51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name, 52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)}; 53 #undef DECLARE_CPU_FEATURE_FLAG 54 55 // Address of instruction which causes SEGV 56 address VM_Version::_cpuinfo_segv_addr = 0; 57 // Address of instruction after the one which causes SEGV 58 address VM_Version::_cpuinfo_cont_addr = 0; 59 60 static BufferBlob* stub_blob; 61 static const int stub_size = 2000; 62 63 extern "C" { 64 typedef void (*get_cpu_info_stub_t)(void*); 65 typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*); 66 } 67 static get_cpu_info_stub_t get_cpu_info_stub = nullptr; 68 static detect_virt_stub_t detect_virt_stub = nullptr; 69 70 #ifdef _LP64 71 72 bool VM_Version::supports_clflush() { 73 // clflush should always be available on x86_64 74 // if not we are in real trouble because we rely on it 75 // to flush the code cache. 76 // Unfortunately, Assembler::clflush is currently called as part 77 // of generation of the code cache flush routine. 
This happens 78 // under Universe::init before the processor features are set 79 // up. Assembler::flush calls this routine to check that clflush 80 // is allowed. So, we give the caller a free pass if Universe init 81 // is still in progress. 82 assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available"); 83 return true; 84 } 85 #endif 86 87 #define CPUID_STANDARD_FN 0x0 88 #define CPUID_STANDARD_FN_1 0x1 89 #define CPUID_STANDARD_FN_4 0x4 90 #define CPUID_STANDARD_FN_B 0xb 91 92 #define CPUID_EXTENDED_FN 0x80000000 93 #define CPUID_EXTENDED_FN_1 0x80000001 94 #define CPUID_EXTENDED_FN_2 0x80000002 95 #define CPUID_EXTENDED_FN_3 0x80000003 96 #define CPUID_EXTENDED_FN_4 0x80000004 97 #define CPUID_EXTENDED_FN_7 0x80000007 98 #define CPUID_EXTENDED_FN_8 0x80000008 99 100 class VM_Version_StubGenerator: public StubCodeGenerator { 101 public: 102 103 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 104 105 address generate_get_cpu_info() { 106 // Flags to test CPU type. 107 const uint32_t HS_EFL_AC = 0x40000; 108 const uint32_t HS_EFL_ID = 0x200000; 109 // Values for when we don't have a CPUID instruction. 
110 const int CPU_FAMILY_SHIFT = 8; 111 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 112 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 113 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2); 114 115 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; 116 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup; 117 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; 118 119 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); 120 # define __ _masm-> 121 122 address start = __ pc(); 123 124 // 125 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); 126 // 127 // LP64: rcx and rdx are first and second argument registers on windows 128 129 __ push(rbp); 130 #ifdef _LP64 131 __ mov(rbp, c_rarg0); // cpuid_info address 132 #else 133 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 134 #endif 135 __ push(rbx); 136 __ push(rsi); 137 __ pushf(); // preserve rbx, and flags 138 __ pop(rax); 139 __ push(rax); 140 __ mov(rcx, rax); 141 // 142 // if we are unable to change the AC flag, we have a 386 143 // 144 __ xorl(rax, HS_EFL_AC); 145 __ push(rax); 146 __ popf(); 147 __ pushf(); 148 __ pop(rax); 149 __ cmpptr(rax, rcx); 150 __ jccb(Assembler::notEqual, detect_486); 151 152 __ movl(rax, CPU_FAMILY_386); 153 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 154 __ jmp(done); 155 156 // 157 // If we are unable to change the ID flag, we have a 486 which does 158 // not support the "cpuid" instruction. 
159 // 160 __ bind(detect_486); 161 __ mov(rax, rcx); 162 __ xorl(rax, HS_EFL_ID); 163 __ push(rax); 164 __ popf(); 165 __ pushf(); 166 __ pop(rax); 167 __ cmpptr(rcx, rax); 168 __ jccb(Assembler::notEqual, detect_586); 169 170 __ bind(cpu486); 171 __ movl(rax, CPU_FAMILY_486); 172 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 173 __ jmp(done); 174 175 // 176 // At this point, we have a chip which supports the "cpuid" instruction 177 // 178 __ bind(detect_586); 179 __ xorl(rax, rax); 180 __ cpuid(); 181 __ orl(rax, rax); 182 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 183 // value of at least 1, we give up and 184 // assume a 486 185 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 186 __ movl(Address(rsi, 0), rax); 187 __ movl(Address(rsi, 4), rbx); 188 __ movl(Address(rsi, 8), rcx); 189 __ movl(Address(rsi,12), rdx); 190 191 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? 192 __ jccb(Assembler::belowEqual, std_cpuid4); 193 194 // 195 // cpuid(0xB) Processor Topology 196 // 197 __ movl(rax, 0xb); 198 __ xorl(rcx, rcx); // Threads level 199 __ cpuid(); 200 201 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); 202 __ movl(Address(rsi, 0), rax); 203 __ movl(Address(rsi, 4), rbx); 204 __ movl(Address(rsi, 8), rcx); 205 __ movl(Address(rsi,12), rdx); 206 207 __ movl(rax, 0xb); 208 __ movl(rcx, 1); // Cores level 209 __ cpuid(); 210 __ push(rax); 211 __ andl(rax, 0x1f); // Determine if valid topology level 212 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 213 __ andl(rax, 0xffff); 214 __ pop(rax); 215 __ jccb(Assembler::equal, std_cpuid4); 216 217 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); 218 __ movl(Address(rsi, 0), rax); 219 __ movl(Address(rsi, 4), rbx); 220 __ movl(Address(rsi, 8), rcx); 221 __ movl(Address(rsi,12), rdx); 222 223 __ movl(rax, 0xb); 224 __ movl(rcx, 2); // Packages level 225 __ cpuid(); 226 __ push(rax); 
227 __ andl(rax, 0x1f); // Determine if valid topology level 228 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 229 __ andl(rax, 0xffff); 230 __ pop(rax); 231 __ jccb(Assembler::equal, std_cpuid4); 232 233 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); 234 __ movl(Address(rsi, 0), rax); 235 __ movl(Address(rsi, 4), rbx); 236 __ movl(Address(rsi, 8), rcx); 237 __ movl(Address(rsi,12), rdx); 238 239 // 240 // cpuid(0x4) Deterministic cache params 241 // 242 __ bind(std_cpuid4); 243 __ movl(rax, 4); 244 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? 245 __ jccb(Assembler::greater, std_cpuid1); 246 247 __ xorl(rcx, rcx); // L1 cache 248 __ cpuid(); 249 __ push(rax); 250 __ andl(rax, 0x1f); // Determine if valid cache parameters used 251 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 252 __ pop(rax); 253 __ jccb(Assembler::equal, std_cpuid1); 254 255 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 256 __ movl(Address(rsi, 0), rax); 257 __ movl(Address(rsi, 4), rbx); 258 __ movl(Address(rsi, 8), rcx); 259 __ movl(Address(rsi,12), rdx); 260 261 // 262 // Standard cpuid(0x1) 263 // 264 __ bind(std_cpuid1); 265 __ movl(rax, 1); 266 __ cpuid(); 267 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 268 __ movl(Address(rsi, 0), rax); 269 __ movl(Address(rsi, 4), rbx); 270 __ movl(Address(rsi, 8), rcx); 271 __ movl(Address(rsi,12), rdx); 272 273 // 274 // Check if OS has enabled XGETBV instruction to access XCR0 275 // (OSXSAVE feature flag) and CPU supports AVX 276 // 277 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 278 __ cmpl(rcx, 0x18000000); 279 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported 280 281 // 282 // XCR0, XFEATURE_ENABLED_MASK register 283 // 284 __ xorl(rcx, rcx); // zero for XCR0 register 285 __ xgetbv(); 286 __ lea(rsi, Address(rbp, 
in_bytes(VM_Version::xem_xcr0_offset()))); 287 __ movl(Address(rsi, 0), rax); 288 __ movl(Address(rsi, 4), rdx); 289 290 // 291 // cpuid(0x7) Structured Extended Features 292 // 293 __ bind(sef_cpuid); 294 __ movl(rax, 7); 295 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? 296 __ jccb(Assembler::greater, ext_cpuid); 297 298 __ xorl(rcx, rcx); 299 __ cpuid(); 300 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 301 __ movl(Address(rsi, 0), rax); 302 __ movl(Address(rsi, 4), rbx); 303 __ movl(Address(rsi, 8), rcx); 304 __ movl(Address(rsi, 12), rdx); 305 306 // 307 // Extended cpuid(0x80000000) 308 // 309 __ bind(ext_cpuid); 310 __ movl(rax, 0x80000000); 311 __ cpuid(); 312 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 313 __ jcc(Assembler::belowEqual, done); 314 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 315 __ jcc(Assembler::belowEqual, ext_cpuid1); 316 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? 317 __ jccb(Assembler::belowEqual, ext_cpuid5); 318 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 319 __ jccb(Assembler::belowEqual, ext_cpuid7); 320 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported? 321 __ jccb(Assembler::belowEqual, ext_cpuid8); 322 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported? 
323 __ jccb(Assembler::below, ext_cpuid8); 324 // 325 // Extended cpuid(0x8000001E) 326 // 327 __ movl(rax, 0x8000001E); 328 __ cpuid(); 329 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset()))); 330 __ movl(Address(rsi, 0), rax); 331 __ movl(Address(rsi, 4), rbx); 332 __ movl(Address(rsi, 8), rcx); 333 __ movl(Address(rsi,12), rdx); 334 335 // 336 // Extended cpuid(0x80000008) 337 // 338 __ bind(ext_cpuid8); 339 __ movl(rax, 0x80000008); 340 __ cpuid(); 341 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 342 __ movl(Address(rsi, 0), rax); 343 __ movl(Address(rsi, 4), rbx); 344 __ movl(Address(rsi, 8), rcx); 345 __ movl(Address(rsi,12), rdx); 346 347 // 348 // Extended cpuid(0x80000007) 349 // 350 __ bind(ext_cpuid7); 351 __ movl(rax, 0x80000007); 352 __ cpuid(); 353 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); 354 __ movl(Address(rsi, 0), rax); 355 __ movl(Address(rsi, 4), rbx); 356 __ movl(Address(rsi, 8), rcx); 357 __ movl(Address(rsi,12), rdx); 358 359 // 360 // Extended cpuid(0x80000005) 361 // 362 __ bind(ext_cpuid5); 363 __ movl(rax, 0x80000005); 364 __ cpuid(); 365 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 366 __ movl(Address(rsi, 0), rax); 367 __ movl(Address(rsi, 4), rbx); 368 __ movl(Address(rsi, 8), rcx); 369 __ movl(Address(rsi,12), rdx); 370 371 // 372 // Extended cpuid(0x80000001) 373 // 374 __ bind(ext_cpuid1); 375 __ movl(rax, 0x80000001); 376 __ cpuid(); 377 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 378 __ movl(Address(rsi, 0), rax); 379 __ movl(Address(rsi, 4), rbx); 380 __ movl(Address(rsi, 8), rcx); 381 __ movl(Address(rsi,12), rdx); 382 383 // 384 // Check if OS has enabled XGETBV instruction to access XCR0 385 // (OSXSAVE feature flag) and CPU supports AVX 386 // 387 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 388 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 389 __ andl(rcx, Address(rsi, 
8)); // cpuid1 bits osxsave | avx 390 __ cmpl(rcx, 0x18000000); 391 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported 392 393 __ movl(rax, 0x6); 394 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 395 __ cmpl(rax, 0x6); 396 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported 397 398 // we need to bridge farther than imm8, so we use this island as a thunk 399 __ bind(done); 400 __ jmp(wrapup); 401 402 __ bind(start_simd_check); 403 // 404 // Some OSs have a bug when upper 128/256bits of YMM/ZMM 405 // registers are not restored after a signal processing. 406 // Generate SEGV here (reference through null) 407 // and check upper YMM/ZMM bits after it. 408 // 409 int saved_useavx = UseAVX; 410 int saved_usesse = UseSSE; 411 412 // If UseAVX is uninitialized or is set by the user to include EVEX 413 if (use_evex) { 414 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 415 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 416 __ movl(rax, 0x10000); 417 __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm 418 __ cmpl(rax, 0x10000); 419 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 420 // check _cpuid_info.xem_xcr0_eax.bits.opmask 421 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 422 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 423 __ movl(rax, 0xE0); 424 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 425 __ cmpl(rax, 0xE0); 426 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 427 428 if (FLAG_IS_DEFAULT(UseAVX)) { 429 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 430 __ movl(rax, Address(rsi, 0)); 431 __ cmpl(rax, 0x50654); // If it is Skylake 432 __ jcc(Assembler::equal, legacy_setup); 433 } 434 // EVEX setup: run in lowest evex mode 435 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 436 UseAVX = 3; 437 UseSSE = 
2; 438 #ifdef _WINDOWS 439 // xmm5-xmm15 are not preserved by caller on windows 440 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx 441 __ subptr(rsp, 64); 442 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); 443 #ifdef _LP64 444 __ subptr(rsp, 64); 445 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); 446 __ subptr(rsp, 64); 447 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); 448 #endif // _LP64 449 #endif // _WINDOWS 450 451 // load value into all 64 bytes of zmm7 register 452 __ movl(rcx, VM_Version::ymm_test_value()); 453 __ movdl(xmm0, rcx); 454 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); 455 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); 456 #ifdef _LP64 457 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); 458 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); 459 #endif 460 VM_Version::clean_cpuFeatures(); 461 __ jmp(save_restore_except); 462 } 463 464 __ bind(legacy_setup); 465 // AVX setup 466 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 467 UseAVX = 1; 468 UseSSE = 2; 469 #ifdef _WINDOWS 470 __ subptr(rsp, 32); 471 __ vmovdqu(Address(rsp, 0), xmm7); 472 #ifdef _LP64 473 __ subptr(rsp, 32); 474 __ vmovdqu(Address(rsp, 0), xmm8); 475 __ subptr(rsp, 32); 476 __ vmovdqu(Address(rsp, 0), xmm15); 477 #endif // _LP64 478 #endif // _WINDOWS 479 480 // load value into all 32 bytes of ymm7 register 481 __ movl(rcx, VM_Version::ymm_test_value()); 482 483 __ movdl(xmm0, rcx); 484 __ pshufd(xmm0, xmm0, 0x00); 485 __ vinsertf128_high(xmm0, xmm0); 486 __ vmovdqu(xmm7, xmm0); 487 #ifdef _LP64 488 __ vmovdqu(xmm8, xmm0); 489 __ vmovdqu(xmm15, xmm0); 490 #endif 491 VM_Version::clean_cpuFeatures(); 492 493 __ bind(save_restore_except); 494 __ xorl(rsi, rsi); 495 VM_Version::set_cpuinfo_segv_addr(__ pc()); 496 // Generate SEGV 497 __ movl(rax, Address(rsi, 0)); 498 499 VM_Version::set_cpuinfo_cont_addr(__ pc()); 500 // Returns here after signal. Save xmm0 to check it later. 
501 502 // If UseAVX is uninitialized or is set by the user to include EVEX 503 if (use_evex) { 504 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 505 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 506 __ movl(rax, 0x10000); 507 __ andl(rax, Address(rsi, 4)); 508 __ cmpl(rax, 0x10000); 509 __ jcc(Assembler::notEqual, legacy_save_restore); 510 // check _cpuid_info.xem_xcr0_eax.bits.opmask 511 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 512 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 513 __ movl(rax, 0xE0); 514 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 515 __ cmpl(rax, 0xE0); 516 __ jcc(Assembler::notEqual, legacy_save_restore); 517 518 if (FLAG_IS_DEFAULT(UseAVX)) { 519 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 520 __ movl(rax, Address(rsi, 0)); 521 __ cmpl(rax, 0x50654); // If it is Skylake 522 __ jcc(Assembler::equal, legacy_save_restore); 523 } 524 // EVEX check: run in lowest evex mode 525 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 526 UseAVX = 3; 527 UseSSE = 2; 528 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); 529 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); 530 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); 531 #ifdef _LP64 532 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); 533 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); 534 #endif 535 536 #ifdef _WINDOWS 537 #ifdef _LP64 538 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); 539 __ addptr(rsp, 64); 540 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); 541 __ addptr(rsp, 64); 542 #endif // _LP64 543 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); 544 __ addptr(rsp, 64); 545 #endif // _WINDOWS 546 generate_vzeroupper(wrapup); 547 VM_Version::clean_cpuFeatures(); 548 UseAVX = saved_useavx; 549 UseSSE = saved_usesse; 550 __ jmp(wrapup); 551 } 552 553 __ 
bind(legacy_save_restore); 554 // AVX check 555 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 556 UseAVX = 1; 557 UseSSE = 2; 558 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); 559 __ vmovdqu(Address(rsi, 0), xmm0); 560 __ vmovdqu(Address(rsi, 32), xmm7); 561 #ifdef _LP64 562 __ vmovdqu(Address(rsi, 64), xmm8); 563 __ vmovdqu(Address(rsi, 96), xmm15); 564 #endif 565 566 #ifdef _WINDOWS 567 #ifdef _LP64 568 __ vmovdqu(xmm15, Address(rsp, 0)); 569 __ addptr(rsp, 32); 570 __ vmovdqu(xmm8, Address(rsp, 0)); 571 __ addptr(rsp, 32); 572 #endif // _LP64 573 __ vmovdqu(xmm7, Address(rsp, 0)); 574 __ addptr(rsp, 32); 575 #endif // _WINDOWS 576 generate_vzeroupper(wrapup); 577 VM_Version::clean_cpuFeatures(); 578 UseAVX = saved_useavx; 579 UseSSE = saved_usesse; 580 581 __ bind(wrapup); 582 __ popf(); 583 __ pop(rsi); 584 __ pop(rbx); 585 __ pop(rbp); 586 __ ret(0); 587 588 # undef __ 589 590 return start; 591 }; 592 void generate_vzeroupper(Label& L_wrapup) { 593 # define __ _masm-> 594 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 595 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' 596 __ jcc(Assembler::notEqual, L_wrapup); 597 __ movl(rcx, 0x0FFF0FF0); 598 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 599 __ andl(rcx, Address(rsi, 0)); 600 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200 601 __ jcc(Assembler::equal, L_wrapup); 602 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi 603 __ jcc(Assembler::equal, L_wrapup); 604 // vzeroupper() will use a pre-computed instruction sequence that we 605 // can't compute until after we've determined CPU capabilities. 
Use 606 // uncached variant here directly to be able to bootstrap correctly 607 __ vzeroupper_uncached(); 608 # undef __ 609 } 610 address generate_detect_virt() { 611 StubCodeMark mark(this, "VM_Version", "detect_virt_stub"); 612 # define __ _masm-> 613 614 address start = __ pc(); 615 616 // Evacuate callee-saved registers 617 __ push(rbp); 618 __ push(rbx); 619 __ push(rsi); // for Windows 620 621 #ifdef _LP64 622 __ mov(rax, c_rarg0); // CPUID leaf 623 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) 624 #else 625 __ movptr(rax, Address(rsp, 16)); // CPUID leaf 626 __ movptr(rsi, Address(rsp, 20)); // register array address 627 #endif 628 629 __ cpuid(); 630 631 // Store result to register array 632 __ movl(Address(rsi, 0), rax); 633 __ movl(Address(rsi, 4), rbx); 634 __ movl(Address(rsi, 8), rcx); 635 __ movl(Address(rsi, 12), rdx); 636 637 // Epilogue 638 __ pop(rsi); 639 __ pop(rbx); 640 __ pop(rbp); 641 __ ret(0); 642 643 # undef __ 644 645 return start; 646 }; 647 648 649 address generate_getCPUIDBrandString(void) { 650 // Flags to test CPU type. 651 const uint32_t HS_EFL_AC = 0x40000; 652 const uint32_t HS_EFL_ID = 0x200000; 653 // Values for when we don't have a CPUID instruction. 
654 const int CPU_FAMILY_SHIFT = 8; 655 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 656 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 657 658 Label detect_486, cpu486, detect_586, done, ext_cpuid; 659 660 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub"); 661 # define __ _masm-> 662 663 address start = __ pc(); 664 665 // 666 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); 667 // 668 // LP64: rcx and rdx are first and second argument registers on windows 669 670 __ push(rbp); 671 #ifdef _LP64 672 __ mov(rbp, c_rarg0); // cpuid_info address 673 #else 674 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 675 #endif 676 __ push(rbx); 677 __ push(rsi); 678 __ pushf(); // preserve rbx, and flags 679 __ pop(rax); 680 __ push(rax); 681 __ mov(rcx, rax); 682 // 683 // if we are unable to change the AC flag, we have a 386 684 // 685 __ xorl(rax, HS_EFL_AC); 686 __ push(rax); 687 __ popf(); 688 __ pushf(); 689 __ pop(rax); 690 __ cmpptr(rax, rcx); 691 __ jccb(Assembler::notEqual, detect_486); 692 693 __ movl(rax, CPU_FAMILY_386); 694 __ jmp(done); 695 696 // 697 // If we are unable to change the ID flag, we have a 486 which does 698 // not support the "cpuid" instruction. 
699 // 700 __ bind(detect_486); 701 __ mov(rax, rcx); 702 __ xorl(rax, HS_EFL_ID); 703 __ push(rax); 704 __ popf(); 705 __ pushf(); 706 __ pop(rax); 707 __ cmpptr(rcx, rax); 708 __ jccb(Assembler::notEqual, detect_586); 709 710 __ bind(cpu486); 711 __ movl(rax, CPU_FAMILY_486); 712 __ jmp(done); 713 714 // 715 // At this point, we have a chip which supports the "cpuid" instruction 716 // 717 __ bind(detect_586); 718 __ xorl(rax, rax); 719 __ cpuid(); 720 __ orl(rax, rax); 721 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 722 // value of at least 1, we give up and 723 // assume a 486 724 725 // 726 // Extended cpuid(0x80000000) for processor brand string detection 727 // 728 __ bind(ext_cpuid); 729 __ movl(rax, CPUID_EXTENDED_FN); 730 __ cpuid(); 731 __ cmpl(rax, CPUID_EXTENDED_FN_4); 732 __ jcc(Assembler::below, done); 733 734 // 735 // Extended cpuid(0x80000002) // first 16 bytes in brand string 736 // 737 __ movl(rax, CPUID_EXTENDED_FN_2); 738 __ cpuid(); 739 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); 740 __ movl(Address(rsi, 0), rax); 741 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); 742 __ movl(Address(rsi, 0), rbx); 743 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); 744 __ movl(Address(rsi, 0), rcx); 745 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); 746 __ movl(Address(rsi,0), rdx); 747 748 // 749 // Extended cpuid(0x80000003) // next 16 bytes in brand string 750 // 751 __ movl(rax, CPUID_EXTENDED_FN_3); 752 __ cpuid(); 753 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); 754 __ movl(Address(rsi, 0), rax); 755 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); 756 __ movl(Address(rsi, 0), rbx); 757 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); 758 __ movl(Address(rsi, 0), rcx); 759 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); 760 __ 
movl(Address(rsi,0), rdx); 761 762 // 763 // Extended cpuid(0x80000004) // last 16 bytes in brand string 764 // 765 __ movl(rax, CPUID_EXTENDED_FN_4); 766 __ cpuid(); 767 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); 768 __ movl(Address(rsi, 0), rax); 769 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); 770 __ movl(Address(rsi, 0), rbx); 771 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); 772 __ movl(Address(rsi, 0), rcx); 773 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); 774 __ movl(Address(rsi,0), rdx); 775 776 // 777 // return 778 // 779 __ bind(done); 780 __ popf(); 781 __ pop(rsi); 782 __ pop(rbx); 783 __ pop(rbp); 784 __ ret(0); 785 786 # undef __ 787 788 return start; 789 }; 790 }; 791 792 void VM_Version::get_processor_features() { 793 794 _cpu = 4; // 486 by default 795 _model = 0; 796 _stepping = 0; 797 _features = 0; 798 _logical_processors_per_package = 1; 799 // i486 internal cache is both I&D and has a 16-byte line size 800 _L1_data_cache_line_size = 16; 801 802 // Get raw processor info 803 804 get_cpu_info_stub(&_cpuid_info); 805 806 assert_is_initialized(); 807 _cpu = extended_cpu_family(); 808 _model = extended_cpu_model(); 809 _stepping = cpu_stepping(); 810 811 if (cpu_family() > 4) { // it supports CPUID 812 _features = feature_flags(); 813 // Logical processors are only available on P4s and above, 814 // and only if hyperthreading is available. 815 _logical_processors_per_package = logical_processor_count(); 816 _L1_data_cache_line_size = L1_line_size(); 817 } 818 819 // xchg and xadd instructions 820 _supports_atomic_getset4 = true; 821 _supports_atomic_getadd4 = true; 822 LP64_ONLY(_supports_atomic_getset8 = true); 823 LP64_ONLY(_supports_atomic_getadd8 = true); 824 825 #ifdef _LP64 826 // OS should support SSE for x64 and hardware should support at least SSE2. 
827 if (!VM_Version::supports_sse2()) { 828 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 829 } 830 // in 64 bit the use of SSE2 is the minimum 831 if (UseSSE < 2) UseSSE = 2; 832 #endif 833 834 #ifdef AMD64 835 // flush_icache_stub have to be generated first. 836 // That is why Icache line size is hard coded in ICache class, 837 // see icache_x86.hpp. It is also the reason why we can't use 838 // clflush instruction in 32-bit VM since it could be running 839 // on CPU which does not support it. 840 // 841 // The only thing we can do is to verify that flushed 842 // ICache::line_size has correct value. 843 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); 844 // clflush_size is size in quadwords (8 bytes). 845 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); 846 #endif 847 848 #ifdef _LP64 849 // assigning this field effectively enables Unsafe.writebackMemory() 850 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero 851 // that is only implemented on x86_64 and only if the OS plays ball 852 if (os::supports_map_sync()) { 853 // publish data cache line flush size to generic field, otherwise 854 // let if default to zero thereby disabling writeback 855 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; 856 } 857 #endif 858 859 // Check if processor has Intel Ecore 860 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 && 861 (_model == 0x97 || _model == 0xAC || _model == 0xAF)) { 862 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true); 863 } 864 865 if (UseSSE < 4) { 866 _features &= ~CPU_SSE4_1; 867 _features &= ~CPU_SSE4_2; 868 } 869 870 if (UseSSE < 3) { 871 _features &= ~CPU_SSE3; 872 _features &= ~CPU_SSSE3; 873 _features &= ~CPU_SSE4A; 874 } 875 876 if (UseSSE < 2) 877 _features &= ~CPU_SSE2; 878 879 if (UseSSE < 1) 880 _features &= ~CPU_SSE; 881 882 //since AVX instructions is slower 
than SSE in some ZX cpus, force USEAVX=0. 883 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { 884 UseAVX = 0; 885 } 886 887 // UseSSE is set to the smaller of what hardware supports and what 888 // the command line requires. I.e., you cannot set UseSSE to 2 on 889 // older Pentiums which do not support it. 890 int use_sse_limit = 0; 891 if (UseSSE > 0) { 892 if (UseSSE > 3 && supports_sse4_1()) { 893 use_sse_limit = 4; 894 } else if (UseSSE > 2 && supports_sse3()) { 895 use_sse_limit = 3; 896 } else if (UseSSE > 1 && supports_sse2()) { 897 use_sse_limit = 2; 898 } else if (UseSSE > 0 && supports_sse()) { 899 use_sse_limit = 1; 900 } else { 901 use_sse_limit = 0; 902 } 903 } 904 if (FLAG_IS_DEFAULT(UseSSE)) { 905 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 906 } else if (UseSSE > use_sse_limit) { 907 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit); 908 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 909 } 910 911 // first try initial setting and detect what we can support 912 int use_avx_limit = 0; 913 if (UseAVX > 0) { 914 if (UseSSE < 4) { 915 // Don't use AVX if SSE is unavailable or has been disabled. 916 use_avx_limit = 0; 917 } else if (UseAVX > 2 && supports_evex()) { 918 use_avx_limit = 3; 919 } else if (UseAVX > 1 && supports_avx2()) { 920 use_avx_limit = 2; 921 } else if (UseAVX > 0 && supports_avx()) { 922 use_avx_limit = 1; 923 } else { 924 use_avx_limit = 0; 925 } 926 } 927 if (FLAG_IS_DEFAULT(UseAVX)) { 928 // Don't use AVX-512 on older Skylakes unless explicitly requested. 
929 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { 930 FLAG_SET_DEFAULT(UseAVX, 2); 931 } else { 932 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 933 } 934 } 935 if (UseAVX > use_avx_limit) { 936 if (UseSSE < 4) { 937 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX); 938 } else { 939 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit); 940 } 941 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 942 } 943 944 if (UseAVX < 3) { 945 _features &= ~CPU_AVX512F; 946 _features &= ~CPU_AVX512DQ; 947 _features &= ~CPU_AVX512CD; 948 _features &= ~CPU_AVX512BW; 949 _features &= ~CPU_AVX512VL; 950 _features &= ~CPU_AVX512_VPOPCNTDQ; 951 _features &= ~CPU_AVX512_VPCLMULQDQ; 952 _features &= ~CPU_AVX512_VAES; 953 _features &= ~CPU_AVX512_VNNI; 954 _features &= ~CPU_AVX512_VBMI; 955 _features &= ~CPU_AVX512_VBMI2; 956 _features &= ~CPU_AVX512_BITALG; 957 _features &= ~CPU_AVX512_IFMA; 958 } 959 960 if (UseAVX < 2) 961 _features &= ~CPU_AVX2; 962 963 if (UseAVX < 1) { 964 _features &= ~CPU_AVX; 965 _features &= ~CPU_VZEROUPPER; 966 _features &= ~CPU_F16C; 967 } 968 969 if (logical_processors_per_package() == 1) { 970 // HT processor could be installed on a system which doesn't support HT. 
971 _features &= ~CPU_HT; 972 } 973 974 if (is_intel()) { // Intel cpus specific settings 975 if (is_knights_family()) { 976 _features &= ~CPU_VZEROUPPER; 977 _features &= ~CPU_AVX512BW; 978 _features &= ~CPU_AVX512VL; 979 _features &= ~CPU_AVX512DQ; 980 _features &= ~CPU_AVX512_VNNI; 981 _features &= ~CPU_AVX512_VAES; 982 _features &= ~CPU_AVX512_VPOPCNTDQ; 983 _features &= ~CPU_AVX512_VPCLMULQDQ; 984 _features &= ~CPU_AVX512_VBMI; 985 _features &= ~CPU_AVX512_VBMI2; 986 _features &= ~CPU_CLWB; 987 _features &= ~CPU_FLUSHOPT; 988 _features &= ~CPU_GFNI; 989 _features &= ~CPU_AVX512_BITALG; 990 _features &= ~CPU_AVX512_IFMA; 991 } 992 } 993 994 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) { 995 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum(); 996 } else { 997 _has_intel_jcc_erratum = IntelJccErratumMitigation; 998 } 999 1000 char buf[1024]; 1001 int res = jio_snprintf( 1002 buf, sizeof(buf), 1003 "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x", 1004 cores_per_cpu(), threads_per_core(), 1005 cpu_family(), _model, _stepping, os::cpu_microcode_revision()); 1006 assert(res > 0, "not enough temporary space allocated"); 1007 insert_features_names(buf + res, sizeof(buf) - res, _features_names); 1008 1009 _features_string = os::strdup(buf); 1010 1011 // Use AES instructions if available. 1012 if (supports_aes()) { 1013 if (FLAG_IS_DEFAULT(UseAES)) { 1014 FLAG_SET_DEFAULT(UseAES, true); 1015 } 1016 if (!UseAES) { 1017 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1018 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); 1019 } 1020 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1021 } else { 1022 if (UseSSE > 2) { 1023 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1024 FLAG_SET_DEFAULT(UseAESIntrinsics, true); 1025 } 1026 } else { 1027 // The AES intrinsic stubs require AES instruction support (of course) 1028 // but also require sse3 mode or higher for instructions it use. 
1029 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1030 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); 1031 } 1032 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1033 } 1034 1035 // --AES-CTR begins-- 1036 if (!UseAESIntrinsics) { 1037 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1038 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled."); 1039 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1040 } 1041 } else { 1042 if (supports_sse4_1()) { 1043 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1044 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true); 1045 } 1046 } else { 1047 // The AES-CTR intrinsic stubs require AES instruction support (of course) 1048 // but also require sse4.1 mode or higher for instructions it use. 1049 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1050 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled."); 1051 } 1052 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1053 } 1054 } 1055 // --AES-CTR ends-- 1056 } 1057 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { 1058 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { 1059 warning("AES instructions are not available on this CPU"); 1060 FLAG_SET_DEFAULT(UseAES, false); 1061 } 1062 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1063 warning("AES intrinsics are not available on this CPU"); 1064 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1065 } 1066 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1067 warning("AES-CTR intrinsics are not available on this CPU"); 1068 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1069 } 1070 } 1071 1072 // Use CLMUL instructions if available. 
1073 if (supports_clmul()) { 1074 if (FLAG_IS_DEFAULT(UseCLMUL)) { 1075 UseCLMUL = true; 1076 } 1077 } else if (UseCLMUL) { 1078 if (!FLAG_IS_DEFAULT(UseCLMUL)) 1079 warning("CLMUL instructions not available on this CPU (AVX may also be required)"); 1080 FLAG_SET_DEFAULT(UseCLMUL, false); 1081 } 1082 1083 if (UseCLMUL && (UseSSE > 2)) { 1084 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { 1085 UseCRC32Intrinsics = true; 1086 } 1087 } else if (UseCRC32Intrinsics) { 1088 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) 1089 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); 1090 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); 1091 } 1092 1093 #ifdef _LP64 1094 if (supports_avx2()) { 1095 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1096 UseAdler32Intrinsics = true; 1097 } 1098 } else if (UseAdler32Intrinsics) { 1099 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1100 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)"); 1101 } 1102 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1103 } 1104 #else 1105 if (UseAdler32Intrinsics) { 1106 warning("Adler32Intrinsics not available on this CPU."); 1107 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1108 } 1109 #endif 1110 1111 if (supports_sse4_2() && supports_clmul()) { 1112 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1113 UseCRC32CIntrinsics = true; 1114 } 1115 } else if (UseCRC32CIntrinsics) { 1116 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1117 warning("CRC32C intrinsics are not available on this CPU"); 1118 } 1119 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); 1120 } 1121 1122 // GHASH/GCM intrinsics 1123 if (UseCLMUL && (UseSSE > 2)) { 1124 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { 1125 UseGHASHIntrinsics = true; 1126 } 1127 } else if (UseGHASHIntrinsics) { 1128 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) 1129 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); 1130 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 1131 } 1132 1133 // ChaCha20 
Intrinsics 1134 // As long as the system supports AVX as a baseline we can do a 1135 // SIMD-enabled block function. StubGenerator makes the determination 1136 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled 1137 // version. 1138 if (UseAVX >= 1) { 1139 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1140 UseChaCha20Intrinsics = true; 1141 } 1142 } else if (UseChaCha20Intrinsics) { 1143 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1144 warning("ChaCha20 intrinsic requires AVX instructions"); 1145 } 1146 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); 1147 } 1148 1149 // Base64 Intrinsics (Check the condition for which the intrinsic will be active) 1150 if (UseAVX >= 2) { 1151 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { 1152 UseBASE64Intrinsics = true; 1153 } 1154 } else if (UseBASE64Intrinsics) { 1155 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) 1156 warning("Base64 intrinsic requires EVEX instructions on this CPU"); 1157 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); 1158 } 1159 1160 if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions 1161 if (FLAG_IS_DEFAULT(UseFMA)) { 1162 UseFMA = true; 1163 } 1164 } else if (UseFMA) { 1165 warning("FMA instructions are not available on this CPU"); 1166 FLAG_SET_DEFAULT(UseFMA, false); 1167 } 1168 1169 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { 1170 UseMD5Intrinsics = true; 1171 } 1172 1173 if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) { 1174 if (FLAG_IS_DEFAULT(UseSHA)) { 1175 UseSHA = true; 1176 } 1177 } else if (UseSHA) { 1178 warning("SHA instructions are not available on this CPU"); 1179 FLAG_SET_DEFAULT(UseSHA, false); 1180 } 1181 1182 if (supports_sha() && supports_sse4_1() && UseSHA) { 1183 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { 1184 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); 1185 } 1186 } else if (UseSHA1Intrinsics) { 1187 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); 1188 
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); 1189 } 1190 1191 if (supports_sse4_1() && UseSHA) { 1192 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { 1193 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); 1194 } 1195 } else if (UseSHA256Intrinsics) { 1196 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); 1197 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); 1198 } 1199 1200 #ifdef _LP64 1201 // These are only supported on 64-bit 1202 if (UseSHA && supports_avx2() && supports_bmi2()) { 1203 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { 1204 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); 1205 } 1206 } else 1207 #endif 1208 if (UseSHA512Intrinsics) { 1209 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); 1210 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); 1211 } 1212 1213 if (UseSHA3Intrinsics) { 1214 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); 1215 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); 1216 } 1217 1218 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { 1219 FLAG_SET_DEFAULT(UseSHA, false); 1220 } 1221 1222 if (!supports_rtm() && UseRTMLocking) { 1223 vm_exit_during_initialization("RTM instructions are not available on this CPU"); 1224 } 1225 1226 #if INCLUDE_RTM_OPT 1227 if (UseRTMLocking) { 1228 if (!CompilerConfig::is_c2_enabled()) { 1229 // Only C2 does RTM locking optimization. 1230 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); 1231 } 1232 if (is_intel_family_core()) { 1233 if ((_model == CPU_MODEL_HASWELL_E3) || 1234 (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) || 1235 (_model == CPU_MODEL_BROADWELL && _stepping < 4)) { 1236 // currently a collision between SKL and HSW_E3 1237 if (!UnlockExperimentalVMOptions && UseAVX < 3) { 1238 vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this " 1239 "platform. 
It must be enabled via -XX:+UnlockExperimentalVMOptions flag."); 1240 } else { 1241 warning("UseRTMLocking is only available as experimental option on this platform."); 1242 } 1243 } 1244 } 1245 if (!FLAG_IS_CMDLINE(UseRTMLocking)) { 1246 // RTM locking should be used only for applications with 1247 // high lock contention. For now we do not use it by default. 1248 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); 1249 } 1250 } else { // !UseRTMLocking 1251 if (UseRTMForStackLocks) { 1252 if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { 1253 warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); 1254 } 1255 FLAG_SET_DEFAULT(UseRTMForStackLocks, false); 1256 } 1257 if (UseRTMDeopt) { 1258 FLAG_SET_DEFAULT(UseRTMDeopt, false); 1259 } 1260 if (PrintPreciseRTMLockingStatistics) { 1261 FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); 1262 } 1263 } 1264 #else 1265 if (UseRTMLocking) { 1266 // Only C2 does RTM locking optimization. 1267 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); 1268 } 1269 #endif 1270 1271 #ifdef COMPILER2 1272 if (UseFPUForSpilling) { 1273 if (UseSSE < 2) { 1274 // Only supported with SSE2+ 1275 FLAG_SET_DEFAULT(UseFPUForSpilling, false); 1276 } 1277 } 1278 #endif 1279 1280 #if COMPILER2_OR_JVMCI 1281 int max_vector_size = 0; 1282 if (UseSSE < 2) { 1283 // Vectors (in XMM) are only supported with SSE2+ 1284 // SSE is always 2 on x64. 
1285 max_vector_size = 0; 1286 } else if (UseAVX == 0 || !os_supports_avx_vectors()) { 1287 // 16 byte vectors (in XMM) are supported with SSE2+ 1288 max_vector_size = 16; 1289 } else if (UseAVX == 1 || UseAVX == 2) { 1290 // 32 bytes vectors (in YMM) are only supported with AVX+ 1291 max_vector_size = 32; 1292 } else if (UseAVX > 2) { 1293 // 64 bytes vectors (in ZMM) are only supported with AVX 3 1294 max_vector_size = 64; 1295 } 1296 1297 #ifdef _LP64 1298 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit 1299 #else 1300 int min_vector_size = 0; 1301 #endif 1302 1303 if (!FLAG_IS_DEFAULT(MaxVectorSize)) { 1304 if (MaxVectorSize < min_vector_size) { 1305 warning("MaxVectorSize must be at least %i on this platform", min_vector_size); 1306 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); 1307 } 1308 if (MaxVectorSize > max_vector_size) { 1309 warning("MaxVectorSize must be at most %i on this platform", max_vector_size); 1310 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1311 } 1312 if (!is_power_of_2(MaxVectorSize)) { 1313 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); 1314 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1315 } 1316 } else { 1317 // If default, use highest supported configuration 1318 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1319 } 1320 1321 #if defined(COMPILER2) && defined(ASSERT) 1322 if (MaxVectorSize > 0) { 1323 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { 1324 tty->print_cr("State of YMM registers after signal handle:"); 1325 int nreg = 2 LP64_ONLY(+2); 1326 const char* ymm_name[4] = {"0", "7", "8", "15"}; 1327 for (int i = 0; i < nreg; i++) { 1328 tty->print("YMM%s:", ymm_name[i]); 1329 for (int j = 7; j >=0; j--) { 1330 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); 1331 } 1332 tty->cr(); 1333 } 1334 } 1335 } 1336 #endif // COMPILER2 && ASSERT 1337 1338 #ifdef _LP64 1339 if (supports_avx512ifma() && supports_avx512vlbw() && 
MaxVectorSize >= 64) { 1340 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { 1341 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); 1342 } 1343 } else 1344 #endif 1345 if (UsePoly1305Intrinsics) { 1346 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU."); 1347 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); 1348 } 1349 1350 #ifdef _LP64 1351 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1352 UseMultiplyToLenIntrinsic = true; 1353 } 1354 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1355 UseSquareToLenIntrinsic = true; 1356 } 1357 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1358 UseMulAddIntrinsic = true; 1359 } 1360 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1361 UseMontgomeryMultiplyIntrinsic = true; 1362 } 1363 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1364 UseMontgomerySquareIntrinsic = true; 1365 } 1366 #else 1367 if (UseMultiplyToLenIntrinsic) { 1368 if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1369 warning("multiplyToLen intrinsic is not available in 32-bit VM"); 1370 } 1371 FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); 1372 } 1373 if (UseMontgomeryMultiplyIntrinsic) { 1374 if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1375 warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); 1376 } 1377 FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); 1378 } 1379 if (UseMontgomerySquareIntrinsic) { 1380 if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1381 warning("montgomerySquare intrinsic is not available in 32-bit VM"); 1382 } 1383 FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); 1384 } 1385 if (UseSquareToLenIntrinsic) { 1386 if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1387 warning("squareToLen intrinsic is not available in 32-bit VM"); 1388 } 1389 FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false); 1390 } 1391 if (UseMulAddIntrinsic) { 1392 if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1393 warning("mulAdd intrinsic is not available in 32-bit VM"); 1394 } 
1395 FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); 1396 } 1397 #endif // _LP64 1398 #endif // COMPILER2_OR_JVMCI 1399 1400 // On new cpus instructions which update whole XMM register should be used 1401 // to prevent partial register stall due to dependencies on high half. 1402 // 1403 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1404 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1405 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1406 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1407 1408 1409 if (is_zx()) { // ZX cpus specific settings 1410 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1411 UseStoreImmI16 = false; // don't use it on ZX cpus 1412 } 1413 if ((cpu_family() == 6) || (cpu_family() == 7)) { 1414 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1415 // Use it on all ZX cpus 1416 UseAddressNop = true; 1417 } 1418 } 1419 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1420 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus 1421 } 1422 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1423 if (supports_sse3()) { 1424 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus 1425 } else { 1426 UseXmmRegToRegMoveAll = false; 1427 } 1428 } 1429 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus 1430 #ifdef COMPILER2 1431 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1432 // For new ZX cpus do the next optimization: 1433 // don't align the beginning of a loop if there are enough instructions 1434 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1435 // in current fetch line (OptoLoopAlignment) or the padding 1436 // is big (> MaxLoopPad). 1437 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of 1438 // generated NOP instructions. 11 is the largest size of one 1439 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 
1440 MaxLoopPad = 11; 1441 } 1442 #endif // COMPILER2 1443 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1444 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus 1445 } 1446 if (supports_sse4_2()) { // new ZX cpus 1447 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1448 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus 1449 } 1450 } 1451 if (supports_sse4_2()) { 1452 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1453 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1454 } 1455 } else { 1456 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1457 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1458 } 1459 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1460 } 1461 } 1462 1463 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1464 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1465 } 1466 } 1467 1468 if (is_amd_family()) { // AMD cpus specific settings 1469 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { 1470 // Use it on new AMD cpus starting from Opteron. 1471 UseAddressNop = true; 1472 } 1473 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { 1474 // Use it on new AMD cpus starting from Opteron. 
1475 UseNewLongLShift = true; 1476 } 1477 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1478 if (supports_sse4a()) { 1479 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1480 } else { 1481 UseXmmLoadAndClearUpper = false; 1482 } 1483 } 1484 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1485 if (supports_sse4a()) { 1486 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1487 } else { 1488 UseXmmRegToRegMoveAll = false; 1489 } 1490 } 1491 if (FLAG_IS_DEFAULT(UseXmmI2F)) { 1492 if (supports_sse4a()) { 1493 UseXmmI2F = true; 1494 } else { 1495 UseXmmI2F = false; 1496 } 1497 } 1498 if (FLAG_IS_DEFAULT(UseXmmI2D)) { 1499 if (supports_sse4a()) { 1500 UseXmmI2D = true; 1501 } else { 1502 UseXmmI2D = false; 1503 } 1504 } 1505 if (supports_sse4_2()) { 1506 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1507 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1508 } 1509 } else { 1510 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1511 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. 
Intrinsics will be disabled."); 1512 } 1513 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1514 } 1515 1516 // some defaults for AMD family 15h 1517 if (cpu_family() == 0x15) { 1518 // On family 15h processors default is no sw prefetch 1519 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1520 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1521 } 1522 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 1523 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1524 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1525 } 1526 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 1527 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1528 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1529 } 1530 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1531 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1532 } 1533 } 1534 1535 #ifdef COMPILER2 1536 if (cpu_family() < 0x17 && MaxVectorSize > 16) { 1537 // Limit vectors size to 16 bytes on AMD cpus < 17h. 
1538 FLAG_SET_DEFAULT(MaxVectorSize, 16); 1539 } 1540 #endif // COMPILER2 1541 1542 // Some defaults for AMD family >= 17h && Hygon family 18h 1543 if (cpu_family() >= 0x17) { 1544 // On family >=17h processors use XMM and UnalignedLoadStores 1545 // for Array Copy 1546 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1547 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1548 } 1549 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1550 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1551 } 1552 #ifdef COMPILER2 1553 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1554 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1555 } 1556 #endif 1557 } 1558 } 1559 1560 if (is_intel()) { // Intel cpus specific settings 1561 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1562 UseStoreImmI16 = false; // don't use it on Intel cpus 1563 } 1564 if (cpu_family() == 6 || cpu_family() == 15) { 1565 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1566 // Use it on all Intel cpus starting from PentiumPro 1567 UseAddressNop = true; 1568 } 1569 } 1570 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1571 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1572 } 1573 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1574 if (supports_sse3()) { 1575 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1576 } else { 1577 UseXmmRegToRegMoveAll = false; 1578 } 1579 } 1580 if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus 1581 #ifdef COMPILER2 1582 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1583 // For new Intel cpus do the next optimization: 1584 // don't align the beginning of a loop if there are enough instructions 1585 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1586 // in current fetch line (OptoLoopAlignment) or the padding 1587 // is big (> MaxLoopPad). 1588 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1589 // generated NOP instructions. 
11 is the largest size of one 1590 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1591 MaxLoopPad = 11; 1592 } 1593 #endif // COMPILER2 1594 1595 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1596 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 1597 } 1598 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus 1599 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1600 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1601 } 1602 } 1603 if (supports_sse4_2()) { 1604 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1605 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1606 } 1607 } else { 1608 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1609 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1610 } 1611 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1612 } 1613 } 1614 if (is_atom_family() || is_knights_family()) { 1615 #ifdef COMPILER2 1616 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1617 OptoScheduling = true; 1618 } 1619 #endif 1620 if (supports_sse4_2()) { // Silvermont 1621 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1622 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1623 } 1624 } 1625 if (FLAG_IS_DEFAULT(UseIncDec)) { 1626 FLAG_SET_DEFAULT(UseIncDec, false); 1627 } 1628 } 1629 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1630 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1631 } 1632 #ifdef COMPILER2 1633 if (UseAVX > 2) { 1634 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1635 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1636 ArrayOperationPartialInlineSize != 0 && 1637 ArrayOperationPartialInlineSize != 16 && 1638 ArrayOperationPartialInlineSize != 32 && 1639 ArrayOperationPartialInlineSize != 64)) { 1640 int inline_size = 0; 1641 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1642 inline_size = 64; 1643 } else if (MaxVectorSize >= 32) { 1644 inline_size = 32; 1645 } else if 
(MaxVectorSize >= 16) { 1646 inline_size = 16; 1647 } 1648 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1649 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); 1650 } 1651 ArrayOperationPartialInlineSize = inline_size; 1652 } 1653 1654 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1655 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; 1656 if (ArrayOperationPartialInlineSize) { 1657 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize); 1658 } else { 1659 warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize); 1660 } 1661 } 1662 } 1663 #endif 1664 } 1665 1666 #ifdef COMPILER2 1667 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1668 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { 1669 OptimizeFill = false; 1670 } 1671 } 1672 #endif 1673 1674 #ifdef _LP64 1675 if (UseSSE42Intrinsics) { 1676 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1677 UseVectorizedMismatchIntrinsic = true; 1678 } 1679 } else if (UseVectorizedMismatchIntrinsic) { 1680 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1681 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1682 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1683 } 1684 if (UseAVX >= 2) { 1685 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true); 1686 } else if (UseVectorizedHashCodeIntrinsic) { 1687 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) 1688 warning("vectorizedHashCode intrinsics are not available on this CPU"); 1689 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1690 } 1691 #else 1692 if (UseVectorizedMismatchIntrinsic) { 1693 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1694 warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); 1695 } 1696 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1697 } 1698 if (UseVectorizedHashCodeIntrinsic) { 1699 if 
(!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) { 1700 warning("vectorizedHashCode intrinsic is not available in 32-bit VM"); 1701 } 1702 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); 1703 } 1704 #endif // _LP64 1705 1706 // Use count leading zeros count instruction if available. 1707 if (supports_lzcnt()) { 1708 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1709 UseCountLeadingZerosInstruction = true; 1710 } 1711 } else if (UseCountLeadingZerosInstruction) { 1712 warning("lzcnt instruction is not available on this CPU"); 1713 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1714 } 1715 1716 // Use count trailing zeros instruction if available 1717 if (supports_bmi1()) { 1718 // tzcnt does not require VEX prefix 1719 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1720 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1721 // Don't use tzcnt if BMI1 is switched off on command line. 1722 UseCountTrailingZerosInstruction = false; 1723 } else { 1724 UseCountTrailingZerosInstruction = true; 1725 } 1726 } 1727 } else if (UseCountTrailingZerosInstruction) { 1728 warning("tzcnt instruction is not available on this CPU"); 1729 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1730 } 1731 1732 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1733 // VEX prefix is generated only when AVX > 0. 
1734 if (supports_bmi1() && supports_avx()) { 1735 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1736 UseBMI1Instructions = true; 1737 } 1738 } else if (UseBMI1Instructions) { 1739 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1740 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1741 } 1742 1743 if (supports_bmi2() && supports_avx()) { 1744 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1745 UseBMI2Instructions = true; 1746 } 1747 } else if (UseBMI2Instructions) { 1748 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1749 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1750 } 1751 1752 // Use population count instruction if available. 1753 if (supports_popcnt()) { 1754 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1755 UsePopCountInstruction = true; 1756 } 1757 } else if (UsePopCountInstruction) { 1758 warning("POPCNT instruction is not available on this CPU"); 1759 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1760 } 1761 1762 // Use fast-string operations if available. 
1763 if (supports_erms()) { 1764 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1765 UseFastStosb = true; 1766 } 1767 } else if (UseFastStosb) { 1768 warning("fast-string operations are not available on this CPU"); 1769 FLAG_SET_DEFAULT(UseFastStosb, false); 1770 } 1771 1772 // For AMD Processors use XMM/YMM MOVDQU instructions 1773 // for Object Initialization as default 1774 if (is_amd() && cpu_family() >= 0x19) { 1775 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1776 UseFastStosb = false; 1777 } 1778 } 1779 1780 #ifdef COMPILER2 1781 if (is_intel() && MaxVectorSize > 16) { 1782 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1783 UseFastStosb = false; 1784 } 1785 } 1786 #endif 1787 1788 // Use XMM/YMM MOVDQU instruction for Object Initialization 1789 if (UseSSE >= 2 && UseUnalignedLoadStores) { 1790 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1791 UseXMMForObjInit = true; 1792 } 1793 } else if (UseXMMForObjInit) { 1794 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1795 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1796 } 1797 1798 #ifdef COMPILER2 1799 if (FLAG_IS_DEFAULT(AlignVector)) { 1800 // Modern processors allow misaligned memory operations for vectors. 
1801 AlignVector = !UseUnalignedLoadStores; 1802 } 1803 #endif // COMPILER2 1804 1805 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1806 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1807 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1808 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1809 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1810 } 1811 } 1812 1813 // Allocation prefetch settings 1814 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1815 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1816 (cache_line_size > AllocatePrefetchStepSize)) { 1817 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1818 } 1819 1820 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1821 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1822 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1823 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); 1824 } 1825 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1826 } 1827 1828 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1829 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1830 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1831 } 1832 1833 if (is_intel() && cpu_family() == 6 && supports_sse3()) { 1834 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1835 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1836 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1837 } 1838 #ifdef COMPILER2 1839 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1840 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1841 } 1842 #endif 1843 } 1844 1845 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1846 #ifdef COMPILER2 1847 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1848 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1849 } 1850 #endif 1851 } 1852 1853 #ifdef _LP64 1854 // Prefetch settings 1855 
1856 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1857 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 1858 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1859 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1860 1861 // gc copy/scan is disabled if prefetchw isn't supported, because 1862 // Prefetch::write emits an inlined prefetchw on Linux. 1863 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1864 // The used prefetcht0 instruction works for both amd64 and em64t. 1865 1866 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1867 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1868 } 1869 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1870 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1871 } 1872 #endif 1873 1874 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1875 (cache_line_size > ContendedPaddingWidth)) 1876 ContendedPaddingWidth = cache_line_size; 1877 1878 // This machine allows unaligned memory accesses 1879 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1880 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1881 } 1882 1883 #ifndef PRODUCT 1884 if (log_is_enabled(Info, os, cpu)) { 1885 LogStream ls(Log(os, cpu)::info()); 1886 outputStream* log = &ls; 1887 log->print_cr("Logical CPUs per core: %u", 1888 logical_processors_per_package()); 1889 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1890 log->print("UseSSE=%d", UseSSE); 1891 if (UseAVX > 0) { 1892 log->print(" UseAVX=%d", UseAVX); 1893 } 1894 if (UseAES) { 1895 log->print(" UseAES=1"); 1896 } 1897 #ifdef COMPILER2 1898 if (MaxVectorSize > 0) { 1899 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1900 } 1901 #endif 1902 log->cr(); 1903 log->print("Allocation"); 1904 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { 1905 log->print_cr(": no prefetching"); 1906 } else { 1907 log->print(" prefetching: "); 1908 if 
(UseSSE == 0 && supports_3dnow_prefetch()) {
        log->print("PREFETCHW");
      } else if (UseSSE >= 1) {
        // With SSE available, report which prefetch flavor AllocatePrefetchInstr selected.
        if (AllocatePrefetchInstr == 0) {
          log->print("PREFETCHNTA");
        } else if (AllocatePrefetchInstr == 1) {
          log->print("PREFETCHT0");
        } else if (AllocatePrefetchInstr == 2) {
          log->print("PREFETCHT2");
        } else if (AllocatePrefetchInstr == 3) {
          log->print("PREFETCHW");
        }
      }
      if (AllocatePrefetchLines > 1) {
        log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
      } else {
        log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
      }
    }

    // Log the effective prefetch intervals and contended-padding width, when enabled.
    if (PrefetchCopyIntervalInBytes > 0) {
      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (ContendedPaddingWidth > 0) {
      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
  // Signum/copySign intrinsics are switched on by default here with no
  // ISA-feature guard visible at this point (only honored if still at default).
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
    FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
    FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
}

// Print which hypervisor environment (if any) was detected, to the given
// stream. For VMware guests, additionally dump the extended virtualization
// details collected by VirtualizationSupport. Prints nothing when no
// virtualization was detected.
void VM_Version::print_platform_virtualization_info(outputStream* st) {
  VirtualizationType vrt = VM_Version::get_detected_virtualization();
  if (vrt == XenHVM) {
    st->print_cr("Xen hardware-assisted virtualization detected");
  } else if (vrt == KVM) {
    st->print_cr("KVM virtualization detected");
  } else if (vrt == VMWare) {
    st->print_cr("VMWare virtualization detected");
    VirtualizationSupport::print_virtualization_info(st);
  } else if (vrt == HyperV) {
    st->print_cr("Hyper-V virtualization detected");
  } else if (vrt == HyperVRole) {
    st->print_cr("Hyper-V role detected");
  }
}

// Returns true iff this CPU's model/stepping appears in Intel's published
// table of parts affected by the JCC (Jump Conditional Code) erratum.
// Non-Intel (and non-Core-family Intel) CPUs are never affected.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  // Match first on _model, then on _stepping, per the table above.
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code
name Skylake Y 1986 return _stepping == 0x3; 1987 case 0x55: 1988 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1989 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1990 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 1991 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 1992 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 1993 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 1994 return _stepping == 0x4 || _stepping == 0x7; 1995 case 0x5E: 1996 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 1997 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 1998 return _stepping == 0x3; 1999 case 0x9E: 2000 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2001 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2002 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2003 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2004 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2005 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H 2006 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 2007 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake 
S (6+2) x/KBP 2008 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 2009 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 2010 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 2011 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 2012 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2) 2013 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 2014 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 2015 case 0xA5: 2016 // Not in Intel documentation. 2017 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 2018 return true; 2019 case 0xA6: 2020 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 2021 return _stepping == 0x0; 2022 case 0xAE: 2023 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2024 return _stepping == 0xA; 2025 default: 2026 // If we are running on another intel machine not recognized in the table, we are okay. 
2027 return false; 2028 } 2029 } 2030 2031 // On Xen, the cpuid instruction returns 2032 // eax / registers[0]: Version of Xen 2033 // ebx / registers[1]: chars 'XenV' 2034 // ecx / registers[2]: chars 'MMXe' 2035 // edx / registers[3]: chars 'nVMM' 2036 // 2037 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2038 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2039 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2040 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2041 // 2042 // more information : 2043 // https://kb.vmware.com/s/article/1009458 2044 // 2045 void VM_Version::check_virtualizations() { 2046 uint32_t registers[4] = {0}; 2047 char signature[13] = {0}; 2048 2049 // Xen cpuid leaves can be found 0x100 aligned boundary starting 2050 // from 0x40000000 until 0x40010000. 2051 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2052 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2053 detect_virt_stub(leaf, registers); 2054 memcpy(signature, ®isters[1], 12); 2055 2056 if (strncmp("VMwareVMware", signature, 12) == 0) { 2057 Abstract_VM_Version::_detected_virtualization = VMWare; 2058 // check for extended metrics from guestlib 2059 VirtualizationSupport::initialize(); 2060 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2061 Abstract_VM_Version::_detected_virtualization = HyperV; 2062 #ifdef _WINDOWS 2063 // CPUID leaf 0x40000007 is available to the root partition only. 2064 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details. 
      // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}

#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2 &&
         is_intel_cascade_lake();
}
#endif

// Cascade Lake reuses the Skylake model id; stepping >= 5 distinguishes it.
bool VM_Version::is_intel_cascade_lake() {
  return is_intel_skylake() && _stepping >= 5;
}

// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// The Intel platforms that supports the serialize instruction
// has improved implementation of 64-byte load/stores and so the default
// threshold is set to 0 for these platforms.
int VM_Version::avx3_threshold() {
  return (is_intel_family_core() &&
          supports_serialize() &&
          FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}

static bool _vm_version_initialized = false;

// Generate the cpuid/virtualization-probe stubs and detect all CPU features.
// Must run very early in VM startup.
void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());

  get_processor_features();

  LP64_ONLY(Assembler::precompute_instructions();)

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}

// CPUID family ids (CPUID.1:EAX family field).
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Bits of CPUID.80000001H:EDX used below.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// Bits of CPUID.1:EDX (standard feature flags); index == bit position.
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;

static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;

// Marketing family names indexed by CPUID family id (Intel).
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

// Marketing family names indexed by CPUID family id (AMD).
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};

// Human-readable names for CPUID.1:EDX feature bits; index == bit number,
// "" marks bits that are not reported.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

// Names for CPUID.80000001H:EDX feature bits.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};

// Names for CPUID.1:ECX feature bits.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};

// Names for CPUID.80000001H:ECX feature bits.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};

// Generate the stub used to fetch the 48-byte CPU brand string via cpuid.
void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                   g.generate_getCPUIDBrandString());
}

// Model name for family-6 ("Pentium Pro") CPUs; nullptr when unknown.
const char* VM_Version::cpu_model_description(void) {
  uint32_t cpu_family = extended_cpu_family();
  uint32_t cpu_model = extended_cpu_model();
  const char* model = nullptr;

  if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
    // Walk the table up to cpu_model; the nullptr sentinel bounds the scan
    // for models beyond the end of the table.
    for (uint32_t i = 0; i <= cpu_model; i++) {
      model = _model_id_pentium_pro[i];
      if (model == nullptr) {
        break;
      }
    }
  }
  return model;
}

// Extended brand string, lazily fetched and cached; nullptr on failure.
const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr;
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}

// Legacy brand id name (CPUID.1:EBX[7:0]); nullptr when past end of table.
const char* VM_Version::cpu_brand(void) {
  const char* brand = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    // Walk to entry brand_num; the nullptr sentinel stops the walk first
    // when brand_num exceeds the table length.
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}

bool VM_Version::cpu_is_em64t(void) {
  return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
}

bool VM_Version::is_netburst(void) {
  return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
}

// Invariant-TSC support with vendor-specific exceptions applied.
bool VM_Version::supports_tscinv_ext(void) {
  if (!supports_tscinv_bit()) {
    return false;
  }

  if (is_intel()) {
    return true;
  }

  if (is_amd()) {
    return !is_amd_Barcelona();
  }

  if (is_hygon()) {
    return true;
  }

  return false;
}

void VM_Version::resolve_cpu_information_details(void) {

  // in future we want to base this information on proper cpu
  // and cache topology enumeration such as:
  // Intel 64 Architecture Processor Topology Enumeration
  // which supports system cpu and cache topology enumeration
  // either using 2xAPICIDs or initial APICIDs

  // currently only rough cpu information estimates
  // which will not necessarily reflect the exact configuration of the system

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out number of threads per cpu package
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // use amount of threads visible to the process in order to guess number of sockets
  _no_of_sockets = _no_of_threads / threads_per_package;

  // process might only see a subset of the total number of threads
  // from a single processor package. Virtualization/resource management for example.
  // If so then just write a hard 1 as num of pkgs.
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}

// Vendor-specific marketing family name; may return nullptr for Intel
// family 6 models not present in the model table.
const char* VM_Version::cpu_family_description(void) {
  int cpu_family_id = extended_cpu_family();
  if (is_amd()) {
    if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
      return _family_id_amd[cpu_family_id];
    }
  }
  if (is_intel()) {
    if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
      return cpu_model_description();
    }
    if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
      return _family_id_intel[cpu_family_id];
    }
  }
  if (is_hygon()) {
    return "Dhyana";
  }
  return "Unknown x86";
}

// Write a one-line CPU type summary (vendor, family, key ISA extensions).
int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");

  const char* cpu_type = nullptr;
  const char* x64 = nullptr;

  if (is_intel()) {
    cpu_type = "Intel";
    x64 = cpu_is_em64t() ? " Intel64" : "";
  } else if (is_amd()) {
    cpu_type = "AMD";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else if (is_hygon()) {
    cpu_type = "Hygon";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else {
    cpu_type = "Unknown x86";
    x64 = cpu_is_em64t() ? " x86_64" : "";
  }

  jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
    cpu_type,
    cpu_family_description(),
    supports_ht() ? " (HT)" : "",
    supports_sse3() ? " SSE3" : "",
    supports_ssse3() ? " SSSE3" : "",
    supports_sse4_1() ? " SSE4.1" : "",
    supports_sse4_2() ? " SSE4.2" : "",
    supports_sse4a() ? " SSE4A" : "",
    is_netburst() ? " Netburst" : "",
    is_intel_family_core() ? " Core" : "",
    x64);

  return OS_OK;
}

// Copy the 48-byte extended brand string (12 cpuid dwords) into buf.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}

// Append a comma-separated list of supported feature names to buf.
// Returns the number of characters written (buf_len - 1 on truncation).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}

/**
 * Write a detailed description of the cpu to a given buffer, including
 * feature set.
 */
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char  vendor_id[VENDOR_LENGTH];
  const char* family = nullptr;
  const char* model = nullptr;
  const char* brand = nullptr;
  int outputLen = 0;

  family = cpu_family_description();
  if (family == nullptr) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == nullptr) {
    model = unknown;
  }

  brand = cpu_brand_string();

  if (brand == nullptr) {
    brand = cpu_brand();
    if (brand == nullptr) {
      brand = unknown;
    }
  }

  // CPUID vendor string register order is EBX, EDX, ECX, hence name_2 (edx)
  // is copied before name_1 (ecx).
  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}


// Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}

/**
 * For information about extracting the frequency from the cpu brand string, please see:
 *
 * Intel Processor Identification and the CPUID Instruction
 * Application Note 485
 * May 2012
 *
 * The return value is the frequency in Hz.
 */
int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
  const char* const brand_string = cpu_brand_string();
  if (brand_string == nullptr) {
    return 0;
  }
  const int64_t MEGA = 1000000;
  int64_t multiplier = 0;
  int64_t frequency = 0;
  uint8_t idx = 0;
  // The brand string buffer is at most 48 bytes.
  // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
  for (; idx < 48-2; ++idx) {
    // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
    // Search brand string for "yHz" where y is M, G, or T.
    if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
      if (brand_string[idx] == 'M') {
        multiplier = MEGA;
      } else if (brand_string[idx] == 'G') {
        multiplier = MEGA * 1000;
      } else if (brand_string[idx] == 'T') {
        multiplier = MEGA * MEGA;
      }
      break;
    }
  }
  if (multiplier > 0) {
    // Compute frequency (in Hz) from brand string.
2862 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2863 frequency = (brand_string[idx-4] - '0') * multiplier; 2864 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2865 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2866 } else { // format is "xxxx" 2867 frequency = (brand_string[idx-4] - '0') * 1000; 2868 frequency += (brand_string[idx-3] - '0') * 100; 2869 frequency += (brand_string[idx-2] - '0') * 10; 2870 frequency += (brand_string[idx-1] - '0'); 2871 frequency *= multiplier; 2872 } 2873 } 2874 return frequency; 2875 } 2876 2877 2878 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2879 if (_max_qualified_cpu_frequency == 0) { 2880 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2881 } 2882 return _max_qualified_cpu_frequency; 2883 } 2884 2885 uint64_t VM_Version::feature_flags() { 2886 uint64_t result = 0; 2887 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 2888 result |= CPU_CX8; 2889 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 2890 result |= CPU_CMOV; 2891 if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0) 2892 result |= CPU_FLUSH; 2893 #ifdef _LP64 2894 // clflush should always be available on x86_64 2895 // if not we are in real trouble because we rely on it 2896 // to flush the code cache. 2897 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2898 #endif 2899 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2900 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) 2901 result |= CPU_FXSR; 2902 // HT flag is set for multi-core processors also. 
  if (threads_per_core() > 1)
    result |= CPU_HT;
  // MMX: AMD-family parts may report it only in the extended leaf.
  if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
    result |= CPU_MMX;
  if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
    result |= CPU_SSE;
  if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
    result |= CPU_SSE2;
  if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
    result |= CPU_SSE3;
  if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
    result |= CPU_SSSE3;
  if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
    result |= CPU_SSE4_1;
  if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
    result |= CPU_SSE4_2;
  if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
    result |= CPU_POPCNT;
  // AVX is only claimed when the CPU supports it AND the OS has enabled
  // XSAVE management of the SSE/YMM state (OSXSAVE set, XCR0 bits set).
  if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
      _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
      _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
      _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
    result |= CPU_AVX;
    result |= CPU_VZEROUPPER;
    if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
      result |= CPU_F16C;
    if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
      result |= CPU_AVX2;
    // AVX-512 additionally requires OS-enabled opmask and ZMM state in XCR0.
    if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
      result |= CPU_AVX512F;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
        result |= CPU_AVX512CD;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
        result |= CPU_AVX512DQ;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0)
        result |= CPU_AVX512_IFMA;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
        result |= CPU_AVX512PF;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
        result |= CPU_AVX512ER;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
        result |= CPU_AVX512BW;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
        result |= CPU_AVX512VL;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        result |= CPU_AVX512_VPOPCNTDQ;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        result |= CPU_AVX512_VPCLMULQDQ;
      if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
        result |= CPU_AVX512_VAES;
      if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
        result |= CPU_GFNI;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
        result |= CPU_AVX512_VNNI;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        result |= CPU_AVX512_BITALG;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        result |= CPU_AVX512_VBMI;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        result |= CPU_AVX512_VBMI2;
    }
  }
  // Vendor-independent features from the standard and structured-extended leaves.
  if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
    result |= CPU_HV;
  if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
    result |= CPU_BMI1;
  if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
    result |= CPU_TSC;
  if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
    result |= CPU_TSCINV_BIT;
  if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
    result |= CPU_AES;
  if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
    result |= CPU_ERMS;
  if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    result |= CPU_FSRM;
  if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
    result |= CPU_CLMUL;
  if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
    result |= CPU_RTM;
  if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
    result |= CPU_ADX;
  if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
    result |= CPU_BMI2;
  if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
    result |= CPU_SHA;
  if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
    result |= CPU_FMA;
  if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
    result |= CPU_FLUSHOPT;
  if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
    result |= CPU_RDTSCP;
  if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
    result |= CPU_RDPID;

  // AMD|Hygon features.
  if (is_amd_family()) {
    if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
        (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
      result |= CPU_3DNOW_PREFETCH;
    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
      result |= CPU_LZCNT;
    if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
      result |= CPU_SSE4A;
  }

  // Intel features.
  if (is_intel()) {
    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
    if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
      result |= CPU_CLWB;
    }
    if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
      result |= CPU_SERIALIZE;
  }

  // ZX features.
  if (is_zx()) {
    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
  }

  // Protection key features.
  if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) {
    result |= CPU_PKU;
  }
  if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) {
    result |= CPU_OSPKE;
  }

  // Control flow enforcement (CET) features.
  if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) {
    result |= CPU_CET_SS;
  }
  if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) {
    result |= CPU_CET_IBT;
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    result |= CPU_TSCINV;
  }

  return result;
}

// Checks that the OS actually preserves the full AVX/EVEX register state
// across signal handling, by comparing the save areas filled in by the
// cpuinfo stub against the known test pattern.
bool VM_Version::os_supports_avx_vectors() {
  bool retVal = false;
  int nreg = 2 LP64_ONLY(+2);
  if (supports_evex()) {
    // Verify that OS save/restore all bits of EVEX registers
    // during signal processing.
3070 retVal = true; 3071 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3072 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3073 retVal = false; 3074 break; 3075 } 3076 } 3077 } else if (supports_avx()) { 3078 // Verify that OS save/restore all bits of AVX registers 3079 // during signal processing. 3080 retVal = true; 3081 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3082 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3083 retVal = false; 3084 break; 3085 } 3086 } 3087 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3088 if (retVal == false) { 3089 // Verify that OS save/restore all bits of EVEX registers 3090 // during signal processing. 3091 retVal = true; 3092 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3093 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3094 retVal = false; 3095 break; 3096 } 3097 } 3098 } 3099 } 3100 return retVal; 3101 } 3102 3103 uint VM_Version::cores_per_cpu() { 3104 uint result = 1; 3105 if (is_intel()) { 3106 bool supports_topology = supports_processor_topology(); 3107 if (supports_topology) { 3108 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3109 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3110 } 3111 if (!supports_topology || result == 0) { 3112 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3113 } 3114 } else if (is_amd_family()) { 3115 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3116 } else if (is_zx()) { 3117 bool supports_topology = supports_processor_topology(); 3118 if (supports_topology) { 3119 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3120 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3121 } 3122 if (!supports_topology || result == 0) { 3123 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3124 } 3125 } 3126 return result; 3127 } 3128 3129 uint VM_Version::threads_per_core() { 3130 uint result = 1; 3131 if (is_intel() && 
supports_processor_topology()) { 3132 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3133 } else if (is_zx() && supports_processor_topology()) { 3134 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3135 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3136 if (cpu_family() >= 0x17) { 3137 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3138 } else { 3139 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3140 cores_per_cpu(); 3141 } 3142 } 3143 return (result == 0 ? 1 : result); 3144 } 3145 3146 uint VM_Version::L1_line_size() { 3147 uint result = 0; 3148 if (is_intel()) { 3149 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3150 } else if (is_amd_family()) { 3151 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3152 } else if (is_zx()) { 3153 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3154 } 3155 if (result < 32) // not defined ? 3156 result = 32; // 32 bytes by default on x86 and other x64 3157 return result; 3158 } 3159 3160 bool VM_Version::is_intel_tsc_synched_at_init() { 3161 if (is_intel_family_core()) { 3162 uint32_t ext_model = extended_cpu_model(); 3163 if (ext_model == CPU_MODEL_NEHALEM_EP || 3164 ext_model == CPU_MODEL_WESTMERE_EP || 3165 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3166 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3167 // <= 2-socket invariant tsc support. EX versions are usually used 3168 // in > 2-socket systems and likely don't synchronize tscs at 3169 // initialization. 3170 // Code that uses tsc values must be prepared for them to arbitrarily 3171 // jump forward or backward. 3172 return true; 3173 } 3174 } 3175 return false; 3176 } 3177 3178 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3179 // Hardware prefetching (distance/size in bytes): 3180 // Pentium 3 - 64 / 32 3181 // Pentium 4 - 256 / 128 3182 // Athlon - 64 / 32 ???? 
3183 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3184 // Core - 128 / 64 3185 // 3186 // Software prefetching (distance in bytes / instruction with best score): 3187 // Pentium 3 - 128 / prefetchnta 3188 // Pentium 4 - 512 / prefetchnta 3189 // Athlon - 128 / prefetchnta 3190 // Opteron - 256 / prefetchnta 3191 // Core - 256 / prefetchnta 3192 // It will be used only when AllocatePrefetchStyle > 0 3193 3194 if (is_amd_family()) { // AMD | Hygon 3195 if (supports_sse2()) { 3196 return 256; // Opteron 3197 } else { 3198 return 128; // Athlon 3199 } 3200 } else { // Intel 3201 if (supports_sse3() && cpu_family() == 6) { 3202 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3203 return 192; 3204 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3205 #ifdef _LP64 3206 return 384; 3207 #else 3208 return 320; 3209 #endif 3210 } 3211 } 3212 if (supports_sse2()) { 3213 if (cpu_family() == 6) { 3214 return 256; // Pentium M, Core, Core2 3215 } else { 3216 return 512; // Pentium 4 3217 } 3218 } else { 3219 return 128; // Pentium 3 (and all other old CPUs) 3220 } 3221 } 3222 } 3223 3224 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3225 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3226 switch (id) { 3227 case vmIntrinsics::_floatToFloat16: 3228 case vmIntrinsics::_float16ToFloat: 3229 if (!supports_float16()) { 3230 return false; 3231 } 3232 break; 3233 default: 3234 break; 3235 } 3236 return true; 3237 }